diff --git a/MAINTAINERS b/MAINTAINERS index fbbda4671e734d..f0bb5ee1787068 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10260,7 +10260,8 @@ F: Documentation/networking/device_drivers/ethernet/intel/ F: drivers/net/ethernet/intel/ F: drivers/net/ethernet/intel/*/ F: include/linux/avf/virtchnl.h -F: include/linux/net/intel/iidc.h +F: include/linux/net/intel/ +F: include/linux/net/intel/*/ INTEL ETHERNET PROTOCOL DRIVER FOR RDMA M: Mustafa Ismail diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 9bc0a951989964..cec4a938fbd0f6 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -84,6 +84,12 @@ config E1000E_HWTS devices. The cross-timestamp is available through the PTP clock driver precise cross-timestamp ioctl (PTP_SYS_OFFSET_PRECISE). +config LIBIE + tristate + help + libie (Intel Ethernet library) is a common library containing + routines shared by several Intel Ethernet drivers. + config IGB tristate "Intel(R) 82575/82576 PCI-Express Gigabit Ethernet support" depends on PCI @@ -225,6 +231,7 @@ config I40E depends on PTP_1588_CLOCK_OPTIONAL depends on PCI select AUXILIARY_BUS + select LIBIE help This driver supports Intel(R) Ethernet Controller XL710 Family of devices. For more information on how to identify your adapter, go @@ -254,8 +261,9 @@ config IAVF tristate config I40EVF tristate "Intel(R) Ethernet Adaptive Virtual Function support" - select IAVF depends on PCI_MSI + select IAVF + select LIBIE help This driver supports virtual functions for Intel XL710, X710, X722, XXV710, and all devices advertising support for @@ -282,6 +290,7 @@ config ICE depends on GNSS || GNSS = n select AUXILIARY_BUS select DIMLIB + select LIBIE select NET_DEVLINK select PLDMFW help diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile index d80d04132073ca..ce622b4d825df7 100644 --- a/drivers/net/ethernet/intel/Makefile +++ b/drivers/net/ethernet/intel/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_I40E) += i40e/ obj-$(CONFIG_IAVF) += iavf/ obj-$(CONFIG_FM10K) += fm10k/ obj-$(CONFIG_ICE) += ice/ +obj-$(CONFIG_LIBIE) += libie/ diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index ed88e38d488b2d..25bb858268fcd6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -383,259 +383,6 @@ int i40e_aq_set_rss_key(struct i40e_hw *hw, return i40e_aq_get_set_rss_key(hw, vsi_id, key, true); } -/* The i40e_ptype_lookup table is used to convert from the 8-bit ptype in the - * hardware to a bit-field that can be used by SW to more easily determine the - * packet type. - * - * Macros are used to shorten the table lines and make this table human - * readable. - * - * We store the PTYPE in the top byte of the bit field - this is just so that - * we can check that the table doesn't have a row missing, as the index into - * the table should be the PTYPE. 
- * - * Typical work flow: - * - * IF NOT i40e_ptype_lookup[ptype].known - * THEN - * Packet is unknown - * ELSE IF i40e_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP - * Use the rest of the fields to look at the tunnels, inner protocols, etc - * ELSE - * Use the enum i40e_rx_l2_ptype to decode the packet type - * ENDIF - */ - -/* macro to make the table lines short, use explicit indexing with [PTYPE] */ -#define I40E_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - [PTYPE] = { \ - 1, \ - I40E_RX_PTYPE_OUTER_##OUTER_IP, \ - I40E_RX_PTYPE_OUTER_##OUTER_IP_VER, \ - I40E_RX_PTYPE_##OUTER_FRAG, \ - I40E_RX_PTYPE_TUNNEL_##T, \ - I40E_RX_PTYPE_TUNNEL_END_##TE, \ - I40E_RX_PTYPE_##TEF, \ - I40E_RX_PTYPE_INNER_PROT_##I, \ - I40E_RX_PTYPE_PAYLOAD_LAYER_##PL } - -#define I40E_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } - -/* shorter macros makes the table fit but are terse */ -#define I40E_RX_PTYPE_NOF I40E_RX_PTYPE_NOT_FRAG -#define I40E_RX_PTYPE_FRG I40E_RX_PTYPE_FRAG -#define I40E_RX_PTYPE_INNER_PROT_TS I40E_RX_PTYPE_INNER_PROT_TIMESYNC - -/* Lookup table mapping in the 8-bit HW PTYPE to the bit field for decoding */ -struct i40e_rx_ptype_decoded i40e_ptype_lookup[BIT(8)] = { - /* L2 Packet types */ - I40E_PTT_UNUSED_ENTRY(0), - I40E_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, TS, PAY2), - I40E_PTT(3, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT_UNUSED_ENTRY(4), - I40E_PTT_UNUSED_ENTRY(5), - I40E_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT_UNUSED_ENTRY(8), - I40E_PTT_UNUSED_ENTRY(9), - I40E_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - I40E_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - - /* Non Tunneled IPv4 */ - I40E_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(25), - I40E_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), - I40E_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4), - I40E_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv4 --> IPv4 */ - I40E_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - I40E_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - I40E_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(32), - I40E_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - I40E_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - I40E_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> IPv6 */ - I40E_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - I40E_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - I40E_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(39), - I40E_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - 
I40E_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - I40E_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT */ - I40E_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> IPv4 */ - I40E_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - I40E_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - I40E_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(47), - I40E_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - I40E_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - I40E_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> IPv6 */ - I40E_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - I40E_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - I40E_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(54), - I40E_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - I40E_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - I40E_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC */ - I40E_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> MAC --> IPv4 */ - I40E_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - I40E_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - I40E_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(62), - I40E_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - I40E_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - I40E_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ - I40E_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - I40E_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - I40E_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(69), - I40E_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - I40E_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - I40E_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC/VLAN */ - I40E_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ - I40E_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - I40E_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - I40E_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(77), - I40E_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - I40E_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - I40E_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ - I40E_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - I40E_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - I40E_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(84), - I40E_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - I40E_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - I40E_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* Non Tunneled IPv6 */ - I40E_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), - 
I40E_PTT_UNUSED_ENTRY(91), - I40E_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), - I40E_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), - I40E_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv6 --> IPv4 */ - I40E_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - I40E_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - I40E_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(98), - I40E_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - I40E_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - I40E_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> IPv6 */ - I40E_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - I40E_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - I40E_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(105), - I40E_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - I40E_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - I40E_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT */ - I40E_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> IPv4 */ - I40E_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - I40E_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - I40E_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(113), - I40E_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - I40E_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - I40E_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> IPv6 */ - I40E_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - I40E_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - I40E_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(120), - I40E_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - I40E_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - I40E_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC */ - I40E_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ - I40E_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - I40E_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - I40E_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(128), - I40E_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - I40E_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - I40E_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ - I40E_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - I40E_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - I40E_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(135), - I40E_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - I40E_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - I40E_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN */ - I40E_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ - I40E_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - I40E_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - I40E_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - 
I40E_PTT_UNUSED_ENTRY(143), - I40E_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - I40E_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - I40E_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */ - I40E_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - I40E_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - I40E_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(150), - I40E_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - I40E_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - I40E_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* unused entries */ - [154 ... 255] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } -}; - /** * i40e_init_shared_code - Initialize the shared code * @hw: pointer to hardware structure diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index c8ff5675b29d8b..d89a5fff15983d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -97,6 +97,7 @@ MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all), Debug mask (0x8XXXXXXX MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver"); +MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); static struct workqueue_struct *i40e_wq; diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index fe845987d99a55..5287d0ef32d5c7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -380,13 +380,6 @@ void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status); int i40e_set_mac_type(struct i40e_hw *hw); -extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[]; - -static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype) -{ - return i40e_ptype_lookup[ptype]; -} - /** * i40e_virtchnl_link_speed - Convert AdminQ link_speed to virtchnl definition * @link_speed: the speed to convert diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index c8c2cbaa0ede6c..e4bfc7e3c076e1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2018 Intel Corporation. */ -#include #include +#include +#include #include #include #include "i40e.h" @@ -1758,40 +1759,32 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, struct sk_buff *skb, union i40e_rx_desc *rx_desc) { - struct i40e_rx_ptype_decoded decoded; + struct libie_rx_ptype_parsed parsed; u32 rx_error, rx_status; bool ipv4, ipv6; u8 ptype; u64 qword; + skb->ip_summed = CHECKSUM_NONE; + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; + + parsed = libie_parse_rx_ptype(ptype); + if (!libie_has_rx_checksum(vsi->netdev, parsed)) + return; + rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> I40E_RXD_QW1_ERROR_SHIFT; rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT; - decoded = decode_rx_desc_ptype(ptype); - - skb->ip_summed = CHECKSUM_NONE; - - skb_checksum_none_assert(skb); - - /* Rx csum enabled and ip headers found? 
*/ - if (!(vsi->netdev->features & NETIF_F_RXCSUM)) - return; /* did the hardware decode the packet and checksum? */ if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT))) return; - /* both known and outer_ip must be set for the below code to work */ - if (!(decoded.known && decoded.outer_ip)) - return; - - ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6); + ipv4 = parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV4; + ipv6 = parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV6; if (ipv4 && (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) | @@ -1819,49 +1812,16 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, * we need to bump the checksum level by 1 to reflect the fact that * we are indicating we validated the inner checksum. */ - if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT) + if (parsed.tunnel_type >= LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT) skb->csum_level = 1; - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ - switch (decoded.inner_prot) { - case I40E_RX_PTYPE_INNER_PROT_TCP: - case I40E_RX_PTYPE_INNER_PROT_UDP: - case I40E_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - fallthrough; - default: - break; - } - + skb->ip_summed = CHECKSUM_UNNECESSARY; return; checksum_fail: vsi->back->hw_csum_rx_error++; } -/** - * i40e_ptype_to_htype - get a hash type - * @ptype: the ptype value from the descriptor - * - * Returns a hash type to be used by skb_set_hash - **/ -static inline int i40e_ptype_to_htype(u8 ptype) -{ - struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype); - - if (!decoded.known) - return PKT_HASH_TYPE_NONE; - - if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4) - return PKT_HASH_TYPE_L4; - else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3) - return PKT_HASH_TYPE_L3; - else - return PKT_HASH_TYPE_L2; -} - /** * i40e_rx_hash - set the hash value in the skb * @ring: descriptor ring @@ -1874,17 +1834,19 @@ static inline void i40e_rx_hash(struct i40e_ring *ring, struct sk_buff *skb, u8 rx_ptype) { + struct libie_rx_ptype_parsed parsed; u32 hash; const __le64 rss_mask = cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH << I40E_RX_DESC_STATUS_FLTSTAT_SHIFT); - if (!(ring->netdev->features & NETIF_F_RXHASH)) + parsed = libie_parse_rx_ptype(rx_ptype); + if (!libie_has_rx_hash(ring->netdev, parsed)) return; if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) { hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); - skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype)); + libie_skb_set_hash(skb, hash, parsed); } } diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 388c3d36d96a55..05b8510f99a930 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -773,94 +773,6 @@ enum i40e_rx_desc_error_l3l4e_fcoe_masks { #define I40E_RXD_QW1_PTYPE_SHIFT 30 #define I40E_RXD_QW1_PTYPE_MASK (0xFFULL << I40E_RXD_QW1_PTYPE_SHIFT) -/* Packet type non-ip values */ -enum i40e_rx_l2_ptype { - I40E_RX_PTYPE_L2_RESERVED = 0, - I40E_RX_PTYPE_L2_MAC_PAY2 = 1, - I40E_RX_PTYPE_L2_TIMESYNC_PAY2 = 2, - I40E_RX_PTYPE_L2_FIP_PAY2 = 3, - I40E_RX_PTYPE_L2_OUI_PAY2 = 4, - I40E_RX_PTYPE_L2_MACCNTRL_PAY2 = 5, - I40E_RX_PTYPE_L2_LLDP_PAY2 = 6, - 
I40E_RX_PTYPE_L2_ECP_PAY2 = 7, - I40E_RX_PTYPE_L2_EVB_PAY2 = 8, - I40E_RX_PTYPE_L2_QCN_PAY2 = 9, - I40E_RX_PTYPE_L2_EAPOL_PAY2 = 10, - I40E_RX_PTYPE_L2_ARP = 11, - I40E_RX_PTYPE_L2_FCOE_PAY3 = 12, - I40E_RX_PTYPE_L2_FCOE_FCDATA_PAY3 = 13, - I40E_RX_PTYPE_L2_FCOE_FCRDY_PAY3 = 14, - I40E_RX_PTYPE_L2_FCOE_FCRSP_PAY3 = 15, - I40E_RX_PTYPE_L2_FCOE_FCOTHER_PA = 16, - I40E_RX_PTYPE_L2_FCOE_VFT_PAY3 = 17, - I40E_RX_PTYPE_L2_FCOE_VFT_FCDATA = 18, - I40E_RX_PTYPE_L2_FCOE_VFT_FCRDY = 19, - I40E_RX_PTYPE_L2_FCOE_VFT_FCRSP = 20, - I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER = 21, - I40E_RX_PTYPE_GRENAT4_MAC_PAY3 = 58, - I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4 = 87, - I40E_RX_PTYPE_GRENAT6_MAC_PAY3 = 124, - I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4 = 153 -}; - -struct i40e_rx_ptype_decoded { - u32 known:1; - u32 outer_ip:1; - u32 outer_ip_ver:1; - u32 outer_frag:1; - u32 tunnel_type:3; - u32 tunnel_end_prot:2; - u32 tunnel_end_frag:1; - u32 inner_prot:4; - u32 payload_layer:3; -}; - -enum i40e_rx_ptype_outer_ip { - I40E_RX_PTYPE_OUTER_L2 = 0, - I40E_RX_PTYPE_OUTER_IP = 1 -}; - -enum i40e_rx_ptype_outer_ip_ver { - I40E_RX_PTYPE_OUTER_NONE = 0, - I40E_RX_PTYPE_OUTER_IPV4 = 0, - I40E_RX_PTYPE_OUTER_IPV6 = 1 -}; - -enum i40e_rx_ptype_outer_fragmented { - I40E_RX_PTYPE_NOT_FRAG = 0, - I40E_RX_PTYPE_FRAG = 1 -}; - -enum i40e_rx_ptype_tunnel_type { - I40E_RX_PTYPE_TUNNEL_NONE = 0, - I40E_RX_PTYPE_TUNNEL_IP_IP = 1, - I40E_RX_PTYPE_TUNNEL_IP_GRENAT = 2, - I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, - I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, -}; - -enum i40e_rx_ptype_tunnel_end_prot { - I40E_RX_PTYPE_TUNNEL_END_NONE = 0, - I40E_RX_PTYPE_TUNNEL_END_IPV4 = 1, - I40E_RX_PTYPE_TUNNEL_END_IPV6 = 2, -}; - -enum i40e_rx_ptype_inner_prot { - I40E_RX_PTYPE_INNER_PROT_NONE = 0, - I40E_RX_PTYPE_INNER_PROT_UDP = 1, - I40E_RX_PTYPE_INNER_PROT_TCP = 2, - I40E_RX_PTYPE_INNER_PROT_SCTP = 3, - I40E_RX_PTYPE_INNER_PROT_ICMP = 4, - I40E_RX_PTYPE_INNER_PROT_TIMESYNC = 5 -}; - -enum i40e_rx_ptype_payload_layer { - I40E_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, - I40E_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, - I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, - I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, -}; - #define I40E_RXD_QW1_LENGTH_PBUF_SHIFT 38 #define I40E_RXD_QW1_LENGTH_PBUF_MASK (0x3FFFULL << \ I40E_RXD_QW1_LENGTH_PBUF_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 8a4587585acde7..ee2a1e682a1c9b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -612,6 +612,9 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id, u32 qtx_ctl; int ret = 0; + if (info->ring_len == 0) + return 0; + if (!i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) { ret = -ENOENT; goto error_context; @@ -688,6 +691,9 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, struct i40e_hmc_obj_rxq rx_ctx; int ret = 0; + if (info->ring_len == 0) + return 0; + /* clear the context structure first */ memset(&rx_ctx, 0, sizeof(struct i40e_hmc_obj_rxq)); diff --git a/drivers/net/ethernet/intel/iavf/Makefile b/drivers/net/ethernet/intel/iavf/Makefile index 9c3e45c54d0133..19eb29005e7a06 100644 --- a/drivers/net/ethernet/intel/iavf/Makefile +++ b/drivers/net/ethernet/intel/iavf/Makefile @@ -13,4 +13,5 @@ obj-$(CONFIG_IAVF) += iavf.o iavf-objs := iavf_main.o iavf_ethtool.o iavf_virtchnl.o iavf_fdir.o \ iavf_adv_rss.o \ - iavf_txrx.o iavf_common.o iavf_adminq.o iavf_client.o + iavf_txrx.o iavf_common.o iavf_adminq.o 
iavf_client.o \ + iavf_xsk.o diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 2cdce251472c08..a91da041b43fa6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -4,6 +4,7 @@ #ifndef _IAVF_H_ #define _IAVF_H_ +#include #include #include #include @@ -27,13 +28,17 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include +#include "iavf_xsk.h" #include "iavf_type.h" #include #include "iavf_txrx.h" @@ -83,10 +88,6 @@ struct iavf_vsi { #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) -#define IAVF_RX_DESC(R, i) (&(((union iavf_32byte_rx_desc *)((R)->desc))[i])) -#define IAVF_TX_DESC(R, i) (&(((struct iavf_tx_desc *)((R)->desc))[i])) -#define IAVF_TX_CTXTDESC(R, i) \ - (&(((struct iavf_tx_context_desc *)((R)->desc))[i])) #define IAVF_MAX_REQ_QUEUES 16 #define IAVF_HKEY_ARRAY_SIZE ((IAVF_VFQF_HKEY_MAX_INDEX + 1) * 4) @@ -107,7 +108,8 @@ struct iavf_q_vector { struct napi_struct napi; struct iavf_ring_container rx; struct iavf_ring_container tx; - u32 ring_mask; + u32 rx_ring_mask; + u32 tx_ring_mask; u8 itr_countdown; /* when 0 should adjust adaptive ITR */ u8 num_ringpairs; /* total number of ring pairs in vector */ u16 v_idx; /* index in the vsi->q_vector array. */ @@ -243,6 +245,8 @@ struct iavf_cloud_filter { bool add; /* filter needs to be added */ }; +#define IAVF_XDP_LINK_TIMEOUT_MS 1000 + #define IAVF_RESET_WAIT_MS 10 #define IAVF_RESET_WAIT_DETECTED_COUNT 500 #define IAVF_RESET_WAIT_COMPLETE_COUNT 2000 @@ -263,11 +267,15 @@ struct iavf_adapter { /* Lock to protect accesses to MAC and VLAN lists */ spinlock_t mac_vlan_list_lock; char misc_vector_name[IFNAMSIZ + 9]; - int num_active_queues; - int num_req_queues; + u32 num_active_queues; + u32 num_xdp_tx_queues; + u32 num_req_queues; + struct bpf_prog *xdp_prog; + unsigned long *af_xdp_zc_qps; /* TX */ struct iavf_ring *tx_rings; + struct iavf_ring *xdp_rings; u32 tx_timeout_count; u32 tx_desc_count; @@ -294,7 +302,7 @@ struct iavf_adapter { #define IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS BIT(12) #define IAVF_FLAG_PROMISC_ON BIT(13) #define IAVF_FLAG_ALLMULTI_ON BIT(14) -#define IAVF_FLAG_LEGACY_RX BIT(15) +/* BIT(15) is free, was IAVF_FLAG_LEGACY_RX */ #define IAVF_FLAG_REINIT_ITR_NEEDED BIT(16) #define IAVF_FLAG_QUEUES_DISABLED BIT(17) #define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) @@ -510,6 +518,30 @@ static inline void iavf_change_state(struct iavf_adapter *adapter, iavf_state_str(adapter->state)); } +/** + * iavf_adapter_xdp_active - Determine if XDP program is loaded + * @adapter: board private structure + * + * Returns true if XDP program is loaded on a given adapter. 
+ */ +static inline bool iavf_adapter_xdp_active(struct iavf_adapter *adapter) +{ + return !!READ_ONCE(adapter->xdp_prog); +} + +static inline struct xsk_buff_pool *iavf_xsk_pool(struct iavf_ring *ring) +{ + struct iavf_adapter *adapter = ring->vsi->back; + struct iavf_vsi *vsi = ring->vsi; + u16 qid = ring->queue_index; + + if (!iavf_adapter_xdp_active(adapter) || + !test_bit(qid, adapter->af_xdp_zc_qps)) + return NULL; + + return xsk_get_pool_from_qid(vsi->netdev, qid); +} + int iavf_up(struct iavf_adapter *adapter); void iavf_down(struct iavf_adapter *adapter); int iavf_process_config(struct iavf_adapter *adapter); @@ -537,11 +569,17 @@ int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter); void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter); u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter); void iavf_irq_enable(struct iavf_adapter *adapter, bool flush); -void iavf_configure_queues(struct iavf_adapter *adapter); +int iavf_configure_selected_queues(struct iavf_adapter *adapter, u32 qp_mask, + bool wait); +int iavf_configure_queues(struct iavf_adapter *adapter, bool wait); void iavf_deconfigure_queues(struct iavf_adapter *adapter); -void iavf_enable_queues(struct iavf_adapter *adapter); -void iavf_disable_queues(struct iavf_adapter *adapter); -void iavf_map_queues(struct iavf_adapter *adapter); +int iavf_enable_queues(struct iavf_adapter *adapter, bool wait); +int iavf_disable_queues(struct iavf_adapter *adapter, bool wait); +int iavf_enable_selected_queues(struct iavf_adapter *adapter, u32 rx_queues, + u32 tx_queues, bool wait); +int iavf_disable_selected_queues(struct iavf_adapter *adapter, u32 rx_queues, + u32 tx_queues, bool wait); +int iavf_map_queues(struct iavf_adapter *adapter, bool wait); int iavf_request_queues(struct iavf_adapter *adapter, int num); void iavf_add_ether_addrs(struct iavf_adapter *adapter); void iavf_del_ether_addrs(struct iavf_adapter *adapter); @@ -556,9 +594,14 @@ void iavf_set_rss_key(struct iavf_adapter *adapter); void iavf_set_rss_lut(struct iavf_adapter *adapter); void iavf_enable_vlan_stripping(struct iavf_adapter *adapter); void iavf_disable_vlan_stripping(struct iavf_adapter *adapter); +int iavf_poll_for_link_status(struct iavf_adapter *adapter, unsigned int msecs); void iavf_virtchnl_completion(struct iavf_adapter *adapter, enum virtchnl_ops v_opcode, enum iavf_status v_retval, u8 *msg, u16 msglen); +int iavf_process_pending_pf_msg(struct iavf_adapter *adapter, + unsigned int timeout_msecs); +void iavf_configure_rx_ring(struct iavf_adapter *adapter, + struct iavf_ring *rx_ring); int iavf_config_rss(struct iavf_adapter *adapter); int iavf_lan_add_device(struct iavf_adapter *adapter); int iavf_lan_del_device(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index dd11dbbd5551a2..ba6c9f154d189a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -499,259 +499,6 @@ enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id, return iavf_aq_get_set_rss_key(hw, vsi_id, key, true); } -/* The iavf_ptype_lookup table is used to convert from the 8-bit ptype in the - * hardware to a bit-field that can be used by SW to more easily determine the - * packet type. - * - * Macros are used to shorten the table lines and make this table human - * readable. 
- * - * We store the PTYPE in the top byte of the bit field - this is just so that - * we can check that the table doesn't have a row missing, as the index into - * the table should be the PTYPE. - * - * Typical work flow: - * - * IF NOT iavf_ptype_lookup[ptype].known - * THEN - * Packet is unknown - * ELSE IF iavf_ptype_lookup[ptype].outer_ip == IAVF_RX_PTYPE_OUTER_IP - * Use the rest of the fields to look at the tunnels, inner protocols, etc - * ELSE - * Use the enum iavf_rx_l2_ptype to decode the packet type - * ENDIF - */ - -/* macro to make the table lines short, use explicit indexing with [PTYPE] */ -#define IAVF_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - [PTYPE] = { \ - 1, \ - IAVF_RX_PTYPE_OUTER_##OUTER_IP, \ - IAVF_RX_PTYPE_OUTER_##OUTER_IP_VER, \ - IAVF_RX_PTYPE_##OUTER_FRAG, \ - IAVF_RX_PTYPE_TUNNEL_##T, \ - IAVF_RX_PTYPE_TUNNEL_END_##TE, \ - IAVF_RX_PTYPE_##TEF, \ - IAVF_RX_PTYPE_INNER_PROT_##I, \ - IAVF_RX_PTYPE_PAYLOAD_LAYER_##PL } - -#define IAVF_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } - -/* shorter macros makes the table fit but are terse */ -#define IAVF_RX_PTYPE_NOF IAVF_RX_PTYPE_NOT_FRAG -#define IAVF_RX_PTYPE_FRG IAVF_RX_PTYPE_FRAG -#define IAVF_RX_PTYPE_INNER_PROT_TS IAVF_RX_PTYPE_INNER_PROT_TIMESYNC - -/* Lookup table mapping the 8-bit HW PTYPE to the bit field for decoding */ -struct iavf_rx_ptype_decoded iavf_ptype_lookup[BIT(8)] = { - /* L2 Packet types */ - IAVF_PTT_UNUSED_ENTRY(0), - IAVF_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, TS, PAY2), - IAVF_PTT(3, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT_UNUSED_ENTRY(4), - IAVF_PTT_UNUSED_ENTRY(5), - IAVF_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT_UNUSED_ENTRY(8), - IAVF_PTT_UNUSED_ENTRY(9), - IAVF_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - IAVF_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - - /* Non Tunneled IPv4 */ - IAVF_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(25), - IAVF_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), - IAVF_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4), - IAVF_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv4 --> IPv4 */ - IAVF_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - IAVF_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - IAVF_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(32), - IAVF_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - IAVF_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> IPv6 */ - IAVF_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - IAVF_PTT(37, IP, IPV4, NOF, 
IP_IP, IPV6, NOF, NONE, PAY3), - IAVF_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(39), - IAVF_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - IAVF_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT */ - IAVF_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> IPv4 */ - IAVF_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - IAVF_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - IAVF_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(47), - IAVF_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - IAVF_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> IPv6 */ - IAVF_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - IAVF_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - IAVF_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(54), - IAVF_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - IAVF_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC */ - IAVF_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> MAC --> IPv4 */ - IAVF_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - IAVF_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - IAVF_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(62), - IAVF_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - IAVF_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ - IAVF_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - IAVF_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - IAVF_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(69), - IAVF_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - IAVF_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC/VLAN */ - IAVF_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ - IAVF_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - IAVF_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - IAVF_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(77), - IAVF_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - IAVF_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ - IAVF_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - IAVF_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - IAVF_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(84), - IAVF_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - IAVF_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* Non Tunneled IPv6 */ - 
IAVF_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(91), - IAVF_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), - IAVF_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), - IAVF_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv6 --> IPv4 */ - IAVF_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - IAVF_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - IAVF_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(98), - IAVF_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - IAVF_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> IPv6 */ - IAVF_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - IAVF_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - IAVF_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(105), - IAVF_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - IAVF_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT */ - IAVF_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> IPv4 */ - IAVF_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - IAVF_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - IAVF_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(113), - IAVF_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - IAVF_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> IPv6 */ - IAVF_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - IAVF_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - IAVF_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(120), - IAVF_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - IAVF_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC */ - IAVF_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ - IAVF_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - IAVF_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - IAVF_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(128), - IAVF_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - IAVF_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ - IAVF_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - IAVF_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - IAVF_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(135), - IAVF_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - IAVF_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN */ - IAVF_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ - IAVF_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, 
PAY3), - IAVF_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - IAVF_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(143), - IAVF_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - IAVF_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */ - IAVF_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - IAVF_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - IAVF_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(150), - IAVF_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - IAVF_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* unused entries */ - [154 ... 255] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } -}; - /** * iavf_aq_send_msg_to_pf * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 6f171d1d85b75f..e14e5f84f6ebce 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -46,16 +46,6 @@ struct iavf_stats { .stat_offset = offsetof(_type, _stat) \ } -/* Helper macro for defining some statistics related to queues */ -#define IAVF_QUEUE_STAT(_name, _stat) \ - IAVF_STAT(struct iavf_ring, _name, _stat) - -/* Stats associated with a Tx or Rx ring */ -static const struct iavf_stats iavf_gstrings_queue_stats[] = { - IAVF_QUEUE_STAT("%s-%u.packets", stats.packets), - IAVF_QUEUE_STAT("%s-%u.bytes", stats.bytes), -}; - /** * iavf_add_one_ethtool_stat - copy the stat into the supplied buffer * @data: location to store the stat value @@ -141,43 +131,6 @@ __iavf_add_ethtool_stats(u64 **data, void *pointer, #define iavf_add_ethtool_stats(data, pointer, stats) \ __iavf_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats)) -/** - * iavf_add_queue_stats - copy queue statistics into supplied buffer - * @data: ethtool stats buffer - * @ring: the ring to copy - * - * Queue statistics must be copied while protected by - * u64_stats_fetch_begin, so we can't directly use iavf_add_ethtool_stats. - * Assumes that queue stats are defined in iavf_gstrings_queue_stats. If the - * ring pointer is null, zero out the queue stat values and update the data - * pointer. Otherwise safely copy the stats from the ring into the supplied - * buffer and update the data pointer when finished. - * - * This function expects to be called while under rcu_read_lock(). - **/ -static void -iavf_add_queue_stats(u64 **data, struct iavf_ring *ring) -{ - const unsigned int size = ARRAY_SIZE(iavf_gstrings_queue_stats); - const struct iavf_stats *stats = iavf_gstrings_queue_stats; - unsigned int start; - unsigned int i; - - /* To avoid invalid statistics values, ensure that we keep retrying - * the copy until we get a consistent value according to - * u64_stats_fetch_retry. But first, make sure our ring is - * non-null before attempting to access its syncp. - */ - do { - start = !ring ? 
0 : u64_stats_fetch_begin(&ring->syncp); - for (i = 0; i < size; i++) - iavf_add_one_ethtool_stat(&(*data)[i], ring, &stats[i]); - } while (ring && u64_stats_fetch_retry(&ring->syncp, start)); - - /* Once we successfully copy the stats in, update the data pointer */ - *data += size; -} - /** * __iavf_add_stat_strings - copy stat strings into ethtool buffer * @p: ethtool supplied buffer @@ -237,31 +190,6 @@ static const struct iavf_stats iavf_gstrings_stats[] = { #define IAVF_STATS_LEN ARRAY_SIZE(iavf_gstrings_stats) -#define IAVF_QUEUE_STATS_LEN ARRAY_SIZE(iavf_gstrings_queue_stats) - -/* For now we have one and only one private flag and it is only defined - * when we have support for the SKIP_CPU_SYNC DMA attribute. Instead - * of leaving all this code sitting around empty we will strip it unless - * our one private flag is actually available. - */ -struct iavf_priv_flags { - char flag_string[ETH_GSTRING_LEN]; - u32 flag; - bool read_only; -}; - -#define IAVF_PRIV_FLAG(_name, _flag, _read_only) { \ - .flag_string = _name, \ - .flag = _flag, \ - .read_only = _read_only, \ -} - -static const struct iavf_priv_flags iavf_gstrings_priv_flags[] = { - IAVF_PRIV_FLAG("legacy-rx", IAVF_FLAG_LEGACY_RX, 0), -}; - -#define IAVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(iavf_gstrings_priv_flags) - /** * iavf_get_link_ksettings - Get Link Speed and Duplex settings * @netdev: network interface device structure @@ -331,20 +259,22 @@ static int iavf_get_link_ksettings(struct net_device *netdev, **/ static int iavf_get_sset_count(struct net_device *netdev, int sset) { - /* Report the maximum number queues, even if not every queue is - * currently configured. Since allocation of queues is in pairs, - * use netdev->real_num_tx_queues * 2. The real_num_tx_queues is set - * at device creation and never changes. - */ + u32 num; - if (sset == ETH_SS_STATS) - return IAVF_STATS_LEN + - (IAVF_QUEUE_STATS_LEN * 2 * - netdev->real_num_tx_queues); - else if (sset == ETH_SS_PRIV_FLAGS) - return IAVF_PRIV_FLAGS_STR_LEN; - else + switch (sset) { + case ETH_SS_STATS: + /* Per-queue */ + num = libie_rq_stats_get_sset_count(); + num += libie_sq_stats_get_sset_count(); + num *= netdev->real_num_tx_queues; + + /* Global */ + num += IAVF_STATS_LEN; + + return num; + default: return -EINVAL; + } } /** @@ -371,37 +301,19 @@ static void iavf_get_ethtool_stats(struct net_device *netdev, * it to iterate over rings' stats. */ for (i = 0; i < adapter->num_active_queues; i++) { - struct iavf_ring *ring; + const struct iavf_ring *ring; /* Tx rings stats */ - ring = &adapter->tx_rings[i]; - iavf_add_queue_stats(&data, ring); + libie_sq_stats_get_data(&data, &adapter->tx_rings[i].sq_stats); /* Rx rings stats */ ring = &adapter->rx_rings[i]; - iavf_add_queue_stats(&data, ring); + libie_rq_stats_get_data(&data, &ring->rq_stats, + ring->rx_pages ? 
ring->pool : NULL); } rcu_read_unlock(); } -/** - * iavf_get_priv_flag_strings - Get private flag strings - * @netdev: network interface device structure - * @data: buffer for string data - * - * Builds the private flags string table - **/ -static void iavf_get_priv_flag_strings(struct net_device *netdev, u8 *data) -{ - unsigned int i; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) { - snprintf(data, ETH_GSTRING_LEN, "%s", - iavf_gstrings_priv_flags[i].flag_string); - data += ETH_GSTRING_LEN; - } -} - /** * iavf_get_stat_strings - Get stat strings * @netdev: network interface device structure @@ -419,10 +331,8 @@ static void iavf_get_stat_strings(struct net_device *netdev, u8 *data) * real_num_tx_queues for both Tx and Rx queues. */ for (i = 0; i < netdev->real_num_tx_queues; i++) { - iavf_add_stat_strings(&data, iavf_gstrings_queue_stats, - "tx", i); - iavf_add_stat_strings(&data, iavf_gstrings_queue_stats, - "rx", i); + libie_sq_stats_get_strings(&data, i); + libie_rq_stats_get_strings(&data, i); } } @@ -440,105 +350,11 @@ static void iavf_get_strings(struct net_device *netdev, u32 sset, u8 *data) case ETH_SS_STATS: iavf_get_stat_strings(netdev, data); break; - case ETH_SS_PRIV_FLAGS: - iavf_get_priv_flag_strings(netdev, data); - break; default: break; } } -/** - * iavf_get_priv_flags - report device private flags - * @netdev: network interface device structure - * - * The get string set count and the string set should be matched for each - * flag returned. Add new strings for each flag to the iavf_gstrings_priv_flags - * array. - * - * Returns a u32 bitmap of flags. - **/ -static u32 iavf_get_priv_flags(struct net_device *netdev) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - u32 i, ret_flags = 0; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) { - const struct iavf_priv_flags *priv_flags; - - priv_flags = &iavf_gstrings_priv_flags[i]; - - if (priv_flags->flag & adapter->flags) - ret_flags |= BIT(i); - } - - return ret_flags; -} - -/** - * iavf_set_priv_flags - set private flags - * @netdev: network interface device structure - * @flags: bit flags to be set - **/ -static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - u32 orig_flags, new_flags, changed_flags; - u32 i; - - orig_flags = READ_ONCE(adapter->flags); - new_flags = orig_flags; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) { - const struct iavf_priv_flags *priv_flags; - - priv_flags = &iavf_gstrings_priv_flags[i]; - - if (flags & BIT(i)) - new_flags |= priv_flags->flag; - else - new_flags &= ~(priv_flags->flag); - - if (priv_flags->read_only && - ((orig_flags ^ new_flags) & ~BIT(i))) - return -EOPNOTSUPP; - } - - /* Before we finalize any flag changes, any checks which we need to - * perform to determine if the new flags will be supported should go - * here... - */ - - /* Compare and exchange the new flags into place. If we failed, that - * is if cmpxchg returns anything but the old value, this means - * something else must have modified the flags variable since we - * copied it. We'll just punt with an error and log something in the - * message buffer. - */ - if (cmpxchg(&adapter->flags, orig_flags, new_flags) != orig_flags) { - dev_warn(&adapter->pdev->dev, - "Unable to update adapter->flags as it was modified by another thread...\n"); - return -EAGAIN; - } - - changed_flags = orig_flags ^ new_flags; - - /* Process any additional changes needed as a result of flag changes. 
- * The changed_flags value reflects the list of bits that were changed
- * in the code above.
- */
-
-	/* issue a reset to force legacy-rx change to take effect */
-	if (changed_flags & IAVF_FLAG_LEGACY_RX) {
-		if (netif_running(netdev)) {
-			adapter->flags |= IAVF_FLAG_RESET_NEEDED;
-			queue_work(adapter->wq, &adapter->reset_task);
-		}
-	}
-
-	return 0;
-}
-
 /**
  * iavf_get_msglevel - Get debug message level
  * @netdev: network interface device structure
@@ -584,7 +400,6 @@ static void iavf_get_drvinfo(struct net_device *netdev,
 	strscpy(drvinfo->driver, iavf_driver_name, 32);
 	strscpy(drvinfo->fw_version, "N/A", 4);
 	strscpy(drvinfo->bus_info, pci_name(adapter->pdev), 32);
-	drvinfo->n_priv_flags = IAVF_PRIV_FLAGS_STR_LEN;
 }
 
 /**
@@ -1829,6 +1644,7 @@ static int iavf_set_channels(struct net_device *netdev,
 			     struct ethtool_channels *ch)
 {
 	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u32 num_allowed = adapter->vsi_res->num_queue_pairs;
 	u32 num_req = ch->combined_count;
 	int i;
 
@@ -1841,9 +1657,15 @@ static int iavf_set_channels(struct net_device *netdev,
 	/* All of these should have already been checked by ethtool before this
 	 * even gets to us, but just to be sure.
 	 */
-	if (num_req == 0 || num_req > adapter->vsi_res->num_queue_pairs)
+	if (num_req == 0 || num_req > num_allowed)
 		return -EINVAL;
 
+	if (iavf_adapter_xdp_active(adapter) && num_req * 2 > num_allowed) {
+		netdev_err(netdev, "XDP is enabled, so the maximum allowed queue number is reduced to %u, %u queues were requested\n",
+			   num_allowed / 2, num_req);
+		return -EINVAL;
+	}
+
 	if (num_req == adapter->num_active_queues)
 		return 0;
 
@@ -1969,8 +1791,6 @@ static const struct ethtool_ops iavf_ethtool_ops = {
 	.get_strings = iavf_get_strings,
 	.get_ethtool_stats = iavf_get_ethtool_stats,
 	.get_sset_count = iavf_get_sset_count,
-	.get_priv_flags = iavf_get_priv_flags,
-	.set_priv_flags = iavf_set_priv_flags,
 	.get_msglevel = iavf_get_msglevel,
 	.set_msglevel = iavf_set_msglevel,
 	.get_coalesce = iavf_get_coalesce,
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 095201e83c9db0..59e47968ad264a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2013 - 2018 Intel Corporation.
*/ +#include + #include "iavf.h" #include "iavf_prototype.h" #include "iavf_client.h" @@ -46,6 +48,7 @@ MODULE_DEVICE_TABLE(pci, iavf_pci_tbl); MODULE_ALIAS("i40evf"); MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION("Intel(R) Ethernet Adaptive Virtual Function Network Driver"); +MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); static const struct net_device_ops iavf_netdev_ops; @@ -452,7 +455,7 @@ iavf_map_vector_to_rxq(struct iavf_adapter *adapter, int v_idx, int r_idx) q_vector->rx.count++; q_vector->rx.next_update = jiffies + 1; q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); - q_vector->ring_mask |= BIT(r_idx); + q_vector->rx_ring_mask |= BIT(r_idx); wr32(hw, IAVF_VFINT_ITRN1(IAVF_RX_ITR, q_vector->reg_idx), q_vector->rx.current_itr >> 1); q_vector->rx.current_itr = q_vector->rx.target_itr; @@ -463,12 +466,15 @@ iavf_map_vector_to_rxq(struct iavf_adapter *adapter, int v_idx, int r_idx) * @adapter: board private structure * @v_idx: interrupt number * @t_idx: queue number - **/ + * @xdpq: set to true if Tx queue is XDP Tx queue + */ static void -iavf_map_vector_to_txq(struct iavf_adapter *adapter, int v_idx, int t_idx) +iavf_map_vector_to_txq(struct iavf_adapter *adapter, int v_idx, int t_idx, + bool xdpq) { + struct iavf_ring *tx_ring = xdpq ? &adapter->xdp_rings[t_idx] + : &adapter->tx_rings[t_idx]; struct iavf_q_vector *q_vector = &adapter->q_vectors[v_idx]; - struct iavf_ring *tx_ring = &adapter->tx_rings[t_idx]; struct iavf_hw *hw = &adapter->hw; tx_ring->q_vector = q_vector; @@ -478,7 +484,7 @@ iavf_map_vector_to_txq(struct iavf_adapter *adapter, int v_idx, int t_idx) q_vector->tx.count++; q_vector->tx.next_update = jiffies + 1; q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); - q_vector->num_ringpairs++; + q_vector->tx_ring_mask |= BIT(tx_ring->queue_index); wr32(hw, IAVF_VFINT_ITRN1(IAVF_TX_ITR, q_vector->reg_idx), q_vector->tx.target_itr >> 1); q_vector->tx.current_itr = q_vector->tx.target_itr; @@ -504,7 +510,11 @@ static void iavf_map_rings_to_vectors(struct iavf_adapter *adapter) for (; ridx < rings_remaining; ridx++) { iavf_map_vector_to_rxq(adapter, vidx, ridx); - iavf_map_vector_to_txq(adapter, vidx, ridx); + iavf_map_vector_to_txq(adapter, vidx, ridx, false); + if (iavf_adapter_xdp_active(adapter)) + iavf_map_vector_to_txq(adapter, vidx, ridx, true); + + adapter->q_vectors[vidx].num_ringpairs++; /* In the case where we have more queues than vectors, continue * round-robin on vectors until all queues are mapped. 
@@ -516,6 +526,54 @@ static void iavf_map_rings_to_vectors(struct iavf_adapter *adapter)
 	adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS;
 }
 
+/**
+ * iavf_unmap_rings_from_vectors - Clear existing mapping for queues and vectors
+ * @adapter: board private structure
+ *
+ */
+static void iavf_unmap_rings_from_vectors(struct iavf_adapter *adapter)
+{
+	struct iavf_ring *rx_ring, *tx_ring;
+	struct iavf_q_vector *q_vector;
+	int num_q_vectors, i;
+
+	num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+	for (i = 0; i < num_q_vectors; i++) {
+		q_vector = &adapter->q_vectors[i];
+		q_vector->tx.ring = NULL;
+		q_vector->tx.count = 0;
+		q_vector->tx.next_update = 0;
+		q_vector->tx.target_itr = 0;
+		q_vector->tx.current_itr = 0;
+		q_vector->num_ringpairs = 0;
+
+		q_vector->rx.ring = NULL;
+		q_vector->rx.count = 0;
+		q_vector->rx.next_update = 0;
+		q_vector->rx.target_itr = 0;
+		q_vector->rx.current_itr = 0;
+		q_vector->rx_ring_mask = 0;
+		q_vector->tx_ring_mask = 0;
+	}
+
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		rx_ring = &adapter->rx_rings[i];
+		tx_ring = &adapter->tx_rings[i];
+
+		rx_ring->q_vector = NULL;
+		rx_ring->next = NULL;
+		tx_ring->q_vector = NULL;
+		tx_ring->next = NULL;
+	}
+
+	for (i = 0; i < adapter->num_xdp_tx_queues; i++) {
+		tx_ring = &adapter->xdp_rings[i];
+
+		tx_ring->q_vector = NULL;
+		tx_ring->next = NULL;
+	}
+}
+
 /**
  * iavf_irq_affinity_notify - Callback for affinity changes
  * @notify: context as to what irq was changed
@@ -654,6 +712,7 @@ static int iavf_request_misc_irq(struct iavf_adapter *adapter)
  **/
 static void iavf_free_traffic_irqs(struct iavf_adapter *adapter)
 {
+	struct iavf_q_vector *q_vector;
 	int vector, irq_num, q_vectors;
 
 	if (!adapter->msix_entries)
@@ -662,10 +721,14 @@ static void iavf_free_traffic_irqs(struct iavf_adapter *adapter)
 	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
 
 	for (vector = 0; vector < q_vectors; vector++) {
+		q_vector = &adapter->q_vectors[vector];
+		if (!q_vector->tx.ring && !q_vector->rx.ring)
+			continue;
+
 		irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
 		irq_set_affinity_notifier(irq_num, NULL);
 		irq_update_affinity_hint(irq_num, NULL);
-		free_irq(irq_num, &adapter->q_vectors[vector]);
+		free_irq(irq_num, q_vector);
 	}
 }
 
@@ -694,54 +757,68 @@ static void iavf_free_misc_irq(struct iavf_adapter *adapter)
 static void iavf_configure_tx(struct iavf_adapter *adapter)
 {
 	struct iavf_hw *hw = &adapter->hw;
-	int i;
+	int i, j;
 
-	for (i = 0; i < adapter->num_active_queues; i++)
-		adapter->tx_rings[i].tail = hw->hw_addr + IAVF_QTX_TAIL1(i);
+	for (i = 0, j = 0; i < adapter->num_active_queues; i++, j++)
+		adapter->tx_rings[i].tail = hw->hw_addr + IAVF_QTX_TAIL1(j);
+
+	for (i = 0; i < adapter->num_xdp_tx_queues; i++, j++)
+		adapter->xdp_rings[i].tail = hw->hw_addr + IAVF_QTX_TAIL1(j);
 }
 
 /**
- * iavf_configure_rx - Configure Receive Unit after Reset
+ * iavf_configure_rx_ring - Configure a single Rx ring
  * @adapter: board private structure
- *
- * Configure the Rx unit of the MAC after a reset.
- **/
-static void iavf_configure_rx(struct iavf_adapter *adapter)
+ * @rx_ring: Rx ring to be configured
+ */
+void iavf_configure_rx_ring(struct iavf_adapter *adapter,
+			    struct iavf_ring *rx_ring)
 {
-	unsigned int rx_buf_len = IAVF_RXBUFFER_2048;
-	struct iavf_hw *hw = &adapter->hw;
-	int i;
+	u32 queue_idx = rx_ring->queue_index;
+	int err;
 
-	/* Legacy Rx will always default to a 2048 buffer size.
*/ -#if (PAGE_SIZE < 8192) - if (!(adapter->flags & IAVF_FLAG_LEGACY_RX)) { - struct net_device *netdev = adapter->netdev; + rx_ring->tail = adapter->hw.hw_addr + IAVF_QRX_TAIL1(queue_idx); - /* For jumbo frames on systems with 4K pages we have to use - * an order 1 page, so we might as well increase the size - * of our Rx buffer to make better use of the available space - */ - rx_buf_len = IAVF_RXBUFFER_3072; + if (!xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index, + rx_ring->q_vector->napi.napi_id); - /* We use a 1536 buffer size for configurations with - * standard Ethernet mtu. On x86 this gives us enough room - * for shared info and 192 bytes of padding. - */ - if (!IAVF_2K_TOO_SMALL_WITH_PADDING && - (netdev->mtu <= ETH_DATA_LEN)) - rx_buf_len = IAVF_RXBUFFER_1536 - NET_IP_ALIGN; - } -#endif + if (rx_ring->flags & IAVF_TXRX_FLAGS_XSK) { + err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + NULL); + if (err) + netdev_err(adapter->netdev, "xdp_rxq_info_reg_mem_model returned %d\n", + err); - for (i = 0; i < adapter->num_active_queues; i++) { - adapter->rx_rings[i].tail = hw->hw_addr + IAVF_QRX_TAIL1(i); - adapter->rx_rings[i].rx_buf_len = rx_buf_len; + xsk_pool_set_rxq_info(rx_ring->xsk_pool, &rx_ring->xdp_rxq); - if (adapter->flags & IAVF_FLAG_LEGACY_RX) - clear_ring_build_skb_enabled(&adapter->rx_rings[i]); - else - set_ring_build_skb_enabled(&adapter->rx_rings[i]); + iavf_check_alloc_rx_buffers_zc(adapter, rx_ring); + } else { + err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, + MEM_TYPE_PAGE_POOL, + rx_ring->pool); + if (err) + netdev_err(adapter->netdev, "Could not register XDP memory model for RX queue %u, error: %d\n", + queue_idx, err); + + iavf_alloc_rx_pages(rx_ring); } + + RCU_INIT_POINTER(rx_ring->xdp_prog, adapter->xdp_prog); +} + +/** + * iavf_configure_rx - Configure Receive Unit after Reset + * @adapter: board private structure + * + * Configure the Rx unit of the MAC after a reset. + */ +static void iavf_configure_rx(struct iavf_adapter *adapter) +{ + for (u32 i = 0; i < adapter->num_active_queues; i++) + iavf_configure_rx_ring(adapter, &adapter->rx_rings[i]); } /** @@ -1233,19 +1311,12 @@ static void iavf_napi_disable_all(struct iavf_adapter *adapter) static void iavf_configure(struct iavf_adapter *adapter) { struct net_device *netdev = adapter->netdev; - int i; iavf_set_rx_mode(netdev); iavf_configure_tx(adapter); iavf_configure_rx(adapter); adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES; - - for (i = 0; i < adapter->num_active_queues; i++) { - struct iavf_ring *ring = &adapter->rx_rings[i]; - - iavf_alloc_rx_buffers(ring, IAVF_DESC_UNUSED(ring)); - } } /** @@ -1378,23 +1449,47 @@ static void iavf_clear_adv_rss_conf(struct iavf_adapter *adapter) } /** - * iavf_down - Shutdown the connection processing + * iavf_stop_traffic - Stop NAPI and interrupts before link down * @adapter: board private structure - * - * Expects to be called while holding the __IAVF_IN_CRITICAL_TASK bit lock. 
- **/ -void iavf_down(struct iavf_adapter *adapter) + */ +void iavf_stop_traffic(struct iavf_adapter *adapter) { struct net_device *netdev = adapter->netdev; - if (adapter->state <= __IAVF_DOWN_PENDING) - return; - netif_carrier_off(netdev); netif_tx_disable(netdev); adapter->link_up = false; iavf_napi_disable_all(adapter); iavf_irq_disable(adapter); +} + +/** + * iavf_start_traffic - Start NAPI and interrupts after link up + * @adapter: board private structure + */ +void iavf_start_traffic(struct iavf_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + iavf_napi_enable_all(adapter); + iavf_irq_enable(adapter, true); + adapter->link_up = true; + netif_tx_start_all_queues(netdev); + netif_carrier_on(netdev); +} + +/** + * iavf_down - Shutdown the connection processing + * @adapter: board private structure + * + * Expects to be called while holding the __IAVF_IN_CRITICAL_TASK bit lock. + */ +void iavf_down(struct iavf_adapter *adapter) +{ + if (adapter->state <= __IAVF_DOWN_PENDING) + return; + + iavf_stop_traffic(adapter); iavf_clear_mac_vlan_filters(adapter); iavf_clear_cloud_filters(adapter); @@ -1467,6 +1562,19 @@ iavf_acquire_msix_vectors(struct iavf_adapter *adapter, int vectors) return 0; } +/** + * iavf_free_xdp_queues - Free memory for XDP rings + * @adapter: board private structure to update + * + * Free all of the memory associated with XDP queues. + */ +static void iavf_free_xdp_queues(struct iavf_adapter *adapter) +{ + adapter->num_xdp_tx_queues = 0; + kfree(adapter->xdp_rings); + adapter->xdp_rings = NULL; +} + /** * iavf_free_queues - Free memory for all rings * @adapter: board private structure to initialize @@ -1475,13 +1583,92 @@ iavf_acquire_msix_vectors(struct iavf_adapter *adapter, int vectors) **/ static void iavf_free_queues(struct iavf_adapter *adapter) { - if (!adapter->vsi_res) - return; adapter->num_active_queues = 0; kfree(adapter->tx_rings); adapter->tx_rings = NULL; kfree(adapter->rx_rings); adapter->rx_rings = NULL; + iavf_free_xdp_queues(adapter); +} + +/** + * iavf_set_rx_queue_vlan_tag_loc - set location for VLAN tag offload in Rx + * @adapter: board private structure + * @rx_ring: Rx ring where VLAN tag offload for VLAN will be set + * + * Helper function for setting VLAN tag offload location in a given Rx ring. + */ +static void iavf_set_rx_queue_vlan_tag_loc(struct iavf_adapter *adapter, + struct iavf_ring *rx_ring) +{ + struct virtchnl_vlan_supported_caps *caps; + + /* prevent multiple L2TAG bits being set after VFR */ + rx_ring->flags &= + ~(IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 | + IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2); + + if (VLAN_ALLOWED(adapter)) { + rx_ring->flags |= IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; + return; + } + + if (!VLAN_V2_ALLOWED(adapter)) + return; + + caps = &adapter->vlan_v2_caps.offloads.stripping_support; + + if ((caps->outer | caps->inner) & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) + rx_ring->flags |= IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; + else if ((caps->outer | caps->inner) & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2) + rx_ring->flags |= IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2; +} + +/** + * iavf_set_tx_queue_vlan_tag_loc - set location for VLAN tag offload in Tx + * @adapter: board private structure + * @tx_ring: Tx ring where VLAN tag offload for VLAN will be set + * + * Helper function for setting VLAN tag offload location in a given Tx ring. 
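
Both of these per-ring helpers reduce to the same selection: merge the outer and inner capability bits and prefer L2TAG1 over L2TAG2. A standalone sketch of just that selection (the flag values are invented and the VLAN_ALLOWED short-circuit is omitted):

#include <stdio.h>
#include <stdint.h>

/* invented capability and ring-flag bits */
#define CAP_L2TAG1		0x1u
#define CAP_L2TAG2		0x2u

#define RING_VLAN_IN_L2TAG1	0x1u
#define RING_VLAN_IN_L2TAG2	0x2u

static uint32_t pick_vlan_tag_loc(uint32_t outer_caps, uint32_t inner_caps)
{
	uint32_t caps = outer_caps | inner_caps;

	if (caps & CAP_L2TAG1)
		return RING_VLAN_IN_L2TAG1;
	if (caps & CAP_L2TAG2)
		return RING_VLAN_IN_L2TAG2;

	return 0;	/* no offload location advertised */
}

int main(void)
{
	printf("%u\n", pick_vlan_tag_loc(CAP_L2TAG2, 0));	/* 2 */
	printf("%u\n", pick_vlan_tag_loc(0, CAP_L2TAG1));	/* 1 */
	return 0;
}

Merging outer and inner first is what lets the long if/else chains of the old per-queue loop collapse into these short helpers.
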
+ */ +static void iavf_set_tx_queue_vlan_tag_loc(struct iavf_adapter *adapter, + struct iavf_ring *tx_ring) +{ + struct virtchnl_vlan_supported_caps *caps; + + /* prevent multiple L2TAG bits being set after VFR */ + tx_ring->flags &= + ~(IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 | + IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2); + + if (VLAN_ALLOWED(adapter)) { + tx_ring->flags |= IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; + return; + } + + if (!VLAN_V2_ALLOWED(adapter)) + return; + + caps = &adapter->vlan_v2_caps.offloads.insertion_support; + + if ((caps->outer | caps->inner) & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) + tx_ring->flags |= IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; + else if ((caps->outer | caps->inner) & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2) + tx_ring->flags |= IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2; +} + +/** + * iavf_set_xdp_queue_vlan_tag_loc - set location for VLAN tag on XDP ring + * @adapter: board private structure + * + * Variation of iavf_set_queue_vlan_tag_loc, which configures XDP rings only. + */ +static void iavf_set_xdp_queue_vlan_tag_loc(struct iavf_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_xdp_tx_queues; i++) + iavf_set_tx_queue_vlan_tag_loc(adapter, &adapter->xdp_rings[i]); } /** @@ -1498,70 +1685,117 @@ void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter) int i; for (i = 0; i < adapter->num_active_queues; i++) { - struct iavf_ring *tx_ring = &adapter->tx_rings[i]; - struct iavf_ring *rx_ring = &adapter->rx_rings[i]; - - /* prevent multiple L2TAG bits being set after VFR */ - tx_ring->flags &= - ~(IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 | - IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2); - rx_ring->flags &= - ~(IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 | - IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2); - - if (VLAN_ALLOWED(adapter)) { - tx_ring->flags |= IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; - rx_ring->flags |= IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; - } else if (VLAN_V2_ALLOWED(adapter)) { - struct virtchnl_vlan_supported_caps *stripping_support; - struct virtchnl_vlan_supported_caps *insertion_support; - - stripping_support = - &adapter->vlan_v2_caps.offloads.stripping_support; - insertion_support = - &adapter->vlan_v2_caps.offloads.insertion_support; - - if (stripping_support->outer) { - if (stripping_support->outer & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) - rx_ring->flags |= - IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; - else if (stripping_support->outer & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2) - rx_ring->flags |= - IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2; - } else if (stripping_support->inner) { - if (stripping_support->inner & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) - rx_ring->flags |= - IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; - else if (stripping_support->inner & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2) - rx_ring->flags |= - IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2; - } + iavf_set_rx_queue_vlan_tag_loc(adapter, &adapter->rx_rings[i]); + iavf_set_tx_queue_vlan_tag_loc(adapter, &adapter->tx_rings[i]); + } - if (insertion_support->outer) { - if (insertion_support->outer & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) - tx_ring->flags |= - IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; - else if (insertion_support->outer & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2) - tx_ring->flags |= - IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2; - } else if (insertion_support->inner) { - if (insertion_support->inner & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) - tx_ring->flags |= - IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; - else if (insertion_support->inner & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2) - tx_ring->flags |= - IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2; - } - } + 
iavf_set_xdp_queue_vlan_tag_loc(adapter); +} + +/** + * iavf_init_rx_ring - Init pointers and flags for a given Rx ring + * @adapter: board private structure to initialize + * @ring_index: index of the ring to be initialized + * + * Init all basic pointers and flags in a newly allocated Rx ring. + */ +static void iavf_init_rx_ring(struct iavf_adapter *adapter, + int ring_index) +{ + struct iavf_ring *rx_ring = &adapter->rx_rings[ring_index]; + + rx_ring->vsi = &adapter->vsi; + rx_ring->queue_index = ring_index; + rx_ring->netdev = adapter->netdev; + rx_ring->dev = &adapter->pdev->dev; + rx_ring->count = adapter->rx_desc_count; + rx_ring->itr_setting = IAVF_ITR_RX_DEF; +} + +/** + * iavf_init_tx_ring - Init pointers and flags for a given Tx ring + * @adapter: board private structure to initialize + * @ring_index: index of the ring to be initialized + * @xdp_ring: set to true if the ring is XDP Tx queue + * + * Init all basic pointers and flags in a newly allocated Tx ring. + */ +static void iavf_init_tx_ring(struct iavf_adapter *adapter, + int ring_index, + bool xdp_ring) +{ + struct iavf_ring *tx_ring = xdp_ring ? &adapter->xdp_rings[ring_index] + : &adapter->tx_rings[ring_index]; + + tx_ring->vsi = &adapter->vsi; + tx_ring->queue_index = ring_index; + tx_ring->netdev = adapter->netdev; + tx_ring->dev = &adapter->pdev->dev; + tx_ring->count = adapter->tx_desc_count; + tx_ring->itr_setting = IAVF_ITR_TX_DEF; + + tx_ring->flags = 0; + + if (adapter->flags & IAVF_FLAG_WB_ON_ITR_CAPABLE) + tx_ring->flags |= IAVF_TXR_FLAGS_WB_ON_ITR; + + u64_stats_init(&tx_ring->sq_stats.syncp); + + if (xdp_ring) { + tx_ring->queue_index += adapter->num_active_queues; + tx_ring->flags |= IAVF_TXRX_FLAGS_XDP; + spin_lock_init(&tx_ring->tx_lock); + } +} + +/** + * iavf_xdp_cfg_tx_sharing - Enable XDP TxQ sharing, if needed + * @adapter: board private structure + * + * If there is more CPUs than rings, sharing XDP TxQ allows us + * to handle XDP_REDIRECT from other interfaces. + */ +static void iavf_xdp_cfg_tx_sharing(struct iavf_adapter *adapter) +{ + u32 num_active_queues = adapter->num_active_queues; + u32 num_cpus = num_online_cpus(); + + if (!iavf_adapter_xdp_active(adapter) || num_active_queues >= num_cpus) + return; + + netdev_warn(adapter->netdev, + "System has %u CPUs, but only %u XDP queues can be configured, entering XDP TxQ sharing mode, performance is decreased\n", + num_cpus, num_active_queues); + static_branch_inc(&iavf_xdp_locking_key); +} + +/** + * iavf_alloc_xdp_queues - Allocate memory for XDP rings + * @adapter: board private structure to initialize + * @num_active_queues: number of exposed queue pairs + * + * Variation of iavf_alloc_queues(), which configures XDP queues only. 
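
The TxQ sharing mode warned about above comes down to CPU count versus XDP Tx queue count: once several CPUs can redirect into the same queue, each XDP Tx queue has to be protected by its tx_lock. A minimal sketch of that decision alone (not the static-branch mechanics):

#include <stdbool.h>
#include <stdio.h>

/* sharing is needed as soon as there are more CPUs than XDP Tx queues */
static bool xdp_tx_needs_locking(unsigned int num_xdp_queues,
				 unsigned int num_cpus)
{
	return num_cpus > num_xdp_queues;
}

int main(void)
{
	printf("%d\n", xdp_tx_needs_locking(4, 16));	/* 1: shared, take tx_lock */
	printf("%d\n", xdp_tx_needs_locking(16, 8));	/* 0: one queue per CPU */
	return 0;
}
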
+ */ +static int iavf_alloc_xdp_queues(struct iavf_adapter *adapter, u32 num_active_queues) +{ + int i; + + adapter->xdp_rings = kcalloc(num_active_queues, + sizeof(struct iavf_ring), GFP_KERNEL); + if (!adapter->xdp_rings) + return -ENOMEM; + + adapter->num_xdp_tx_queues = num_active_queues; + + /* Setup extra XDP Tx queues if there are any */ + for (i = 0; i < adapter->num_xdp_tx_queues; i++) { + iavf_init_tx_ring(adapter, i, true); + adapter->rx_rings[i].xdp_ring = &adapter->xdp_rings[i]; } + + iavf_xdp_cfg_tx_sharing(adapter); + + return 0; } /** @@ -1574,7 +1808,8 @@ void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter) **/ static int iavf_alloc_queues(struct iavf_adapter *adapter) { - int i, num_active_queues; + u32 num_active_queues; + int i; /* If we're in reset reallocating queues we don't actually know yet for * certain the PF gave us the number of queues we asked for but we'll @@ -1591,7 +1826,6 @@ static int iavf_alloc_queues(struct iavf_adapter *adapter) adapter->vsi_res->num_queue_pairs, (int)(num_online_cpus())); - adapter->tx_rings = kcalloc(num_active_queues, sizeof(struct iavf_ring), GFP_KERNEL); if (!adapter->tx_rings) @@ -1601,29 +1835,16 @@ static int iavf_alloc_queues(struct iavf_adapter *adapter) if (!adapter->rx_rings) goto err_out; - for (i = 0; i < num_active_queues; i++) { - struct iavf_ring *tx_ring; - struct iavf_ring *rx_ring; - - tx_ring = &adapter->tx_rings[i]; - - tx_ring->queue_index = i; - tx_ring->netdev = adapter->netdev; - tx_ring->dev = &adapter->pdev->dev; - tx_ring->count = adapter->tx_desc_count; - tx_ring->itr_setting = IAVF_ITR_TX_DEF; - if (adapter->flags & IAVF_FLAG_WB_ON_ITR_CAPABLE) - tx_ring->flags |= IAVF_TXR_FLAGS_WB_ON_ITR; + adapter->num_active_queues = num_active_queues; - rx_ring = &adapter->rx_rings[i]; - rx_ring->queue_index = i; - rx_ring->netdev = adapter->netdev; - rx_ring->dev = &adapter->pdev->dev; - rx_ring->count = adapter->rx_desc_count; - rx_ring->itr_setting = IAVF_ITR_RX_DEF; + for (i = 0; i < num_active_queues; i++) { + iavf_init_tx_ring(adapter, i, false); + iavf_init_rx_ring(adapter, i); } - adapter->num_active_queues = num_active_queues; + if (iavf_adapter_xdp_active(adapter)) + if (iavf_alloc_xdp_queues(adapter, num_active_queues)) + goto err_out; iavf_set_queue_vlan_tag_loc(adapter); @@ -2001,12 +2222,12 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) if (adapter->aq_required & IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS) return iavf_send_vf_offload_vlan_v2_msg(adapter); if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_QUEUES) { - iavf_disable_queues(adapter); + iavf_disable_queues(adapter, false); return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_MAP_VECTORS) { - iavf_map_queues(adapter); + iavf_map_queues(adapter, false); return 0; } @@ -2041,12 +2262,12 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) } if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_QUEUES) { - iavf_configure_queues(adapter); + iavf_configure_queues(adapter, false); return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_QUEUES) { - iavf_enable_queues(adapter); + iavf_enable_queues(adapter, false); return 0; } @@ -2396,7 +2617,6 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter) return -EAGAIN; } - adapter->num_req_queues = 0; adapter->vsi.id = adapter->vsi_res->vsi_id; adapter->vsi.back = adapter; @@ -2590,11 +2810,10 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) netdev->netdev_ops = &iavf_netdev_ops; iavf_set_ethtool_ops(netdev); - netdev->watchdog_timeo = 5 * 
HZ; + netdev->max_mtu = LIBIE_MAX_MTU; - /* MTU range: 68 - 9710 */ - netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = IAVF_MAX_RXBUFFER - IAVF_PACKET_HDR_PAD; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; if (!is_valid_ether_addr(adapter->hw.mac.addr)) { dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n", @@ -2652,6 +2871,11 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); rtnl_unlock(); + adapter->af_xdp_zc_qps = bitmap_zalloc(adapter->num_active_queues, + GFP_KERNEL); + if (!adapter->af_xdp_zc_qps) + goto err_zc_qps; + iavf_misc_irq_enable(adapter); wake_up(&adapter->down_waitqueue); @@ -2673,6 +2897,8 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) return; err_mem: iavf_free_rss(adapter); +err_zc_qps: + bitmap_free(adapter->af_xdp_zc_qps); err_register: iavf_free_misc_irq(adapter); err_sw_init: @@ -2871,6 +3097,36 @@ static void iavf_watchdog_task(struct work_struct *work) HZ * 2); } +/** + * iavf_xchg_xdp_prog - set new prog and get an old one + * @adapter: board private structure + * @prog: new XDP program + * + * Returns pointer to the old XDP program. + * adapter->xdp_prog is not used in packet processing, so it can be + * safely set kinda like a flag before resource re-configuration (reset) + */ +static struct bpf_prog *iavf_xchg_xdp_prog(struct iavf_adapter *adapter, + struct bpf_prog *prog) +{ + return xchg(&adapter->xdp_prog, prog); +} + +/** + * iavf_free_xdp_prog - Release XDP program, if present + * @adapter: board private structure + * + * Should be used when adapter is being removed. + */ +static void iavf_free_xdp_prog(struct iavf_adapter *adapter) +{ + struct bpf_prog *old_xdp_prog; + + old_xdp_prog = iavf_xchg_xdp_prog(adapter, NULL); + if (old_xdp_prog) + bpf_prog_put(old_xdp_prog); +} + /** * iavf_disable_vf - disable VF * @adapter: board private structure @@ -2902,6 +3158,9 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) iavf_free_all_rx_resources(adapter); } + iavf_free_xdp_prog(adapter); + bitmap_free(adapter->af_xdp_zc_qps); + spin_lock_bh(&adapter->mac_vlan_list_lock); /* Delete all of the filters */ @@ -3354,9 +3613,16 @@ void iavf_free_all_tx_resources(struct iavf_adapter *adapter) if (!adapter->tx_rings) return; + if (static_key_enabled(&iavf_xdp_locking_key)) + static_branch_dec(&iavf_xdp_locking_key); + for (i = 0; i < adapter->num_active_queues; i++) if (adapter->tx_rings[i].desc) iavf_free_tx_resources(&adapter->tx_rings[i]); + + for (i = 0; i < adapter->num_xdp_tx_queues; i++) + if (adapter->xdp_rings[i].desc) + iavf_free_tx_resources(&adapter->xdp_rings[i]); } /** @@ -3368,14 +3634,16 @@ void iavf_free_all_tx_resources(struct iavf_adapter *adapter) * callers duty to clean those orphaned rings. 
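
The xchg()-based swap above guarantees that exactly one caller sees, and therefore releases, the previous program even if two updates race. A standalone model using C11 atomics (struct prog and the put helper are stand-ins, not bpf_prog/bpf_prog_put):

#include <stdio.h>
#include <stdatomic.h>

struct prog {
	const char *name;	/* stand-in for a refcounted bpf_prog */
};

static _Atomic(struct prog *) current_prog;

static void prog_put(struct prog *p)
{
	if (p)
		printf("dropped reference on %s\n", p->name);
}

/* publish the new program and release the one it replaces */
static void assign_prog(struct prog *p)
{
	struct prog *old = atomic_exchange(&current_prog, p);

	prog_put(old);
}

int main(void)
{
	struct prog a = { "prog_a" }, b = { "prog_b" };

	assign_prog(&a);
	assign_prog(&b);	/* drops prog_a */
	assign_prog(NULL);	/* drops prog_b */
	return 0;
}
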
* * Return 0 on success, negative on failure - **/ + */ static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter) { + struct iavf_ring *ring; int i, err = 0; for (i = 0; i < adapter->num_active_queues; i++) { - adapter->tx_rings[i].count = adapter->tx_desc_count; - err = iavf_setup_tx_descriptors(&adapter->tx_rings[i]); + ring = &adapter->tx_rings[i]; + ring->count = adapter->tx_desc_count; + err = iavf_setup_tx_descriptors(ring); if (!err) continue; dev_err(&adapter->pdev->dev, @@ -3383,6 +3651,17 @@ static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter) break; } + for (i = 0; i < adapter->num_xdp_tx_queues; i++) { + ring = &adapter->xdp_rings[i]; + ring->count = adapter->tx_desc_count; + err = iavf_setup_tx_descriptors(ring); + if (!err) + continue; + dev_err(&adapter->pdev->dev, + "Allocation for XDP Queue %u failed\n", i); + break; + } + return err; } @@ -3398,10 +3677,13 @@ static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter) **/ static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter) { + struct iavf_ring *rx_ring; int i, err = 0; for (i = 0; i < adapter->num_active_queues; i++) { - adapter->rx_rings[i].count = adapter->rx_desc_count; + rx_ring = &adapter->rx_rings[i]; + rx_ring->count = adapter->rx_desc_count; + err = iavf_setup_rx_descriptors(&adapter->rx_rings[i]); if (!err) continue; @@ -4675,6 +4957,347 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev, return iavf_fix_netdev_vlan_features(adapter, features); } +/** + * iavf_copy_xdp_prog_to_rings - update XDP prog references in rings + * @adapter: board private structure + * + * If program change also requires XDP resources reconfiguration, + * schedule a reset instead + */ +static void iavf_copy_xdp_prog_to_rings(const struct iavf_adapter *adapter) +{ + for (u32 i = 0; i < adapter->num_active_queues; i++) + rcu_assign_pointer(adapter->rx_rings[i].xdp_prog, + adapter->xdp_prog); + + /* No queue changes are needed, but running RX processing must finish */ + synchronize_net(); +} + +/** + * iavf_assign_bpf_prog - Assign a given BPF program to adapter + * @adapter: board private structure + * @prog: BPF program to be assigned to adapter + * + * Returns 0 on success, negative on failure + */ +static void iavf_assign_bpf_prog(struct iavf_adapter *adapter, + struct bpf_prog *prog) +{ + struct bpf_prog *old_prog; + + old_prog = iavf_xchg_xdp_prog(adapter, prog); + if (old_prog) + bpf_prog_put(old_prog); +} + +#define IAVF_XDP_LOCK_TIMEOUT_MS 5000 + +/** + * iavf_close_sync - Synchronous version of 'iavf_close', dedicated to XDP setup + * @adapter: board private structure + * + * Caller of this function needs to lock 'adapter->crit_lock' in order + * to prevent race conditions with 'reset_task' and VIRTCHNL communication. 
+ * + * Returns 0 on success, negative on failure + */ +static int iavf_close_sync(struct iavf_adapter *adapter) +{ + int err; + + iavf_stop_traffic(adapter); + + err = iavf_disable_queues(adapter, true); + if (err) { + dev_err(&adapter->pdev->dev, "cannot disable queues for XDP setup, error: %d\n", + err); + goto err_virtchnl; + } + + iavf_free_all_tx_resources(adapter); + iavf_free_all_rx_resources(adapter); + + iavf_free_traffic_irqs(adapter); + + return 0; + +err_virtchnl: + iavf_start_traffic(adapter); + + return err; +} + +/** + * iavf_open_sync - Synchronous version of 'iavf_open', dedicated to XDP setup + * @adapter: board private structure + * + * Caller of this function needs to lock 'adapter->crit_lock' in order + * to prevent race conditions with 'reset_task' and VIRTCHNL communication. + * + * Returns 0 on success, negative on failure + */ +static int iavf_open_sync(struct iavf_adapter *adapter) +{ + int err, ret; + + err = iavf_setup_all_tx_resources(adapter); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot setup Tx resources, error: %d\n", err); + goto err_setup_tx_resources; + } + + err = iavf_setup_all_rx_resources(adapter); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot setup Rx resources, error: %d\n", err); + goto err_setup_rx_resources; + } + + err = iavf_request_traffic_irqs(adapter, adapter->netdev->name); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot request interrupts, error: %d\n", err); + goto err_request_irq; + } + + iavf_configure_tx(adapter); + iavf_configure_rx(adapter); + + err = iavf_configure_queues(adapter, true); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot configure queues in PF, error: %d\n", err); + goto err_virtchnl_req; + } + + err = iavf_map_queues(adapter, true); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot map queues to vectors in PF, error: %d\n", err); + goto err_virtchnl_req; + } + + err = iavf_enable_queues(adapter, true); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot enable queues in PF, error: %d\n", err); + goto err_virtchnl_req; + } + + ret = iavf_poll_for_link_status(adapter, IAVF_XDP_LINK_TIMEOUT_MS); + if (ret < 0) { + err = ret; + dev_err(&adapter->pdev->dev, + "cannot bring the link up, error: %d\n", err); + goto err_wrong_link_status; + } else if (!ret) { + err = -EBUSY; + dev_err(&adapter->pdev->dev, + "pf returned link down status, error: %d\n", err); + goto err_wrong_link_status; + } + + iavf_start_traffic(adapter); + + return 0; + +err_wrong_link_status: + iavf_close_sync(adapter); +err_virtchnl_req: +err_request_irq: + iavf_free_traffic_irqs(adapter); +err_setup_rx_resources: + iavf_free_all_rx_resources(adapter); +err_setup_tx_resources: + iavf_free_all_tx_resources(adapter); + + return err; +} + +/** + * iavf_destroy_xdp_rings - remove XDP program from adapter and release + * XDP rings related to that program. + * @adapter: board private structure + */ +static void iavf_destroy_xdp_rings(struct iavf_adapter *adapter) +{ + iavf_unmap_rings_from_vectors(adapter); + iavf_free_xdp_queues(adapter); + iavf_assign_bpf_prog(adapter, NULL); + iavf_map_rings_to_vectors(adapter); +} + +/** + * iavf_prepare_xdp_rings - add XDP program to adapter and setup XDP rings + * to handle that program. 
+ * @adapter: board private structure + * @prog: XDP program + */ +static int iavf_prepare_xdp_rings(struct iavf_adapter *adapter, + struct bpf_prog *prog) +{ + int i, err; + + iavf_unmap_rings_from_vectors(adapter); + iavf_assign_bpf_prog(adapter, prog); + + err = iavf_alloc_xdp_queues(adapter, adapter->num_active_queues); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot allocate memory for queues, error: %d\n", err); + goto err_alloc_queues; + } + + iavf_set_xdp_queue_vlan_tag_loc(adapter); + + iavf_map_rings_to_vectors(adapter); + + for_each_set_bit(i, adapter->af_xdp_zc_qps, adapter->num_active_queues) + napi_schedule(&adapter->rx_rings[i].q_vector->napi); + + return 0; + +err_alloc_queues: + iavf_assign_bpf_prog(adapter, NULL); + + return err; +} + +/** + * iavf_xdp_can_create_queues - check if queue number is appropriate for XDP + * @adapter: board private structure + * @extack: netlink extended ack + */ +static bool iavf_xdp_can_create_queues(struct iavf_adapter *adapter, + struct netlink_ext_ack *extack) +{ + u32 max_qp_num = adapter->vsi_res->num_queue_pairs; + u32 num_active_queues = adapter->num_active_queues; + + if (num_active_queues * 2 <= max_qp_num) + return true; + + netdev_warn(adapter->netdev, + "Current number of queue pairs (%u) set on adapter is too high to enable XDP, please configure queue number through ethtool to be no bigger than %u", + num_active_queues, max_qp_num); + + NL_SET_ERR_MSG_MOD(extack, + "XDP cannot be enabled due to configured queue number being too large, please check dmesg for more info"); + + return false; +} + +/** + * iavf_setup_xdp - handle xdp program change + * @adapter: board private structure + * @prog: XDP program + * @extack: netlink extended ack + */ +static int iavf_setup_xdp(struct iavf_adapter *adapter, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + u32 frame_size = READ_ONCE(adapter->netdev->mtu) + LIBIE_RX_LL_LEN; + bool needs_reconfig = !!prog != iavf_adapter_xdp_active(adapter); + bool was_running = netif_running(adapter->netdev); + int err; + + if (prog && frame_size > LIBIE_RX_BUF_LEN(LIBIE_XDP_HEADROOM)) { + NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP"); + return -EOPNOTSUPP; + } + + if (needs_reconfig) { + if (!iavf_xdp_can_create_queues(adapter, extack)) { + err = -EOPNOTSUPP; + goto err_no_queues; + } + + if (iavf_lock_timeout(&adapter->crit_lock, + IAVF_XDP_LOCK_TIMEOUT_MS)) { + err = -EBUSY; + dev_err(&adapter->pdev->dev, + "failed to acquire crit_lock in %s\n", + __func__); + goto err_crit_lock; + } + err = iavf_process_pending_pf_msg(adapter, + IAVF_XDP_LOCK_TIMEOUT_MS); + if (err) + goto err_pending_pf_msg; + + if (was_running) { + err = iavf_close_sync(adapter); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot close the interface to setup XDP, error: %d\n", + err); + goto err_close_if; + } + } + + if (prog) { + err = iavf_prepare_xdp_rings(adapter, prog); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot prepare rings to support XDP, error: %d\n", + err); + goto err_prepare_xdp_rings; + } + } else { + iavf_destroy_xdp_rings(adapter); + } + + if (was_running) { + err = iavf_open_sync(adapter); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot open the interface after XDP setup, error: %d\n", + err); + goto err_open_if; + } + } + mutex_unlock(&adapter->crit_lock); + } else { + iavf_assign_bpf_prog(adapter, prog); + iavf_copy_xdp_prog_to_rings(adapter); + } + + return 0; + +err_open_if: +err_prepare_xdp_rings: + iavf_destroy_xdp_rings(adapter); + iavf_open_sync(adapter); 
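
The queue-count guard above is a single arithmetic check: enabling XDP doubles the Tx queue demand, so twice the currently active pairs must still fit in what the PF granted the VF. A trivial standalone version (the numbers are invented):

#include <stdbool.h>
#include <stdio.h>

/* each active queue pair needs a second Tx queue once XDP is on */
static bool xdp_queues_fit(unsigned int active_pairs,
			   unsigned int max_pairs_from_pf)
{
	return active_pairs * 2 <= max_pairs_from_pf;
}

int main(void)
{
	printf("%d\n", xdp_queues_fit(8, 16));	/* 1: XDP can be enabled */
	printf("%d\n", xdp_queues_fit(12, 16));	/* 0: reduce the queue count first */
	return 0;
}
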
+err_close_if: +err_pending_pf_msg: + mutex_unlock(&adapter->crit_lock); +err_crit_lock: +err_no_queues: + return err; +} + +/** + * iavf_xdp - XDP command handler + * @netdev: netdevice + * @xdp: XDP command + */ +static int iavf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + + switch (xdp->command) { + case XDP_SETUP_PROG: + return iavf_setup_xdp(adapter, xdp->prog, xdp->extack); + case XDP_SETUP_XSK_POOL: + return iavf_xsk_pool_setup(adapter, xdp->xsk.pool, + xdp->xsk.queue_id); + default: + return -EINVAL; + } +} + static const struct net_device_ops iavf_netdev_ops = { .ndo_open = iavf_open, .ndo_stop = iavf_close, @@ -4690,6 +5313,9 @@ static const struct net_device_ops iavf_netdev_ops = { .ndo_fix_features = iavf_fix_features, .ndo_set_features = iavf_set_features, .ndo_setup_tc = iavf_setup_tc, + .ndo_bpf = iavf_xdp, + .ndo_xdp_xmit = iavf_xdp_xmit, + .ndo_xsk_wakeup = iavf_xsk_wakeup }; /** @@ -5124,6 +5750,8 @@ static void iavf_remove(struct pci_dev *pdev) iavf_free_all_rx_resources(adapter); iavf_free_misc_irq(adapter); + iavf_free_xdp_prog(adapter); + iavf_reset_interrupt_capability(adapter); iavf_free_q_vectors(adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_prototype.h b/drivers/net/ethernet/intel/iavf/iavf_prototype.h index edebfbbcffdc2e..c2e5dbc0a75a35 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_prototype.h +++ b/drivers/net/ethernet/intel/iavf/iavf_prototype.h @@ -51,13 +51,6 @@ enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 seid, enum iavf_status iavf_set_mac_type(struct iavf_hw *hw); -extern struct iavf_rx_ptype_decoded iavf_ptype_lookup[]; - -static inline struct iavf_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype) -{ - return iavf_ptype_lookup[ptype]; -} - void iavf_vf_parse_hw_config(struct iavf_hw *hw, struct virtchnl_vf_resource *msg); enum iavf_status iavf_vf_reset(struct iavf_hw *hw); diff --git a/drivers/net/ethernet/intel/iavf/iavf_trace.h b/drivers/net/ethernet/intel/iavf/iavf_trace.h index 82fda6f5abf043..383a5375392a20 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_trace.h +++ b/drivers/net/ethernet/intel/iavf/iavf_trace.h @@ -145,6 +145,14 @@ DEFINE_EVENT( TP_ARGS(ring, desc, skb)); +DEFINE_EVENT( + iavf_rx_template, iavf_clean_rx_irq_zc, + TP_PROTO(struct iavf_ring *ring, + union iavf_32byte_rx_desc *desc, + struct sk_buff *skb), + + TP_ARGS(ring, desc, skb)); + DEFINE_EVENT( iavf_rx_template, iavf_clean_rx_irq_rx, TP_PROTO(struct iavf_ring *ring, @@ -153,6 +161,14 @@ DEFINE_EVENT( TP_ARGS(ring, desc, skb)); +DEFINE_EVENT( + iavf_rx_template, iavf_clean_rx_irq_zc_rx, + TP_PROTO(struct iavf_ring *ring, + union iavf_32byte_rx_desc *desc, + struct sk_buff *skb), + + TP_ARGS(ring, desc, skb)); + DECLARE_EVENT_CLASS( iavf_xmit_template, diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index e989feda133c1e..cca9907bee5a1a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1,21 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ +#include +#include +#include +#include #include #include "iavf.h" #include "iavf_trace.h" #include "iavf_prototype.h" -static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, - u32 td_tag) -{ - return cpu_to_le64(IAVF_TX_DESC_DTYPE_DATA | - ((u64)td_cmd << IAVF_TXD_QW1_CMD_SHIFT) | - ((u64)td_offset << IAVF_TXD_QW1_OFFSET_SHIFT) | - ((u64)size << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT) | - ((u64)td_tag << IAVF_TXD_QW1_L2TAG1_SHIFT)); -} +DEFINE_STATIC_KEY_FALSE(iavf_xdp_locking_key); + +static bool iavf_xdp_xmit_back(const struct xdp_buff *buff, + struct iavf_ring *xdp_ring); #define IAVF_TXD_CMD (IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_RS) @@ -27,22 +26,14 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, static void iavf_unmap_and_free_tx_resource(struct iavf_ring *ring, struct iavf_tx_buffer *tx_buffer) { - if (tx_buffer->skb) { - if (tx_buffer->tx_flags & IAVF_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - else - dev_kfree_skb_any(tx_buffer->skb); - if (dma_unmap_len(tx_buffer, len)) - dma_unmap_single(ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - } else if (dma_unmap_len(tx_buffer, len)) { + if (tx_buffer->skb) + dev_kfree_skb_any(tx_buffer->skb); + + if (dma_unmap_len(tx_buffer, len)) dma_unmap_page(ring->dev, dma_unmap_addr(tx_buffer, dma), dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); - } tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; @@ -50,6 +41,51 @@ static void iavf_unmap_and_free_tx_resource(struct iavf_ring *ring, /* tx_buffer must be completely set up in the transmit path */ } +/** + * iavf_free_xdp_resource - Correctly free XDP TX buffer + * @ring: XDP ring + * @tx_buffer: the buffer being released + */ +static void iavf_free_xdp_resource(struct iavf_ring *ring, + struct iavf_tx_buffer *tx_buffer) +{ + struct page *page; + u32 put_size; + + switch (tx_buffer->xdp_type) { + case IAVF_XDP_BUFFER_TX: + page = tx_buffer->page; + put_size = dma_unmap_len(tx_buffer, len); + page_pool_put_page(page->pp, page, put_size, true); + break; + case IAVF_XDP_BUFFER_FRAME: + dma_unmap_page(ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + xdp_return_frame(tx_buffer->xdpf); + break; + } + + tx_buffer->xdp_type = IAVF_XDP_BUFFER_NONE; +} + +/** + * iavf_release_tx_resources - Release all Tx buffers on ring + * @ring: TX or XDP ring + */ +static void iavf_release_tx_resources(struct iavf_ring *ring) +{ + bool is_xdp = iavf_ring_is_xdp(ring); + u32 i; + + for (i = 0; i < ring->count; i++) + if (is_xdp) + iavf_free_xdp_resource(ring, &ring->tx_bi[i]); + else + iavf_unmap_and_free_tx_resource(ring, &ring->tx_bi[i]); +} + /** * iavf_clean_tx_ring - Free any empty Tx buffers * @tx_ring: ring to be cleaned @@ -57,15 +93,17 @@ static void iavf_unmap_and_free_tx_resource(struct iavf_ring *ring, void iavf_clean_tx_ring(struct iavf_ring *tx_ring) { unsigned long bi_size; - u16 i; /* ring already cleared, nothing to do */ if (!tx_ring->tx_bi) return; - /* Free all the Tx ring sk_buffs */ - for (i = 0; i < tx_ring->count; i++) - iavf_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]); + if (tx_ring->flags & IAVF_TXRX_FLAGS_XSK) { + iavf_xsk_clean_xdp_ring(tx_ring); + } else { + /* Free all the Tx ring sk_buffs */ + iavf_release_tx_resources(tx_ring); + } bi_size = sizeof(struct iavf_tx_buffer) * tx_ring->count; memset(tx_ring->tx_bi, 0, bi_size); @@ -80,7 +118,8 @@ void iavf_clean_tx_ring(struct iavf_ring *tx_ring) return; /* cleanup Tx queue 
statistics */ - netdev_tx_reset_queue(txring_txq(tx_ring)); + if (!(tx_ring->flags & IAVF_TXRX_FLAGS_XDP)) + netdev_tx_reset_queue(txring_txq(tx_ring)); } /** @@ -95,6 +134,11 @@ void iavf_free_tx_resources(struct iavf_ring *tx_ring) kfree(tx_ring->tx_bi); tx_ring->tx_bi = NULL; + if (tx_ring->flags & IAVF_TXRX_FLAGS_XSK) { + tx_ring->dev = tx_ring->xsk_pool->dev; + tx_ring->flags &= ~IAVF_TXRX_FLAGS_XSK; + } + if (tx_ring->desc) { dma_free_coherent(tx_ring->dev, tx_ring->size, tx_ring->desc, tx_ring->dma); @@ -157,6 +201,9 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) for (i = 0; i < vsi->back->num_active_queues; i++) { tx_ring = &vsi->back->tx_rings[i]; if (tx_ring && tx_ring->desc) { + const struct libie_sq_stats *st = &tx_ring->sq_stats; + u32 start; + /* If packet counter has not changed the queue is * likely stalled, so force an interrupt for this * queue. @@ -164,8 +211,13 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) * prev_pkt_ctr would be negative if there was no * pending work. */ - packets = tx_ring->stats.packets & INT_MAX; - if (tx_ring->tx_stats.prev_pkt_ctr == packets) { + do { + start = u64_stats_fetch_begin(&st->syncp); + packets = u64_stats_read(&st->packets) & + INT_MAX; + } while (u64_stats_fetch_retry(&st->syncp, start)); + + if (tx_ring->prev_pkt_ctr == packets) { iavf_force_wb(vsi, tx_ring->q_vector); continue; } @@ -174,7 +226,7 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) * to iavf_get_tx_pending() */ smp_rmb(); - tx_ring->tx_stats.prev_pkt_ctr = + tx_ring->prev_pkt_ctr = iavf_get_tx_pending(tx_ring, true) ? packets : -1; } } @@ -193,10 +245,10 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, struct iavf_ring *tx_ring, int napi_budget) { + struct libie_sq_onstack_stats stats = { }; int i = tx_ring->next_to_clean; struct iavf_tx_buffer *tx_buf; struct iavf_tx_desc *tx_desc; - unsigned int total_bytes = 0, total_packets = 0; unsigned int budget = IAVF_DEFAULT_IRQ_WORK; tx_buf = &tx_ring->tx_bi[i]; @@ -223,8 +275,8 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, tx_buf->next_to_watch = NULL; /* update the statistics for this packet */ - total_bytes += tx_buf->bytecount; - total_packets += tx_buf->gso_segs; + stats.bytes += tx_buf->bytecount; + stats.packets += tx_buf->gso_segs; /* free the skb */ napi_consume_skb(tx_buf->skb, napi_budget); @@ -281,12 +333,7 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, i += tx_ring->count; tx_ring->next_to_clean = i; - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->stats.bytes += total_bytes; - tx_ring->stats.packets += total_packets; - u64_stats_update_end(&tx_ring->syncp); - tx_ring->q_vector->tx.total_bytes += total_bytes; - tx_ring->q_vector->tx.total_packets += total_packets; + iavf_update_tx_ring_stats(tx_ring, &stats); if (tx_ring->flags & IAVF_TXR_FLAGS_WB_ON_ITR) { /* check to see if there are < 4 descriptors @@ -300,15 +347,16 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, ((j / WB_STRIDE) == 0) && (j > 0) && !test_bit(__IAVF_VSI_DOWN, vsi->state) && (IAVF_DESC_UNUSED(tx_ring) != tx_ring->count)) - tx_ring->arm_wb = true; + tx_ring->flags |= IAVF_TXRX_FLAGS_ARM_WB; } /* notify netdev of completed buffers */ - netdev_tx_completed_queue(txring_txq(tx_ring), - total_packets, total_bytes); + if (!(tx_ring->flags & IAVF_TXRX_FLAGS_XDP)) + netdev_tx_completed_queue(txring_txq(tx_ring), + stats.packets, stats.bytes); #define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2)) - if (unlikely(total_packets && 
netif_carrier_ok(tx_ring->netdev) && + if (unlikely(stats.packets && netif_carrier_ok(tx_ring->netdev) && (IAVF_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { /* Make sure that anybody stopping the queue after this * sees the new next_to_clean. @@ -319,7 +367,7 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, !test_bit(__IAVF_VSI_DOWN, vsi->state)) { netif_wake_subqueue(tx_ring->netdev, tx_ring->queue_index); - ++tx_ring->tx_stats.restart_queue; + libie_stats_inc_one(&tx_ring->sq_stats, restarts); } } @@ -649,7 +697,8 @@ static void iavf_update_itr(struct iavf_q_vector *q_vector, int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring) { struct device *dev = tx_ring->dev; - int bi_size; + struct iavf_tx_desc *tx_desc; + int bi_size, j; if (!dev) return -ENOMEM; @@ -674,7 +723,14 @@ int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; - tx_ring->tx_stats.prev_pkt_ctr = -1; + tx_ring->prev_pkt_ctr = -1; + for (j = 0; j < tx_ring->count; j++) { + tx_desc = IAVF_TX_DESC(tx_ring, j); + tx_desc->cmd_type_offset_bsz = 0; + } + + iavf_xsk_setup_xdp_ring(tx_ring); + return 0; err: @@ -683,17 +739,30 @@ int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring) return -ENOMEM; } +static void iavf_clean_rx_pages(struct iavf_ring *rx_ring) +{ + for (u32 i = 0; i < rx_ring->count; i++) { + struct page *page = rx_ring->rx_pages[i]; + + if (!page) + continue; + + /* Invalidate cache lines that may have been written to by + * device so that we avoid corrupting memory. + */ + page_pool_dma_sync_full_for_cpu(rx_ring->pool, page); + page_pool_put_full_page(rx_ring->pool, page, false); + } +} + /** * iavf_clean_rx_ring - Free Rx buffers * @rx_ring: ring to be cleaned **/ void iavf_clean_rx_ring(struct iavf_ring *rx_ring) { - unsigned long bi_size; - u16 i; - /* ring already cleared, nothing to do */ - if (!rx_ring->rx_bi) + if (!rx_ring->rx_pages) return; if (rx_ring->skb) { @@ -701,41 +770,11 @@ void iavf_clean_rx_ring(struct iavf_ring *rx_ring) rx_ring->skb = NULL; } - /* Free all the Rx ring sk_buffs */ - for (i = 0; i < rx_ring->count; i++) { - struct iavf_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; - - if (!rx_bi->page) - continue; - - /* Invalidate cache lines that may have been written to by - * device so that we avoid corrupting memory. 
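
The hung-queue detector and the Tx cleanup path now read the 64-bit libie counters through a begin/retry sequence. A toy C11 model of that retry pattern, assuming a simple even/odd sequence counter (this is not the kernel's u64_stats API and the memory ordering is deliberately simplified):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct toy_stats {
	atomic_uint seq;	/* even: stable, odd: writer in progress */
	uint64_t packets;
	uint64_t bytes;
};

static void stats_add(struct toy_stats *s, uint64_t pkts, uint64_t bytes)
{
	atomic_fetch_add_explicit(&s->seq, 1, memory_order_release);
	s->packets += pkts;
	s->bytes += bytes;
	atomic_fetch_add_explicit(&s->seq, 1, memory_order_release);
}

static uint64_t stats_read_packets(struct toy_stats *s)
{
	unsigned int start;
	uint64_t val;

	do {
		start = atomic_load_explicit(&s->seq, memory_order_acquire);
		val = s->packets;
	} while ((start & 1) ||
		 atomic_load_explicit(&s->seq, memory_order_acquire) != start);

	return val;
}

int main(void)
{
	struct toy_stats s = { 0 };

	stats_add(&s, 10, 6400);
	printf("packets=%llu\n", (unsigned long long)stats_read_packets(&s));
	return 0;
}
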
- */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_bi->dma, - rx_bi->page_offset, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - - /* free resources associated with mapping */ - dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma, - iavf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IAVF_RX_DMA_ATTR); - - __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias); - - rx_bi->page = NULL; - rx_bi->page_offset = 0; - } - - bi_size = sizeof(struct iavf_rx_buffer) * rx_ring->count; - memset(rx_ring->rx_bi, 0, bi_size); - - /* Zero out the descriptor ring */ - memset(rx_ring->desc, 0, rx_ring->size); + if (rx_ring->flags & IAVF_TXRX_FLAGS_XSK) + iavf_xsk_clean_rx_ring(rx_ring); + else + iavf_clean_rx_pages(rx_ring); - rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; } @@ -748,9 +787,25 @@ void iavf_clean_rx_ring(struct iavf_ring *rx_ring) **/ void iavf_free_rx_resources(struct iavf_ring *rx_ring) { + struct device *dev; + iavf_clean_rx_ring(rx_ring); - kfree(rx_ring->rx_bi); - rx_ring->rx_bi = NULL; + kfree(rx_ring->rx_pages); + rx_ring->rx_pages = NULL; + + /* This also unregisters memory model */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + + if (rx_ring->flags & IAVF_TXRX_FLAGS_XSK) { + dev = rx_ring->xsk_pool->dev; + rx_ring->flags &= ~IAVF_TXRX_FLAGS_XSK; + } else { + dev = rx_ring->pool->p.dev; + libie_rx_page_pool_destroy(rx_ring->pool, &rx_ring->rq_stats); + } + + rx_ring->dev = dev; if (rx_ring->desc) { dma_free_coherent(rx_ring->dev, rx_ring->size, @@ -759,6 +814,17 @@ void iavf_free_rx_resources(struct iavf_ring *rx_ring) } } +/** + * iavf_is_xdp_enabled - Check if XDP is enabled on the RX ring + * @rx_ring: Rx descriptor ring + * + * Returns true, if the ring has been configured for XDP. 
+ */ +static bool iavf_is_xdp_enabled(const struct iavf_ring *rx_ring) +{ + return !!rcu_access_pointer(rx_ring->xdp_prog); +} + /** * iavf_setup_rx_descriptors - Allocate Rx descriptors * @rx_ring: Rx descriptor ring (for a specific queue) to setup @@ -768,16 +834,17 @@ void iavf_free_rx_resources(struct iavf_ring *rx_ring) int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring) { struct device *dev = rx_ring->dev; - int bi_size; + struct page_pool *pool; + int ret = -ENOMEM; /* warn if we are about to overwrite the pointer */ - WARN_ON(rx_ring->rx_bi); - bi_size = sizeof(struct iavf_rx_buffer) * rx_ring->count; - rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL); - if (!rx_ring->rx_bi) - goto err; + WARN_ON(rx_ring->rx_pages); - u64_stats_init(&rx_ring->syncp); + /* Both iavf_ring::rx_pages and ::xdp_buff are arrays of pointers */ + rx_ring->rx_pages = kcalloc(rx_ring->count, sizeof(*rx_ring->rx_pages), + GFP_KERNEL); + if (!rx_ring->rx_pages) + return ret; /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union iavf_32byte_rx_desc); @@ -791,240 +858,127 @@ int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring) goto err; } - rx_ring->next_to_alloc = 0; - rx_ring->next_to_clean = 0; - rx_ring->next_to_use = 0; - - return 0; -err: - kfree(rx_ring->rx_bi); - rx_ring->rx_bi = NULL; - return -ENOMEM; -} - -/** - * iavf_release_rx_desc - Store the new tail and head values - * @rx_ring: ring to bump - * @val: new head index - **/ -static inline void iavf_release_rx_desc(struct iavf_ring *rx_ring, u32 val) -{ - rx_ring->next_to_use = val; - - /* update next to alloc since we have filled the ring */ - rx_ring->next_to_alloc = val; - - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); - writel(val, rx_ring->tail); -} - -/** - * iavf_rx_offset - Return expected offset into page to access data - * @rx_ring: Ring we are requesting offset of - * - * Returns the offset value for ring into the data buffer. - */ -static inline unsigned int iavf_rx_offset(struct iavf_ring *rx_ring) -{ - return ring_uses_build_skb(rx_ring) ? IAVF_SKB_PAD : 0; -} - -/** - * iavf_alloc_mapped_page - recycle or make a new page - * @rx_ring: ring to use - * @bi: rx_buffer struct to modify - * - * Returns true if the page was successfully allocated or - * reused. 
- **/ -static bool iavf_alloc_mapped_page(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *bi) -{ - struct page *page = bi->page; - dma_addr_t dma; - - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) { - rx_ring->rx_stats.page_reuse_count++; - return true; - } - - /* alloc new page for storage */ - page = dev_alloc_pages(iavf_rx_pg_order(rx_ring)); - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_page_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, - iavf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IAVF_RX_DMA_ATTR); + iavf_xsk_setup_rx_ring(rx_ring); + if (rx_ring->flags & IAVF_TXRX_FLAGS_XSK) + goto finish; - /* if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, iavf_rx_pg_order(rx_ring)); - rx_ring->rx_stats.alloc_page_failed++; - return false; + pool = libie_rx_page_pool_create(rx_ring->netdev, rx_ring->count, + iavf_is_xdp_enabled(rx_ring)); + if (IS_ERR(pool)) { + ret = PTR_ERR(pool); + goto err_free_dma; } - bi->dma = dma; - bi->page = page; - bi->page_offset = iavf_rx_offset(rx_ring); + rx_ring->pool = pool; - /* initialize pagecnt_bias to 1 representing we fully own page */ - bi->pagecnt_bias = 1; - - return true; -} +finish: + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; -/** - * iavf_receive_skb - Send a completed packet up the stack - * @rx_ring: rx ring in play - * @skb: packet to send up - * @vlan_tag: vlan tag for packet - **/ -static void iavf_receive_skb(struct iavf_ring *rx_ring, - struct sk_buff *skb, u16 vlan_tag) -{ - struct iavf_q_vector *q_vector = rx_ring->q_vector; + return 0; - if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && - (vlan_tag & VLAN_VID_MASK)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); - else if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_STAG_RX) && - vlan_tag & VLAN_VID_MASK) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), vlan_tag); +err_free_dma: + dma_free_coherent(dev, rx_ring->size, rx_ring->desc, rx_ring->dma); +err: + kfree(rx_ring->rx_pages); + rx_ring->rx_pages = NULL; - napi_gro_receive(&q_vector->napi, skb); + return ret; } /** - * iavf_alloc_rx_buffers - Replace used receive buffers + * __iavf_alloc_rx_pages - Replace used receive pages * @rx_ring: ring to place buffers on - * @cleaned_count: number of buffers to replace + * @to_refill: number of buffers to replace + * @gfp: GFP mask to allocate pages * - * Returns false if all allocations were successful, true if any fail + * Returns 0 if all allocations were successful or the number of buffers left + * to refill in case of an allocation failure. 
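
To make the refill contract above concrete: the loop advances next_to_use with wraparound, stops at the first allocation failure, and hands the number of still-empty slots back to the caller. A standalone sketch with a fake allocator (ring size and values are invented):

#include <stdio.h>

#define RING_SIZE	8u

/* fake allocator: succeeds while *pages_left is non-zero */
static int fake_alloc_page(unsigned int *pages_left)
{
	if (!*pages_left)
		return 0;
	(*pages_left)--;
	return 1;
}

static unsigned int refill(unsigned int *next_to_use, unsigned int to_refill,
			   unsigned int *pages_left)
{
	unsigned int ntu = *next_to_use;

	if (!to_refill)
		return 0;

	do {
		if (!fake_alloc_page(pages_left))
			break;		/* leftover is reported to the caller */

		ntu++;
		if (ntu == RING_SIZE)
			ntu = 0;	/* wrap like the descriptor ring */
	} while (--to_refill);

	*next_to_use = ntu;
	return to_refill;		/* 0 means the ring is fully refilled */
}

int main(void)
{
	unsigned int ntu = 6, pages = 5;
	unsigned int left = refill(&ntu, 7, &pages);

	printf("next_to_use=%u left=%u\n", ntu, left);	/* 3 and 2 */
	return 0;
}
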
**/ -bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count) +static u32 __iavf_alloc_rx_pages(struct iavf_ring *rx_ring, u32 to_refill, + gfp_t gfp) { - u16 ntu = rx_ring->next_to_use; + struct page_pool *pool = rx_ring->pool; + u32 ntu = rx_ring->next_to_use; union iavf_rx_desc *rx_desc; - struct iavf_rx_buffer *bi; + u32 hr = pool->p.offset; /* do nothing if no valid netdev defined */ - if (!rx_ring->netdev || !cleaned_count) - return false; + if (unlikely(!rx_ring->netdev || !to_refill)) + return 0; rx_desc = IAVF_RX_DESC(rx_ring, ntu); - bi = &rx_ring->rx_bi[ntu]; do { - if (!iavf_alloc_mapped_page(rx_ring, bi)) - goto no_buffers; + struct page *page; + dma_addr_t dma; - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, bi->dma, - bi->page_offset, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); + page = page_pool_alloc_pages(pool, gfp); + if (!page) + break; + + rx_ring->rx_pages[ntu] = page; + dma = page_pool_get_dma_addr(page); /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + rx_desc->read.pkt_addr = cpu_to_le64(dma + hr); rx_desc++; - bi++; ntu++; if (unlikely(ntu == rx_ring->count)) { rx_desc = IAVF_RX_DESC(rx_ring, 0); - bi = rx_ring->rx_bi; ntu = 0; } /* clear the status bits for the next_to_use descriptor */ rx_desc->wb.qword1.status_error_len = 0; - - cleaned_count--; - } while (cleaned_count); + } while (--to_refill); if (rx_ring->next_to_use != ntu) iavf_release_rx_desc(rx_ring, ntu); - return false; - -no_buffers: - if (rx_ring->next_to_use != ntu) - iavf_release_rx_desc(rx_ring, ntu); + return to_refill; +} - /* make sure to come back via polling to try again after - * allocation failure - */ - return true; +void iavf_alloc_rx_pages(struct iavf_ring *rxr) +{ + __iavf_alloc_rx_pages(rxr, IAVF_DESC_UNUSED(rxr), GFP_KERNEL); } /** * iavf_rx_checksum - Indicate in skb if hw indicated a good cksum * @vsi: the VSI we care about * @skb: skb currently being received and modified - * @rx_desc: the receive descriptor + * @qword: `wb.qword1.status_error_len` from the descriptor + * @parsed: TODO **/ -static inline void iavf_rx_checksum(struct iavf_vsi *vsi, - struct sk_buff *skb, - union iavf_rx_desc *rx_desc) +static void iavf_rx_checksum(struct iavf_vsi *vsi, struct sk_buff *skb, + u64 qword, struct libie_rx_ptype_parsed parsed) { - struct iavf_rx_ptype_decoded decoded; u32 rx_error, rx_status; - bool ipv4, ipv6; - u8 ptype; - u64 qword; - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - ptype = (qword & IAVF_RXD_QW1_PTYPE_MASK) >> IAVF_RXD_QW1_PTYPE_SHIFT; - rx_error = (qword & IAVF_RXD_QW1_ERROR_MASK) >> - IAVF_RXD_QW1_ERROR_SHIFT; + if (!libie_has_rx_checksum(vsi->netdev, parsed)) + return; + rx_status = (qword & IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT; - decoded = decode_rx_desc_ptype(ptype); - - skb->ip_summed = CHECKSUM_NONE; - - skb_checksum_none_assert(skb); - - /* Rx csum enabled and ip headers found? */ - if (!(vsi->netdev->features & NETIF_F_RXCSUM)) - return; /* did the hardware decode the packet and checksum? 
*/ if (!(rx_status & BIT(IAVF_RX_DESC_STATUS_L3L4P_SHIFT))) return; - /* both known and outer_ip must be set for the below code to work */ - if (!(decoded.known && decoded.outer_ip)) - return; - - ipv4 = (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV6); + rx_error = (qword & IAVF_RXD_QW1_ERROR_MASK) >> + IAVF_RXD_QW1_ERROR_SHIFT; - if (ipv4 && + if (parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV4 && (rx_error & (BIT(IAVF_RX_DESC_ERROR_IPE_SHIFT) | BIT(IAVF_RX_DESC_ERROR_EIPE_SHIFT)))) goto checksum_fail; - /* likely incorrect csum if alternate IP extension headers found */ - if (ipv6 && - rx_status & BIT(IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT)) + else if (parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV6 && + (rx_status & BIT(IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT))) /* don't increment checksum err here, non-fatal err */ return; @@ -1039,447 +993,206 @@ static inline void iavf_rx_checksum(struct iavf_vsi *vsi, if (rx_error & BIT(IAVF_RX_DESC_ERROR_PPRS_SHIFT)) return; - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ - switch (decoded.inner_prot) { - case IAVF_RX_PTYPE_INNER_PROT_TCP: - case IAVF_RX_PTYPE_INNER_PROT_UDP: - case IAVF_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - fallthrough; - default: - break; - } - + skb->ip_summed = CHECKSUM_UNNECESSARY; return; checksum_fail: vsi->back->hw_csum_rx_error++; } -/** - * iavf_ptype_to_htype - get a hash type - * @ptype: the ptype value from the descriptor - * - * Returns a hash type to be used by skb_set_hash - **/ -static inline int iavf_ptype_to_htype(u8 ptype) -{ - struct iavf_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype); - - if (!decoded.known) - return PKT_HASH_TYPE_NONE; - - if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP && - decoded.payload_layer == IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY4) - return PKT_HASH_TYPE_L4; - else if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP && - decoded.payload_layer == IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY3) - return PKT_HASH_TYPE_L3; - else - return PKT_HASH_TYPE_L2; -} - /** * iavf_rx_hash - set the hash value in the skb * @ring: descriptor ring * @rx_desc: specific descriptor * @skb: skb currently being received and modified - * @rx_ptype: Rx packet type + * @qword: `wb.qword1.status_error_len` from the descriptor + * @parsed: TODO **/ -static inline void iavf_rx_hash(struct iavf_ring *ring, - union iavf_rx_desc *rx_desc, - struct sk_buff *skb, - u8 rx_ptype) +static void iavf_rx_hash(const struct iavf_ring *ring, + const union iavf_rx_desc *rx_desc, + struct sk_buff *skb, u64 qword, + struct libie_rx_ptype_parsed parsed) { + const u64 rss_mask = (u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH << + IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT; u32 hash; - const __le64 rss_mask = - cpu_to_le64((u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH << - IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT); - if (!(ring->netdev->features & NETIF_F_RXHASH)) + if (!libie_has_rx_hash(ring->netdev, parsed) || + (qword & rss_mask) != rss_mask) return; - if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) { - hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); - skb_set_hash(skb, hash, iavf_ptype_to_htype(rx_ptype)); - } + hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); + libie_skb_set_hash(skb, hash, parsed); } -/** - * iavf_process_skb_fields - Populate skb header fields from Rx descriptor - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: 
pointer to the EOP Rx descriptor - * @skb: pointer to current skb being populated - * @rx_ptype: the packet type decoded by hardware - * - * This function checks the ring, descriptor, and packet information in - * order to populate the hash, checksum, VLAN, protocol, and - * other fields within the skb. - **/ -static inline -void iavf_process_skb_fields(struct iavf_ring *rx_ring, - union iavf_rx_desc *rx_desc, struct sk_buff *skb, - u8 rx_ptype) +static void iavf_rx_vlan(const struct iavf_ring *rx_ring, + const union iavf_rx_desc *rx_desc, + struct sk_buff *skb, u64 qword) { - iavf_rx_hash(rx_ring, rx_desc, skb, rx_ptype); + u16 vlan_tag; + __be16 prot; - iavf_rx_checksum(rx_ring->vsi, skb, rx_desc); + if (rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) + prot = htons(ETH_P_8021Q); + else if (rx_ring->netdev->features & NETIF_F_HW_VLAN_STAG_RX) + prot = htons(ETH_P_8021AD); + else + return; - skb_record_rx_queue(skb, rx_ring->queue_index); + if ((qword & BIT(IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT)) && + (rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1)) + vlan_tag = le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1); + else if ((rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2) && + (rx_desc->wb.qword2.ext_status & + cpu_to_le16(BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)))) + vlan_tag = le16_to_cpu(rx_desc->wb.qword2.l2tag2_2); + else + vlan_tag = 0; - /* modifies the skb - consumes the enet header */ - skb->protocol = eth_type_trans(skb, rx_ring->netdev); + if (vlan_tag & VLAN_VID_MASK) + __vlan_hwaccel_put_tag(skb, prot, vlan_tag); } /** - * iavf_cleanup_headers - Correct empty headers + * iavf_process_skb_fields - Populate skb header fields from Rx descriptor * @rx_ring: rx descriptor ring packet is being transacted on - * @skb: pointer to current skb being fixed - * - * Also address the case where we are pulling data in on pages only - * and as such no data is present in the skb header. - * - * In addition if skb is not at least 60 bytes we need to pad it so that - * it is large enough to qualify as a valid Ethernet frame. - * - * Returns true if an error was encountered and skb was freed. - **/ -static bool iavf_cleanup_headers(struct iavf_ring *rx_ring, struct sk_buff *skb) -{ - /* if eth_skb_pad returns an error the skb was freed */ - if (eth_skb_pad(skb)) - return true; - - return false; -} - -/** - * iavf_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void iavf_reuse_rx_page(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *old_buff) -{ - struct iavf_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; - - new_buff = &rx_ring->rx_bi[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - - /* transfer page from old buffer to new buffer */ - new_buff->dma = old_buff->dma; - new_buff->page = old_buff->page; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - -/** - * iavf_can_reuse_rx_page - Determine if this page can be reused by - * the adapter for another receive - * - * @rx_buffer: buffer containing the page - * - * If page is reusable, rx_buffer->page_offset is adjusted to point to - * an unused region in the page. - * - * For small pages, @truesize will be a constant value, half the size - * of the memory at page. 
We'll attempt to alternate between high and - * low halves of the page, with one half ready for use by the hardware - * and the other half being consumed by the stack. We use the page - * ref count to determine whether the stack has finished consuming the - * portion of this page that was passed up with a previous packet. If - * the page ref count is >1, we'll assume the "other" half page is - * still busy, and this page cannot be reused. - * - * For larger pages, @truesize will be the actual space used by the - * received packet (adjusted upward to an even multiple of the cache - * line size). This will advance through the page by the amount - * actually consumed by the received packets while there is still - * space for a buffer. Each region of larger pages will be used at - * most once, after which the page will not be reused. + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * @qword: `wb.qword1.status_error_len` from the descriptor * - * In either case, if the page is reusable its refcount is increased. + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, protocol, and + * other fields within the skb. **/ -static bool iavf_can_reuse_rx_page(struct iavf_rx_buffer *rx_buffer) +void iavf_process_skb_fields(const struct iavf_ring *rx_ring, + const union iavf_rx_desc *rx_desc, + struct sk_buff *skb, u64 qword) { - unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; - struct page *page = rx_buffer->page; + struct libie_rx_ptype_parsed parsed; + u32 ptype; - /* Is any reuse possible? */ - if (!dev_page_is_reusable(page)) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely((page_count(page) - pagecnt_bias) > 1)) - return false; -#else -#define IAVF_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - IAVF_RXBUFFER_2048) - if (rx_buffer->page_offset > IAVF_LAST_OFFSET) - return false; -#endif + ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword); + parsed = libie_parse_rx_ptype(ptype); - /* If we have drained the page fragment pool we need to update - * the pagecnt_bias and page count so that we fully restock the - * number of references the driver holds. - */ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); - rx_buffer->pagecnt_bias = USHRT_MAX; - } + iavf_rx_hash(rx_ring, rx_desc, skb, qword, parsed); + iavf_rx_checksum(rx_ring->vsi, skb, qword, parsed); + iavf_rx_vlan(rx_ring, rx_desc, skb, qword); - return true; + skb_record_rx_queue(skb, rx_ring->queue_index); } /** * iavf_add_rx_frag - Add contents of Rx buffer to sk_buff - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: buffer containing page to add * @skb: sk_buff to place the data into + * @page: page containing data to add + * @hr: headroom in front of the data * @size: packet length from rx_desc * - * This function will add the data contained in rx_buffer->page to the skb. + * This function will add the data contained in page to the skb. * It will just attach the page as a frag to the skb. - * - * The function will then update the page offset. 
- **/ -static void iavf_add_rx_frag(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer, - struct sk_buff *skb, - unsigned int size) -{ -#if (PAGE_SIZE < 8192) - unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(size + iavf_rx_offset(rx_ring)); -#endif - - if (!size) - return; - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, - rx_buffer->page_offset, size, truesize); - - /* page is being used so we must update the page offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif -} - -/** - * iavf_get_rx_buffer - Fetch Rx buffer and synchronize data for use - * @rx_ring: rx descriptor ring to transact packets on - * @size: size of buffer to add to skb - * - * This function will pull an Rx buffer from the ring and synchronize it - * for use by the CPU. - */ -static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring, - const unsigned int size) -{ - struct iavf_rx_buffer *rx_buffer; - - rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; - prefetchw(rx_buffer->page); - if (!size) - return rx_buffer; - - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - size, - DMA_FROM_DEVICE); - - /* We have pulled a buffer for use, so decrement pagecnt_bias */ - rx_buffer->pagecnt_bias--; - - return rx_buffer; -} - -/** - * iavf_construct_skb - Allocate skb and populate it - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: rx buffer to pull data from - * @size: size of buffer to add to skb - * - * This function allocates an skb. It then populates it with the page - * data from the current receive descriptor, taking care to set up the - * skb correctly. 
*/ -static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer, - unsigned int size) +static void iavf_add_rx_frag(struct sk_buff *skb, struct page *page, u32 hr, + u32 size) { - void *va; -#if (PAGE_SIZE < 8192) - unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(size); -#endif - unsigned int headlen; - struct sk_buff *skb; - - if (!rx_buffer) - return NULL; - /* prefetch first cache line of first page */ - va = page_address(rx_buffer->page) + rx_buffer->page_offset; - net_prefetch(va); - - /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - IAVF_RX_HDR_SIZE, - GFP_ATOMIC | __GFP_NOWARN); - if (unlikely(!skb)) - return NULL; - - /* Determine available headroom for copy */ - headlen = size; - if (headlen > IAVF_RX_HDR_SIZE) - headlen = eth_get_headlen(skb->dev, va, IAVF_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); - - /* update all of the pointers */ - size -= headlen; - if (size) { - skb_add_rx_frag(skb, 0, rx_buffer->page, - rx_buffer->page_offset + headlen, - size, truesize); - - /* buffer is used by skb, update page_offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - } else { - /* buffer is unused, reset bias back to rx_buffer */ - rx_buffer->pagecnt_bias++; - } - - return skb; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, hr, size, + LIBIE_RX_TRUESIZE); } /** * iavf_build_skb - Build skb around an existing buffer - * @rx_ring: Rx descriptor ring to transact packets on - * @rx_buffer: Rx buffer to pull data from - * @size: size of buffer to add to skb + * @xdp: initialized XDP buffer * * This function builds an skb around an existing Rx buffer, taking care * to set up the skb correctly and avoid any memcpy overhead. 
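 * * A brief note on the new layout (derived from the code below): the buffer geometry comes from the prepared &xdp_buff - the headroom is xdp->data - xdp->data_hard_start, the data length is xdp->data_end - xdp->data, and any XDP metadata placed in front of the data is carried over via skb_metadata_set().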
*/ -static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer, - unsigned int size) +static struct sk_buff *iavf_build_skb(const struct xdp_buff *xdp) { - void *va; -#if (PAGE_SIZE < 8192) - unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(IAVF_SKB_PAD + size); -#endif struct sk_buff *skb; + u32 metasize; - if (!rx_buffer || !size) - return NULL; - /* prefetch first cache line of first page */ - va = page_address(rx_buffer->page) + rx_buffer->page_offset; - net_prefetch(va); + net_prefetch(xdp->data_meta); /* build an skb around the page buffer */ - skb = napi_build_skb(va - IAVF_SKB_PAD, truesize); + skb = napi_build_skb(xdp->data_hard_start, LIBIE_RX_TRUESIZE); if (unlikely(!skb)) return NULL; + skb_mark_for_recycle(skb); + /* update pointers within the skb to store the data */ - skb_reserve(skb, IAVF_SKB_PAD); - __skb_put(skb, size); + skb_reserve(skb, xdp->data - xdp->data_hard_start); + __skb_put(skb, xdp->data_end - xdp->data); - /* buffer is used by skb, update page_offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif + metasize = xdp->data - xdp->data_meta; + if (metasize) + skb_metadata_set(skb, metasize); return skb; } /** - * iavf_put_rx_buffer - Clean up used buffer and either recycle or free - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: rx buffer to pull data from - * - * This function will clean up the contents of the rx_buffer. It will - * either recycle the buffer or unmap it and free the associated resources. - */ -static void iavf_put_rx_buffer(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer) + * iavf_is_non_eop - check whether a buffer is non-EOP + * @qword: `wb.qword1.status_error_len` from the descriptor + * @stats: NAPI poll local stats to update + **/ +static bool iavf_is_non_eop(u64 qword, struct libie_rq_onstack_stats *stats) { - if (!rx_buffer) - return; + /* if we are the last buffer then there is nothing else to do */ + if (likely(iavf_test_staterr(qword, IAVF_RX_DESC_STATUS_EOF_SHIFT))) + return false; - if (iavf_can_reuse_rx_page(rx_buffer)) { - /* hand second half of page back to the ring */ - iavf_reuse_rx_page(rx_ring, rx_buffer); - rx_ring->rx_stats.page_reuse_count++; - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - iavf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, IAVF_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - } + stats->fragments++; - /* clear contents of buffer_info */ - rx_buffer->page = NULL; + return true; } /** - * iavf_is_non_eop - process handling of non-EOP buffers - * @rx_ring: Rx ring being processed - * @rx_desc: Rx descriptor for current buffer - * @skb: Current socket buffer containing buffer in progress + * iavf_run_xdp - Run XDP program and perform resulting action + * @rx_ring: RX descriptor ring to transact packets on + * @xdp: a prepared XDP buffer + * @xdp_prog: an XDP program assigned to the interface + * @xdp_ring: XDP TX queue assigned to the RX ring + * @rxq_xdp_act: Logical OR of flags of XDP actions that require finalization * - * This function updates next to clean. 
If the buffer is an EOP buffer - * this function exits returning false, otherwise it will place the - * sk_buff in the next buffer to be chained and return true indicating - * that this is in fact a non-EOP buffer. - **/ -static bool iavf_is_non_eop(struct iavf_ring *rx_ring, - union iavf_rx_desc *rx_desc, - struct sk_buff *skb) + * Returns resulting XDP action. + */ +static unsigned int +iavf_run_xdp(struct iavf_ring *rx_ring, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog, struct iavf_ring *xdp_ring, + u32 *rxq_xdp_act) { - u32 ntc = rx_ring->next_to_clean + 1; + unsigned int xdp_act; - /* fetch, update, and store next to clean */ - ntc = (ntc < rx_ring->count) ? ntc : 0; - rx_ring->next_to_clean = ntc; + xdp_act = bpf_prog_run_xdp(xdp_prog, xdp); - prefetch(IAVF_RX_DESC(rx_ring, ntc)); + switch (xdp_act) { + case XDP_PASS: + case XDP_DROP: + break; + case XDP_TX: + if (unlikely(!iavf_xdp_xmit_back(xdp, xdp_ring))) + goto xdp_err; - /* if we are the last buffer then there is nothing else to do */ -#define IAVF_RXD_EOF BIT(IAVF_RX_DESC_STATUS_EOF_SHIFT) - if (likely(iavf_test_staterr(rx_desc, IAVF_RXD_EOF))) - return false; + *rxq_xdp_act |= IAVF_RXQ_XDP_ACT_FINALIZE_TX; + break; + case XDP_REDIRECT: + if (unlikely(xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) + goto xdp_err; - rx_ring->rx_stats.non_eop_descs++; + *rxq_xdp_act |= IAVF_RXQ_XDP_ACT_FINALIZE_REDIR; + break; + default: + bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, xdp_act); - return true; + fallthrough; + case XDP_ABORTED: +xdp_err: + trace_xdp_exception(rx_ring->netdev, xdp_prog, xdp_act); + + return XDP_DROP; + } + + return xdp_act; } /** @@ -1496,27 +1209,45 @@ static bool iavf_is_non_eop(struct iavf_ring *rx_ring, **/ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) { - unsigned int total_rx_bytes = 0, total_rx_packets = 0; + const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; + struct libie_rq_onstack_stats stats = { }; + u32 to_refill = IAVF_DESC_UNUSED(rx_ring); + struct page_pool *pool = rx_ring->pool; struct sk_buff *skb = rx_ring->skb; - u16 cleaned_count = IAVF_DESC_UNUSED(rx_ring); - bool failure = false; + u32 ntc = rx_ring->next_to_clean; + u32 ring_size = rx_ring->count; + struct iavf_ring *xdp_ring; + struct bpf_prog *xdp_prog; + u32 hr = pool->p.offset; + u32 cleaned_count = 0; + unsigned int xdp_act; + struct xdp_buff xdp; + u32 rxq_xdp_act = 0; + u16 cached_ntu; + + xdp_prog = rcu_dereference(rx_ring->xdp_prog); + if (xdp_prog) { + xdp_ring = rx_ring->xdp_ring; + cached_ntu = xdp_ring->next_to_use; + } + xdp_init_buff(&xdp, PAGE_SIZE, &rx_ring->xdp_rxq); - while (likely(total_rx_packets < (unsigned int)budget)) { - struct iavf_rx_buffer *rx_buffer; + while (likely(cleaned_count < budget)) { union iavf_rx_desc *rx_desc; - unsigned int size; - u16 vlan_tag = 0; - u8 rx_ptype; + u32 size, put_size; + struct page *page; u64 qword; /* return some buffers to hardware, one at a time is too slow */ - if (cleaned_count >= IAVF_RX_BUFFER_WRITE) { - failure = failure || - iavf_alloc_rx_buffers(rx_ring, cleaned_count); - cleaned_count = 0; + if (to_refill >= IAVF_RX_BUFFER_WRITE) { + to_refill = __iavf_alloc_rx_pages(rx_ring, to_refill, + gfp); + if (unlikely(to_refill)) + libie_stats_inc_one(&rx_ring->rq_stats, + alloc_page_fail); } - rx_desc = IAVF_RX_DESC(rx_ring, rx_ring->next_to_clean); + rx_desc = IAVF_RX_DESC(rx_ring, ntc); /* status_error_len will always be zero for unused descriptors * because it's cleared in cleanup, and overlaps with hdr_addr @@ -1524,97 +1255,128 @@ static int 
iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) * hardware wrote DD then the length will be non-zero */ qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + if (!iavf_test_staterr(qword, IAVF_RX_DESC_STATUS_DD_SHIFT)) + break; /* This memory barrier is needed to keep us from reading * any other fields out of the rx_desc until we have * verified the descriptor has been written back. */ dma_rmb(); -#define IAVF_RXD_DD BIT(IAVF_RX_DESC_STATUS_DD_SHIFT) - if (!iavf_test_staterr(rx_desc, IAVF_RXD_DD)) - break; size = (qword & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >> IAVF_RXD_QW1_LENGTH_PBUF_SHIFT; iavf_trace(clean_rx_irq, rx_ring, rx_desc, skb); - rx_buffer = iavf_get_rx_buffer(rx_ring, size); + page = rx_ring->rx_pages[ntc]; + rx_ring->rx_pages[ntc] = NULL; + + /* Very rare, but possible case. The most common reason: + * the last fragment contained FCS only, which was then + * stripped by the HW. + */ + if (unlikely(!size)) { + page_pool_recycle_direct(pool, page); + goto no_skb; + } + + page_pool_dma_sync_for_cpu(pool, page, size); + put_size = size; + + xdp_prepare_buff(&xdp, page_address(page), hr, size, true); + if (!xdp_prog) + goto construct_skb; + + xdp_act = iavf_run_xdp(rx_ring, &xdp, xdp_prog, xdp_ring, + &rxq_xdp_act); + put_size = max_t(u32, xdp.data_end - xdp.data_hard_start - hr, + put_size); + + if (xdp_act == XDP_PASS) + goto construct_skb; + else if (xdp_act == XDP_DROP) + page_pool_put_page(pool, page, put_size, true); + + stats.bytes += size; + stats.packets++; + + skb = NULL; + goto no_skb; + +construct_skb: /* retrieve a buffer from the ring */ if (skb) - iavf_add_rx_frag(rx_ring, rx_buffer, skb, size); - else if (ring_uses_build_skb(rx_ring)) - skb = iavf_build_skb(rx_ring, rx_buffer, size); + iavf_add_rx_frag(skb, page, hr, size); else - skb = iavf_construct_skb(rx_ring, rx_buffer, size); + skb = iavf_build_skb(&xdp); /* exit if we failed to retrieve a buffer */ if (!skb) { - rx_ring->rx_stats.alloc_buff_failed++; - if (rx_buffer && size) - rx_buffer->pagecnt_bias++; + page_pool_put_page(pool, page, put_size, true); + libie_stats_inc_one(&rx_ring->rq_stats, + build_skb_fail); break; } - iavf_put_rx_buffer(rx_ring, rx_buffer); +no_skb: cleaned_count++; + to_refill++; + if (unlikely(++ntc == ring_size)) + ntc = 0; - if (iavf_is_non_eop(rx_ring, rx_desc, skb)) + if (iavf_is_non_eop(qword, &stats) || !skb) continue; + prefetch(rx_desc); + /* ERR_MASK will only have valid bits if EOP set, and * what we are doing here is actually checking * IAVF_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in * the error field */ - if (unlikely(iavf_test_staterr(rx_desc, BIT(IAVF_RXD_QW1_ERROR_SHIFT)))) { - dev_kfree_skb_any(skb); - skb = NULL; - continue; - } - - if (iavf_cleanup_headers(rx_ring, skb)) { + if (unlikely(iavf_test_staterr(qword, + IAVF_RXD_QW1_ERROR_SHIFT))) { + dev_kfree_skb(skb); skb = NULL; continue; } /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; - - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - rx_ptype = (qword & IAVF_RXD_QW1_PTYPE_MASK) >> - IAVF_RXD_QW1_PTYPE_SHIFT; + stats.bytes += skb->len; /* populate checksum, VLAN, and protocol */ - iavf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); - - if (qword & BIT(IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT) && - rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1) - vlan_tag = le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1); - if (rx_desc->wb.qword2.ext_status & - cpu_to_le16(BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) && - rx_ring->flags & 
IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2) - vlan_tag = le16_to_cpu(rx_desc->wb.qword2.l2tag2_2); + iavf_process_skb_fields(rx_ring, rx_desc, skb, qword); iavf_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb); - iavf_receive_skb(rx_ring, skb, vlan_tag); + skb->protocol = eth_type_trans(skb, rx_ring->netdev); + napi_gro_receive(&rx_ring->q_vector->napi, skb); skb = NULL; /* update budget accounting */ - total_rx_packets++; + stats.packets++; } + rx_ring->next_to_clean = ntc; rx_ring->skb = skb; - u64_stats_update_begin(&rx_ring->syncp); - rx_ring->stats.packets += total_rx_packets; - rx_ring->stats.bytes += total_rx_bytes; - u64_stats_update_end(&rx_ring->syncp); - rx_ring->q_vector->rx.total_packets += total_rx_packets; - rx_ring->q_vector->rx.total_bytes += total_rx_bytes; + iavf_finalize_xdp_rx(xdp_ring, rxq_xdp_act, cached_ntu); + + if (to_refill >= IAVF_RX_BUFFER_WRITE) { + to_refill = __iavf_alloc_rx_pages(rx_ring, to_refill, gfp); + /* guarantee a trip back through this routine if there was + * a failure + */ + if (unlikely(to_refill)) { + libie_stats_inc_one(&rx_ring->rq_stats, + alloc_page_fail); + cleaned_count = budget; + } + } + + iavf_update_rx_ring_stats(rx_ring, &stats); - /* guarantee a trip back through this routine if there was a failure */ - return failure ? budget : (int)total_rx_packets; + return cleaned_count; } static inline u32 iavf_buildreg_itr(const int type, u16 itr) @@ -1743,12 +1505,21 @@ int iavf_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ iavf_for_each_ring(ring, q_vector->tx) { - if (!iavf_clean_tx_irq(vsi, ring, budget)) { + bool wd; + + if (ring->flags & IAVF_TXRX_FLAGS_XSK) + wd = iavf_xmit_zc(ring); + else if (ring->flags & IAVF_TXRX_FLAGS_XDP) + wd = true; + else + wd = iavf_clean_tx_irq(vsi, ring, budget); + + if (!wd) { clean_complete = false; continue; } - arm_wb |= ring->arm_wb; - ring->arm_wb = false; + arm_wb |= !!(ring->flags & IAVF_TXRX_FLAGS_ARM_WB); + ring->flags &= ~IAVF_TXRX_FLAGS_ARM_WB; } /* Handle case where we are called by netpoll with a budget of 0 */ @@ -1760,8 +1531,12 @@ int iavf_napi_poll(struct napi_struct *napi, int budget) */ budget_per_ring = max(budget/q_vector->num_ringpairs, 1); + rcu_read_lock(); + iavf_for_each_ring(ring, q_vector->rx) { - int cleaned = iavf_clean_rx_irq(ring, budget_per_ring); + int cleaned = !!(ring->flags & IAVF_TXRX_FLAGS_XSK) ? + iavf_clean_rx_irq_zc(ring, budget_per_ring) : + iavf_clean_rx_irq(ring, budget_per_ring); work_done += cleaned; /* if we clean as many as budgeted, we must not be done */ @@ -1769,6 +1544,8 @@ int iavf_napi_poll(struct napi_struct *napi, int budget) clean_complete = false; } + rcu_read_unlock(); + /* If work not completed, return budget and polling will return */ if (!clean_complete) { int cpu_id = smp_processor_id(); @@ -1791,10 +1568,8 @@ int iavf_napi_poll(struct napi_struct *napi, int budget) return budget - 1; } tx_only: - if (arm_wb) { - q_vector->tx.ring[0].tx_stats.tx_force_wb++; + if (arm_wb) iavf_enable_wb_on_itr(vsi, q_vector); - } return budget; } @@ -2253,6 +2028,7 @@ bool __iavf_chk_linearize(struct sk_buff *skb) int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size) { netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); + libie_stats_inc_one(&tx_ring->sq_stats, stops); /* Memory barrier before checking head and tail */ smp_mb(); @@ -2262,7 +2038,8 @@ int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size) /* A reprieve! 
- use start_queue because it doesn't call schedule */ netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index); - ++tx_ring->tx_stats.restart_queue; + libie_stats_inc_one(&tx_ring->sq_stats, restarts); + return 0; } @@ -2318,8 +2095,8 @@ static inline void iavf_tx_map(struct iavf_ring *tx_ring, struct sk_buff *skb, while (unlikely(size > IAVF_MAX_DATA_PER_TXD)) { tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, - max_data, td_tag); + iavf_build_ctob(td_cmd, td_offset, + max_data, td_tag); tx_desc++; i++; @@ -2339,8 +2116,9 @@ static inline void iavf_tx_map(struct iavf_ring *tx_ring, struct sk_buff *skb, if (likely(!data_len)) break; - tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, - size, td_tag); + tx_desc->cmd_type_offset_bsz = iavf_build_ctob(td_cmd, + td_offset, + size, td_tag); tx_desc++; i++; @@ -2372,7 +2150,7 @@ static inline void iavf_tx_map(struct iavf_ring *tx_ring, struct sk_buff *skb, /* write last descriptor with RS and EOP bits */ td_cmd |= IAVF_TXD_CMD; tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag); + iavf_build_ctob(td_cmd, td_offset, size, td_tag); skb_tx_timestamp(skb); @@ -2443,7 +2221,7 @@ static netdev_tx_t iavf_xmit_frame_ring(struct sk_buff *skb, return NETDEV_TX_OK; } count = iavf_txd_use_count(skb->len); - tx_ring->tx_stats.tx_linearize++; + libie_stats_inc_one(&tx_ring->sq_stats, linearized); } /* need: 1 descriptor per page * PAGE_SIZE/IAVF_MAX_DATA_PER_TXD, @@ -2453,7 +2231,7 @@ static netdev_tx_t iavf_xmit_frame_ring(struct sk_buff *skb, * otherwise try next time */ if (iavf_maybe_stop_tx(tx_ring, count + 4 + 1)) { - tx_ring->tx_stats.tx_busy++; + libie_stats_inc_one(&tx_ring->sq_stats, busy); return NETDEV_TX_BUSY; } @@ -2536,3 +2314,200 @@ netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) return iavf_xmit_frame_ring(skb, tx_ring); } + +/** + * iavf_clean_xdp_irq - Reclaim a batch of TX resources from completed XDP_TX + * @xdp_ring: XDP Tx ring + * + * Returns number of cleaned descriptors. + */ +static u32 iavf_clean_xdp_irq(struct iavf_ring *xdp_ring) +{ + struct libie_sq_onstack_stats stats = { }; + struct iavf_tx_desc *last_rs_desc; + u32 ntc = xdp_ring->next_to_clean; + u32 cnt = xdp_ring->count; + u16 done_frames = 0; + u16 rs_idx; + u32 i; + + /* Last RS index is invalid in xsk frames */ + if (!xdp_ring->tx_bi[ntc].page) + return 0; + + rs_idx = xdp_ring->tx_bi[ntc].rs_desc_idx; + last_rs_desc = IAVF_TX_DESC(xdp_ring, rs_idx); + if (last_rs_desc->cmd_type_offset_bsz & + cpu_to_le64(IAVF_TX_DESC_DTYPE_DESC_DONE)) { + done_frames = rs_idx >= ntc ? rs_idx - ntc + 1 : + rs_idx + cnt - ntc + 1; + last_rs_desc->cmd_type_offset_bsz = 0; + } + + for (i = 0; i < done_frames; i++) { + struct iavf_tx_buffer *tx_buf = &xdp_ring->tx_bi[ntc]; + + stats.bytes += tx_buf->bytecount; + /* normally tx_buf->gso_segs was taken but at this point + * it's always 1 for us + */ + stats.packets++; + + iavf_free_xdp_resource(xdp_ring, tx_buf); + + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } + + xdp_ring->next_to_clean = ntc; + iavf_update_tx_ring_stats(xdp_ring, &stats); + + return i; +} + +/** + * iavf_xmit_xdp_buff - submit single buffer to XDP ring for transmission + * @xdp: XDP buffer pointer + * @xdp_ring: XDP ring for transmission + * @frame: whether the function is called from .ndo_xdp_xmit() + * + * Returns negative on failure, 0 on success. 
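 + * + * Note: buffers coming from .ndo_xdp_xmit() are DMA-mapped here with + * dma_map_single(), while XDP_TX buffers reuse the page_pool mapping and + * are only synced for the device before transmission.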
+ */ +static int iavf_xmit_xdp_buff(const struct xdp_buff *xdp, + struct iavf_ring *xdp_ring, + bool frame) +{ + u32 batch_sz = IAVF_RING_QUARTER(xdp_ring); + u32 size = xdp->data_end - xdp->data; + u32 ntu = xdp_ring->next_to_use; + struct iavf_tx_buffer *tx_buff; + struct iavf_tx_desc *tx_desc; + void *data = xdp->data; + dma_addr_t dma; + u32 free; + + free = IAVF_DESC_UNUSED(xdp_ring); + if (unlikely(free < batch_sz)) + free += iavf_clean_xdp_irq(xdp_ring); + if (unlikely(!free)) { + libie_stats_inc_one(&xdp_ring->sq_stats, busy); + return -EBUSY; + } + + if (frame) { + dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE); + if (dma_mapping_error(xdp_ring->dev, dma)) + return -ENOMEM; + } else { + struct page *page = virt_to_page(data); + u32 hr = data - xdp->data_hard_start; + + dma = page_pool_get_dma_addr(page) + hr; + dma_sync_single_for_device(xdp_ring->dev, dma, size, + DMA_BIDIRECTIONAL); + } + + tx_buff = &xdp_ring->tx_bi[ntu]; + tx_buff->bytecount = size; + tx_buff->gso_segs = 1; + if (frame) { + tx_buff->xdp_type = IAVF_XDP_BUFFER_FRAME; + tx_buff->xdpf = xdp->data_hard_start; + } else { + tx_buff->xdp_type = IAVF_XDP_BUFFER_TX; + tx_buff->page = virt_to_page(data); + } + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buff, len, size); + dma_unmap_addr_set(tx_buff, dma, dma); + + tx_desc = IAVF_TX_DESC(xdp_ring, ntu); + tx_desc->buffer_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = iavf_build_ctob(IAVF_TX_DESC_CMD_EOP, 0, + size, 0); + + xdp_ring->xdp_tx_active++; + ntu++; + + if (ntu == xdp_ring->count) + ntu = 0; + + xdp_ring->next_to_use = ntu; + + return 0; +} + +static bool iavf_xdp_xmit_back(const struct xdp_buff *buff, + struct iavf_ring *xdp_ring) +{ + bool ret; + + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_lock(&xdp_ring->tx_lock); + + /* TODO: improve XDP_TX by batching */ + ret = !iavf_xmit_xdp_buff(buff, xdp_ring, false); + + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); + + return ret; +} + +int iavf_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) +{ + struct iavf_adapter *adapter = netdev_priv(dev); + struct iavf_tx_buffer *tx_buf; + struct iavf_ring *xdp_ring; + u32 queue_index, nxmit = 0; + int err = 0; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + if (unlikely(adapter->state == __IAVF_DOWN)) + return -ENETDOWN; + + if (!iavf_adapter_xdp_active(adapter)) + return -ENXIO; + + queue_index = smp_processor_id(); + if (static_branch_unlikely(&iavf_xdp_locking_key)) + queue_index %= adapter->num_xdp_tx_queues; + if (queue_index >= adapter->num_active_queues) + return -ENXIO; + + xdp_ring = &adapter->xdp_rings[queue_index]; + + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_lock(&xdp_ring->tx_lock); + + tx_buf = &xdp_ring->tx_bi[xdp_ring->next_to_use]; + for (u32 i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + struct xdp_buff xdp; + + xdp_convert_frame_to_buff(xdpf, &xdp); + err = iavf_xmit_xdp_buff(&xdp, xdp_ring, true); + if (unlikely(err)) { + netdev_err(dev, "XDP frame TX failed, error: %d\n", + err); + break; + } + + nxmit++; + } + + if (likely(nxmit)) + tx_buf->rs_desc_idx = iavf_set_rs_bit(xdp_ring); + if (flags & XDP_XMIT_FLUSH) + iavf_xdp_ring_update_tail(xdp_ring); + + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); + + return nxmit; +} diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index 
2624bf6d009e36..775eda86f05b54 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -4,6 +4,8 @@ #ifndef _IAVF_TXRX_H_ #define _IAVF_TXRX_H_ +#include + /* Interrupt Throttling and Rate Limiting Goodies */ #define IAVF_DEFAULT_IRQ_WORK 256 @@ -81,94 +83,21 @@ enum iavf_dyn_idx_t { BIT_ULL(IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \ BIT_ULL(IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP)) -/* Supported Rx Buffer Sizes (a multiple of 128) */ -#define IAVF_RXBUFFER_256 256 -#define IAVF_RXBUFFER_1536 1536 /* 128B aligned standard Ethernet frame */ -#define IAVF_RXBUFFER_2048 2048 -#define IAVF_RXBUFFER_3072 3072 /* Used for large frames w/ padding */ -#define IAVF_MAX_RXBUFFER 9728 /* largest size for single descriptor */ - -/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we - * reserve 2 more, and skb_shared_info adds an additional 384 bytes more, - * this adds up to 512 bytes of extra data meaning the smallest allocation - * we could have is 1K. - * i.e. RXBUFFER_256 --> 960 byte skb (size-1024 slab) - * i.e. RXBUFFER_512 --> 1216 byte skb (size-2048 slab) - */ -#define IAVF_RX_HDR_SIZE IAVF_RXBUFFER_256 -#define IAVF_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) #define iavf_rx_desc iavf_32byte_rx_desc -#define IAVF_RX_DMA_ATTR \ - (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) - -/* Attempt to maximize the headroom available for incoming frames. We - * use a 2K buffer for receives and need 1536/1534 to store the data for - * the frame. This leaves us with 512 bytes of room. From that we need - * to deduct the space needed for the shared info and the padding needed - * to IP align the frame. - * - * Note: For cache line sizes 256 or larger this value is going to end - * up negative. In these cases we should fall back to the legacy - * receive path. - */ -#if (PAGE_SIZE < 8192) -#define IAVF_2K_TOO_SMALL_WITH_PADDING \ -((NET_SKB_PAD + IAVF_RXBUFFER_1536) > SKB_WITH_OVERHEAD(IAVF_RXBUFFER_2048)) - -static inline int iavf_compute_pad(int rx_buf_len) -{ - int page_size, pad_size; - - page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2); - pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len; - - return pad_size; -} - -static inline int iavf_skb_pad(void) -{ - int rx_buf_len; - - /* If a 2K buffer cannot handle a standard Ethernet frame then - * optimize padding for a 3K buffer instead of a 1.5K buffer. - * - * For a 3K buffer we need to add enough padding to allow for - * tailroom due to NET_IP_ALIGN possibly shifting us out of - * cache-line alignment. - */ - if (IAVF_2K_TOO_SMALL_WITH_PADDING) - rx_buf_len = IAVF_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN); - else - rx_buf_len = IAVF_RXBUFFER_1536; - - /* if needed make room for NET_IP_ALIGN */ - rx_buf_len -= NET_IP_ALIGN; - - return iavf_compute_pad(rx_buf_len); -} - -#define IAVF_SKB_PAD iavf_skb_pad() -#else -#define IAVF_2K_TOO_SMALL_WITH_PADDING false -#define IAVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) -#endif - /** * iavf_test_staterr - tests bits in Rx descriptor status and error fields - * @rx_desc: pointer to receive descriptor (in le64 format) - * @stat_err_bits: value to mask + * @qword: `wb.qword1.status_error_len` from the descriptor + * @stat_err: bit number to mask * * This function does some fast chicanery in order to return the * value of the mask which is really only used for boolean tests. * The status_error_len doesn't need to be shifted because it begins * at offset zero. 
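 * * For example, the Rx clean loop tests descriptor completion with * iavf_test_staterr(qword, IAVF_RX_DESC_STATUS_DD_SHIFT), which only * returns true once hardware has written the DD bit back.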
*/ -static inline bool iavf_test_staterr(union iavf_rx_desc *rx_desc, - const u64 stat_err_bits) +static inline bool iavf_test_staterr(u64 qword, const u64 stat_err) { - return !!(rx_desc->wb.qword1.status_error_len & - cpu_to_le64(stat_err_bits)); + return !!(qword & BIT_ULL(stat_err)); } /* How many Rx Buffers do we bundle into one write to the hardware ? */ @@ -250,7 +179,7 @@ static inline unsigned int iavf_txd_use_count(unsigned int size) #define IAVF_TX_FLAGS_IPV6 BIT(5) #define IAVF_TX_FLAGS_FCCRC BIT(6) #define IAVF_TX_FLAGS_FSO BIT(7) -#define IAVF_TX_FLAGS_FD_SB BIT(9) +/* BIT(9) is free, was IAVF_TX_FLAGS_FD_SB */ #define IAVF_TX_FLAGS_VXLAN_TUNNEL BIT(10) #define IAVF_TX_FLAGS_HW_OUTER_SINGLE_VLAN BIT(11) #define IAVF_TX_FLAGS_VLAN_MASK 0xffff0000 @@ -258,60 +187,40 @@ static inline unsigned int iavf_txd_use_count(unsigned int size) #define IAVF_TX_FLAGS_VLAN_PRIO_SHIFT 29 #define IAVF_TX_FLAGS_VLAN_SHIFT 16 +/** + * enum iavf_xdp_buffer_type - type of &iavf_tx_buffer on XDP queue + * @IAVF_XDP_BUFFER_NONE: unused, no action required + * @IAVF_XDP_BUFFER_TX: free according to our memory model + * @IAVF_XDP_BUFFER_FRAME: use xdp_return_frame() + */ +enum iavf_xdp_buffer_type { + IAVF_XDP_BUFFER_NONE = 0U, + IAVF_XDP_BUFFER_TX, + IAVF_XDP_BUFFER_FRAME, +}; + struct iavf_tx_buffer { - struct iavf_tx_desc *next_to_watch; + + /* Track the last frame in batch/packet */ + union { + struct iavf_tx_desc *next_to_watch; /* on skb TX queue */ + u16 rs_desc_idx; /* on XDP queue */ + }; union { - struct sk_buff *skb; - void *raw_buf; + struct sk_buff *skb; /* used for .ndo_start_xmit() */ + struct page *page; /* used for XDP_TX */ + struct xdp_frame *xdpf; /* used for .ndo_xdp_xmit() */ + struct xdp_buff *xdp; /* used for XDP_TX in ZC mode */ }; unsigned int bytecount; unsigned short gso_segs; + unsigned short xdp_type; DEFINE_DMA_UNMAP_ADDR(dma); DEFINE_DMA_UNMAP_LEN(len); u32 tx_flags; }; -struct iavf_rx_buffer { - dma_addr_t dma; - struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 page_offset; -#else - __u16 page_offset; -#endif - __u16 pagecnt_bias; -}; - -struct iavf_queue_stats { - u64 packets; - u64 bytes; -}; - -struct iavf_tx_queue_stats { - u64 restart_queue; - u64 tx_busy; - u64 tx_done_old; - u64 tx_linearize; - u64 tx_force_wb; - int prev_pkt_ctr; - u64 tx_lost_interrupt; -}; - -struct iavf_rx_queue_stats { - u64 non_eop_descs; - u64 alloc_page_failed; - u64 alloc_buff_failed; - u64 page_reuse_count; - u64 realloc_count; -}; - -enum iavf_ring_state_t { - __IAVF_TX_FDIR_INIT_DONE, - __IAVF_TX_XPS_INIT_DONE, - __IAVF_RING_STATE_NBITS /* must be last */ -}; - /* some useful defines for virtchannel interface, which * is the only remaining user of header split */ @@ -327,16 +236,20 @@ enum iavf_ring_state_t { struct iavf_ring { struct iavf_ring *next; /* pointer to next ring in q_vector */ void *desc; /* Descriptor ring memory */ - struct device *dev; /* Used for DMA mapping */ + union { + struct xsk_buff_pool *xsk_pool; /* Used on XSk queue pairs */ + struct page_pool *pool; /* Used for Rx page management */ + struct device *dev; /* Used for DMA mapping on Tx */ + }; struct net_device *netdev; /* netdev ring maps to */ union { struct iavf_tx_buffer *tx_bi; - struct iavf_rx_buffer *rx_bi; + struct xdp_buff **xdp_buff; + struct page **rx_pages; }; - DECLARE_BITMAP(state, __IAVF_RING_STATE_NBITS); + u8 __iomem *tail; u16 queue_index; /* Queue number of ring */ u8 dcb_tc; /* Traffic class of ring */ - u8 __iomem *tail; /* high bit set means dynamic, use 
accessors routines to read/write. * hardware only supports 2us resolution for the ITR registers. @@ -345,45 +258,29 @@ struct iavf_ring { */ u16 itr_setting; - u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ - u16 rx_buf_len; + u16 count; /* Number of descriptors */ /* used in interrupt processing */ u16 next_to_use; u16 next_to_clean; - u8 atr_sample_rate; - u8 atr_count; - - bool ring_active; /* is ring online or not */ - bool arm_wb; /* do something to arm write back */ - u8 packet_stride; - u16 flags; #define IAVF_TXR_FLAGS_WB_ON_ITR BIT(0) -#define IAVF_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1) +#define IAVF_TXRX_FLAGS_ARM_WB BIT(1) +#define IAVF_TXRX_FLAGS_XDP BIT(2) #define IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 BIT(3) #define IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2 BIT(4) #define IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2 BIT(5) +#define IAVF_TXRX_FLAGS_XSK BIT(6) - /* stats structs */ - struct iavf_queue_stats stats; - struct u64_stats_sync syncp; union { - struct iavf_tx_queue_stats tx_stats; - struct iavf_rx_queue_stats rx_stats; + struct bpf_prog __rcu *xdp_prog; + u32 xdp_tx_active; /* TODO: comment */ }; - - unsigned int size; /* length of descriptor ring in bytes */ - dma_addr_t dma; /* physical address of ring */ - - struct iavf_vsi *vsi; /* Backreference to associated VSI */ - struct iavf_q_vector *q_vector; /* Backreference to associated vector */ - - struct rcu_head rcu; /* to avoid race on free */ - u16 next_to_alloc; - struct sk_buff *skb; /* When iavf_clean_rx_ring_irq() must + struct iavf_ring *xdp_ring; + union { + struct sk_buff *skb; /* When iavf_clean_rx_ring_irq() must * return before it sees the EOP for * the current packet, we save that skb * here and resume receiving this @@ -391,22 +288,31 @@ struct iavf_ring { * iavf_clean_rx_ring_irq() is called * for this ring. 
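 * Only Rx rings use this pointer; XDP Tx rings reuse the union slot * for the tx_lock below.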
*/ -} ____cacheline_internodealigned_in_smp; + spinlock_t tx_lock; /* Protect XDP TX ring, when shared */ + }; -static inline bool ring_uses_build_skb(struct iavf_ring *ring) -{ - return !!(ring->flags & IAVF_RXR_FLAGS_BUILD_SKB_ENABLED); -} + /* stats structs */ + union { + struct libie_sq_stats sq_stats; + struct libie_rq_stats rq_stats; + }; -static inline void set_ring_build_skb_enabled(struct iavf_ring *ring) -{ - ring->flags |= IAVF_RXR_FLAGS_BUILD_SKB_ENABLED; -} + struct iavf_vsi *vsi; /* Backreference to associated VSI */ + struct iavf_q_vector *q_vector; /* Backreference to associated vector */ -static inline void clear_ring_build_skb_enabled(struct iavf_ring *ring) -{ - ring->flags &= ~IAVF_RXR_FLAGS_BUILD_SKB_ENABLED; -} + int prev_pkt_ctr; /* For stall detection */ + unsigned int size; /* length of descriptor ring in bytes */ + dma_addr_t dma; /* physical address of ring */ + + struct rcu_head rcu; /* to avoid race on free */ + struct xdp_rxq_info xdp_rxq; +} ____cacheline_internodealigned_in_smp; + +#define IAVF_RING_QUARTER(R) ((R)->count >> 2) +#define IAVF_RX_DESC(R, i) (&(((union iavf_32byte_rx_desc *)((R)->desc))[i])) +#define IAVF_TX_DESC(R, i) (&(((struct iavf_tx_desc *)((R)->desc))[i])) +#define IAVF_TX_CTXTDESC(R, i) \ + (&(((struct iavf_tx_context_desc *)((R)->desc))[i])) #define IAVF_ITR_ADAPTIVE_MIN_INC 0x0002 #define IAVF_ITR_ADAPTIVE_MIN_USECS 0x0002 @@ -429,18 +335,7 @@ struct iavf_ring_container { #define iavf_for_each_ring(pos, head) \ for (pos = (head).ring; pos != NULL; pos = pos->next) -static inline unsigned int iavf_rx_pg_order(struct iavf_ring *ring) -{ -#if (PAGE_SIZE < 8192) - if (ring->rx_buf_len > (PAGE_SIZE / 2)) - return 1; -#endif - return 0; -} - -#define iavf_rx_pg_size(_ring) (PAGE_SIZE << iavf_rx_pg_order(_ring)) - -bool iavf_alloc_rx_buffers(struct iavf_ring *rxr, u16 cleaned_count); +void iavf_alloc_rx_pages(struct iavf_ring *rxr); netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev); void iavf_clean_tx_ring(struct iavf_ring *tx_ring); void iavf_clean_rx_ring(struct iavf_ring *rx_ring); @@ -455,6 +350,24 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi); int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size); bool __iavf_chk_linearize(struct sk_buff *skb); +DECLARE_STATIC_KEY_FALSE(iavf_xdp_locking_key); + +void iavf_process_skb_fields(const struct iavf_ring *rx_ring, + const union iavf_rx_desc *rx_desc, + struct sk_buff *skb, u64 qword); +int iavf_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); + +static inline __le64 iavf_build_ctob(u32 td_cmd, u32 td_offset, + unsigned int size, u32 td_tag) +{ + return cpu_to_le64(IAVF_TX_DESC_DTYPE_DATA | + ((u64)td_cmd << IAVF_TXD_QW1_CMD_SHIFT) | + ((u64)td_offset << IAVF_TXD_QW1_OFFSET_SHIFT) | + ((u64)size << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT) | + ((u64)td_tag << IAVF_TXD_QW1_L2TAG1_SHIFT)); +} + /** * iavf_xmit_descriptor_count - calculate number of Tx descriptors needed * @skb: send buffer @@ -462,7 +375,7 @@ bool __iavf_chk_linearize(struct sk_buff *skb); * Returns number of data descriptors needed for this skb. Returns 0 to indicate * there is not enough descriptors available in this ring since we need at least * one descriptor. 
- **/ + */ static inline int iavf_xmit_descriptor_count(struct sk_buff *skb) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; @@ -487,7 +400,7 @@ static inline int iavf_xmit_descriptor_count(struct sk_buff *skb) * @size: the size buffer we want to assure is available * * Returns 0 if stop is not needed - **/ + */ static inline int iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size) { if (likely(IAVF_DESC_UNUSED(tx_ring) >= size)) @@ -503,7 +416,7 @@ static inline int iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size) * Note: Our HW can't scatter-gather more than 8 fragments to build * a packet on the wire and so we need to figure out the cases where we * need to linearize the skb. - **/ + */ static inline bool iavf_chk_linearize(struct sk_buff *skb, int count) { /* Both TSO and single send will work if count is less than 8 */ @@ -519,9 +432,134 @@ static inline bool iavf_chk_linearize(struct sk_buff *skb, int count) /** * txring_txq - helper to convert from a ring to a queue * @ring: Tx ring to find the netdev equivalent of - **/ + */ static inline struct netdev_queue *txring_txq(const struct iavf_ring *ring) { return netdev_get_tx_queue(ring->netdev, ring->queue_index); } + +/** + * iavf_xdp_ring_update_tail - Updates the XDP Tx ring tail register + * @xdp_ring: XDP Tx ring + * + * Notify hardware the new descriptor is ready to be transmitted + */ +static inline void iavf_xdp_ring_update_tail(const struct iavf_ring *xdp_ring) +{ + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. + */ + wmb(); + writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail); } + +/** + * __iavf_update_tx_ring_stats - Update TX ring stats after transmit completes + * @tx_ring: TX descriptor ring + * @tc: Tx ring container of the owning queue vector to accumulate stats in + * @stats: on-stack packet and byte counters collected during the NAPI poll + */ +static inline void +__iavf_update_tx_ring_stats(struct iavf_ring *tx_ring, + struct iavf_ring_container *tc, + const struct libie_sq_onstack_stats *stats) +{ + libie_sq_napi_stats_add(&tx_ring->sq_stats, stats); + tc->total_bytes += stats->bytes; + tc->total_packets += stats->packets; } + +#define iavf_update_tx_ring_stats(r, s) \ + __iavf_update_tx_ring_stats(r, &(r)->q_vector->tx, s) + +/** + * __iavf_update_rx_ring_stats - Update RX ring stats + * @rx_ring: RX descriptor ring + * @rc: Rx ring container of the owning queue vector to accumulate stats in + * @stats: on-stack packet and byte counters collected during the NAPI poll + */ +static inline void +__iavf_update_rx_ring_stats(struct iavf_ring *rx_ring, + struct iavf_ring_container *rc, + const struct libie_rq_onstack_stats *stats) +{ + libie_rq_napi_stats_add(&rx_ring->rq_stats, stats); + rc->total_packets += stats->packets; + rc->total_bytes += stats->bytes; } + +#define iavf_update_rx_ring_stats(r, s) \ + __iavf_update_rx_ring_stats(r, &(r)->q_vector->rx, s) + +/** + * iavf_release_rx_desc - Store the new tail and head values + * @rx_ring: ring to bump + * @val: new head index + */ +static inline void iavf_release_rx_desc(struct iavf_ring *rx_ring, u32 val) +{ + rx_ring->next_to_use = val; + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64).
+ */ + wmb(); + writel(val, rx_ring->tail); +} + +#define IAVF_RXQ_XDP_ACT_FINALIZE_TX BIT(0) +#define IAVF_RXQ_XDP_ACT_FINALIZE_REDIR BIT(1) +#define IAVF_RXQ_XDP_ACT_STOP_NOW BIT(2) + +/** + * iavf_set_rs_bit - set RS bit on last produced descriptor. + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * + * Returns the index of descriptor RS bit was set on (one behind current NTU). + */ +static inline u16 iavf_set_rs_bit(struct iavf_ring *xdp_ring) +{ + u16 rs_idx = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : + xdp_ring->count - 1; + struct iavf_tx_desc *tx_desc; + + tx_desc = IAVF_TX_DESC(xdp_ring, rs_idx); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(IAVF_TX_DESC_CMD_RS << IAVF_TXD_QW1_CMD_SHIFT); + + return rs_idx; +} + +/** + * iavf_finalize_xdp_rx - Finalize XDP actions once per RX ring clean + * @xdp_ring: XDP TX queue assigned to a given RX ring + * @rxq_xdp_act: Logical OR of flags of XDP actions that require finalization + * @first_idx: index of the first frame in the transmitted batch on XDP queue + */ +static inline void iavf_finalize_xdp_rx(struct iavf_ring *xdp_ring, + u32 rxq_xdp_act, u32 first_idx) +{ + if (rxq_xdp_act & IAVF_RXQ_XDP_ACT_FINALIZE_REDIR) + xdp_do_flush_map(); + if (rxq_xdp_act & IAVF_RXQ_XDP_ACT_FINALIZE_TX) { + struct iavf_tx_buffer *tx_buf = &xdp_ring->tx_bi[first_idx]; + + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_lock(&xdp_ring->tx_lock); + tx_buf->rs_desc_idx = iavf_set_rs_bit(xdp_ring); + iavf_xdp_ring_update_tail(xdp_ring); + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); + } +} + +static inline bool iavf_ring_is_xdp(struct iavf_ring *ring) +{ + return !!(ring->flags & IAVF_TXRX_FLAGS_XDP); +} + #endif /* _IAVF_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h index 9f1f523807c4e6..bb90d8f3ad7efe 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_type.h +++ b/drivers/net/ethernet/intel/iavf/iavf_type.h @@ -10,8 +10,6 @@ #include "iavf_adminq.h" #include "iavf_devids.h" -#define IAVF_RXQ_CTX_DBUFF_SHIFT 7 - /* IAVF_MASK is a macro used on 32 bit registers */ #define IAVF_MASK(mask, shift) ((u32)(mask) << (shift)) @@ -339,94 +337,6 @@ enum iavf_rx_desc_error_l3l4e_fcoe_masks { #define IAVF_RXD_QW1_PTYPE_SHIFT 30 #define IAVF_RXD_QW1_PTYPE_MASK (0xFFULL << IAVF_RXD_QW1_PTYPE_SHIFT) -/* Packet type non-ip values */ -enum iavf_rx_l2_ptype { - IAVF_RX_PTYPE_L2_RESERVED = 0, - IAVF_RX_PTYPE_L2_MAC_PAY2 = 1, - IAVF_RX_PTYPE_L2_TIMESYNC_PAY2 = 2, - IAVF_RX_PTYPE_L2_FIP_PAY2 = 3, - IAVF_RX_PTYPE_L2_OUI_PAY2 = 4, - IAVF_RX_PTYPE_L2_MACCNTRL_PAY2 = 5, - IAVF_RX_PTYPE_L2_LLDP_PAY2 = 6, - IAVF_RX_PTYPE_L2_ECP_PAY2 = 7, - IAVF_RX_PTYPE_L2_EVB_PAY2 = 8, - IAVF_RX_PTYPE_L2_QCN_PAY2 = 9, - IAVF_RX_PTYPE_L2_EAPOL_PAY2 = 10, - IAVF_RX_PTYPE_L2_ARP = 11, - IAVF_RX_PTYPE_L2_FCOE_PAY3 = 12, - IAVF_RX_PTYPE_L2_FCOE_FCDATA_PAY3 = 13, - IAVF_RX_PTYPE_L2_FCOE_FCRDY_PAY3 = 14, - IAVF_RX_PTYPE_L2_FCOE_FCRSP_PAY3 = 15, - IAVF_RX_PTYPE_L2_FCOE_FCOTHER_PA = 16, - IAVF_RX_PTYPE_L2_FCOE_VFT_PAY3 = 17, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCDATA = 18, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCRDY = 19, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCRSP = 20, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCOTHER = 21, - IAVF_RX_PTYPE_GRENAT4_MAC_PAY3 = 58, - IAVF_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4 = 87, - IAVF_RX_PTYPE_GRENAT6_MAC_PAY3 = 124, - IAVF_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4 = 153 -}; - -struct iavf_rx_ptype_decoded { - u32 known:1; - u32 outer_ip:1; - u32 outer_ip_ver:1; - u32 
outer_frag:1; - u32 tunnel_type:3; - u32 tunnel_end_prot:2; - u32 tunnel_end_frag:1; - u32 inner_prot:4; - u32 payload_layer:3; -}; - -enum iavf_rx_ptype_outer_ip { - IAVF_RX_PTYPE_OUTER_L2 = 0, - IAVF_RX_PTYPE_OUTER_IP = 1 -}; - -enum iavf_rx_ptype_outer_ip_ver { - IAVF_RX_PTYPE_OUTER_NONE = 0, - IAVF_RX_PTYPE_OUTER_IPV4 = 0, - IAVF_RX_PTYPE_OUTER_IPV6 = 1 -}; - -enum iavf_rx_ptype_outer_fragmented { - IAVF_RX_PTYPE_NOT_FRAG = 0, - IAVF_RX_PTYPE_FRAG = 1 -}; - -enum iavf_rx_ptype_tunnel_type { - IAVF_RX_PTYPE_TUNNEL_NONE = 0, - IAVF_RX_PTYPE_TUNNEL_IP_IP = 1, - IAVF_RX_PTYPE_TUNNEL_IP_GRENAT = 2, - IAVF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, - IAVF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, -}; - -enum iavf_rx_ptype_tunnel_end_prot { - IAVF_RX_PTYPE_TUNNEL_END_NONE = 0, - IAVF_RX_PTYPE_TUNNEL_END_IPV4 = 1, - IAVF_RX_PTYPE_TUNNEL_END_IPV6 = 2, -}; - -enum iavf_rx_ptype_inner_prot { - IAVF_RX_PTYPE_INNER_PROT_NONE = 0, - IAVF_RX_PTYPE_INNER_PROT_UDP = 1, - IAVF_RX_PTYPE_INNER_PROT_TCP = 2, - IAVF_RX_PTYPE_INNER_PROT_SCTP = 3, - IAVF_RX_PTYPE_INNER_PROT_ICMP = 4, - IAVF_RX_PTYPE_INNER_PROT_TIMESYNC = 5 -}; - -enum iavf_rx_ptype_payload_layer { - IAVF_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, - IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, - IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, - IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, -}; - #define IAVF_RXD_QW1_LENGTH_PBUF_SHIFT 38 #define IAVF_RXD_QW1_LENGTH_PBUF_MASK (0x3FFFULL << \ IAVF_RXD_QW1_LENGTH_PBUF_SHIFT) diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 4e17d006c52d46..d23d5097db97ad 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -1,10 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2018 Intel Corporation. */ +#include + #include "iavf.h" #include "iavf_prototype.h" #include "iavf_client.h" +#define IAVF_VC_MSG_TIMEOUT_MS 300 + /** * iavf_send_pf_msg * @adapter: adapter structure @@ -50,6 +54,59 @@ int iavf_send_api_ver(struct iavf_adapter *adapter) sizeof(vvi)); } +/** + * iavf_poll_virtchnl_msg_timeout + * @hw: HW configuration structure + * @event: event to populate on success + * @op_to_poll: requested virtchnl op to poll for + * @msecs: timeout in milliseconds + * + * Initialize poll for virtchnl msg matching the requested_op. Returns 0 + * if a message of the correct opcode is in the queue or an error code + * if no message matching the op code is waiting and other failures + * (including timeout). In case of timeout -EBUSY error is returned. + */ +static int +iavf_poll_virtchnl_msg_timeout(struct iavf_hw *hw, + struct iavf_arq_event_info *event, + enum virtchnl_ops op_to_poll, + unsigned int msecs) +{ + unsigned int wait, delay = 10; + enum virtchnl_ops received_op; + enum iavf_status status; + u32 v_retval; + + for (wait = 0; wait < msecs; wait += delay) { + /* When the AQ is empty, iavf_clean_arq_element will be + * nonzero and after some delay this loop will check again + * if any message is added to the AQ. 
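 + * The loop sleeps 'delay' milliseconds between attempts and gives up + * once the accumulated wait reaches 'msecs'.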
+ */ + status = iavf_clean_arq_element(hw, event, NULL); + if (status == IAVF_ERR_ADMIN_QUEUE_NO_WORK) + goto wait_for_msg; + else if (status != IAVF_SUCCESS) + break; + received_op = + (enum virtchnl_ops)le32_to_cpu(event->desc.cookie_high); + if (op_to_poll == received_op) + break; +wait_for_msg: + msleep(delay); + status = IAVF_ERR_NOT_READY; + } + + if (status == IAVF_SUCCESS) { + v_retval = le32_to_cpu(event->desc.cookie_low); + v_retval = virtchnl_status_to_errno((enum virtchnl_status_code) + v_retval); + } else { + v_retval = iavf_status_to_errno(status); + } + + return v_retval; +} + /** * iavf_poll_virtchnl_msg * @hw: HW configuration structure @@ -85,6 +142,83 @@ iavf_poll_virtchnl_msg(struct iavf_hw *hw, struct iavf_arq_event_info *event, return virtchnl_status_to_errno((enum virtchnl_status_code)v_retval); } +/** + * iavf_process_pending_pf_msg + * @adapter: adapter structure + * @timeout_msecs: timeout in milliseconds + * + * Check if any VIRTCHNL message is currently pending and process it + * if needed. + * Poll the admin queue for the PF response and process it using + * a standard handler. + * If no PF response has been received within a given timeout, exit + * with an error. + */ +int +iavf_process_pending_pf_msg(struct iavf_adapter *adapter, + unsigned int timeout_msecs) +{ + enum virtchnl_ops current_op = adapter->current_op; + struct iavf_hw *hw = &adapter->hw; + struct iavf_arq_event_info event; + enum virtchnl_ops v_op; + enum iavf_status v_ret; + int err; + + if (current_op == VIRTCHNL_OP_UNKNOWN) + return 0; + + event.buf_len = IAVF_MAX_AQ_BUF_SIZE; + event.msg_buf = kzalloc(IAVF_MAX_AQ_BUF_SIZE, GFP_KERNEL); + if (!event.msg_buf) + return -ENOMEM; + + err = iavf_poll_virtchnl_msg_timeout(hw, &event, current_op, + timeout_msecs); + if (err) + goto free_exit; + + v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); + v_ret = (enum iavf_status)le32_to_cpu(event.desc.cookie_low); + + iavf_virtchnl_completion(adapter, v_op, v_ret, event.msg_buf, + event.msg_len); + +free_exit: + kfree(event.msg_buf); + + return err; +} + +/** + * iavf_get_vf_op_result + * @adapter: adapter structure + * @op: virtchnl operation + * @msecs: timeout in milliseconds + * + * Return a result of a given operation returned by PF + * or exit with timeout. + */ +static int iavf_get_vf_op_result(struct iavf_adapter *adapter, + enum virtchnl_ops op, + unsigned int msecs) +{ + struct iavf_hw *hw = &adapter->hw; + struct iavf_arq_event_info event; + int err; + + event.buf_len = IAVF_MAX_AQ_BUF_SIZE; + event.msg_buf = kzalloc(IAVF_MAX_AQ_BUF_SIZE, GFP_KERNEL); + if (!event.msg_buf) + return -ENOMEM; + + err = iavf_poll_virtchnl_msg_timeout(hw, &event, op, msecs); + kfree(event.msg_buf); + adapter->current_op = VIRTCHNL_OP_UNKNOWN; + + return err; +} + /** * iavf_verify_api_ver * @adapter: adapter structure @@ -261,58 +395,114 @@ int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter) } /** - * iavf_configure_queues + * iavf_set_qp_config_info + * @vqpi: virtchannel structure for queue pair configuration * @adapter: adapter structure + * @queue_index: index of queue pair in the adapter structure + * @max_frame: maximal frame size supported by the adapter + * @xdp_pair: true if the queue pair is assigned to XDP queues * - * Request that the PF set up our (previously allocated) queues. - **/ -void iavf_configure_queues(struct iavf_adapter *adapter) + * Fill virtchannel queue pair configuration structure + * with data for the Rx and Tx queues of a given index. 
+ * To handle XDP queues, only the Tx part of the vqpi structure is filled + * with data. Because the virtchnl protocol can operate on queue pairs only, + * each extra Tx queue is associated with an empty Rx queue + * (with zero length). + */ +static void iavf_set_qp_config_info(struct virtchnl_queue_pair_info *vqpi, + struct iavf_adapter *adapter, + int queue_index, u32 max_frame, + bool xdp_pair) +{ + struct iavf_ring *rxq = &adapter->rx_rings[queue_index]; + struct iavf_ring *txq; + u32 hr, max_len; + int xdpq_idx; + + if (xdp_pair) { + xdpq_idx = queue_index - adapter->num_xdp_tx_queues; + txq = &adapter->xdp_rings[xdpq_idx]; + } else { + txq = &adapter->tx_rings[queue_index]; + } + vqpi->txq.vsi_id = adapter->vsi_res->vsi_id; + vqpi->txq.queue_id = queue_index; + vqpi->txq.ring_len = txq->count; + vqpi->txq.dma_ring_addr = txq->dma; + + vqpi->rxq.vsi_id = adapter->vsi_res->vsi_id; + vqpi->rxq.queue_id = queue_index; + if (xdp_pair) { + vqpi->rxq.ring_len = 0; + return; + } + + if (rxq->flags & IAVF_TXRX_FLAGS_XSK) { + hr = xsk_pool_get_headroom(rxq->xsk_pool); + max_len = xsk_pool_get_rx_frame_size(rxq->xsk_pool); + } else { + hr = rxq->pool->p.offset; + max_len = rxq->pool->p.max_len; + } + + max_frame = min_not_zero(max_frame, LIBIE_MAX_RX_FRM_LEN(hr)); + + vqpi->rxq.ring_len = rxq->count; + vqpi->rxq.dma_ring_addr = rxq->dma; + vqpi->rxq.max_pkt_size = max_frame; + vqpi->rxq.databuffer_size = max_len; +} + +/** + * iavf_configure_selected_queues + * @adapter: adapter structure + * @qp_mask: mask of queue pairs to configure + * @wait: if true, wait until the request is completed + * + * Request PF to set up our selected (previously allocated) queues. + * Returns 0 if the command succeeds or negative value in case of error. + * + * Note: The caller must ensure that the calling context has taken + * 'adapter->crit_lock' mutex when 'wait' parameter is set to true. + */ +int iavf_configure_selected_queues(struct iavf_adapter *adapter, u32 qp_mask, + bool wait) { + int pairs = adapter->num_active_queues + adapter->num_xdp_tx_queues; + unsigned long num_qps_to_config, mask = qp_mask; + u32 idx, max_frame = adapter->vf_res->max_mtu; struct virtchnl_vsi_queue_config_info *vqci; - int i, max_frame = adapter->vf_res->max_mtu; - int pairs = adapter->num_active_queues; struct virtchnl_queue_pair_info *vqpi; size_t len; - if (max_frame > IAVF_MAX_RXBUFFER || !max_frame) - max_frame = IAVF_MAX_RXBUFFER; - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot configure queues, command %d pending\n", + dev_err(&adapter->pdev->dev, + "Cannot configure queues, command %d pending\n", adapter->current_op); - return; + return -EBUSY; } + num_qps_to_config = hweight_long(mask); adapter->current_op = VIRTCHNL_OP_CONFIG_VSI_QUEUES; - len = struct_size(vqci, qpair, pairs); + len = struct_size(vqci, qpair, num_qps_to_config); vqci = kzalloc(len, GFP_KERNEL); if (!vqci) - return; - - /* Limit maximum frame size when jumbo frames is not enabled */ - if (!(adapter->flags & IAVF_FLAG_LEGACY_RX) && - (adapter->netdev->mtu <= ETH_DATA_LEN)) - max_frame = IAVF_RXBUFFER_1536 - NET_IP_ALIGN; + return -ENOMEM; vqci->vsi_id = adapter->vsi_res->vsi_id; - vqci->num_queue_pairs = pairs; + vqci->num_queue_pairs = num_qps_to_config; vqpi = vqci->qpair; /* Size check is not needed here - HW max is 16 queue pairs, and we * can fit info for 31 of them into the AQ buffer before it overflows.
*/ - for (i = 0; i < pairs; i++) { - vqpi->txq.vsi_id = vqci->vsi_id; - vqpi->txq.queue_id = i; - vqpi->txq.ring_len = adapter->tx_rings[i].count; - vqpi->txq.dma_ring_addr = adapter->tx_rings[i].dma; - vqpi->rxq.vsi_id = vqci->vsi_id; - vqpi->rxq.queue_id = i; - vqpi->rxq.ring_len = adapter->rx_rings[i].count; - vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma; - vqpi->rxq.max_pkt_size = max_frame; - vqpi->rxq.databuffer_size = - ALIGN(adapter->rx_rings[i].rx_buf_len, - BIT_ULL(IAVF_RXQ_CTX_DBUFF_SHIFT)); + for_each_set_bit(idx, &mask, adapter->num_active_queues) { + iavf_set_qp_config_info(vqpi, adapter, idx, max_frame, false); + vqpi++; + } + + /* Set configuration info for XDP Tx queues. */ + for_each_set_bit_from(idx, &mask, pairs) { + iavf_set_qp_config_info(vqpi, adapter, idx, max_frame, true); vqpi++; } @@ -320,66 +510,170 @@ void iavf_configure_queues(struct iavf_adapter *adapter) iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_VSI_QUEUES, (u8 *)vqci, len); kfree(vqci); + + if (wait) + return iavf_get_vf_op_result(adapter, + VIRTCHNL_OP_CONFIG_VSI_QUEUES, + IAVF_VC_MSG_TIMEOUT_MS); + return 0; } /** - * iavf_enable_queues + * iavf_configure_queues * @adapter: adapter structure + * @wait: if true, wait until the request is completed * - * Request that the PF enable all of our queues. - **/ -void iavf_enable_queues(struct iavf_adapter *adapter) + * Send a request to PF to set up all allocated queues. + * Returns 0 if the command succeeds or negative value in case of error. + * + * Note: The caller must ensure that the calling context has taken + * 'adapter->crit_lock' mutex when 'wait' parameter is set to true. + */ +int iavf_configure_queues(struct iavf_adapter *adapter, bool wait) +{ + int pairs = adapter->num_active_queues + adapter->num_xdp_tx_queues; + u32 qpair_mask = BIT(pairs) - 1; + + return iavf_configure_selected_queues(adapter, qpair_mask, wait); +} + +/** + * iavf_enable_selected_queues + * @adapter: adapter structure + * @rx_queues: mask of Rx queues + * @tx_queues: mask of Tx queues + * @wait: if true, wait until the request is completed + * + * Send a request to PF to enable selected queues. + * Returns 0 if the command succeeds or negative value in case of error. + * + * Note: The caller must ensure that the calling context has taken + * 'adapter->crit_lock' mutex when 'wait' parameter is set to true. 
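+ *
+ * For example, with four active queue pairs and four XDP Tx queues,
+ * passing rx_queues = 0xf and tx_queues = 0xff enables every allocated
+ * queue.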
+ */ +int iavf_enable_selected_queues(struct iavf_adapter *adapter, u32 rx_queues, + u32 tx_queues, bool wait) { struct virtchnl_queue_select vqs; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot enable queues, command %d pending\n", + dev_err(&adapter->pdev->dev, + "Cannot enable queues, command %d pending\n", adapter->current_op); - return; + return -EBUSY; } adapter->current_op = VIRTCHNL_OP_ENABLE_QUEUES; vqs.vsi_id = adapter->vsi_res->vsi_id; - vqs.tx_queues = BIT(adapter->num_active_queues) - 1; - vqs.rx_queues = vqs.tx_queues; + vqs.tx_queues = tx_queues; + vqs.rx_queues = rx_queues; adapter->aq_required &= ~IAVF_FLAG_AQ_ENABLE_QUEUES; iavf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_QUEUES, (u8 *)&vqs, sizeof(vqs)); + + if (wait) + return iavf_get_vf_op_result(adapter, VIRTCHNL_OP_ENABLE_QUEUES, + IAVF_VC_MSG_TIMEOUT_MS); + return 0; } /** - * iavf_disable_queues + * iavf_disable_selected_queues * @adapter: adapter structure + * @rx_queues: mask of Rx queues + * @tx_queues: mask of Tx queues + * @wait: if true, wait until the request is completed * - * Request that the PF disable all of our queues. - **/ -void iavf_disable_queues(struct iavf_adapter *adapter) + * Send a request to PF to disable selected queues. + * Returns 0 if the command succeeds or negative value in case of error. + * + * Note: The caller must ensure that the calling context has taken + * 'adapter->crit_lock' mutex when 'wait' parameter is set to true. + */ +int iavf_disable_selected_queues(struct iavf_adapter *adapter, u32 rx_queues, + u32 tx_queues, bool wait) { struct virtchnl_queue_select vqs; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot disable queues, command %d pending\n", + dev_err(&adapter->pdev->dev, + "Cannot disable queues, command %d pending\n", adapter->current_op); - return; + return -EBUSY; } adapter->current_op = VIRTCHNL_OP_DISABLE_QUEUES; vqs.vsi_id = adapter->vsi_res->vsi_id; - vqs.tx_queues = BIT(adapter->num_active_queues) - 1; - vqs.rx_queues = vqs.tx_queues; + vqs.tx_queues = tx_queues; + vqs.rx_queues = rx_queues; adapter->aq_required &= ~IAVF_FLAG_AQ_DISABLE_QUEUES; iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_QUEUES, (u8 *)&vqs, sizeof(vqs)); + + if (wait) + return iavf_get_vf_op_result(adapter, + VIRTCHNL_OP_DISABLE_QUEUES, + IAVF_VC_MSG_TIMEOUT_MS); + return 0; +} + +/** + * iavf_enable_queues + * @adapter: adapter structure + * @wait: if true, wait until the request is completed + * + * Send a request to PF to enable all allocated queues. + * Returns 0 if the command succeeds or negative value in case of error. + * + * Note: The caller must ensure that the calling context has taken + * 'adapter->crit_lock' mutex when 'wait' parameter is set to true. + */ +int iavf_enable_queues(struct iavf_adapter *adapter, bool wait) +{ + u32 num_tx_queues = adapter->num_active_queues + + adapter->num_xdp_tx_queues; + + u32 rx_queues = BIT(adapter->num_active_queues) - 1; + u32 tx_queues = BIT(num_tx_queues) - 1; + + return iavf_enable_selected_queues(adapter, rx_queues, tx_queues, wait); +} + +/** + * iavf_disable_queues + * @adapter: adapter structure + * @wait: if true, wait until the request is completed + * + * Send a request to PF to disable all allocated queues. + * Returns 0 if the command succeeds or negative value in case of error. 
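+ * This is a thin wrapper around iavf_disable_selected_queues() with Rx
+ * and Tx masks covering all allocated queues, including the XDP Tx ones.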
+ * + * Note: The caller must ensure that the calling context has taken + * 'adapter->crit_lock' mutex when 'wait' parameter is set to true. + */ +int iavf_disable_queues(struct iavf_adapter *adapter, bool wait) +{ + u32 num_tx_queues = adapter->num_active_queues + + adapter->num_xdp_tx_queues; + + u32 rx_queues = BIT(adapter->num_active_queues) - 1; + u32 tx_queues = BIT(num_tx_queues) - 1; + + return iavf_disable_selected_queues(adapter, rx_queues, tx_queues, + wait); } /** * iavf_map_queues * @adapter: adapter structure + * @wait: if true, wait until the request is completed * - * Request that the PF map queues to interrupt vectors. Misc causes, including - * admin queue, are always mapped to vector 0. - **/ -void iavf_map_queues(struct iavf_adapter *adapter) + * Send a request to PF to update the mapping queues to interrupt vectors. + * Misc causes, including admin queue, are always mapped to vector 0. + * Returns 0 if the command succeeds or negative value in case of error. + * + * Note: The caller must ensure that the calling context has taken + * 'adapter->crit_lock' mutex when 'wait' parameter is set to true. + */ +int iavf_map_queues(struct iavf_adapter *adapter, bool wait) { struct virtchnl_irq_map_info *vimi; struct virtchnl_vector_map *vecmap; @@ -391,7 +685,7 @@ void iavf_map_queues(struct iavf_adapter *adapter) /* bail because we already have a command pending */ dev_err(&adapter->pdev->dev, "Cannot map queues to vectors, command %d pending\n", adapter->current_op); - return; + return -EBUSY; } adapter->current_op = VIRTCHNL_OP_CONFIG_IRQ_MAP; @@ -400,7 +694,7 @@ void iavf_map_queues(struct iavf_adapter *adapter) len = struct_size(vimi, vecmap, adapter->num_msix_vectors); vimi = kzalloc(len, GFP_KERNEL); if (!vimi) - return; + return -ENOMEM; vimi->num_vectors = adapter->num_msix_vectors; /* Queue vectors first */ @@ -410,8 +704,8 @@ void iavf_map_queues(struct iavf_adapter *adapter) vecmap->vsi_id = adapter->vsi_res->vsi_id; vecmap->vector_id = v_idx + NONQ_VECS; - vecmap->txq_map = q_vector->ring_mask; - vecmap->rxq_map = q_vector->ring_mask; + vecmap->txq_map = q_vector->tx_ring_mask; + vecmap->rxq_map = q_vector->rx_ring_mask; vecmap->rxitr_idx = IAVF_RX_ITR; vecmap->txitr_idx = IAVF_TX_ITR; } @@ -426,6 +720,12 @@ void iavf_map_queues(struct iavf_adapter *adapter) iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP, (u8 *)vimi, len); kfree(vimi); + + if (wait) + return iavf_get_vf_op_result(adapter, + VIRTCHNL_OP_CONFIG_IRQ_MAP, + IAVF_VC_MSG_TIMEOUT_MS); + return 0; } /** @@ -1886,6 +2186,52 @@ static void iavf_netdev_features_vlan_strip_set(struct net_device *netdev, netdev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; } +/** + * iavf_poll_for_link_status - poll for PF notification about link status + * @adapter: adapter structure + * @msecs: timeout in milliseconds + * + * Returns: + * 0 - if notification about link down was received, + * 1 - if notification about link up was received, + * or negative error code in case of error. 
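+ *
+ * Any other event received while polling is handed to the standard
+ * iavf_virtchnl_completion() handler and -EBUSY is returned.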
+ */ +int iavf_poll_for_link_status(struct iavf_adapter *adapter, unsigned int msecs) +{ + struct iavf_hw *hw = &adapter->hw; + struct iavf_arq_event_info event; + struct virtchnl_pf_event *vpe; + int ret; + + event.buf_len = IAVF_MAX_AQ_BUF_SIZE; + event.msg_buf = kzalloc(IAVF_MAX_AQ_BUF_SIZE, GFP_KERNEL); + if (!event.msg_buf) + return -ENOMEM; + + ret = iavf_poll_virtchnl_msg_timeout(hw, &event, VIRTCHNL_OP_EVENT, + msecs); + if (ret) + goto virtchnl_msg_err; + + vpe = (struct virtchnl_pf_event *)event.msg_buf; + if (vpe->event == VIRTCHNL_EVENT_LINK_CHANGE) { + bool link_up = iavf_get_vpe_link_status(adapter, vpe); + + iavf_set_adapter_link_speed_from_vpe(adapter, vpe); + + ret = link_up ? 1 : 0; + } else { + iavf_virtchnl_completion(adapter, VIRTCHNL_OP_EVENT, 0, + event.msg_buf, event.msg_len); + ret = -EBUSY; + } + +virtchnl_msg_err: + kfree(event.msg_buf); + + return ret; +} + /** * iavf_virtchnl_completion * @adapter: adapter structure diff --git a/drivers/net/ethernet/intel/iavf/iavf_xsk.c b/drivers/net/ethernet/intel/iavf/iavf_xsk.c new file mode 100644 index 00000000000000..f0f88f25e4e011 --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_xsk.c @@ -0,0 +1,1158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2022 Intel Corporation. */ + +#include +#include +#include +#include +#include +#include "iavf.h" +#include "iavf_trace.h" +#include "iavf_xsk.h" + +#define IAVF_CRIT_LOCK_WAIT_TIMEOUT_MS 1000 +#define IAVF_VC_MSG_TIMEOUT_MS 3000 + +/** + * iavf_max_xdp_queues_count - Returns the maximal number of XDP queues + * that can be created for current configuration + * of a given adapter. + * @adapter: adapter where XDP socket will be set up + */ +static u32 +iavf_max_xdp_queues_count(struct iavf_adapter *adapter) +{ + u32 max_qp_num = adapter->vsi_res->num_queue_pairs; + u32 num_active_queues = adapter->num_active_queues; + + return num_active_queues * 2 > max_qp_num ? 
max_qp_num / 2 : + num_active_queues; +} + +/** + * iavf_qp_clean_rings - Cleans all the rings of a given index + * @adapter: adapter that contains rings of interest + * @q_idx: ring index in array + */ +static void +iavf_qp_clean_rings(struct iavf_adapter *adapter, u16 q_idx) +{ + iavf_clean_tx_ring(&adapter->tx_rings[q_idx]); + if (iavf_adapter_xdp_active(adapter)) { + synchronize_rcu(); + iavf_clean_tx_ring(&adapter->xdp_rings[q_idx]); + } + iavf_clean_rx_ring(&adapter->rx_rings[q_idx]); +} + +/** + * iavf_qvec_toggle_napi - Enables/disables NAPI for a given q_vector + * @adapter: adapter that has netdev + * @q_vector: q_vector that has NAPI context + * @enable: true for enable, false for disable + */ +static void +iavf_qvec_toggle_napi(struct iavf_adapter *adapter, + struct iavf_q_vector *q_vector, bool enable) +{ + if (!adapter->vsi.netdev || !q_vector) + return; + + if (enable) + napi_enable(&q_vector->napi); + else + napi_disable(&q_vector->napi); +} + +/** + * iavf_trigger_sw_intr - trigger a software interrupt + * @adapter: adapter of interest + * @q_vector: interrupt vector to trigger the software interrupt for + */ +static void +iavf_trigger_sw_intr(struct iavf_adapter *adapter, + struct iavf_q_vector *q_vector) +{ + struct iavf_hw *hw = &adapter->hw; + + wr32(hw, IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx), + (IAVF_VFINT_DYN_CTLN1_INTENA_MASK | + IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | + IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | + IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK)); + + iavf_flush(hw); +} + +/** + * iavf_qvec_dis_irq - Mask off queue interrupt generation on given ring + * @adapter: the adapter that contains queue vector being un-configured + * @q_vector: queue vector + */ +static void +iavf_qvec_dis_irq(struct iavf_adapter *adapter, struct iavf_q_vector *q_vector) +{ + int base = adapter->vsi.base_vector; + struct iavf_hw *hw = &adapter->hw; + u16 reg = q_vector->reg_idx; + + wr32(hw, IAVF_VFINT_DYN_CTLN1(reg), 0); + synchronize_irq(adapter->msix_entries[reg + base].vector); + iavf_flush(hw); +} + +/** + * iavf_qvec_ena_irq - Enable IRQ for given queue vector + * @adapter: the adapter that contains queue vector + * @q_vector: queue vector + */ +static void +iavf_qvec_ena_irq(struct iavf_adapter *adapter, struct iavf_q_vector *q_vector) +{ + struct iavf_hw *hw = &adapter->hw; + + if (adapter) + if (adapter->state == __IAVF_DOWN) + return; + + wr32(hw, IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx), + IAVF_VFINT_DYN_CTLN1_INTENA_MASK | + IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK); + + iavf_flush(hw); +} + +/** + * iavf_qp_dis - Disables a queue pair + * @adapter: adapter of interest + * @q_idx: ring index in array + * + * Returns 0 on success, negative on failure. 
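+ *
+ * The pair is quiesced in software first (netdev Tx queue stopped, NAPI
+ * and IRQ disabled), then the PF is asked to disable the Rx, Tx and XDP
+ * Tx queues, and finally the rings are cleaned and, for non-XSK rings,
+ * the page pool is released.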
+ */ +static int iavf_qp_dis(struct iavf_adapter *adapter, u16 q_idx) +{ + struct iavf_vsi *vsi = &adapter->vsi; + struct iavf_ring *rx_ring, *xdp_ring; + struct iavf_q_vector *q_vector; + u32 rx_queues, tx_queues; + int err; + + if (q_idx >= adapter->num_active_queues) + return -EINVAL; + + rx_ring = &adapter->rx_rings[q_idx]; + q_vector = rx_ring->q_vector; + + rx_queues = BIT(q_idx); + tx_queues = rx_queues; + + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + + iavf_qvec_toggle_napi(adapter, q_vector, false); + iavf_qvec_dis_irq(adapter, q_vector); + + xdp_ring = &adapter->xdp_rings[q_idx]; + + tx_queues |= BIT(xdp_ring->queue_index); + + err = iavf_disable_selected_queues(adapter, rx_queues, tx_queues, true); + if (err) + goto dis_exit; + + iavf_qp_clean_rings(adapter, q_idx); + if (!(rx_ring->flags & IAVF_TXRX_FLAGS_XSK)) { + struct device *dev = rx_ring->pool->p.dev; + + libie_rx_page_pool_destroy(rx_ring->pool, &rx_ring->rq_stats); + rx_ring->dev = dev; + } +dis_exit: + return err; +} + +/** + * iavf_qp_ena - Enables a queue pair + * @adapter: adapter of interest + * @q_idx: ring index in array + * + * Returns 0 on success, negative on failure. + */ +static int iavf_qp_ena(struct iavf_adapter *adapter, u16 q_idx) +{ + struct iavf_vsi *vsi = &adapter->vsi; + struct iavf_ring *rx_ring, *xdp_ring; + struct iavf_q_vector *q_vector; + u32 rx_queues, tx_queues; + int ret, err = 0; + + if (q_idx >= adapter->num_active_queues) + return -EINVAL; + + xdp_ring = &adapter->xdp_rings[q_idx]; + rx_ring = &adapter->rx_rings[q_idx]; + q_vector = rx_ring->q_vector; + + rx_queues = BIT(q_idx); + tx_queues = rx_queues; + tx_queues |= BIT(xdp_ring->queue_index); + + iavf_xsk_setup_xdp_ring(xdp_ring); + iavf_xsk_setup_rx_ring(rx_ring); + + if (!(rx_ring->flags & IAVF_TXRX_FLAGS_XSK)) { + rx_ring->pool = libie_rx_page_pool_create(rx_ring->netdev, + rx_ring->count, + true); + if (IS_ERR(rx_ring->pool)) { + err = PTR_ERR(rx_ring->pool); + goto ena_exit; + } + } + + iavf_configure_rx_ring(adapter, rx_ring); + + /* Use 'tx_queues' mask as a queue pair mask to configure + * also an extra XDP Tx queue. 
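+	 * The low bit selects the regular Rx/Tx pair and the extra bit the
+	 * XDP Tx queue, which the PF sees as a Tx-only pair with a
+	 * zero-length Rx ring.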
+ */ + err = iavf_configure_selected_queues(adapter, tx_queues, true); + if (err) + goto ena_exit; + + err = iavf_enable_selected_queues(adapter, rx_queues, tx_queues, true); + if (err) + goto ena_exit; + + ret = iavf_poll_for_link_status(adapter, IAVF_XDP_LINK_TIMEOUT_MS); + if (ret < 0) { + err = ret; + dev_err(&adapter->pdev->dev, + "cannot bring the link up, error: %d\n", err); + goto ena_exit; + } else if (!ret) { + err = -EBUSY; + dev_err(&adapter->pdev->dev, + "pf returned link down status, error: %d\n", err); + goto ena_exit; + } + + iavf_qvec_toggle_napi(adapter, q_vector, true); + iavf_qvec_ena_irq(adapter, q_vector); + + netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); +ena_exit: + return err; +} + +/** + * iavf_xsk_pool_disable - disable a buffer pool region + * @adapter: Current adapter + * @qid: queue ID + * + * Returns 0 on success, negative on failure + */ +static int iavf_xsk_pool_disable(struct iavf_adapter *adapter, u16 qid) +{ + struct xsk_buff_pool *pool = xsk_get_pool_from_qid(adapter->vsi.netdev, + qid); + if (!pool) + return -EINVAL; + + clear_bit(qid, adapter->af_xdp_zc_qps); + xsk_pool_dma_unmap(pool, LIBIE_RX_DMA_ATTR); + + return 0; +} + +/** + * iavf_xsk_pool_enable - enable a buffer pool region + * @adapter: Current adapter + * @pool: pointer to a requested buffer pool region + * @qid: queue ID + * + * Returns 0 on success, negative on failure + */ +static int +iavf_xsk_pool_enable(struct iavf_adapter *adapter, struct xsk_buff_pool *pool, + u16 qid) +{ + struct iavf_vsi *vsi = &adapter->vsi; + int err; + + if (qid >= vsi->netdev->real_num_rx_queues || + qid >= vsi->netdev->real_num_tx_queues) + return -EINVAL; + + err = xsk_pool_dma_map(pool, &adapter->pdev->dev, LIBIE_RX_DMA_ATTR); + if (err) + return err; + + set_bit(qid, adapter->af_xdp_zc_qps); + + return 0; +} + +/** + * iavf_xsk_pool_setup - enable/disable a buffer pool region depending + * on its state + * @adapter: Current adapter + * @pool: buffer pool to enable/associate to a ring, NULL to disable + * @qid: queue ID + * + * Returns 0 on success, negative on failure + */ +int iavf_xsk_pool_setup(struct iavf_adapter *adapter, + struct xsk_buff_pool *pool, u32 qid) +{ + bool if_running, pool_present = !!pool; + struct iavf_vsi *vsi = &adapter->vsi; + int ret = 0, pool_failure = 0; + + if (qid >= iavf_max_xdp_queues_count(adapter)) { + netdev_err(vsi->netdev, "Wrong queue index for XDP.\n"); + pool_failure = -EINVAL; + goto failure; + } + + if_running = netif_running(vsi->netdev) && + iavf_adapter_xdp_active(adapter); + + if (if_running) { + if (iavf_lock_timeout(&adapter->crit_lock, + IAVF_CRIT_LOCK_WAIT_TIMEOUT_MS)) + return -EBUSY; + + ret = iavf_process_pending_pf_msg(adapter, + IAVF_VC_MSG_TIMEOUT_MS); + if (ret) + goto xsk_pool_if_up; + + ret = iavf_qp_dis(adapter, qid); + if (ret) { + netdev_err(vsi->netdev, "iavf_qp_dis error = %d\n", ret); + goto xsk_pool_if_up; + } + } + + pool_failure = pool_present ? iavf_xsk_pool_enable(adapter, pool, qid) : + iavf_xsk_pool_disable(adapter, qid); + +xsk_pool_if_up: + if (if_running) { + ret = iavf_qp_ena(adapter, qid); + mutex_unlock(&adapter->crit_lock); + if (!ret && pool_present) + napi_schedule(&adapter->rx_rings[qid].q_vector->napi); + else if (ret) + netdev_err(vsi->netdev, "iavf_qp_ena error = %d\n", ret); + } + +failure: + if (pool_failure) { + netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n", + pool_present ? 
"en" : "dis", pool_failure); + return pool_failure; + } + + return ret; +} + +/** + * iavf_clean_xdp_tx_buf - Free and unmap XDP Tx buffer + * @xdp_ring: XDP Tx ring + * @tx_buf: Tx buffer to clean + */ +static void +iavf_clean_xdp_tx_buf(struct iavf_ring *xdp_ring, struct iavf_tx_buffer *tx_buf) +{ + switch (tx_buf->xdp_type) { + case IAVF_XDP_BUFFER_FRAME: + dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + xdp_return_frame(tx_buf->xdpf); + tx_buf->xdpf = NULL; + break; + case IAVF_XDP_BUFFER_TX: + xsk_buff_free(tx_buf->xdp); + break; + } + + xdp_ring->xdp_tx_active--; + tx_buf->xdp_type = IAVF_XDP_BUFFER_NONE; +} + +/** + * iavf_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ + * @xdp_ring: XDP Tx ring + */ +static void iavf_clean_xdp_irq_zc(struct iavf_ring *xdp_ring) +{ + u16 ntc = xdp_ring->next_to_clean; + struct iavf_tx_buffer *tx_buf; + struct iavf_tx_desc *tx_desc; + u16 cnt = xdp_ring->count; + u16 done_frames = 0; + u16 xsk_frames = 0; + u16 last_rs; + int i; + + last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1; + tx_desc = IAVF_TX_DESC(xdp_ring, last_rs); + if ((tx_desc->cmd_type_offset_bsz & + cpu_to_le64(IAVF_TX_DESC_DTYPE_DESC_DONE))) { + if (last_rs >= ntc) + done_frames = last_rs - ntc + 1; + else + done_frames = last_rs + cnt - ntc + 1; + } + + if (!done_frames) + return; + + if (likely(!xdp_ring->xdp_tx_active)) { + xsk_frames = done_frames; + goto skip; + } + + ntc = xdp_ring->next_to_clean; + for (i = 0; i < done_frames; i++) { + tx_buf = &xdp_ring->tx_bi[ntc]; + + if (tx_buf->xdp_type) + iavf_clean_xdp_tx_buf(xdp_ring, tx_buf); + else + xsk_frames++; + + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } +skip: + tx_desc->cmd_type_offset_bsz = 0; + xdp_ring->next_to_clean += done_frames; + if (xdp_ring->next_to_clean >= cnt) + xdp_ring->next_to_clean -= cnt; + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); +} + +/** + * iavf_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor + * @xdp_ring: XDP ring to produce the HW Tx descriptor on + * @desc: AF_XDP descriptor to pull the DMA address and length from + * @total_bytes: bytes accumulator that will be used for stats update + */ +static void iavf_xmit_pkt(struct iavf_ring *xdp_ring, struct xdp_desc *desc, + unsigned int *total_bytes) +{ + struct iavf_tx_desc *tx_desc; + dma_addr_t dma; + + dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len); + + tx_desc = IAVF_TX_DESC(xdp_ring, xdp_ring->next_to_use++); + tx_desc->buffer_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = iavf_build_ctob(IAVF_TX_DESC_CMD_EOP, + 0, desc->len, 0); + + *total_bytes += desc->len; +} + +/** + * iavf_xmit_pkt_batch - produce a batch of HW Tx descriptors out + * of AF_XDP descriptors + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from + * @total_bytes: bytes accumulator that will be used for stats update + */ +static void iavf_xmit_pkt_batch(struct iavf_ring *xdp_ring, + struct xdp_desc *descs, + unsigned int *total_bytes) +{ + u16 ntu = xdp_ring->next_to_use; + struct iavf_tx_desc *tx_desc; + u32 i; + + loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { + dma_addr_t dma; + + dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, + descs[i].len); + + 
tx_desc = IAVF_TX_DESC(xdp_ring, ntu++); + tx_desc->buffer_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = + iavf_build_ctob(IAVF_TX_DESC_CMD_EOP, 0, + descs[i].len, 0); + + *total_bytes += descs[i].len; + } + + xdp_ring->next_to_use = ntu; +} + +/** + * iavf_fill_tx_hw_ring - produce the number of Tx descriptors onto ring + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from + * @nb_pkts: count of packets to be send + * @total_bytes: bytes accumulator that will be used for stats update + */ +static void iavf_fill_tx_hw_ring(struct iavf_ring *xdp_ring, + struct xdp_desc *descs, u32 nb_pkts, + unsigned int *total_bytes) +{ + u32 batched, leftover, i; + + batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH); + leftover = nb_pkts & (PKTS_PER_BATCH - 1); + + for (i = 0; i < batched; i += PKTS_PER_BATCH) + iavf_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes); + for (; i < batched + leftover; i++) + iavf_xmit_pkt(xdp_ring, &descs[i], total_bytes); +} + +/** + * iavf_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * + * Returns true if there is no more work that needs to be done, false otherwise + */ +bool iavf_xmit_zc(struct iavf_ring *xdp_ring) +{ + struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; + struct libie_sq_onstack_stats stats = { }; + u32 nb_processed = 0; + bool ret = true; + int budget; + + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_lock(&xdp_ring->tx_lock); + + iavf_clean_xdp_irq_zc(xdp_ring); + + budget = IAVF_DESC_UNUSED(xdp_ring); + budget = min_t(u16, budget, IAVF_RING_QUARTER(xdp_ring)); + + stats.packets = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, + budget); + if (!stats.packets) + goto unlock; + + if (xdp_ring->next_to_use + stats.packets >= xdp_ring->count) { + nb_processed = xdp_ring->count - xdp_ring->next_to_use; + iavf_fill_tx_hw_ring(xdp_ring, descs, nb_processed, + &stats.bytes); + xdp_ring->next_to_use = 0; + } + + iavf_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], + stats.packets - nb_processed, &stats.bytes); + + iavf_set_rs_bit(xdp_ring); + iavf_xdp_ring_update_tail(xdp_ring); + iavf_update_tx_ring_stats(xdp_ring, &stats); + + if (xsk_uses_need_wakeup(xdp_ring->xsk_pool)) + xsk_set_tx_need_wakeup(xdp_ring->xsk_pool); + + ret = stats.packets < budget; +unlock: + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); + + return ret; +} + +/** + * iavf_xsk_wakeup - Implements ndo_xsk_wakeup + * @netdev: net_device + * @queue_id: queue to wake up + * @flags: ignored in our case, since we have Rx and Tx in the same NAPI + * + * Returns negative on error, zero otherwise. 
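+ *
+ * If the queue's NAPI is not already scheduled, a software interrupt is
+ * triggered on its vector to kick processing.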
+ */ +int iavf_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + struct iavf_q_vector *q_vector; + struct iavf_ring *ring; + + if (adapter->state == __IAVF_DOWN || + adapter->state == __IAVF_RESETTING) + return -ENETDOWN; + + if (!iavf_adapter_xdp_active(adapter)) + return -EINVAL; + + if (queue_id >= adapter->num_active_queues) + return -EINVAL; + + ring = &adapter->rx_rings[queue_id]; + + if (!(ring->xdp_ring->flags & IAVF_TXRX_FLAGS_XSK)) + return -EINVAL; + + q_vector = ring->q_vector; + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) + iavf_trigger_sw_intr(adapter, q_vector); + + return 0; +} + +static u32 iavf_get_xdp_tx_qid(struct iavf_ring *ring) +{ + struct iavf_adapter *adapter = ring->vsi->back; + + return ring->queue_index - adapter->num_active_queues; +} + +static struct xsk_buff_pool *iavf_tx_xsk_pool(struct iavf_ring *ring) +{ + struct iavf_adapter *adapter = ring->vsi->back; + u32 qid; + + if (!iavf_adapter_xdp_active(adapter) || + !(ring->flags & IAVF_TXRX_FLAGS_XDP)) + return NULL; + + qid = iavf_get_xdp_tx_qid(ring); + if (!test_bit(qid, adapter->af_xdp_zc_qps)) + return NULL; + + return xsk_get_pool_from_qid(adapter->netdev, qid); +} + +void iavf_xsk_setup_xdp_ring(struct iavf_ring *xdp_ring) +{ + struct xsk_buff_pool *pool; + + pool = iavf_tx_xsk_pool(xdp_ring); + if (pool) { + xdp_ring->xsk_pool = pool; + xdp_ring->flags |= IAVF_TXRX_FLAGS_XSK; + } else { + xdp_ring->dev = &xdp_ring->vsi->back->pdev->dev; + xdp_ring->flags &= ~IAVF_TXRX_FLAGS_XSK; + } +} + +/** + * iavf_xsk_clean_xdp_ring - Clean the XDP Tx ring and its buffer pool queues + * @xdp_ring: XDP_Tx ring + */ +void iavf_xsk_clean_xdp_ring(struct iavf_ring *xdp_ring) +{ + u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use; + u32 xsk_frames = 0; + + while (ntc != ntu) { + struct iavf_tx_buffer *tx_buf = &xdp_ring->tx_bi[ntc]; + + if (tx_buf->xdp_type) + iavf_clean_xdp_tx_buf(xdp_ring, tx_buf); + else + xsk_frames++; + + tx_buf->page = NULL; + + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } + + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); +} + +/** + * iavf_init_rx_descs_zc - pick buffers from XSK buffer pool and use it + * @pool: XSK Buffer pool to pull the buffers from + * @xdp: SW ring of xdp_buff that will hold the buffers + * @rx_desc: Pointer to Rx descriptors that will be filled + * @count: The number of buffers to allocate + * + * This function allocates a number of Rx buffers from the fill ring + * or the internal recycle mechanism and places them on the Rx ring. + * + * Note that ring wrap should be handled by caller of this function. + * + * Returns the amount of allocated Rx descriptors + */ +static u16 iavf_init_rx_descs_zc(struct xsk_buff_pool *pool, + struct xdp_buff **xdp, + union iavf_rx_desc *rx_desc, u16 count) +{ + dma_addr_t dma; + u16 num_buffs; + u16 i; + + num_buffs = xsk_buff_alloc_batch(pool, xdp, count); + for (i = 0; i < num_buffs; i++) { + dma = xsk_buff_xdp_get_dma(*xdp); + rx_desc->read.pkt_addr = cpu_to_le64(dma); + rx_desc->wb.qword1.status_error_len = 0; + + rx_desc++; + xdp++; + } + + return num_buffs; +} + +static struct xdp_buff **iavf_get_xdp_buff(struct iavf_ring *ring, u32 idx) +{ + return &ring->xdp_buff[idx]; +} + +/** + * __iavf_alloc_rx_buffers_zc - allocate a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * + * Place the @count of descriptors onto Rx ring. 
Handle the ring wrap + * for case where space from next_to_use up to the end of ring is less + * than @count. Finally do a tail bump. + * + * Returns true if all allocations were successful, false if any fail. + */ +static bool __iavf_alloc_rx_buffers_zc(struct iavf_ring *rx_ring, u16 count) +{ + u32 nb_buffs_extra = 0, nb_buffs = 0; + u16 ntu = rx_ring->next_to_use; + union iavf_rx_desc *rx_desc; + u16 total_count = count; + struct xdp_buff **xdp; + + rx_desc = IAVF_RX_DESC(rx_ring, ntu); + xdp = iavf_get_xdp_buff(rx_ring, ntu); + + if (ntu + count >= rx_ring->count) { + nb_buffs_extra = iavf_init_rx_descs_zc(rx_ring->xsk_pool, xdp, + rx_desc, + rx_ring->count - ntu); + if (nb_buffs_extra != rx_ring->count - ntu) { + ntu += nb_buffs_extra; + goto exit; + } + rx_desc = IAVF_RX_DESC(rx_ring, 0); + xdp = iavf_get_xdp_buff(rx_ring, 0); + ntu = 0; + count -= nb_buffs_extra; + iavf_release_rx_desc(rx_ring, 0); + + if (!count) + goto exit; + } + + nb_buffs = iavf_init_rx_descs_zc(rx_ring->xsk_pool, xdp, rx_desc, count); + + ntu += nb_buffs; + if (ntu == rx_ring->count) + ntu = 0; + +exit: + if (rx_ring->next_to_use != ntu) + iavf_release_rx_desc(rx_ring, ntu); + + return total_count == (nb_buffs_extra + nb_buffs); +} + +/** + * iavf_alloc_rx_buffers_zc - allocate a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * + * Wrapper for internal allocation routine; figure out how many tail + * bumps should take place based on the given threshold + * + * Returns true if all calls to internal alloc routine succeeded + */ +static bool iavf_alloc_rx_buffers_zc(struct iavf_ring *rx_ring, u16 count) +{ + u16 rx_thresh = IAVF_RING_QUARTER(rx_ring); + u16 leftover, i, tail_bumps; + + tail_bumps = count / rx_thresh; + leftover = count - (tail_bumps * rx_thresh); + + for (i = 0; i < tail_bumps; i++) + if (!__iavf_alloc_rx_buffers_zc(rx_ring, rx_thresh)) + return false; + return __iavf_alloc_rx_buffers_zc(rx_ring, leftover); +} + +/** + * iavf_check_alloc_rx_buffers_zc - allocate a number of Rx buffers with logs + * @adapter: board private structure + * @rx_ring: Rx ring + * + * Wrapper for internal allocation routine; Prints out logs, if allocation + * did not go as expected + */ +void iavf_check_alloc_rx_buffers_zc(struct iavf_adapter *adapter, + struct iavf_ring *rx_ring) +{ + u32 count = IAVF_DESC_UNUSED(rx_ring); + + if (!xsk_buff_can_alloc(rx_ring->xsk_pool, count)) { + netdev_warn(adapter->netdev, + "XSK buffer pool does not provide enough addresses to fill %d buffers on Rx ring %d\n", + count, rx_ring->queue_index); + netdev_warn(adapter->netdev, + "Change Rx ring/fill queue size to avoid performance issues\n"); + } + + if (!iavf_alloc_rx_buffers_zc(rx_ring, count)) + netdev_warn(adapter->netdev, + "Failed to allocate some buffers on XSK buffer pool enabled Rx ring %d\n", + rx_ring->queue_index); +} + +/** + * iavf_rx_xsk_pool - Get a valid xsk pool for RX ring + * @ring: Rx ring being configured + * + * Do not return a xsk pool, if socket is TX-only + **/ +static struct xsk_buff_pool *iavf_rx_xsk_pool(struct iavf_ring *ring) +{ + struct iavf_adapter *adapter = ring->vsi->back; + u16 qid = ring->queue_index; + struct xsk_buff_pool *pool; + + if (!iavf_adapter_xdp_active(adapter) || + !test_bit(qid, adapter->af_xdp_zc_qps)) + return NULL; + + pool = xsk_get_pool_from_qid(adapter->netdev, qid); + if (!pool || !xsk_buff_can_alloc(pool, 1)) + return NULL; + + return pool; +} + +void iavf_xsk_setup_rx_ring(struct iavf_ring *rx_ring) +{ + struct xsk_buff_pool *pool; + 
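+	/* Bind the ring to the XSK pool attached to this queue, if any;
+	 * otherwise fall back to the regular page-pool-backed Rx path.
+	 */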
+ pool = iavf_rx_xsk_pool(rx_ring); + if (pool) { + rx_ring->xsk_pool = pool; + rx_ring->flags |= IAVF_TXRX_FLAGS_XSK; + } else { + rx_ring->dev = &rx_ring->vsi->back->pdev->dev; + rx_ring->flags &= ~IAVF_TXRX_FLAGS_XSK; + } +} + +/** + * iavf_xsk_clean_rx_ring - clean buffer pool queues connected to a given Rx ring + * @rx_ring: ring to be cleaned + */ +void iavf_xsk_clean_rx_ring(struct iavf_ring *rx_ring) +{ + u16 ntc = rx_ring->next_to_clean; + u16 ntu = rx_ring->next_to_use; + + while (ntc != ntu) { + struct xdp_buff *xdp = *iavf_get_xdp_buff(rx_ring, ntc); + + xsk_buff_free(xdp); + ntc++; + if (ntc >= rx_ring->count) + ntc = 0; + } +} + +/** + * iavf_xmit_xdp_tx_zc - AF_XDP ZC handler for XDP_TX + * @xdp: XDP buffer to xmit + * @xdp_ring: XDP ring to produce descriptor onto + * + * Returns 0 for successfully produced desc, + * -EBUSY if there was not enough space on XDP ring. + */ +static int iavf_xmit_xdp_tx_zc(struct xdp_buff *xdp, + struct iavf_ring *xdp_ring) +{ + u32 size = xdp->data_end - xdp->data; + u32 ntu = xdp_ring->next_to_use; + struct iavf_tx_buffer *tx_buf; + struct iavf_tx_desc *tx_desc; + dma_addr_t dma; + + if (IAVF_DESC_UNUSED(xdp_ring) < IAVF_RING_QUARTER(xdp_ring)) + iavf_clean_xdp_irq_zc(xdp_ring); + + if (unlikely(!IAVF_DESC_UNUSED(xdp_ring))) { + libie_stats_inc_one(&xdp_ring->sq_stats, busy); + return -EBUSY; + } + + dma = xsk_buff_xdp_get_dma(xdp); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size); + + tx_buf = &xdp_ring->tx_bi[ntu]; + tx_buf->bytecount = size; + tx_buf->gso_segs = 1; + tx_buf->xdp_type = IAVF_XDP_BUFFER_TX; + tx_buf->xdp = xdp; + + tx_desc = IAVF_TX_DESC(xdp_ring, ntu); + tx_desc->buffer_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = iavf_build_ctob(IAVF_TX_DESC_CMD_EOP, + 0, size, 0); + + xdp_ring->xdp_tx_active++; + + if (++ntu == xdp_ring->count) + ntu = 0; + xdp_ring->next_to_use = ntu; + + return 0; +} + +static int iavf_xmit_xdp_tx_zc_locked(struct xdp_buff *xdp, + struct iavf_ring *xdp_ring) +{ + int ret; + + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_lock(&xdp_ring->tx_lock); + ret = iavf_xmit_xdp_tx_zc(xdp, xdp_ring); + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); + + return ret; +} + +/** + * iavf_run_xdp_zc - Run XDP program and perform resulting action for ZC + * @rx_ring: RX descriptor ring to transact packets on + * @xdp: a prepared XDP buffer + * @xdp_prog: an XDP program assigned to the interface + * @xdp_ring: XDP TX queue assigned to the RX ring + * @rxq_xdp_act: Logical OR of flags of XDP actions that require finalization + * + * Returns resulting XDP action. 
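+ *
+ * XDP_REDIRECT is treated as the likely case; if the redirect fails with
+ * -ENOBUFS while need_wakeup is enabled, IAVF_RXQ_XDP_ACT_STOP_NOW is set
+ * so that the caller stops the Rx loop early.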
+ */ +static unsigned int +iavf_run_xdp_zc(struct iavf_ring *rx_ring, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog, struct iavf_ring *xdp_ring, + u32 *rxq_xdp_act) +{ + unsigned int xdp_act; + int err; + + xdp_act = bpf_prog_run_xdp(xdp_prog, xdp); + + if (likely(xdp_act == XDP_REDIRECT)) { + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + if (likely(!err)) { + *rxq_xdp_act |= IAVF_RXQ_XDP_ACT_FINALIZE_REDIR; + return XDP_REDIRECT; + } + + if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) + *rxq_xdp_act |= IAVF_RXQ_XDP_ACT_STOP_NOW; + + goto xdp_err; + } + + switch (xdp_act) { + case XDP_PASS: + break; + case XDP_TX: + err = iavf_xmit_xdp_tx_zc_locked(xdp, xdp_ring); + if (unlikely(err)) + goto xdp_err; + + *rxq_xdp_act |= IAVF_RXQ_XDP_ACT_FINALIZE_TX; + break; + default: + bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, xdp_act); + + fallthrough; + case XDP_ABORTED: +xdp_err: + trace_xdp_exception(rx_ring->netdev, xdp_prog, xdp_act); + + fallthrough; + case XDP_DROP: + xsk_buff_free(xdp); + + return XDP_DROP; + } + + return xdp_act; +} + +/** + * iavf_construct_skb_zc - Create an sk_buff from zero-copy buffer + * @rx_ring: Rx ring + * @xdp: Pointer to XDP buffer + * + * This function allocates a new skb from a zero-copy Rx buffer. + * + * Returns the skb on success, NULL on failure. + */ +static struct sk_buff * +iavf_construct_skb_zc(struct iavf_ring *rx_ring, struct xdp_buff *xdp) +{ + unsigned int totalsize = xdp->data_end - xdp->data_meta; + unsigned int metasize = xdp->data - xdp->data_meta; + struct sk_buff *skb; + + net_prefetch(xdp->data_meta); + + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + return NULL; + + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { + skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } + + xsk_buff_free(xdp); + + return skb; +} + +/** + * iavf_clean_rx_irq_zc - consumes packets from the hardware ring + * @rx_ring: AF_XDP Rx ring + * @budget: NAPI budget + * + * Returns number of processed packets on success, remaining budget on failure. + */ +int iavf_clean_rx_irq_zc(struct iavf_ring *rx_ring, int budget) +{ + struct libie_rq_onstack_stats stats = { }; + u32 ntc = rx_ring->next_to_clean; + u32 ring_size = rx_ring->count; + struct iavf_ring *xdp_ring; + struct bpf_prog *xdp_prog; + u32 cleaned_count = 0; + bool failure = false; + u32 rxq_xdp_act = 0; + u32 to_refill; + + xdp_prog = rcu_dereference(rx_ring->xdp_prog); + xdp_ring = rx_ring->xdp_ring; + + while (likely(cleaned_count < budget)) { + union iavf_rx_desc *rx_desc; + struct xdp_buff *xdp; + unsigned int xdp_act; + struct sk_buff *skb; + unsigned int size; + u64 qword; + + rx_desc = IAVF_RX_DESC(rx_ring, ntc); + + /* status_error_len will always be zero for unused descriptors + * because it's cleared in cleanup, and overlaps with hdr_addr + * which is always zero because packet split isn't used, if the + * hardware wrote DD then the length will be non-zero + */ + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + if (!iavf_test_staterr(qword, IAVF_RX_DESC_STATUS_DD_SHIFT)) + break; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we have + * verified the descriptor has been written back. 
+ */ + dma_rmb(); + + size = (qword & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >> + IAVF_RXD_QW1_LENGTH_PBUF_SHIFT; + + xdp = *iavf_get_xdp_buff(rx_ring, ntc); + iavf_trace(clean_rx_irq_zc, rx_ring, rx_desc, NULL); + + if (unlikely(!size)) { + xsk_buff_free(xdp); + goto next; + } + + xsk_buff_set_size(xdp, size); + xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool); + + xdp_act = iavf_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring, + &rxq_xdp_act); + if (xdp_act == XDP_PASS) + goto construct_skb; + + if (unlikely(rxq_xdp_act & IAVF_RXQ_XDP_ACT_STOP_NOW)) { + failure = true; + break; + } + + stats.bytes += size; + stats.packets++; + +next: + cleaned_count++; + if (unlikely(++ntc == ring_size)) + ntc = 0; + + continue; + +construct_skb: + skb = iavf_construct_skb_zc(rx_ring, xdp); + if (!skb) { + libie_stats_inc_one(&rx_ring->rq_stats, + build_skb_fail); + break; + } + + cleaned_count++; + if (unlikely(++ntc == ring_size)) + ntc = 0; + + prefetch(rx_desc); + + /* probably a little skewed due to removing CRC */ + stats.bytes += skb->len; + + /* populate checksum, VLAN, and protocol */ + iavf_process_skb_fields(rx_ring, rx_desc, skb, qword); + + iavf_trace(clean_rx_irq_zc_rx, rx_ring, rx_desc, skb); + skb->protocol = eth_type_trans(skb, rx_ring->netdev); + napi_gro_receive(&rx_ring->q_vector->napi, skb); + + stats.packets++; + } + + rx_ring->next_to_clean = ntc; + + iavf_finalize_xdp_rx(xdp_ring, rxq_xdp_act, 0); + + to_refill = IAVF_DESC_UNUSED(rx_ring); + if (to_refill > IAVF_RING_QUARTER(rx_ring)) + failure |= !iavf_alloc_rx_buffers_zc(rx_ring, to_refill); + + iavf_update_rx_ring_stats(rx_ring, &stats); + + if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) { + if (failure || rx_ring->next_to_clean == rx_ring->next_to_use) + xsk_set_rx_need_wakeup(rx_ring->xsk_pool); + else + xsk_clear_rx_need_wakeup(rx_ring->xsk_pool); + + return cleaned_count; + } + + return unlikely(failure) ? budget : cleaned_count; +} diff --git a/drivers/net/ethernet/intel/iavf/iavf_xsk.h b/drivers/net/ethernet/intel/iavf/iavf_xsk.h new file mode 100644 index 00000000000000..65aae299db4c0a --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_xsk.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2022 Intel Corporation. 
*/ + +#ifndef _IAVF_XSK_H_ +#define _IAVF_XSK_H_ + +#include + +#define PKTS_PER_BATCH 8 + +#ifdef __clang__ +#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for +#elif __GNUC__ >= 8 +#define loop_unrolled_for _Pragma("GCC unroll 8") for +#else +#define loop_unrolled_for for +#endif + +struct iavf_adapter; +struct iavf_ring; +struct net_device; +struct xsk_buff_pool; + +int iavf_xsk_pool_setup(struct iavf_adapter *adapter, + struct xsk_buff_pool *pool, u32 qid); + +int iavf_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); +bool iavf_xmit_zc(struct iavf_ring *xdp_ring); +void iavf_xsk_clean_xdp_ring(struct iavf_ring *xdp_ring); + +void iavf_xsk_clean_rx_ring(struct iavf_ring *rx_ring); +int iavf_clean_rx_irq_zc(struct iavf_ring *rx_ring, int budget); +void iavf_check_alloc_rx_buffers_zc(struct iavf_adapter *adapter, + struct iavf_ring *rx_ring); + +void iavf_xsk_setup_xdp_ring(struct iavf_ring *xdp_ring); +void iavf_xsk_setup_rx_ring(struct iavf_ring *rx_ring); + +#endif /* !_IAVF_XSK_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 89f986a75cc855..611577ebc29d82 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -160,64 +160,6 @@ struct ice_fltr_desc { (0x1ULL << ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S) #define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES 0x1ULL -struct ice_rx_ptype_decoded { - u32 known:1; - u32 outer_ip:1; - u32 outer_ip_ver:2; - u32 outer_frag:1; - u32 tunnel_type:3; - u32 tunnel_end_prot:2; - u32 tunnel_end_frag:1; - u32 inner_prot:4; - u32 payload_layer:3; -}; - -enum ice_rx_ptype_outer_ip { - ICE_RX_PTYPE_OUTER_L2 = 0, - ICE_RX_PTYPE_OUTER_IP = 1, -}; - -enum ice_rx_ptype_outer_ip_ver { - ICE_RX_PTYPE_OUTER_NONE = 0, - ICE_RX_PTYPE_OUTER_IPV4 = 1, - ICE_RX_PTYPE_OUTER_IPV6 = 2, -}; - -enum ice_rx_ptype_outer_fragmented { - ICE_RX_PTYPE_NOT_FRAG = 0, - ICE_RX_PTYPE_FRAG = 1, -}; - -enum ice_rx_ptype_tunnel_type { - ICE_RX_PTYPE_TUNNEL_NONE = 0, - ICE_RX_PTYPE_TUNNEL_IP_IP = 1, - ICE_RX_PTYPE_TUNNEL_IP_GRENAT = 2, - ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, - ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, -}; - -enum ice_rx_ptype_tunnel_end_prot { - ICE_RX_PTYPE_TUNNEL_END_NONE = 0, - ICE_RX_PTYPE_TUNNEL_END_IPV4 = 1, - ICE_RX_PTYPE_TUNNEL_END_IPV6 = 2, -}; - -enum ice_rx_ptype_inner_prot { - ICE_RX_PTYPE_INNER_PROT_NONE = 0, - ICE_RX_PTYPE_INNER_PROT_UDP = 1, - ICE_RX_PTYPE_INNER_PROT_TCP = 2, - ICE_RX_PTYPE_INNER_PROT_SCTP = 3, - ICE_RX_PTYPE_INNER_PROT_ICMP = 4, - ICE_RX_PTYPE_INNER_PROT_TIMESYNC = 5, -}; - -enum ice_rx_ptype_payload_layer { - ICE_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, - ICE_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, - ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, - ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, -}; - /* Rx Flex Descriptor * This descriptor is used instead of the legacy version descriptor when * ice_rlan_ctx.adv_desc is set @@ -651,262 +593,4 @@ struct ice_tlan_ctx { u8 int_q_state; /* width not needed - internal - DO NOT WRITE!!! */ }; -/* The ice_ptype_lkup table is used to convert from the 10-bit ptype in the - * hardware to a bit-field that can be used by SW to more easily determine the - * packet type. - * - * Macros are used to shorten the table lines and make this table human - * readable. - * - * We store the PTYPE in the top byte of the bit field - this is just so that - * we can check that the table doesn't have a row missing, as the index into - * the table should be the PTYPE. 
- * - * Typical work flow: - * - * IF NOT ice_ptype_lkup[ptype].known - * THEN - * Packet is unknown - * ELSE IF ice_ptype_lkup[ptype].outer_ip == ICE_RX_PTYPE_OUTER_IP - * Use the rest of the fields to look at the tunnels, inner protocols, etc - * ELSE - * Use the enum ice_rx_l2_ptype to decode the packet type - * ENDIF - */ - -/* macro to make the table lines short, use explicit indexing with [PTYPE] */ -#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - [PTYPE] = { \ - 1, \ - ICE_RX_PTYPE_OUTER_##OUTER_IP, \ - ICE_RX_PTYPE_OUTER_##OUTER_IP_VER, \ - ICE_RX_PTYPE_##OUTER_FRAG, \ - ICE_RX_PTYPE_TUNNEL_##T, \ - ICE_RX_PTYPE_TUNNEL_END_##TE, \ - ICE_RX_PTYPE_##TEF, \ - ICE_RX_PTYPE_INNER_PROT_##I, \ - ICE_RX_PTYPE_PAYLOAD_LAYER_##PL } - -#define ICE_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } - -/* shorter macros makes the table fit but are terse */ -#define ICE_RX_PTYPE_NOF ICE_RX_PTYPE_NOT_FRAG -#define ICE_RX_PTYPE_FRG ICE_RX_PTYPE_FRAG - -/* Lookup table mapping in the 10-bit HW PTYPE to the bit field for decoding */ -static const struct ice_rx_ptype_decoded ice_ptype_lkup[BIT(10)] = { - /* L2 Packet types */ - ICE_PTT_UNUSED_ENTRY(0), - ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - ICE_PTT_UNUSED_ENTRY(2), - ICE_PTT_UNUSED_ENTRY(3), - ICE_PTT_UNUSED_ENTRY(4), - ICE_PTT_UNUSED_ENTRY(5), - ICE_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - ICE_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - ICE_PTT_UNUSED_ENTRY(8), - ICE_PTT_UNUSED_ENTRY(9), - ICE_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - ICE_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - ICE_PTT_UNUSED_ENTRY(12), - ICE_PTT_UNUSED_ENTRY(13), - ICE_PTT_UNUSED_ENTRY(14), - ICE_PTT_UNUSED_ENTRY(15), - ICE_PTT_UNUSED_ENTRY(16), - ICE_PTT_UNUSED_ENTRY(17), - ICE_PTT_UNUSED_ENTRY(18), - ICE_PTT_UNUSED_ENTRY(19), - ICE_PTT_UNUSED_ENTRY(20), - ICE_PTT_UNUSED_ENTRY(21), - - /* Non Tunneled IPv4 */ - ICE_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), - ICE_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3), - ICE_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(25), - ICE_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), - ICE_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4), - ICE_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv4 --> IPv4 */ - ICE_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - ICE_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - ICE_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(32), - ICE_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - ICE_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - ICE_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> IPv6 */ - ICE_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - ICE_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - ICE_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(39), - ICE_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - ICE_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - ICE_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT */ - ICE_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> IPv4 */ - ICE_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - ICE_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - ICE_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(47), - ICE_PTT(48, IP, IPV4, NOF, IP_GRENAT, 
IPV4, NOF, TCP, PAY4), - ICE_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - ICE_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> IPv6 */ - ICE_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - ICE_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - ICE_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(54), - ICE_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - ICE_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - ICE_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC */ - ICE_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> MAC --> IPv4 */ - ICE_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - ICE_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - ICE_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(62), - ICE_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - ICE_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - ICE_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ - ICE_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - ICE_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - ICE_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(69), - ICE_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - ICE_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - ICE_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC/VLAN */ - ICE_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ - ICE_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - ICE_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - ICE_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(77), - ICE_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - ICE_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - ICE_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ - ICE_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - ICE_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - ICE_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(84), - ICE_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - ICE_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - ICE_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* Non Tunneled IPv6 */ - ICE_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), - ICE_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(91), - ICE_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), - ICE_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), - ICE_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv6 --> IPv4 */ - ICE_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - ICE_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - ICE_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(98), - ICE_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - ICE_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - 
ICE_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> IPv6 */ - ICE_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - ICE_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - ICE_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(105), - ICE_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - ICE_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - ICE_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT */ - ICE_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> IPv4 */ - ICE_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - ICE_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - ICE_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(113), - ICE_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - ICE_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - ICE_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> IPv6 */ - ICE_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - ICE_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - ICE_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(120), - ICE_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - ICE_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - ICE_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC */ - ICE_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ - ICE_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - ICE_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - ICE_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(128), - ICE_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - ICE_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - ICE_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ - ICE_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - ICE_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - ICE_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(135), - ICE_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - ICE_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - ICE_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN */ - ICE_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ - ICE_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - ICE_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - ICE_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(143), - ICE_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - ICE_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - ICE_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */ - ICE_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - ICE_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - ICE_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(150), - ICE_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - 
ICE_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - ICE_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* unused entries */ - [154 ... 1023] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } -}; - -static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype) -{ - return ice_ptype_lkup[ptype]; -} - - #endif /* _ICE_LAN_TX_RX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index a1f7c8edc22f34..f3d9c5ddef33e3 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -34,6 +34,7 @@ static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation."; MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION(DRV_SUMMARY); +MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); MODULE_FIRMWARE(ICE_DDP_PKG_FILE); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 7bc5aa340c7df7..3b3793428ab9ca 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019, Intel Corporation. */ #include +#include #include "ice_txrx_lib.h" #include "ice_eswitch.h" @@ -38,30 +39,6 @@ void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val) } } -/** - * ice_ptype_to_htype - get a hash type - * @ptype: the ptype value from the descriptor - * - * Returns appropriate hash type (such as PKT_HASH_TYPE_L2/L3/L4) to be used by - * skb_set_hash based on PTYPE as parsed by HW Rx pipeline and is part of - * Rx desc. - */ -static enum pkt_hash_types ice_ptype_to_htype(u16 ptype) -{ - struct ice_rx_ptype_decoded decoded = ice_decode_rx_desc_ptype(ptype); - - if (!decoded.known) - return PKT_HASH_TYPE_NONE; - if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4) - return PKT_HASH_TYPE_L4; - if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3) - return PKT_HASH_TYPE_L3; - if (decoded.outer_ip == ICE_RX_PTYPE_OUTER_L2) - return PKT_HASH_TYPE_L2; - - return PKT_HASH_TYPE_NONE; -} - /** * ice_rx_hash - set the hash value in the skb * @rx_ring: descriptor ring @@ -74,9 +51,11 @@ ice_rx_hash(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb, u16 rx_ptype) { struct ice_32b_rx_flex_desc_nic *nic_mdid; + struct libie_rx_ptype_parsed parsed; u32 hash; - if (!(rx_ring->netdev->features & NETIF_F_RXHASH)) + parsed = libie_parse_rx_ptype(rx_ptype); + if (!libie_has_rx_hash(rx_ring->netdev, parsed)) return; if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC) @@ -84,7 +63,7 @@ ice_rx_hash(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc; hash = le32_to_cpu(nic_mdid->rss_hash); - skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype)); + libie_skb_set_hash(skb, hash, parsed); } /** @@ -92,7 +71,7 @@ ice_rx_hash(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, * @ring: the ring we care about * @skb: skb currently being received and modified * @rx_desc: the receive descriptor - * @ptype: the packet type decoded by hardware + * @ptype: the packet type parsed by hardware * * skb->protocol must be set before this function is called */ @@ -100,34 +79,26 @@ static void ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, union ice_32b_rx_flex_desc *rx_desc, u16 ptype) { - struct ice_rx_ptype_decoded decoded; + struct libie_rx_ptype_parsed parsed; u16 rx_status0, rx_status1; bool ipv4, ipv6; - rx_status0 = 
le16_to_cpu(rx_desc->wb.status_error0); - rx_status1 = le16_to_cpu(rx_desc->wb.status_error1); - - decoded = ice_decode_rx_desc_ptype(ptype); - /* Start with CHECKSUM_NONE and by default csum_level = 0 */ skb->ip_summed = CHECKSUM_NONE; - skb_checksum_none_assert(skb); - /* check if Rx checksum is enabled */ - if (!(ring->netdev->features & NETIF_F_RXCSUM)) + parsed = libie_parse_rx_ptype(ptype); + if (!libie_has_rx_checksum(ring->netdev, parsed)) return; - /* check if HW has decoded the packet and checksum */ - if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) - return; + rx_status0 = le16_to_cpu(rx_desc->wb.status_error0); + rx_status1 = le16_to_cpu(rx_desc->wb.status_error1); - if (!(decoded.known && decoded.outer_ip)) + /* check if HW has parsed the packet and checksum */ + if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) return; - ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6); + ipv4 = parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV4; + ipv6 = parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV6; if (ipv4 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) @@ -151,19 +122,10 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, * we need to bump the checksum level by 1 to reflect the fact that * we are indicating we validated the inner checksum. */ - if (decoded.tunnel_type >= ICE_RX_PTYPE_TUNNEL_IP_GRENAT) + if (parsed.tunnel_type >= LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT) skb->csum_level = 1; - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ - switch (decoded.inner_prot) { - case ICE_RX_PTYPE_INNER_PROT_TCP: - case ICE_RX_PTYPE_INNER_PROT_UDP: - case ICE_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - default: - break; - } + skb->ip_summed = CHECKSUM_UNNECESSARY; return; checksum_fail: @@ -175,7 +137,7 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, * @rx_ring: Rx descriptor ring packet is being transacted on * @rx_desc: pointer to the EOP Rx descriptor * @skb: pointer to current skb being populated - * @ptype: the packet type decoded by hardware + * @ptype: the packet type parsed by hardware * * This function checks the ring, descriptor, and packet information in * order to populate the hash, checksum, VLAN, protocol, and diff --git a/drivers/net/ethernet/intel/libie/Makefile b/drivers/net/ethernet/intel/libie/Makefile new file mode 100644 index 00000000000000..76f32253481b70 --- /dev/null +++ b/drivers/net/ethernet/intel/libie/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright(c) 2023 Intel Corporation. + +obj-$(CONFIG_LIBIE) += libie.o + +libie-objs += rx.o +libie-objs += stats.o diff --git a/drivers/net/ethernet/intel/libie/internal.h b/drivers/net/ethernet/intel/libie/internal.h new file mode 100644 index 00000000000000..083398dc37c63d --- /dev/null +++ b/drivers/net/ethernet/intel/libie/internal.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* libie internal declarations not to be used in drivers. + * + * Copyright(c) 2023 Intel Corporation. 
+ */ + +#ifndef __LIBIE_INTERNAL_H +#define __LIBIE_INTERNAL_H + +struct libie_rq_stats; +struct page_pool; + +#ifdef CONFIG_PAGE_POOL_STATS +void libie_rq_stats_sync_pp(struct libie_rq_stats *stats, + struct page_pool *pool); +#else +static inline void libie_rq_stats_sync_pp(struct libie_rq_stats *stats, + struct page_pool *pool) +{ +} +#endif + +#endif /* __LIBIE_INTERNAL_H */ diff --git a/drivers/net/ethernet/intel/libie/rx.c b/drivers/net/ethernet/intel/libie/rx.c new file mode 100644 index 00000000000000..65475bf6d2d27f --- /dev/null +++ b/drivers/net/ethernet/intel/libie/rx.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation. */ + +#include + +#include "internal.h" + +/* O(1) converting i40e/ice/iavf's 8/10-bit hardware packet type to a parsed + * bitfield struct. + */ + +#define LIBIE_RX_PTYPE(oip, ofrag, tun, tp, tefr, iprot, pl) { \ + .outer_ip = LIBIE_RX_PTYPE_OUTER_##oip, \ + .outer_frag = LIBIE_RX_PTYPE_##ofrag, \ + .tunnel_type = LIBIE_RX_PTYPE_TUNNEL_IP_##tun, \ + .tunnel_end_prot = LIBIE_RX_PTYPE_TUNNEL_END_##tp, \ + .tunnel_end_frag = LIBIE_RX_PTYPE_##tefr, \ + .inner_prot = LIBIE_RX_PTYPE_INNER_##iprot, \ + .payload_layer = LIBIE_RX_PTYPE_PAYLOAD_##pl, \ + } + +#define LIBIE_RX_PTYPE_UNUSED { } + +#define __LIBIE_RX_PTYPE_L2(iprot, pl) \ + LIBIE_RX_PTYPE(L2, NOT_FRAG, NONE, NONE, NOT_FRAG, iprot, pl) +#define LIBIE_RX_PTYPE_L2 __LIBIE_RX_PTYPE_L2(NONE, L2) +#define LIBIE_RX_PTYPE_TS __LIBIE_RX_PTYPE_L2(TIMESYNC, L2) +#define LIBIE_RX_PTYPE_L3 __LIBIE_RX_PTYPE_L2(NONE, L3) + +#define LIBIE_RX_PTYPE_IP_FRAG(oip) \ + LIBIE_RX_PTYPE(IPV##oip, FRAG, NONE, NONE, NOT_FRAG, NONE, L3) +#define LIBIE_RX_PTYPE_IP_L3(oip, tun, teprot, tefr) \ + LIBIE_RX_PTYPE(IPV##oip, NOT_FRAG, tun, teprot, tefr, NONE, L3) +#define LIBIE_RX_PTYPE_IP_L4(oip, tun, teprot, iprot) \ + LIBIE_RX_PTYPE(IPV##oip, NOT_FRAG, tun, teprot, NOT_FRAG, iprot, L4) + +#define LIBIE_RX_PTYPE_IP_NOF(oip, tun, ver) \ + LIBIE_RX_PTYPE_IP_L3(oip, tun, ver, NOT_FRAG), \ + LIBIE_RX_PTYPE_IP_L4(oip, tun, ver, UDP), \ + LIBIE_RX_PTYPE_UNUSED, \ + LIBIE_RX_PTYPE_IP_L4(oip, tun, ver, TCP), \ + LIBIE_RX_PTYPE_IP_L4(oip, tun, ver, SCTP), \ + LIBIE_RX_PTYPE_IP_L4(oip, tun, ver, ICMP) + +/* IPv oip --> tun --> IPv ver */ +#define LIBIE_RX_PTYPE_IP_TUN_VER(oip, tun, ver) \ + LIBIE_RX_PTYPE_IP_L3(oip, tun, ver, FRAG), \ + LIBIE_RX_PTYPE_IP_NOF(oip, tun, ver) + +/* Non Tunneled IPv oip */ +#define LIBIE_RX_PTYPE_IP_RAW(oip) \ + LIBIE_RX_PTYPE_IP_FRAG(oip), \ + LIBIE_RX_PTYPE_IP_NOF(oip, NONE, NONE) + +/* IPv oip --> tun --> { IPv4, IPv6 } */ +#define LIBIE_RX_PTYPE_IP_TUN(oip, tun) \ + LIBIE_RX_PTYPE_IP_TUN_VER(oip, tun, IPV4), \ + LIBIE_RX_PTYPE_IP_TUN_VER(oip, tun, IPV6) + +/* IPv oip --> GRE/NAT tun --> { x, IPv4, IPv6 } */ +#define LIBIE_RX_PTYPE_IP_GRE(oip, tun) \ + LIBIE_RX_PTYPE_IP_L3(oip, tun, NONE, NOT_FRAG), \ + LIBIE_RX_PTYPE_IP_TUN(oip, tun) + +/* Non Tunneled IPv oip + * IPv oip --> { IPv4, IPv6 } + * IPv oip --> GRE/NAT --> { x, IPv4, IPv6 } + * IPv oip --> GRE/NAT --> MAC --> { x, IPv4, IPv6 } + * IPv oip --> GRE/NAT --> MAC/VLAN --> { x, IPv4, IPv6 } + */ +#define LIBIE_RX_PTYPE_IP(oip) \ + LIBIE_RX_PTYPE_IP_RAW(oip), \ + LIBIE_RX_PTYPE_IP_TUN(oip, IP), \ + LIBIE_RX_PTYPE_IP_GRE(oip, GRENAT), \ + LIBIE_RX_PTYPE_IP_GRE(oip, GRENAT_MAC), \ + LIBIE_RX_PTYPE_IP_GRE(oip, GRENAT_MAC_VLAN) + +/* Lookup table mapping for O(1) parsing */ +const struct libie_rx_ptype_parsed libie_rx_ptype_lut[LIBIE_RX_PTYPE_NUM] = { + /* L2 packet types */ + LIBIE_RX_PTYPE_UNUSED, + 
LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_TS, + LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_UNUSED, + LIBIE_RX_PTYPE_UNUSED, + LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_UNUSED, + LIBIE_RX_PTYPE_UNUSED, + LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_UNUSED, + + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + + LIBIE_RX_PTYPE_IP(4), + LIBIE_RX_PTYPE_IP(6), +}; +EXPORT_SYMBOL_NS_GPL(libie_rx_ptype_lut, LIBIE); + +/* Page Pool */ + +/** + * libie_rx_sync_len - get the actual buffer size to be synced and passed to HW + * @dev: &net_device to calculate the size for + * @hr: headroom in front of each frame + * + * Returns the buffer size to pass it to HW and use for DMA synchronization + * for the MTU the @dev has. + */ +static u32 libie_rx_sync_len(const struct net_device *dev, u32 hr) +{ + u32 len; + + len = READ_ONCE(dev->mtu) + LIBIE_RX_LL_LEN; + len = ALIGN(len, LIBIE_RX_BUF_LEN_ALIGN); + len = min(len, LIBIE_RX_BUF_LEN(hr)); + + return len; +} + +/** + * libie_rx_page_pool_create - create a PP with the default libie settings + * @dev: &net_device which a PP will be created for + * @size: size of the PP, usually simply Rx queue len + * @xdp: whether XDP is enabled on the device + * + * Returns &page_pool on success, casted -errno on failure. + */ +struct page_pool *libie_rx_page_pool_create(const struct net_device *dev, + u32 size, bool xdp) +{ + u32 hr = xdp ? LIBIE_XDP_HEADROOM : LIBIE_SKB_HEADROOM; + const struct page_pool_params pp = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_MAP_WEAK | + PP_FLAG_DMA_SYNC_DEV, + .order = LIBIE_RX_PAGE_ORDER, + .pool_size = size, + .nid = NUMA_NO_NODE, + .dev = dev->dev.parent, + .dma_dir = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, + .max_len = libie_rx_sync_len(dev, hr), + .offset = hr, + }; + + static_assert((PP_FLAG_DMA_MAP | PP_FLAG_DMA_MAP_WEAK) == + LIBIE_RX_DMA_ATTR); + + return page_pool_create(&pp); +} +EXPORT_SYMBOL_NS_GPL(libie_rx_page_pool_create, LIBIE); + +/** + * libie_rx_page_pool_destroy - destroy a &page_pool created by libie + * @pool: pool to destroy + * @stats: RQ stats from the ring (or %NULL to skip updating PP stats) + * + * As the stats usually has the same lifetime as the device, but PP is usually + * created/destroyed on ifup/ifdown, in order to not lose the stats accumulated + * during the last ifup, the PP stats need to be added to the driver stats + * container. Then the PP gets destroyed. + */ +void libie_rx_page_pool_destroy(struct page_pool *pool, + struct libie_rq_stats *stats) +{ + libie_rq_stats_sync_pp(stats, pool); + page_pool_destroy(pool); +} +EXPORT_SYMBOL_NS_GPL(libie_rx_page_pool_destroy, LIBIE); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Ethernet common library"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/intel/libie/stats.c b/drivers/net/ethernet/intel/libie/stats.c new file mode 100644 index 00000000000000..95bbb38c39e348 --- /dev/null +++ b/drivers/net/ethernet/intel/libie/stats.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation. 
*/ + +#include +#include + +#include "internal.h" + +/* Rx per-queue stats */ + +static const char * const libie_rq_stats_str[] = { +#define act(s) __stringify(s), + DECLARE_LIBIE_RQ_STATS(act) +#undef act +}; + +#define LIBIE_RQ_STATS_NUM ARRAY_SIZE(libie_rq_stats_str) + +#ifdef CONFIG_PAGE_POOL_STATS +/** + * libie_rq_stats_get_pp - get the current stats from a &page_pool + * @sarr: local array to add stats to + * @pool: pool to get the stats from + * + * Adds the current "live" stats from an online PP to the stats read from + * the RQ container, so that the actual totals will be returned. + */ +static void libie_rq_stats_get_pp(u64 *sarr, struct page_pool *pool) +{ + struct page_pool_stats *pps; + /* Used only to calculate pos below */ + struct libie_rq_stats tmp; + u32 pos; + + /* Validate the libie PP stats array can be casted <-> PP struct */ + static_assert(sizeof(tmp.pp) == sizeof(*pps)); + + if (!pool) + return; + + /* Position of the first Page Pool stats field */ + pos = (u64_stats_t *)&tmp.pp - tmp.raw; + pps = (typeof(pps))&sarr[pos]; + + page_pool_get_stats(pool, pps); +} + +/** + * libie_rq_stats_sync_pp - add the current PP stats to the RQ stats container + * @stats: stats structure to update + * @pool: pool to read the stats + * + * Called by libie_rx_page_pool_destroy() to save the stats before destroying + * the pool. + */ +void libie_rq_stats_sync_pp(struct libie_rq_stats *stats, + struct page_pool *pool) +{ + u64_stats_t *qarr = (u64_stats_t *)&stats->pp; + struct page_pool_stats pps = { }; + u64 *sarr = (u64 *)&pps; + + if (!stats) + return; + + page_pool_get_stats(pool, &pps); + + u64_stats_update_begin(&stats->syncp); + + for (u32 i = 0; i < sizeof(pps) / sizeof(*sarr); i++) + u64_stats_add(&qarr[i], sarr[i]); + + u64_stats_update_end(&stats->syncp); +} +#else +static inline void libie_rq_stats_get_pp(u64 *sarr, struct page_pool *pool) +{ +} + +/* static inline void libie_rq_stats_sync_pp() is declared in "internal.h" */ +#endif + +/** + * libie_rq_stats_get_sset_count - get the number of Ethtool RQ stats provided + * + * Returns the number of per-queue Rx stats supported by the library. 
+ */ +u32 libie_rq_stats_get_sset_count(void) +{ + return LIBIE_RQ_STATS_NUM; +} +EXPORT_SYMBOL_NS_GPL(libie_rq_stats_get_sset_count, LIBIE); + +/** + * libie_rq_stats_get_strings - get the name strings of Ethtool RQ stats + * @data: reference to the cursor pointing to the output buffer + * @qid: RQ number to print in the prefix + */ +void libie_rq_stats_get_strings(u8 **data, u32 qid) +{ + for (u32 i = 0; i < LIBIE_RQ_STATS_NUM; i++) + ethtool_sprintf(data, "rq%u_%s", qid, libie_rq_stats_str[i]); +} +EXPORT_SYMBOL_NS_GPL(libie_rq_stats_get_strings, LIBIE); + +/** + * libie_rq_stats_get_data - get the RQ stats in Ethtool format + * @data: reference to the cursor pointing to the output array + * @stats: RQ stats container from the queue + * @pool: &page_pool from the queue (%NULL to ignore PP "live" stats) + */ +void libie_rq_stats_get_data(u64 **data, const struct libie_rq_stats *stats, + struct page_pool *pool) +{ + u64 sarr[LIBIE_RQ_STATS_NUM]; + u32 start; + + do { + start = u64_stats_fetch_begin(&stats->syncp); + + for (u32 i = 0; i < LIBIE_RQ_STATS_NUM; i++) + sarr[i] = u64_stats_read(&stats->raw[i]); + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + libie_rq_stats_get_pp(sarr, pool); + + for (u32 i = 0; i < LIBIE_RQ_STATS_NUM; i++) + (*data)[i] += sarr[i]; + + *data += LIBIE_RQ_STATS_NUM; +} +EXPORT_SYMBOL_NS_GPL(libie_rq_stats_get_data, LIBIE); + +/* Tx per-queue stats */ + +static const char * const libie_sq_stats_str[] = { +#define act(s) __stringify(s), + DECLARE_LIBIE_SQ_STATS(act) +#undef act +}; + +#define LIBIE_SQ_STATS_NUM ARRAY_SIZE(libie_sq_stats_str) + +/** + * libie_sq_stats_get_sset_count - get the number of Ethtool SQ stats provided + * + * Returns the number of per-queue Tx stats supported by the library. + */ +u32 libie_sq_stats_get_sset_count(void) +{ + return LIBIE_SQ_STATS_NUM; +} +EXPORT_SYMBOL_NS_GPL(libie_sq_stats_get_sset_count, LIBIE); + +/** + * libie_sq_stats_get_strings - get the name strings of Ethtool SQ stats + * @data: reference to the cursor pointing to the output buffer + * @qid: SQ number to print in the prefix + */ +void libie_sq_stats_get_strings(u8 **data, u32 qid) +{ + for (u32 i = 0; i < LIBIE_SQ_STATS_NUM; i++) + ethtool_sprintf(data, "sq%u_%s", qid, libie_sq_stats_str[i]); +} +EXPORT_SYMBOL_NS_GPL(libie_sq_stats_get_strings, LIBIE); + +/** + * libie_sq_stats_get_data - get the SQ stats in Ethtool format + * @data: reference to the cursor pointing to the output array + * @stats: SQ stats container from the queue + */ +void libie_sq_stats_get_data(u64 **data, const struct libie_sq_stats *stats) +{ + u64 sarr[LIBIE_SQ_STATS_NUM]; + u32 start; + + do { + start = u64_stats_fetch_begin(&stats->syncp); + + for (u32 i = 0; i < LIBIE_SQ_STATS_NUM; i++) + sarr[i] = u64_stats_read(&stats->raw[i]); + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + for (u32 i = 0; i < LIBIE_SQ_STATS_NUM; i++) + (*data)[i] += sarr[i]; + + *data += LIBIE_SQ_STATS_NUM; +} +EXPORT_SYMBOL_NS_GPL(libie_sq_stats_get_data, LIBIE); diff --git a/include/linux/net/intel/libie/rx.h b/include/linux/net/intel/libie/rx.h new file mode 100644 index 00000000000000..d73efd721ffc6f --- /dev/null +++ b/include/linux/net/intel/libie/rx.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2023 Intel Corporation. */ + +#ifndef __LIBIE_RX_H +#define __LIBIE_RX_H + +#include +#include +#include + +/* O(1) converting i40e/ice/iavf's 8/10-bit hardware packet type to a parsed + * bitfield struct. 
+ */ + +struct libie_rx_ptype_parsed { + u16 outer_ip:2; + u16 outer_frag:1; + u16 tunnel_type:3; + u16 tunnel_end_prot:2; + u16 tunnel_end_frag:1; + u16 inner_prot:3; + u16 payload_layer:2; +}; + +enum libie_rx_ptype_outer_ip { + LIBIE_RX_PTYPE_OUTER_L2 = 0U, + LIBIE_RX_PTYPE_OUTER_IPV4, + LIBIE_RX_PTYPE_OUTER_IPV6, +}; + +enum libie_rx_ptype_outer_fragmented { + LIBIE_RX_PTYPE_NOT_FRAG = 0U, + LIBIE_RX_PTYPE_FRAG, +}; + +enum libie_rx_ptype_tunnel_type { + LIBIE_RX_PTYPE_TUNNEL_IP_NONE = 0U, + LIBIE_RX_PTYPE_TUNNEL_IP_IP, + LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT, + LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC, + LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN, +}; + +enum libie_rx_ptype_tunnel_end_prot { + LIBIE_RX_PTYPE_TUNNEL_END_NONE = 0U, + LIBIE_RX_PTYPE_TUNNEL_END_IPV4, + LIBIE_RX_PTYPE_TUNNEL_END_IPV6, +}; + +enum libie_rx_ptype_inner_prot { + LIBIE_RX_PTYPE_INNER_NONE = 0U, + LIBIE_RX_PTYPE_INNER_UDP, + LIBIE_RX_PTYPE_INNER_TCP, + LIBIE_RX_PTYPE_INNER_SCTP, + LIBIE_RX_PTYPE_INNER_ICMP, + LIBIE_RX_PTYPE_INNER_TIMESYNC, +}; + +enum libie_rx_ptype_payload_layer { + LIBIE_RX_PTYPE_PAYLOAD_NONE = PKT_HASH_TYPE_NONE, + LIBIE_RX_PTYPE_PAYLOAD_L2 = PKT_HASH_TYPE_L2, + LIBIE_RX_PTYPE_PAYLOAD_L3 = PKT_HASH_TYPE_L3, + LIBIE_RX_PTYPE_PAYLOAD_L4 = PKT_HASH_TYPE_L4, +}; + +#define LIBIE_RX_PTYPE_NUM 154 + +extern const struct libie_rx_ptype_parsed +libie_rx_ptype_lut[LIBIE_RX_PTYPE_NUM]; + +/** + * libie_parse_rx_ptype - convert HW packet type to software bitfield structure + * @ptype: 10-bit hardware packet type value from the descriptor + * + * @libie_rx_ptype_lut must be accessed only using this wrapper. + * + * Returns the parsed bitfield struct corresponding to the provided ptype. + */ +static inline struct libie_rx_ptype_parsed libie_parse_rx_ptype(u32 ptype) +{ + if (unlikely(ptype >= LIBIE_RX_PTYPE_NUM)) + ptype = 0; + + return libie_rx_ptype_lut[ptype]; +} + +/* libie_has_*() can be used to quickly check whether the HW metadata is + * available to avoid further expensive processing such as descriptor reads. + * They already check for the corresponding netdev feature to be enabled, + * thus can be used as drop-in replacements. + */ + +static inline bool libie_has_rx_checksum(const struct net_device *dev, + struct libie_rx_ptype_parsed parsed) +{ + /* _INNER_{SCTP,TCP,UDP} are possible only when _OUTER_IPV* is set, + * it is enough to check only for the L4 type. + */ + switch (parsed.inner_prot) { + case LIBIE_RX_PTYPE_INNER_TCP: + case LIBIE_RX_PTYPE_INNER_UDP: + case LIBIE_RX_PTYPE_INNER_SCTP: + return dev->features & NETIF_F_RXCSUM; + default: + return false; + } +} + +static inline bool libie_has_rx_hash(const struct net_device *dev, + struct libie_rx_ptype_parsed parsed) +{ + if (parsed.payload_layer < LIBIE_RX_PTYPE_PAYLOAD_L2) + return false; + + return dev->features & NETIF_F_RXHASH; +} + +/** + * libie_skb_set_hash - fill in skb hash value basing on the parsed ptype + * @skb: skb to fill the hash in + * @hash: 32-bit hash value from the descriptor + * @parsed: parsed packet type + */ +static inline void libie_skb_set_hash(struct sk_buff *skb, u32 hash, + struct libie_rx_ptype_parsed parsed) +{ + skb_set_hash(skb, hash, parsed.payload_layer); +} + +/* Rx MTU/buffer/truesize helpers. Mostly pure software-side; HW-defined values + * are valid for all Intel HW. 
+ */ + +/* Space reserved in front of each frame */ +#define LIBIE_SKB_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN) +#define LIBIE_XDP_HEADROOM (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \ + NET_IP_ALIGN) +/* Maximum headroom to calculate max MTU below */ +#define LIBIE_MAX_HEADROOM LIBIE_XDP_HEADROOM +/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */ +#define LIBIE_RX_LL_LEN (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN) + +/* Truesize: total space wasted on each frame. Always use order-0 pages */ +#define LIBIE_RX_PAGE_ORDER 0 +#define LIBIE_RX_TRUESIZE (PAGE_SIZE << LIBIE_RX_PAGE_ORDER) +/* Rx buffer size config is a multiple of 128 */ +#define LIBIE_RX_BUF_LEN_ALIGN 128 +/* HW-writeable space in one buffer: truesize - headroom/tailroom, + * HW-aligned + */ +#define __LIBIE_RX_BUF_LEN(hr) \ + ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBIE_RX_PAGE_ORDER), \ + LIBIE_RX_BUF_LEN_ALIGN) +/* The largest size for a single descriptor as per HW */ +#define LIBIE_MAX_RX_BUF_LEN 9728U +/* "True" HW-writeable space: minimum from SW and HW values */ +#define LIBIE_RX_BUF_LEN(hr) min_t(u32, __LIBIE_RX_BUF_LEN(hr), \ + LIBIE_MAX_RX_BUF_LEN) + +/* The maximum frame size as per HW (S/G) */ +#define __LIBIE_MAX_RX_FRM_LEN 16382U +/* ATST, HW can chain up to 5 Rx descriptors */ +#define LIBIE_MAX_RX_FRM_LEN(hr) \ + min_t(u32, __LIBIE_MAX_RX_FRM_LEN, LIBIE_RX_BUF_LEN(hr) * 5) +/* Maximum frame size minus LL overhead */ +#define LIBIE_MAX_MTU (LIBIE_MAX_RX_FRM_LEN(LIBIE_MAX_HEADROOM) - \ + LIBIE_RX_LL_LEN) + +/* DMA mapping attributes for Rx buffers: no impl. sync + relaxed on Sparc */ +#define LIBIE_RX_DMA_ATTR \ + (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) + +struct libie_rq_stats; + +struct page_pool *libie_rx_page_pool_create(const struct net_device *dev, + u32 size, bool xdp); +void libie_rx_page_pool_destroy(struct page_pool *pool, + struct libie_rq_stats *stats); + +#endif /* __LIBIE_RX_H */ diff --git a/include/linux/net/intel/libie/stats.h b/include/linux/net/intel/libie/stats.h new file mode 100644 index 00000000000000..23ca0079a90586 --- /dev/null +++ b/include/linux/net/intel/libie/stats.h @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2023 Intel Corporation. */ + +#ifndef __LIBIE_STATS_H +#define __LIBIE_STATS_H + +#include + +/* Common */ + +/* Use 32-byte alignment to reduce false sharing */ +#define __libie_stats_aligned __aligned(4 * sizeof(u64_stats_t)) + +/** + * libie_stats_add - update one structure counter from a local struct + * @qs: queue stats structure to update (&libie_rq_stats or &libie_sq_stats) + * @ss: local/onstack stats structure + * @f: name of the field to update + * + * If a local/onstack stats structure is used to collect statistics during + * hotpath loops, this macro can be used to shorthand updates, given that + * the fields have the same name. + * Must be guarded with u64_stats_update_{begin,end}(). + */ +#define libie_stats_add(qs, ss, f) \ + u64_stats_add(&(qs)->f, (ss)->f) + +/** + * __libie_stats_inc_one - safely increment one stats structure counter + * @s: queue stats structure to update (&libie_rq_stats or &libie_sq_stats) + * @f: name of the field to increment + * @n: name of the temporary variable, result of __UNIQUE_ID() + * + * To be used on exception or slow paths -- allocation fails, queue stops etc. 
+ */ +#define __libie_stats_inc_one(s, f, n) ({ \ + typeof(*(s)) *n = (s); \ + \ + u64_stats_update_begin(&n->syncp); \ + u64_stats_inc(&n->f); \ + u64_stats_update_end(&n->syncp); \ +}) +#define libie_stats_inc_one(s, f) \ + __libie_stats_inc_one(s, f, __UNIQUE_ID(qs_)) + +/* Rx per-queue stats: + * packets: packets received on this queue + * bytes: bytes received on this queue + * fragments: number of processed descriptors carrying only a fragment + * alloc_page_fail: number of Rx page allocation fails + * build_skb_fail: number of build_skb() fails + * pp_alloc_fast: pages taken from the cache or ring + * pp_alloc_slow: actual page allocations + * pp_alloc_slow_ho: non-order-0 page allocations + * pp_alloc_empty: number of times the pool was empty + * pp_alloc_refill: number of cache refills + * pp_alloc_waive: NUMA node mismatches during recycling + * pp_recycle_cached: direct recyclings into the cache + * pp_recycle_cache_full: number of times the cache was full + * pp_recycle_ring: recyclings into the ring + * pp_recycle_ring_full: number of times the ring was full + * pp_recycle_released_ref: pages released due to elevated refcnt + */ + +#define DECLARE_LIBIE_RQ_NAPI_STATS(act) \ + act(packets) \ + act(bytes) \ + act(fragments) + +#define DECLARE_LIBIE_RQ_FAIL_STATS(act) \ + act(alloc_page_fail) \ + act(build_skb_fail) + +#ifdef CONFIG_PAGE_POOL_STATS +#define DECLARE_LIBIE_RQ_PP_STATS(act) \ + act(pp_alloc_fast) \ + act(pp_alloc_slow) \ + act(pp_alloc_slow_ho) \ + act(pp_alloc_empty) \ + act(pp_alloc_refill) \ + act(pp_alloc_waive) \ + act(pp_recycle_cached) \ + act(pp_recycle_cache_full) \ + act(pp_recycle_ring) \ + act(pp_recycle_ring_full) \ + act(pp_recycle_released_ref) +#else +#define DECLARE_LIBIE_RQ_PP_STATS(act) +#endif + +#define DECLARE_LIBIE_RQ_STATS(act) \ + DECLARE_LIBIE_RQ_NAPI_STATS(act) \ + DECLARE_LIBIE_RQ_FAIL_STATS(act) \ + DECLARE_LIBIE_RQ_PP_STATS(act) + +struct page_pool; + +struct libie_rq_stats { + struct u64_stats_sync syncp; + + union { + struct { +#define act(s) u64_stats_t s; + DECLARE_LIBIE_RQ_NAPI_STATS(act); + DECLARE_LIBIE_RQ_FAIL_STATS(act); + struct_group(pp, + DECLARE_LIBIE_RQ_PP_STATS(act); + ); +#undef act + }; + DECLARE_FLEX_ARRAY(u64_stats_t, raw); + }; +} __libie_stats_aligned; + +/* Rx stats being modified frequently during the NAPI polling, to sync them + * with the queue stats once after the loop is finished. 
+ */ +struct libie_rq_onstack_stats { + union { + struct { +#define act(s) u32 s; + DECLARE_LIBIE_RQ_NAPI_STATS(act); +#undef act + }; + DECLARE_FLEX_ARRAY(u32, raw); + }; +}; + +/** + * libie_rq_napi_stats_add - add onstack Rx stats to the queue container + * @qs: Rx queue stats structure to update + * @ss: onstack structure to get the values from, updated during the NAPI loop + */ +static inline void +libie_rq_napi_stats_add(struct libie_rq_stats *qs, + const struct libie_rq_onstack_stats *ss) +{ + u64_stats_update_begin(&qs->syncp); + libie_stats_add(qs, ss, packets); + libie_stats_add(qs, ss, bytes); + libie_stats_add(qs, ss, fragments); + u64_stats_update_end(&qs->syncp); +} + +u32 libie_rq_stats_get_sset_count(void); +void libie_rq_stats_get_strings(u8 **data, u32 qid); +void libie_rq_stats_get_data(u64 **data, const struct libie_rq_stats *stats, + struct page_pool *pool); + +/* Tx per-queue stats: + * packets: packets sent from this queue + * bytes: bytes sent from this queue + * busy: number of xmit failures due to the ring being full + * stops: number times the ring was stopped from the driver + * restarts: number times it was started after being stopped + * linearized: number of skbs linearized due to HW limits + */ + +#define DECLARE_LIBIE_SQ_NAPI_STATS(act) \ + act(packets) \ + act(bytes) + +#define DECLARE_LIBIE_SQ_XMIT_STATS(act) \ + act(busy) \ + act(stops) \ + act(restarts) \ + act(linearized) + +#define DECLARE_LIBIE_SQ_STATS(act) \ + DECLARE_LIBIE_SQ_NAPI_STATS(act) \ + DECLARE_LIBIE_SQ_XMIT_STATS(act) + +struct libie_sq_stats { + struct u64_stats_sync syncp; + + union { + struct { +#define act(s) u64_stats_t s; + DECLARE_LIBIE_SQ_STATS(act); +#undef act + }; + DECLARE_FLEX_ARRAY(u64_stats_t, raw); + }; +} __libie_stats_aligned; + +struct libie_sq_onstack_stats { +#define act(s) u32 s; + DECLARE_LIBIE_SQ_NAPI_STATS(act); +#undef act +}; + +/** + * libie_sq_napi_stats_add - add onstack Tx stats to the queue container + * @qs: Tx queue stats structure to update + * @ss: onstack structure to get the values from, updated during the NAPI loop + */ +static inline void +libie_sq_napi_stats_add(struct libie_sq_stats *qs, + const struct libie_sq_onstack_stats *ss) +{ + if (unlikely(!ss->packets)) + return; + + u64_stats_update_begin(&qs->syncp); + libie_stats_add(qs, ss, packets); + libie_stats_add(qs, ss, bytes); + u64_stats_update_end(&qs->syncp); +} + +u32 libie_sq_stats_get_sset_count(void); +void libie_sq_stats_get_strings(u8 **data, u32 qid); +void libie_sq_stats_get_data(u64 **data, const struct libie_sq_stats *stats); + +#endif /* __LIBIE_STATS_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fe661011644b8f..3f3a2a82a86b30 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -5069,12 +5069,12 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb) #endif } -#ifdef CONFIG_PAGE_POOL static inline void skb_mark_for_recycle(struct sk_buff *skb) { +#ifdef CONFIG_PAGE_POOL skb->pp_recycle = 1; -} #endif +} #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/include/net/page_pool.h b/include/net/page_pool.h index ddfa0b32867776..fb949d168e14c0 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -32,12 +32,18 @@ #include /* Needed by ptr_ring */ #include -#include +#include -#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA +#define PP_FLAG_DMA_MAP BIT(5) /* Should page_pool do the DMA * map/unmap */ -#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets +#define 
PP_FLAG_DMA_MAP_WEAK BIT(1) /* Map with %DMA_ATTR_WEAK_ORDERING */ +/* These flags correspond to the DMA map attributes to pass them directly to + * dma_map_page_attrs(), see page_pool_dma_map(). + */ +#define PP_FLAG_DMA_ATTR (PP_FLAG_DMA_MAP | \ + PP_FLAG_DMA_MAP_WEAK) +#define PP_FLAG_DMA_SYNC_DEV BIT(0) /* If set all pages that the driver gets * from page_pool will be * DMA-synced-for-device according to * the length provided by the device @@ -46,7 +52,7 @@ * device driver responsibility */ #define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ -#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ +#define PP_FLAG_ALL (PP_FLAG_DMA_ATTR |\ PP_FLAG_DMA_SYNC_DEV |\ PP_FLAG_PAGE_FRAG) @@ -233,8 +239,8 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, /* get the stored dma direction. A driver might decide to treat this locally and * avoid the extra cache line from page_pool to determine the direction */ -static -inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) +static inline enum dma_data_direction +page_pool_get_dma_dir(const struct page_pool *pool) { return pool->p.dma_dir; } @@ -354,7 +360,7 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, #define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ (sizeof(dma_addr_t) > sizeof(unsigned long)) -static inline dma_addr_t page_pool_get_dma_addr(struct page *page) +static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) { dma_addr_t ret = page->dma_addr; @@ -371,6 +377,37 @@ static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) page->dma_addr_upper = upper_32_bits(addr); } +/** + * page_pool_dma_sync_for_cpu - sync Rx page for CPU after it's written by HW + * @pool: page_pool which this page belongs to + * @page: page to sync + * @dma_sync_size: size of the data written to the page + * + * Can be used as a shorthand to sync Rx pages before accessing them in the + * driver. The caller must ensure the pool was created with %PP_FLAG_DMA_MAP. + */ +static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, + const struct page *page, + u32 dma_sync_size) +{ + dma_sync_single_range_for_cpu(pool->p.dev, + page_pool_get_dma_addr(page), + pool->p.offset, dma_sync_size, + page_pool_get_dma_dir(pool)); +} + +/** + * page_pool_dma_sync_for_cpu - sync full Rx page for CPU + * @pool: page_pool which this page belongs to + * @page: page to sync + */ +static inline void +page_pool_dma_sync_full_for_cpu(const struct page_pool *pool, + const struct page *page) +{ + page_pool_dma_sync_for_cpu(pool, page, pool->p.max_len); +} + static inline bool is_page_pool_compiled_in(void) { #ifdef CONFIG_PAGE_POOL diff --git a/include/net/xdp.h b/include/net/xdp.h index 41c57b8b167147..383b25b426a482 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -317,35 +317,6 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq); void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq); -/* When sending xdp_frame into the network stack, then there is no - * return point callback, which is needed to release e.g. DMA-mapping - * resources with page_pool. Thus, have explicit function to release - * frame resources. 
- */ -void __xdp_release_frame(void *data, struct xdp_mem_info *mem); -static inline void xdp_release_frame(struct xdp_frame *xdpf) -{ - struct xdp_mem_info *mem = &xdpf->mem; - struct skb_shared_info *sinfo; - int i; - - /* Curr only page_pool needs this */ - if (mem->type != MEM_TYPE_PAGE_POOL) - return; - - if (likely(!xdp_frame_has_frags(xdpf))) - goto out; - - sinfo = xdp_get_shared_info_from_frame(xdpf); - for (i = 0; i < sinfo->nr_frags; i++) { - struct page *page = skb_frag_page(&sinfo->frags[i]); - - __xdp_release_frame(page_address(page), mem); - } -out: - __xdp_release_frame(xdpf->data, mem); -} - static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf) { struct skb_shared_info *sinfo; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 193c1879986503..74e25b55e2f062 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -161,6 +161,13 @@ static int page_pool_init(struct page_pool *pool, return -EINVAL; } + /* Passing DMA mapping attributes without asking PP to map pages + * makes no sense. + */ + if ((pool->p.flags & PP_FLAG_DMA_ATTR) && + !(pool->p.flags & PP_FLAG_DMA_MAP)) + return -EINVAL; + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) { /* In order to request DMA-sync-for-device the page * needs to be mapped @@ -308,6 +315,14 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) { dma_addr_t dma; + /* Pages are always mapped with %DMA_ATTR_SKIP_CPU_SYNC, so its value + * corresponds to %PP_FLAG_DMA_MAP, which is always set when reaching + * this function. + */ + static_assert(PP_FLAG_DMA_MAP == DMA_ATTR_SKIP_CPU_SYNC); + /* Drivers may set this for PP to map with weak ordering */ + static_assert(PP_FLAG_DMA_MAP_WEAK == DMA_ATTR_WEAK_ORDERING); + /* Setup DMA mapping: use 'struct page' area for storing DMA-addr * since dma_addr_t can be either 32 or 64 bits and does not always fit * into page private data (i.e 32bit cpu with 64bit DMA caps) @@ -315,7 +330,8 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) */ dma = dma_map_page_attrs(pool->p.dev, page, 0, (PAGE_SIZE << pool->p.order), - pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC); + pool->p.dma_dir, + pool->p.flags & PP_FLAG_DMA_ATTR); if (dma_mapping_error(pool->p.dev, dma)) return false; @@ -483,7 +499,7 @@ void page_pool_release_page(struct page_pool *pool, struct page *page) /* When page is unmapped, it cannot be returned to our pool */ dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, - DMA_ATTR_SKIP_CPU_SYNC); + pool->p.flags & PP_FLAG_DMA_ATTR); page_pool_set_dma_addr(page, 0); skip_dma_unmap: page_pool_clear_pp_info(page); diff --git a/net/core/xdp.c b/net/core/xdp.c index 528d4b37983df8..018e0fe4e71405 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -531,21 +531,6 @@ void xdp_return_buff(struct xdp_buff *xdp) } EXPORT_SYMBOL_GPL(xdp_return_buff); -/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */ -void __xdp_release_frame(void *data, struct xdp_mem_info *mem) -{ - struct xdp_mem_allocator *xa; - struct page *page; - - rcu_read_lock(); - xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - page = virt_to_head_page(data); - if (xa) - page_pool_release_page(xa->page_pool, page); - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(__xdp_release_frame); - void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf) { @@ -658,8 +643,8 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, * - RX ring dev queue index (skb_record_rx_queue) */ - /* Until page_pool 
get SKB return path, release DMA here */ - xdp_release_frame(xdpf); + if (xdpf->mem.type == MEM_TYPE_PAGE_POOL) + skb_mark_for_recycle(skb); /* Allow SKB to reuse area used by xdp_frame */ xdp_scrub_frame(xdpf); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c index 77a123071940ae..cd2d4e3258b899 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c @@ -4,6 +4,19 @@ #define ETH_ALEN 6 #define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr)) + +/** + * enum frame_mark - magics to distinguish page/packet paths + * @MARK_XMIT: page was recycled due to the frame being "xmitted" by the NIC. + * @MARK_IN: frame is being processed by the input XDP prog. + * @MARK_SKB: frame did hit the TC ingress hook as an skb. + */ +enum frame_mark { + MARK_XMIT = 0U, + MARK_IN = 0x42, + MARK_SKB = 0x45, +}; + const volatile int ifindex_out; const volatile int ifindex_in; const volatile __u8 expect_dst[ETH_ALEN]; @@ -34,10 +47,10 @@ int xdp_redirect(struct xdp_md *xdp) if (*metadata != 0x42) return XDP_ABORTED; - if (*payload == 0) { - *payload = 0x42; + if (*payload == MARK_XMIT) pkts_seen_zero++; - } + + *payload = MARK_IN; if (bpf_xdp_adjust_meta(xdp, 4)) return XDP_ABORTED; @@ -51,7 +64,7 @@ int xdp_redirect(struct xdp_md *xdp) return ret; } -static bool check_pkt(void *data, void *data_end) +static bool check_pkt(void *data, void *data_end, const __u32 mark) { struct ipv6hdr *iph = data + sizeof(struct ethhdr); __u8 *payload = data + HDR_SZ; @@ -59,13 +72,13 @@ static bool check_pkt(void *data, void *data_end) if (payload + 1 > data_end) return false; - if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42) + if (iph->nexthdr != IPPROTO_UDP || *payload != MARK_IN) return false; /* reset the payload so the same packet doesn't get counted twice when * it cycles back through the kernel path and out the dst veth */ - *payload = 0; + *payload = mark; return true; } @@ -75,11 +88,11 @@ int xdp_count_pkts(struct xdp_md *xdp) void *data = (void *)(long)xdp->data; void *data_end = (void *)(long)xdp->data_end; - if (check_pkt(data, data_end)) + if (check_pkt(data, data_end, MARK_XMIT)) pkts_seen_xdp++; - /* Return XDP_DROP to make sure the data page is recycled, like when it - * exits a physical NIC. Recycled pages will be counted in the + /* Return %XDP_DROP to recycle the data page with %MARK_XMIT, like + * it exited a physical NIC. Those pages will be counted in the * pkts_seen_zero counter above. */ return XDP_DROP; @@ -91,9 +104,12 @@ int tc_count_pkts(struct __sk_buff *skb) void *data = (void *)(long)skb->data; void *data_end = (void *)(long)skb->data_end; - if (check_pkt(data, data_end)) + if (check_pkt(data, data_end, MARK_SKB)) pkts_seen_tc++; + /* Will be either recycled or freed, %MARK_SKB makes sure it won't + * hit any of the counters above. + */ return 0; }
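
The sketches below are not part of the patch; they only illustrate, under stated assumptions, how a consumer driver is expected to use the new libie and page_pool APIs. All example_* names are hypothetical. Note that a module calling the exported libie symbols also needs MODULE_IMPORT_NS(LIBIE), as the ice_main.c hunk above shows.

A minimal sketch of the Rx-metadata path: the hardware ptype is parsed once and the result drives both the hash and the checksum hints, mirroring the converted ice_rx_hash()/ice_rx_csum() above. @rss_hash and @hw_ptype are assumed to have been read from the Rx descriptor by the caller.

#include <linux/net/intel/libie/rx.h>

/* Hypothetical Rx-metadata helper, not part of the patch */
static void example_rx_metadata(struct net_device *dev, struct sk_buff *skb,
				u32 rss_hash, u32 hw_ptype)
{
	struct libie_rx_ptype_parsed parsed = libie_parse_rx_ptype(hw_ptype);

	/* Checks both the parsed payload layer and NETIF_F_RXHASH */
	if (libie_has_rx_hash(dev, parsed))
		libie_skb_set_hash(skb, rss_hash, parsed);

	/* Checks the parsed L4 protocol and NETIF_F_RXCSUM; the descriptor
	 * error bits still have to be checked by the driver itself.
	 */
	if (libie_has_rx_checksum(dev, parsed))
		skb->ip_summed = CHECKSUM_UNNECESSARY;
}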
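
Wiring the libie page_pool helpers into a queue, again as a sketch: struct example_rx_queue and its fields are assumptions, only libie_rx_page_pool_create()/_destroy() and struct libie_rq_stats come from the patch.

#include <linux/net/intel/libie/rx.h>
#include <linux/net/intel/libie/stats.h>

struct example_rx_queue {
	struct page_pool	*pp;
	struct net_device	*netdev;
	struct libie_rq_stats	stats;
	u32			ring_len;
};

static int example_rxq_init(struct example_rx_queue *q, bool xdp_on)
{
	struct page_pool *pp;

	u64_stats_init(&q->stats.syncp);

	/* Buffer size, headroom and DMA attributes are derived inside the
	 * library from the netdev MTU and @xdp_on.
	 */
	pp = libie_rx_page_pool_create(q->netdev, q->ring_len, xdp_on);
	if (IS_ERR(pp))
		return PTR_ERR(pp);

	q->pp = pp;
	return 0;
}

static void example_rxq_deinit(struct example_rx_queue *q)
{
	/* Folds the pool's recycling counters into q->stats (when
	 * CONFIG_PAGE_POOL_STATS is enabled) before the pool goes away.
	 */
	libie_rx_page_pool_destroy(q->pp, &q->stats);
	q->pp = NULL;
}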
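
The stats helpers split between hotpath counters (collected on-stack and added once per poll) and slowpath counters (incremented directly under the syncp). A sketch reusing the hypothetical struct example_rx_queue from the previous snippet; example_receive_one() is likewise assumed.

/* Hypothetical helper building one skb from the next Rx descriptor */
static struct sk_buff *example_receive_one(struct example_rx_queue *q);

static int example_clean_rx(struct example_rx_queue *q,
			    struct napi_struct *napi, int budget)
{
	struct libie_rq_onstack_stats rs = { };
	int done = 0;

	while (done < budget) {
		struct sk_buff *skb = example_receive_one(q);

		if (!skb) {
			/* Slowpath: one u64_stats-protected increment */
			libie_stats_inc_one(&q->stats, build_skb_fail);
			break;
		}

		rs.packets++;
		rs.bytes += skb->len;
		napi_gro_receive(napi, skb);
		done++;
	}

	/* One syncp section per poll instead of one per packet */
	libie_rq_napi_stats_add(&q->stats, &rs);

	return done;
}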
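
Exposing the per-queue counters via ethtool then reduces to iterating over the queues; the Tx side is symmetric via the libie_sq_stats_*() counterparts. The private structure layout is an assumption.

#include <linux/ethtool.h>

struct example_priv {
	struct example_rx_queue	*rxq;
	u32			num_rxq;
};

static int example_get_sset_count(struct net_device *dev, int sset)
{
	struct example_priv *priv = netdev_priv(dev);

	if (sset != ETH_SS_STATS)
		return -EOPNOTSUPP;

	return priv->num_rxq * libie_rq_stats_get_sset_count();
}

static void example_get_strings(struct net_device *dev, u32 sset, u8 *data)
{
	struct example_priv *priv = netdev_priv(dev);
	u32 i;

	if (sset != ETH_SS_STATS)
		return;

	for (i = 0; i < priv->num_rxq; i++)
		libie_rq_stats_get_strings(&data, i);
}

static void example_get_ethtool_stats(struct net_device *dev,
				      struct ethtool_stats *stats, u64 *data)
{
	struct example_priv *priv = netdev_priv(dev);
	u32 i;

	/* Passing the live page_pool adds its current stats on top of the
	 * totals already accumulated in the queue container.
	 */
	for (i = 0; i < priv->num_rxq; i++)
		libie_rq_stats_get_data(&data, &priv->rxq[i].stats,
					priv->rxq[i].pp);
}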
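
The new CPU-side sync helpers in page_pool.h are intended to be called before the driver reads buffer contents written by HW; a sketch, with @len being the data length taken from the Rx descriptor.

#include <net/page_pool.h>

static void *example_rx_buf_va(const struct page_pool *pp,
			       const struct page *page, u32 len)
{
	/* Sync only the bytes HW could have written, starting at the
	 * headroom offset the pool was created with.
	 */
	page_pool_dma_sync_for_cpu(pp, page, len);

	return page_address(page) + pp->p.offset;
}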
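
For completeness, a driver not using libie could request the new weakly-ordered mappings directly; this duplicates what libie_rx_page_pool_create() does internally, and all values here are illustrative. page_pool_init() now rejects PP_FLAG_DMA_MAP_WEAK without PP_FLAG_DMA_MAP.

#include <linux/dma-mapping.h>
#include <net/page_pool.h>

static struct page_pool *example_create_pool(struct device *dma_dev, u32 size)
{
	struct page_pool_params pp = {
		/* _WEAK is only valid together with _MAP */
		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_MAP_WEAK |
				  PP_FLAG_DMA_SYNC_DEV,
		.order		= 0,
		.pool_size	= size,
		.nid		= NUMA_NO_NODE,
		.dev		= dma_dev,
		.dma_dir	= DMA_FROM_DEVICE,
		.max_len	= PAGE_SIZE,
		.offset		= 0,
	};

	return page_pool_create(&pp);
}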