From 5b67c13e40df408ee40c6d79c1d071cb0db61ff1 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Wed, 21 Jul 2021 18:30:42 -0400 Subject: [PATCH 01/24] uapi: add metadata flag to xdp flags Second approach to inform driver about metadata. Let user decide if metadata should be supported or not. Add this flag to allow user to inform driver that metadata is used. Set flag is sent to driver via existing ndo_bpf call in flag field. Signed-off-by: Michal Swiatkowski --- include/uapi/linux/if_link.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4882e81514b664..9c69a5df4ece90 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1179,11 +1179,13 @@ enum { #define XDP_FLAGS_DRV_MODE (1U << 2) #define XDP_FLAGS_HW_MODE (1U << 3) #define XDP_FLAGS_REPLACE (1U << 4) +#define XDP_FLAGS_USE_METADATA (1U << 5) #define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \ XDP_FLAGS_DRV_MODE | \ XDP_FLAGS_HW_MODE) #define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ - XDP_FLAGS_MODES | XDP_FLAGS_REPLACE) + XDP_FLAGS_MODES | XDP_FLAGS_REPLACE | \ + XDP_FLAGS_USE_METADATA) /* These are stored into IFLA_XDP_ATTACHED on dump. */ enum { From 2590b715c8364ef00926938065c448c01150edf7 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 9 Aug 2021 20:25:18 -0400 Subject: [PATCH 02/24] net: include xdp generic metadata definition Definition is only a proposal. There should be free place for 8B of tx timestamp. 
Signed-off-by: Michal Swiatkowski --- include/net/xdp.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/net/xdp.h b/include/net/xdp.h index 5533f0ab2afc07..80427bdddb882c 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -76,6 +76,24 @@ struct xdp_buff { u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/ }; +struct xdp_meta_generic { + // Tx part + u32 flags; + u16 free_slot; + u16 csum_off; + u16 txcvid; + + // Rx part + u16 rxcvid; + u32 csum; + u32 hash; + u64 tstamp; + + // BTF ID + u32 btf_id; +} __packed __aligned(8); +static_assert(sizeof(struct xdp_meta_generic) == 32); + static __always_inline void xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) { From c354ec1381654a923f336bd44901149585d3d42a Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 9 Aug 2021 23:21:15 -0400 Subject: [PATCH 03/24] ice: use xdp generic metadata As starting point add vlan id and rss hash if xdp metadata is supported. Add xdp_metadata_support field in VSI to allow easy passing this value to ring configuration. 
Signed-off-by: Michal Swiatkowski --- drivers/net/ethernet/intel/ice/ice.h | 2 ++ drivers/net/ethernet/intel/ice/ice_main.c | 8 ++++++-- drivers/net/ethernet/intel/ice/ice_txrx.c | 3 +++ drivers/net/ethernet/intel/ice/ice_txrx.h | 3 +++ drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 10 ++++++++++ 5 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index a450343fbb92d0..081bceb9a4907f 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -348,6 +348,8 @@ struct ice_vsi { u16 num_xdp_txq; /* Used XDP queues */ u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ + bool xdp_metadata_support; /* true if VSI should support xdp meta */ + /* setup back reference, to which aggregator node this VSI * corresponds to */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ef8d1815af5618..89d96c22a53ec4 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2377,6 +2377,7 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) xdp_ring->netdev = NULL; xdp_ring->dev = dev; xdp_ring->count = vsi->num_tx_desc; + xdp_ring->xdp_metadata_support = vsi->xdp_metadata_support; WRITE_ONCE(vsi->xdp_rings[i], xdp_ring); if (ice_setup_tx_ring(xdp_ring)) goto free_xdp_rings; @@ -2605,7 +2606,7 @@ static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi) */ static int ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, u32 flags) { int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD; bool if_running = netif_running(vsi->netdev); @@ -2625,6 +2626,9 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } } + if (flags & XDP_FLAGS_USE_METADATA) + vsi->xdp_metadata_support = true; + if (!ice_is_xdp_ena_vsi(vsi) && prog) { vsi->num_xdp_txq = vsi->alloc_rxq; 
xdp_ring_err = ice_prepare_xdp_rings(vsi, prog); @@ -2678,7 +2682,7 @@ static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: - return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack); + return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack, xdp->flags); case XDP_SETUP_XSK_POOL: return ice_xsk_pool_setup(vsi, xdp->xsk.pool, xdp->xsk.queue_id); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 6ee8e0032d52cb..128fd49383439a 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1135,6 +1135,9 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) hard_start = page_address(rx_buf->page) + rx_buf->page_offset - offset; xdp_prepare_buff(&xdp, hard_start, offset, size, true); + + if (likely(rx_ring->xdp_metadata_support)) + ice_xdp_set_meta(&xdp, rx_desc); #if (PAGE_SIZE > 4096) /* At larger PAGE_SIZE, frame_sz depend on len size */ xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 1e46e80f3d6f89..b43923ddf883cf 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -276,6 +276,7 @@ struct ice_ring { u16 q_handle; /* Queue handle per TC */ u8 ring_active:1; /* is ring online or not */ + u8 xdp_metadata_support:1; /* is xdp metadata support */ u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ @@ -301,6 +302,8 @@ struct ice_ring { /* CL3 - 3rd cacheline starts here */ struct xdp_rxq_info xdp_rxq; struct sk_buff *skb; + + /* CLX - the below items are only accessed infrequently and should be * in their own cache line if possible */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 05ac3075290260..d77bc7686b61bd 100644 --- 
a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -46,6 +46,16 @@ static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring) writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail); } +static inline void ice_xdp_set_meta(struct xdp_buff *xdp, union ice_32b_rx_flex_desc *desc) +{ + struct ice_32b_rx_flex_desc_nic *flex = (struct ice_32b_rx_flex_desc_nic *)desc; + struct xdp_meta_generic *md = xdp->data - sizeof(struct xdp_meta_generic); + + xdp->data_meta = md; + md->rxcvid = le16_to_cpu(flex->flex_ts.flex.vlan_id); + md->hash = le32_to_cpu(flex->rss_hash); +} + void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res); int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring); int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring); From d7691ffdf4a430a9a9b2b49eecec5c984a3bc006 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 29 Jul 2021 17:20:21 +0100 Subject: [PATCH 04/24] libbpf: Return non-null error on failures in libbpf_find_prog_btf_id() Variable "err" is initialised to -EINVAL so that this error code is returned when something goes wrong in libbpf_find_prog_btf_id(). However, a recent change in the function made use of the variable in such a way that it is set to 0 if retrieving linear information on the program is successful, and this 0 value remains if we error out on failures at later stages. Let's fix this by setting err to -EINVAL later in the function. 
Fixes: e9fc3ce99b34 ("libbpf: Streamline error reporting for high-level APIs") Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210729162028.29512-2-quentin@isovalent.com --- tools/lib/bpf/libbpf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6f5e2757bb3cfa..5e7d9b7c5ea704 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9515,7 +9515,7 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) struct bpf_prog_info_linear *info_linear; struct bpf_prog_info *info; struct btf *btf = NULL; - int err = -EINVAL; + int err; info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); err = libbpf_get_error(info_linear); @@ -9524,6 +9524,8 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) attach_prog_fd); return err; } + + err = -EINVAL; info = &info_linear->info; if (!info->btf_id) { pr_warn("The target program doesn't have BTF\n"); From caca8ad9ba4549c43caf9bdf054804dc683fca16 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 29 Jul 2021 17:20:22 +0100 Subject: [PATCH 05/24] libbpf: Rename btf__load() as btf__load_into_kernel() As part of the effort to move towards a v1.0 for libbpf, rename btf__load() function, used to "upload" BTF information into the kernel, as btf__load_into_kernel(). This new name better reflects what the function does. 
References: - https://github.com/libbpf/libbpf/issues/278 - https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0#btfh-apis Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210729162028.29512-3-quentin@isovalent.com --- tools/lib/bpf/btf.c | 3 ++- tools/lib/bpf/btf.h | 1 + tools/lib/bpf/libbpf.c | 2 +- tools/lib/bpf/libbpf.map | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index b46760b93bb40d..7e0de560490ec9 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1180,7 +1180,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf__load(struct btf *btf) +int btf__load_into_kernel(struct btf *btf) { __u32 log_buf_size = 0, raw_size; char *log_buf = NULL; @@ -1228,6 +1228,7 @@ int btf__load(struct btf *btf) free(log_buf); return libbpf_err(err); } +int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel"))); int btf__fd(const struct btf *btf) { diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b54f1c3ebd57e6..b36f1b2805dc5c 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -46,6 +46,7 @@ LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_b LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); LIBBPF_API int btf__load(struct btf *btf); +LIBBPF_API int btf__load_into_kernel(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5e7d9b7c5ea704..589cf5fd79e263 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2768,7 +2768,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) */ btf__set_fd(kern_btf, 0); } else { - err 
= btf__load(kern_btf); + err = btf__load_into_kernel(kern_btf); } if (sanitize) { if (!err) { diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 944c99d1ded398..81baa32ba2ab89 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -373,5 +373,6 @@ LIBBPF_0.5.0 { bpf_map__initial_value; bpf_map_lookup_and_delete_elem_flags; bpf_object__gen_loader; + btf__load_into_kernel; libbpf_set_strict_mode; } LIBBPF_0.4.0; From 10d0b3b54df1e9b1e10e8123ed7a21feea0f6776 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 29 Jul 2021 17:20:23 +0100 Subject: [PATCH 06/24] libbpf: Rename btf__get_from_id() as btf__load_from_kernel_by_id() Rename function btf__get_from_id() as btf__load_from_kernel_by_id() to better indicate what the function does. Change the new function so that, instead of requiring a pointer to the pointer to update and returning with an error code, it takes a single argument (the id of the BTF object) and returns the corresponding pointer. This is more in line with the existing constructors. The other tools calling the (soon-to-be) deprecated btf__get_from_id() function will be updated in a future commit. 
References: - https://github.com/libbpf/libbpf/issues/278 - https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0#btfh-apis Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210729162028.29512-4-quentin@isovalent.com --- tools/lib/bpf/btf.c | 25 +++++++++++++++++-------- tools/lib/bpf/btf.h | 4 +++- tools/lib/bpf/libbpf.c | 5 +++-- tools/lib/bpf/libbpf.map | 1 + 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 7e0de560490ec9..948c29fee4472f 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1383,21 +1383,30 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf) return btf; } -int btf__get_from_id(__u32 id, struct btf **btf) +struct btf *btf__load_from_kernel_by_id(__u32 id) { - struct btf *res; - int err, btf_fd; + struct btf *btf; + int btf_fd; - *btf = NULL; btf_fd = bpf_btf_get_fd_by_id(id); if (btf_fd < 0) - return libbpf_err(-errno); - - res = btf_get_from_fd(btf_fd, NULL); - err = libbpf_get_error(res); + return libbpf_err_ptr(-errno); + btf = btf_get_from_fd(btf_fd, NULL); close(btf_fd); + return libbpf_ptr(btf); +} + +int btf__get_from_id(__u32 id, struct btf **btf) +{ + struct btf *res; + int err; + + *btf = NULL; + res = btf__load_from_kernel_by_id(id); + err = libbpf_get_error(res); + if (err) return libbpf_err(err); diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b36f1b2805dc5c..b93b9726dc3d46 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -44,6 +44,9 @@ LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_b LIBBPF_API struct btf *btf__parse_raw(const char *path); LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf); +LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id); +LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); + LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct 
btf *btf); LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API int btf__load_into_kernel(struct btf *btf); @@ -67,7 +70,6 @@ LIBBPF_API void btf__set_fd(struct btf *btf, int fd); LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); -LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, __u32 expected_key_size, __u32 expected_value_size, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 589cf5fd79e263..f28ffd84775418 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9514,7 +9514,7 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) { struct bpf_prog_info_linear *info_linear; struct bpf_prog_info *info; - struct btf *btf = NULL; + struct btf *btf; int err; info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); @@ -9531,7 +9531,8 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) pr_warn("The target program doesn't have BTF\n"); goto out; } - if (btf__get_from_id(info->btf_id, &btf)) { + btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(btf)) { pr_warn("Failed to get BTF of the program\n"); goto out; } diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 81baa32ba2ab89..8d8edf317c927f 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -373,6 +373,7 @@ LIBBPF_0.5.0 { bpf_map__initial_value; bpf_map_lookup_and_delete_elem_flags; bpf_object__gen_loader; + btf__load_from_kernel_by_id; btf__load_into_kernel; libbpf_set_strict_mode; } LIBBPF_0.4.0; From 8f2e323a0625dc53bc5a287b696cb9fae0347ee6 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 29 Jul 2021 17:20:24 +0100 Subject: [PATCH 07/24] tools: Free BTF objects at various locations 
Make sure to call btf__free() (and not simply free(), which does not free all pointers stored in the struct) on pointers to struct btf objects retrieved at various locations. These were found while updating the calls to btf__get_from_id(). Fixes: 999d82cbc044 ("tools/bpf: enhance test_btf file testing to test func info") Fixes: 254471e57a86 ("tools/bpf: bpftool: add support for func types") Fixes: 7b612e291a5a ("perf tools: Synthesize PERF_RECORD_* for loaded BPF programs") Fixes: d56354dc4909 ("perf tools: Save bpf_prog_info and BTF of new BPF programs") Fixes: 47c09d6a9f67 ("bpftool: Introduce "prog profile" command") Fixes: fa853c4b839e ("perf stat: Enable counting events for BPF programs") Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210729162028.29512-5-quentin@isovalent.com --- tools/bpf/bpftool/prog.c | 5 ++++- tools/perf/util/bpf-event.c | 4 ++-- tools/perf/util/bpf_counter.c | 3 ++- tools/testing/selftests/bpf/prog_tests/btf.c | 1 + 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index cc48726740ade0..9d709b42766558 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -781,6 +781,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, kernel_syms_destroy(&dd); } + btf__free(btf); + return 0; } @@ -2002,8 +2004,8 @@ static char *profile_target_name(int tgt_fd) struct bpf_prog_info_linear *info_linear; struct bpf_func_info *func_info; const struct btf_type *t; + struct btf *btf = NULL; char *name = NULL; - struct btf *btf; info_linear = bpf_program__get_prog_info_linear( tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); @@ -2027,6 +2029,7 @@ static char *profile_target_name(int tgt_fd) } name = strdup(btf__name_by_offset(btf, t->name_off)); out: + btf__free(btf); free(info_linear); return name; } diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index cdecda1ddd36e2..17a9844e4fbf80 100644 --- 
a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -296,7 +296,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, out: free(info_linear); - free(btf); + btf__free(btf); return err ? -1 : 0; } @@ -486,7 +486,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) perf_env__fetch_btf(env, btf_id, btf); out: - free(btf); + btf__free(btf); close(fd); } diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 8150e03367bbaf..beca55129b0b27 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -64,8 +64,8 @@ static char *bpf_target_prog_name(int tgt_fd) struct bpf_prog_info_linear *info_linear; struct bpf_func_info *func_info; const struct btf_type *t; + struct btf *btf = NULL; char *name = NULL; - struct btf *btf; info_linear = bpf_program__get_prog_info_linear( tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); @@ -89,6 +89,7 @@ static char *bpf_target_prog_name(int tgt_fd) } name = strdup(btf__name_by_offset(btf, t->name_off)); out: + btf__free(btf); free(info_linear); return name; } diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 857e3f26086fec..68e415f4d33cd2 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -4386,6 +4386,7 @@ static void do_test_file(unsigned int test_num) fprintf(stderr, "OK"); done: + btf__free(btf); free(func_info); bpf_object__close(obj); } From c0915ba14fa8afbce28ff64f4ba3848ef15d53bf Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 29 Jul 2021 17:20:25 +0100 Subject: [PATCH 08/24] tools: Replace btf__get_from_id() with btf__load_from_kernel_by_id() Replace the calls to function btf__get_from_id(), which we plan to deprecate before the library reaches v1.0, with calls to btf__load_from_kernel_by_id() in tools/ (bpftool, perf, selftests). 
Update the surrounding code accordingly (instead of passing a pointer to the btf struct, get it as a return value from the function). Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210729162028.29512-6-quentin@isovalent.com --- tools/bpf/bpftool/btf.c | 8 ++----- tools/bpf/bpftool/btf_dumper.c | 6 +++-- tools/bpf/bpftool/map.c | 14 ++++++------ tools/bpf/bpftool/prog.c | 24 +++++++++++++------- tools/perf/util/bpf-event.c | 7 +++--- tools/perf/util/bpf_counter.c | 9 ++++++-- tools/testing/selftests/bpf/prog_tests/btf.c | 3 ++- 7 files changed, 42 insertions(+), 29 deletions(-) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 385d5c955cf3de..9162a18e84c073 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -580,16 +580,12 @@ static int do_dump(int argc, char **argv) } if (!btf) { - err = btf__get_from_id(btf_id, &btf); + btf = btf__load_from_kernel_by_id(btf_id); + err = libbpf_get_error(btf); if (err) { p_err("get btf by id (%u): %s", btf_id, strerror(err)); goto done; } - if (!btf) { - err = -ENOENT; - p_err("can't find btf with ID (%u)", btf_id); - goto done; - } } if (dump_c) { diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 7ca54d046362c0..9c25286a5c7379 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -64,8 +64,10 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, } info = &prog_info->info; - if (!info->btf_id || !info->nr_func_info || - btf__get_from_id(info->btf_id, &prog_btf)) + if (!info->btf_id || !info->nr_func_info) + goto print; + prog_btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(prog_btf)) goto print; finfo = u64_to_ptr(info->func_info); func_type = btf__type_by_id(prog_btf, finfo->type_id); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 09ae0381205b69..7e7f748bb0be19 100644 --- a/tools/bpf/bpftool/map.c 
+++ b/tools/bpf/bpftool/map.c @@ -807,10 +807,11 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info) } else if (info->btf_value_type_id) { int err; - err = btf__get_from_id(info->btf_id, &btf); - if (err || !btf) { + btf = btf__load_from_kernel_by_id(info->btf_id); + err = libbpf_get_error(btf); + if (err) { p_err("failed to get btf"); - btf = err ? ERR_PTR(err) : ERR_PTR(-ESRCH); + btf = ERR_PTR(err); } } @@ -1039,11 +1040,10 @@ static void print_key_value(struct bpf_map_info *info, void *key, void *value) { json_writer_t *btf_wtr; - struct btf *btf = NULL; - int err; + struct btf *btf; - err = btf__get_from_id(info->btf_id, &btf); - if (err) { + btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(btf)) { p_err("failed to get btf"); return; } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 9d709b42766558..b1996b8f1d4223 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -249,10 +249,10 @@ static void show_prog_metadata(int fd, __u32 num_maps) struct bpf_map_info map_info; struct btf_var_secinfo *vsi; bool printed_header = false; - struct btf *btf = NULL; unsigned int i, vlen; void *value = NULL; const char *name; + struct btf *btf; int err; if (!num_maps) @@ -263,8 +263,8 @@ static void show_prog_metadata(int fd, __u32 num_maps) if (!value) return; - err = btf__get_from_id(map_info.btf_id, &btf); - if (err || !btf) + btf = btf__load_from_kernel_by_id(map_info.btf_id); + if (libbpf_get_error(btf)) goto out_free; t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id); @@ -646,9 +646,12 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, member_len = info->xlated_prog_len; } - if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) { - p_err("failed to get btf"); - return -1; + if (info->btf_id) { + btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(btf)) { + p_err("failed to get btf"); + return -1; + } } func_info = u64_to_ptr(info->func_info); @@ 
-2014,12 +2017,17 @@ static char *profile_target_name(int tgt_fd) return NULL; } - if (info_linear->info.btf_id == 0 || - btf__get_from_id(info_linear->info.btf_id, &btf)) { + if (info_linear->info.btf_id == 0) { p_err("prog FD %d doesn't have valid btf", tgt_fd); goto out; } + btf = btf__load_from_kernel_by_id(info_linear->info.btf_id); + if (libbpf_get_error(btf)) { + p_err("failed to load btf for prog FD %d", tgt_fd); + goto out; + } + func_info = u64_to_ptr(info_linear->info.func_info); t = btf__type_by_id(btf, func_info[0].type_id); if (!t) { diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 17a9844e4fbf80..996d025b8ed83e 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -223,10 +223,10 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, free(info_linear); return -1; } - if (btf__get_from_id(info->btf_id, &btf)) { + btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(btf)) { pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id); err = -1; - btf = NULL; goto out; } perf_env__fetch_btf(env, info->btf_id, btf); @@ -478,7 +478,8 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) if (btf_id == 0) goto out; - if (btf__get_from_id(btf_id, &btf)) { + btf = btf__load_from_kernel_by_id(btf_id); + if (libbpf_get_error(btf)) { pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, btf_id); goto out; diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index beca55129b0b27..ba0f208536511f 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -74,12 +74,17 @@ static char *bpf_target_prog_name(int tgt_fd) return NULL; } - if (info_linear->info.btf_id == 0 || - btf__get_from_id(info_linear->info.btf_id, &btf)) { + if (info_linear->info.btf_id == 0) { pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd); goto out; } + btf = 
btf__load_from_kernel_by_id(info_linear->info.btf_id); + if (libbpf_get_error(btf)) { + pr_debug("failed to load btf for prog FD %d\n", tgt_fd); + goto out; + } + func_info = u64_to_ptr(info_linear->info.func_info); t = btf__type_by_id(btf, func_info[0].type_id); if (!t) { diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 68e415f4d33cd2..649f87382c8d87 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -4350,7 +4350,8 @@ static void do_test_file(unsigned int test_num) goto done; } - err = btf__get_from_id(info.btf_id, &btf); + btf = btf__load_from_kernel_by_id(info.btf_id); + err = libbpf_get_error(btf); if (CHECK(err, "cannot get btf from kernel, err: %d", err)) goto done; From 54e9c44091bc6b9c209f7eea05c2108c6931a9b0 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 29 Jul 2021 17:20:27 +0100 Subject: [PATCH 09/24] libbpf: Add split BTF support for btf__load_from_kernel_by_id() Add a new API function btf__load_from_kernel_by_id_split(), which takes a pointer to a base BTF object in order to support split BTF objects when retrieving BTF information from the kernel. 
Reference: https://github.com/libbpf/libbpf/issues/314 Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210729162028.29512-8-quentin@isovalent.com --- tools/lib/bpf/btf.c | 9 +++++++-- tools/lib/bpf/btf.h | 1 + tools/lib/bpf/libbpf.map | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 948c29fee4472f..cafa4f6bd9b127 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1383,7 +1383,7 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf) return btf; } -struct btf *btf__load_from_kernel_by_id(__u32 id) +struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) { struct btf *btf; int btf_fd; @@ -1392,12 +1392,17 @@ struct btf *btf__load_from_kernel_by_id(__u32 id) if (btf_fd < 0) return libbpf_err_ptr(-errno); - btf = btf_get_from_fd(btf_fd, NULL); + btf = btf_get_from_fd(btf_fd, base_btf); close(btf_fd); return libbpf_ptr(btf); } +struct btf *btf__load_from_kernel_by_id(__u32 id) +{ + return btf__load_from_kernel_by_id_split(id, NULL); +} + int btf__get_from_id(__u32 id, struct btf **btf) { struct btf *res; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b93b9726dc3d46..029c3209e541ba 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -45,6 +45,7 @@ LIBBPF_API struct btf *btf__parse_raw(const char *path); LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf); LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id); +LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf); LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 8d8edf317c927f..be4157c58bdb3e 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -374,6 +374,7 @@ 
LIBBPF_0.5.0 { bpf_map_lookup_and_delete_elem_flags; bpf_object__gen_loader; btf__load_from_kernel_by_id; + btf__load_from_kernel_by_id_split; btf__load_into_kernel; libbpf_set_strict_mode; } LIBBPF_0.4.0; From 59406b0c001ca183271a24290b657d0f14742152 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Wed, 29 Jul 2020 09:57:08 -0700 Subject: [PATCH 10/24] igc: Fix race condition in PTP Tx code Currently, the igc driver supports timestamping only one Tx packet at a time. During the transmission flow, the skb that requires hardware timestamping is saved in adapter->ptp_tx_skb. Once hardware has the timestamp, an interrupt is delivered, and adapter->ptp_tx_work is scheduled. In igc_ptp_tx_work(), we read the timestamp register, update adapter->ptp_tx_skb, and notify the network stack. While the thread executing the transmission flow (the user process running in kernel mode) and the thread executing ptp_tx_work don't access adapter->ptp_tx_skb concurrently, there are two other places where adapter->ptp_tx_skb is accessed: igc_ptp_tx_hang() and igc_ptp_suspend(). igc_ptp_tx_hang() is executed by the adapter->watchdog_task worker thread which runs periodically so it is possible we have two threads accessing ptp_tx_skb at the same time. Consider the following scenario: right after __IGC_PTP_TX_IN_PROGRESS is set in igc_xmit_frame_ring(), igc_ptp_tx_hang() is executed. Since adapter->ptp_tx_start hasn't been written yet, this is considered a timeout and adapter->ptp_tx_skb is cleaned up. This patch fixes the issue described above by adding the ptp_tx_lock to protect access to ptp_tx_skb and ptp_tx_start fields from igc_adapter. Since igc_xmit_frame_ring() is called in atomic context by the networking stack, ptp_tx_lock is defined as a spinlock. With the introduction of the ptp_tx_lock, the __IGC_PTP_TX_IN_PROGRESS flag doesn't provide much of a use anymore so this patch gets rid of it. 
Signed-off-by: Andre Guedes --- drivers/net/ethernet/intel/igc/igc.h | 5 ++- drivers/net/ethernet/intel/igc/igc_main.c | 7 +++- drivers/net/ethernet/intel/igc/igc_ptp.c | 49 ++++++++++++++--------- 3 files changed, 40 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 5901ed9fb545e8..7e3f85472a38f3 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -215,6 +215,10 @@ struct igc_adapter { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; struct work_struct ptp_tx_work; + /* Access to ptp_tx_skb and ptp_tx_start is protected by the + * ptp_tx_lock. + */ + spinlock_t ptp_tx_lock; struct sk_buff *ptp_tx_skb; struct hwtstamp_config tstamp_config; unsigned long ptp_tx_start; @@ -385,7 +389,6 @@ enum igc_state_t { __IGC_TESTING, __IGC_RESETTING, __IGC_DOWN, - __IGC_PTP_TX_IN_PROGRESS, }; enum igc_tx_flags { diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index e29aadbc674418..be0406c03c6cf8 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1439,13 +1439,14 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); + spin_lock(&adapter->ptp_tx_lock); + /* FIXME: add support for retrieving timestamps from * the other timer registers before skipping the * timestamping request. 
*/ if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && - !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS, - &adapter->state)) { + !adapter->ptp_tx_skb) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGC_TX_FLAGS_TSTAMP; @@ -1454,6 +1455,8 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, } else { adapter->tx_hwtstamp_skipped++; } + + spin_unlock(&adapter->ptp_tx_lock); } if (skb_vlan_tag_present(skb)) { diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 69617d2c1be23f..92ed2760485bdf 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -598,35 +598,35 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, return 0; } +/* Requires adapter->ptp_tx_lock held by caller. */ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; dev_kfree_skb_any(adapter->ptp_tx_skb); adapter->ptp_tx_skb = NULL; + adapter->ptp_tx_start = 0; adapter->tx_hwtstamp_timeouts++; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); /* Clear the tx valid bit in TSYNCTXCTL register to enable interrupt. */ rd32(IGC_TXSTMPH); + netdev_warn(adapter->netdev, "Tx timestamp timeout\n"); } void igc_ptp_tx_hang(struct igc_adapter *adapter) { - bool timeout = time_is_before_jiffies(adapter->ptp_tx_start + - IGC_PTP_TX_TIMEOUT); + spin_lock(&adapter->ptp_tx_lock); - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) - return; + if (!adapter->ptp_tx_skb) + goto unlock; - /* If we haven't received a timestamp within the timeout, it is - * reasonable to assume that it will never occur, so we can unlock the - * timestamp bit when this occurs. 
- */ - if (timeout) { - cancel_work_sync(&adapter->ptp_tx_work); - igc_ptp_tx_timeout(adapter); - } + if (time_is_after_jiffies(adapter->ptp_tx_start + IGC_PTP_TX_TIMEOUT)) + goto unlock; + + igc_ptp_tx_timeout(adapter); + +unlock: + spin_unlock(&adapter->ptp_tx_lock); } /** @@ -636,6 +636,8 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter) * If we were asked to do hardware stamping and such a time stamp is * available, then it must have been for this skb here because we only * allow only one such packet into the queue. + * + * Context: Expects adapter->ptp_tx_lock to be held by caller. */ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) { @@ -676,7 +678,7 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) * while we're notifying the stack. */ adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + adapter->ptp_tx_start = 0; /* Notify the stack and free the skb after we've unlocked */ skb_tstamp_tx(skb, &shhwtstamps); @@ -697,14 +699,19 @@ static void igc_ptp_tx_work(struct work_struct *work) struct igc_hw *hw = &adapter->hw; u32 tsynctxctl; - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) - return; + spin_lock(&adapter->ptp_tx_lock); + + if (!adapter->ptp_tx_skb) + goto unlock; tsynctxctl = rd32(IGC_TSYNCTXCTL); if (WARN_ON_ONCE(!(tsynctxctl & IGC_TSYNCTXCTL_TXTT_0))) - return; + goto unlock; igc_ptp_tx_hwtstamp(adapter); + +unlock: + spin_unlock(&adapter->ptp_tx_lock); } /** @@ -795,6 +802,7 @@ void igc_ptp_init(struct igc_adapter *adapter) } spin_lock_init(&adapter->tmreg_lock); + spin_lock_init(&adapter->ptp_tx_lock); INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work); adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; @@ -845,9 +853,14 @@ void igc_ptp_suspend(struct igc_adapter *adapter) return; cancel_work_sync(&adapter->ptp_tx_work); + + spin_lock(&adapter->ptp_tx_lock); + dev_kfree_skb_any(adapter->ptp_tx_skb); adapter->ptp_tx_skb = NULL; - 
clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + adapter->ptp_tx_start = 0; + + spin_unlock(&adapter->ptp_tx_lock); igc_ptp_time_save(adapter); } From 381d5201d2251118ac0f319e1c05b6a3b8915c1d Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 12 Apr 2021 17:09:56 -0700 Subject: [PATCH 11/24] igc: Retrieve the TX timestamp directly (instead of in an interrupt) Handling of TX timestamp interrupt should be simple enough to not cause issues during the interrupt context. This way, the processing is simplified and potentially more performant. This patch is inspired by the i40e driver approach. Signed-off-by: Vinicius Costa Gomes --- drivers/net/ethernet/intel/igc/igc.h | 2 +- drivers/net/ethernet/intel/igc/igc_main.c | 6 +++- drivers/net/ethernet/intel/igc/igc_ptp.c | 41 +++++------------------ 3 files changed, 14 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 7e3f85472a38f3..eccb3cc06f89bd 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -214,7 +214,6 @@ struct igc_adapter { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; - struct work_struct ptp_tx_work; /* Access to ptp_tx_skb and ptp_tx_start is protected by the * ptp_tx_lock. 
*/ @@ -595,6 +594,7 @@ void igc_ptp_reset(struct igc_adapter *adapter); void igc_ptp_suspend(struct igc_adapter *adapter); void igc_ptp_stop(struct igc_adapter *adapter); ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf); +void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter); int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); void igc_ptp_tx_hang(struct igc_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index be0406c03c6cf8..7a786ec46c15c9 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -4685,8 +4685,12 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) } if (tsicr & IGC_TSICR_TXTS) { + u32 tsynctxctl = rd32(IGC_TSYNCTXCTL);; + /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); + if (tsynctxctl & IGC_TSYNCTXCTL_TXTT_0) + igc_ptp_tx_hwtstamp(adapter); + ack |= IGC_TSICR_TXTS; } diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 92ed2760485bdf..3ec0baa8451a98 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -639,16 +639,19 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter) * * Context: Expects adapter->ptp_tx_lock to be held by caller. 
*/ -static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) +void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) { - struct sk_buff *skb = adapter->ptp_tx_skb; struct skb_shared_hwtstamps shhwtstamps; struct igc_hw *hw = &adapter->hw; + struct sk_buff *skb; int adjust = 0; u64 regval; + spin_lock(&adapter->ptp_tx_lock); + skb = adapter->ptp_tx_skb; + if (WARN_ON_ONCE(!skb)) - return; + goto done; regval = rd32(IGC_TXSTMPL); regval |= (u64)rd32(IGC_TXSTMPH) << 32; @@ -683,35 +686,10 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) /* Notify the stack and free the skb after we've unlocked */ skb_tstamp_tx(skb, &shhwtstamps); dev_kfree_skb_any(skb); -} -/** - * igc_ptp_tx_work - * @work: pointer to work struct - * - * This work function polls the TSYNCTXCTL valid bit to determine when a - * timestamp has been taken for the current stored skb. - */ -static void igc_ptp_tx_work(struct work_struct *work) -{ - struct igc_adapter *adapter = container_of(work, struct igc_adapter, - ptp_tx_work); - struct igc_hw *hw = &adapter->hw; - u32 tsynctxctl; - - spin_lock(&adapter->ptp_tx_lock); - - if (!adapter->ptp_tx_skb) - goto unlock; - - tsynctxctl = rd32(IGC_TSYNCTXCTL); - if (WARN_ON_ONCE(!(tsynctxctl & IGC_TSYNCTXCTL_TXTT_0))) - goto unlock; - - igc_ptp_tx_hwtstamp(adapter); - -unlock: +done: spin_unlock(&adapter->ptp_tx_lock); + } /** @@ -803,7 +781,6 @@ void igc_ptp_init(struct igc_adapter *adapter) spin_lock_init(&adapter->tmreg_lock); spin_lock_init(&adapter->ptp_tx_lock); - INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work); adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; @@ -852,8 +829,6 @@ void igc_ptp_suspend(struct igc_adapter *adapter) if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) return; - cancel_work_sync(&adapter->ptp_tx_work); - spin_lock(&adapter->ptp_tx_lock); dev_kfree_skb_any(adapter->ptp_tx_skb); From 363fe5d6828e53dc85c74957c98de309220707a8 Mon Sep 17 00:00:00 2001 From: 
Vinicius Costa Gomes Date: Mon, 19 Apr 2021 17:04:33 -0700 Subject: [PATCH 12/24] igc: Add support for multiple in-flight TX timestamps Adds support for using the four sets of timestamping registers that i225 has available for TX. In some TSN workloads, where multiple applications request hardware transmission timestamps, it was possible that some of those requests were denied because the only in use register was already occupied. Signed-off-by: Vinicius Costa Gomes --- drivers/net/ethernet/intel/igc/igc.h | 20 ++- drivers/net/ethernet/intel/igc/igc_base.h | 3 + drivers/net/ethernet/intel/igc/igc_defines.h | 7 + drivers/net/ethernet/intel/igc/igc_main.c | 45 +++-- drivers/net/ethernet/intel/igc/igc_ptp.c | 172 +++++++++++++------ drivers/net/ethernet/intel/igc/igc_regs.h | 12 ++ 6 files changed, 192 insertions(+), 67 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index eccb3cc06f89bd..04735db330fd09 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -65,6 +65,17 @@ struct igc_rx_packet_stats { u64 other_packets; }; +#define IGC_MAX_TX_TSTAMP_TIMERS 4 + +struct igc_tx_timestamp_request { + struct sk_buff *skb; + unsigned long start; + u32 mask; + u32 regl; + u32 regh; + u32 flags; +}; + struct igc_ring_container { struct igc_ring *ring; /* pointer to linked list of rings */ unsigned int total_bytes; /* total bytes processed this int */ @@ -218,9 +229,8 @@ struct igc_adapter { * ptp_tx_lock. 
*/ spinlock_t ptp_tx_lock; - struct sk_buff *ptp_tx_skb; + struct igc_tx_timestamp_request tx_tstamp[IGC_MAX_TX_TSTAMP_TIMERS]; struct hwtstamp_config tstamp_config; - unsigned long ptp_tx_start; unsigned int ptp_flags; /* System time value lock */ spinlock_t tmreg_lock; @@ -399,6 +409,10 @@ enum igc_tx_flags { /* olinfo flags */ IGC_TX_FLAGS_IPV4 = 0x10, IGC_TX_FLAGS_CSUM = 0x20, + + IGC_TX_FLAGS_TSTAMP_1 = 0x100, + IGC_TX_FLAGS_TSTAMP_2 = 0x200, + IGC_TX_FLAGS_TSTAMP_3 = 0x400, }; enum igc_boards { @@ -594,7 +608,7 @@ void igc_ptp_reset(struct igc_adapter *adapter); void igc_ptp_suspend(struct igc_adapter *adapter); void igc_ptp_stop(struct igc_adapter *adapter); ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf); -void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter); +void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter, u32 mask); int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); void igc_ptp_tx_hang(struct igc_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h index ce530f5fd7bdad..0d2b4482cb2f7a 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.h +++ b/drivers/net/ethernet/intel/igc/igc_base.h @@ -32,6 +32,9 @@ struct igc_adv_tx_context_desc { /* Adv Transmit Descriptor Config Masks */ #define IGC_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp packet */ +#define IGC_ADVTXD_TSTAMP_REG_1 0x00010000 /* IEEE1588 Timestamp packet */ +#define IGC_ADVTXD_TSTAMP_REG_2 0x00020000 /* IEEE1588 Timestamp packet */ +#define IGC_ADVTXD_TSTAMP_REG_3 0x00030000 /* IEEE1588 Timestamp packet */ #define IGC_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ #define IGC_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ #define IGC_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */ diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h 
b/drivers/net/ethernet/intel/igc/igc_defines.h index c3a5a5518790c2..0db7cf9d7e2bb1 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -404,6 +404,9 @@ /* Time Sync Transmit Control bit definitions */ #define IGC_TSYNCTXCTL_TXTT_0 0x00000001 /* Tx timestamp reg 0 valid */ +#define IGC_TSYNCTXCTL_TXTT_1 0x00000002 /* Tx timestamp reg 1 valid */ +#define IGC_TSYNCTXCTL_TXTT_2 0x00000004 /* Tx timestamp reg 2 valid */ +#define IGC_TSYNCTXCTL_TXTT_3 0x00000008 /* Tx timestamp reg 3 valid */ #define IGC_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */ #define IGC_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK 0x0000F000 /* max delay */ #define IGC_TSYNCTXCTL_SYNC_COMP_ERR 0x20000000 /* sync err */ @@ -411,6 +414,10 @@ #define IGC_TSYNCTXCTL_START_SYNC 0x80000000 /* initiate sync */ #define IGC_TSYNCTXCTL_TXSYNSIG 0x00000020 /* Sample TX tstamp in PHY sop */ +#define IGC_TSYNCTXCTL_TXTT_ANY ( \ + IGC_TSYNCTXCTL_TXTT_0 | IGC_TSYNCTXCTL_TXTT_1 | \ + IGC_TSYNCTXCTL_TXTT_2 | IGC_TSYNCTXCTL_TXTT_3) + /* Timer selection bits */ #define IGC_AUX_IO_TIMER_SEL_SYSTIM0 (0u << 30) /* Select SYSTIM0 for auxiliary time stamp */ #define IGC_AUX_IO_TIMER_SEL_SYSTIM1 (1u << 30) /* Select SYSTIM1 for auxiliary time stamp */ diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 7a786ec46c15c9..cdb2e31ee5a8e3 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1146,6 +1146,15 @@ static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP, (IGC_ADVTXD_MAC_TSTAMP)); + cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_1, + (IGC_ADVTXD_TSTAMP_REG_1)); + + cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_2, + (IGC_ADVTXD_TSTAMP_REG_2)); + + cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_3, + (IGC_ADVTXD_TSTAMP_REG_3)); + /* insert frame checksum */ cmd_type ^= 
IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS); @@ -1403,6 +1412,26 @@ static int igc_tso(struct igc_ring *tx_ring, return 1; } +static bool igc_request_tx_tstamp(struct igc_adapter *adapter, struct sk_buff *skb, u32 *flags) +{ + int i; + + for (i = 0; i < IGC_MAX_TX_TSTAMP_TIMERS; i++) { + struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; + + if (tstamp->skb) + continue; + + tstamp->skb = skb_get(skb); + tstamp->start = jiffies; + *flags = tstamp->flags; + + return true; + } + + return false; +} + static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, struct igc_ring *tx_ring) { @@ -1438,20 +1467,14 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); + u32 tstamp_flags; spin_lock(&adapter->ptp_tx_lock); - /* FIXME: add support for retrieving timestamps from - * the other timer registers before skipping the - * timestamping request. 
- */ if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && - !adapter->ptp_tx_skb) { + igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - tx_flags |= IGC_TX_FLAGS_TSTAMP; - - adapter->ptp_tx_skb = skb_get(skb); - adapter->ptp_tx_start = jiffies; + tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags; } else { adapter->tx_hwtstamp_skipped++; } @@ -4687,9 +4710,7 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) if (tsicr & IGC_TSICR_TXTS) { u32 tsynctxctl = rd32(IGC_TSYNCTXCTL);; - /* retrieve hardware timestamp */ - if (tsynctxctl & IGC_TSYNCTXCTL_TXTT_0) - igc_ptp_tx_hwtstamp(adapter); + igc_ptp_tx_hwtstamp(adapter, tsynctxctl & IGC_TSYNCTXCTL_TXTT_ANY); ack |= IGC_TSICR_TXTS; } diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 3ec0baa8451a98..e286b034157538 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -541,8 +541,17 @@ static void igc_ptp_enable_tx_timestamp(struct igc_adapter *adapter) wr32(IGC_TSYNCTXCTL, IGC_TSYNCTXCTL_ENABLED | IGC_TSYNCTXCTL_TXSYNSIG); /* Read TXSTMP registers to discard any timestamp previously stored. */ - rd32(IGC_TXSTMPL); - rd32(IGC_TXSTMPH); + rd32(IGC_TXSTMPL_0); + rd32(IGC_TXSTMPH_0); + + rd32(IGC_TXSTMPL_1); + rd32(IGC_TXSTMPH_1); + + rd32(IGC_TXSTMPL_2); + rd32(IGC_TXSTMPH_2); + + rd32(IGC_TXSTMPL_3); + rd32(IGC_TXSTMPH_3); } /** @@ -599,33 +608,40 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, } /* Requires adapter->ptp_tx_lock held by caller. 
*/ -static void igc_ptp_tx_timeout(struct igc_adapter *adapter) +static void igc_ptp_tx_timeout(struct igc_adapter *adapter, + struct igc_tx_timestamp_request *tstamp) { struct igc_hw *hw = &adapter->hw; - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - adapter->ptp_tx_start = 0; + dev_kfree_skb_any(tstamp->skb); + tstamp->skb = NULL; + tstamp->start = 0; adapter->tx_hwtstamp_timeouts++; /* Clear the tx valid bit in TSYNCTXCTL register to enable interrupt. */ - rd32(IGC_TXSTMPH); + rd32(tstamp->regh); netdev_warn(adapter->netdev, "Tx timestamp timeout\n"); } void igc_ptp_tx_hang(struct igc_adapter *adapter) { + struct igc_tx_timestamp_request *tstamp; + int i; + spin_lock(&adapter->ptp_tx_lock); - if (!adapter->ptp_tx_skb) - goto unlock; + for (i = 0; i < IGC_MAX_TX_TSTAMP_TIMERS; i++) { + tstamp = &adapter->tx_tstamp[i]; - if (time_is_after_jiffies(adapter->ptp_tx_start + IGC_PTP_TX_TIMEOUT)) - goto unlock; + if (!tstamp->skb) + continue; - igc_ptp_tx_timeout(adapter); + if (time_is_after_jiffies(tstamp->start + IGC_PTP_TX_TIMEOUT)) + continue; + + igc_ptp_tx_timeout(adapter, tstamp); + } -unlock: spin_unlock(&adapter->ptp_tx_lock); } @@ -639,57 +655,73 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter) * * Context: Expects adapter->ptp_tx_lock to be held by caller. 
*/ -void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) +void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter, u32 mask) { struct skb_shared_hwtstamps shhwtstamps; struct igc_hw *hw = &adapter->hw; struct sk_buff *skb; int adjust = 0; u64 regval; + int i; +again: spin_lock(&adapter->ptp_tx_lock); - skb = adapter->ptp_tx_skb; - - if (WARN_ON_ONCE(!skb)) - goto done; - regval = rd32(IGC_TXSTMPL); - regval |= (u64)rd32(IGC_TXSTMPH) << 32; - igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); - - switch (adapter->link_speed) { - case SPEED_10: - adjust = IGC_I225_TX_LATENCY_10; - break; - case SPEED_100: - adjust = IGC_I225_TX_LATENCY_100; - break; - case SPEED_1000: - adjust = IGC_I225_TX_LATENCY_1000; - break; - case SPEED_2500: - adjust = IGC_I225_TX_LATENCY_2500; - break; - } + for (i = 0; i < IGC_MAX_TX_TSTAMP_TIMERS; i++) { + struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; + + if (!(mask & tstamp->mask)) + continue; + + skb = tstamp->skb; + if (!skb) + continue; + + regval = rd32(tstamp->regl); + regval |= (u64)rd32(tstamp->regh) << 32; + igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); + + switch (adapter->link_speed) { + case SPEED_10: + adjust = IGC_I225_TX_LATENCY_10; + break; + case SPEED_100: + adjust = IGC_I225_TX_LATENCY_100; + break; + case SPEED_1000: + adjust = IGC_I225_TX_LATENCY_1000; + break; + case SPEED_2500: + adjust = IGC_I225_TX_LATENCY_2500; + break; + } - shhwtstamps.hwtstamp = - ktime_add_ns(shhwtstamps.hwtstamp, adjust); + shhwtstamps.hwtstamp = + ktime_add_ns(shhwtstamps.hwtstamp, adjust); - /* Clear the lock early before calling skb_tstamp_tx so that - * applications are not woken up before the lock bit is clear. We use - * a copy of the skb pointer to ensure other threads can't change it - * while we're notifying the stack. 
- */ - adapter->ptp_tx_skb = NULL; - adapter->ptp_tx_start = 0; + /* Clear the lock early before calling skb_tstamp_tx so that + * applications are not woken up before the lock bit is clear. We use + * a copy of the skb pointer to ensure other threads can't change it + * while we're notifying the stack. + */ + tstamp->skb = NULL; + tstamp->start = 0; - /* Notify the stack and free the skb after we've unlocked */ - skb_tstamp_tx(skb, &shhwtstamps); - dev_kfree_skb_any(skb); + /* Notify the stack and free the skb after we've unlocked */ + skb_tstamp_tx(skb, &shhwtstamps); + dev_kfree_skb_any(skb); + } -done: spin_unlock(&adapter->ptp_tx_lock); + mask = rd32(IGC_TSYNCTXCTL) & IGC_TSYNCTXCTL_TXTT_ANY; + if (mask) { + /* Some timestamps arrived while we were handling the + * previous ones + */ + goto again; + } + } /** @@ -747,9 +779,34 @@ int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) void igc_ptp_init(struct igc_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct igc_tx_timestamp_request *tstamp; struct igc_hw *hw = &adapter->hw; int i; + tstamp = &adapter->tx_tstamp[0]; + tstamp->mask = IGC_TSYNCTXCTL_TXTT_0; + tstamp->regl = IGC_TXSTMPL_0; + tstamp->regh = IGC_TXSTMPH_0; + tstamp->flags = 0; + + tstamp = &adapter->tx_tstamp[1]; + tstamp->mask = IGC_TSYNCTXCTL_TXTT_1; + tstamp->regl = IGC_TXSTMPL_1; + tstamp->regh = IGC_TXSTMPH_1; + tstamp->flags = IGC_TX_FLAGS_TSTAMP_1; + + tstamp = &adapter->tx_tstamp[2]; + tstamp->mask = IGC_TSYNCTXCTL_TXTT_2; + tstamp->regl = IGC_TXSTMPL_2; + tstamp->regh = IGC_TXSTMPH_2; + tstamp->flags = IGC_TX_FLAGS_TSTAMP_2; + + tstamp = &adapter->tx_tstamp[3]; + tstamp->mask = IGC_TSYNCTXCTL_TXTT_3; + tstamp->regl = IGC_TXSTMPL_3; + tstamp->regh = IGC_TXSTMPH_3; + tstamp->flags = IGC_TX_FLAGS_TSTAMP_3; + switch (hw->mac.type) { case igc_i225: for (i = 0; i < IGC_N_SDP; i++) { @@ -817,6 +874,19 @@ static void igc_ptp_time_restore(struct igc_adapter *adapter) igc_ptp_write_i225(adapter, &ts); } 
+static void igc_tx_tstamp_clear(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < IGC_MAX_TX_TSTAMP_TIMERS; i++) { + struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; + + dev_kfree_skb_any(tstamp->skb); + tstamp->skb = NULL; + tstamp->start = 0; + } +} + /** * igc_ptp_suspend - Disable PTP work items and prepare for suspend * @adapter: Board private structure @@ -831,9 +901,7 @@ void igc_ptp_suspend(struct igc_adapter *adapter) spin_lock(&adapter->ptp_tx_lock); - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - adapter->ptp_tx_start = 0; + igc_tx_tstamp_clear(adapter); spin_unlock(&adapter->ptp_tx_lock); diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 0f82990567d982..40c8430cf77f51 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -226,6 +226,18 @@ #define IGC_SYSTIMR 0x0B6F8 /* System time register Residue */ #define IGC_TIMINCA 0x0B608 /* Increment attributes register - RW */ +/* TX Timestamp Low */ +#define IGC_TXSTMPL_0 0x0B618 +#define IGC_TXSTMPL_1 0x0B698 +#define IGC_TXSTMPL_2 0x0B6B8 +#define IGC_TXSTMPL_3 0x0B6D8 + +/* TX Timestamp High */ +#define IGC_TXSTMPH_0 0x0B61C +#define IGC_TXSTMPH_1 0x0B69C +#define IGC_TXSTMPH_2 0x0B6BC +#define IGC_TXSTMPH_3 0x0B6DC + #define IGC_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */ #define IGC_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */ From 6e972737a24405550be8d9344142ccd9573f28c0 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 23 Apr 2021 17:43:24 -0700 Subject: [PATCH 13/24] igc: Use irq safe locks for timestamping Now that the timestamping is done in interrupt context we should protect against concurrent access using irq safe locks. 
Signed-off-by: Vinicius Costa Gomes --- drivers/net/ethernet/intel/igc/igc_main.c | 5 +++-- drivers/net/ethernet/intel/igc/igc_ptp.c | 16 ++++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index cdb2e31ee5a8e3..60957af95c943c 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1467,9 +1467,10 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); + unsigned long flags; u32 tstamp_flags; - spin_lock(&adapter->ptp_tx_lock); + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) { @@ -1479,7 +1480,7 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, adapter->tx_hwtstamp_skipped++; } - spin_unlock(&adapter->ptp_tx_lock); + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } if (skb_vlan_tag_present(skb)) { diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index e286b034157538..911c36a909a49b 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -626,9 +626,10 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter, void igc_ptp_tx_hang(struct igc_adapter *adapter) { struct igc_tx_timestamp_request *tstamp; + unsigned long flags; int i; - spin_lock(&adapter->ptp_tx_lock); + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); for (i = 0; i < IGC_MAX_TX_TSTAMP_TIMERS; i++) { tstamp = &adapter->tx_tstamp[i]; @@ -642,7 +643,7 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter) igc_ptp_tx_timeout(adapter, tstamp); } - spin_unlock(&adapter->ptp_tx_lock); + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } /** @@ -659,13 +660,14 @@ void 
igc_ptp_tx_hwtstamp(struct igc_adapter *adapter, u32 mask) { struct skb_shared_hwtstamps shhwtstamps; struct igc_hw *hw = &adapter->hw; + unsigned long flags; struct sk_buff *skb; int adjust = 0; u64 regval; int i; again: - spin_lock(&adapter->ptp_tx_lock); + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); for (i = 0; i < IGC_MAX_TX_TSTAMP_TIMERS; i++) { struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; @@ -712,7 +714,7 @@ void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter, u32 mask) dev_kfree_skb_any(skb); } - spin_unlock(&adapter->ptp_tx_lock); + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); mask = rd32(IGC_TSYNCTXCTL) & IGC_TSYNCTXCTL_TXTT_ANY; if (mask) { @@ -896,14 +898,16 @@ static void igc_tx_tstamp_clear(struct igc_adapter *adapter) */ void igc_ptp_suspend(struct igc_adapter *adapter) { + unsigned long flags; + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) return; - spin_lock(&adapter->ptp_tx_lock); + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); igc_tx_tstamp_clear(adapter); - spin_unlock(&adapter->ptp_tx_lock); + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); igc_ptp_time_save(adapter); } From 1a083b7b98ee1f49ed06c191e818f0169e8d58e8 Mon Sep 17 00:00:00 2001 From: Ederson de Souza Date: Thu, 19 Aug 2021 16:58:10 -0700 Subject: [PATCH 14/24] tools: Add XDP_FLAGS_USE_METADATA flag New flag used by bpf programs or AF_XDP applications to inform the driver to use XDP metadata information. 
Signed-off-by: Ederson de Souza --- tools/include/uapi/linux/if_link.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index d208b2af697fde..3ae296a9f9ccec 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -967,11 +967,13 @@ enum { #define XDP_FLAGS_DRV_MODE (1U << 2) #define XDP_FLAGS_HW_MODE (1U << 3) #define XDP_FLAGS_REPLACE (1U << 4) +#define XDP_FLAGS_USE_METADATA (1U << 5) #define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \ XDP_FLAGS_DRV_MODE | \ XDP_FLAGS_HW_MODE) #define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ - XDP_FLAGS_MODES | XDP_FLAGS_REPLACE) + XDP_FLAGS_MODES | XDP_FLAGS_REPLACE | \ + XDP_FLAGS_USE_METADATA) /* These are stored into IFLA_XDP_ATTACHED on dump. */ enum { From f58530a5b8a8cbd5712af06c570984c12bd8bf44 Mon Sep 17 00:00:00 2001 From: Ederson de Souza Date: Thu, 19 Aug 2021 16:58:25 -0700 Subject: [PATCH 15/24] xdp, net: Allow XDP_FLAGS_USE_METADATA to be used for link XDP New XDP_FLAGS_USE_METADATA should be available for AF_XDP applications, which use link XDP. 
Signed-off-by: Ederson de Souza --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 8f1a47ad6781ab..c7a0ab2a984947 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9475,7 +9475,7 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack if (link && (new_prog || old_prog)) return -EINVAL; /* link supports only XDP mode flags */ - if (link && (flags & ~XDP_FLAGS_MODES)) { + if (link && (flags & ~(XDP_FLAGS_MODES | XDP_FLAGS_USE_METADATA))) { NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment"); return -EINVAL; } From 6b318b37c046ca0a8bc1956b9d3a15ef7c83218d Mon Sep 17 00:00:00 2001 From: Ederson de Souza Date: Thu, 19 Aug 2021 16:58:39 -0700 Subject: [PATCH 16/24] bpf: Export btf_obj_id and bpf_get_btf_vmlinux symbols A network driver may need to get the BTF ID information currently associated with xdp_meta_generic. To do so, it needs to access btf_obj_id and bpf_get_btf_vmlinux functions, so, let's export them. 
Signed-off-by: Ederson de Souza --- kernel/bpf/btf.c | 1 + kernel/bpf/verifier.c | 1 + 2 files changed, 2 insertions(+) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index cb4b72997d9b90..0db36251062dff 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5930,6 +5930,7 @@ u32 btf_obj_id(const struct btf *btf) { return btf->id; } +EXPORT_SYMBOL(btf_obj_id); bool btf_is_kernel(const struct btf *btf) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f9bda5476ea55c..0701808df304f5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13313,6 +13313,7 @@ struct btf *bpf_get_btf_vmlinux(void) } return btf_vmlinux; } +EXPORT_SYMBOL_GPL(bpf_get_btf_vmlinux); int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) { From bcdbd55a05b20099b64dfef64649a01c664e38fe Mon Sep 17 00:00:00 2001 From: Ederson de Souza Date: Thu, 19 Aug 2021 16:58:44 -0700 Subject: [PATCH 17/24] bpf: Add btf_get_from_module function A network driver interested in using its associated BTF ID needs to find its associated BTF. This patch introduces a new function, btf_get_from_module(), which allows a module to get the BTF associated with it. 
Signed-off-by: Ederson de Souza --- include/linux/btf.h | 1 + kernel/bpf/btf.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/linux/btf.h b/include/linux/btf.h index 94a0c976c90fdf..376d19cb7329af 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -94,6 +94,7 @@ u32 btf_obj_id(const struct btf *btf); bool btf_is_kernel(const struct btf *btf); bool btf_is_module(const struct btf *btf); struct module *btf_try_get_module(const struct btf *btf); +struct btf *btf_get_from_module(const struct module *module); u32 btf_nr_types(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 0db36251062dff..5f1e8b5fcc974f 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6077,6 +6077,28 @@ static int __init btf_module_init(void) fs_initcall(btf_module_init); #endif /* CONFIG_DEBUG_INFO_BTF_MODULES */ +struct btf *btf_get_from_module(const struct module *module) +{ + struct btf *res = NULL; +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + struct btf_module *btf_mod, *tmp; + + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->module != module) + continue; + + res = btf_mod->btf; + + break; + } + mutex_unlock(&btf_module_mutex); +#endif + + return res; +} +EXPORT_SYMBOL_GPL(btf_get_from_module); + struct module *btf_try_get_module(const struct btf *btf) { struct module *res = NULL; From f1f4fff5ff8f37954d298ea51f50c7a0432cd9ce Mon Sep 17 00:00:00 2001 From: Ederson de Souza Date: Thu, 19 Aug 2021 16:59:24 -0700 Subject: [PATCH 18/24] igc: XDP packet RX timestamp Using XDP hints, driver adds the PTP timestamp of when a packet was received by the i225 NIC. 
Signed-off-by: Ederson de Souza --- drivers/net/ethernet/intel/igc/igc.h | 4 ++ drivers/net/ethernet/intel/igc/igc_main.c | 57 ++++++++++++++++++++--- drivers/net/ethernet/intel/igc/igc_xdp.c | 4 +- drivers/net/ethernet/intel/igc/igc_xdp.h | 7 ++- 4 files changed, 64 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 04735db330fd09..280529a66732bf 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -13,6 +13,7 @@ #include #include #include +#include #include "igc_hw.h" @@ -250,6 +251,9 @@ struct igc_adapter { struct timespec64 start; struct timespec64 period; } perout[IGC_N_PEROUT]; + + u32 btf_id; + bool btf_enabled; }; void igc_up(struct igc_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 60957af95c943c..ee7195a539f200 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018 Intel Corporation */ +#include +#include #include #include #include @@ -2311,6 +2313,14 @@ static void igc_finalize_xdp(struct igc_adapter *adapter, int status) xdp_do_flush(); } +void igc_clean_btf_id(void *addr) +{ + struct xdp_meta_generic___igc *hints; + + hints = addr - sizeof(*hints); + hints->btf_id = 0; +} + static void igc_update_rx_stats(struct igc_q_vector *q_vector, unsigned int packets, unsigned int bytes) { @@ -2374,8 +2384,21 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) if (!skb) { xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq); + xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring), - igc_rx_offset(rx_ring) + pkt_offset, size, false); + igc_rx_offset(rx_ring) + pkt_offset, size, + adapter->btf_enabled); + + if (adapter->btf_enabled) { + struct xdp_meta_generic___igc *hints; + + hints = xdp.data - sizeof(*hints); + xdp.data_meta = 
hints; + hints->tstamp = timestamp; + hints->btf_id = adapter->btf_id; + } else { + igc_clean_btf_id(xdp.data); + } skb = igc_xdp_run_prog(adapter, &xdp); } @@ -2539,12 +2562,19 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) bi->xdp->data); bi->xdp->data += IGC_TS_HDR_LEN; - - /* HW timestamp has been copied into local variable. Metadata - * length when XDP program is called should be 0. - */ bi->xdp->data_meta += IGC_TS_HDR_LEN; size -= IGC_TS_HDR_LEN; + + if (adapter->btf_enabled) { + struct xdp_meta_generic___igc *hints; + + hints = bi->xdp->data - sizeof(*hints); + bi->xdp->data_meta = hints; + hints->tstamp = timestamp; + hints->btf_id = adapter->btf_id; + } else { + igc_clean_btf_id(bi->xdp->data); + } } bi->xdp->data_end = bi->xdp->data + size; @@ -4206,6 +4236,19 @@ static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) return err; } +static void igc_btf_init(struct igc_adapter *adapter) +{ + struct module *owner = THIS_MODULE; + struct btf *btf; + + if (owner) + btf = btf_get_from_module(owner); + else + btf = bpf_get_btf_vmlinux(); + + adapter->btf_id = btf_obj_id(btf); +} + /** * igc_sw_init - Initialize general software structures (struct igc_adapter) * @adapter: board private structure to initialize @@ -4259,6 +4302,8 @@ static int igc_sw_init(struct igc_adapter *adapter) set_bit(__IGC_DOWN, &adapter->state); + igc_btf_init(adapter); + return 0; } @@ -5630,7 +5675,7 @@ static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf) switch (bpf->command) { case XDP_SETUP_PROG: - return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack); + return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack, bpf->flags); case XDP_SETUP_XSK_POOL: return igc_xdp_setup_pool(adapter, bpf->xsk.pool, bpf->xsk.queue_id); diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c index a8cf5374be47a0..ec7e9d92f4891d 100644 --- a/drivers/net/ethernet/intel/igc/igc_xdp.c +++ 
b/drivers/net/ethernet/intel/igc/igc_xdp.c @@ -7,7 +7,7 @@ #include "igc_xdp.h" int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, u32 flags) { struct net_device *dev = adapter->netdev; bool if_running = netif_running(dev); @@ -24,6 +24,8 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, if (if_running) igc_close(dev); + adapter->btf_enabled = flags & XDP_FLAGS_USE_METADATA; + old_prog = xchg(&adapter->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.h b/drivers/net/ethernet/intel/igc/igc_xdp.h index a74e5487d19989..a49b3d965d505d 100644 --- a/drivers/net/ethernet/intel/igc/igc_xdp.h +++ b/drivers/net/ethernet/intel/igc/igc_xdp.h @@ -5,7 +5,7 @@ #define _IGC_XDP_H_ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, - struct netlink_ext_ack *extack); + struct netlink_ext_ack *extack, u32 flags); int igc_xdp_setup_pool(struct igc_adapter *adapter, struct xsk_buff_pool *pool, u16 queue_id); @@ -14,4 +14,9 @@ static inline bool igc_xdp_is_enabled(struct igc_adapter *adapter) return !!adapter->xdp_prog; } +struct xdp_meta_generic___igc { + u64 tstamp; + u32 btf_id; +} __packed; + #endif /* _IGC_XDP_H_ */ From 3acf07a2b9856e4df14114ffd7967ad63a6c6736 Mon Sep 17 00:00:00 2001 From: Ederson de Souza Date: Wed, 23 Jun 2021 14:26:56 -0700 Subject: [PATCH 19/24] igc: XDP packet TX timestamp Add the PTP timestamp of when a packet was transmitted to the XDP hints. An application using AF_XDP can get this timestamp by inspecting the XDP frame metadata when it gets to the completion queue. One notable difference from TX timestamp for SKB is that the XDP frame actually resides in the UMEM. As such, the timestamp is added to the frame, and user space applications can access it when the frame is sent to the completion queue. 
When performing the clean-up of TX descriptors, the driver will check if an XDP socket frame is "expecting" a TX timestamp. If so, the driver will stop the clean-up to give the TX timestamp interrupt an opportunity to arrive. Signed-off-by: Ederson de Souza --- drivers/net/ethernet/intel/igc/igc.h | 22 ++-- drivers/net/ethernet/intel/igc/igc_main.c | 123 ++++++++++++++++++---- drivers/net/ethernet/intel/igc/igc_ptp.c | 46 +++++--- drivers/net/ethernet/intel/igc/igc_xdp.h | 1 + 4 files changed, 153 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 280529a66732bf..cb2475be2d9af0 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -66,11 +66,23 @@ struct igc_rx_packet_stats { u64 other_packets; }; +enum igc_tx_buffer_type { + IGC_TX_BUFFER_TYPE_SKB, + IGC_TX_BUFFER_TYPE_XDP, + IGC_TX_BUFFER_TYPE_XSK, +}; + #define IGC_MAX_TX_TSTAMP_TIMERS 4 struct igc_tx_timestamp_request { - struct sk_buff *skb; + union igc_pending_ts_pkt { + struct sk_buff *skb; + struct xdp_desc xsk_desc; + void *ptr; + } pending_ts_pkt; + struct xsk_buff_pool *xsk_pool; unsigned long start; + enum igc_tx_buffer_type type; u32 mask; u32 regl; u32 regh; @@ -433,12 +445,6 @@ enum igc_boards { #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGC_MAX_DATA_PER_TXD) #define DESC_NEEDED (MAX_SKB_FRAGS + 4) -enum igc_tx_buffer_type { - IGC_TX_BUFFER_TYPE_SKB, - IGC_TX_BUFFER_TYPE_XDP, - IGC_TX_BUFFER_TYPE_XSK, -}; - /* wrapper around a pointer to a socket buffer, * so a DMA handle can be stored along with the buffer */ @@ -449,6 +455,7 @@ struct igc_tx_buffer { union { struct sk_buff *skb; struct xdp_frame *xdpf; + struct xdp_desc xsk_desc; }; unsigned int bytecount; u16 gso_segs; @@ -617,6 +624,7 @@ int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); void igc_ptp_tx_hang(struct igc_adapter *adapter); void 
igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts); +ktime_t igc_retrieve_ptp_tx_timestamp(struct igc_adapter *adapter); #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index ee7195a539f200..39b691e6805156 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1158,7 +1158,8 @@ static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) (IGC_ADVTXD_TSTAMP_REG_3)); /* insert frame checksum */ - cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS); + if (skb) + cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS); return cmd_type; } @@ -1414,17 +1415,25 @@ static int igc_tso(struct igc_ring *tx_ring, return 1; } -static bool igc_request_tx_tstamp(struct igc_adapter *adapter, struct sk_buff *skb, u32 *flags) +static bool igc_request_tx_tstamp(struct igc_adapter *adapter, + union igc_pending_ts_pkt ts_pkt, u32 *flags, + struct xsk_buff_pool *xsk_pool) { int i; for (i = 0; i < IGC_MAX_TX_TSTAMP_TIMERS; i++) { struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; - if (tstamp->skb) + if (tstamp->pending_ts_pkt.ptr) continue; - tstamp->skb = skb_get(skb); + tstamp->pending_ts_pkt = ts_pkt; + if (xsk_pool) { + tstamp->xsk_pool = xsk_pool; + tstamp->type = IGC_TX_BUFFER_TYPE_XSK; + } else { + tstamp->type = IGC_TX_BUFFER_TYPE_SKB; + } tstamp->start = jiffies; *flags = tstamp->flags; @@ -1469,17 +1478,20 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); + union igc_pending_ts_pkt ts_pkt; unsigned long flags; u32 tstamp_flags; spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + ts_pkt.skb = skb_get(skb); if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && - igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) { + 
igc_request_tx_tstamp(adapter, ts_pkt, &tstamp_flags, NULL)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags; } else { adapter->tx_hwtstamp_skipped++; + skb_unref(skb); } spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); @@ -2164,7 +2176,8 @@ static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer, /* This function requires __netif_tx_lock is held by the caller. */ static int igc_xdp_init_tx_descriptor(struct igc_ring *ring, - struct xdp_frame *xdpf) + struct xdp_frame *xdpf, + u32 tx_flags) { struct igc_tx_buffer *buffer; union igc_adv_tx_desc *desc; @@ -2192,6 +2205,7 @@ static int igc_xdp_init_tx_descriptor(struct igc_ring *ring, netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount); buffer->next_to_watch = desc; + buffer->tx_flags = tx_flags; ring->next_to_use++; if (ring->next_to_use == ring->count) @@ -2229,7 +2243,7 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) nq = txring_txq(ring); __netif_tx_lock(nq, cpu); - res = igc_xdp_init_tx_descriptor(ring, xdpf); + res = igc_xdp_init_tx_descriptor(ring, xdpf, 0); __netif_tx_unlock(nq); return res; } @@ -2641,6 +2655,7 @@ static void igc_update_tx_stats(struct igc_q_vector *q_vector, static void igc_xdp_xmit_zc(struct igc_ring *ring) { + struct igc_adapter *adapter = netdev_priv(ring->netdev); struct xsk_buff_pool *pool = ring->xsk_pool; struct netdev_queue *nq = txring_txq(ring); union igc_adv_tx_desc *tx_desc = NULL; @@ -2657,13 +2672,37 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) budget = igc_desc_unused(ring); while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) { - u32 cmd_type, olinfo_status; + u32 cmd_type, olinfo_status, tx_flags = 0; struct igc_tx_buffer *bi; + unsigned long flags; dma_addr_t dma; - cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | - IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | - xdp_desc.len; + if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && + adapter->btf_enabled) { + 
union igc_pending_ts_pkt ts_pkt; + struct xdp_meta_generic___igc *hints; + u32 tstamp_flags; + + /* Ensure there's no garbage on metadata */ + hints = (struct xdp_meta_generic___igc *) + ((char *)xsk_buff_raw_get_data(pool, xdp_desc.addr) + - sizeof(*hints)); + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + + ts_pkt.xsk_desc = xdp_desc; + if (igc_request_tx_tstamp(adapter, ts_pkt, &tstamp_flags, pool)) { + tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags; + hints->tx_tstamp = 0; + } else { + adapter->tx_hwtstamp_skipped++; + } + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); + } else { + igc_clean_btf_id(xsk_buff_raw_get_data(pool, xdp_desc.addr)); + } + + cmd_type = igc_tx_cmd_type(NULL, tx_flags) | IGC_TXD_DCMD | xdp_desc.len; olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT; dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr); @@ -2681,6 +2720,7 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) bi->gso_segs = 1; bi->time_stamp = jiffies; bi->next_to_watch = tx_desc; + bi->xsk_desc = xdp_desc; netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len); @@ -2698,6 +2738,47 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) __netif_tx_unlock(nq); } +static bool igc_xsk_complete_tx_tstamp(struct igc_adapter *adapter, + struct igc_tx_buffer *tx_buffer) +{ + unsigned long flags; + bool ret = true; + int i; + + if (!adapter->btf_enabled) + return ret; + + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + for (i = 0; i < IGC_MAX_TX_TSTAMP_TIMERS; i++) { + struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; + + if (!tstamp->pending_ts_pkt.ptr) + continue; + + if (tstamp->type == IGC_TX_BUFFER_TYPE_XSK) { + struct xdp_desc xdp_desc = tstamp->pending_ts_pkt.xsk_desc; + + if (xdp_desc.addr == tx_buffer->xsk_desc.addr) { + struct xdp_meta_generic___igc *hints; + struct xsk_buff_pool *pool; + + pool = tstamp->xsk_pool; + hints = (struct xdp_meta_generic___igc *) + ((char *)xsk_buff_raw_get_data(pool, xdp_desc.addr) + - sizeof(*hints)); + if 
(!hints->tx_tstamp) { + ret = false; + break; + } + tstamp->pending_ts_pkt.ptr = NULL; + } + } + } + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); + + return ret; +} + /** * igc_clean_tx_irq - Reclaim resources after transmit completes * @q_vector: pointer to q_vector containing needed info @@ -2737,15 +2818,10 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD))) break; - /* clear next_to_watch to prevent false hangs */ - tx_buffer->next_to_watch = NULL; - - /* update the statistics for this packet */ - total_bytes += tx_buffer->bytecount; - total_packets += tx_buffer->gso_segs; - switch (tx_buffer->type) { case IGC_TX_BUFFER_TYPE_XSK: + if (!igc_xsk_complete_tx_tstamp(adapter, tx_buffer)) + goto budget_out; xsk_frames++; break; case IGC_TX_BUFFER_TYPE_XDP: @@ -2761,6 +2837,13 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) break; } + /* clear next_to_watch to prevent false hangs */ + tx_buffer->next_to_watch = NULL; + + /* update the statistics for this packet */ + total_bytes += tx_buffer->bytecount; + total_packets += tx_buffer->gso_segs; + /* clear last DMA location and unmap remaining buffers */ while (tx_desc != eop_desc) { tx_buffer++; @@ -2794,6 +2877,7 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) budget--; } while (likely(budget)); +budget_out: netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); @@ -5691,6 +5775,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, int cpu = smp_processor_id(); struct netdev_queue *nq; struct igc_ring *ring; + u32 tx_flags = 0; int i, drops; if (unlikely(test_bit(__IGC_DOWN, &adapter->state))) @@ -5709,7 +5794,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, int err; struct xdp_frame *xdpf = frames[i]; - err = igc_xdp_init_tx_descriptor(ring, xdpf); + err = igc_xdp_init_tx_descriptor(ring, xdpf, tx_flags); 
if (err) { xdp_return_frame_rx_napi(xdpf); drops++; diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 911c36a909a49b..51a6daf0302b4e 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -9,6 +9,9 @@ #include #include #include +#include + +#include "igc_xdp.h" #define INCVALUE_MASK 0x7fffffff #define ISGN 0x80000000 @@ -613,9 +616,10 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter, { struct igc_hw *hw = &adapter->hw; - dev_kfree_skb_any(tstamp->skb); - tstamp->skb = NULL; + if (tstamp->type == IGC_TX_BUFFER_TYPE_SKB) + dev_kfree_skb_any(tstamp->pending_ts_pkt.skb); tstamp->start = 0; + tstamp->pending_ts_pkt.ptr = NULL; adapter->tx_hwtstamp_timeouts++; /* Clear the tx valid bit in TSYNCTXCTL register to enable interrupt. */ rd32(tstamp->regh); @@ -634,7 +638,7 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter) for (i = 0; i < IGC_MAX_TX_TSTAMP_TIMERS; i++) { tstamp = &adapter->tx_tstamp[i]; - if (!tstamp->skb) + if (!tstamp->start) continue; if (time_is_after_jiffies(tstamp->start + IGC_PTP_TX_TIMEOUT)) @@ -661,7 +665,6 @@ void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter, u32 mask) struct skb_shared_hwtstamps shhwtstamps; struct igc_hw *hw = &adapter->hw; unsigned long flags; - struct sk_buff *skb; int adjust = 0; u64 regval; int i; @@ -675,12 +678,13 @@ void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter, u32 mask) if (!(mask & tstamp->mask)) continue; - skb = tstamp->skb; - if (!skb) - continue; - + /* Always need to read register, to clean interrupt cause */ regval = rd32(tstamp->regl); regval |= (u64)rd32(tstamp->regh) << 32; + + if (!tstamp->pending_ts_pkt.ptr) + continue; + igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); switch (adapter->link_speed) { @@ -706,12 +710,26 @@ void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter, u32 mask) * a copy of the skb pointer to ensure other threads can't change it * while we're notifying 
the stack. */ - tstamp->skb = NULL; tstamp->start = 0; /* Notify the stack and free the skb after we've unlocked */ - skb_tstamp_tx(skb, &shhwtstamps); - dev_kfree_skb_any(skb); + if (tstamp->type == IGC_TX_BUFFER_TYPE_SKB) { + skb_tstamp_tx(tstamp->pending_ts_pkt.skb, &shhwtstamps); + dev_kfree_skb_any(tstamp->pending_ts_pkt.skb); + tstamp->pending_ts_pkt.ptr = NULL; + } else if (tstamp->type == IGC_TX_BUFFER_TYPE_XSK) { + struct xdp_meta_generic___igc *hints; + struct xsk_buff_pool *pool; + struct xdp_desc xdp_desc; + + pool = tstamp->xsk_pool; + xdp_desc = tstamp->pending_ts_pkt.xsk_desc; + hints = (struct xdp_meta_generic___igc *) + ((char *)xsk_buff_raw_get_data(pool, xdp_desc.addr) + - sizeof(*hints)); + hints->tx_tstamp = shhwtstamps.hwtstamp; + hints->btf_id = adapter->btf_id; + } } spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); @@ -883,8 +901,10 @@ static void igc_tx_tstamp_clear(struct igc_adapter *adapter) for (i = 0; i < IGC_MAX_TX_TSTAMP_TIMERS; i++) { struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; - dev_kfree_skb_any(tstamp->skb); - tstamp->skb = NULL; + if (tstamp->pending_ts_pkt.ptr && tstamp->type == IGC_TX_BUFFER_TYPE_SKB) + dev_kfree_skb_any(tstamp->pending_ts_pkt.skb); + + tstamp->pending_ts_pkt.ptr = NULL; tstamp->start = 0; } } diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.h b/drivers/net/ethernet/intel/igc/igc_xdp.h index a49b3d965d505d..7a6932ac6ed800 100644 --- a/drivers/net/ethernet/intel/igc/igc_xdp.h +++ b/drivers/net/ethernet/intel/igc/igc_xdp.h @@ -15,6 +15,7 @@ static inline bool igc_xdp_is_enabled(struct igc_adapter *adapter) } struct xdp_meta_generic___igc { + u64 tx_tstamp; u64 tstamp; u32 btf_id; } __packed; From 46d8aa05c384bb3d95002b85ca7ad8e1cea5fdca Mon Sep 17 00:00:00 2001 From: Ederson de Souza Date: Wed, 23 Jun 2021 14:11:01 -0700 Subject: [PATCH 20/24] ethtool,igc: Add "xdp_headroom" driver info This information can be used by user space applications to determine how much headroom is needed 
for the XDP frame. igc driver is also changed to add this new information. Signed-off-by: Ederson de Souza --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 ++ include/uapi/linux/ethtool.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index fa4171860623f7..70210e52fa77bc 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -8,6 +8,7 @@ #include "igc.h" #include "igc_diag.h" +#include "igc_xdp.h" /* forward declaration */ struct igc_stats { @@ -156,6 +157,7 @@ static void igc_ethtool_get_drvinfo(struct net_device *netdev, sizeof(drvinfo->bus_info)); drvinfo->n_priv_flags = IGC_PRIV_FLAGS_STR_LEN; + drvinfo->xdp_headroom = XDP_PACKET_HEADROOM; } static int igc_ethtool_get_regs_len(struct net_device *netdev) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 67aa7134b3019e..dcf14ad4dccd79 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -176,6 +176,8 @@ static inline __u32 ethtool_cmd_speed(const struct ethtool_cmd *ep) * and %ETHTOOL_SEEPROM commands, in bytes * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS * command, in bytes + * @xdp_headroom: Size of minimum XDP headroom needed by the driver + * to fill with metadata information. * * Users can use the %ETHTOOL_GSSET_INFO command to get the number of * strings in any string set (from Linux 2.6.34). 
@@ -197,6 +199,7 @@ struct ethtool_drvinfo { __u32 testinfo_len; __u32 eedump_len; __u32 regdump_len; + __u32 xdp_headroom; }; #define SOPASS_MAX 6 From 01a8c287bb882592dc66dc0c1caccd5b9aef5c56 Mon Sep 17 00:00:00 2001 From: Ederson de Souza Date: Wed, 23 Jun 2021 14:05:30 -0700 Subject: [PATCH 21/24] libbpf: Helpers to access XDP frame metadata Two new pairs of helpers: `xsk_umem__adjust_prod_data` and `xsk_umem__adjust_prod_data_meta` for data that is being produced by the application - such as data that will be sent; and `xsk_umem__adjust_cons_data` and `xsk_umem__adjust_cons_data_meta`, for data being consumed - such as data obtained from the completion queue. Those functions should usually be used on data obtained via `xsk_umem__get_data`. `xsk_umem__get_data` itself is left unchanged to avoid breaking the API. Signed-off-by: Ederson de Souza --- tools/lib/bpf/libbpf.map | 4 ++++ tools/lib/bpf/xsk.c | 26 ++++++++++++++++++++++++++ tools/lib/bpf/xsk.h | 7 +++++++ 3 files changed, 37 insertions(+) diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index be4157c58bdb3e..22cbaf27da6143 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -377,4 +377,8 @@ LIBBPF_0.5.0 { btf__load_from_kernel_by_id_split; btf__load_into_kernel; libbpf_set_strict_mode; + xsk_umem__adjust_cons_data; + xsk_umem__adjust_cons_data_meta; + xsk_umem__adjust_prod_data; + xsk_umem__adjust_prod_data_meta; } LIBBPF_0.4.0; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index e9b619aa0cdf3b..17e8045eac0e66 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -119,6 +119,30 @@ int xsk_socket__fd(const struct xsk_socket *xsk) return xsk ? 
xsk->fd : -EINVAL; } +void *xsk_umem__adjust_prod_data(void *umem_data, const struct xsk_umem *umem) +{ + return umem_data + umem->config.frame_headroom + umem->config.xdp_headroom; +} + +void *xsk_umem__adjust_prod_data_meta(void *umem_data, const struct xsk_umem *umem) +{ + if (!umem->config.xdp_headroom) + return NULL; + return umem_data; +} + +void *xsk_umem__adjust_cons_data(void *umem_data, const struct xsk_umem *umem) +{ + return umem_data; +} + +void *xsk_umem__adjust_cons_data_meta(void *umem_data, const struct xsk_umem *umem) +{ + if (!umem->config.xdp_headroom) + return NULL; + return umem_data; +} + static bool xsk_page_aligned(void *buffer) { unsigned long addr = (unsigned long)buffer; @@ -135,6 +159,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg, cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; cfg->flags = XSK_UMEM__DEFAULT_FLAGS; + cfg->xdp_headroom = XSK_UMEM__DEFAULT_XDP_HEADROOM; return; } @@ -143,6 +168,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg, cfg->frame_size = usr_cfg->frame_size; cfg->frame_headroom = usr_cfg->frame_headroom; cfg->flags = usr_cfg->flags; + cfg->xdp_headroom = usr_cfg->xdp_headroom; } static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 01c12dca9c100f..7f414315074646 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -248,12 +248,18 @@ static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr) LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem); LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk); +LIBBPF_API void *xsk_umem__adjust_prod_data(void *umem_data, const struct xsk_umem *umem); +LIBBPF_API void *xsk_umem__adjust_prod_data_meta(void *umem_data, const struct xsk_umem *umem); +LIBBPF_API void *xsk_umem__adjust_cons_data(void *umem_data, const struct xsk_umem *umem); +LIBBPF_API void *xsk_umem__adjust_cons_data_meta(void 
*umem_data, const struct xsk_umem *umem); + #define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 #define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 #define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */ #define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT) #define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0 #define XSK_UMEM__DEFAULT_FLAGS 0 +#define XSK_UMEM__DEFAULT_XDP_HEADROOM 0 struct xsk_umem_config { __u32 fill_size; @@ -261,6 +267,7 @@ struct xsk_umem_config { __u32 frame_size; __u32 frame_headroom; __u32 flags; + __u32 xdp_headroom; }; LIBBPF_API int xsk_setup_xdp_prog(int ifindex, From fe782faadf1ae7887bd62ed00962f0c90cf879c3 Mon Sep 17 00:00:00 2001 From: Ederson de Souza Date: Tue, 20 Jul 2021 10:38:09 -0700 Subject: [PATCH 22/24] libbpf: Helpers to access XDP hints based on BTF definitions A new set of functions to help get the BTF definition of the XDP hints structure and get the information based on it. `xsk_umem__btf_id` helps retrieve the BTF id of XDP metadata. `xsk_btf__init` sets up a context based on the BTF, including a hashmap, so that subsequent queries are faster. `xsk_btf__read` returns a pointer to the position in the XDP metadata containing a given field. `xsk_btf__has_field` checks the presence of a field in the BTF. `xsk_btf__free` frees up the context. Besides those, a macro `XSK_BTF_READ_INTO` acts as a convenient helper to read the field contents into a given variable. Note that currently, the hashmap used to speed up offset location into the BTF doesn't use the field name as a string as the key to the hashmap. It directly uses the pointer value instead, as it is expected that most of the time, field names will be addressed by a shared constant string residing in read-only memory, thus saving some time. If this assumption is not entirely true, this optimisation needs to be rethought (or discarded altogether). 
Signed-off-by: Ederson de Souza --- tools/lib/bpf/libbpf.map | 5 + tools/lib/bpf/xsk.c | 226 +++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/xsk.h | 15 +++ 3 files changed, 246 insertions(+) diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 22cbaf27da6143..be6b05a603ae74 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -377,8 +377,13 @@ LIBBPF_0.5.0 { btf__load_from_kernel_by_id_split; btf__load_into_kernel; libbpf_set_strict_mode; + xsk_btf__init; + xsk_btf__read; + xsk_btf__has_field; + xsk_btf__free; xsk_umem__adjust_cons_data; xsk_umem__adjust_cons_data_meta; xsk_umem__adjust_prod_data; xsk_umem__adjust_prod_data_meta; + xsk_umem__btf_id; } LIBBPF_0.4.0; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 17e8045eac0e66..510968542e261f 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -31,6 +31,7 @@ #include #include "bpf.h" +#include "hashmap.h" #include "libbpf.h" #include "libbpf_internal.h" #include "xsk.h" @@ -143,6 +144,14 @@ void *xsk_umem__adjust_cons_data_meta(void *umem_data, const struct xsk_umem *um return umem_data; } +int xsk_umem__btf_id(void *umem_data, const struct xsk_umem *umem) +{ + if (umem->config.xdp_headroom < sizeof(int)) + return -EINVAL; + + return *(int *)(umem_data - sizeof(int)); +} + static bool xsk_page_aligned(void *buffer) { unsigned long addr = (unsigned long)buffer; @@ -1290,3 +1299,220 @@ void xsk_socket__delete(struct xsk_socket *xsk) close(xsk->fd); free(xsk); } + +struct xsk_btf_info { + struct hashmap map; + struct btf *base; + struct btf *btf; + const struct btf_type *type; +}; + +struct xsk_btf_entry { + __u32 offset; + __u32 size; +}; + +static void __xsk_btf_free_hash(struct xsk_btf_info *xbi) +{ + struct hashmap_entry *entry; + int i; + + hashmap__for_each_entry((&(xbi->map)), entry, i) { + free(entry->value); + } + hashmap__clear(&(xbi->map)); +} + +static size_t __xsk_hash_fn(const void *key, void *ctx) +{ + return (size_t)key; +} + +static 
bool __xsk_equal_fn(const void *k1, const void *k2, void *ctx) +{ + return k1 == k2; +} + +static bool __xsk_btf_match_md_name(const char *name) +{ + const char *md_name = "xdp_meta_generic"; + size_t s1, s2; + + s1 = strlen(name); + s2 = strlen(md_name); + + if (s1 < s2) + return false; + + if (!strcmp(name, md_name)) + return true; + + if (s1 > (s2 + 3)) { + return !strncmp(name, md_name, s2) + && name[s2] == '_' + && name[s2 + 1] == '_' + && name[s2 + 2] == '_'; + } + + return false; +} + +static const struct btf_type *__xsk_btf_find_md_btf(struct btf *btf) +{ + const struct btf_type *t; + const char *name; + __u32 nr_types; + int i; + + nr_types = btf__get_nr_types(btf); + /* 0th type is void, we must ignore it */ + for (i = 1; i < nr_types; i++) { + t = btf__type_by_id(btf, i); + name = btf__name_by_offset(btf, t->name_off); + if (name && __xsk_btf_match_md_name(name)) + return t; + } + + return NULL; +} + +int xsk_btf__init(__u32 btf_id, struct xsk_btf_info **xbi) +{ + const struct btf_member *m; + const struct btf_type *t; + struct btf *btf, *base; + unsigned short vlen; + int i, ret = 0; + + if (!xbi) + return -EINVAL; + + base = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!base) + return -ENOENT; + + btf = btf__load_from_kernel_by_id_split(btf_id, base); + ret = libbpf_get_error(btf); + if (ret) + goto error_load; + + t = __xsk_btf_find_md_btf(btf); + if (!t) { + ret = -ENOENT; + goto error_btf; + } + + *xbi = malloc(sizeof(**xbi)); + if (!*xbi) { + ret = -ENOMEM; + goto error_btf; + } + + hashmap__init(&(*xbi)->map, __xsk_hash_fn, __xsk_equal_fn, NULL); + + /* Validate no BTF field is a bitfield */ + m = btf_members(t); + vlen = BTF_INFO_VLEN(t->info); + for (i = 0; i < vlen; i++, m++) { + if (BTF_MEMBER_BITFIELD_SIZE(m->offset)) { + ret = -ENOTSUP; + goto error_entry; + } + } + + (*xbi)->base = base; + (*xbi)->btf = btf; + (*xbi)->type = t; + + return ret; + +error_entry: + __xsk_btf_free_hash(*xbi); + free(*xbi); + +error_btf: + btf__free(btf); + 
+error_load: + btf__free(base); + return ret; +} + +static int __xsk_btf_field_entry(struct xsk_btf_info *xbi, const char *field, + struct xsk_btf_entry **entry) +{ + const struct btf_member *m; + unsigned short vlen; + int i; + + m = btf_members(xbi->type); + vlen = BTF_INFO_VLEN(xbi->type->info); + for (i = 0; i < vlen; i++, m++) { + const struct btf_type *member_type; + const char *name = btf__name_by_offset(xbi->btf, m->name_off); + int type_id; + + if (strcmp(name, field)) + continue; + + if (entry) { + type_id = btf__resolve_type(xbi->btf, m->type); + member_type = btf__type_by_id(xbi->btf, type_id); + *entry = malloc(sizeof(*entry)); + if (!entry) + return -ENOMEM; + + /* As we bail out at init for bit fields, there should + * be no entries whose offset is not a multiple of byte + */ + (*entry)->offset = BTF_MEMBER_BIT_OFFSET(m->offset) / 8; + (*entry)->size = member_type->size; + } + return 0; + } + + return -ENOENT; +} + +bool xsk_btf__has_field(const char *field, struct xsk_btf_info *xbi) +{ + if (!xbi) + return false; + + return __xsk_btf_field_entry(xbi, field, NULL); +} + +void xsk_btf__free(struct xsk_btf_info *xbi) +{ + if (!xbi) + return; + + __xsk_btf_free_hash(xbi); + btf__free(xbi->btf); + btf__free(xbi->base); + free(xbi); +} + +int xsk_btf__read(void **dest, size_t size, const char *field, struct xsk_btf_info *xbi, + const void *addr) +{ + struct xsk_btf_entry *entry; + int err; + + if (!field || !xbi || !dest || !addr) + return -EINVAL; + + if (!hashmap__find(&(xbi->map), field, (void **)&entry)) { + err = __xsk_btf_field_entry(xbi, field, &entry); + if (err) + return err; + + hashmap__add(&(xbi->map), field, entry); + } + + if (entry->size != size) + return -EINVAL; + + *dest = (void *)((char *)addr - xbi->type->size + entry->offset); + return 0; +} diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 7f414315074646..b0bddc70c5a667 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -253,6 +253,8 @@ LIBBPF_API void 
*xsk_umem__adjust_prod_data_meta(void *umem_data, const struct x LIBBPF_API void *xsk_umem__adjust_cons_data(void *umem_data, const struct xsk_umem *umem); LIBBPF_API void *xsk_umem__adjust_cons_data_meta(void *umem_data, const struct xsk_umem *umem); +LIBBPF_API int xsk_umem__btf_id(void *umem_data, const struct xsk_umem *umem); + #define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 #define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 #define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */ @@ -322,6 +324,19 @@ xsk_socket__create_shared(struct xsk_socket **xsk_ptr, LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem); LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk); +struct xsk_btf_info; + +LIBBPF_API int xsk_btf__init(__u32 btf_id, struct xsk_btf_info **xbi); +LIBBPF_API int xsk_btf__read(void **dest, size_t size, const char *field, struct xsk_btf_info *xbi, + const void *addr); +LIBBPF_API bool xsk_btf__has_field(const char *field, struct xsk_btf_info *xbi); +LIBBPF_API void xsk_btf__free(struct xsk_btf_info *xbi); + +#define XSK_BTF_READ_INTO(dest, field, xbi, addr) ({ \ + typeof(dest) *_d; \ + xsk_btf__read((void **)&_d, sizeof(dest), #field, xbi, addr); \ + dest = *_d; }) + #ifdef __cplusplus } /* extern "C" */ #endif From 613b86c11907bf3532e49ed9eaff04014cbed05b Mon Sep 17 00:00:00 2001 From: Ederson de Souza Date: Wed, 23 Jun 2021 14:12:53 -0700 Subject: [PATCH 23/24] samples/bpf: XDP hints AF_XDP example Using -D option, xdpsock now shows the RX or TX timestamp of last sent/received packets (for rx only or tx only modes). 
Signed-off-by: Ederson de Souza --- samples/bpf/xdpsock_user.c | 130 +++++++++++++++++++++++++++++++++++-- 1 file changed, 126 insertions(+), 4 deletions(-) diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 33d0bdebbed81d..6b1a214d0cf979 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -7,11 +7,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -25,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -99,6 +102,7 @@ static u32 opt_num_xsks = 1; static u32 prog_id; static bool opt_busy_poll; static bool opt_reduced_cap; +static bool opt_metadata; struct xsk_ring_stats { unsigned long rx_npkts; @@ -142,6 +146,14 @@ struct xsk_umem_info { struct xsk_ring_cons cq; struct xsk_umem *umem; void *buffer; + u32 frame_headroom; +}; + +struct xsk_metadata { + struct xsk_btf_info *xbi; + unsigned long rx_timestamp; + unsigned long tx_timestamp; + u32 btf_id; }; struct xsk_socket_info { @@ -152,6 +164,7 @@ struct xsk_socket_info { struct xsk_ring_stats ring_stats; struct xsk_app_stats app_stats; struct xsk_driver_stats drv_stats; + struct xsk_metadata metadata; u32 outstanding_tx; }; @@ -159,6 +172,32 @@ static int num_socks; struct xsk_socket_info *xsks[MAX_SOCKS]; int sock; +static u32 get_xdp_headroom(void) +{ + struct ethtool_drvinfo drvinfo = { .cmd = ETHTOOL_GDRVINFO }; + struct ifreq ifr = {}; + int fd, err, ret; + + fd = socket(AF_LOCAL, SOCK_DGRAM, 0); + if (fd < 0) + return 0; + + ifr.ifr_data = (void *)&drvinfo; + memcpy(ifr.ifr_name, opt_if, strlen(opt_if) + 1); + err = ioctl(fd, SIOCETHTOOL, &ifr); + + if (err) { + ret = 0; + goto out; + } + + ret = drvinfo.xdp_headroom; + +out: + close(fd); + return ret; +} + static unsigned long get_nsecs(void) { struct timespec ts; @@ -258,6 +297,34 @@ static void dump_app_stats(long dt) } } +static struct xsk_btf_info *init_xsk_metadata_info(u32 btf_id) +{ + struct 
xsk_btf_info *xbi; + + if (xsk_btf__init(btf_id, &xbi) < 0) + return NULL; + + return xbi; +} + +static void save_metadata_tx(void *meta, struct xsk_socket_info *xsk) +{ + if (!meta) + return; + + XSK_BTF_READ_INTO(xsk->metadata.tx_timestamp, + tx_tstamp, xsk->metadata.xbi, meta); +} + +static void save_metadata_rx(void *meta, struct xsk_socket_info *xsk) +{ + if (!meta) + return; + + XSK_BTF_READ_INTO(xsk->metadata.rx_timestamp, + tstamp, xsk->metadata.xbi, meta); +} + static bool get_interrupt_number(void) { FILE *f_int_proc; @@ -432,6 +499,12 @@ static void dump_stats(void) printf("%-15s\n", "Error retrieving extra stats"); } } + + if (opt_metadata) { + printf("Last TX time: %lu\n", xsks[i]->metadata.tx_timestamp); + printf("Last RX time: %lu\n", xsks[i]->metadata.rx_timestamp); + } + } if (opt_app_stats) @@ -798,8 +871,10 @@ static void gen_eth_hdr_data(void) static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr) { - memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, - PKT_SIZE); + void *data = xsk_umem__get_data(umem->buffer, addr); + + data = xsk_umem__adjust_prod_data(data, umem->umem); + memcpy(data, pkt_data, PKT_SIZE); } static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) @@ -819,6 +894,7 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = opt_xsk_frame_size, .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, + .xdp_headroom = get_xdp_headroom(), .flags = opt_umem_flags }; int ret; @@ -833,6 +909,7 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) exit_with_error(-ret); umem->buffer = buffer; + umem->frame_headroom = cfg.frame_headroom; return umem; } @@ -927,6 +1004,7 @@ static struct option long_options[] = { {"irq-string", no_argument, 0, 'I'}, {"busy-poll", no_argument, 0, 'B'}, {"reduce-cap", no_argument, 0, 'R'}, + {"metadata", no_argument, 0, 'D'}, {0, 0, 0, 0} }; @@ -967,6 +1045,7 @@ static void 
usage(const char *prog) " -I, --irq-string Display driver interrupt statistics for interface associated with irq-string.\n" " -B, --busy-poll Busy poll.\n" " -R, --reduce-cap Use reduced capabilities (cannot be used with -M)\n" + " -D, --metadata Display latest packet metadata\n" "\n"; fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, @@ -982,7 +1061,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:BR", + c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:BRD", long_options, &option_index); if (c == -1) break; @@ -1087,6 +1166,10 @@ static void parse_command_line(int argc, char **argv) case 'R': opt_reduced_cap = true; break; + case 'D': + opt_metadata = true; + opt_xdp_flags |= XDP_FLAGS_USE_METADATA; + break; default: usage(basename(argv[0])); } @@ -1193,6 +1276,25 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk, rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); if (rcvd > 0) { + if (opt_metadata) { + __u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx); + char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); + __u32 btf_id = xsk_umem__btf_id(pkt, xsk->umem->umem); + + if (btf_id > 0) { + if (!xsk->metadata.xbi) { + xsk->metadata.xbi = init_xsk_metadata_info(btf_id); + if (xsk->metadata.xbi) + xsk->metadata.btf_id = btf_id; + } + if (xsk->metadata.btf_id == btf_id) { + void *m; + + m = xsk_umem__adjust_cons_data_meta(pkt, xsk->umem->umem); + save_metadata_tx(m, xsk); + } + } + } xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; } @@ -1232,6 +1334,24 @@ static void rx_drop(struct xsk_socket_info *xsk) addr = xsk_umem__add_offset_to_addr(addr); char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); + if (opt_metadata) { + __u32 btf_id = xsk_umem__btf_id(pkt, xsk->umem->umem); + + if (btf_id > 0) { + if (!xsk->metadata.xbi) { + 
xsk->metadata.xbi = init_xsk_metadata_info(btf_id); + if (xsk->metadata.xbi) + xsk->metadata.btf_id = btf_id; + } + if (xsk->metadata.btf_id == btf_id) { + void *m; + + m = xsk_umem__adjust_cons_data_meta(pkt, xsk->umem->umem); + save_metadata_rx(m, xsk); + } + } + } + hex_dump(pkt, len, addr); *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig; } @@ -1283,7 +1403,9 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) for (i = 0; i < batch_size; i++) { struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); - tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size; + tx_desc->addr = (__u64)xsk_umem__adjust_prod_data( + (void *)(__u64)((*frame_nb + i) * opt_xsk_frame_size), + xsk->umem->umem); tx_desc->len = PKT_SIZE; } From dd4f41b24f96438b64092cc559a69045e3f9781c Mon Sep 17 00:00:00 2001 From: Ederson de Souza Date: Tue, 21 Sep 2021 12:22:08 -0700 Subject: [PATCH 24/24] libbpf,xdp,igc: Use union to split XDP TX and RX metadata Assuming that XDP metadata related to RX does not overlap with metadata related to TX, this patch splits TX and RX data inside xdp_meta_generic into different anonymous structs united by another anonymous union. This allows more data to fit in the 32 bytes target size for xdp_meta_generic. With help of BTF CO-RE, this is transparent for BPF applications. For AF_XDP ones, libbpf has been modified to account for the new layout. The flip side is that holes appear in the struct, and one has to carefully take care of padding, so that `btf_id` field is still at the very end of struct. 
Signed-off-by: Ederson de Souza --- drivers/net/ethernet/intel/igc/igc_main.c | 15 +++--- drivers/net/ethernet/intel/igc/igc_ptp.c | 4 +- include/net/xdp.h | 31 ++++++++----- tools/lib/bpf/xsk.c | 56 ++++++++++++----------- 4 files changed, 59 insertions(+), 47 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 39b691e6805156..2c61e67ecad94b 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2329,7 +2329,7 @@ static void igc_finalize_xdp(struct igc_adapter *adapter, int status) void igc_clean_btf_id(void *addr) { - struct xdp_meta_generic___igc *hints; + struct xdp_meta_generic *hints; hints = addr - sizeof(*hints); hints->btf_id = 0; @@ -2404,10 +2404,11 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) adapter->btf_enabled); if (adapter->btf_enabled) { - struct xdp_meta_generic___igc *hints; + struct xdp_meta_generic *hints; hints = xdp.data - sizeof(*hints); xdp.data_meta = hints; + hints->tx_tstamp = -1; hints->tstamp = timestamp; hints->btf_id = adapter->btf_id; } else { @@ -2580,7 +2581,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) size -= IGC_TS_HDR_LEN; if (adapter->btf_enabled) { - struct xdp_meta_generic___igc *hints; + struct xdp_meta_generic *hints; hints = bi->xdp->data - sizeof(*hints); bi->xdp->data_meta = hints; @@ -2680,11 +2681,11 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && adapter->btf_enabled) { union igc_pending_ts_pkt ts_pkt; - struct xdp_meta_generic___igc *hints; + struct xdp_meta_generic *hints; u32 tstamp_flags; /* Ensure there's no garbage on metadata */ - hints = (struct xdp_meta_generic___igc *) + hints = (struct xdp_meta_generic *) ((char *)xsk_buff_raw_get_data(pool, xdp_desc.addr) - sizeof(*hints)); spin_lock_irqsave(&adapter->ptp_tx_lock, flags); @@ -2759,11 +2760,11 @@ static 
bool igc_xsk_complete_tx_tstamp(struct igc_adapter *adapter, struct xdp_desc xdp_desc = tstamp->pending_ts_pkt.xsk_desc; if (xdp_desc.addr == tx_buffer->xsk_desc.addr) { - struct xdp_meta_generic___igc *hints; + struct xdp_meta_generic *hints; struct xsk_buff_pool *pool; pool = tstamp->xsk_pool; - hints = (struct xdp_meta_generic___igc *) + hints = (struct xdp_meta_generic *) ((char *)xsk_buff_raw_get_data(pool, xdp_desc.addr) - sizeof(*hints)); if (!hints->tx_tstamp) { diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 51a6daf0302b4e..d1cbc907980da6 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -718,13 +718,13 @@ void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter, u32 mask) dev_kfree_skb_any(tstamp->pending_ts_pkt.skb); tstamp->pending_ts_pkt.ptr = NULL; } else if (tstamp->type == IGC_TX_BUFFER_TYPE_XSK) { - struct xdp_meta_generic___igc *hints; + struct xdp_meta_generic *hints; struct xsk_buff_pool *pool; struct xdp_desc xdp_desc; pool = tstamp->xsk_pool; xdp_desc = tstamp->pending_ts_pkt.xsk_desc; - hints = (struct xdp_meta_generic___igc *) + hints = (struct xdp_meta_generic *) ((char *)xsk_buff_raw_get_data(pool, xdp_desc.addr) - sizeof(*hints)); hints->tx_tstamp = shhwtstamps.hwtstamp; diff --git a/include/net/xdp.h b/include/net/xdp.h index 80427bdddb882c..c8b3a407f0d796 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -77,19 +77,26 @@ struct xdp_buff { }; struct xdp_meta_generic { - // Tx part - u32 flags; - u16 free_slot; - u16 csum_off; - u16 txcvid; - - // Rx part - u16 rxcvid; - u32 csum; - u32 hash; - u64 tstamp; - + union { + // Tx part + struct { + u16 free_slot; + u16 csum_off; + u16 txcvid; + u32 flags; + u64 tx_tstamp; + }; + + // Rx part + struct { + u16 rxcvid; + u32 csum; + u32 hash; + u64 tstamp; + }; + }; // BTF ID + u32 pad; u32 btf_id; } __packed __aligned(8); static_assert(sizeof(struct xdp_meta_generic) == 32); diff 
--git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 510968542e261f..f0863643e27738 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -1378,11 +1378,9 @@ static const struct btf_type *__xsk_btf_find_md_btf(struct btf *btf) int xsk_btf__init(__u32 btf_id, struct xsk_btf_info **xbi) { - const struct btf_member *m; const struct btf_type *t; struct btf *btf, *base; - unsigned short vlen; - int i, ret = 0; + int ret = 0; if (!xbi) return -EINVAL; @@ -1410,26 +1408,12 @@ int xsk_btf__init(__u32 btf_id, struct xsk_btf_info **xbi) hashmap__init(&(*xbi)->map, __xsk_hash_fn, __xsk_equal_fn, NULL); - /* Validate no BTF field is a bitfield */ - m = btf_members(t); - vlen = BTF_INFO_VLEN(t->info); - for (i = 0; i < vlen; i++, m++) { - if (BTF_MEMBER_BITFIELD_SIZE(m->offset)) { - ret = -ENOTSUP; - goto error_entry; - } - } - (*xbi)->base = base; (*xbi)->btf = btf; (*xbi)->type = t; return ret; -error_entry: - __xsk_btf_free_hash(*xbi); - free(*xbi); - error_btf: btf__free(btf); @@ -1438,28 +1422,42 @@ int xsk_btf__init(__u32 btf_id, struct xsk_btf_info **xbi) return ret; } -static int __xsk_btf_field_entry(struct xsk_btf_info *xbi, const char *field, - struct xsk_btf_entry **entry) +static int __xsk_btf_field_find_entry(struct xsk_btf_info *xbi, const char *field, + struct xsk_btf_entry **entry, const struct btf_type *type) { const struct btf_member *m; unsigned short vlen; - int i; + int i, ret; - m = btf_members(xbi->type); - vlen = BTF_INFO_VLEN(xbi->type->info); + m = btf_members(type); + vlen = BTF_INFO_VLEN(type->info); for (i = 0; i < vlen; i++, m++) { const struct btf_type *member_type; const char *name = btf__name_by_offset(xbi->btf, m->name_off); - int type_id; + int type_id, kind; + + type_id = btf__resolve_type(xbi->btf, m->type); + member_type = btf__type_by_id(xbi->btf, type_id); + + kind = BTF_INFO_KIND(member_type->info); + if (kind == BTF_KIND_UNION || kind == BTF_KIND_STRUCT) { + ret = __xsk_btf_field_find_entry(xbi, field, entry, member_type); + 
if (ret == -ENOENT) + continue; + return ret; + } if (strcmp(name, field)) continue; if (entry) { - type_id = btf__resolve_type(xbi->btf, m->type); - member_type = btf__type_by_id(xbi->btf, type_id); + if (BTF_MEMBER_BITFIELD_SIZE(m->offset)) { + /* No support for bit fields for now */ + return -EOPNOTSUPP; + } + *entry = malloc(sizeof(*entry)); - if (!entry) + if (!*entry) return -ENOMEM; /* As we bail out at init for bit fields, there should @@ -1474,6 +1472,12 @@ static int __xsk_btf_field_entry(struct xsk_btf_info *xbi, const char *field, return -ENOENT; } +static int __xsk_btf_field_entry(struct xsk_btf_info *xbi, const char *field, + struct xsk_btf_entry **entry) +{ + return __xsk_btf_field_find_entry(xbi, field, entry, xbi->type); +} + bool xsk_btf__has_field(const char *field, struct xsk_btf_info *xbi) { if (!xbi)