/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2024-2025 Intel Corporation */

#ifndef __LIBETH_RX_H
#define __LIBETH_RX_H

#include <linux/if_vlan.h>

#include <net/page_pool/helpers.h>
#include <net/xdp.h>

/* Rx buffer management */

/* Space reserved in front of each frame */
#define LIBETH_SKB_HEADROOM	(NET_SKB_PAD + NET_IP_ALIGN)
#define LIBETH_XDP_HEADROOM	(ALIGN(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \
				 NET_IP_ALIGN)
/* Maximum headroom for worst-case calculations */
#define LIBETH_MAX_HEADROOM	LIBETH_XDP_HEADROOM
/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
#define LIBETH_RX_LL_LEN	(ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
/* Maximum supported L2-L4 header length */
#define LIBETH_MAX_HEAD		roundup_pow_of_two(max(MAX_HEADER, 256))
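
/* Worked example, assuming a typical x86_64 build (NET_SKB_PAD == 64,
 * NET_IP_ALIGN == 0, XDP_PACKET_HEADROOM == 256, MAX_HEADER <= 256):
 * LIBETH_SKB_HEADROOM == 64, LIBETH_XDP_HEADROOM == 256,
 * LIBETH_RX_LL_LEN == 14 + 2 * 4 + 4 == 26, LIBETH_MAX_HEAD == 256.
 * Other arches/configs differ, e.g. NET_IP_ALIGN == 2 adds two bytes to
 * both headroom values.
 */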

/* Always use order-0 pages */
#define LIBETH_RX_PAGE_ORDER	0
/* Pick a sane buffer stride and align to a cacheline boundary */
#define LIBETH_RX_BUF_STRIDE	SKB_DATA_ALIGN(128)
/* HW-writeable space in one buffer: truesize - headroom/tailroom, aligned */
#define LIBETH_RX_PAGE_LEN(hr)						  \
	ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER),		  \
		   LIBETH_RX_BUF_STRIDE)

/**
 * struct libeth_fqe - structure representing an Rx buffer (fill queue element)
 * @netmem: network memory reference holding the buffer
 * @offset: offset from the page start (to the headroom)
 * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
 *
 * Depending on the MTU, the API switches between the one-page-per-frame and
 * the shared-page model (to conserve memory on bigger-page platforms). In
 * case of the former, @offset is always 0 and @truesize is always
 * ``PAGE_SIZE``.
 */
struct libeth_fqe {
	netmem_ref		netmem;
	u32			offset;
	u32			truesize;
} __aligned_largest;

/**
 * enum libeth_fqe_type - enum representing types of Rx buffers
 * @LIBETH_FQE_MTU: buffer size is determined by MTU
 * @LIBETH_FQE_SHORT: buffer size is smaller than MTU, for short frames
 * @LIBETH_FQE_HDR: buffer size is ``LIBETH_MAX_HEAD``-sized, for headers
 */
enum libeth_fqe_type {
	LIBETH_FQE_MTU		= 0U,
	LIBETH_FQE_SHORT,
	LIBETH_FQE_HDR,
};

/**
 * struct libeth_fq - structure representing a buffer (fill) queue
 * @fp: hotpath part of the structure
 * @pp: &page_pool for buffer management
 * @fqes: array of Rx buffers
 * @truesize: size to allocate per buffer, w/overhead
 * @count: number of descriptors/buffers the queue has
 * @type: type of the buffers this queue has
 * @hsplit: flag whether header split is enabled
 * @xdp: flag indicating whether XDP is enabled
 * @buf_len: HW-writeable length per each buffer
 * @nid: ID of the closest NUMA node with memory
 */
struct libeth_fq {
	struct_group_tagged(libeth_fq_fp, fp,
		struct page_pool	*pp;
		struct libeth_fqe	*fqes;

		u32			truesize;
		u32			count;
	);

	/* Cold fields */
	enum libeth_fqe_type	type:2;
	bool			hsplit:1;
	bool			xdp:1;

	u32			buf_len;
	int			nid;
};

int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi);
void libeth_rx_fq_destroy(struct libeth_fq *fq);
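
/* A minimal setup sketch, not part of the API: one way a driver could create
 * a fill queue for MTU-sized buffers. &struct drv_rxq (@rxq) and its fields
 * are hypothetical placeholders for the driver's own bookkeeping.
 *
 *	struct libeth_fq fq = {
 *		.count	= rxq->desc_count,
 *		.type	= LIBETH_FQE_MTU,
 *		.hsplit	= false,
 *		.nid	= NUMA_NO_NODE,
 *	};
 *	int err;
 *
 *	err = libeth_rx_fq_create(&fq, &rxq->napi);
 *	if (err)
 *		return err;
 *
 *	rxq->fq = fq;
 */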

/**
 * libeth_rx_alloc - allocate a new Rx buffer
 * @fq: fill queue to allocate for
 * @i: index of the buffer within the queue
 *
 * Return: DMA address to be passed to HW for Rx on successful allocation,
 * ``DMA_MAPPING_ERROR`` otherwise.
 */
static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
{
	struct libeth_fqe *buf = &fq->fqes[i];

	buf->truesize = fq->truesize;
	buf->netmem = page_pool_dev_alloc_netmem(fq->pp, &buf->offset,
						 &buf->truesize);
	if (unlikely(!buf->netmem))
		return DMA_MAPPING_ERROR;

	return page_pool_get_dma_addr_netmem(buf->netmem) + buf->offset +
	       fq->pp->p.offset;
}
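
/* Example refill loop, a sketch only. The descriptor write at the end is a
 * hypothetical placeholder for whatever format the HW expects; @rxq->fq is
 * assumed to be the driver's &struct libeth_fq.
 *
 *	for (u32 i = 0; i < rxq->fq.count; i++) {
 *		dma_addr_t addr = libeth_rx_alloc(&rxq->fq.fp, i);
 *
 *		if (addr == DMA_MAPPING_ERROR)
 *			break;
 *
 *		rxq->rx_desc[i].pkt_addr = cpu_to_le64(addr);
 *	}
 */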

void libeth_rx_recycle_slow(netmem_ref netmem);

/**
 * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
 * @fqe: buffer to process
 * @len: frame length from the descriptor
 *
 * Process the buffer after it's written by HW. The regular path is to
 * synchronize DMA for CPU, but in case of no data it will be immediately
 * recycled back to its PP.
 *
 * Return: true when there's data to process, false otherwise.
 */
static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
					  u32 len)
{
	netmem_ref netmem = fqe->netmem;

	/* Very rare, but possible case. The most common reason:
	 * the last fragment contained FCS only, which was then
	 * stripped by the HW.
	 */
	if (unlikely(!len)) {
		libeth_rx_recycle_slow(netmem);
		return false;
	}

	page_pool_dma_sync_netmem_for_cpu(netmem_get_pp(netmem), netmem,
					  fqe->offset, len);

	return true;
}
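
/* Completion-path sketch: add a received fragment to an skb. Everything named
 * rx_desc/rxq/ntc is a hypothetical driver detail; only the libeth, netmem and
 * skb helpers are real. Note the HW data starts at @offset plus the page_pool
 * headroom (pp->p.offset), matching the address libeth_rx_alloc() programmed.
 *
 *	struct libeth_fqe *fqe = &rxq->fq.fqes[ntc];
 *	u32 hr = netmem_get_pp(fqe->netmem)->p.offset;
 *	u32 len = le16_to_cpu(rx_desc->pkt_len);
 *
 *	if (libeth_rx_sync_for_cpu(fqe, len))
 *		skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags,
 *				       fqe->netmem, fqe->offset + hr, len,
 *				       fqe->truesize);
 */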

/* Converting abstract packet type numbers into a software structure with
 * the packet parameters to do O(1) lookup on Rx.
 */

enum {
	LIBETH_RX_PT_OUTER_L2			= 0U,
	LIBETH_RX_PT_OUTER_IPV4,
	LIBETH_RX_PT_OUTER_IPV6,
};

enum {
	LIBETH_RX_PT_NOT_FRAG			= 0U,
	LIBETH_RX_PT_FRAG,
};

enum {
	LIBETH_RX_PT_TUNNEL_IP_NONE		= 0U,
	LIBETH_RX_PT_TUNNEL_IP_IP,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC_VLAN,
};

enum {
	LIBETH_RX_PT_TUNNEL_END_NONE		= 0U,
	LIBETH_RX_PT_TUNNEL_END_IPV4,
	LIBETH_RX_PT_TUNNEL_END_IPV6,
};

enum {
	LIBETH_RX_PT_INNER_NONE			= 0U,
	LIBETH_RX_PT_INNER_UDP,
	LIBETH_RX_PT_INNER_TCP,
	LIBETH_RX_PT_INNER_SCTP,
	LIBETH_RX_PT_INNER_ICMP,
	LIBETH_RX_PT_INNER_TIMESYNC,
};

#define LIBETH_RX_PT_PAYLOAD_NONE		PKT_HASH_TYPE_NONE
#define LIBETH_RX_PT_PAYLOAD_L2			PKT_HASH_TYPE_L2
#define LIBETH_RX_PT_PAYLOAD_L3			PKT_HASH_TYPE_L3
#define LIBETH_RX_PT_PAYLOAD_L4			PKT_HASH_TYPE_L4

struct libeth_rx_pt {
	u32					outer_ip:2;
	u32					outer_frag:1;
	u32					tunnel_type:3;
	u32					tunnel_end_prot:2;
	u32					tunnel_end_frag:1;
	u32					inner_prot:3;
	enum pkt_hash_types			payload_layer:2;

	u32					pad:2;
	enum xdp_rss_hash_type			hash_type:16;
};

/**
 * struct libeth_rx_csum - checksum offload bits decoded from the Rx descriptor
 * @l3l4p: detectable L3 and L4 integrity check is processed by the hardware
 * @ipe: IP checksum error
 * @eipe: external (outermost) IP header checksum error (only for tunnels)
 * @eudpe: external (outermost) UDP checksum error (only for tunnels)
 * @ipv6exadd: IPv6 header with extension headers
 * @l4e: L4 integrity error
 * @pprs: set for packets that skip checksum calculation in the HW pre-parser
 * @nat: the packet is a UDP tunneled packet
 * @raw_csum_valid: set if raw checksum is valid
 * @pad: padding to naturally align the raw_csum field
 * @raw_csum: raw checksum
 */
struct libeth_rx_csum {
	u32					l3l4p:1;
	u32					ipe:1;
	u32					eipe:1;
	u32					eudpe:1;
	u32					ipv6exadd:1;
	u32					l4e:1;
	u32					pprs:1;
	u32					nat:1;

	u32					raw_csum_valid:1;
	u32					pad:7;
	u32					raw_csum:16;
};
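
/* Decoding sketch: a driver fills the structure from its own descriptor
 * layout. The DRV_RX_* masks and the status word below are made up, and the
 * CHECKSUM_UNNECESSARY decision is intentionally simplified (no tunnel or
 * raw-checksum handling).
 *
 *	u16 status = le16_to_cpu(rx_desc->status_err);
 *	struct libeth_rx_csum csum = {
 *		.l3l4p	= FIELD_GET(DRV_RX_L3L4P_M, status),
 *		.ipe	= FIELD_GET(DRV_RX_IPE_M, status),
 *		.l4e	= FIELD_GET(DRV_RX_L4E_M, status),
 *	};
 *
 *	if (csum.l3l4p && !csum.ipe && !csum.l4e)
 *		skb->ip_summed = CHECKSUM_UNNECESSARY;
 */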

/**
 * struct libeth_rqe_info - receive queue element info
 * @len: packet length
 * @ptype: packet type based on types programmed into the device
 * @eop: whether it's the last fragment of the packet
 * @rxe: MAC errors: CRC, Alignment, Oversize, Undersize, Length error
 * @vlan: C-VLAN or S-VLAN tag depending on the VLAN offload configuration
 */
struct libeth_rqe_info {
	u32					len;

	u32					ptype:14;
	u32					eop:1;
	u32					rxe:1;

	u32					vlan:16;
};

void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);
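
/* Typical usage sketch: the driver keeps a table of &struct libeth_rx_pt
 * indexed by the HW packet type number and converts each entry once at init
 * time, so the hotpath only does an O(1) lookup per frame. The table size
 * (DRV_RX_PT_NUM) and the single entry below are made-up illustrations.
 *
 *	static struct libeth_rx_pt drv_pt_lkup[DRV_RX_PT_NUM] = {
 *		[10] = {
 *			.outer_ip	= LIBETH_RX_PT_OUTER_IPV4,
 *			.inner_prot	= LIBETH_RX_PT_INNER_TCP,
 *			.payload_layer	= LIBETH_RX_PT_PAYLOAD_L4,
 *		},
 *	};
 *
 *	for (u32 i = 0; i < DRV_RX_PT_NUM; i++)
 *		libeth_rx_pt_gen_hash_type(&drv_pt_lkup[i]);
 */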

/**
 * libeth_rx_pt_get_ip_ver - get IP version from a packet type structure
 * @pt: packet type params
 *
 * Wrapper to compile out the IPv6 code from the drivers when not supported
 * by the kernel.
 *
 * Return: @pt.outer_ip or stub for IPv6 when not compiled-in.
 */
static inline u32 libeth_rx_pt_get_ip_ver(struct libeth_rx_pt pt)
{
#if !IS_ENABLED(CONFIG_IPV6)
	switch (pt.outer_ip) {
	case LIBETH_RX_PT_OUTER_IPV4:
		return LIBETH_RX_PT_OUTER_IPV4;
	default:
		return LIBETH_RX_PT_OUTER_L2;
	}
#else
	return pt.outer_ip;
#endif
}

/* libeth_has_*() can be used to quickly check whether the HW metadata is
 * available to avoid further expensive processing such as descriptor reads.
 * They already check for the corresponding netdev feature to be enabled,
 * thus can be used as drop-in replacements.
 */

static inline bool libeth_rx_pt_has_checksum(const struct net_device *dev,
					     struct libeth_rx_pt pt)
{
	/* Non-zero _INNER* is only possible when _OUTER_IPV* is set,
	 * so it is enough to check only for the L4 type.
	 */
	return likely(pt.inner_prot > LIBETH_RX_PT_INNER_NONE &&
		      (dev->features & NETIF_F_RXCSUM));
}

static inline bool libeth_rx_pt_has_hash(const struct net_device *dev,
					 struct libeth_rx_pt pt)
{
	return likely(pt.payload_layer > LIBETH_RX_PT_PAYLOAD_NONE &&
		      (dev->features & NETIF_F_RXHASH));
}

/**
 * libeth_rx_pt_set_hash - fill in skb hash value based on the PT
 * @skb: skb to fill the hash in
 * @hash: 32-bit hash value from the descriptor
 * @pt: packet type
 */
static inline void libeth_rx_pt_set_hash(struct sk_buff *skb, u32 hash,
					 struct libeth_rx_pt pt)
{
	skb_set_hash(skb, hash, pt.payload_layer);
}
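
/* Hotpath sketch combining the feature check and the setter. rx_desc->rss_hash
 * is a hypothetical descriptor field and drv_pt_lkup[] is the driver's packet
 * type table (see the sketch above libeth_rx_pt_get_ip_ver()).
 *
 *	struct libeth_rx_pt pt = drv_pt_lkup[ptype];
 *
 *	if (libeth_rx_pt_has_hash(rxq->netdev, pt))
 *		libeth_rx_pt_set_hash(skb, le32_to_cpu(rx_desc->rss_hash), pt);
 */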

#endif /* __LIBETH_RX_H */