/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2024 Intel Corporation */

#ifndef __LIBETH_RX_H
#define __LIBETH_RX_H

#include <linux/if_vlan.h>

#include <net/page_pool/helpers.h>
#include <net/xdp.h>

/* Rx buffer management */

/* Space reserved in front of each frame */
#define LIBETH_SKB_HEADROOM	(NET_SKB_PAD + NET_IP_ALIGN)
/* Maximum headroom for worst-case calculations */
#define LIBETH_MAX_HEADROOM	LIBETH_SKB_HEADROOM
/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
#define LIBETH_RX_LL_LEN	(ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
/* Maximum supported L2-L4 header length */
#define LIBETH_MAX_HEAD		roundup_pow_of_two(max(MAX_HEADER, 256))

/* Always use order-0 pages */
#define LIBETH_RX_PAGE_ORDER	0
/* Pick a sane buffer stride and align to a cacheline boundary */
#define LIBETH_RX_BUF_STRIDE	SKB_DATA_ALIGN(128)
/* HW-writeable space in one buffer: truesize - headroom/tailroom, aligned */
#define LIBETH_RX_PAGE_LEN(hr)					\
	ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER),	\
		   LIBETH_RX_BUF_STRIDE)

/**
 * struct libeth_fqe - structure representing an Rx buffer (fill queue element)
 * @page: page holding the buffer
 * @offset: offset from the page start (to the headroom)
 * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
 *
 * Depending on the MTU, the API switches between the one-page-per-frame and
 * shared page models (to conserve memory on bigger-page platforms). In case
 * of the former, @offset is always 0 and @truesize is always %PAGE_SIZE.
 */
struct libeth_fqe {
	struct page		*page;
	u32			offset;
	u32			truesize;
} __aligned_largest;

/**
 * enum libeth_fqe_type - enum representing types of Rx buffers
 * @LIBETH_FQE_MTU: buffer size is determined by MTU
 * @LIBETH_FQE_SHORT: buffer size is smaller than MTU, for short frames
 * @LIBETH_FQE_HDR: buffer size is %LIBETH_MAX_HEAD-sized, for headers
 */
enum libeth_fqe_type {
	LIBETH_FQE_MTU		= 0U,
	LIBETH_FQE_SHORT,
	LIBETH_FQE_HDR,
};

/**
 * struct libeth_fq - structure representing a buffer (fill) queue
 * @fp: hotpath part of the structure
 * @pp: &page_pool for buffer management
 * @fqes: array of Rx buffers
 * @truesize: size to allocate per buffer, w/overhead
 * @count: number of descriptors/buffers the queue has
 * @type: type of the buffers this queue has
 * @hsplit: flag whether header split is enabled
 * @buf_len: HW-writeable length per each buffer
 * @nid: ID of the closest NUMA node with memory
 */
struct libeth_fq {
	struct_group_tagged(libeth_fq_fp, fp,
		struct page_pool	*pp;
		struct libeth_fqe	*fqes;

		u32			truesize;
		u32			count;
	);

	/* Cold fields */
	enum libeth_fqe_type	type:2;
	bool			hsplit:1;

	u32			buf_len;
	int			nid;
};

int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi);
void libeth_rx_fq_destroy(struct libeth_fq *fq);
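
/* Example (informational): a minimal fill-queue setup sketch, not taken from
 * any in-tree driver. The rxq structure and its members are hypothetical
 * driver-side names; only the libeth calls and &struct libeth_fq fields are
 * part of this API. The driver fills the cold fields, and the created queue
 * parameters are returned back through the same structure.
 *
 *	struct libeth_fq fq = {
 *		.count	= rxq->desc_count,
 *		.type	= LIBETH_FQE_MTU,
 *		.hsplit	= false,
 *		.nid	= NUMA_NO_NODE,
 *	};
 *	int err;
 *
 *	err = libeth_rx_fq_create(&fq, &rxq->napi);
 *	if (err)
 *		return err;
 *
 *	rxq->pp		= fq.pp;
 *	rxq->fqes	= fq.fqes;
 *	rxq->truesize	= fq.truesize;
 *	rxq->buf_len	= fq.buf_len;
 */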

/**
 * libeth_rx_alloc - allocate a new Rx buffer
 * @fq: fill queue to allocate for
 * @i: index of the buffer within the queue
 *
 * Return: DMA address to be passed to HW for Rx on successful allocation,
 * %DMA_MAPPING_ERROR otherwise.
 */
static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
{
	struct libeth_fqe *buf = &fq->fqes[i];

	buf->truesize = fq->truesize;
	buf->page = page_pool_dev_alloc(fq->pp, &buf->offset, &buf->truesize);
	if (unlikely(!buf->page))
		return DMA_MAPPING_ERROR;

	return page_pool_get_dma_addr(buf->page) + buf->offset +
	       fq->pp->p.offset;
}

void libeth_rx_recycle_slow(struct page *page);

/**
 * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
 * @fqe: buffer to process
 * @len: frame length from the descriptor
 *
 * Process the buffer after it's written by HW. The regular path is to
 * synchronize DMA for CPU, but in case of no data it will be immediately
 * recycled back to its PP.
 *
 * Return: true when there's data to process, false otherwise.
 */
static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
					  u32 len)
{
	struct page *page = fqe->page;

	/* Very rare, but possible case. The most common reason:
	 * the last fragment contained FCS only, which was then
	 * stripped by the HW.
	 */
	if (unlikely(!len)) {
		libeth_rx_recycle_slow(page);
		return false;
	}

	page_pool_dma_sync_for_cpu(page->pp, page, fqe->offset, len);

	return true;
}
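
/* Example (informational): a hedged sketch of the refill and completion
 * paths. rxq, desc, buf_id and len are hypothetical driver-side names;
 * only the libeth calls and the &struct libeth_fq layout are part of this
 * API. rxq->fq is assumed to be an embedded &struct libeth_fq, so its
 * hotpath part is reachable via the tagged group member @fp.
 *
 *	Refill, i.e. handing one buffer to HW:
 *
 *	dma_addr_t addr;
 *
 *	addr = libeth_rx_alloc(&rxq->fq.fp, buf_id);
 *	if (addr == DMA_MAPPING_ERROR)
 *		break;
 *	desc->addr = cpu_to_le64(addr);
 *
 *	Completion, i.e. touching the data only if HW actually wrote it:
 *
 *	if (!libeth_rx_sync_for_cpu(&rxq->fq.fqes[buf_id], len))
 *		continue;
 */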

/* Converting abstract packet type numbers into a software structure with
 * the packet parameters to do O(1) lookup on Rx.
 */

enum {
	LIBETH_RX_PT_OUTER_L2			= 0U,
	LIBETH_RX_PT_OUTER_IPV4,
	LIBETH_RX_PT_OUTER_IPV6,
};

enum {
	LIBETH_RX_PT_NOT_FRAG			= 0U,
	LIBETH_RX_PT_FRAG,
};

enum {
	LIBETH_RX_PT_TUNNEL_IP_NONE		= 0U,
	LIBETH_RX_PT_TUNNEL_IP_IP,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC_VLAN,
};

enum {
	LIBETH_RX_PT_TUNNEL_END_NONE		= 0U,
	LIBETH_RX_PT_TUNNEL_END_IPV4,
	LIBETH_RX_PT_TUNNEL_END_IPV6,
};

enum {
	LIBETH_RX_PT_INNER_NONE			= 0U,
	LIBETH_RX_PT_INNER_UDP,
	LIBETH_RX_PT_INNER_TCP,
	LIBETH_RX_PT_INNER_SCTP,
	LIBETH_RX_PT_INNER_ICMP,
	LIBETH_RX_PT_INNER_TIMESYNC,
};

#define LIBETH_RX_PT_PAYLOAD_NONE		PKT_HASH_TYPE_NONE
#define LIBETH_RX_PT_PAYLOAD_L2			PKT_HASH_TYPE_L2
#define LIBETH_RX_PT_PAYLOAD_L3			PKT_HASH_TYPE_L3
#define LIBETH_RX_PT_PAYLOAD_L4			PKT_HASH_TYPE_L4

struct libeth_rx_pt {
	u32					outer_ip:2;
	u32					outer_frag:1;
	u32					tunnel_type:3;
	u32					tunnel_end_prot:2;
	u32					tunnel_end_frag:1;
	u32					inner_prot:3;
	enum pkt_hash_types			payload_layer:2;

	u32					pad:2;
	enum xdp_rss_hash_type			hash_type:16;
};

void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);

/**
 * libeth_rx_pt_get_ip_ver - get IP version from a packet type structure
 * @pt: packet type params
 *
 * Wrapper to compile out the IPv6 code from the drivers when not supported
 * by the kernel.
 *
 * Return: @pt.outer_ip or stub for IPv6 when not compiled-in.
 */
static inline u32 libeth_rx_pt_get_ip_ver(struct libeth_rx_pt pt)
{
#if !IS_ENABLED(CONFIG_IPV6)
	switch (pt.outer_ip) {
	case LIBETH_RX_PT_OUTER_IPV4:
		return LIBETH_RX_PT_OUTER_IPV4;
	default:
		return LIBETH_RX_PT_OUTER_L2;
	}
#else
	return pt.outer_ip;
#endif
}

/* libeth_has_*() can be used to quickly check whether the HW metadata is
 * available to avoid further expensive processing such as descriptor reads.
 * They already check for the corresponding netdev feature to be enabled,
 * thus can be used as drop-in replacements.
 */

static inline bool libeth_rx_pt_has_checksum(const struct net_device *dev,
					     struct libeth_rx_pt pt)
{
	/* Non-zero _INNER* is only possible when _OUTER_IPV* is set,
	 * it is enough to check only for the L4 type.
	 */
	return likely(pt.inner_prot > LIBETH_RX_PT_INNER_NONE &&
		      (dev->features & NETIF_F_RXCSUM));
}

static inline bool libeth_rx_pt_has_hash(const struct net_device *dev,
					 struct libeth_rx_pt pt)
{
	return likely(pt.payload_layer > LIBETH_RX_PT_PAYLOAD_NONE &&
		      (dev->features & NETIF_F_RXHASH));
}

/**
 * libeth_rx_pt_set_hash - fill in skb hash value based on the PT
 * @skb: skb to fill the hash in
 * @hash: 32-bit hash value from the descriptor
 * @pt: packet type
 */
static inline void libeth_rx_pt_set_hash(struct sk_buff *skb, u32 hash,
					 struct libeth_rx_pt pt)
{
	skb_set_hash(skb, hash, pt.payload_layer);
}

#endif /* __LIBETH_RX_H */