i40e: Initial support to add hw hints for xdp
- Parse the completed rx descriptors and the packet headers to populate
the metadata required by XDP programs
- The value of keeping the IP addresses and ports is questionable.
If needed, they can be enabled through conditional compilation
(PARSE_HEADERS)
- The number of cycles consumed for processing packet headers
is logged with trace_printk()
- A module parameter (xdp_hint_level) has been introduced to enable
populating the hw hints
Change-Id: Ifa0920ed24c458fb6022201e1a9dae21071b7133
Signed-off-by: Jayaprakash Shanmugam <jayaprakash.shanmugam@intel.com>
Acked-by: PJ Waskiewicz <peter.waskiewicz.jr@intel.com>
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 7a80652..226c0d4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -602,10 +602,10 @@
u32 ioremap_len;
u32 fd_inv;
u16 phy_led_val;
-
u16 override_q_count;
u16 last_sw_conf_flags;
u16 last_sw_conf_valid_flags;
+ u16 xdp_hint_level;
};
/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index c944bd1..45ba99fa 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -87,6 +87,10 @@
module_param(debug, uint, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all), Debug mask (0x8XXXXXXX)");
+/* Level of XDP hardware hints to populate; copied into the u16
+ * pf->xdp_hint_level, so the parameter uses the matching unsigned 16-bit
+ * type (a negative int would otherwise truncate to a non-zero level).
+ * NOTE(review): the only visible consumer copies this value once; if nothing
+ * else re-reads it, later writes through sysfs (mode 0644) have no effect -
+ * confirm whether 0444 is what is wanted.
+ */
+static unsigned short xdp_hint_level;
+module_param(xdp_hint_level, ushort, 0644);
+MODULE_PARM_DESC(xdp_hint_level, "Populate XDP hints 0=nil 1=minimal 2=maximal");
+
MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver");
MODULE_LICENSE("GPL");
@@ -14034,6 +14038,8 @@
pf->hw_features |= I40E_HW_PHY_CONTROLS_LEDS;
if (pf->hw.device_id == I40E_DEV_ID_SFP_I_X722)
pf->hw_features |= I40E_HW_HAVE_CRT_RETIMER;
+
+ pf->xdp_hint_level = xdp_hint_level;
/* print a string summarizing features */
i40e_print_features(pf);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 8ffb745..09536d7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -5,6 +5,7 @@
#include <net/busy_poll.h>
#include <linux/bpf_trace.h>
#include <net/xdp.h>
+#include <asm/msr.h>
#include "i40e.h"
#include "i40e_trace.h"
#include "i40e_prototype.h"
@@ -2293,6 +2294,160 @@
*/
wmb();
writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+};
+
+/* Fixed header sizes used when computing the L3/L4 offsets for XDP hints.
+ * File-local: kept static so they do not leak into the global namespace.
+ */
+static const size_t len_ethhdr = sizeof(struct ethhdr);
+static const size_t len_ipv4hdr = sizeof(struct iphdr);
+static const size_t len_ipv6hdr = sizeof(struct ipv6hdr);
+
+/* Bit positions inside the xdp_hw_hints.ptype bitmap */
+enum XDP_META_PTYPE {
+	XDP_META_PTYPE_VALID = 0,	/* hardware packet type was recognised */
+	XDP_META_PTYPE_IPV6,		/* outer IP header is IPv6 */
+	XDP_META_PTYPE_TCP,		/* inner protocol is TCP */
+	XDP_META_PTYPE_UDP,		/* inner protocol is UDP */
+	XDP_META_PTYPE_ICMP,		/* inner protocol is ICMP */
+	XDP_META_PTYPE_END		/* number of flags; sizes the bitmap */
+};
+
+/* #define PARSE_HEADERS */
+/*
+ * Hardware-derived packet hints handed to XDP programs.
+ * The structure is copied as-is into the area directly in front of the
+ * packet data (xdp->data_meta), so its layout is what the XDP program sees.
+ * NOTE(review): DECLARE_BITMAP is sized in unsigned longs, so the layout
+ * presumably differs between 32-bit and 64-bit builds - confirm consumers.
+ */
+struct xdp_hw_hints
+{
+ /* XDP_META_PTYPE_* flags describing the packet */
+ DECLARE_BITMAP(ptype,XDP_META_PTYPE_END);
+ /* RSS hash taken from the Rx descriptor; left 0 when not populated */
+ u32 hash;
+ /* Byte offset from the start of packet data to the L3 header */
+ u8 l3hdroffset;
+ /* Byte offset from the start of packet data to the L4 header */
+ u8 l4hdroffset;
+ /* IPv4 protocol field or IPv6 next-header field */
+ u8 proto;
+ /* Outer IP fragment indication from the decoded packet type */
+ bool fragmented;
+#ifdef PARSE_HEADERS
+ /* Source IP address copied from the IPv4/IPv6 header */
+ union {
+ __be32 addr4;
+ __be32 addr6[4];
+ }srcip;
+ /* Destination IP address copied from the IPv4/IPv6 header */
+ union {
+ __be32 addr4;
+ __be32 addr6[4];
+ }dstip;
+ /* Source and destination ports taken from the UDP/TCP header */
+ __u16 srcport;
+ __u16 dstport;
+#endif
+};
+
+/**
+ * i40e_process_hints - Retrieve the packet info from hw and package it
+ * @rxdesc: Rx descriptor (write-back format) describing the packet
+ * @xdp: XDP buffer; xdp->data must already point at the packet
+ *
+ * Decodes the packet type and RSS hash from the descriptor, reads the L3
+ * header from the packet buffer to obtain the protocol and the L4 offset,
+ * then copies the resulting struct xdp_hw_hints directly in front of
+ * xdp->data and points xdp->data_meta at it.
+ *
+ * A hints record is always written; it is all zero (XDP_META_PTYPE_VALID
+ * clear) when the descriptor is not done or the packet type is unknown.
+ * The RSS hash is filled in for every recognised packet, fragments
+ * included.
+ *
+ * The caller must guarantee at least sizeof(struct xdp_hw_hints) bytes of
+ * headroom in front of xdp->data. xdp->data_end is never consulted, so
+ * header reads are not bounds-checked here.
+ *
+ * The cycles spent in this function are logged through trace_printk().
+ **/
+static void i40e_process_hints(union i40e_rx_desc *rxdesc,
+			       struct xdp_buff *xdp)
+{
+	struct i40e_rx_ptype_decoded ptype;
+	struct xdp_hw_hints hints = {};
+	unsigned long long start, end;
+	u64 qword;
+
+	start = rdtsc();
+
+	/* Descriptor words are little endian; convert before testing bits */
+	qword = le64_to_cpu(rxdesc->wb.qword1.status_error_len);
+
+	/* Nothing to decode unless the descriptor-done bit is set */
+	if (!(qword & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
+		goto write_meta;
+
+	ptype = i40e_ptype_lookup[(qword & I40E_RXD_QW1_PTYPE_MASK) >>
+				  I40E_RXD_QW1_PTYPE_SHIFT];
+	if (!ptype.known)
+		goto write_meta;
+
+	/* hints lives on this stack frame only, so the non-atomic bit
+	 * helpers are sufficient; the bitmap starts out all clear.
+	 */
+	__set_bit(XDP_META_PTYPE_VALID, hints.ptype);
+
+	/* Take the RSS hash only when the filter status reports one. Done
+	 * before the trace below so the logged hash is the real value.
+	 */
+	if (((qword >> I40E_RX_DESC_STATUS_FLTSTAT_SHIFT) &
+	     I40E_RX_DESC_FLTSTAT_RSS_HASH) == I40E_RX_DESC_FLTSTAT_RSS_HASH)
+		hints.hash = le32_to_cpu(rxdesc->wb.qword0.hi_dword.rss);
+
+	hints.l3hdroffset = len_ethhdr;
+	/*
+	 * Adjustment for vlan tag - Not needed for loadbalancers
+	 *
+	 * if (rxdesc->wb.qword1.status_error_len &
+	 *     (0x1ULL << (I40E_RX_DESC_STATUS_L2TAG1P_SHIFT + I40E_RXD_QW1_STATUS_SHIFT)))
+	 *	hints.l3hdroffset += 4;
+	 */
+
+	if (ptype.outer_ip == I40E_RX_PTYPE_OUTER_IP) {
+		void *l3hdr = xdp->data + hints.l3hdroffset;
+
+		if (ptype.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
+			/* IPv4 packet */
+			const struct iphdr *ip4h = l3hdr;
+
+			hints.proto = ip4h->protocol;
+#ifdef PARSE_HEADERS
+			hints.srcip.addr4 = ip4h->saddr;
+			hints.dstip.addr4 = ip4h->daddr;
+#endif
+			/* ihl counts 32-bit words, options included */
+			hints.l4hdroffset = hints.l3hdroffset + ip4h->ihl * 4;
+		} else if (ptype.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
+			/* IPv6 packet */
+			const struct ipv6hdr *ip6h = l3hdr;
+
+			__set_bit(XDP_META_PTYPE_IPV6, hints.ptype);
+			hints.proto = ip6h->nexthdr;
+#ifdef PARSE_HEADERS
+			memcpy(hints.srcip.addr6,
+			       ip6h->saddr.s6_addr32, 16);
+			memcpy(hints.dstip.addr6,
+			       ip6h->daddr.s6_addr32, 16);
+#endif
+			/* Todo: Adjust for IP Options */
+			hints.l4hdroffset = hints.l3hdroffset + len_ipv6hdr;
+		}
+	}
+
+	hints.fragmented = ptype.outer_frag;
+	trace_printk("ptype=%d IPV6= %d inner_proto= %d hash %x fragmented:%d l3hdroffset:%d"
+		     " l4hdroffset:%d\n", ptype.ptype, ptype.outer_ip_ver,
+		     ptype.inner_prot, hints.hash, hints.fragmented,
+		     hints.l3hdroffset, hints.l4hdroffset);
+
+	/* L4 information is not reported for fragments */
+	if (hints.fragmented)
+		goto write_meta;
+
+	if (ptype.inner_prot == I40E_RX_PTYPE_INNER_PROT_UDP) {
+#ifdef PARSE_HEADERS
+		const struct udphdr *udph = xdp->data + hints.l4hdroffset;
+
+		/* ports are big endian on the wire */
+		hints.srcport = ntohs(udph->source);
+		hints.dstport = ntohs(udph->dest);
+#endif
+		__set_bit(XDP_META_PTYPE_UDP, hints.ptype);
+	} else if (ptype.inner_prot == I40E_RX_PTYPE_INNER_PROT_TCP) {
+#ifdef PARSE_HEADERS
+		const struct tcphdr *tcph = xdp->data + hints.l4hdroffset;
+
+		/* ports are big endian on the wire */
+		hints.srcport = ntohs(tcph->source);
+		hints.dstport = ntohs(tcph->dest);
+#endif
+		__set_bit(XDP_META_PTYPE_TCP, hints.ptype);
+	} else if (ptype.inner_prot == I40E_RX_PTYPE_INNER_PROT_ICMP) {
+		__set_bit(XDP_META_PTYPE_ICMP, hints.ptype);
+	}
+
+#ifdef PARSE_HEADERS
+	trace_printk("SrcAddr=%X DestAddr=%X srcport=%d dstport=%d\n",
+		     hints.srcip.addr4, hints.dstip.addr4, hints.srcport,
+		     hints.dstport);
+#endif
+
+write_meta:
+	/* Place the hints directly in front of the packet data */
+	xdp->data_meta = xdp->data - sizeof(hints);
+	memcpy(xdp->data_meta, &hints, sizeof(hints));
+
+	end = rdtsc();
+	trace_printk("Total cycles: %llu\n", end - start);
}
/**
@@ -2313,6 +2468,7 @@
struct sk_buff *skb = rx_ring->skb;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
bool failure = false, xdp_xmit = false;
+ struct i40e_pf *pf = rx_ring->vsi->back;
struct xdp_buff xdp;
xdp.rxq = &rx_ring->xdp_rxq;
@@ -2367,8 +2523,14 @@
xdp.data_meta = xdp.data;
xdp.data_hard_start = xdp.data -
i40e_rx_offset(rx_ring);
- xdp.data_end = xdp.data + size;
+ /* Retrieve hints from HW if there is enough room
+ * available */
+ if (pf->xdp_hint_level && (i40e_rx_offset(rx_ring) >
+ sizeof(struct xdp_hw_hints))) {
+ i40e_process_hints(rx_desc, &xdp);
+ }
+ xdp.data_end = xdp.data + size;
skb = i40e_run_xdp(rx_ring, &xdp);
}