@@ -781,10 +781,13 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
else
insertion_cap = insertion_support->inner;
- if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1)
+ if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) {
txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1;
- else if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2)
+ PMD_INIT_LOG(DEBUG, "VLAN insertion_cap: L2TAG1");
+ } else if (insertion_cap & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2) {
txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2;
+ PMD_INIT_LOG(DEBUG, "VLAN insertion_cap: L2TAG2");
+ }
} else {
txq->vlan_flag = IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1;
}
@@ -3229,7 +3232,7 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
if (check_ret >= 0 &&
rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
/* SSE and AVX2 not support offload path yet. */
- if (check_ret == IAVF_VECTOR_PATH) {
+ if (check_ret == IAVF_VECTOR_PATH || check_ret == IAVF_VECTOR_CTX_PATH) {
use_sse = true;
if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
@@ -3261,11 +3264,21 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512;
PMD_DRV_LOG(DEBUG, "Using AVX512 Vector Tx (port %d).",
dev->data->port_id);
- } else {
+ } else if (check_ret == IAVF_VECTOR_OFFLOAD_PATH) {
dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_offload;
dev->tx_pkt_prepare = iavf_prep_pkts;
PMD_DRV_LOG(DEBUG, "Using AVX512 OFFLOAD Vector Tx (port %d).",
dev->data->port_id);
+ } else if (check_ret == IAVF_VECTOR_CTX_PATH) {
+ dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_ctx;
+ dev->tx_pkt_prepare = iavf_prep_pkts;
+ PMD_DRV_LOG(DEBUG, "Using AVX512 CONTEXT Vector Tx (port %d).",
+ dev->data->port_id);
+ } else {
+ dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_ctx_offload;
+ dev->tx_pkt_prepare = iavf_prep_pkts;
+ PMD_DRV_LOG(DEBUG, "Using AVX512 CONTEXT OFFLOAD Vector Tx (port %d).",
+ dev->data->port_id);
}
}
#endif
@@ -26,8 +26,6 @@
#define IAVF_TX_NO_VECTOR_FLAGS ( \
RTE_ETH_TX_OFFLOAD_MULTI_SEGS | \
RTE_ETH_TX_OFFLOAD_TCP_TSO | \
- RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM | \
- RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM | \
RTE_ETH_TX_OFFLOAD_SECURITY)
#define IAVF_TX_VECTOR_OFFLOAD ( \
@@ -38,14 +36,29 @@
RTE_ETH_TX_OFFLOAD_UDP_CKSUM | \
RTE_ETH_TX_OFFLOAD_TCP_CKSUM)
+#define IAVF_TX_VECTOR_OFFLOAD_CTX ( \
+ RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM | \
+ RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM)
+
#define IAVF_RX_VECTOR_OFFLOAD ( \
RTE_ETH_RX_OFFLOAD_CHECKSUM | \
RTE_ETH_RX_OFFLOAD_SCTP_CKSUM | \
RTE_ETH_RX_OFFLOAD_VLAN | \
RTE_ETH_RX_OFFLOAD_RSS_HASH)
+/**
+ * According to the VLAN capabilities returned by the driver and FW, the VLAN TCI
+ * needs to be inserted into the L2TAG1 or L2TAG2 field.
+ * If L2TAG1, it should be inserted into the L2TAG1 field in the data desc.
+ * If L2TAG2, it should be inserted into the L2TAG2 field in the ctx desc.
+ * In addition, tunneling parameters and other fields need to be configured in
+ * the ctx desc if outer checksum offload is enabled.
+ */
+
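+/**
+ * Tx path selection (see iavf_tx_vec_queue_default()):
+ * IAVF_VECTOR_PATH - data descs only, no extra Tx offloads.
+ * IAVF_VECTOR_OFFLOAD_PATH - checksum/VLAN offloads filled in the data desc.
+ * IAVF_VECTOR_CTX_PATH - a ctx desc is needed (VLAN tag in L2TAG2) but no
+ * offload flags are requested.
+ * IAVF_VECTOR_CTX_OFFLOAD_PATH - ctx desc plus checksum/VLAN or outer
+ * checksum offloads.
+ */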
#define IAVF_VECTOR_PATH 0
#define IAVF_VECTOR_OFFLOAD_PATH 1
+#define IAVF_VECTOR_CTX_PATH 2
+#define IAVF_VECTOR_CTX_OFFLOAD_PATH 3
#define DEFAULT_TX_RS_THRESH 32
#define DEFAULT_TX_FREE_THRESH 32
@@ -281,6 +294,7 @@ struct iavf_tx_queue {
#define IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2 BIT(1)
uint8_t vlan_flag;
uint8_t tc;
+ uint8_t use_ctx:1; /* if the ctx desc is used, a packet needs two descriptors */
};
/* Offload features */
@@ -713,6 +727,10 @@ uint16_t iavf_xmit_pkts_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t iavf_xmit_pkts_vec_avx512_offload(void *tx_queue,
struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+uint16_t iavf_xmit_pkts_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+uint16_t iavf_xmit_pkts_vec_avx512_ctx_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
int iavf_txq_vec_setup_avx512(struct iavf_tx_queue *txq);
uint8_t iavf_proto_xtr_type_to_rxdid(uint8_t xtr_type);
@@ -1782,13 +1782,13 @@ iavf_tx_free_bufs_avx512(struct iavf_tx_queue *txq)
rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE))
return 0;
- n = txq->rs_thresh;
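+ /* rs_thresh and next_dd count HW descriptors; when ctx descs are used
+ * each SW ring entry maps to two HW descriptors, so scale both down
+ */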
+ n = txq->rs_thresh >> txq->use_ctx;
/* first buffer to free from S/W ring is at index
* tx_next_dd - (tx_rs_thresh-1)
*/
txep = (void *)txq->sw_ring;
- txep += txq->next_dd - (n - 1);
+ txep += (txq->next_dd >> txq->use_ctx) - (n - 1);
if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE && (n & 31) == 0) {
struct rte_mempool *mp = txep[0].mbuf->pool;
@@ -1887,14 +1887,15 @@ tx_backlog_entry_avx512(struct iavf_tx_vec_entry *txep,
static __rte_always_inline void
iavf_vtx1(volatile struct iavf_tx_desc *txdp,
- struct rte_mbuf *pkt, uint64_t flags, bool offload)
+ struct rte_mbuf *pkt, uint64_t flags,
+ bool offload, uint8_t vlan_flag)
{
uint64_t high_qw =
(IAVF_TX_DESC_DTYPE_DATA |
((uint64_t)flags << IAVF_TXD_QW1_CMD_SHIFT) |
((uint64_t)pkt->data_len << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT));
if (offload)
- iavf_txd_enable_offload(pkt, &high_qw);
+ iavf_txd_enable_offload(pkt, &high_qw, vlan_flag);
__m128i descriptor = _mm_set_epi64x(high_qw,
pkt->buf_iova + pkt->data_off);
@@ -1905,15 +1906,15 @@ iavf_vtx1(volatile struct iavf_tx_desc *txdp,
#define IAVF_TX_OFF_MASK 0x55
static __rte_always_inline void
iavf_vtx(volatile struct iavf_tx_desc *txdp,
- struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags,
- bool offload)
+ struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags,
+ bool offload, uint8_t vlan_flag)
{
const uint64_t hi_qw_tmpl = (IAVF_TX_DESC_DTYPE_DATA |
((uint64_t)flags << IAVF_TXD_QW1_CMD_SHIFT));
/* if unaligned on 32-bit boundary, do one to align */
if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
- iavf_vtx1(txdp, *pkt, flags, offload);
+ iavf_vtx1(txdp, *pkt, flags, offload, vlan_flag);
nb_pkts--, txdp++, pkt++;
}
@@ -1923,26 +1924,24 @@ iavf_vtx(volatile struct iavf_tx_desc *txdp,
hi_qw_tmpl |
((uint64_t)pkt[3]->data_len <<
IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
- if (offload)
- iavf_txd_enable_offload(pkt[3], &hi_qw3);
uint64_t hi_qw2 =
hi_qw_tmpl |
((uint64_t)pkt[2]->data_len <<
IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
- if (offload)
- iavf_txd_enable_offload(pkt[2], &hi_qw2);
uint64_t hi_qw1 =
hi_qw_tmpl |
((uint64_t)pkt[1]->data_len <<
IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
- if (offload)
- iavf_txd_enable_offload(pkt[1], &hi_qw1);
uint64_t hi_qw0 =
hi_qw_tmpl |
((uint64_t)pkt[0]->data_len <<
IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
- if (offload)
- iavf_txd_enable_offload(pkt[0], &hi_qw0);
+ if (offload) {
+ iavf_txd_enable_offload(pkt[3], &hi_qw3, vlan_flag);
+ iavf_txd_enable_offload(pkt[2], &hi_qw2, vlan_flag);
+ iavf_txd_enable_offload(pkt[1], &hi_qw1, vlan_flag);
+ iavf_txd_enable_offload(pkt[0], &hi_qw0, vlan_flag);
+ }
__m512i desc0_3 =
_mm512_set_epi64
@@ -1959,11 +1958,187 @@ iavf_vtx(volatile struct iavf_tx_desc *txdp,
/* do any last ones */
while (nb_pkts) {
- iavf_vtx1(txdp, *pkt, flags, offload);
+ iavf_vtx1(txdp, *pkt, flags, offload, vlan_flag);
txdp++, pkt++, nb_pkts--;
}
}
+static __rte_always_inline void
+iavf_fill_ctx_desc_tunneling_avx512(uint64_t *low_ctx_qw, struct rte_mbuf *pkt)
+{
+ if (pkt->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+ uint64_t eip_typ = IAVF_TX_CTX_DESC_EIPT_NONE;
+ uint64_t eip_len = 0;
+ uint64_t eip_noinc = 0;
+ /* Default - IP_ID is incremented in each segment of LSO */
+
+ switch (pkt->ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 |
+ RTE_MBUF_F_TX_OUTER_IPV6 |
+ RTE_MBUF_F_TX_OUTER_IP_CKSUM)) {
+ case RTE_MBUF_F_TX_OUTER_IPV4:
+ eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV4_NO_CHECKSUM_OFFLOAD;
+ eip_len = pkt->outer_l3_len >> 2;
+ break;
+ case RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IP_CKSUM:
+ eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV4_CHECKSUM_OFFLOAD;
+ eip_len = pkt->outer_l3_len >> 2;
+ break;
+ case RTE_MBUF_F_TX_OUTER_IPV6:
+ eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV6;
+ eip_len = pkt->outer_l3_len >> 2;
+ break;
+ }
+
+ /* L4TUNT: L4 Tunneling Type */
+ switch (pkt->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+ case RTE_MBUF_F_TX_TUNNEL_IPIP:
+ /* for non UDP / GRE tunneling, set to 00b */
+ break;
+ case RTE_MBUF_F_TX_TUNNEL_VXLAN:
+ case RTE_MBUF_F_TX_TUNNEL_VXLAN_GPE:
+ case RTE_MBUF_F_TX_TUNNEL_GTP:
+ case RTE_MBUF_F_TX_TUNNEL_GENEVE:
+ eip_typ |= IAVF_TXD_CTX_UDP_TUNNELING;
+ break;
+ case RTE_MBUF_F_TX_TUNNEL_GRE:
+ eip_typ |= IAVF_TXD_CTX_GRE_TUNNELING;
+ break;
+ default:
+ PMD_TX_LOG(ERR, "Tunnel type not supported");
+ return;
+ }
+
+ /* L4TUNLEN: L4 Tunneling Length, in Words
+ *
+ * We depend on the app to set rte_mbuf.l2_len correctly.
+ * For IP in GRE it should be set to the length of the GRE
+ * header;
+ * For MAC in GRE or MAC in UDP it should be set to the length
+ * of the GRE or UDP headers plus the inner MAC up to and
+ * including its last Ethertype.
+ * If MPLS labels exist, it should include them as well.
+ */
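+ /* e.g. for a plain VXLAN (MAC in UDP) packet, l2_len is typically
+ * outer UDP (8) + VXLAN (8) + inner Ethernet (14) = 30 bytes,
+ * i.e. L4TUNLEN = 15 words
+ */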
+ eip_typ |= (pkt->l2_len >> 1) << IAVF_TXD_CTX_QW0_NATLEN_SHIFT;
+
+ /**
+ * Calculate the tunneling UDP checksum.
+ * It shall be set only if L4TUNT = 01b and EIPT is not zero
+ */
+ if ((eip_typ & (IAVF_TX_CTX_EXT_IP_IPV4 |
+ IAVF_TX_CTX_EXT_IP_IPV6 |
+ IAVF_TX_CTX_EXT_IP_IPV4_NO_CSUM)) &&
+ (eip_typ & IAVF_TXD_CTX_UDP_TUNNELING) &&
+ (pkt->ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM))
+ eip_typ |= IAVF_TXD_CTX_QW0_L4T_CS_MASK;
+
+ *low_ctx_qw = eip_typ << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPT_SHIFT |
+ eip_len << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPLEN_SHIFT |
+ eip_noinc << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIP_NOINC_SHIFT;
+
+ } else {
+ *low_ctx_qw = 0;
+ }
+}
+
+static __rte_always_inline void
+ctx_vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt,
+ uint64_t flags, bool offload, uint8_t vlan_flag)
+{
+ uint64_t high_ctx_qw = IAVF_TX_DESC_DTYPE_CONTEXT;
+ uint64_t low_ctx_qw = 0;
+
+ if (((pkt->ol_flags & RTE_MBUF_F_TX_VLAN) || offload)) {
+ if (offload)
+ iavf_fill_ctx_desc_tunneling_avx512(&low_ctx_qw, pkt);
+ if ((pkt->ol_flags & RTE_MBUF_F_TX_VLAN) ||
+ (vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2)) {
+ high_ctx_qw |= IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+ low_ctx_qw |= (uint64_t)pkt->vlan_tci << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
+ }
+ }
+ uint64_t high_data_qw = (IAVF_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << IAVF_TXD_QW1_CMD_SHIFT) |
+ ((uint64_t)pkt->data_len << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT));
+ if (offload)
+ iavf_txd_enable_offload(pkt, &high_data_qw, vlan_flag);
+
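+ /* the low 128 bits hold the ctx desc, the high 128 bits the data desc;
+ * a single 256-bit store fills both ring slots
+ */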
+ __m256i ctx_data_desc = _mm256_set_epi64x(high_data_qw, pkt->buf_iova + pkt->data_off,
+ high_ctx_qw, low_ctx_qw);
+
+ _mm256_storeu_si256((__m256i *)txdp, ctx_data_desc);
+}
+
+static __rte_always_inline void
+ctx_vtx(volatile struct iavf_tx_desc *txdp,
+ struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags,
+ bool offload, uint8_t vlan_flag)
+{
+ uint64_t hi_data_qw_tmpl = (IAVF_TX_DESC_DTYPE_DATA |
+ ((uint64_t)flags << IAVF_TXD_QW1_CMD_SHIFT));
+
+ /* if unaligned on a 32-byte boundary, do one to align */
+ if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
+ ctx_vtx1(txdp, *pkt, flags, offload, vlan_flag);
+ nb_pkts--, txdp++, pkt++;
+ }
+
+ for (; nb_pkts > 1; txdp += 4, pkt += 2, nb_pkts -= 2) {
+ uint64_t hi_ctx_qw1 = IAVF_TX_DESC_DTYPE_CONTEXT;
+ uint64_t hi_ctx_qw0 = IAVF_TX_DESC_DTYPE_CONTEXT;
+ uint64_t low_ctx_qw1 = 0;
+ uint64_t low_ctx_qw0 = 0;
+ uint64_t hi_data_qw1 = 0;
+ uint64_t hi_data_qw0 = 0;
+
+ hi_data_qw1 = hi_data_qw_tmpl |
+ ((uint64_t)pkt[1]->data_len <<
+ IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+ hi_data_qw0 = hi_data_qw_tmpl |
+ ((uint64_t)pkt[0]->data_len <<
+ IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
+
+ if (pkt[1]->ol_flags & RTE_MBUF_F_TX_VLAN) {
+ if (vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+ hi_ctx_qw1 |=
+ IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+ low_ctx_qw1 |=
+ (uint64_t)pkt[1]->vlan_tci << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
+ } else {
+ hi_data_qw1 |=
+ (uint64_t)pkt[1]->vlan_tci << IAVF_TXD_QW1_L2TAG1_SHIFT;
+ }
+ }
+
+ if (pkt[0]->ol_flags & RTE_MBUF_F_TX_VLAN) {
+ if (vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+ hi_ctx_qw0 |=
+ IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT;
+ low_ctx_qw0 |=
+ (uint64_t)pkt[0]->vlan_tci << IAVF_TXD_CTX_QW0_L2TAG2_PARAM;
+ } else {
+ hi_data_qw0 |=
+ (uint64_t)pkt[0]->vlan_tci << IAVF_TXD_QW1_L2TAG1_SHIFT;
+ }
+ }
+
+ if (offload) {
+ iavf_txd_enable_offload(pkt[1], &hi_data_qw1, vlan_flag);
+ iavf_txd_enable_offload(pkt[0], &hi_data_qw0, vlan_flag);
+ }
+
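+ /* pack two (ctx desc, data desc) pairs: 128-bit lanes 0-1 carry
+ * packet 0, lanes 2-3 carry packet 1
+ */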
+ __m512i desc0_3 =
+ _mm512_set_epi64
+ (hi_data_qw1, pkt[1]->buf_iova + pkt[1]->data_off,
+ hi_ctx_qw1, low_ctx_qw1,
+ hi_data_qw0, pkt[0]->buf_iova + pkt[0]->data_off,
+ hi_ctx_qw0, low_ctx_qw0);
+ _mm512_storeu_si512((void *)txdp, desc0_3);
+ }
+
+ if (nb_pkts)
+ ctx_vtx1(txdp, *pkt, flags, offload, vlan_flag);
+}
+
static __rte_always_inline uint16_t
iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts, bool offload)
@@ -1994,11 +2169,11 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
if (nb_commit >= n) {
tx_backlog_entry_avx512(txep, tx_pkts, n);
- iavf_vtx(txdp, tx_pkts, n - 1, flags, offload);
+ iavf_vtx(txdp, tx_pkts, n - 1, flags, offload, txq->vlan_flag);
tx_pkts += (n - 1);
txdp += (n - 1);
- iavf_vtx1(txdp, *tx_pkts++, rs, offload);
+ iavf_vtx1(txdp, *tx_pkts++, rs, offload, txq->vlan_flag);
nb_commit = (uint16_t)(nb_commit - n);
@@ -2013,7 +2188,7 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_backlog_entry_avx512(txep, tx_pkts, nb_commit);
- iavf_vtx(txdp, tx_pkts, nb_commit, flags, offload);
+ iavf_vtx(txdp, tx_pkts, nb_commit, flags, offload, txq->vlan_flag);
tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->next_rs) {
@@ -2031,6 +2206,73 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
return nb_pkts;
}
+static __rte_always_inline uint16_t
+iavf_xmit_fixed_burst_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts, bool offload)
+{
+ struct iavf_tx_queue *txq = (struct iavf_tx_queue *)tx_queue;
+ volatile struct iavf_tx_desc *txdp;
+ struct iavf_tx_vec_entry *txep;
+ uint16_t n, nb_commit, nb_mbuf, tx_id;
+ /* bit 2 is reserved and must be set to 1 according to the spec */
+ uint64_t flags = IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_ICRC;
+ uint64_t rs = IAVF_TX_DESC_CMD_RS | flags;
+
+ if (txq->nb_free < txq->free_thresh)
+ iavf_tx_free_bufs_avx512(txq);
+
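+ /* each packet needs one ctx desc and one data desc, so nb_commit counts
+ * ring slots (two per packet) and is rounded down to an even number
+ */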
+ nb_commit = (uint16_t)RTE_MIN(txq->nb_free, nb_pkts << 1);
+ nb_commit &= 0xFFFE;
+ if (unlikely(nb_commit == 0))
+ return 0;
+
+ nb_pkts = nb_commit >> 1;
+ tx_id = txq->tx_tail;
+ txdp = &txq->tx_ring[tx_id];
+ txep = (void *)txq->sw_ring;
+ txep += (tx_id >> 1);
+
+ txq->nb_free = (uint16_t)(txq->nb_free - nb_commit);
+ n = (uint16_t)(txq->nb_tx_desc - tx_id);
+
+ if (n != 0 && nb_commit >= n) {
+ nb_mbuf = n >> 1;
+ tx_backlog_entry_avx512(txep, tx_pkts, nb_mbuf);
+
+ ctx_vtx(txdp, tx_pkts, nb_mbuf - 1, flags, offload, txq->vlan_flag);
+ tx_pkts += (nb_mbuf - 1);
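+ /* ctx_vtx() above wrote 2 * (nb_mbuf - 1) == n - 2 descriptors */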
+ txdp += (n - 2);
+ ctx_vtx1(txdp, *tx_pkts++, rs, offload, txq->vlan_flag);
+
+ nb_commit = (uint16_t)(nb_commit - n);
+
+ txq->next_rs = (uint16_t)(txq->rs_thresh - 1);
+ tx_id = 0;
+ /* avoid reaching the end of the ring */
+ txdp = txq->tx_ring;
+ txep = (void *)txq->sw_ring;
+ }
+
+ nb_mbuf = nb_commit >> 1;
+ tx_backlog_entry_avx512(txep, tx_pkts, nb_mbuf);
+
+ ctx_vtx(txdp, tx_pkts, nb_mbuf, flags, offload, txq->vlan_flag);
+ tx_id = (uint16_t)(tx_id + nb_commit);
+
+ if (tx_id > txq->next_rs) {
+ txq->tx_ring[txq->next_rs].cmd_type_offset_bsz |=
+ rte_cpu_to_le_64(((uint64_t)IAVF_TX_DESC_CMD_RS) <<
+ IAVF_TXD_QW1_CMD_SHIFT);
+ txq->next_rs =
+ (uint16_t)(txq->next_rs + txq->rs_thresh);
+ }
+
+ txq->tx_tail = tx_id;
+
+ IAVF_PCI_REG_WC_WRITE(txq->qtx_tail, txq->tx_tail);
+ return nb_pkts;
+}
+
static __rte_always_inline uint16_t
iavf_xmit_pkts_vec_avx512_cmn(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts, bool offload)
@@ -2071,9 +2313,11 @@ iavf_tx_queue_release_mbufs_avx512(struct iavf_tx_queue *txq)
if (!txq->sw_ring || txq->nb_free == max_desc)
return;
- i = txq->next_dd - txq->rs_thresh + 1;
+ i = (txq->next_dd >> txq->use_ctx) + 1 -
+ (txq->rs_thresh >> txq->use_ctx);
+
if (txq->tx_tail < i) {
- for (; i < txq->nb_tx_desc; i++) {
+ for (; i < (unsigned int)(txq->nb_tx_desc >> txq->use_ctx); i++) {
rte_pktmbuf_free_seg(swr[i].mbuf);
swr[i].mbuf = NULL;
}
@@ -2094,3 +2338,41 @@ iavf_xmit_pkts_vec_avx512_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
{
return iavf_xmit_pkts_vec_avx512_cmn(tx_queue, tx_pkts, nb_pkts, true);
}
+
+static __rte_always_inline uint16_t
+iavf_xmit_pkts_vec_avx512_ctx_cmn(void *tx_queue, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts, bool offload)
+{
+ uint16_t nb_tx = 0;
+ struct iavf_tx_queue *txq = (struct iavf_tx_queue *)tx_queue;
+
+ while (nb_pkts) {
+ uint16_t ret, num;
+
+ /* crossing the rs_thresh boundary is not allowed; rs_thresh counts
+ * descriptors and each packet takes two, so clamp in descriptor units
+ * and convert back to packets
+ */
+ num = (uint16_t)RTE_MIN(nb_pkts << 1, txq->rs_thresh);
+ num = num >> 1;
+ ret = iavf_xmit_fixed_burst_vec_avx512_ctx(tx_queue, &tx_pkts[nb_tx],
+ num, offload);
+ nb_tx += ret;
+ nb_pkts -= ret;
+ if (ret < num)
+ break;
+ }
+
+ return nb_tx;
+}
+
+uint16_t
+iavf_xmit_pkts_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ return iavf_xmit_pkts_vec_avx512_ctx_cmn(tx_queue, tx_pkts, nb_pkts, false);
+}
+
+uint16_t
+iavf_xmit_pkts_vec_avx512_ctx_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ return iavf_xmit_pkts_vec_avx512_ctx_cmn(tx_queue, tx_pkts, nb_pkts, true);
+}
@@ -253,10 +253,28 @@ iavf_tx_vec_queue_default(struct iavf_tx_queue *txq)
if (txq->offloads & IAVF_TX_NO_VECTOR_FLAGS)
return -1;
- if (txq->offloads & IAVF_TX_VECTOR_OFFLOAD)
- return IAVF_VECTOR_OFFLOAD_PATH;
-
- return IAVF_VECTOR_PATH;
+ /**
+ * The VLAN TCI needs to be inserted via the ctx desc if the vlan_flag is L2TAG2.
+ * Tunneling parameters and other fields need to be configured in the ctx desc
+ * if outer checksum offload is enabled.
+ */
+ if (txq->vlan_flag == IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+ txq->use_ctx = 1;
+ if (txq->offloads & (IAVF_TX_VECTOR_OFFLOAD |
+ IAVF_TX_VECTOR_OFFLOAD_CTX))
+ return IAVF_VECTOR_CTX_OFFLOAD_PATH;
+ else
+ return IAVF_VECTOR_CTX_PATH;
+ } else {
+ if (txq->offloads & IAVF_TX_VECTOR_OFFLOAD_CTX) {
+ txq->use_ctx = 1;
+ return IAVF_VECTOR_CTX_OFFLOAD_PATH;
+ } else if (txq->offloads & IAVF_TX_VECTOR_OFFLOAD) {
+ return IAVF_VECTOR_OFFLOAD_PATH;
+ } else {
+ return IAVF_VECTOR_PATH;
+ }
+ }
}
static inline int
@@ -313,7 +331,7 @@ iavf_tx_vec_dev_check_default(struct rte_eth_dev *dev)
static __rte_always_inline void
iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
- uint64_t *txd_hi)
+ uint64_t *txd_hi, uint8_t vlan_flag)
{
#if defined(IAVF_TX_CSUM_OFFLOAD) || defined(IAVF_TX_VLAN_QINQ_OFFLOAD)
uint64_t ol_flags = tx_pkt->ol_flags;
@@ -325,14 +343,20 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
#ifdef IAVF_TX_CSUM_OFFLOAD
/* Set MACLEN */
- td_offset |= (tx_pkt->l2_len >> 1) <<
- IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
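+ /* for tunnel packets MACLEN covers only the outer L2 header; the outer
+ * L3 and tunneling headers are described by EIPLEN/L4TUNLEN in the ctx desc
+ */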
+ if (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
+ td_offset |= (tx_pkt->outer_l2_len >> 1)
+ << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+ else
+ td_offset |= (tx_pkt->l2_len >> 1)
+ << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
/* Enable L3 checksum offloads */
if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
- td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
- td_offset |= (tx_pkt->l3_len >> 2) <<
- IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+ if (ol_flags & RTE_MBUF_F_TX_IPV4) {
+ td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
+ td_offset |= (tx_pkt->l3_len >> 2) <<
+ IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+ }
} else if (ol_flags & RTE_MBUF_F_TX_IPV4) {
td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4;
td_offset |= (tx_pkt->l3_len >> 2) <<
@@ -368,7 +392,8 @@ iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
#endif
#ifdef IAVF_TX_VLAN_QINQ_OFFLOAD
- if (ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
+ if ((ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) &&
+ (vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1)) {
td_cmd |= IAVF_TX_DESC_CMD_IL2TAG1;
*txd_hi |= ((uint64_t)tx_pkt->vlan_tci <<
IAVF_TXD_QW1_L2TAG1_SHIFT);