> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@amd.com>
> Sent: Thursday, October 20, 2022 22:47
> To: Guo, Junfeng <junfeng.guo@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>; Xing,
> Beilei <beilei.xing@intel.com>
> Cc: dev@dpdk.org; Li, Xiaoyun <xiaoyun.li@intel.com>;
> awogbemila@google.com; Richardson, Bruce
> <bruce.richardson@intel.com>; hemant.agrawal@nxp.com;
> stephen@networkplumber.org; Xia, Chenbo <chenbo.xia@intel.com>;
> Zhang, Helin <helin.zhang@intel.com>
> Subject: Re: [PATCH v6 8/8] net/gve: add support for Rx/Tx
>
> On 10/20/2022 11:36 AM, Junfeng Guo wrote:
>
> >
> > Add Rx/Tx of GQI_QPL queue format and GQI_RDA queue format.
> >
> > Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
> > Signed-off-by: Junfeng Guo <junfeng.guo@intel.com>
>
> <...>
>
> > +uint16_t
> > +gve_rx_burst(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
> > +{
> > + volatile struct gve_rx_desc *rxr, *rxd;
> > + struct gve_rx_queue *rxq = rx_queue;
> > + uint16_t rx_id = rxq->rx_tail;
> > + struct rte_mbuf *rxe;
> > + uint16_t nb_rx, len;
> > + uint64_t addr;
> > + uint16_t i;
> > +
> > + rxr = rxq->rx_desc_ring;
> > + nb_rx = 0;
> > +
> > + for (i = 0; i < nb_pkts; i++) {
> > + rxd = &rxr[rx_id];
> > + if (GVE_SEQNO(rxd->flags_seq) != rxq->expected_seqno)
> > + break;
> > +
> > + if (rxd->flags_seq & GVE_RXF_ERR)
> > + continue;
> > +
> > + len = rte_be_to_cpu_16(rxd->len) - GVE_RX_PAD;
> > + rxe = rxq->sw_ring[rx_id];
> > + if (rxq->is_gqi_qpl) {
> > + addr = (uint64_t)(rxq->qpl->mz->addr) + rx_id * PAGE_SIZE + GVE_RX_PAD;
> > + rte_memcpy((void *)((size_t)rxe->buf_addr + rxe->data_off),
> > + (void *)(size_t)addr, len);
> > + }
> > + rxe->pkt_len = len;
> > + rxe->data_len = len;
> > + rxe->port = rxq->port_id;
> > + rxe->ol_flags = 0;
> > +
> > + if (rxd->flags_seq & GVE_RXF_TCP)
> > + rxe->packet_type |= RTE_PTYPE_L4_TCP;
> > + if (rxd->flags_seq & GVE_RXF_UDP)
> > + rxe->packet_type |= RTE_PTYPE_L4_UDP;
> > + if (rxd->flags_seq & GVE_RXF_IPV4)
> > + rxe->packet_type |= RTE_PTYPE_L3_IPV4;
> > + if (rxd->flags_seq & GVE_RXF_IPV6)
> > + rxe->packet_type |= RTE_PTYPE_L3_IPV6;
> > +
> > + if (gve_needs_rss(rxd->flags_seq)) {
> > + rxe->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
> > + rxe->hash.rss = rte_be_to_cpu_32(rxd->rss_hash);
>
> You are updating "m->hash.rss" anyway, and if this is without any extra
> cost you can force enable it as done in the previous version:
> 'dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;'
Yes, it seems RSS is enabled by default with no obvious perf loss; there
is no RSS init stage. We will also force enable this in the dev config
stage. Thanks!
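A minimal sketch of what we have in mind (assuming the offload is forced
in the dev_configure callback; the exact placement is still to be decided):

static int
gve_dev_configure(struct rte_eth_dev *dev)
{
	/* gve_rx_burst() fills m->hash.rss whenever the descriptor
	 * carries a hash, so report the offload as always enabled. */
	dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;

	return 0;
}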
>
> <...>
>
> > +static inline void
> > +gve_free_bulk_mbuf(struct rte_mbuf **txep, int num)
> > +{
> > + struct rte_mbuf *m, *free[GVE_TX_MAX_FREE_SZ];
> > + int nb_free = 0;
> > + int i, s;
> > +
> > + if (unlikely(num == 0))
> > + return;
> > +
> > + /* Find the 1st mbuf which needs to be free */
> > + for (s = 0; s < num; s++) {
> > + if (txep[s] != NULL) {
> > + m = rte_pktmbuf_prefree_seg(txep[s]);
> > + if (m != NULL)
> > + break;
> > + }
>
> '}' indentation is wrong.
Thanks for the catch! Will update in the coming version.
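For reference, the loop with the closing brace aligned to its 'if':

	/* Find the first mbuf that needs to be freed */
	for (s = 0; s < num; s++) {
		if (txep[s] != NULL) {
			m = rte_pktmbuf_prefree_seg(txep[s]);
			if (m != NULL)
				break;
		}
	}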
>
> <...>
>
> > +static inline void
> > +gve_tx_clean_swr_qpl(struct gve_tx_queue *txq)
> > +{
> > + uint32_t start = txq->sw_ntc;
> > + uint32_t ntc, nb_clean;
> > +
> > + ntc = txq->sw_tail;
> > +
> > + if (ntc == start)
> > + return;
> > +
> > + /* if wrap around, free twice. */
> > + if (ntc < start) {
> > + nb_clean = txq->nb_tx_desc - start;
> > + if (nb_clean > GVE_TX_MAX_FREE_SZ)
> > + nb_clean = GVE_TX_MAX_FREE_SZ;
> > + gve_free_bulk_mbuf(&txq->sw_ring[start], nb_clean);
> > +
> > + txq->sw_nb_free += nb_clean;
> > + start += nb_clean;
> > + if (start == txq->nb_tx_desc)
> > + start = 0;
> > + txq->sw_ntc = start;
> > + }
> > +
> > + if (ntc > start) {
>
> Maybe you can drop the 'if' block, since the "ntc == start" and "ntc < start"
> cases are already covered.
Sure, will drop this 'if' in the coming version. Thanks!
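Something like the following single loop could replace both branches (just
a sketch; note that since nb_clean is capped at GVE_TX_MAX_FREE_SZ, this
form keeps cleaning until sw_tail is reached, while the current code cleans
at most one capped batch per branch):

static inline void
gve_tx_clean_swr_qpl(struct gve_tx_queue *txq)
{
	uint32_t start = txq->sw_ntc;
	uint32_t ntc = txq->sw_tail;
	uint32_t nb_clean;

	while (start != ntc) {
		/* On wrap-around, first clean up to the end of the ring. */
		nb_clean = ntc < start ? txq->nb_tx_desc - start : ntc - start;
		if (nb_clean > GVE_TX_MAX_FREE_SZ)
			nb_clean = GVE_TX_MAX_FREE_SZ;
		gve_free_bulk_mbuf(&txq->sw_ring[start], nb_clean);
		txq->sw_nb_free += nb_clean;
		start += nb_clean;
		if (start == txq->nb_tx_desc)
			start = 0;
	}
	txq->sw_ntc = start;
}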
>
> <...>
>
> > +uint16_t
> > +gve_tx_burst(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> > +{
> > + struct gve_tx_queue *txq = tx_queue;
> > +
> > + if (txq->is_gqi_qpl)
> > + return gve_tx_burst_qpl(tx_queue, tx_pkts, nb_pkts);
> > +
> > + return gve_tx_burst_ra(tx_queue, tx_pkts, nb_pkts);
> > +}
> > +
>
> Can there be a mix of queue types?
> If only one queue type is supported in a specific config, perhaps the burst
> function can be set during configuration, to prevent the 'if' check on the
> datapath.
>
> This is an optimization and can be done later; it doesn't have to be in
> this set.
Maybe not. There are three queue format types, and we can get the one
actually in use via the adminq with 'priv->queue_format', so there should
not be a mix of queue types. Currently only the GQI_QPL and GQI_RDA queue
formats are supported in the PMD. Also, only the GQI_QPL queue format is in
use on GCP, since GQI_RDA hasn't been released in production.
We will do some refactoring for the queue types later. Thanks for the advice!
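A rough sketch of that refactor (the Rx split into gve_rx_burst_qpl and
gve_rx_burst_ra is hypothetical; only the Tx path is split that way in this
patch, and both Tx variants are currently static):

static void
gve_set_rx_tx_funcs(struct rte_eth_dev *dev, struct gve_priv *priv)
{
	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
		dev->rx_pkt_burst = gve_rx_burst_qpl;
		dev->tx_pkt_burst = gve_tx_burst_qpl;
	} else {
		dev->rx_pkt_burst = gve_rx_burst_ra;
		dev->tx_pkt_burst = gve_tx_burst_ra;
	}
}

That would let gve_dev_init() pick the datapath once instead of branching
on txq->is_gqi_qpl in every burst call.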
@@ -7,7 +7,9 @@
Speed capabilities = Y
Link status = Y
MTU update = Y
+TSO = Y
RSS hash = Y
+L4 checksum offload = Y
Linux = Y
x86-32 = Y
x86-64 = Y
@@ -62,8 +62,12 @@ In this release, the GVE PMD provides the basic functionality of packet
reception and transmission.
Supported features of the GVE PMD are:
+- Multiple queues for TX and RX
- Receiver Side Scaling (RSS)
+- TSO offload
- Link state information
+- TX multi-segments (Scatter TX)
+- Tx UDP/TCP/SCTP Checksum
Currently, only GQI_QPL and GQI_RDA queue format are supported in PMD.
Jumbo Frame is not supported in PMD for now. It'll be added in the future
@@ -284,7 +284,13 @@ gve_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
dev_info->min_mtu = RTE_ETHER_MIN_MTU;
dev_info->rx_offload_capa = 0;
- dev_info->tx_offload_capa = 0;
+ dev_info->tx_offload_capa =
+ RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
+ RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
+ RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
+ RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
+ RTE_ETH_TX_OFFLOAD_SCTP_CKSUM |
+ RTE_ETH_TX_OFFLOAD_TCP_TSO;
if (priv->queue_format == GVE_DQO_RDA_FORMAT)
dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
@@ -633,6 +639,13 @@ gve_dev_init(struct rte_eth_dev *eth_dev)
if (err)
return err;
+ if (gve_is_gqi(priv)) {
+ eth_dev->rx_pkt_burst = gve_rx_burst;
+ eth_dev->tx_pkt_burst = gve_tx_burst;
+ } else {
+ PMD_DRV_LOG(ERR, "DQO_RDA is not implemented and will be added in the future");
+ }
+
eth_dev->data->mac_addrs = rte_zmalloc("gve_mac", sizeof(struct rte_ether_addr), 0);
if (!eth_dev->data->mac_addrs) {
PMD_DRV_LOG(ERR, "Failed to allocate memory to store mac address");
@@ -34,6 +34,18 @@ union gve_tx_desc {
struct gve_tx_seg_desc seg; /* subsequent descs for a packet */
};
+/* Offload features */
+union gve_tx_offload {
+ uint64_t data;
+ struct {
+ uint64_t l2_len:7; /* L2 (MAC) Header Length. */
+ uint64_t l3_len:9; /* L3 (IP) Header Length. */
+ uint64_t l4_len:8; /* L4 Header Length. */
+ uint64_t tso_segsz:16; /* TCP TSO segment size */
+ /* uint64_t unused : 24; */
+ };
+};
+
struct gve_tx_iovec {
uint32_t iov_base; /* offset in fifo */
uint32_t iov_len;
@@ -274,4 +286,10 @@ gve_stop_tx_queues(struct rte_eth_dev *dev);
void
gve_stop_rx_queues(struct rte_eth_dev *dev);
+uint16_t
+gve_rx_burst(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+
+uint16_t
+gve_tx_burst(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
#endif /* _GVE_ETHDEV_H_ */
@@ -5,6 +5,148 @@
#include "gve_ethdev.h"
#include "base/gve_adminq.h"
+static inline void
+gve_rx_refill(struct gve_rx_queue *rxq)
+{
+ uint16_t mask = rxq->nb_rx_desc - 1;
+ uint16_t idx = rxq->next_avail & mask;
+ uint32_t next_avail = rxq->next_avail;
+ uint16_t nb_alloc, i;
+ struct rte_mbuf *nmb;
+ int diag;
+
+ /* wrap around */
+ nb_alloc = rxq->nb_rx_desc - idx;
+ if (nb_alloc <= rxq->nb_avail) {
+ diag = rte_pktmbuf_alloc_bulk(rxq->mpool, &rxq->sw_ring[idx], nb_alloc);
+ if (diag < 0) {
+ for (i = 0; i < nb_alloc; i++) {
+ nmb = rte_pktmbuf_alloc(rxq->mpool);
+ if (!nmb)
+ break;
+ rxq->sw_ring[idx + i] = nmb;
+ }
+ if (i != nb_alloc)
+ nb_alloc = i;
+ }
+ rxq->nb_avail -= nb_alloc;
+ next_avail += nb_alloc;
+
+ /* queue page list mode doesn't need real refill. */
+ if (rxq->is_gqi_qpl) {
+ idx += nb_alloc;
+ } else {
+ for (i = 0; i < nb_alloc; i++) {
+ nmb = rxq->sw_ring[idx];
+ rxq->rx_data_ring[idx].addr =
+ rte_cpu_to_be_64(rte_mbuf_data_iova(nmb));
+ idx++;
+ }
+ }
+ if (idx == rxq->nb_rx_desc)
+ idx = 0;
+ }
+
+ if (rxq->nb_avail > 0) {
+ nb_alloc = rxq->nb_avail;
+ if (rxq->nb_rx_desc < idx + rxq->nb_avail)
+ nb_alloc = rxq->nb_rx_desc - idx;
+ diag = rte_pktmbuf_alloc_bulk(rxq->mpool, &rxq->sw_ring[idx], nb_alloc);
+ if (diag < 0) {
+ for (i = 0; i < nb_alloc; i++) {
+ nmb = rte_pktmbuf_alloc(rxq->mpool);
+ if (!nmb)
+ break;
+ rxq->sw_ring[idx + i] = nmb;
+ }
+ nb_alloc = i;
+ }
+ rxq->nb_avail -= nb_alloc;
+ next_avail += nb_alloc;
+
+ if (!rxq->is_gqi_qpl) {
+ for (i = 0; i < nb_alloc; i++) {
+ nmb = rxq->sw_ring[idx];
+ rxq->rx_data_ring[idx].addr =
+ rte_cpu_to_be_64(rte_mbuf_data_iova(nmb));
+ idx++;
+ }
+ }
+ }
+
+ if (next_avail != rxq->next_avail) {
+ rte_write32(rte_cpu_to_be_32(next_avail), rxq->qrx_tail);
+ rxq->next_avail = next_avail;
+ }
+}
+
+uint16_t
+gve_rx_burst(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ volatile struct gve_rx_desc *rxr, *rxd;
+ struct gve_rx_queue *rxq = rx_queue;
+ uint16_t rx_id = rxq->rx_tail;
+ struct rte_mbuf *rxe;
+ uint16_t nb_rx, len;
+ uint64_t addr;
+ uint16_t i;
+
+ rxr = rxq->rx_desc_ring;
+ nb_rx = 0;
+
+ for (i = 0; i < nb_pkts; i++) {
+ rxd = &rxr[rx_id];
+ if (GVE_SEQNO(rxd->flags_seq) != rxq->expected_seqno)
+ break;
+
+ if (rxd->flags_seq & GVE_RXF_ERR)
+ continue;
+
+ len = rte_be_to_cpu_16(rxd->len) - GVE_RX_PAD;
+ rxe = rxq->sw_ring[rx_id];
+ if (rxq->is_gqi_qpl) {
+ addr = (uint64_t)(rxq->qpl->mz->addr) + rx_id * PAGE_SIZE + GVE_RX_PAD;
+ rte_memcpy((void *)((size_t)rxe->buf_addr + rxe->data_off),
+ (void *)(size_t)addr, len);
+ }
+ rxe->pkt_len = len;
+ rxe->data_len = len;
+ rxe->port = rxq->port_id;
+ rxe->ol_flags = 0;
+
+ if (rxd->flags_seq & GVE_RXF_TCP)
+ rxe->packet_type |= RTE_PTYPE_L4_TCP;
+ if (rxd->flags_seq & GVE_RXF_UDP)
+ rxe->packet_type |= RTE_PTYPE_L4_UDP;
+ if (rxd->flags_seq & GVE_RXF_IPV4)
+ rxe->packet_type |= RTE_PTYPE_L3_IPV4;
+ if (rxd->flags_seq & GVE_RXF_IPV6)
+ rxe->packet_type |= RTE_PTYPE_L3_IPV6;
+
+ if (gve_needs_rss(rxd->flags_seq)) {
+ rxe->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
+ rxe->hash.rss = rte_be_to_cpu_32(rxd->rss_hash);
+ }
+
+ rxq->expected_seqno = gve_next_seqno(rxq->expected_seqno);
+
+ rx_id++;
+ if (rx_id == rxq->nb_rx_desc)
+ rx_id = 0;
+
+ rx_pkts[nb_rx] = rxe;
+ nb_rx++;
+ }
+
+ rxq->nb_avail += nb_rx;
+ rxq->rx_tail = rx_id;
+
+ if (rxq->nb_avail > rxq->free_thresh)
+ gve_rx_refill(rxq);
+
+ return nb_rx;
+}
+
static inline void
gve_reset_rxq(struct gve_rx_queue *rxq)
{
@@ -5,6 +5,461 @@
#include "gve_ethdev.h"
#include "base/gve_adminq.h"
+static inline void
+gve_free_bulk_mbuf(struct rte_mbuf **txep, int num)
+{
+ struct rte_mbuf *m, *free[GVE_TX_MAX_FREE_SZ];
+ int nb_free = 0;
+ int i, s;
+
+ if (unlikely(num == 0))
+ return;
+
+ /* Find the first mbuf that needs to be freed */
+ for (s = 0; s < num; s++) {
+ if (txep[s] != NULL) {
+ m = rte_pktmbuf_prefree_seg(txep[s]);
+ if (m != NULL)
+ break;
+ }
+ }
+
+ if (s == num)
+ return;
+
+ free[0] = m;
+ nb_free = 1;
+ for (i = s + 1; i < num; i++) {
+ if (likely(txep[i] != NULL)) {
+ m = rte_pktmbuf_prefree_seg(txep[i]);
+ if (likely(m != NULL)) {
+ if (likely(m->pool == free[0]->pool)) {
+ free[nb_free++] = m;
+ } else {
+ rte_mempool_put_bulk(free[0]->pool, (void *)free, nb_free);
+ free[0] = m;
+ nb_free = 1;
+ }
+ }
+ txep[i] = NULL;
+ }
+ }
+ rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+}
+
+static inline void
+gve_tx_clean(struct gve_tx_queue *txq)
+{
+ uint16_t mask = txq->nb_tx_desc - 1;
+ uint32_t start = txq->next_to_clean & mask;
+ uint32_t ntc, nb_clean, i;
+ struct gve_tx_iovec *iov;
+
+ ntc = rte_be_to_cpu_32(rte_read32(txq->qtx_head));
+ ntc = ntc & mask;
+
+ if (ntc == start)
+ return;
+
+ /* if wrap around, free twice. */
+ if (ntc < start) {
+ nb_clean = txq->nb_tx_desc - start;
+ if (nb_clean > GVE_TX_MAX_FREE_SZ)
+ nb_clean = GVE_TX_MAX_FREE_SZ;
+ if (txq->is_gqi_qpl) {
+ for (i = start; i < start + nb_clean; i++) {
+ iov = &txq->iov_ring[i];
+ txq->fifo_avail += iov->iov_len;
+ iov->iov_base = 0;
+ iov->iov_len = 0;
+ }
+ } else {
+ gve_free_bulk_mbuf(&txq->sw_ring[start], nb_clean);
+ }
+ txq->nb_free += nb_clean;
+ start += nb_clean;
+ if (start == txq->nb_tx_desc)
+ start = 0;
+ txq->next_to_clean += nb_clean;
+ }
+
+ if (ntc > start) {
+ nb_clean = ntc - start;
+ if (nb_clean > GVE_TX_MAX_FREE_SZ)
+ nb_clean = GVE_TX_MAX_FREE_SZ;
+ if (txq->is_gqi_qpl) {
+ for (i = start; i < start + nb_clean; i++) {
+ iov = &txq->iov_ring[i];
+ txq->fifo_avail += iov->iov_len;
+ iov->iov_base = 0;
+ iov->iov_len = 0;
+ }
+ } else {
+ gve_free_bulk_mbuf(&txq->sw_ring[start], nb_clean);
+ }
+ txq->nb_free += nb_clean;
+ txq->next_to_clean += nb_clean;
+ }
+}
+
+static inline void
+gve_tx_clean_swr_qpl(struct gve_tx_queue *txq)
+{
+ uint32_t start = txq->sw_ntc;
+ uint32_t ntc, nb_clean;
+
+ ntc = txq->sw_tail;
+
+ if (ntc == start)
+ return;
+
+ /* if wrap around, free twice. */
+ if (ntc < start) {
+ nb_clean = txq->nb_tx_desc - start;
+ if (nb_clean > GVE_TX_MAX_FREE_SZ)
+ nb_clean = GVE_TX_MAX_FREE_SZ;
+ gve_free_bulk_mbuf(&txq->sw_ring[start], nb_clean);
+
+ txq->sw_nb_free += nb_clean;
+ start += nb_clean;
+ if (start == txq->nb_tx_desc)
+ start = 0;
+ txq->sw_ntc = start;
+ }
+
+ if (ntc > start) {
+ nb_clean = ntc - start;
+ if (nb_clean > GVE_TX_MAX_FREE_SZ)
+ nb_clean = GVE_TX_MAX_FREE_SZ;
+ gve_free_bulk_mbuf(&txq->sw_ring[start], nb_clean);
+ txq->sw_nb_free += nb_clean;
+ start += nb_clean;
+ txq->sw_ntc = start;
+ }
+}
+
+static inline void
+gve_tx_fill_pkt_desc(volatile union gve_tx_desc *desc, struct rte_mbuf *mbuf,
+ uint8_t desc_cnt, uint16_t len, uint64_t addr)
+{
+ uint64_t csum_l4 = mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK;
+ uint8_t l4_csum_offset = 0;
+ uint8_t l4_hdr_offset = 0;
+
+ if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
+ csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM;
+
+ switch (csum_l4) {
+ case RTE_MBUF_F_TX_TCP_CKSUM:
+ l4_csum_offset = offsetof(struct rte_tcp_hdr, cksum);
+ l4_hdr_offset = mbuf->l2_len + mbuf->l3_len;
+ break;
+ case RTE_MBUF_F_TX_UDP_CKSUM:
+ l4_csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum);
+ l4_hdr_offset = mbuf->l2_len + mbuf->l3_len;
+ break;
+ case RTE_MBUF_F_TX_SCTP_CKSUM:
+ l4_csum_offset = offsetof(struct rte_sctp_hdr, cksum);
+ l4_hdr_offset = mbuf->l2_len + mbuf->l3_len;
+ break;
+ }
+
+ if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+ desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
+ desc->pkt.l4_csum_offset = l4_csum_offset >> 1;
+ desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
+ } else if (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
+ desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
+ desc->pkt.l4_csum_offset = l4_csum_offset >> 1;
+ desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
+ } else {
+ desc->pkt.type_flags = GVE_TXD_STD;
+ desc->pkt.l4_csum_offset = 0;
+ desc->pkt.l4_hdr_offset = 0;
+ }
+ desc->pkt.desc_cnt = desc_cnt;
+ desc->pkt.len = rte_cpu_to_be_16(mbuf->pkt_len);
+ desc->pkt.seg_len = rte_cpu_to_be_16(len);
+ desc->pkt.seg_addr = rte_cpu_to_be_64(addr);
+}
+
+static inline void
+gve_tx_fill_seg_desc(volatile union gve_tx_desc *desc, uint64_t ol_flags,
+ union gve_tx_offload tx_offload,
+ uint16_t len, uint64_t addr)
+{
+ desc->seg.type_flags = GVE_TXD_SEG;
+ if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+ if (ol_flags & RTE_MBUF_F_TX_IPV6)
+ desc->seg.type_flags |= GVE_TXSF_IPV6;
+ desc->seg.l3_offset = tx_offload.l2_len >> 1;
+ desc->seg.mss = rte_cpu_to_be_16(tx_offload.tso_segsz);
+ }
+ desc->seg.seg_len = rte_cpu_to_be_16(len);
+ desc->seg.seg_addr = rte_cpu_to_be_64(addr);
+}
+
+static inline bool
+is_fifo_avail(struct gve_tx_queue *txq, uint16_t len)
+{
+ if (txq->fifo_avail < len)
+ return false;
+ /* Don't split segment. */
+ if (txq->fifo_head + len > txq->fifo_size &&
+ txq->fifo_size - txq->fifo_head + len > txq->fifo_avail)
+ return false;
+ return true;
+}
+
+static inline uint64_t
+gve_tx_alloc_from_fifo(struct gve_tx_queue *txq, uint16_t tx_id, uint16_t len)
+{
+ uint32_t head = txq->fifo_head;
+ uint32_t size = txq->fifo_size;
+ struct gve_tx_iovec *iov;
+ uint32_t aligned_head;
+ uint32_t iov_len = 0;
+ uint64_t fifo_addr;
+
+ iov = &txq->iov_ring[tx_id];
+
+ /* Don't split segment */
+ if (head + len > size) {
+ iov_len += (size - head);
+ head = 0;
+ }
+
+ fifo_addr = head;
+ iov_len += len;
+ iov->iov_base = head;
+
+ /* Re-align to a cacheline for next head */
+ head += len;
+ aligned_head = RTE_ALIGN(head, RTE_CACHE_LINE_SIZE);
+ iov_len += (aligned_head - head);
+ iov->iov_len = iov_len;
+
+ if (aligned_head == txq->fifo_size)
+ aligned_head = 0;
+ txq->fifo_head = aligned_head;
+ txq->fifo_avail -= iov_len;
+
+ return fifo_addr;
+}
+
+static inline uint16_t
+gve_tx_burst_qpl(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ union gve_tx_offload tx_offload = {0};
+ volatile union gve_tx_desc *txr, *txd;
+ struct gve_tx_queue *txq = tx_queue;
+ struct rte_mbuf **sw_ring = txq->sw_ring;
+ uint16_t mask = txq->nb_tx_desc - 1;
+ uint16_t tx_id = txq->tx_tail & mask;
+ uint64_t ol_flags, addr, fifo_addr;
+ uint32_t tx_tail = txq->tx_tail;
+ struct rte_mbuf *tx_pkt, *first;
+ uint16_t sw_id = txq->sw_tail;
+ uint16_t nb_used, i;
+ uint16_t nb_tx = 0;
+ uint32_t hlen;
+
+ txr = txq->tx_desc_ring;
+
+ if (txq->nb_free < txq->free_thresh || txq->fifo_avail == 0)
+ gve_tx_clean(txq);
+
+ if (txq->sw_nb_free < txq->free_thresh)
+ gve_tx_clean_swr_qpl(txq);
+
+ for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+ tx_pkt = *tx_pkts++;
+ ol_flags = tx_pkt->ol_flags;
+
+ if (txq->sw_nb_free < tx_pkt->nb_segs) {
+ gve_tx_clean_swr_qpl(txq);
+ if (txq->sw_nb_free < tx_pkt->nb_segs)
+ goto end_of_tx;
+ }
+
+ /* Even for multi-segs, use 1 qpl buf for data */
+ nb_used = 1;
+ if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
+ nb_used++;
+
+ if (txq->nb_free < nb_used)
+ goto end_of_tx;
+
+ tx_offload.l2_len = tx_pkt->l2_len;
+ tx_offload.l3_len = tx_pkt->l3_len;
+ tx_offload.l4_len = tx_pkt->l4_len;
+ tx_offload.tso_segsz = tx_pkt->tso_segsz;
+
+ first = tx_pkt;
+ txd = &txr[tx_id];
+ hlen = ol_flags & RTE_MBUF_F_TX_TCP_SEG ?
+ (uint32_t)(tx_offload.l2_len + tx_offload.l3_len + tx_offload.l4_len) :
+ tx_pkt->pkt_len;
+
+ sw_ring[sw_id] = tx_pkt;
+ if (!is_fifo_avail(txq, hlen)) {
+ gve_tx_clean(txq);
+ if (!is_fifo_avail(txq, hlen))
+ goto end_of_tx;
+ }
+ addr = (uint64_t)(tx_pkt->buf_addr) + tx_pkt->data_off;
+ fifo_addr = gve_tx_alloc_from_fifo(txq, tx_id, hlen);
+
+ /* For TSO, check if there's enough fifo space for data first */
+ if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+ if (!is_fifo_avail(txq, tx_pkt->pkt_len - hlen)) {
+ gve_tx_clean(txq);
+ if (!is_fifo_avail(txq, tx_pkt->pkt_len - hlen))
+ goto end_of_tx;
+ }
+ }
+ if (tx_pkt->nb_segs == 1 || ol_flags & RTE_MBUF_F_TX_TCP_SEG)
+ rte_memcpy((void *)(size_t)(fifo_addr + txq->fifo_base),
+ (void *)(size_t)addr, hlen);
+ else
+ rte_pktmbuf_read(tx_pkt, 0, hlen,
+ (void *)(size_t)(fifo_addr + txq->fifo_base));
+ gve_tx_fill_pkt_desc(txd, tx_pkt, nb_used, hlen, fifo_addr);
+
+ if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+ tx_id = (tx_id + 1) & mask;
+ txd = &txr[tx_id];
+ addr = (uint64_t)(tx_pkt->buf_addr) + tx_pkt->data_off + hlen;
+ fifo_addr = gve_tx_alloc_from_fifo(txq, tx_id, tx_pkt->pkt_len - hlen);
+ if (tx_pkt->nb_segs == 1)
+ rte_memcpy((void *)(size_t)(fifo_addr + txq->fifo_base),
+ (void *)(size_t)addr,
+ tx_pkt->pkt_len - hlen);
+ else
+ rte_pktmbuf_read(tx_pkt, hlen, tx_pkt->pkt_len - hlen,
+ (void *)(size_t)(fifo_addr + txq->fifo_base));
+
+ gve_tx_fill_seg_desc(txd, ol_flags, tx_offload,
+ tx_pkt->pkt_len - hlen, fifo_addr);
+ }
+
+ /* record mbuf in sw_ring for free */
+ for (i = 1; i < first->nb_segs; i++) {
+ sw_id = (sw_id + 1) & mask;
+ tx_pkt = tx_pkt->next;
+ sw_ring[sw_id] = tx_pkt;
+ }
+
+ sw_id = (sw_id + 1) & mask;
+ tx_id = (tx_id + 1) & mask;
+
+ txq->nb_free -= nb_used;
+ txq->sw_nb_free -= first->nb_segs;
+ tx_tail += nb_used;
+ }
+
+end_of_tx:
+ if (nb_tx) {
+ rte_write32(rte_cpu_to_be_32(tx_tail), txq->qtx_tail);
+ txq->tx_tail = tx_tail;
+ txq->sw_tail = sw_id;
+ }
+
+ return nb_tx;
+}
+
+static inline uint16_t
+gve_tx_burst_ra(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ union gve_tx_offload tx_offload = {0};
+ volatile union gve_tx_desc *txr, *txd;
+ struct gve_tx_queue *txq = tx_queue;
+ struct rte_mbuf **sw_ring = txq->sw_ring;
+ uint16_t mask = txq->nb_tx_desc - 1;
+ uint16_t tx_id = txq->tx_tail & mask;
+ uint32_t tx_tail = txq->tx_tail;
+ struct rte_mbuf *tx_pkt, *first;
+ uint16_t nb_used, hlen, i;
+ uint64_t ol_flags, addr;
+ uint16_t nb_tx = 0;
+
+ txr = txq->tx_desc_ring;
+
+ if (txq->nb_free < txq->free_thresh)
+ gve_tx_clean(txq);
+
+ for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+ tx_pkt = *tx_pkts++;
+ ol_flags = tx_pkt->ol_flags;
+
+ nb_used = tx_pkt->nb_segs;
+ if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
+ nb_used++;
+
+ if (txq->nb_free < nb_used)
+ goto end_of_tx;
+
+ tx_offload.l2_len = tx_pkt->l2_len;
+ tx_offload.l3_len = tx_pkt->l3_len;
+ tx_offload.l4_len = tx_pkt->l4_len;
+ tx_offload.tso_segsz = tx_pkt->tso_segsz;
+
+ first = tx_pkt;
+ txd = &txr[tx_id];
+
+ hlen = ol_flags & RTE_MBUF_F_TX_TCP_SEG ?
+ (uint32_t)(tx_offload.l2_len + tx_offload.l3_len + tx_offload.l4_len) :
+ tx_pkt->pkt_len;
+ /*
+ * if tso, the driver needs to fill 2 descs for 1 mbuf
+ * so only put this mbuf into the 1st tx entry in sw ring
+ */
+ sw_ring[tx_id] = tx_pkt;
+ addr = rte_mbuf_data_iova(tx_pkt);
+ gve_tx_fill_pkt_desc(txd, tx_pkt, nb_used, hlen, addr);
+
+ if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+ tx_id = (tx_id + 1) & mask;
+ txd = &txr[tx_id];
+ addr = rte_mbuf_data_iova(tx_pkt) + hlen;
+ gve_tx_fill_seg_desc(txd, ol_flags, tx_offload,
+ tx_pkt->data_len - hlen, addr);
+ }
+
+ for (i = 1; i < first->nb_segs; i++) {
+ tx_id = (tx_id + 1) & mask;
+ txd = &txr[tx_id];
+ tx_pkt = tx_pkt->next;
+ sw_ring[tx_id] = tx_pkt;
+ addr = rte_mbuf_data_iova(tx_pkt);
+ gve_tx_fill_seg_desc(txd, ol_flags, tx_offload,
+ tx_pkt->data_len, addr);
+ }
+ tx_id = (tx_id + 1) & mask;
+
+ txq->nb_free -= nb_used;
+ tx_tail += nb_used;
+ }
+
+end_of_tx:
+ if (nb_tx) {
+ rte_write32(rte_cpu_to_be_32(tx_tail), txq->qtx_tail);
+ txq->tx_tail = tx_tail;
+ }
+
+ return nb_tx;
+}
+
+uint16_t
+gve_tx_burst(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct gve_tx_queue *txq = tx_queue;
+
+ if (txq->is_gqi_qpl)
+ return gve_tx_burst_qpl(tx_queue, tx_pkts, nb_pkts);
+
+ return gve_tx_burst_ra(tx_queue, tx_pkts, nb_pkts);
+}
+
static inline void
gve_reset_txq(struct gve_tx_queue *txq)
{