[v6,15/18] net/mana: add function to send packets

Message ID 1661899911-13086-16-git-send-email-longli@linuxonhyperv.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Series Introduce Microsoft Azure Network Adapter (MANA) PMD

Checks

Context          Check     Description
ci/checkpatch    warning   coding style issues

Commit Message

Long Li Aug. 30, 2022, 10:51 p.m. UTC
  From: Long Li <longli@microsoft.com>

With all the TX queues created, MANA can send packets over those queues.

Signed-off-by: Long Li <longli@microsoft.com>
---
Change log:
v2:
Rename all camel-case identifiers.

 doc/guides/nics/features/mana.ini |   1 +
 drivers/net/mana/mana.c           |   1 +
 drivers/net/mana/mana.h           |  65 ++++++++
 drivers/net/mana/mp.c             |   1 +
 drivers/net/mana/tx.c             | 241 ++++++++++++++++++++++++++++++
 5 files changed, 309 insertions(+)
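
For context, a minimal usage sketch (not part of this patch; port, queue and buffer names are placeholders) of how an application reaches this transmit path through the standard ethdev API once the PMD installs mana_tx_burst as its tx_pkt_burst handler:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Drain a burst of mbufs on one TX queue; rte_eth_tx_burst() dispatches
 * to mana_tx_burst once the MANA PMD has registered it. */
static void
send_burst(uint16_t port_id, uint16_t queue_id,
	   struct rte_mbuf **mbufs, uint16_t count)
{
	uint16_t sent = 0;

	while (sent < count) {
		uint16_t n = rte_eth_tx_burst(port_id, queue_id,
					      &mbufs[sent], count - sent);
		if (n == 0)
			break;	/* queue full or post failure */
		sent += n;
	}

	/* The PMD owns what it accepted; drop the rest in this simple sketch */
	rte_pktmbuf_free_bulk(&mbufs[sent], count - sent);
}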
  

Comments

fengchengwen Sept. 2, 2022, 12:18 p.m. UTC | #1
On 2022/8/31 6:51, longli@linuxonhyperv.com wrote:
> From: Long Li <longli@microsoft.com>
> 
> With all the TX queues created, MANA can send packets over those queues.
> 
> Signed-off-by: Long Li <longli@microsoft.com>

...

>  }
> +
> +uint16_t mana_tx_burst(void *dpdk_txq, struct rte_mbuf **tx_pkts,
> +		       uint16_t nb_pkts)
> +{
> +	struct mana_txq *txq = dpdk_txq;
> +	struct mana_priv *priv = txq->priv;
> +	struct gdma_comp comp;
> +	int ret;
> +	void *db_page;
> +
> +	/* Process send completions from GDMA */
> +	while (gdma_poll_completion_queue(&txq->gdma_cq, &comp) == 1) {
> +		struct mana_txq_desc *desc =
> +			&txq->desc_ring[txq->desc_ring_tail];
> +		struct mana_tx_comp_oob *oob =
> +			(struct mana_tx_comp_oob *)&comp.completion_data[0];
> +
> +		if (oob->cqe_hdr.cqe_type != CQE_TX_OKAY) {
> +			DRV_LOG(ERR,
> +				"mana_tx_comp_oob cqe_type %u vendor_err %u",
> +				oob->cqe_hdr.cqe_type, oob->cqe_hdr.vendor_err);
> +			txq->stats.errors++;
> +		} else {
> +			DRV_LOG(DEBUG, "mana_tx_comp_oob CQE_TX_OKAY");
> +			txq->stats.packets++;
> +		}
> +
> +		if (!desc->pkt) {
> +			DRV_LOG(ERR, "mana_txq_desc has a NULL pkt");
> +		} else {
> +			txq->stats.bytes += desc->pkt->data_len;
> +			rte_pktmbuf_free(desc->pkt);
> +		}
> +
> +		desc->pkt = NULL;
> +		txq->desc_ring_tail = (txq->desc_ring_tail + 1) % txq->num_desc;
> +		txq->gdma_sq.tail += desc->wqe_size_in_bu;
> +	}
> +
> +	/* Post send requests to GDMA */
> +	uint16_t pkt_idx;
> +
> +	for (pkt_idx = 0; pkt_idx < nb_pkts; pkt_idx++) {
> +		struct rte_mbuf *m_pkt = tx_pkts[pkt_idx];
> +		struct rte_mbuf *m_seg = m_pkt;
> +		struct transmit_oob_v2 tx_oob = {0};
> +		struct one_sgl sgl = {0};
> +
> +		/* Drop the packet if it exceeds max segments */
> +		if (m_pkt->nb_segs > priv->max_send_sge) {
> +			DRV_LOG(ERR, "send packet segments %d exceeding max",
> +				m_pkt->nb_segs);

This branch violates the rte_eth_tx_burst definition, which defines the return value as " *   The maximum number of packets to transmit."

Also, I notice the driver doesn't implement tx_prepare, which could hold such checking per the framework's definition.
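
A minimal sketch of what a prepare-stage check could look like (this mana_tx_prepare helper is hypothetical and not part of this series; it would be registered through dev->tx_pkt_prepare and reached via rte_eth_tx_prepare()):

#include <errno.h>
#include <rte_errno.h>
#include <rte_mbuf.h>
#include "mana.h"	/* struct mana_txq, struct mana_priv */

/* Hypothetical prepare callback: reject packets the data path cannot
 * describe with a single SGL, returning the index of the first bad one. */
uint16_t
mana_tx_prepare(void *dpdk_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct mana_txq *txq = dpdk_txq;
	struct mana_priv *priv = txq->priv;
	uint16_t i;

	for (i = 0; i < nb_pkts; i++) {
		if (tx_pkts[i]->nb_segs > priv->max_send_sge) {
			rte_errno = EINVAL;
			return i;
		}
	}

	return nb_pkts;
}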

> +			continue;
> +		}
> +
> +		/* Fill in the oob */
> +		tx_oob.short_oob.packet_format = short_packet_format;
> +		tx_oob.short_oob.tx_is_outer_ipv4 =
> +			m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4 ? 1 : 0;
> +		tx_oob.short_oob.tx_is_outer_ipv6 =
> +			m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6 ? 1 : 0;
> +
> +		tx_oob.short_oob.tx_compute_IP_header_checksum =
> +			m_pkt->ol_flags & RTE_MBUF_F_TX_IP_CKSUM ? 1 : 0;
> +
> +		if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
> +				RTE_MBUF_F_TX_TCP_CKSUM) {
> +			struct rte_tcp_hdr *tcp_hdr;
> +
> +			/* HW needs partial TCP checksum */
> +
> +			tcp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> +					  struct rte_tcp_hdr *,
> +					  m_pkt->l2_len + m_pkt->l3_len);
> +
> +			if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
> +				struct rte_ipv4_hdr *ip_hdr;
> +
> +				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> +						struct rte_ipv4_hdr *,
> +						m_pkt->l2_len);
> +				tcp_hdr->cksum = rte_ipv4_phdr_cksum(ip_hdr,
> +							m_pkt->ol_flags);
> +
> +			} else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
> +				struct rte_ipv6_hdr *ip_hdr;
> +
> +				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> +						struct rte_ipv6_hdr *,
> +						m_pkt->l2_len);
> +				tcp_hdr->cksum = rte_ipv6_phdr_cksum(ip_hdr,
> +							m_pkt->ol_flags);
> +			} else {
> +				DRV_LOG(ERR, "Invalid input for TCP CKSUM");
> +			}
> +
> +			tx_oob.short_oob.tx_compute_TCP_checksum = 1;
> +			tx_oob.short_oob.tx_transport_header_offset =
> +				m_pkt->l2_len + m_pkt->l3_len;
> +		}
> +
> +		if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
> +				RTE_MBUF_F_TX_UDP_CKSUM) {
> +			struct rte_udp_hdr *udp_hdr;
> +
> +			/* HW needs partial UDP checksum */
> +			udp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> +					struct rte_udp_hdr *,
> +					m_pkt->l2_len + m_pkt->l3_len);
> +
> +			if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
> +				struct rte_ipv4_hdr *ip_hdr;
> +
> +				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> +						struct rte_ipv4_hdr *,
> +						m_pkt->l2_len);
> +
> +				udp_hdr->dgram_cksum =
> +					rte_ipv4_phdr_cksum(ip_hdr,
> +							    m_pkt->ol_flags);
> +
> +			} else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
> +				struct rte_ipv6_hdr *ip_hdr;
> +
> +				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> +						struct rte_ipv6_hdr *,
> +						m_pkt->l2_len);
> +
> +				udp_hdr->dgram_cksum =
> +					rte_ipv6_phdr_cksum(ip_hdr,
> +							    m_pkt->ol_flags);
> +
> +			} else {
> +				DRV_LOG(ERR, "Invalid input for UDP CKSUM");
> +			}
> +
> +			tx_oob.short_oob.tx_compute_UDP_checksum = 1;
> +		}

Why is there no TSO processing, given that TSO support is reported in dev_info_get?

> +
> +		tx_oob.short_oob.suppress_tx_CQE_generation = 0;
> +		tx_oob.short_oob.VCQ_number = txq->gdma_cq.id;
> +
> +		tx_oob.short_oob.VSQ_frame_num =
> +			get_vsq_frame_num(txq->gdma_sq.id);
> +		tx_oob.short_oob.short_vport_offset = txq->tx_vp_offset;
> +
> +		DRV_LOG(DEBUG, "tx_oob packet_format %u ipv4 %u ipv6 %u",
> +			tx_oob.short_oob.packet_format,
> +			tx_oob.short_oob.tx_is_outer_ipv4,
> +			tx_oob.short_oob.tx_is_outer_ipv6);
> +
> +		DRV_LOG(DEBUG, "tx_oob checksum ip %u tcp %u udp %u offset %u",
> +			tx_oob.short_oob.tx_compute_IP_header_checksum,
> +			tx_oob.short_oob.tx_compute_TCP_checksum,
> +			tx_oob.short_oob.tx_compute_UDP_checksum,
> +			tx_oob.short_oob.tx_transport_header_offset);
> +
> +		DRV_LOG(DEBUG, "pkt[%d]: buf_addr 0x%p, nb_segs %d, pkt_len %d",
> +			pkt_idx, m_pkt->buf_addr, m_pkt->nb_segs,
> +			m_pkt->pkt_len);
> +
> +		/* Create SGL for packet data buffers */
> +		for (uint16_t seg_idx = 0; seg_idx < m_pkt->nb_segs; seg_idx++) {
> +			struct mana_mr_cache *mr =
> +				mana_find_pmd_mr(&txq->mr_btree, priv, m_seg);
> +
> +			if (!mr) {
> +				DRV_LOG(ERR, "failed to get MR, pkt_idx %u",
> +					pkt_idx);
> +				return pkt_idx;
> +			}
> +
> +			sgl.gdma_sgl[seg_idx].address =
> +				rte_cpu_to_le_64(rte_pktmbuf_mtod(m_seg,
> +								  uint64_t));
> +			sgl.gdma_sgl[seg_idx].size = m_seg->data_len;
> +			sgl.gdma_sgl[seg_idx].memory_key = mr->lkey;
> +
> +			DRV_LOG(DEBUG,
> +				"seg idx %u addr 0x%" PRIx64 " size %x key %x",
> +				seg_idx, sgl.gdma_sgl[seg_idx].address,
> +				sgl.gdma_sgl[seg_idx].size,
> +				sgl.gdma_sgl[seg_idx].memory_key);
> +
> +			m_seg = m_seg->next;
> +		}
> +
> +		struct gdma_work_request work_req = {0};
> +		struct gdma_posted_wqe_info wqe_info = {0};
> +
> +		work_req.gdma_header.struct_size = sizeof(work_req);
> +		wqe_info.gdma_header.struct_size = sizeof(wqe_info);
> +
> +		work_req.sgl = sgl.gdma_sgl;
> +		work_req.num_sgl_elements = m_pkt->nb_segs;
> +		work_req.inline_oob_size_in_bytes =
> +			sizeof(struct transmit_short_oob_v2);
> +		work_req.inline_oob_data = &tx_oob;
> +		work_req.flags = 0;
> +		work_req.client_data_unit = NOT_USING_CLIENT_DATA_UNIT;
> +
> +		ret = gdma_post_work_request(&txq->gdma_sq, &work_req,
> +					     &wqe_info);
> +		if (!ret) {
> +			struct mana_txq_desc *desc =
> +				&txq->desc_ring[txq->desc_ring_head];
> +
> +			/* Update queue for tracking pending requests */
> +			desc->pkt = m_pkt;
> +			desc->wqe_size_in_bu = wqe_info.wqe_size_in_bu;
> +			txq->desc_ring_head =
> +				(txq->desc_ring_head + 1) % txq->num_desc;
> +
> +			DRV_LOG(DEBUG, "nb_pkts %u pkt[%d] sent",
> +				nb_pkts, pkt_idx);
> +		} else {
> +			DRV_LOG(INFO, "pkt[%d] failed to post send ret %d",
> +				pkt_idx, ret);
> +			break;
> +		}
> +	}
> +
> +	/* Ring hardware door bell */
> +	db_page = priv->db_page;
> +	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> +		struct rte_eth_dev *dev =
> +			&rte_eth_devices[priv->dev_data->port_id];
> +		struct mana_process_priv *process_priv = dev->process_private;
> +
> +		db_page = process_priv->db_page;
> +	}
> +
> +	ret = mana_ring_doorbell(db_page, gdma_queue_send,
> +				 txq->gdma_sq.id,
> +				 txq->gdma_sq.head *
> +					GDMA_WQE_ALIGNMENT_UNIT_SIZE);
> +	if (ret)
> +		DRV_LOG(ERR, "mana_ring_doorbell failed ret %d", ret);
> +
> +	return pkt_idx;
> +}
>
  
Long Li Sept. 2, 2022, 7:40 p.m. UTC | #2
> Subject: Re: [Patch v6 15/18] net/mana: add function to send packets
> 
> On 2022/8/31 6:51, longli@linuxonhyperv.com wrote:
> > From: Long Li <longli@microsoft.com>
> >
> > With all the TX queues created, MANA can send packets over those queues.
> >
> > Signed-off-by: Long Li <longli@microsoft.com>
> 
> ...
> 
> >  }
> > +
> > +uint16_t mana_tx_burst(void *dpdk_txq, struct rte_mbuf **tx_pkts,
> > +		       uint16_t nb_pkts)
> > +{
> > +	struct mana_txq *txq = dpdk_txq;
> > +	struct mana_priv *priv = txq->priv;
> > +	struct gdma_comp comp;
> > +	int ret;
> > +	void *db_page;
> > +
> > +	/* Process send completions from GDMA */
> > +	while (gdma_poll_completion_queue(&txq->gdma_cq, &comp) == 1) {
> > +		struct mana_txq_desc *desc =
> > +			&txq->desc_ring[txq->desc_ring_tail];
> > +		struct mana_tx_comp_oob *oob =
> > +			(struct mana_tx_comp_oob *)&comp.completion_data[0];
> > +
> > +		if (oob->cqe_hdr.cqe_type != CQE_TX_OKAY) {
> > +			DRV_LOG(ERR,
> > +				"mana_tx_comp_oob cqe_type %u vendor_err %u",
> > +				oob->cqe_hdr.cqe_type, oob->cqe_hdr.vendor_err);
> > +			txq->stats.errors++;
> > +		} else {
> > +			DRV_LOG(DEBUG, "mana_tx_comp_oob CQE_TX_OKAY");
> > +			txq->stats.packets++;
> > +		}
> > +
> > +		if (!desc->pkt) {
> > +			DRV_LOG(ERR, "mana_txq_desc has a NULL pkt");
> > +		} else {
> > +			txq->stats.bytes += desc->pkt->data_len;
> > +			rte_pktmbuf_free(desc->pkt);
> > +		}
> > +
> > +		desc->pkt = NULL;
> > +		txq->desc_ring_tail = (txq->desc_ring_tail + 1) % txq->num_desc;
> > +		txq->gdma_sq.tail += desc->wqe_size_in_bu;
> > +	}
> > +
> > +	/* Post send requests to GDMA */
> > +	uint16_t pkt_idx;
> > +
> > +	for (pkt_idx = 0; pkt_idx < nb_pkts; pkt_idx++) {
> > +		struct rte_mbuf *m_pkt = tx_pkts[pkt_idx];
> > +		struct rte_mbuf *m_seg = m_pkt;
> > +		struct transmit_oob_v2 tx_oob = {0};
> > +		struct one_sgl sgl = {0};
> > +
> > +		/* Drop the packet if it exceeds max segments */
> > +		if (m_pkt->nb_segs > priv->max_send_sge) {
> > +			DRV_LOG(ERR, "send packet segments %d exceeding max",
> > +				m_pkt->nb_segs);
> 
> This branch violates the rte_eth_tx_burst definition, which defines the return value as
> " *   The maximum number of packets to transmit."

Will fix this.

> 
> Also, I notice the driver doesn't implement tx_prepare, which could hold such
> checking per the framework's definition.
> 
> > +			continue;
> > +		}
> > +
> > +		/* Fill in the oob */
> > +		tx_oob.short_oob.packet_format = short_packet_format;
> > +		tx_oob.short_oob.tx_is_outer_ipv4 =
> > +			m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4 ? 1 : 0;
> > +		tx_oob.short_oob.tx_is_outer_ipv6 =
> > +			m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6 ? 1 : 0;
> > +
> > +		tx_oob.short_oob.tx_compute_IP_header_checksum =
> > +			m_pkt->ol_flags & RTE_MBUF_F_TX_IP_CKSUM ? 1 : 0;
> > +
> > +		if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
> > +				RTE_MBUF_F_TX_TCP_CKSUM) {
> > +			struct rte_tcp_hdr *tcp_hdr;
> > +
> > +			/* HW needs partial TCP checksum */
> > +
> > +			tcp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> > +					  struct rte_tcp_hdr *,
> > +					  m_pkt->l2_len + m_pkt->l3_len);
> > +
> > +			if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
> > +				struct rte_ipv4_hdr *ip_hdr;
> > +
> > +				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> > +						struct rte_ipv4_hdr *,
> > +						m_pkt->l2_len);
> > +				tcp_hdr->cksum = rte_ipv4_phdr_cksum(ip_hdr,
> > +							m_pkt->ol_flags);
> > +
> > +			} else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
> > +				struct rte_ipv6_hdr *ip_hdr;
> > +
> > +				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> > +						struct rte_ipv6_hdr *,
> > +						m_pkt->l2_len);
> > +				tcp_hdr->cksum = rte_ipv6_phdr_cksum(ip_hdr,
> > +							m_pkt->ol_flags);
> > +			} else {
> > +				DRV_LOG(ERR, "Invalid input for TCP CKSUM");
> > +			}
> > +
> > +			tx_oob.short_oob.tx_compute_TCP_checksum = 1;
> > +			tx_oob.short_oob.tx_transport_header_offset =
> > +				m_pkt->l2_len + m_pkt->l3_len;
> > +		}
> > +
> > +		if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
> > +				RTE_MBUF_F_TX_UDP_CKSUM) {
> > +			struct rte_udp_hdr *udp_hdr;
> > +
> > +			/* HW needs partial UDP checksum */
> > +			udp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> > +					struct rte_udp_hdr *,
> > +					m_pkt->l2_len + m_pkt->l3_len);
> > +
> > +			if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
> > +				struct rte_ipv4_hdr *ip_hdr;
> > +
> > +				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> > +						struct rte_ipv4_hdr *,
> > +						m_pkt->l2_len);
> > +
> > +				udp_hdr->dgram_cksum =
> > +					rte_ipv4_phdr_cksum(ip_hdr,
> > +							    m_pkt->ol_flags);
> > +
> > +			} else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
> > +				struct rte_ipv6_hdr *ip_hdr;
> > +
> > +				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> > +						struct rte_ipv6_hdr *,
> > +						m_pkt->l2_len);
> > +
> > +				udp_hdr->dgram_cksum =
> > +					rte_ipv6_phdr_cksum(ip_hdr,
> > +							    m_pkt->ol_flags);
> > +
> > +			} else {
> > +				DRV_LOG(ERR, "Invalid input for UDP CKSUM");
> > +			}
> > +
> > +			tx_oob.short_oob.tx_compute_UDP_checksum = 1;
> > +		}
> 
> Why is there no TSO processing, given that TSO support is reported in dev_info_get?

TSO is currently not implemented. I'm changing BNIC_DEV_TX_OFFLOAD_SUPPORT to fix this.
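
For illustration only (the actual BNIC_DEV_TX_OFFLOAD_SUPPORT definition is not shown in this series, so its shape below is an assumption): the fix amounts to advertising only the offloads the data path implements, leaving RTE_ETH_TX_OFFLOAD_TCP_TSO out until TSO is wired up.

/* Assumed shape of the advertised TX offload set; the real macro is not
 * part of this patch. TSO is deliberately not listed until implemented. */
#define BNIC_DEV_TX_OFFLOAD_SUPPORT ( \
		RTE_ETH_TX_OFFLOAD_MULTI_SEGS | \
		RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | \
		RTE_ETH_TX_OFFLOAD_TCP_CKSUM | \
		RTE_ETH_TX_OFFLOAD_UDP_CKSUM)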

> 
> > +
> > +		tx_oob.short_oob.suppress_tx_CQE_generation = 0;
> > +		tx_oob.short_oob.VCQ_number = txq->gdma_cq.id;
> > +
> > +		tx_oob.short_oob.VSQ_frame_num =
> > +			get_vsq_frame_num(txq->gdma_sq.id);
> > +		tx_oob.short_oob.short_vport_offset = txq->tx_vp_offset;
> > +
> > +		DRV_LOG(DEBUG, "tx_oob packet_format %u ipv4 %u ipv6 %u",
> > +			tx_oob.short_oob.packet_format,
> > +			tx_oob.short_oob.tx_is_outer_ipv4,
> > +			tx_oob.short_oob.tx_is_outer_ipv6);
> > +
> > +		DRV_LOG(DEBUG, "tx_oob checksum ip %u tcp %u udp %u offset %u",
> > +			tx_oob.short_oob.tx_compute_IP_header_checksum,
> > +			tx_oob.short_oob.tx_compute_TCP_checksum,
> > +			tx_oob.short_oob.tx_compute_UDP_checksum,
> > +			tx_oob.short_oob.tx_transport_header_offset);
> > +
> > +		DRV_LOG(DEBUG, "pkt[%d]: buf_addr 0x%p, nb_segs %d, pkt_len %d",
> > +			pkt_idx, m_pkt->buf_addr, m_pkt->nb_segs,
> > +			m_pkt->pkt_len);
> > +
> > +		/* Create SGL for packet data buffers */
> > +		for (uint16_t seg_idx = 0; seg_idx < m_pkt->nb_segs; seg_idx++) {
> > +			struct mana_mr_cache *mr =
> > +				mana_find_pmd_mr(&txq->mr_btree, priv, m_seg);
> > +
> > +			if (!mr) {
> > +				DRV_LOG(ERR, "failed to get MR, pkt_idx %u",
> > +					pkt_idx);
> > +				return pkt_idx;
> > +			}
> > +
> > +			sgl.gdma_sgl[seg_idx].address =
> > +				rte_cpu_to_le_64(rte_pktmbuf_mtod(m_seg,
> > +								  uint64_t));
> > +			sgl.gdma_sgl[seg_idx].size = m_seg->data_len;
> > +			sgl.gdma_sgl[seg_idx].memory_key = mr->lkey;
> > +
> > +			DRV_LOG(DEBUG,
> > +				"seg idx %u addr 0x%" PRIx64 " size %x key %x",
> > +				seg_idx, sgl.gdma_sgl[seg_idx].address,
> > +				sgl.gdma_sgl[seg_idx].size,
> > +				sgl.gdma_sgl[seg_idx].memory_key);
> > +
> > +			m_seg = m_seg->next;
> > +		}
> > +
> > +		struct gdma_work_request work_req = {0};
> > +		struct gdma_posted_wqe_info wqe_info = {0};
> > +
> > +		work_req.gdma_header.struct_size = sizeof(work_req);
> > +		wqe_info.gdma_header.struct_size = sizeof(wqe_info);
> > +
> > +		work_req.sgl = sgl.gdma_sgl;
> > +		work_req.num_sgl_elements = m_pkt->nb_segs;
> > +		work_req.inline_oob_size_in_bytes =
> > +			sizeof(struct transmit_short_oob_v2);
> > +		work_req.inline_oob_data = &tx_oob;
> > +		work_req.flags = 0;
> > +		work_req.client_data_unit = NOT_USING_CLIENT_DATA_UNIT;
> > +
> > +		ret = gdma_post_work_request(&txq->gdma_sq, &work_req,
> > +					     &wqe_info);
> > +		if (!ret) {
> > +			struct mana_txq_desc *desc =
> > +				&txq->desc_ring[txq->desc_ring_head];
> > +
> > +			/* Update queue for tracking pending requests */
> > +			desc->pkt = m_pkt;
> > +			desc->wqe_size_in_bu = wqe_info.wqe_size_in_bu;
> > +			txq->desc_ring_head =
> > +				(txq->desc_ring_head + 1) % txq->num_desc;
> > +
> > +			DRV_LOG(DEBUG, "nb_pkts %u pkt[%d] sent",
> > +				nb_pkts, pkt_idx);
> > +		} else {
> > +			DRV_LOG(INFO, "pkt[%d] failed to post send ret %d",
> > +				pkt_idx, ret);
> > +			break;
> > +		}
> > +	}
> > +
> > +	/* Ring hardware door bell */
> > +	db_page = priv->db_page;
> > +	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> > +		struct rte_eth_dev *dev =
> > +			&rte_eth_devices[priv->dev_data->port_id];
> > +		struct mana_process_priv *process_priv = dev->process_private;
> > +
> > +		db_page = process_priv->db_page;
> > +	}
> > +
> > +	ret = mana_ring_doorbell(db_page, gdma_queue_send,
> > +				 txq->gdma_sq.id,
> > +				 txq->gdma_sq.head *
> > +					GDMA_WQE_ALIGNMENT_UNIT_SIZE);
> > +	if (ret)
> > +		DRV_LOG(ERR, "mana_ring_doorbell failed ret %d", ret);
> > +
> > +	return pkt_idx;
> > +}
> >
  

Patch

diff --git a/doc/guides/nics/features/mana.ini b/doc/guides/nics/features/mana.ini
index fdbf22d335..7922816d66 100644
--- a/doc/guides/nics/features/mana.ini
+++ b/doc/guides/nics/features/mana.ini
@@ -4,6 +4,7 @@ 
 ; Refer to default.ini for the full list of available PMD features.
 ;
 [Features]
+Free Tx mbuf on demand = Y
 Link status          = P
 Linux                = Y
 L3 checksum offload  = Y
diff --git a/drivers/net/mana/mana.c b/drivers/net/mana/mana.c
index c349822991..0dcd3f3124 100644
--- a/drivers/net/mana/mana.c
+++ b/drivers/net/mana/mana.c
@@ -950,6 +950,7 @@  static int mana_pci_probe_mac(struct rte_pci_driver *pci_drv __rte_unused,
 				/* fd is no not used after mapping doorbell */
 				close(fd);
 
+				eth_dev->tx_pkt_burst = mana_tx_burst;
 				eth_dev->rx_pkt_burst = mana_rx_burst;
 
 				rte_spinlock_lock(&mana_shared_data->lock);
diff --git a/drivers/net/mana/mana.h b/drivers/net/mana/mana.h
index bafc4d6082..b4056bd50b 100644
--- a/drivers/net/mana/mana.h
+++ b/drivers/net/mana/mana.h
@@ -62,6 +62,47 @@  struct mana_shared_data {
 
 #define NOT_USING_CLIENT_DATA_UNIT 0
 
+enum tx_packet_format_v2 {
+	short_packet_format = 0,
+	long_packet_format = 1
+};
+
+struct transmit_short_oob_v2 {
+	enum tx_packet_format_v2 packet_format : 2;
+	uint32_t tx_is_outer_ipv4 : 1;
+	uint32_t tx_is_outer_ipv6 : 1;
+	uint32_t tx_compute_IP_header_checksum : 1;
+	uint32_t tx_compute_TCP_checksum : 1;
+	uint32_t tx_compute_UDP_checksum : 1;
+	uint32_t suppress_tx_CQE_generation : 1;
+	uint32_t VCQ_number : 24;
+	uint32_t tx_transport_header_offset : 10;
+	uint32_t VSQ_frame_num : 14;
+	uint32_t short_vport_offset : 8;
+};
+
+struct transmit_long_oob_v2 {
+	uint32_t tx_is_encapsulated_packet : 1;
+	uint32_t tx_inner_is_ipv6 : 1;
+	uint32_t tx_inner_TCP_options_present : 1;
+	uint32_t inject_vlan_prior_tag : 1;
+	uint32_t reserved1 : 12;
+	uint32_t priority_code_point : 3;
+	uint32_t drop_eligible_indicator : 1;
+	uint32_t vlan_identifier : 12;
+	uint32_t tx_inner_frame_offset : 10;
+	uint32_t tx_inner_IP_header_relative_offset : 6;
+	uint32_t long_vport_offset : 12;
+	uint32_t reserved3 : 4;
+	uint32_t reserved4 : 32;
+	uint32_t reserved5 : 32;
+};
+
+struct transmit_oob_v2 {
+	struct transmit_short_oob_v2 short_oob;
+	struct transmit_long_oob_v2 long_oob;
+};
+
 enum gdma_queue_types {
 	gdma_queue_type_invalid = 0,
 	gdma_queue_send,
@@ -183,6 +224,17 @@  enum mana_cqe_type {
 	CQE_RX_COALESCED_4              = 2,
 	CQE_RX_OBJECT_FENCE             = 3,
 	CQE_RX_TRUNCATED                = 4,
+
+	CQE_TX_OKAY                     = 32,
+	CQE_TX_SA_DROP                  = 33,
+	CQE_TX_MTU_DROP                 = 34,
+	CQE_TX_INVALID_OOB              = 35,
+	CQE_TX_INVALID_ETH_TYPE         = 36,
+	CQE_TX_HDR_PROCESSING_ERROR     = 37,
+	CQE_TX_VF_DISABLED              = 38,
+	CQE_TX_VPORT_IDX_OUT_OF_RANGE   = 39,
+	CQE_TX_VPORT_DISABLED           = 40,
+	CQE_TX_VLAN_TAGGING_VIOLATION   = 41,
 };
 
 struct mana_cqe_header {
@@ -191,6 +243,17 @@  struct mana_cqe_header {
 	uint32_t vendor_err  : 24;
 }; /* HW DATA */
 
+struct mana_tx_comp_oob {
+	struct mana_cqe_header cqe_hdr;
+
+	uint32_t tx_data_offset;
+
+	uint32_t tx_sgl_offset       : 5;
+	uint32_t tx_wqe_offset       : 27;
+
+	uint32_t reserved[12];
+}; /* HW DATA */
+
 /* NDIS HASH Types */
 #define BIT(nr)		(1 << (nr))
 #define NDIS_HASH_IPV4          BIT(0)
@@ -407,6 +470,8 @@  uint8_t *gdma_get_wqe_pointer(struct mana_gdma_queue *queue);
 
 uint16_t mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **rx_pkts,
 		       uint16_t pkts_n);
+uint16_t mana_tx_burst(void *dpdk_txq, struct rte_mbuf **tx_pkts,
+		       uint16_t pkts_n);
 
 uint16_t mana_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
 			       uint16_t pkts_n);
diff --git a/drivers/net/mana/mp.c b/drivers/net/mana/mp.c
index 36a88c561a..da9c0f36a1 100644
--- a/drivers/net/mana/mp.c
+++ b/drivers/net/mana/mp.c
@@ -138,6 +138,7 @@  static int mana_mp_secondary_handle(const struct rte_mp_msg *mp_msg,
 	case MANA_MP_REQ_START_RXTX:
 		DRV_LOG(INFO, "Port %u starting datapath", dev->data->port_id);
 
+		dev->tx_pkt_burst = mana_tx_burst;
 		dev->rx_pkt_burst = mana_rx_burst;
 
 		rte_mb();
diff --git a/drivers/net/mana/tx.c b/drivers/net/mana/tx.c
index fbeea40ef2..0aefab1b6c 100644
--- a/drivers/net/mana/tx.c
+++ b/drivers/net/mana/tx.c
@@ -161,3 +161,244 @@  static inline uint16_t get_vsq_frame_num(uint32_t vsq)
 	v.gdma_txq_id = vsq;
 	return v.vsq_frame;
 }
+
+uint16_t mana_tx_burst(void *dpdk_txq, struct rte_mbuf **tx_pkts,
+		       uint16_t nb_pkts)
+{
+	struct mana_txq *txq = dpdk_txq;
+	struct mana_priv *priv = txq->priv;
+	struct gdma_comp comp;
+	int ret;
+	void *db_page;
+
+	/* Process send completions from GDMA */
+	while (gdma_poll_completion_queue(&txq->gdma_cq, &comp) == 1) {
+		struct mana_txq_desc *desc =
+			&txq->desc_ring[txq->desc_ring_tail];
+		struct mana_tx_comp_oob *oob =
+			(struct mana_tx_comp_oob *)&comp.completion_data[0];
+
+		if (oob->cqe_hdr.cqe_type != CQE_TX_OKAY) {
+			DRV_LOG(ERR,
+				"mana_tx_comp_oob cqe_type %u vendor_err %u",
+				oob->cqe_hdr.cqe_type, oob->cqe_hdr.vendor_err);
+			txq->stats.errors++;
+		} else {
+			DRV_LOG(DEBUG, "mana_tx_comp_oob CQE_TX_OKAY");
+			txq->stats.packets++;
+		}
+
+		if (!desc->pkt) {
+			DRV_LOG(ERR, "mana_txq_desc has a NULL pkt");
+		} else {
+			txq->stats.bytes += desc->pkt->data_len;
+			rte_pktmbuf_free(desc->pkt);
+		}
+
+		desc->pkt = NULL;
+		txq->desc_ring_tail = (txq->desc_ring_tail + 1) % txq->num_desc;
+		txq->gdma_sq.tail += desc->wqe_size_in_bu;
+	}
+
+	/* Post send requests to GDMA */
+	uint16_t pkt_idx;
+
+	for (pkt_idx = 0; pkt_idx < nb_pkts; pkt_idx++) {
+		struct rte_mbuf *m_pkt = tx_pkts[pkt_idx];
+		struct rte_mbuf *m_seg = m_pkt;
+		struct transmit_oob_v2 tx_oob = {0};
+		struct one_sgl sgl = {0};
+
+		/* Drop the packet if it exceeds max segments */
+		if (m_pkt->nb_segs > priv->max_send_sge) {
+			DRV_LOG(ERR, "send packet segments %d exceeding max",
+				m_pkt->nb_segs);
+			continue;
+		}
+
+		/* Fill in the oob */
+		tx_oob.short_oob.packet_format = short_packet_format;
+		tx_oob.short_oob.tx_is_outer_ipv4 =
+			m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4 ? 1 : 0;
+		tx_oob.short_oob.tx_is_outer_ipv6 =
+			m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6 ? 1 : 0;
+
+		tx_oob.short_oob.tx_compute_IP_header_checksum =
+			m_pkt->ol_flags & RTE_MBUF_F_TX_IP_CKSUM ? 1 : 0;
+
+		if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
+				RTE_MBUF_F_TX_TCP_CKSUM) {
+			struct rte_tcp_hdr *tcp_hdr;
+
+			/* HW needs partial TCP checksum */
+
+			tcp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
+					  struct rte_tcp_hdr *,
+					  m_pkt->l2_len + m_pkt->l3_len);
+
+			if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
+				struct rte_ipv4_hdr *ip_hdr;
+
+				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
+						struct rte_ipv4_hdr *,
+						m_pkt->l2_len);
+				tcp_hdr->cksum = rte_ipv4_phdr_cksum(ip_hdr,
+							m_pkt->ol_flags);
+
+			} else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
+				struct rte_ipv6_hdr *ip_hdr;
+
+				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
+						struct rte_ipv6_hdr *,
+						m_pkt->l2_len);
+				tcp_hdr->cksum = rte_ipv6_phdr_cksum(ip_hdr,
+							m_pkt->ol_flags);
+			} else {
+				DRV_LOG(ERR, "Invalid input for TCP CKSUM");
+			}
+
+			tx_oob.short_oob.tx_compute_TCP_checksum = 1;
+			tx_oob.short_oob.tx_transport_header_offset =
+				m_pkt->l2_len + m_pkt->l3_len;
+		}
+
+		if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
+				RTE_MBUF_F_TX_UDP_CKSUM) {
+			struct rte_udp_hdr *udp_hdr;
+
+			/* HW needs partial UDP checksum */
+			udp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
+					struct rte_udp_hdr *,
+					m_pkt->l2_len + m_pkt->l3_len);
+
+			if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
+				struct rte_ipv4_hdr *ip_hdr;
+
+				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
+						struct rte_ipv4_hdr *,
+						m_pkt->l2_len);
+
+				udp_hdr->dgram_cksum =
+					rte_ipv4_phdr_cksum(ip_hdr,
+							    m_pkt->ol_flags);
+
+			} else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
+				struct rte_ipv6_hdr *ip_hdr;
+
+				ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
+						struct rte_ipv6_hdr *,
+						m_pkt->l2_len);
+
+				udp_hdr->dgram_cksum =
+					rte_ipv6_phdr_cksum(ip_hdr,
+							    m_pkt->ol_flags);
+
+			} else {
+				DRV_LOG(ERR, "Invalid input for UDP CKSUM");
+			}
+
+			tx_oob.short_oob.tx_compute_UDP_checksum = 1;
+		}
+
+		tx_oob.short_oob.suppress_tx_CQE_generation = 0;
+		tx_oob.short_oob.VCQ_number = txq->gdma_cq.id;
+
+		tx_oob.short_oob.VSQ_frame_num =
+			get_vsq_frame_num(txq->gdma_sq.id);
+		tx_oob.short_oob.short_vport_offset = txq->tx_vp_offset;
+
+		DRV_LOG(DEBUG, "tx_oob packet_format %u ipv4 %u ipv6 %u",
+			tx_oob.short_oob.packet_format,
+			tx_oob.short_oob.tx_is_outer_ipv4,
+			tx_oob.short_oob.tx_is_outer_ipv6);
+
+		DRV_LOG(DEBUG, "tx_oob checksum ip %u tcp %u udp %u offset %u",
+			tx_oob.short_oob.tx_compute_IP_header_checksum,
+			tx_oob.short_oob.tx_compute_TCP_checksum,
+			tx_oob.short_oob.tx_compute_UDP_checksum,
+			tx_oob.short_oob.tx_transport_header_offset);
+
+		DRV_LOG(DEBUG, "pkt[%d]: buf_addr 0x%p, nb_segs %d, pkt_len %d",
+			pkt_idx, m_pkt->buf_addr, m_pkt->nb_segs,
+			m_pkt->pkt_len);
+
+		/* Create SGL for packet data buffers */
+		for (uint16_t seg_idx = 0; seg_idx < m_pkt->nb_segs; seg_idx++) {
+			struct mana_mr_cache *mr =
+				mana_find_pmd_mr(&txq->mr_btree, priv, m_seg);
+
+			if (!mr) {
+				DRV_LOG(ERR, "failed to get MR, pkt_idx %u",
+					pkt_idx);
+				return pkt_idx;
+			}
+
+			sgl.gdma_sgl[seg_idx].address =
+				rte_cpu_to_le_64(rte_pktmbuf_mtod(m_seg,
+								  uint64_t));
+			sgl.gdma_sgl[seg_idx].size = m_seg->data_len;
+			sgl.gdma_sgl[seg_idx].memory_key = mr->lkey;
+
+			DRV_LOG(DEBUG,
+				"seg idx %u addr 0x%" PRIx64 " size %x key %x",
+				seg_idx, sgl.gdma_sgl[seg_idx].address,
+				sgl.gdma_sgl[seg_idx].size,
+				sgl.gdma_sgl[seg_idx].memory_key);
+
+			m_seg = m_seg->next;
+		}
+
+		struct gdma_work_request work_req = {0};
+		struct gdma_posted_wqe_info wqe_info = {0};
+
+		work_req.gdma_header.struct_size = sizeof(work_req);
+		wqe_info.gdma_header.struct_size = sizeof(wqe_info);
+
+		work_req.sgl = sgl.gdma_sgl;
+		work_req.num_sgl_elements = m_pkt->nb_segs;
+		work_req.inline_oob_size_in_bytes =
+			sizeof(struct transmit_short_oob_v2);
+		work_req.inline_oob_data = &tx_oob;
+		work_req.flags = 0;
+		work_req.client_data_unit = NOT_USING_CLIENT_DATA_UNIT;
+
+		ret = gdma_post_work_request(&txq->gdma_sq, &work_req,
+					     &wqe_info);
+		if (!ret) {
+			struct mana_txq_desc *desc =
+				&txq->desc_ring[txq->desc_ring_head];
+
+			/* Update queue for tracking pending requests */
+			desc->pkt = m_pkt;
+			desc->wqe_size_in_bu = wqe_info.wqe_size_in_bu;
+			txq->desc_ring_head =
+				(txq->desc_ring_head + 1) % txq->num_desc;
+
+			DRV_LOG(DEBUG, "nb_pkts %u pkt[%d] sent",
+				nb_pkts, pkt_idx);
+		} else {
+			DRV_LOG(INFO, "pkt[%d] failed to post send ret %d",
+				pkt_idx, ret);
+			break;
+		}
+	}
+
+	/* Ring hardware door bell */
+	db_page = priv->db_page;
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		struct rte_eth_dev *dev =
+			&rte_eth_devices[priv->dev_data->port_id];
+		struct mana_process_priv *process_priv = dev->process_private;
+
+		db_page = process_priv->db_page;
+	}
+
+	ret = mana_ring_doorbell(db_page, gdma_queue_send,
+				 txq->gdma_sq.id,
+				 txq->gdma_sq.head *
+					GDMA_WQE_ALIGNMENT_UNIT_SIZE);
+	if (ret)
+		DRV_LOG(ERR, "mana_ring_doorbell failed ret %d", ret);
+
+	return pkt_idx;
+}