[v6,15/18] net/mana: add function to send packets
Commit Message
From: Long Li <longli@microsoft.com>
With all the TX queues created, MANA can send packets over those queues.
Signed-off-by: Long Li <longli@microsoft.com>
---
Change log:
v2:
Rename all camel-case identifiers.
doc/guides/nics/features/mana.ini | 1 +
drivers/net/mana/mana.c | 1 +
drivers/net/mana/mana.h | 65 ++++++++
drivers/net/mana/mp.c | 1 +
drivers/net/mana/tx.c | 241 ++++++++++++++++++++++++++++++
5 files changed, 309 insertions(+)
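For context, applications reach this entry point through rte_eth_tx_burst(). Below is a minimal transmit loop; it is illustrative only (not part of the patch) and assumes port 0 is a configured and started MANA device with one TX queue.

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Send a burst of packets on port 0, queue 0. rte_eth_tx_burst()
 * dispatches to the PMD's tx_pkt_burst callback, which this patch
 * sets to mana_tx_burst().
 */
static void
send_burst(struct rte_mbuf **pkts, uint16_t n)
{
	uint16_t sent = 0;

	while (sent < n) {
		uint16_t done = rte_eth_tx_burst(0, 0, pkts + sent, n - sent);

		if (done == 0)
			break;	/* TX queue full or post failure */
		sent += done;
	}

	/* The application still owns any packets the PMD did not accept. */
	while (sent < n)
		rte_pktmbuf_free(pkts[sent++]);
}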
Comments
On 2022/8/31 6:51, longli@linuxonhyperv.com wrote:
> From: Long Li <longli@microsoft.com>
>
> With all the TX queues created, MANA can send packets over those queues.
>
> Signed-off-by: Long Li <longli@microsoft.com>
...
> +
> + /* Post send requests to GDMA */
> + uint16_t pkt_idx;
> +
> + for (pkt_idx = 0; pkt_idx < nb_pkts; pkt_idx++) {
> + struct rte_mbuf *m_pkt = tx_pkts[pkt_idx];
> + struct rte_mbuf *m_seg = m_pkt;
> + struct transmit_oob_v2 tx_oob = {0};
> + struct one_sgl sgl = {0};
> +
> + /* Drop the packet if it exceeds max segments */
> + if (m_pkt->nb_segs > priv->max_send_sge) {
> + DRV_LOG(ERR, "send packet segments %d exceeding max",
> + m_pkt->nb_segs);
This branch violates the rte_eth_tx_burst definition, which defines the return value as "The maximum number of packets to transmit."
Also, I notice the driver didn't implement tx_prepare, which could hold such checks under the framework's definition.
> + continue;
> + }
> +
> + /* Fill in the oob */
> + tx_oob.short_oob.packet_format = short_packet_format;
> + tx_oob.short_oob.tx_is_outer_ipv4 =
> + m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4 ? 1 : 0;
> + tx_oob.short_oob.tx_is_outer_ipv6 =
> + m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6 ? 1 : 0;
> +
> + tx_oob.short_oob.tx_compute_IP_header_checksum =
> + m_pkt->ol_flags & RTE_MBUF_F_TX_IP_CKSUM ? 1 : 0;
> +
> + if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
> + RTE_MBUF_F_TX_TCP_CKSUM) {
> + struct rte_tcp_hdr *tcp_hdr;
> +
> + /* HW needs partial TCP checksum */
> +
> + tcp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> + struct rte_tcp_hdr *,
> + m_pkt->l2_len + m_pkt->l3_len);
> +
> + if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
> + struct rte_ipv4_hdr *ip_hdr;
> +
> + ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> + struct rte_ipv4_hdr *,
> + m_pkt->l2_len);
> + tcp_hdr->cksum = rte_ipv4_phdr_cksum(ip_hdr,
> + m_pkt->ol_flags);
> +
> + } else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
> + struct rte_ipv6_hdr *ip_hdr;
> +
> + ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> + struct rte_ipv6_hdr *,
> + m_pkt->l2_len);
> + tcp_hdr->cksum = rte_ipv6_phdr_cksum(ip_hdr,
> + m_pkt->ol_flags);
> + } else {
> + DRV_LOG(ERR, "Invalid input for TCP CKSUM");
> + }
> +
> + tx_oob.short_oob.tx_compute_TCP_checksum = 1;
> + tx_oob.short_oob.tx_transport_header_offset =
> + m_pkt->l2_len + m_pkt->l3_len;
> + }
> +
> + if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
> + RTE_MBUF_F_TX_UDP_CKSUM) {
> + struct rte_udp_hdr *udp_hdr;
> +
> + /* HW needs partial UDP checksum */
> + udp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> + struct rte_udp_hdr *,
> + m_pkt->l2_len + m_pkt->l3_len);
> +
> + if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
> + struct rte_ipv4_hdr *ip_hdr;
> +
> + ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> + struct rte_ipv4_hdr *,
> + m_pkt->l2_len);
> +
> + udp_hdr->dgram_cksum =
> + rte_ipv4_phdr_cksum(ip_hdr,
> + m_pkt->ol_flags);
> +
> + } else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
> + struct rte_ipv6_hdr *ip_hdr;
> +
> + ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
> + struct rte_ipv6_hdr *,
> + m_pkt->l2_len);
> +
> + udp_hdr->dgram_cksum =
> + rte_ipv6_phdr_cksum(ip_hdr,
> + m_pkt->ol_flags);
> +
> + } else {
> + DRV_LOG(ERR, "Invalid input for UDP CKSUM");
> + }
> +
> + tx_oob.short_oob.tx_compute_UDP_checksum = 1;
> + }
Why not handle TSO, which was reported as supported in dev_info_get?
> ...
> Subject: Re: [Patch v6 15/18] net/mana: add function to send packets
>
> On 2022/8/31 6:51, longli@linuxonhyperv.com wrote:
> > From: Long Li <longli@microsoft.com>
> >
> > With all the TX queues created, MANA can send packets over those queues.
>
> ...
>
> > + /* Drop the packet if it exceeds max segments */
> > + if (m_pkt->nb_segs > priv->max_send_sge) {
> > + DRV_LOG(ERR, "send packet segments %d exceeding max",
> > + m_pkt->nb_segs);
>
> This branch violates the rte_eth_tx_burst definition, which defines the
> return value as "The maximum number of packets to transmit."
Will fix this.
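A minimal sketch of such a fix (an assumption about the next revision, not posted code): stop at the first un-sendable packet instead of skipping it, so the return value counts packets consumed consecutively from the head of tx_pkts[].

		/* Stop, rather than skip, on an un-sendable packet so the
		 * value returned to rte_eth_tx_burst() callers reflects
		 * packets actually consumed from the head of tx_pkts[].
		 */
		if (m_pkt->nb_segs > priv->max_send_sge) {
			DRV_LOG(ERR, "send packet segments %d exceeding max",
				m_pkt->nb_segs);
			break;
		}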
>
> Also, I notice the driver didn't implement tx_prepare, which could hold
> such checks under the framework's definition.
>
> ...
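For illustration, such a check could live in a tx_prepare callback. A sketch follows, assuming the series' mana_txq/mana_priv types are visible; mana_tx_prepare itself is hypothetical and not part of this patch.

#include <errno.h>
#include <rte_errno.h>
#include <rte_mbuf.h>

uint16_t
mana_tx_prepare(void *dpdk_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct mana_txq *txq = dpdk_txq;
	struct mana_priv *priv = txq->priv;
	uint16_t i;

	for (i = 0; i < nb_pkts; i++) {
		if (tx_pkts[i]->nb_segs > priv->max_send_sge) {
			/* Report the first packet that cannot be sent;
			 * the caller may fix it up or drop it.
			 */
			rte_errno = EINVAL;
			return i;
		}
	}
	return nb_pkts;
}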
> Why not handle TSO, which was reported as supported in dev_info_get?
TSO is currently not implemented. I'm changing BNIC_DEV_TX_OFFLOAD_SUPPORT to fix this.
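A sketch of what that change might look like (only BNIC_DEV_TX_OFFLOAD_SUPPORT is named in this thread; the exact flag set is an assumption): drop RTE_ETH_TX_OFFLOAD_TCP_TSO from the advertised capabilities until the datapath implements it.

/* Advertise only the offloads mana_tx_burst() implements; no TSO yet. */
#define BNIC_DEV_TX_OFFLOAD_SUPPORT ( \
	RTE_ETH_TX_OFFLOAD_MULTI_SEGS | \
	RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | \
	RTE_ETH_TX_OFFLOAD_TCP_CKSUM | \
	RTE_ETH_TX_OFFLOAD_UDP_CKSUM)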
@@ -4,6 +4,7 @@
; Refer to default.ini for the full list of available PMD features.
;
[Features]
+Free Tx mbuf on demand = Y
Link status = P
Linux = Y
L3 checksum offload = Y
@@ -950,6 +950,7 @@ static int mana_pci_probe_mac(struct rte_pci_driver *pci_drv __rte_unused,
/* fd is not used after mapping doorbell */
close(fd);
+ eth_dev->tx_pkt_burst = mana_tx_burst;
eth_dev->rx_pkt_burst = mana_rx_burst;
rte_spinlock_lock(&mana_shared_data->lock);
@@ -62,6 +62,47 @@ struct mana_shared_data {
#define NOT_USING_CLIENT_DATA_UNIT 0
+enum tx_packet_format_v2 {
+ short_packet_format = 0,
+ long_packet_format = 1
+};
+
+struct transmit_short_oob_v2 {
+ enum tx_packet_format_v2 packet_format : 2;
+ uint32_t tx_is_outer_ipv4 : 1;
+ uint32_t tx_is_outer_ipv6 : 1;
+ uint32_t tx_compute_IP_header_checksum : 1;
+ uint32_t tx_compute_TCP_checksum : 1;
+ uint32_t tx_compute_UDP_checksum : 1;
+ uint32_t suppress_tx_CQE_generation : 1;
+ uint32_t VCQ_number : 24;
+ uint32_t tx_transport_header_offset : 10;
+ uint32_t VSQ_frame_num : 14;
+ uint32_t short_vport_offset : 8;
+};
+
+struct transmit_long_oob_v2 {
+ uint32_t tx_is_encapsulated_packet : 1;
+ uint32_t tx_inner_is_ipv6 : 1;
+ uint32_t tx_inner_TCP_options_present : 1;
+ uint32_t inject_vlan_prior_tag : 1;
+ uint32_t reserved1 : 12;
+ uint32_t priority_code_point : 3;
+ uint32_t drop_eligible_indicator : 1;
+ uint32_t vlan_identifier : 12;
+ uint32_t tx_inner_frame_offset : 10;
+ uint32_t tx_inner_IP_header_relative_offset : 6;
+ uint32_t long_vport_offset : 12;
+ uint32_t reserved3 : 4;
+ uint32_t reserved4 : 32;
+ uint32_t reserved5 : 32;
+};
+
+struct transmit_oob_v2 {
+ struct transmit_short_oob_v2 short_oob;
+ struct transmit_long_oob_v2 long_oob;
+};
+
enum gdma_queue_types {
gdma_queue_type_invalid = 0,
gdma_queue_send,
@@ -183,6 +224,17 @@ enum mana_cqe_type {
CQE_RX_COALESCED_4 = 2,
CQE_RX_OBJECT_FENCE = 3,
CQE_RX_TRUNCATED = 4,
+
+ CQE_TX_OKAY = 32,
+ CQE_TX_SA_DROP = 33,
+ CQE_TX_MTU_DROP = 34,
+ CQE_TX_INVALID_OOB = 35,
+ CQE_TX_INVALID_ETH_TYPE = 36,
+ CQE_TX_HDR_PROCESSING_ERROR = 37,
+ CQE_TX_VF_DISABLED = 38,
+ CQE_TX_VPORT_IDX_OUT_OF_RANGE = 39,
+ CQE_TX_VPORT_DISABLED = 40,
+ CQE_TX_VLAN_TAGGING_VIOLATION = 41,
};
struct mana_cqe_header {
@@ -191,6 +243,17 @@ struct mana_cqe_header {
uint32_t vendor_err : 24;
}; /* HW DATA */
+struct mana_tx_comp_oob {
+ struct mana_cqe_header cqe_hdr;
+
+ uint32_t tx_data_offset;
+
+ uint32_t tx_sgl_offset : 5;
+ uint32_t tx_wqe_offset : 27;
+
+ uint32_t reserved[12];
+}; /* HW DATA */
+
/* NDIS HASH Types */
#define BIT(nr) (1 << (nr))
#define NDIS_HASH_IPV4 BIT(0)
@@ -407,6 +470,8 @@ uint8_t *gdma_get_wqe_pointer(struct mana_gdma_queue *queue);
uint16_t mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **rx_pkts,
uint16_t pkts_n);
+uint16_t mana_tx_burst(void *dpdk_txq, struct rte_mbuf **tx_pkts,
+ uint16_t pkts_n);
uint16_t mana_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
uint16_t pkts_n);
@@ -138,6 +138,7 @@ static int mana_mp_secondary_handle(const struct rte_mp_msg *mp_msg,
case MANA_MP_REQ_START_RXTX:
DRV_LOG(INFO, "Port %u starting datapath", dev->data->port_id);
+ dev->tx_pkt_burst = mana_tx_burst;
dev->rx_pkt_burst = mana_rx_burst;
rte_mb();
@@ -161,3 +161,244 @@ static inline uint16_t get_vsq_frame_num(uint32_t vsq)
v.gdma_txq_id = vsq;
return v.vsq_frame;
}
+
+uint16_t mana_tx_burst(void *dpdk_txq, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct mana_txq *txq = dpdk_txq;
+ struct mana_priv *priv = txq->priv;
+ struct gdma_comp comp;
+ int ret;
+ void *db_page;
+
+ /* Process send completions from GDMA */
+ while (gdma_poll_completion_queue(&txq->gdma_cq, &comp) == 1) {
+ struct mana_txq_desc *desc =
+ &txq->desc_ring[txq->desc_ring_tail];
+ struct mana_tx_comp_oob *oob =
+ (struct mana_tx_comp_oob *)&comp.completion_data[0];
+
+ if (oob->cqe_hdr.cqe_type != CQE_TX_OKAY) {
+ DRV_LOG(ERR,
+ "mana_tx_comp_oob cqe_type %u vendor_err %u",
+ oob->cqe_hdr.cqe_type, oob->cqe_hdr.vendor_err);
+ txq->stats.errors++;
+ } else {
+ DRV_LOG(DEBUG, "mana_tx_comp_oob CQE_TX_OKAY");
+ txq->stats.packets++;
+ }
+
+ if (!desc->pkt) {
+ DRV_LOG(ERR, "mana_txq_desc has a NULL pkt");
+ } else {
+ txq->stats.bytes += desc->pkt->data_len;
+ rte_pktmbuf_free(desc->pkt);
+ }
+
+ desc->pkt = NULL;
+ txq->desc_ring_tail = (txq->desc_ring_tail + 1) % txq->num_desc;
+ txq->gdma_sq.tail += desc->wqe_size_in_bu;
+ }
+
+ /* Post send requests to GDMA */
+ uint16_t pkt_idx;
+
+ for (pkt_idx = 0; pkt_idx < nb_pkts; pkt_idx++) {
+ struct rte_mbuf *m_pkt = tx_pkts[pkt_idx];
+ struct rte_mbuf *m_seg = m_pkt;
+ struct transmit_oob_v2 tx_oob = {0};
+ struct one_sgl sgl = {0};
+
+ /* Drop the packet if it exceeds max segments */
+ if (m_pkt->nb_segs > priv->max_send_sge) {
+ DRV_LOG(ERR, "send packet segments %d exceeding max",
+ m_pkt->nb_segs);
+ continue;
+ }
+
+ /* Fill in the oob */
+ tx_oob.short_oob.packet_format = short_packet_format;
+ tx_oob.short_oob.tx_is_outer_ipv4 =
+ m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4 ? 1 : 0;
+ tx_oob.short_oob.tx_is_outer_ipv6 =
+ m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6 ? 1 : 0;
+
+ tx_oob.short_oob.tx_compute_IP_header_checksum =
+ m_pkt->ol_flags & RTE_MBUF_F_TX_IP_CKSUM ? 1 : 0;
+
+ if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
+ RTE_MBUF_F_TX_TCP_CKSUM) {
+ struct rte_tcp_hdr *tcp_hdr;
+
+ /* HW needs partial TCP checksum */
+
+ tcp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
+ struct rte_tcp_hdr *,
+ m_pkt->l2_len + m_pkt->l3_len);
+
+ if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
+ struct rte_ipv4_hdr *ip_hdr;
+
+ ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
+ struct rte_ipv4_hdr *,
+ m_pkt->l2_len);
+ tcp_hdr->cksum = rte_ipv4_phdr_cksum(ip_hdr,
+ m_pkt->ol_flags);
+
+ } else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
+ struct rte_ipv6_hdr *ip_hdr;
+
+ ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
+ struct rte_ipv6_hdr *,
+ m_pkt->l2_len);
+ tcp_hdr->cksum = rte_ipv6_phdr_cksum(ip_hdr,
+ m_pkt->ol_flags);
+ } else {
+ DRV_LOG(ERR, "Invalid input for TCP CKSUM");
+ }
+
+ tx_oob.short_oob.tx_compute_TCP_checksum = 1;
+ tx_oob.short_oob.tx_transport_header_offset =
+ m_pkt->l2_len + m_pkt->l3_len;
+ }
+
+ if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
+ RTE_MBUF_F_TX_UDP_CKSUM) {
+ struct rte_udp_hdr *udp_hdr;
+
+ /* HW needs partial UDP checksum */
+ udp_hdr = rte_pktmbuf_mtod_offset(m_pkt,
+ struct rte_udp_hdr *,
+ m_pkt->l2_len + m_pkt->l3_len);
+
+ if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) {
+ struct rte_ipv4_hdr *ip_hdr;
+
+ ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
+ struct rte_ipv4_hdr *,
+ m_pkt->l2_len);
+
+ udp_hdr->dgram_cksum =
+ rte_ipv4_phdr_cksum(ip_hdr,
+ m_pkt->ol_flags);
+
+ } else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) {
+ struct rte_ipv6_hdr *ip_hdr;
+
+ ip_hdr = rte_pktmbuf_mtod_offset(m_pkt,
+ struct rte_ipv6_hdr *,
+ m_pkt->l2_len);
+
+ udp_hdr->dgram_cksum =
+ rte_ipv6_phdr_cksum(ip_hdr,
+ m_pkt->ol_flags);
+
+ } else {
+ DRV_LOG(ERR, "Invalid input for UDP CKSUM");
+ }
+
+ tx_oob.short_oob.tx_compute_UDP_checksum = 1;
+ }
+
+ tx_oob.short_oob.suppress_tx_CQE_generation = 0;
+ tx_oob.short_oob.VCQ_number = txq->gdma_cq.id;
+
+ tx_oob.short_oob.VSQ_frame_num =
+ get_vsq_frame_num(txq->gdma_sq.id);
+ tx_oob.short_oob.short_vport_offset = txq->tx_vp_offset;
+
+ DRV_LOG(DEBUG, "tx_oob packet_format %u ipv4 %u ipv6 %u",
+ tx_oob.short_oob.packet_format,
+ tx_oob.short_oob.tx_is_outer_ipv4,
+ tx_oob.short_oob.tx_is_outer_ipv6);
+
+ DRV_LOG(DEBUG, "tx_oob checksum ip %u tcp %u udp %u offset %u",
+ tx_oob.short_oob.tx_compute_IP_header_checksum,
+ tx_oob.short_oob.tx_compute_TCP_checksum,
+ tx_oob.short_oob.tx_compute_UDP_checksum,
+ tx_oob.short_oob.tx_transport_header_offset);
+
+ DRV_LOG(DEBUG, "pkt[%d]: buf_addr 0x%p, nb_segs %d, pkt_len %d",
+ pkt_idx, m_pkt->buf_addr, m_pkt->nb_segs,
+ m_pkt->pkt_len);
+
+ /* Create SGL for packet data buffers */
+ for (uint16_t seg_idx = 0; seg_idx < m_pkt->nb_segs; seg_idx++) {
+ struct mana_mr_cache *mr =
+ mana_find_pmd_mr(&txq->mr_btree, priv, m_seg);
+
+ if (!mr) {
+ DRV_LOG(ERR, "failed to get MR, pkt_idx %u",
+ pkt_idx);
+ return pkt_idx;
+ }
+
+ sgl.gdma_sgl[seg_idx].address =
+ rte_cpu_to_le_64(rte_pktmbuf_mtod(m_seg,
+ uint64_t));
+ sgl.gdma_sgl[seg_idx].size = m_seg->data_len;
+ sgl.gdma_sgl[seg_idx].memory_key = mr->lkey;
+
+ DRV_LOG(DEBUG,
+ "seg idx %u addr 0x%" PRIx64 " size %x key %x",
+ seg_idx, sgl.gdma_sgl[seg_idx].address,
+ sgl.gdma_sgl[seg_idx].size,
+ sgl.gdma_sgl[seg_idx].memory_key);
+
+ m_seg = m_seg->next;
+ }
+
+ struct gdma_work_request work_req = {0};
+ struct gdma_posted_wqe_info wqe_info = {0};
+
+ work_req.gdma_header.struct_size = sizeof(work_req);
+ wqe_info.gdma_header.struct_size = sizeof(wqe_info);
+
+ work_req.sgl = sgl.gdma_sgl;
+ work_req.num_sgl_elements = m_pkt->nb_segs;
+ work_req.inline_oob_size_in_bytes =
+ sizeof(struct transmit_short_oob_v2);
+ work_req.inline_oob_data = &tx_oob;
+ work_req.flags = 0;
+ work_req.client_data_unit = NOT_USING_CLIENT_DATA_UNIT;
+
+ ret = gdma_post_work_request(&txq->gdma_sq, &work_req,
+ &wqe_info);
+ if (!ret) {
+ struct mana_txq_desc *desc =
+ &txq->desc_ring[txq->desc_ring_head];
+
+ /* Update queue for tracking pending requests */
+ desc->pkt = m_pkt;
+ desc->wqe_size_in_bu = wqe_info.wqe_size_in_bu;
+ txq->desc_ring_head =
+ (txq->desc_ring_head + 1) % txq->num_desc;
+
+ DRV_LOG(DEBUG, "nb_pkts %u pkt[%d] sent",
+ nb_pkts, pkt_idx);
+ } else {
+ DRV_LOG(INFO, "pkt[%d] failed to post send ret %d",
+ pkt_idx, ret);
+ break;
+ }
+ }
+
+ /* Ring hardware doorbell */
+ db_page = priv->db_page;
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ struct rte_eth_dev *dev =
+ &rte_eth_devices[priv->dev_data->port_id];
+ struct mana_process_priv *process_priv = dev->process_private;
+
+ db_page = process_priv->db_page;
+ }
+
+ ret = mana_ring_doorbell(db_page, gdma_queue_send,
+ txq->gdma_sq.id,
+ txq->gdma_sq.head *
+ GDMA_WQE_ALIGNMENT_UNIT_SIZE);
+ if (ret)
+ DRV_LOG(ERR, "mana_ring_doorbell failed ret %d", ret);
+
+ return pkt_idx;
+}