From patchwork Sat Sep 24 02:45:43 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Long Li X-Patchwork-Id: 116795 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 4897BA054F; Sat, 24 Sep 2022 04:47:21 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 5BB9F42C28; Sat, 24 Sep 2022 04:46:15 +0200 (CEST) Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by mails.dpdk.org (Postfix) with ESMTP id EDE4442BD0 for ; Sat, 24 Sep 2022 04:45:59 +0200 (CEST) Received: by linux.microsoft.com (Postfix, from userid 1004) id A7FD820C32BB; Fri, 23 Sep 2022 19:45:59 -0700 (PDT) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com A7FD820C32BB DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linuxonhyperv.com; s=default; t=1663987559; bh=DruOGKpqd/Zn2UxRk5ojpkU/n1ENwwjozex68IQodJ4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:Reply-To:From; b=j+dPU/zMe0gZoIUs8Id7anLzaUZXwOq8JSfArqGpkRbJy/LfRcLZs5Qdy95cI40kD rmnQF/YHGoASYkWMfTJw7wi9sE1GhwcY6QhE5cca7aQ9gDCFnY6uI1lXK/cKs8+S+h 9oZGytUfk/J3FC9/hi33sWaqWM5B3h+XSWi8gP9w= From: longli@linuxonhyperv.com To: Ferruh Yigit Cc: dev@dpdk.org, Ajay Sharma , Stephen Hemminger , Long Li Subject: [Patch v9 15/18] net/mana: send packets Date: Fri, 23 Sep 2022 19:45:43 -0700 Message-Id: <1663987546-15982-16-git-send-email-longli@linuxonhyperv.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1663987546-15982-1-git-send-email-longli@linuxonhyperv.com> References: <1662674189-29524-1-git-send-email-longli@linuxonhyperv.com> <1663987546-15982-1-git-send-email-longli@linuxonhyperv.com> X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: longli@microsoft.com Errors-To: dev-bounces@dpdk.org From: Long Li With all the TX queues created, MANA can send packets over those queues. Signed-off-by: Long Li --- Change log: v2: rename all camel cases. v7: return the correct number of packets sent v8: fix coding style to function definitions. change enum names to use capital letters. doc/guides/nics/features/mana.ini | 1 + drivers/net/mana/mana.c | 1 + drivers/net/mana/mana.h | 66 ++++++++ drivers/net/mana/mp.c | 1 + drivers/net/mana/tx.c | 248 ++++++++++++++++++++++++++++++ 5 files changed, 317 insertions(+) diff --git a/doc/guides/nics/features/mana.ini b/doc/guides/nics/features/mana.ini index fdbf22d335..7922816d66 100644 --- a/doc/guides/nics/features/mana.ini +++ b/doc/guides/nics/features/mana.ini @@ -4,6 +4,7 @@ ; Refer to default.ini for the full list of available PMD features. ; [Features] +Free Tx mbuf on demand = Y Link status = P Linux = Y L3 checksum offload = Y diff --git a/drivers/net/mana/mana.c b/drivers/net/mana/mana.c index 51aa01a642..d0214725e6 100644 --- a/drivers/net/mana/mana.c +++ b/drivers/net/mana/mana.c @@ -991,6 +991,7 @@ mana_pci_probe_mac(struct rte_pci_device *pci_dev, /* fd is no not used after mapping doorbell */ close(fd); + eth_dev->tx_pkt_burst = mana_tx_burst; eth_dev->rx_pkt_burst = mana_rx_burst; rte_spinlock_lock(&mana_shared_data->lock); diff --git a/drivers/net/mana/mana.h b/drivers/net/mana/mana.h index ef0743df2d..9c576a82fa 100644 --- a/drivers/net/mana/mana.h +++ b/drivers/net/mana/mana.h @@ -61,6 +61,47 @@ struct mana_shared_data { #define NOT_USING_CLIENT_DATA_UNIT 0 +enum tx_packet_format_v2 { + SHORT_PACKET_FORMAT = 0, + LONG_PACKET_FORMAT = 1 +}; + +struct transmit_short_oob_v2 { + enum tx_packet_format_v2 packet_format : 2; + uint32_t tx_is_outer_ipv4 : 1; + uint32_t tx_is_outer_ipv6 : 1; + uint32_t tx_compute_IP_header_checksum : 1; + uint32_t tx_compute_TCP_checksum : 1; + uint32_t tx_compute_UDP_checksum : 1; + uint32_t suppress_tx_CQE_generation : 1; + uint32_t VCQ_number : 24; + uint32_t tx_transport_header_offset : 10; + uint32_t VSQ_frame_num : 14; + uint32_t short_vport_offset : 8; +}; + +struct transmit_long_oob_v2 { + uint32_t tx_is_encapsulated_packet : 1; + uint32_t tx_inner_is_ipv6 : 1; + uint32_t tx_inner_TCP_options_present : 1; + uint32_t inject_vlan_prior_tag : 1; + uint32_t reserved1 : 12; + uint32_t priority_code_point : 3; + uint32_t drop_eligible_indicator : 1; + uint32_t vlan_identifier : 12; + uint32_t tx_inner_frame_offset : 10; + uint32_t tx_inner_IP_header_relative_offset : 6; + uint32_t long_vport_offset : 12; + uint32_t reserved3 : 4; + uint32_t reserved4 : 32; + uint32_t reserved5 : 32; +}; + +struct transmit_oob_v2 { + struct transmit_short_oob_v2 short_oob; + struct transmit_long_oob_v2 long_oob; +}; + enum gdma_queue_types { GDMA_QUEUE_TYPE_INVALID = 0, GDMA_QUEUE_SEND, @@ -182,6 +223,17 @@ enum mana_cqe_type { CQE_RX_COALESCED_4 = 2, CQE_RX_OBJECT_FENCE = 3, CQE_RX_TRUNCATED = 4, + + CQE_TX_OKAY = 32, + CQE_TX_SA_DROP = 33, + CQE_TX_MTU_DROP = 34, + CQE_TX_INVALID_OOB = 35, + CQE_TX_INVALID_ETH_TYPE = 36, + CQE_TX_HDR_PROCESSING_ERROR = 37, + CQE_TX_VF_DISABLED = 38, + CQE_TX_VPORT_IDX_OUT_OF_RANGE = 39, + CQE_TX_VPORT_DISABLED = 40, + CQE_TX_VLAN_TAGGING_VIOLATION = 41, }; struct mana_cqe_header { @@ -190,6 +242,17 @@ struct mana_cqe_header { uint32_t vendor_err : 24; }; /* HW DATA */ +struct mana_tx_comp_oob { + struct mana_cqe_header cqe_hdr; + + uint32_t tx_data_offset; + + uint32_t tx_sgl_offset : 5; + uint32_t tx_wqe_offset : 27; + + uint32_t reserved[12]; +}; /* HW DATA */ + /* NDIS HASH Types */ #define NDIS_HASH_IPV4 RTE_BIT32(0) #define NDIS_HASH_TCP_IPV4 RTE_BIT32(1) @@ -353,6 +416,7 @@ struct mana_txq { uint32_t desc_ring_head, desc_ring_tail; struct mana_mr_btree mr_btree; + struct mana_stats stats; unsigned int socket; }; @@ -404,6 +468,8 @@ uint8_t *gdma_get_wqe_pointer(struct mana_gdma_queue *queue); uint16_t mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **rx_pkts, uint16_t pkts_n); +uint16_t mana_tx_burst(void *dpdk_txq, struct rte_mbuf **tx_pkts, + uint16_t pkts_n); uint16_t mana_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n); diff --git a/drivers/net/mana/mp.c b/drivers/net/mana/mp.c index feda30623a..92432c431d 100644 --- a/drivers/net/mana/mp.c +++ b/drivers/net/mana/mp.c @@ -141,6 +141,7 @@ mana_mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer) case MANA_MP_REQ_START_RXTX: DRV_LOG(INFO, "Port %u starting datapath", dev->data->port_id); + dev->tx_pkt_burst = mana_tx_burst; dev->rx_pkt_burst = mana_rx_burst; rte_mb(); diff --git a/drivers/net/mana/tx.c b/drivers/net/mana/tx.c index e4ff0fbf56..0884681c30 100644 --- a/drivers/net/mana/tx.c +++ b/drivers/net/mana/tx.c @@ -164,3 +164,251 @@ get_vsq_frame_num(uint32_t vsq) v.gdma_txq_id = vsq; return v.vsq_frame; } + +uint16_t +mana_tx_burst(void *dpdk_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct mana_txq *txq = dpdk_txq; + struct mana_priv *priv = txq->priv; + struct gdma_comp comp; + int ret; + void *db_page; + uint16_t pkt_sent = 0; + + /* Process send completions from GDMA */ + while (gdma_poll_completion_queue(&txq->gdma_cq, &comp) == 1) { + struct mana_txq_desc *desc = + &txq->desc_ring[txq->desc_ring_tail]; + struct mana_tx_comp_oob *oob = + (struct mana_tx_comp_oob *)&comp.completion_data[0]; + + if (oob->cqe_hdr.cqe_type != CQE_TX_OKAY) { + DRV_LOG(ERR, + "mana_tx_comp_oob cqe_type %u vendor_err %u", + oob->cqe_hdr.cqe_type, oob->cqe_hdr.vendor_err); + txq->stats.errors++; + } else { + DRV_LOG(DEBUG, "mana_tx_comp_oob CQE_TX_OKAY"); + txq->stats.packets++; + } + + if (!desc->pkt) { + DRV_LOG(ERR, "mana_txq_desc has a NULL pkt"); + } else { + txq->stats.bytes += desc->pkt->data_len; + rte_pktmbuf_free(desc->pkt); + } + + desc->pkt = NULL; + txq->desc_ring_tail = (txq->desc_ring_tail + 1) % txq->num_desc; + txq->gdma_sq.tail += desc->wqe_size_in_bu; + } + + /* Post send requests to GDMA */ + for (uint16_t pkt_idx = 0; pkt_idx < nb_pkts; pkt_idx++) { + struct rte_mbuf *m_pkt = tx_pkts[pkt_idx]; + struct rte_mbuf *m_seg = m_pkt; + struct transmit_oob_v2 tx_oob = {0}; + struct one_sgl sgl = {0}; + uint16_t seg_idx; + + /* Drop the packet if it exceeds max segments */ + if (m_pkt->nb_segs > priv->max_send_sge) { + DRV_LOG(ERR, "send packet segments %d exceeding max", + m_pkt->nb_segs); + continue; + } + + /* Fill in the oob */ + tx_oob.short_oob.packet_format = SHORT_PACKET_FORMAT; + tx_oob.short_oob.tx_is_outer_ipv4 = + m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4 ? 1 : 0; + tx_oob.short_oob.tx_is_outer_ipv6 = + m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6 ? 1 : 0; + + tx_oob.short_oob.tx_compute_IP_header_checksum = + m_pkt->ol_flags & RTE_MBUF_F_TX_IP_CKSUM ? 1 : 0; + + if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) == + RTE_MBUF_F_TX_TCP_CKSUM) { + struct rte_tcp_hdr *tcp_hdr; + + /* HW needs partial TCP checksum */ + + tcp_hdr = rte_pktmbuf_mtod_offset(m_pkt, + struct rte_tcp_hdr *, + m_pkt->l2_len + m_pkt->l3_len); + + if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) { + struct rte_ipv4_hdr *ip_hdr; + + ip_hdr = rte_pktmbuf_mtod_offset(m_pkt, + struct rte_ipv4_hdr *, + m_pkt->l2_len); + tcp_hdr->cksum = rte_ipv4_phdr_cksum(ip_hdr, + m_pkt->ol_flags); + + } else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) { + struct rte_ipv6_hdr *ip_hdr; + + ip_hdr = rte_pktmbuf_mtod_offset(m_pkt, + struct rte_ipv6_hdr *, + m_pkt->l2_len); + tcp_hdr->cksum = rte_ipv6_phdr_cksum(ip_hdr, + m_pkt->ol_flags); + } else { + DRV_LOG(ERR, "Invalid input for TCP CKSUM"); + } + + tx_oob.short_oob.tx_compute_TCP_checksum = 1; + tx_oob.short_oob.tx_transport_header_offset = + m_pkt->l2_len + m_pkt->l3_len; + } + + if ((m_pkt->ol_flags & RTE_MBUF_F_TX_L4_MASK) == + RTE_MBUF_F_TX_UDP_CKSUM) { + struct rte_udp_hdr *udp_hdr; + + /* HW needs partial UDP checksum */ + udp_hdr = rte_pktmbuf_mtod_offset(m_pkt, + struct rte_udp_hdr *, + m_pkt->l2_len + m_pkt->l3_len); + + if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV4) { + struct rte_ipv4_hdr *ip_hdr; + + ip_hdr = rte_pktmbuf_mtod_offset(m_pkt, + struct rte_ipv4_hdr *, + m_pkt->l2_len); + + udp_hdr->dgram_cksum = + rte_ipv4_phdr_cksum(ip_hdr, + m_pkt->ol_flags); + + } else if (m_pkt->ol_flags & RTE_MBUF_F_TX_IPV6) { + struct rte_ipv6_hdr *ip_hdr; + + ip_hdr = rte_pktmbuf_mtod_offset(m_pkt, + struct rte_ipv6_hdr *, + m_pkt->l2_len); + + udp_hdr->dgram_cksum = + rte_ipv6_phdr_cksum(ip_hdr, + m_pkt->ol_flags); + + } else { + DRV_LOG(ERR, "Invalid input for UDP CKSUM"); + } + + tx_oob.short_oob.tx_compute_UDP_checksum = 1; + } + + tx_oob.short_oob.suppress_tx_CQE_generation = 0; + tx_oob.short_oob.VCQ_number = txq->gdma_cq.id; + + tx_oob.short_oob.VSQ_frame_num = + get_vsq_frame_num(txq->gdma_sq.id); + tx_oob.short_oob.short_vport_offset = txq->tx_vp_offset; + + DRV_LOG(DEBUG, "tx_oob packet_format %u ipv4 %u ipv6 %u", + tx_oob.short_oob.packet_format, + tx_oob.short_oob.tx_is_outer_ipv4, + tx_oob.short_oob.tx_is_outer_ipv6); + + DRV_LOG(DEBUG, "tx_oob checksum ip %u tcp %u udp %u offset %u", + tx_oob.short_oob.tx_compute_IP_header_checksum, + tx_oob.short_oob.tx_compute_TCP_checksum, + tx_oob.short_oob.tx_compute_UDP_checksum, + tx_oob.short_oob.tx_transport_header_offset); + + DRV_LOG(DEBUG, "pkt[%d]: buf_addr 0x%p, nb_segs %d, pkt_len %d", + pkt_idx, m_pkt->buf_addr, m_pkt->nb_segs, + m_pkt->pkt_len); + + /* Create SGL for packet data buffers */ + for (seg_idx = 0; seg_idx < m_pkt->nb_segs; seg_idx++) { + struct mana_mr_cache *mr = + mana_find_pmd_mr(&txq->mr_btree, priv, m_seg); + + if (!mr) { + DRV_LOG(ERR, "failed to get MR, pkt_idx %u", + pkt_idx); + break; + } + + sgl.gdma_sgl[seg_idx].address = + rte_cpu_to_le_64(rte_pktmbuf_mtod(m_seg, + uint64_t)); + sgl.gdma_sgl[seg_idx].size = m_seg->data_len; + sgl.gdma_sgl[seg_idx].memory_key = mr->lkey; + + DRV_LOG(DEBUG, + "seg idx %u addr 0x%" PRIx64 " size %x key %x", + seg_idx, sgl.gdma_sgl[seg_idx].address, + sgl.gdma_sgl[seg_idx].size, + sgl.gdma_sgl[seg_idx].memory_key); + + m_seg = m_seg->next; + } + + /* Skip this packet if we can't populate all segments */ + if (seg_idx != m_pkt->nb_segs) + continue; + + struct gdma_work_request work_req = {0}; + struct gdma_posted_wqe_info wqe_info = {0}; + + work_req.gdma_header.struct_size = sizeof(work_req); + wqe_info.gdma_header.struct_size = sizeof(wqe_info); + + work_req.sgl = sgl.gdma_sgl; + work_req.num_sgl_elements = m_pkt->nb_segs; + work_req.inline_oob_size_in_bytes = + sizeof(struct transmit_short_oob_v2); + work_req.inline_oob_data = &tx_oob; + work_req.flags = 0; + work_req.client_data_unit = NOT_USING_CLIENT_DATA_UNIT; + + ret = gdma_post_work_request(&txq->gdma_sq, &work_req, + &wqe_info); + if (!ret) { + struct mana_txq_desc *desc = + &txq->desc_ring[txq->desc_ring_head]; + + /* Update queue for tracking pending requests */ + desc->pkt = m_pkt; + desc->wqe_size_in_bu = wqe_info.wqe_size_in_bu; + txq->desc_ring_head = + (txq->desc_ring_head + 1) % txq->num_desc; + + pkt_sent++; + + DRV_LOG(DEBUG, "nb_pkts %u pkt[%d] sent", + nb_pkts, pkt_idx); + } else { + DRV_LOG(INFO, "pkt[%d] failed to post send ret %d", + pkt_idx, ret); + break; + } + } + + /* Ring hardware door bell */ + db_page = priv->db_page; + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + struct rte_eth_dev *dev = + &rte_eth_devices[priv->dev_data->port_id]; + struct mana_process_priv *process_priv = dev->process_private; + + db_page = process_priv->db_page; + } + + if (pkt_sent) + ret = mana_ring_doorbell(db_page, GDMA_QUEUE_SEND, + txq->gdma_sq.id, + txq->gdma_sq.head * + GDMA_WQE_ALIGNMENT_UNIT_SIZE); + if (ret) + DRV_LOG(ERR, "mana_ring_doorbell failed ret %d", ret); + + return pkt_sent; +}