From patchwork Wed Dec 11 14:50:01 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Arkadiusz Kusztal X-Patchwork-Id: 63761 X-Patchwork-Delegate: gakhil@marvell.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 038BAA04F6; Wed, 11 Dec 2019 15:51:24 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id AEF821BE82; Wed, 11 Dec 2019 15:51:23 +0100 (CET) Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by dpdk.org (Postfix) with ESMTP id ECB2E1BE3D for ; Wed, 11 Dec 2019 15:51:21 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by orsmga105.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 11 Dec 2019 06:51:20 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.69,301,1571727600"; d="scan'208";a="220478758" Received: from akusztax-mobl.ger.corp.intel.com ([10.104.12.173]) by fmsmga001.fm.intel.com with ESMTP; 11 Dec 2019 06:51:19 -0800 From: Arek Kusztal To: dev@dpdk.org Cc: akhil.goyal@nxp.com, fiona.trahe@intel.com, declan.doherty@intel.com, Arek Kusztal Date: Wed, 11 Dec 2019 15:50:01 +0100 Message-Id: <20191211145004.11672-2-arkadiuszx.kusztal@intel.com> X-Mailer: git-send-email 2.19.1.windows.1 In-Reply-To: <20191211145004.11672-1-arkadiuszx.kusztal@intel.com> References: <20191211145004.11672-1-arkadiuszx.kusztal@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v3 1/4] common/qat: remove tail write coalescing feature X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Fiona Trahe The feature Coalescing Tail Writes 
on Enqueue is removed as it is not thread-safe and a dual-thread feature will be added shortly. Signed-off-by: Fiona Trahe Signed-off-by: Arek Kusztal Acked-by: Fiona Trahe --- drivers/common/qat/qat_qp.c | 16 +++------------- drivers/common/qat/qat_qp.h | 6 ------ 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c index 03f11f8..01ddce0 100644 --- a/drivers/common/qat/qat_qp.c +++ b/drivers/common/qat/qat_qp.c @@ -538,7 +538,6 @@ static inline void txq_write_tail(struct qat_qp *qp, struct qat_queue *q) { WRITE_CSR_RING_TAIL(qp->mmap_bar_addr, q->hw_bundle_number, q->hw_queue_number, q->tail); - q->nb_pending_requests = 0; q->csr_tail = q->tail; } @@ -622,25 +621,20 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops) kick_tail: queue->tail = tail; tmp_qp->stats.enqueued_count += nb_ops_sent; - queue->nb_pending_requests += nb_ops_sent; - if (tmp_qp->inflights16 < QAT_CSR_TAIL_FORCE_WRITE_THRESH || - queue->nb_pending_requests > QAT_CSR_TAIL_WRITE_THRESH) { - txq_write_tail(tmp_qp, queue); - } + txq_write_tail(tmp_qp, queue); return nb_ops_sent; } uint16_t qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops) { - struct qat_queue *rx_queue, *tx_queue; + struct qat_queue *rx_queue; struct qat_qp *tmp_qp = (struct qat_qp *)qp; uint32_t head; uint32_t resp_counter = 0; uint8_t *resp_msg; rx_queue = &(tmp_qp->rx_q); - tx_queue = &(tmp_qp->tx_q); head = rx_queue->head; resp_msg = (uint8_t *)rx_queue->base_addr + rx_queue->head; @@ -677,11 +671,7 @@ qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops) QAT_CSR_HEAD_WRITE_THRESH) rxq_free_desc(tmp_qp, rx_queue); } - /* also check if tail needs to be advanced */ - if (tmp_qp->inflights16 <= QAT_CSR_TAIL_FORCE_WRITE_THRESH && - tx_queue->tail != tx_queue->csr_tail) { - txq_write_tail(tmp_qp, tx_queue); - } + return resp_counter; } diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h index 980c2ba..9212ca4 100644 --- 
a/drivers/common/qat/qat_qp.h +++ b/drivers/common/qat/qat_qp.h @@ -11,10 +11,6 @@ struct qat_pci_device; #define QAT_CSR_HEAD_WRITE_THRESH 32U /* number of requests to accumulate before writing head CSR */ -#define QAT_CSR_TAIL_WRITE_THRESH 32U -/* number of requests to accumulate before writing tail CSR */ -#define QAT_CSR_TAIL_FORCE_WRITE_THRESH 256U -/* number of inflights below which no tail write coalescing should occur */ typedef int (*build_request_t)(void *op, uint8_t *req, void *op_cookie, @@ -64,8 +60,6 @@ struct qat_queue { uint32_t csr_tail; /* last written tail value */ uint16_t nb_processed_responses; /* number of responses processed since last CSR head write */ - uint16_t nb_pending_requests; - /* number of requests pending since last CSR tail write */ }; struct qat_qp { From patchwork Wed Dec 11 14:50:02 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Arkadiusz Kusztal X-Patchwork-Id: 63762 X-Patchwork-Delegate: gakhil@marvell.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id B14EAA04F6; Wed, 11 Dec 2019 15:51:32 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 749F81BEC4; Wed, 11 Dec 2019 15:51:25 +0100 (CET) Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by dpdk.org (Postfix) with ESMTP id 6FC141BE3D for ; Wed, 11 Dec 2019 15:51:23 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by orsmga105.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 11 Dec 2019 06:51:23 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.69,301,1571727600"; d="scan'208";a="220478765" Received: from akusztax-mobl.ger.corp.intel.com ([10.104.12.173]) by fmsmga001.fm.intel.com with ESMTP; 11 Dec 2019 
06:51:21 -0800 From: Arek Kusztal To: dev@dpdk.org Cc: akhil.goyal@nxp.com, fiona.trahe@intel.com, declan.doherty@intel.com, Arek Kusztal Date: Wed, 11 Dec 2019 15:50:02 +0100 Message-Id: <20191211145004.11672-3-arkadiuszx.kusztal@intel.com> X-Mailer: git-send-email 2.19.1.windows.1 In-Reply-To: <20191211145004.11672-1-arkadiuszx.kusztal@intel.com> References: <20191211145004.11672-1-arkadiuszx.kusztal@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v3 2/4] common/qat: move max inflights param into qp X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Fiona Trahe The max_inflights parameter is moved from qat_queue to qat_qp as it's a more appropriate location. Signed-off-by: Fiona Trahe Signed-off-by: Arek Kusztal Acked-by: Fiona Trahe --- drivers/common/qat/qat_qp.c | 23 ++++++++++++----------- drivers/common/qat/qat_qp.h | 2 +- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c index 01ddce0..8e4c74a 100644 --- a/drivers/common/qat/qat_qp.c +++ b/drivers/common/qat/qat_qp.c @@ -239,6 +239,15 @@ int qat_qp_setup(struct qat_pci_device *qat_dev, goto create_err; } + qp->max_inflights = ADF_MAX_INFLIGHTS(qp->tx_q.queue_size, + ADF_BYTES_TO_MSG_SIZE(qp->tx_q.msg_size)); + + if (qp->max_inflights < 2) { + QAT_LOG(ERR, "Invalid num inflights"); + qat_queue_delete(&(qp->tx_q)); + goto create_err; + } + if (qat_queue_create(qat_dev, &(qp->rx_q), qat_qp_conf, ADF_RING_DIR_RX) != 0) { QAT_LOG(ERR, "Rx queue create failed " @@ -416,15 +425,7 @@ qat_queue_create(struct qat_pci_device *qat_dev, struct qat_queue *queue, goto queue_create_err; } - queue->max_inflights = ADF_MAX_INFLIGHTS(queue->queue_size, - ADF_BYTES_TO_MSG_SIZE(desc_size)); queue->modulo_mask = (1 << ADF_RING_SIZE_MODULO(queue->queue_size)) - 1; - - if 
(queue->max_inflights < 2) { - QAT_LOG(ERR, "Invalid num inflights"); - ret = -EINVAL; - goto queue_create_err; - } queue->head = 0; queue->tail = 0; queue->msg_size = desc_size; @@ -443,11 +444,11 @@ qat_queue_create(struct qat_pci_device *qat_dev, struct qat_queue *queue, queue->hw_queue_number, queue_base); QAT_LOG(DEBUG, "RING: Name:%s, size in CSR: %u, in bytes %u," - " nb msgs %u, msg_size %u, max_inflights %u modulo mask %u", + " nb msgs %u, msg_size %u, modulo mask %u", queue->memz_name, queue->queue_size, queue_size_bytes, qp_conf->nb_descriptors, desc_size, - queue->max_inflights, queue->modulo_mask); + queue->modulo_mask); return 0; @@ -590,7 +591,7 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops) /* Find how many can actually fit on the ring */ tmp_qp->inflights16 += nb_ops; - overflow = tmp_qp->inflights16 - queue->max_inflights; + overflow = tmp_qp->inflights16 - tmp_qp->max_inflights; if (overflow > 0) { tmp_qp->inflights16 -= overflow; nb_ops_possible = nb_ops - overflow; diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h index 9212ca4..5066f06 100644 --- a/drivers/common/qat/qat_qp.h +++ b/drivers/common/qat/qat_qp.h @@ -51,7 +51,6 @@ struct qat_queue { uint32_t tail; /* Shadow copy of the tail */ uint32_t modulo_mask; uint32_t msg_size; - uint16_t max_inflights; uint32_t queue_size; uint8_t hw_bundle_number; uint8_t hw_queue_number; @@ -76,6 +75,7 @@ struct qat_qp { enum qat_service_type service_type; struct qat_pci_device *qat_dev; /**< qat device this qp is on */ + uint16_t max_inflights; } __rte_cache_aligned; extern const struct qat_qp_hw_data qat_gen1_qps[][ADF_MAX_QPS_ON_ANY_SERVICE]; From patchwork Wed Dec 11 14:50:03 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Arkadiusz Kusztal X-Patchwork-Id: 63763 X-Patchwork-Delegate: gakhil@marvell.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org 
Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id EDF70A04F6; Wed, 11 Dec 2019 15:51:41 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 256411BF6D; Wed, 11 Dec 2019 15:51:28 +0100 (CET) Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by dpdk.org (Postfix) with ESMTP id B797B1BF30 for ; Wed, 11 Dec 2019 15:51:26 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by orsmga105.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 11 Dec 2019 06:51:26 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.69,301,1571727600"; d="scan'208";a="220478773" Received: from akusztax-mobl.ger.corp.intel.com ([10.104.12.173]) by fmsmga001.fm.intel.com with ESMTP; 11 Dec 2019 06:51:23 -0800 From: Arek Kusztal To: dev@dpdk.org Cc: akhil.goyal@nxp.com, fiona.trahe@intel.com, declan.doherty@intel.com, Arek Kusztal Date: Wed, 11 Dec 2019 15:50:03 +0100 Message-Id: <20191211145004.11672-4-arkadiuszx.kusztal@intel.com> X-Mailer: git-send-email 2.19.1.windows.1 In-Reply-To: <20191211145004.11672-1-arkadiuszx.kusztal@intel.com> References: <20191211145004.11672-1-arkadiuszx.kusztal@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v3 3/4] common/qat: add dual thread support X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Fiona Trahe Remove the limitation whereby enqueue and dequeue must be done in same thread. The inflight calculation is reworked to be thread-safe for 2 threads - note this is not general multi-thread support, i.e all enqueues to a qp must still be done in one thread and all dequeues must be done in one thread, but enqueues and dequeues may be in separate threads. 
Documentation updated. Signed-off-by: Fiona Trahe Signed-off-by: Arek Kusztal Acked-by: Fiona Trahe --- doc/guides/compressdevs/qat_comp.rst | 5 ++++- doc/guides/cryptodevs/qat.rst | 10 +++++++-- drivers/common/qat/qat_qp.c | 40 +++++++++++++++++++++--------------- drivers/common/qat/qat_qp.h | 3 ++- 4 files changed, 38 insertions(+), 20 deletions(-) diff --git a/doc/guides/compressdevs/qat_comp.rst b/doc/guides/compressdevs/qat_comp.rst index 6421f76..757611a 100644 --- a/doc/guides/compressdevs/qat_comp.rst +++ b/doc/guides/compressdevs/qat_comp.rst @@ -37,7 +37,10 @@ Limitations ----------- * Compressdev level 0, no compression, is not supported. -* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported). +* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single + queue-pair all enqueues to the TX queue must be done from one thread and all dequeues + from the RX queue must be done from one thread, but enqueues and dequeues may be done + in different threads.) * No BSD support as BSD QAT kernel driver not available. * When using Deflate dynamic huffman encoding for compression, the input size (op.src.length) must be < CONFIG_RTE_PMD_QAT_COMP_IM_BUFFER_SIZE from the config file, diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst index 6197875..3a4a189 100644 --- a/doc/guides/cryptodevs/qat.rst +++ b/doc/guides/cryptodevs/qat.rst @@ -81,7 +81,10 @@ Limitations * No BSD support as BSD QAT kernel driver not available. * ZUC EEA3/EIA3 is not supported by dh895xcc devices * Maximum additional authenticated data (AAD) for GCM is 240 bytes long and must be passed to the device in a buffer rounded up to the nearest block-size multiple (x16) and padded with zeros. -* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported). 
+* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single + queue-pair all enqueues to the TX queue must be done from one thread and all dequeues + from the RX queue must be done from one thread, but enqueues and dequeues may be done + in different threads.) * A GCM limitation exists, but only in the case where there are multiple generations of QAT devices on a single platform. To optimise performance, the GCM crypto session should be initialised for the @@ -133,7 +136,10 @@ Limitations ~~~~~~~~~~~ * Big integers longer than 4096 bits are not supported. -* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported). +* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single + queue-pair all enqueues to the TX queue must be done from one thread and all dequeues + from the RX queue must be done from one thread, but enqueues and dequeues may be done + in different threads.) * RSA-2560, RSA-3584 are not supported .. 
_building_qat: diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c index 8e4c74a..30cdc61 100644 --- a/drivers/common/qat/qat_qp.c +++ b/drivers/common/qat/qat_qp.c @@ -230,7 +230,7 @@ int qat_qp_setup(struct qat_pci_device *qat_dev, } qp->mmap_bar_addr = pci_dev->mem_resource[0].addr; - qp->inflights16 = 0; + qp->enqueued = qp->dequeued = 0; if (qat_queue_create(qat_dev, &(qp->tx_q), qat_qp_conf, ADF_RING_DIR_TX) != 0) { @@ -321,7 +321,7 @@ int qat_qp_release(struct qat_qp **qp_addr) qp->qat_dev->qat_dev_id); /* Don't free memory if there are still responses to be processed */ - if (qp->inflights16 == 0) { + if ((qp->enqueued - qp->dequeued) == 0) { qat_queue_delete(&(qp->tx_q)); qat_queue_delete(&(qp->rx_q)); } else { @@ -579,7 +579,6 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops) uint16_t nb_ops_possible = nb_ops; register uint8_t *base_addr; register uint32_t tail; - int overflow; if (unlikely(nb_ops == 0)) return 0; @@ -590,13 +589,25 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops) tail = queue->tail; /* Find how many can actually fit on the ring */ - tmp_qp->inflights16 += nb_ops; - overflow = tmp_qp->inflights16 - tmp_qp->max_inflights; - if (overflow > 0) { - tmp_qp->inflights16 -= overflow; - nb_ops_possible = nb_ops - overflow; - if (nb_ops_possible == 0) - return 0; + { + /* dequeued can only be written by one thread, but it may not + * be this thread. As it's 4-byte aligned it will be read + * atomically here by any Intel CPU. + * enqueued can wrap before dequeued, but cannot + * lap it as var size of enq/deq (uint32_t) > var size of + * max_inflights (uint16_t). In reality inflights is never + * even as big as max uint16_t, as it's <= ADF_MAX_DESC. + * On wrapping, the calculation still returns the correct + * positive value as all three vars are unsigned. 
+ */ + uint32_t inflights = + tmp_qp->enqueued - tmp_qp->dequeued; + + if ((inflights + nb_ops) > tmp_qp->max_inflights) { + nb_ops_possible = tmp_qp->max_inflights - inflights; + if (nb_ops_possible == 0) + return 0; + } } while (nb_ops_sent != nb_ops_possible) { @@ -605,11 +616,7 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops) tmp_qp->qat_dev_gen); if (ret != 0) { tmp_qp->stats.enqueue_err_count++; - /* - * This message cannot be enqueued, - * decrease number of ops that wasn't sent - */ - tmp_qp->inflights16 -= nb_ops_possible - nb_ops_sent; + /* This message cannot be enqueued */ if (nb_ops_sent == 0) return 0; goto kick_tail; @@ -621,6 +628,7 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops) } kick_tail: queue->tail = tail; + tmp_qp->enqueued += nb_ops_sent; tmp_qp->stats.enqueued_count += nb_ops_sent; txq_write_tail(tmp_qp, queue); return nb_ops_sent; @@ -664,9 +672,9 @@ qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops) } if (resp_counter > 0) { rx_queue->head = head; + tmp_qp->dequeued += resp_counter; tmp_qp->stats.dequeued_count += resp_counter; rx_queue->nb_processed_responses += resp_counter; - tmp_qp->inflights16 -= resp_counter; if (rx_queue->nb_processed_responses > QAT_CSR_HEAD_WRITE_THRESH) diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h index 5066f06..8b9ab79 100644 --- a/drivers/common/qat/qat_qp.h +++ b/drivers/common/qat/qat_qp.h @@ -63,7 +63,6 @@ struct qat_queue { struct qat_qp { void *mmap_bar_addr; - uint16_t inflights16; struct qat_queue tx_q; struct qat_queue rx_q; struct qat_common_stats stats; @@ -75,6 +74,8 @@ struct qat_qp { enum qat_service_type service_type; struct qat_pci_device *qat_dev; /**< qat device this qp is on */ + uint32_t enqueued; + uint32_t dequeued __rte_aligned(4); uint16_t max_inflights; } __rte_cache_aligned; From patchwork Wed Dec 11 14:50:04 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit 
X-Patchwork-Submitter: Arkadiusz Kusztal X-Patchwork-Id: 63764 X-Patchwork-Delegate: gakhil@marvell.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 47F0EA04F6; Wed, 11 Dec 2019 15:51:51 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id C28161BE80; Wed, 11 Dec 2019 15:51:32 +0100 (CET) Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by dpdk.org (Postfix) with ESMTP id 7B1371BF71 for ; Wed, 11 Dec 2019 15:51:30 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by orsmga105.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 11 Dec 2019 06:51:30 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.69,301,1571727600"; d="scan'208";a="220478779" Received: from akusztax-mobl.ger.corp.intel.com ([10.104.12.173]) by fmsmga001.fm.intel.com with ESMTP; 11 Dec 2019 06:51:26 -0800 From: Arek Kusztal To: dev@dpdk.org Cc: akhil.goyal@nxp.com, fiona.trahe@intel.com, declan.doherty@intel.com, Arek Kusztal Date: Wed, 11 Dec 2019 15:50:04 +0100 Message-Id: <20191211145004.11672-5-arkadiuszx.kusztal@intel.com> X-Mailer: git-send-email 2.19.1.windows.1 In-Reply-To: <20191211145004.11672-1-arkadiuszx.kusztal@intel.com> References: <20191211145004.11672-1-arkadiuszx.kusztal@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v3 4/4] crypto/qat: add minimum enq threshold to qat pmd X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch adds minimum enqueue threshold to Intel QuickAssist Technology PMD. 
It is an optimisation, configured by a command line option, which can be used to reduce MMIO write occurrences. Signed-off-by: Fiona Trahe Signed-off-by: Arek Kusztal --- doc/guides/cryptodevs/qat.rst | 18 +++++++ drivers/common/qat/qat_common.c | 3 ++ drivers/common/qat/qat_common.h | 3 ++ drivers/common/qat/qat_device.c | 99 ++++++++++++++++++++++++++++++++----- drivers/common/qat/qat_device.h | 22 +++++++-- drivers/common/qat/qat_qp.c | 11 +++++ drivers/common/qat/qat_qp.h | 3 ++ drivers/compress/qat/qat_comp_pmd.c | 13 ++++- drivers/compress/qat/qat_comp_pmd.h | 4 +- drivers/crypto/qat/qat_asym_pmd.c | 13 ++++- drivers/crypto/qat/qat_asym_pmd.h | 4 +- drivers/crypto/qat/qat_sym_pmd.c | 13 ++++- drivers/crypto/qat/qat_sym_pmd.h | 4 +- 13 files changed, 188 insertions(+), 22 deletions(-) diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst index 3a4a189..dc79b60 100644 --- a/doc/guides/cryptodevs/qat.rst +++ b/doc/guides/cryptodevs/qat.rst @@ -243,6 +243,24 @@ allocated while for GEN1 devices, 12 buffers are allocated, plus 1472 bytes over larger than the input size). +Running QAT PMD with minimum threshold for burst size +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If only a small number of packets can be enqueued, each enqueue causes an expensive MMIO write. +These MMIO write occurrences can be optimised by setting any of the following parameters +- qat_sym_enq_threshold +- qat_asym_enq_threshold +- qat_comp_enq_threshold +When any of these parameters is set, the rte_cryptodev_enqueue_burst function will +return 0 (thereby avoiding an MMIO) if the device is congested and the number of packets +possible to enqueue is smaller than the threshold. +To use this feature the user must set the parameter on process start as a device additional parameter: + .example: '-w 03:01.1,qat_sym_enq_threshold=32,qat_comp_enq_threshold=16' +All parameters can be used with the same device regardless of order. Parameters are separated +by comma. 
When the same parameter is used more than once first occurrence of the parameter +is used. +Maximum threshold that can be set is 32. + Device and driver naming ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/drivers/common/qat/qat_common.c b/drivers/common/qat/qat_common.c index 4753866..e222443 100644 --- a/drivers/common/qat/qat_common.c +++ b/drivers/common/qat/qat_common.c @@ -94,6 +94,9 @@ void qat_stats_get(struct qat_pci_device *dev, stats->dequeued_count += qp[i]->stats.dequeued_count; stats->enqueue_err_count += qp[i]->stats.enqueue_err_count; stats->dequeue_err_count += qp[i]->stats.dequeue_err_count; + stats->threshold_hit_count += qp[i]->stats.threshold_hit_count; + QAT_LOG(DEBUG, "Threshold was used for qp %d %"PRIu64" times", i, + stats->threshold_hit_count); } } diff --git a/drivers/common/qat/qat_common.h b/drivers/common/qat/qat_common.h index de9a3ba..cf840fe 100644 --- a/drivers/common/qat/qat_common.h +++ b/drivers/common/qat/qat_common.h @@ -61,6 +61,9 @@ struct qat_common_stats { /**< Total error count on operations enqueued */ uint64_t dequeue_err_count; /**< Total error count on operations dequeued */ + uint64_t threshold_hit_count; + /**< Total number of times min qp threshold condition was fulfilled */ + }; struct qat_pci_device; diff --git a/drivers/common/qat/qat_device.c b/drivers/common/qat/qat_device.c index 2a1cf3e..2b41d9a 100644 --- a/drivers/common/qat/qat_device.c +++ b/drivers/common/qat/qat_device.c @@ -3,6 +3,8 @@ */ #include +#include +#include #include "qat_device.h" #include "adf_transport_access_macros.h" @@ -105,12 +107,71 @@ qat_get_qat_dev_from_pci_dev(struct rte_pci_device *pci_dev) return qat_pci_get_named_dev(name); } +static void qat_dev_parse_cmd(const char *str, struct qat_dev_cmd_param + *qat_dev_cmd_param) +{ + int i = 0; + const char *param; + + while (1) { + char value_str[4] = { }; + + param = qat_dev_cmd_param[i].name; + if (param == NULL) + return; + long value = 0; + const char *arg = strstr(str, param); + const 
char *arg2 = NULL; + + if (arg) { + arg2 = arg + strlen(param); + if (*arg2 != '=') { + QAT_LOG(DEBUG, "parsing error '=' sign" + " should immediately follow %s", + param); + arg2 = NULL; + } else + arg2++; + } else { + QAT_LOG(DEBUG, "%s not provided", param); + } + if (arg2) { + int iter = 0; + while (iter < 2) { + if (!isdigit(*(arg2 + iter))) + break; + iter++; + } + if (!iter) { + QAT_LOG(DEBUG, "parsing error %s" + " no number provided", + param); + } else { + memcpy(value_str, arg2, iter); + value = strtol(value_str, NULL, 10); + if (value > MAX_QP_THRESHOLD_SIZE) { + QAT_LOG(DEBUG, "Exceeded max size of" + " threshold, setting to %d", + MAX_QP_THRESHOLD_SIZE); + value = MAX_QP_THRESHOLD_SIZE; + } + QAT_LOG(DEBUG, "parsing %s = %ld", + param, value); + } + } + qat_dev_cmd_param[i].val = value; + i++; + } +} + struct qat_pci_device * -qat_pci_device_allocate(struct rte_pci_device *pci_dev) +qat_pci_device_allocate(struct rte_pci_device *pci_dev, + struct qat_dev_cmd_param *qat_dev_cmd_param) { struct qat_pci_device *qat_dev; uint8_t qat_dev_id; char name[QAT_DEV_NAME_MAX_LEN]; + struct rte_devargs *devargs = pci_dev->device.devargs; rte_pci_device_name(&pci_dev->addr, name, sizeof(name)); snprintf(name+strlen(name), QAT_DEV_NAME_MAX_LEN-strlen(name), "_qat"); @@ -148,6 +209,9 @@ qat_pci_device_allocate(struct rte_pci_device *pci_dev) return NULL; } + if (devargs && devargs->drv_str) + qat_dev_parse_cmd(devargs->drv_str, qat_dev_cmd_param); + rte_spinlock_init(&qat_dev->arb_csr_lock); qat_dev->attached = QAT_ATTACHED; @@ -199,37 +263,45 @@ qat_pci_dev_destroy(struct qat_pci_device *qat_pci_dev, static int qat_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_device *pci_dev) { - int ret = 0; + int sym_ret = 0, asym_ret = 0, comp_ret = 0; int num_pmds_created = 0; struct qat_pci_device *qat_pci_dev; + struct qat_dev_cmd_param qat_dev_cmd_param[] = { + { SYM_ENQ_THRESHOLD_NAME, 0 }, + { ASYM_ENQ_THRESHOLD_NAME, 0 }, + { 
COMP_ENQ_THRESHOLD_NAME, 0 }, + { NULL, 0 }, + }; QAT_LOG(DEBUG, "Found QAT device at %02x:%02x.%x", pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function); - qat_pci_dev = qat_pci_device_allocate(pci_dev); + qat_pci_dev = qat_pci_device_allocate(pci_dev, qat_dev_cmd_param); if (qat_pci_dev == NULL) return -ENODEV; - ret = qat_sym_dev_create(qat_pci_dev); - if (ret == 0) + sym_ret = qat_sym_dev_create(qat_pci_dev, qat_dev_cmd_param); + if (sym_ret == 0) { num_pmds_created++; + + } else QAT_LOG(WARNING, "Failed to create QAT SYM PMD on device %s", qat_pci_dev->name); - ret = qat_comp_dev_create(qat_pci_dev); - if (ret == 0) + comp_ret = qat_comp_dev_create(qat_pci_dev, qat_dev_cmd_param); + if (comp_ret == 0) num_pmds_created++; else QAT_LOG(WARNING, "Failed to create QAT COMP PMD on device %s", qat_pci_dev->name); - ret = qat_asym_dev_create(qat_pci_dev); - if (ret == 0) + asym_ret = qat_asym_dev_create(qat_pci_dev, qat_dev_cmd_param); + if (asym_ret == 0) num_pmds_created++; else QAT_LOG(WARNING, @@ -264,13 +336,15 @@ static struct rte_pci_driver rte_qat_pmd = { }; __rte_weak int -qat_sym_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused) +qat_sym_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused, + struct qat_dev_cmd_param *qat_dev_cmd_param __rte_unused) { return 0; } __rte_weak int -qat_asym_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused) +qat_asym_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused, + struct qat_dev_cmd_param *qat_dev_cmd_param __rte_unused) { return 0; } @@ -288,7 +362,8 @@ qat_asym_dev_destroy(struct qat_pci_device *qat_pci_dev __rte_unused) } __rte_weak int -qat_comp_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused) +qat_comp_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused, + struct qat_dev_cmd_param *qat_dev_cmd_param __rte_unused) { return 0; } diff --git a/drivers/common/qat/qat_device.h b/drivers/common/qat/qat_device.h index 131375e..09a4c55 100644 --- 
a/drivers/common/qat/qat_device.h +++ b/drivers/common/qat/qat_device.h @@ -16,6 +16,16 @@ #define QAT_DEV_NAME_MAX_LEN 64 +#define SYM_ENQ_THRESHOLD_NAME "qat_sym_enq_threshold" +#define ASYM_ENQ_THRESHOLD_NAME "qat_asym_enq_threshold" +#define COMP_ENQ_THRESHOLD_NAME "qat_comp_enq_threshold" +#define MAX_QP_THRESHOLD_SIZE 32 + +struct qat_dev_cmd_param { + const char *name; + uint16_t val; +}; + enum qat_comp_num_im_buffers { QAT_NUM_INTERM_BUFS_GEN1 = 12, QAT_NUM_INTERM_BUFS_GEN2 = 20, @@ -94,7 +104,8 @@ struct qat_gen_hw_data { extern struct qat_gen_hw_data qat_gen_config[]; struct qat_pci_device * -qat_pci_device_allocate(struct rte_pci_device *pci_dev); +qat_pci_device_allocate(struct rte_pci_device *pci_dev, + struct qat_dev_cmd_param *qat_dev_cmd_param); int qat_pci_device_release(struct rte_pci_device *pci_dev); @@ -104,10 +115,12 @@ qat_get_qat_dev_from_pci_dev(struct rte_pci_device *pci_dev); /* declaration needed for weak functions */ int -qat_sym_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused); +qat_sym_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused, + struct qat_dev_cmd_param *qat_dev_cmd_param); int -qat_asym_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused); +qat_asym_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused, + struct qat_dev_cmd_param *qat_dev_cmd_param); int qat_sym_dev_destroy(struct qat_pci_device *qat_pci_dev __rte_unused); @@ -116,7 +129,8 @@ int qat_asym_dev_destroy(struct qat_pci_device *qat_pci_dev __rte_unused); int -qat_comp_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused); +qat_comp_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused, + struct qat_dev_cmd_param *qat_dev_cmd_param); int qat_comp_dev_destroy(struct qat_pci_device *qat_pci_dev __rte_unused); diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c index 30cdc61..a3cf34f 100644 --- a/drivers/common/qat/qat_qp.c +++ b/drivers/common/qat/qat_qp.c @@ -608,8 +608,19 @@ qat_enqueue_op_burst(void 
*qp, void **ops, uint16_t nb_ops) if (nb_ops_possible == 0) return 0; } + /* QAT has plenty of work queued already, so don't waste cycles + * enqueueing, wait til the application has gathered a bigger burst + * or some completed ops have been dequeued + */ + if (tmp_qp->min_enq_burst_threshold && inflights > + QAT_QP_MIN_INFL_THRESHOLD && nb_ops_possible < + tmp_qp->min_enq_burst_threshold) { + tmp_qp->stats.threshold_hit_count++; + return 0; + } } + while (nb_ops_sent != nb_ops_possible) { ret = tmp_qp->build_request(*ops, base_addr + tail, tmp_qp->op_cookies[tail / queue->msg_size], diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h index 8b9ab79..0b95ea3 100644 --- a/drivers/common/qat/qat_qp.h +++ b/drivers/common/qat/qat_qp.h @@ -12,6 +12,8 @@ struct qat_pci_device; #define QAT_CSR_HEAD_WRITE_THRESH 32U /* number of requests to accumulate before writing head CSR */ +#define QAT_QP_MIN_INFL_THRESHOLD 256 + typedef int (*build_request_t)(void *op, uint8_t *req, void *op_cookie, enum qat_device_gen qat_dev_gen); @@ -77,6 +79,7 @@ struct qat_qp { uint32_t enqueued; uint32_t dequeued __rte_aligned(4); uint16_t max_inflights; + uint16_t min_enq_burst_threshold; } __rte_cache_aligned; extern const struct qat_qp_hw_data qat_gen1_qps[][ADF_MAX_QPS_ON_ANY_SERVICE]; diff --git a/drivers/compress/qat/qat_comp_pmd.c b/drivers/compress/qat/qat_comp_pmd.c index 05b7dfe..48cd968 100644 --- a/drivers/compress/qat/qat_comp_pmd.c +++ b/drivers/compress/qat/qat_comp_pmd.c @@ -139,6 +139,7 @@ qat_comp_qp_setup(struct rte_compressdev *dev, uint16_t qp_id, = *qp_addr; qp = (struct qat_qp *)*qp_addr; + qp->min_enq_burst_threshold = qat_private->min_enq_burst_threshold; for (i = 0; i < qp->nb_descriptors; i++) { @@ -660,8 +661,10 @@ static const struct rte_driver compdev_qat_driver = { .alias = qat_comp_drv_name }; int -qat_comp_dev_create(struct qat_pci_device *qat_pci_dev) +qat_comp_dev_create(struct qat_pci_device *qat_pci_dev, + struct qat_dev_cmd_param 
*qat_dev_cmd_param) { + int i = 0; if (qat_pci_dev->qat_dev_gen == QAT_GEN3) { QAT_LOG(ERR, "Compression PMD not supported on QAT c4xxx"); return 0; @@ -719,6 +722,14 @@ qat_comp_dev_create(struct qat_pci_device *qat_pci_dev) break; } + while (1) { /* scan entries until the NULL-name sentinel */ + if (qat_dev_cmd_param[i].name == NULL) + break; + if (!strcmp(qat_dev_cmd_param[i].name, COMP_ENQ_THRESHOLD_NAME)) + comp_dev->min_enq_burst_threshold = qat_dev_cmd_param[i].val; + i++; + } + QAT_LOG(DEBUG, "Created QAT COMP device %s as compressdev instance %d", name, compressdev->data->dev_id); diff --git a/drivers/compress/qat/qat_comp_pmd.h b/drivers/compress/qat/qat_comp_pmd.h index 6979de1..5c7fa9f 100644 --- a/drivers/compress/qat/qat_comp_pmd.h +++ b/drivers/compress/qat/qat_comp_pmd.h @@ -32,10 +32,12 @@ struct qat_comp_dev_private { /**< The device's pool for qat_comp_xforms */ struct rte_mempool *streampool; /**< The device's pool for qat_comp_streams */ + uint16_t min_enq_burst_threshold; /**< presumably the value qat_comp_qp_setup copies into each qp - confirm */ }; int -qat_comp_dev_create(struct qat_pci_device *qat_pci_dev); +qat_comp_dev_create(struct qat_pci_device *qat_pci_dev, + struct qat_dev_cmd_param *qat_dev_cmd_param); int qat_comp_dev_destroy(struct qat_pci_device *qat_pci_dev); diff --git a/drivers/crypto/qat/qat_asym_pmd.c b/drivers/crypto/qat/qat_asym_pmd.c index c8a52b6..97dad92 100644 --- a/drivers/crypto/qat/qat_asym_pmd.c +++ b/drivers/crypto/qat/qat_asym_pmd.c @@ -160,6 +160,7 @@ static int qat_asym_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id, = *qp_addr; qp = (struct qat_qp *)*qp_addr; + qp->min_enq_burst_threshold = qat_private->min_enq_burst_threshold; for (i = 0; i < qp->nb_descriptors; i++) { int j; @@ -235,8 +236,10 @@ static const struct rte_driver cryptodev_qat_asym_driver = { }; int -qat_asym_dev_create(struct qat_pci_device *qat_pci_dev) +qat_asym_dev_create(struct qat_pci_device *qat_pci_dev, + struct qat_dev_cmd_param *qat_dev_cmd_param) { + int i = 0; struct rte_cryptodev_pmd_init_params init_params = { .name = "", .socket_id = 
qat_pci_dev->pci_dev->device.numa_node, @@ -281,6 +284,14 @@ qat_asym_dev_create(struct qat_pci_device *qat_pci_dev) internals->asym_dev_id = cryptodev->data->dev_id; internals->qat_dev_capabilities = qat_gen1_asym_capabilities; + while (1) { /* scan entries until the NULL-name sentinel */ + if (qat_dev_cmd_param[i].name == NULL) + break; + if (!strcmp(qat_dev_cmd_param[i].name, ASYM_ENQ_THRESHOLD_NAME)) + internals->min_enq_burst_threshold = qat_dev_cmd_param[i].val; + i++; + } + QAT_LOG(DEBUG, "Created QAT ASYM device %s as cryptodev instance %d", cryptodev->data->name, internals->asym_dev_id); return 0; diff --git a/drivers/crypto/qat/qat_asym_pmd.h b/drivers/crypto/qat/qat_asym_pmd.h index 895d0f6..0535bc6 100644 --- a/drivers/crypto/qat/qat_asym_pmd.h +++ b/drivers/crypto/qat/qat_asym_pmd.h @@ -26,6 +26,7 @@ struct qat_asym_dev_private { /**< Device instance for this rte_cryptodev */ const struct rte_cryptodev_capabilities *qat_dev_capabilities; /* QAT device asymmetric crypto capabilities */ + uint16_t min_enq_burst_threshold; /* presumably the value qat_asym_qp_setup copies into each qp - confirm */ }; uint16_t @@ -42,7 +43,8 @@ int qat_asym_session_configure(struct rte_cryptodev *dev, struct rte_mempool *mempool); int -qat_asym_dev_create(struct qat_pci_device *qat_pci_dev); +qat_asym_dev_create(struct qat_pci_device *qat_pci_dev, + struct qat_dev_cmd_param *qat_dev_cmd_param); int qat_asym_dev_destroy(struct qat_pci_device *qat_pci_dev); diff --git a/drivers/crypto/qat/qat_sym_pmd.c b/drivers/crypto/qat/qat_sym_pmd.c index 71f21ce..fd2f3b3 100644 --- a/drivers/crypto/qat/qat_sym_pmd.c +++ b/drivers/crypto/qat/qat_sym_pmd.c @@ -169,6 +169,7 @@ static int qat_sym_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id, = *qp_addr; qp = (struct qat_qp *)*qp_addr; + qp->min_enq_burst_threshold = qat_private->min_enq_burst_threshold; for (i = 0; i < qp->nb_descriptors; i++) { @@ -237,8 +238,10 @@ static const struct rte_driver cryptodev_qat_sym_driver = { }; int -qat_sym_dev_create(struct qat_pci_device *qat_pci_dev) +qat_sym_dev_create(struct qat_pci_device *qat_pci_dev, + struct 
qat_dev_cmd_param *qat_dev_cmd_param) { + int i = 0; struct rte_cryptodev_pmd_init_params init_params = { .name = "", .socket_id = qat_pci_dev->pci_dev->device.numa_node, @@ -302,6 +305,14 @@ qat_sym_dev_create(struct qat_pci_device *qat_pci_dev) break; } + while (1) { /* scan entries until the NULL-name sentinel */ + if (qat_dev_cmd_param[i].name == NULL) + break; + if (!strcmp(qat_dev_cmd_param[i].name, SYM_ENQ_THRESHOLD_NAME)) + internals->min_enq_burst_threshold = qat_dev_cmd_param[i].val; + i++; + } + QAT_LOG(DEBUG, "Created QAT SYM device %s as cryptodev instance %d", cryptodev->data->name, internals->sym_dev_id); return 0; diff --git a/drivers/crypto/qat/qat_sym_pmd.h b/drivers/crypto/qat/qat_sym_pmd.h index 7ddaf45..a32c25a 100644 --- a/drivers/crypto/qat/qat_sym_pmd.h +++ b/drivers/crypto/qat/qat_sym_pmd.h @@ -28,10 +28,12 @@ struct qat_sym_dev_private { /**< Device instance for this rte_cryptodev */ const struct rte_cryptodev_capabilities *qat_dev_capabilities; /* QAT device symmetric crypto capabilities */ + uint16_t min_enq_burst_threshold; /* presumably the value qat_sym_qp_setup copies into each qp - confirm */ }; int -qat_sym_dev_create(struct qat_pci_device *qat_pci_dev); +qat_sym_dev_create(struct qat_pci_device *qat_pci_dev, + struct qat_dev_cmd_param *qat_dev_cmd_param); int qat_sym_dev_destroy(struct qat_pci_device *qat_pci_dev);