get:
Show a patch.

patch:
Partially update a patch; only the fields supplied in the request are changed.

put:
Update a patch; a full update that replaces all writable fields.
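
Reading a patch needs no authentication. A minimal sketch of fetching this record with Python and the requests library (the client library and the printed fields are illustrative choices, not the only way to call the endpoint):

import requests

# Fetch patch 48594 from the public dpdk.org Patchwork instance
resp = requests.get("https://patches.dpdk.org/api/patches/48594/")
resp.raise_for_status()

patch = resp.json()
print(patch["name"])    # "[v2,1/4] baseband: enhancement of offload cost test"
print(patch["state"])   # "accepted"
print(patch["mbox"])    # URL of the raw patch in mbox format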

GET /api/patches/48594/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 48594,
    "url": "https://patches.dpdk.org/api/patches/48594/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/20181207151534.16428-1-kamilx.chalupnik@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20181207151534.16428-1-kamilx.chalupnik@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20181207151534.16428-1-kamilx.chalupnik@intel.com",
    "date": "2018-12-07T15:15:31",
    "name": "[v2,1/4] baseband: enhancement of offload cost test",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "d94077b660f9b4dddf4f3a4f7372ca8e8c23db9f",
    "submitter": {
        "id": 1010,
        "url": "https://patches.dpdk.org/api/people/1010/?format=api",
        "name": "Kamil Chalupnik",
        "email": "kamilx.chalupnik@intel.com"
    },
    "delegate": {
        "id": 6690,
        "url": "https://patches.dpdk.org/api/users/6690/?format=api",
        "username": "akhil",
        "first_name": "akhil",
        "last_name": "goyal",
        "email": "gakhil@marvell.com"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/20181207151534.16428-1-kamilx.chalupnik@intel.com/mbox/",
    "series": [
        {
            "id": 2698,
            "url": "https://patches.dpdk.org/api/series/2698/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=2698",
            "date": "2018-12-07T15:15:31",
            "name": "[v2,1/4] baseband: enhancement of offload cost test",
            "version": 2,
            "mbox": "https://patches.dpdk.org/series/2698/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/48594/comments/",
    "check": "success",
    "checks": "https://patches.dpdk.org/api/patches/48594/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 0D96A5F19;\n\tFri,  7 Dec 2018 16:16:04 +0100 (CET)",
            "from mga05.intel.com (mga05.intel.com [192.55.52.43])\n\tby dpdk.org (Postfix) with ESMTP id B1D4A5F17\n\tfor <dev@dpdk.org>; Fri,  7 Dec 2018 16:16:01 +0100 (CET)",
            "from fmsmga001.fm.intel.com ([10.253.24.23])\n\tby fmsmga105.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t07 Dec 2018 07:16:00 -0800",
            "from kchalupx-mobl.ger.corp.intel.com ([10.103.104.172])\n\tby fmsmga001.fm.intel.com with ESMTP; 07 Dec 2018 07:15:58 -0800"
        ],
        "X-Amp-Result": "SKIPPED(no attachment in message)",
        "X-Amp-File-Uploaded": "False",
        "X-ExtLoop1": "1",
        "X-IronPort-AV": "E=Sophos;i=\"5.56,326,1539673200\"; d=\"scan'208\";a=\"127991473\"",
        "From": "Kamil Chalupnik <kamilx.chalupnik@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "amr.mokhtar@intel.com, akhil.goyal@nxp.com,\n\tKamil Chalupnik <kamilx.chalupnik@intel.com>",
        "Date": "Fri,  7 Dec 2018 16:15:31 +0100",
        "Message-Id": "<20181207151534.16428-1-kamilx.chalupnik@intel.com>",
        "X-Mailer": "git-send-email 2.9.0.windows.1",
        "In-Reply-To": "<20181207143126.3876-1-kamilx.chalupnik@intel.com>",
        "References": "<20181207143126.3876-1-kamilx.chalupnik@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v2 1/4] baseband: enhancement of offload cost test",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Offload cost test was improved in order to collect\nmore accurate results.\n\nSigned-off-by: Kamil Chalupnik <kamilx.chalupnik@intel.com>\n---\n app/test-bbdev/test_bbdev_perf.c                 | 152 +++++++++++------------\n config/common_base                               |   2 +-\n drivers/baseband/turbo_sw/bbdev_turbo_software.c |  70 ++++++++---\n lib/librte_bbdev/rte_bbdev.h                     |   9 +-\n 4 files changed, 135 insertions(+), 98 deletions(-)",
    "diff": "diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c\nindex fbe6cc9..bf97edb 100644\n--- a/app/test-bbdev/test_bbdev_perf.c\n+++ b/app/test-bbdev/test_bbdev_perf.c\n@@ -88,19 +88,19 @@ struct thread_params {\n /* Stores time statistics */\n struct test_time_stats {\n \t/* Stores software enqueue total working time */\n-\tuint64_t enq_sw_tot_time;\n+\tuint64_t enq_sw_total_time;\n \t/* Stores minimum value of software enqueue working time */\n \tuint64_t enq_sw_min_time;\n \t/* Stores maximum value of software enqueue working time */\n \tuint64_t enq_sw_max_time;\n \t/* Stores turbo enqueue total working time */\n-\tuint64_t enq_tur_tot_time;\n-\t/* Stores minimum value of turbo enqueue working time */\n-\tuint64_t enq_tur_min_time;\n-\t/* Stores maximum value of turbo enqueue working time */\n-\tuint64_t enq_tur_max_time;\n+\tuint64_t enq_acc_total_time;\n+\t/* Stores minimum value of accelerator enqueue working time */\n+\tuint64_t enq_acc_min_time;\n+\t/* Stores maximum value of accelerator enqueue working time */\n+\tuint64_t enq_acc_max_time;\n \t/* Stores dequeue total working time */\n-\tuint64_t deq_tot_time;\n+\tuint64_t deq_total_time;\n \t/* Stores minimum value of dequeue working time */\n \tuint64_t deq_min_time;\n \t/* Stores maximum value of dequeue working time */\n@@ -1200,12 +1200,15 @@ typedef int (test_case_function)(struct active_device *ad,\n \tburst_sz = tp->op_params->burst_sz;\n \tnum_to_process = tp->op_params->num_to_process;\n \n-\tif (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)\n+\tif (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {\n \t\tdeq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id, dec_ops,\n \t\t\t\tburst_sz);\n-\telse\n+\t\trte_bbdev_dec_op_free_bulk(dec_ops, deq);\n+\t} else {\n \t\tdeq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id, enc_ops,\n \t\t\t\tburst_sz);\n+\t\trte_bbdev_enc_op_free_bulk(enc_ops, deq);\n+\t}\n \n \tif (deq < burst_sz) {\n \t\tprintf(\n@@ -1316,8 +1319,6 @@ typedef int (test_case_function)(struct active_device *ad,\n \n \t\tenqueued += rte_bbdev_enqueue_dec_ops(tp->dev_id, queue_id, ops,\n \t\t\t\tnum_to_enq);\n-\n-\t\trte_bbdev_dec_op_free_bulk(ops, num_to_enq);\n \t}\n \n \tif (allocs_failed > 0)\n@@ -1380,8 +1381,6 @@ typedef int (test_case_function)(struct active_device *ad,\n \n \t\tenqueued += rte_bbdev_enqueue_enc_ops(tp->dev_id, queue_id, ops,\n \t\t\t\tnum_to_enq);\n-\n-\t\trte_bbdev_enc_op_free_bulk(ops, num_to_enq);\n \t}\n \n \tif (allocs_failed > 0)\n@@ -1575,13 +1574,14 @@ typedef int (test_case_function)(struct active_device *ad,\n \tRTE_LCORE_FOREACH(lcore_id) {\n \t\tif (iter++ >= used_cores)\n \t\t\tbreak;\n-\t\tprintf(\"\\tlcore_id: %u, throughput: %.8lg MOPS, %.8lg Mbps\\n\",\n-\t\tlcore_id, t_params[lcore_id].mops, t_params[lcore_id].mbps);\n+\t\tprintf(\"Throughput for core (%u): %.8lg MOPS, %.8lg Mbps\\n\",\n+\t\t\t\tlcore_id, t_params[lcore_id].mops,\n+\t\t\t\tt_params[lcore_id].mbps);\n \t\ttotal_mops += t_params[lcore_id].mops;\n \t\ttotal_mbps += t_params[lcore_id].mbps;\n \t}\n \tprintf(\n-\t\t\"\\n\\tTotal stats for %u cores: throughput: %.8lg MOPS, %.8lg Mbps\\n\",\n+\t\t\"\\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\\n\",\n \t\tused_cores, total_mops, total_mbps);\n }\n \n@@ -1882,7 +1882,7 @@ typedef int (test_case_function)(struct active_device *ad,\n \tTEST_ASSERT_NOT_NULL(op_type_str, \"Invalid op type: %u\", op_type);\n \n \tprintf(\n-\t\t\"Validation/Latency test: dev: %s, burst size: %u, num ops: %u, op type: 
%s\\n\",\n+\t\t\"\\nValidation/Latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\\n\",\n \t\t\tinfo.dev_name, burst_sz, num_to_process, op_type_str);\n \n \tif (op_type == RTE_BBDEV_OP_TURBO_DEC)\n@@ -1899,10 +1899,10 @@ typedef int (test_case_function)(struct active_device *ad,\n \tif (iter <= 0)\n \t\treturn TEST_FAILED;\n \n-\tprintf(\"\\toperation latency:\\n\"\n-\t\t\t\"\\t\\tavg latency: %lg cycles, %lg us\\n\"\n-\t\t\t\"\\t\\tmin latency: %lg cycles, %lg us\\n\"\n-\t\t\t\"\\t\\tmax latency: %lg cycles, %lg us\\n\",\n+\tprintf(\"Operation latency:\\n\"\n+\t\t\t\"\\tavg latency: %lg cycles, %lg us\\n\"\n+\t\t\t\"\\tmin latency: %lg cycles, %lg us\\n\"\n+\t\t\t\"\\tmax latency: %lg cycles, %lg us\\n\",\n \t\t\t(double)total_time / (double)iter,\n \t\t\t(double)(total_time * 1000000) / (double)iter /\n \t\t\t(double)rte_get_tsc_hz(), (double)min_time,\n@@ -1930,7 +1930,7 @@ typedef int (test_case_function)(struct active_device *ad,\n \tstats->dequeued_count = q_stats->dequeued_count;\n \tstats->enqueue_err_count = q_stats->enqueue_err_count;\n \tstats->dequeue_err_count = q_stats->dequeue_err_count;\n-\tstats->offload_time = q_stats->offload_time;\n+\tstats->acc_offload_cycles = q_stats->acc_offload_cycles;\n \n \treturn 0;\n }\n@@ -1974,18 +1974,18 @@ typedef int (test_case_function)(struct active_device *ad,\n \t\t\t\tqueue_id, dev_id);\n \n \t\tenq_sw_last_time = rte_rdtsc_precise() - enq_start_time -\n-\t\t\t\tstats.offload_time;\n+\t\t\t\tstats.acc_offload_cycles;\n \t\ttime_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,\n \t\t\t\tenq_sw_last_time);\n \t\ttime_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,\n \t\t\t\tenq_sw_last_time);\n-\t\ttime_st->enq_sw_tot_time += enq_sw_last_time;\n+\t\ttime_st->enq_sw_total_time += enq_sw_last_time;\n \n-\t\ttime_st->enq_tur_max_time = RTE_MAX(time_st->enq_tur_max_time,\n-\t\t\t\tstats.offload_time);\n-\t\ttime_st->enq_tur_min_time = RTE_MIN(time_st->enq_tur_min_time,\n-\t\t\t\tstats.offload_time);\n-\t\ttime_st->enq_tur_tot_time += stats.offload_time;\n+\t\ttime_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,\n+\t\t\t\tstats.acc_offload_cycles);\n+\t\ttime_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,\n+\t\t\t\tstats.acc_offload_cycles);\n+\t\ttime_st->enq_acc_total_time += stats.acc_offload_cycles;\n \n \t\t/* ensure enqueue has been completed */\n \t\trte_delay_ms(10);\n@@ -2003,7 +2003,7 @@ typedef int (test_case_function)(struct active_device *ad,\n \t\t\t\tdeq_last_time);\n \t\ttime_st->deq_min_time = RTE_MIN(time_st->deq_min_time,\n \t\t\t\tdeq_last_time);\n-\t\ttime_st->deq_tot_time += deq_last_time;\n+\t\ttime_st->deq_total_time += deq_last_time;\n \n \t\t/* Dequeue remaining operations if needed*/\n \t\twhile (burst_sz != deq)\n@@ -2055,18 +2055,18 @@ typedef int (test_case_function)(struct active_device *ad,\n \t\t\t\tqueue_id, dev_id);\n \n \t\tenq_sw_last_time = rte_rdtsc_precise() - enq_start_time -\n-\t\t\t\tstats.offload_time;\n+\t\t\t\tstats.acc_offload_cycles;\n \t\ttime_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,\n \t\t\t\tenq_sw_last_time);\n \t\ttime_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,\n \t\t\t\tenq_sw_last_time);\n-\t\ttime_st->enq_sw_tot_time += enq_sw_last_time;\n+\t\ttime_st->enq_sw_total_time += enq_sw_last_time;\n \n-\t\ttime_st->enq_tur_max_time = RTE_MAX(time_st->enq_tur_max_time,\n-\t\t\t\tstats.offload_time);\n-\t\ttime_st->enq_tur_min_time = 
RTE_MIN(time_st->enq_tur_min_time,\n-\t\t\t\tstats.offload_time);\n-\t\ttime_st->enq_tur_tot_time += stats.offload_time;\n+\t\ttime_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,\n+\t\t\t\tstats.acc_offload_cycles);\n+\t\ttime_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,\n+\t\t\t\tstats.acc_offload_cycles);\n+\t\ttime_st->enq_acc_total_time += stats.acc_offload_cycles;\n \n \t\t/* ensure enqueue has been completed */\n \t\trte_delay_ms(10);\n@@ -2084,7 +2084,7 @@ typedef int (test_case_function)(struct active_device *ad,\n \t\t\t\tdeq_last_time);\n \t\ttime_st->deq_min_time = RTE_MIN(time_st->deq_min_time,\n \t\t\t\tdeq_last_time);\n-\t\ttime_st->deq_tot_time += deq_last_time;\n+\t\ttime_st->deq_total_time += deq_last_time;\n \n \t\twhile (burst_sz != deq)\n \t\t\tdeq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,\n@@ -2121,7 +2121,7 @@ typedef int (test_case_function)(struct active_device *ad,\n \n \tmemset(&time_st, 0, sizeof(struct test_time_stats));\n \ttime_st.enq_sw_min_time = UINT64_MAX;\n-\ttime_st.enq_tur_min_time = UINT64_MAX;\n+\ttime_st.enq_acc_min_time = UINT64_MAX;\n \ttime_st.deq_min_time = UINT64_MAX;\n \n \tTEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),\n@@ -2134,7 +2134,7 @@ typedef int (test_case_function)(struct active_device *ad,\n \tTEST_ASSERT_NOT_NULL(op_type_str, \"Invalid op type: %u\", op_type);\n \n \tprintf(\n-\t\t\"Offload latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\\n\",\n+\t\t\"\\nOffload latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\\n\",\n \t\t\tinfo.dev_name, burst_sz, num_to_process, op_type_str);\n \n \tif (op_type == RTE_BBDEV_OP_TURBO_DEC)\n@@ -2149,36 +2149,36 @@ typedef int (test_case_function)(struct active_device *ad,\n \tif (iter <= 0)\n \t\treturn TEST_FAILED;\n \n-\tprintf(\"\\tenq offload cost latency:\\n\"\n-\t\t\t\"\\t\\tsoftware avg %lg cycles, %lg us\\n\"\n-\t\t\t\"\\t\\tsoftware min %lg cycles, %lg us\\n\"\n-\t\t\t\"\\t\\tsoftware max %lg cycles, %lg us\\n\"\n-\t\t\t\"\\t\\tturbo avg %lg cycles, %lg us\\n\"\n-\t\t\t\"\\t\\tturbo min %lg cycles, %lg us\\n\"\n-\t\t\t\"\\t\\tturbo max %lg cycles, %lg us\\n\",\n-\t\t\t(double)time_st.enq_sw_tot_time / (double)iter,\n-\t\t\t(double)(time_st.enq_sw_tot_time * 1000000) /\n+\tprintf(\"Enqueue offload cost latency:\\n\"\n+\t\t\t\"\\tDriver offload avg %lg cycles, %lg us\\n\"\n+\t\t\t\"\\tDriver offload min %lg cycles, %lg us\\n\"\n+\t\t\t\"\\tDriver offload max %lg cycles, %lg us\\n\"\n+\t\t\t\"\\tAccelerator offload avg %lg cycles, %lg us\\n\"\n+\t\t\t\"\\tAccelerator offload min %lg cycles, %lg us\\n\"\n+\t\t\t\"\\tAccelerator offload max %lg cycles, %lg us\\n\",\n+\t\t\t(double)time_st.enq_sw_total_time / (double)iter,\n+\t\t\t(double)(time_st.enq_sw_total_time * 1000000) /\n \t\t\t(double)iter / (double)rte_get_tsc_hz(),\n \t\t\t(double)time_st.enq_sw_min_time,\n \t\t\t(double)(time_st.enq_sw_min_time * 1000000) /\n \t\t\trte_get_tsc_hz(), (double)time_st.enq_sw_max_time,\n \t\t\t(double)(time_st.enq_sw_max_time * 1000000) /\n-\t\t\trte_get_tsc_hz(), (double)time_st.enq_tur_tot_time /\n+\t\t\trte_get_tsc_hz(), (double)time_st.enq_acc_total_time /\n \t\t\t(double)iter,\n-\t\t\t(double)(time_st.enq_tur_tot_time * 1000000) /\n+\t\t\t(double)(time_st.enq_acc_total_time * 1000000) /\n \t\t\t(double)iter / (double)rte_get_tsc_hz(),\n-\t\t\t(double)time_st.enq_tur_min_time,\n-\t\t\t(double)(time_st.enq_tur_min_time * 1000000) /\n-\t\t\trte_get_tsc_hz(), (double)time_st.enq_tur_max_time,\n-\t\t\t(double)(time_st.enq_tur_max_time * 1000000) 
/\n+\t\t\t(double)time_st.enq_acc_min_time,\n+\t\t\t(double)(time_st.enq_acc_min_time * 1000000) /\n+\t\t\trte_get_tsc_hz(), (double)time_st.enq_acc_max_time,\n+\t\t\t(double)(time_st.enq_acc_max_time * 1000000) /\n \t\t\trte_get_tsc_hz());\n \n-\tprintf(\"\\tdeq offload cost latency - one op:\\n\"\n-\t\t\t\"\\t\\tavg %lg cycles, %lg us\\n\"\n-\t\t\t\"\\t\\tmin %lg cycles, %lg us\\n\"\n-\t\t\t\"\\t\\tmax %lg cycles, %lg us\\n\",\n-\t\t\t(double)time_st.deq_tot_time / (double)iter,\n-\t\t\t(double)(time_st.deq_tot_time * 1000000) /\n+\tprintf(\"Dequeue offload cost latency - one op:\\n\"\n+\t\t\t\"\\tavg %lg cycles, %lg us\\n\"\n+\t\t\t\"\\tmin %lg cycles, %lg us\\n\"\n+\t\t\t\"\\tmax %lg cycles, %lg us\\n\",\n+\t\t\t(double)time_st.deq_total_time / (double)iter,\n+\t\t\t(double)(time_st.deq_total_time * 1000000) /\n \t\t\t(double)iter / (double)rte_get_tsc_hz(),\n \t\t\t(double)time_st.deq_min_time,\n \t\t\t(double)(time_st.deq_min_time * 1000000) /\n@@ -2194,7 +2194,7 @@ typedef int (test_case_function)(struct active_device *ad,\n static int\n offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,\n \t\tconst uint16_t num_to_process, uint16_t burst_sz,\n-\t\tuint64_t *deq_tot_time, uint64_t *deq_min_time,\n+\t\tuint64_t *deq_total_time, uint64_t *deq_min_time,\n \t\tuint64_t *deq_max_time)\n {\n \tint i, deq_total;\n@@ -2214,7 +2214,7 @@ typedef int (test_case_function)(struct active_device *ad,\n \t\tdeq_last_time = rte_rdtsc_precise() - deq_start_time;\n \t\t*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);\n \t\t*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);\n-\t\t*deq_tot_time += deq_last_time;\n+\t\t*deq_total_time += deq_last_time;\n \t}\n \n \treturn i;\n@@ -2223,7 +2223,7 @@ typedef int (test_case_function)(struct active_device *ad,\n static int\n offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,\n \t\tconst uint16_t num_to_process, uint16_t burst_sz,\n-\t\tuint64_t *deq_tot_time, uint64_t *deq_min_time,\n+\t\tuint64_t *deq_total_time, uint64_t *deq_min_time,\n \t\tuint64_t *deq_max_time)\n {\n \tint i, deq_total;\n@@ -2242,7 +2242,7 @@ typedef int (test_case_function)(struct active_device *ad,\n \t\tdeq_last_time = rte_rdtsc_precise() - deq_start_time;\n \t\t*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);\n \t\t*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);\n-\t\t*deq_tot_time += deq_last_time;\n+\t\t*deq_total_time += deq_last_time;\n \t}\n \n \treturn i;\n@@ -2261,7 +2261,7 @@ typedef int (test_case_function)(struct active_device *ad,\n \treturn TEST_SKIPPED;\n #else\n \tint iter;\n-\tuint64_t deq_tot_time, deq_min_time, deq_max_time;\n+\tuint64_t deq_total_time, deq_min_time, deq_max_time;\n \tuint16_t burst_sz = op_params->burst_sz;\n \tconst uint16_t num_to_process = op_params->num_to_process;\n \tconst enum rte_bbdev_op_type op_type = test_vector.op_type;\n@@ -2269,7 +2269,7 @@ typedef int (test_case_function)(struct active_device *ad,\n \tstruct rte_bbdev_info info;\n \tconst char *op_type_str;\n \n-\tdeq_tot_time = deq_max_time = 0;\n+\tdeq_total_time = deq_max_time = 0;\n \tdeq_min_time = UINT64_MAX;\n \n \tTEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),\n@@ -2281,27 +2281,27 @@ typedef int (test_case_function)(struct active_device *ad,\n \tTEST_ASSERT_NOT_NULL(op_type_str, \"Invalid op type: %u\", op_type);\n \n \tprintf(\n-\t\t\"Offload latency empty dequeue test: dev: %s, burst size: %u, num ops: %u, op type: %s\\n\",\n+\t\t\"\\nOffload latency empty dequeue test: dev: %s, burst size: %u, num ops: %u, op type: 
%s\\n\",\n \t\t\tinfo.dev_name, burst_sz, num_to_process, op_type_str);\n \n \tif (op_type == RTE_BBDEV_OP_TURBO_DEC)\n \t\titer = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,\n-\t\t\t\tnum_to_process, burst_sz, &deq_tot_time,\n+\t\t\t\tnum_to_process, burst_sz, &deq_total_time,\n \t\t\t\t&deq_min_time, &deq_max_time);\n \telse\n \t\titer = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,\n-\t\t\t\tnum_to_process, burst_sz, &deq_tot_time,\n+\t\t\t\tnum_to_process, burst_sz, &deq_total_time,\n \t\t\t\t&deq_min_time, &deq_max_time);\n \n \tif (iter <= 0)\n \t\treturn TEST_FAILED;\n \n-\tprintf(\"\\tempty deq offload\\n\"\n-\t\t\t\"\\t\\tavg. latency: %lg cycles, %lg us\\n\"\n-\t\t\t\"\\t\\tmin. latency: %lg cycles, %lg us\\n\"\n-\t\t\t\"\\t\\tmax. latency: %lg cycles, %lg us\\n\",\n-\t\t\t(double)deq_tot_time / (double)iter,\n-\t\t\t(double)(deq_tot_time * 1000000) / (double)iter /\n+\tprintf(\"Empty dequeue offload\\n\"\n+\t\t\t\"\\tavg. latency: %lg cycles, %lg us\\n\"\n+\t\t\t\"\\tmin. latency: %lg cycles, %lg us\\n\"\n+\t\t\t\"\\tmax. latency: %lg cycles, %lg us\\n\",\n+\t\t\t(double)deq_total_time / (double)iter,\n+\t\t\t(double)(deq_total_time * 1000000) / (double)iter /\n \t\t\t(double)rte_get_tsc_hz(), (double)deq_min_time,\n \t\t\t(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),\n \t\t\t(double)deq_max_time, (double)(deq_max_time * 1000000) /\ndiff --git a/config/common_base b/config/common_base\nindex d12ae98..3ff98bb 100644\n--- a/config/common_base\n+++ b/config/common_base\n@@ -481,7 +481,7 @@ CONFIG_RTE_PMD_PACKET_PREFETCH=y\n #\n CONFIG_RTE_LIBRTE_BBDEV=y\n CONFIG_RTE_BBDEV_MAX_DEVS=128\n-CONFIG_RTE_BBDEV_OFFLOAD_COST=n\n+CONFIG_RTE_BBDEV_OFFLOAD_COST=y\n \n #\n # Compile PMD for NULL bbdev device\ndiff --git a/drivers/baseband/turbo_sw/bbdev_turbo_software.c b/drivers/baseband/turbo_sw/bbdev_turbo_software.c\nindex 8ceb276..57f6ba1 100644\n--- a/drivers/baseband/turbo_sw/bbdev_turbo_software.c\n+++ b/drivers/baseband/turbo_sw/bbdev_turbo_software.c\n@@ -510,9 +510,10 @@ struct turbo_sw_queue {\n #ifdef RTE_BBDEV_OFFLOAD_COST\n \t\tstart_time = rte_rdtsc_precise();\n #endif\n+\t\t/* CRC24A generation */\n \t\tbblib_lte_crc24a_gen(&crc_req, &crc_resp);\n #ifdef RTE_BBDEV_OFFLOAD_COST\n-\t\tq_stats->offload_time += rte_rdtsc_precise() - start_time;\n+\t\tq_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;\n #endif\n \t} else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {\n \t\t/* CRC24B */\n@@ -542,9 +543,10 @@ struct turbo_sw_queue {\n #ifdef RTE_BBDEV_OFFLOAD_COST\n \t\tstart_time = rte_rdtsc_precise();\n #endif\n+\t\t/* CRC24B generation */\n \t\tbblib_lte_crc24b_gen(&crc_req, &crc_resp);\n #ifdef RTE_BBDEV_OFFLOAD_COST\n-\t\tq_stats->offload_time += rte_rdtsc_precise() - start_time;\n+\t\tq_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;\n #endif\n \t} else {\n \t\tret = is_enc_input_valid(k, k_idx, total_left);\n@@ -596,15 +598,14 @@ struct turbo_sw_queue {\n #ifdef RTE_BBDEV_OFFLOAD_COST\n \tstart_time = rte_rdtsc_precise();\n #endif\n-\n+\t/* Turbo encoding */\n \tif (bblib_turbo_encoder(&turbo_req, &turbo_resp) != 0) {\n \t\top->status |= 1 << RTE_BBDEV_DRV_ERROR;\n \t\trte_bbdev_log(ERR, \"Turbo Encoder failed\");\n \t\treturn;\n \t}\n-\n #ifdef RTE_BBDEV_OFFLOAD_COST\n-\tq_stats->offload_time += rte_rdtsc_precise() - start_time;\n+\tq_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;\n #endif\n \n \t/* Restore 3 first bytes of next CB if they were overwritten by CRC*/\n@@ -671,23 +672,21 @@ struct 
turbo_sw_queue {\n #ifdef RTE_BBDEV_OFFLOAD_COST\n \t\tstart_time = rte_rdtsc_precise();\n #endif\n-\n+\t\t/* Rate-Matching */\n \t\tif (bblib_rate_match_dl(&rm_req, &rm_resp) != 0) {\n \t\t\top->status |= 1 << RTE_BBDEV_DRV_ERROR;\n \t\t\trte_bbdev_log(ERR, \"Rate matching failed\");\n \t\t\treturn;\n \t\t}\n+#ifdef RTE_BBDEV_OFFLOAD_COST\n+\t\tq_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;\n+#endif\n \n \t\t/* SW fills an entire last byte even if E%8 != 0. Clear the\n \t\t * superfluous data bits for consistency with HW device.\n \t\t */\n \t\tmask_id = (e & 7) >> 1;\n \t\trm_out[out_len - 1] &= mask_out[mask_id];\n-\n-#ifdef RTE_BBDEV_OFFLOAD_COST\n-\t\tq_stats->offload_time += rte_rdtsc_precise() - start_time;\n-#endif\n-\n \t\tenc->output.length += rm_resp.OutputLen;\n \t} else {\n \t\t/* Rate matching is bypassed */\n@@ -798,7 +797,7 @@ struct turbo_sw_queue {\n {\n \tuint16_t i;\n #ifdef RTE_BBDEV_OFFLOAD_COST\n-\tqueue_stats->offload_time = 0;\n+\tqueue_stats->acc_offload_cycles = 0;\n #endif\n \n \tfor (i = 0; i < nb_ops; ++i)\n@@ -905,7 +904,8 @@ struct turbo_sw_queue {\n process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,\n \t\tuint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in,\n \t\tstruct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,\n-\t\tbool check_crc_24b, uint16_t crc24_overlap, uint16_t total_left)\n+\t\tbool check_crc_24b, uint16_t crc24_overlap, uint16_t total_left,\n+\t\tstruct rte_bbdev_stats *q_stats)\n {\n \tint ret;\n \tint32_t k_idx;\n@@ -917,6 +917,11 @@ struct turbo_sw_queue {\n \tstruct bblib_turbo_decoder_request turbo_req;\n \tstruct bblib_turbo_decoder_response turbo_resp;\n \tstruct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;\n+#ifdef RTE_BBDEV_OFFLOAD_COST\n+\tuint64_t start_time;\n+#else\n+\tRTE_SET_USED(q_stats);\n+#endif\n \n \tk_idx = compute_idx(k);\n \n@@ -942,7 +947,14 @@ struct turbo_sw_queue {\n \t\tdeint_req.pharqbuffer = q->deint_input;\n \t\tdeint_req.ncb = ncb_without_null;\n \t\tdeint_resp.pinteleavebuffer = q->deint_output;\n+\n+#ifdef RTE_BBDEV_OFFLOAD_COST\n+\t\tstart_time = rte_rdtsc_precise();\n+#endif\n \t\tbblib_deinterleave_ul(&deint_req, &deint_resp);\n+#ifdef RTE_BBDEV_OFFLOAD_COST\n+\t\tq_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;\n+#endif\n \t} else\n \t\tmove_padding_bytes(in, q->deint_output, k, ncb);\n \n@@ -961,7 +973,15 @@ struct turbo_sw_queue {\n \tadapter_req.ncb = ncb_without_null;\n \tadapter_req.pinteleavebuffer = adapter_input;\n \tadapter_resp.pharqout = q->adapter_output;\n+\n+#ifdef RTE_BBDEV_OFFLOAD_COST\n+\tstart_time = rte_rdtsc_precise();\n+#endif\n+\t/* Turbo decode adaptation */\n \tbblib_turbo_adapter_ul(&adapter_req, &adapter_resp);\n+#ifdef RTE_BBDEV_OFFLOAD_COST\n+\tq_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;\n+#endif\n \n \tout = (uint8_t *)rte_pktmbuf_append(m_out, ((k - crc24_overlap) >> 3));\n \tif (out == NULL) {\n@@ -986,12 +1006,20 @@ struct turbo_sw_queue {\n \tturbo_resp.ag_buf = q->ag;\n \tturbo_resp.cb_buf = q->code_block;\n \tturbo_resp.output = out;\n+\n+#ifdef RTE_BBDEV_OFFLOAD_COST\n+\tstart_time = rte_rdtsc_precise();\n+#endif\n+\t/* Turbo decode */\n \titer_cnt = bblib_turbo_decoder(&turbo_req, &turbo_resp);\n+#ifdef RTE_BBDEV_OFFLOAD_COST\n+\tq_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;\n+#endif\n \tdec->hard_output.length += (k >> 3);\n \n \tif (iter_cnt > 0) {\n \t\t/* Temporary solution for returned iter_count from SDK */\n-\t\titer_cnt = (iter_cnt - 1) / 
2;\n+\t\titer_cnt = (iter_cnt - 1) >> 1;\n \t\tdec->iter_count = RTE_MAX(iter_cnt, dec->iter_count);\n \t} else {\n \t\top->status |= 1 << RTE_BBDEV_DATA_ERROR;\n@@ -1001,7 +1029,8 @@ struct turbo_sw_queue {\n }\n \n static inline void\n-enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op)\n+enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,\n+\t\tstruct rte_bbdev_stats *queue_stats)\n {\n \tuint8_t c, r = 0;\n \tuint16_t kw, k = 0;\n@@ -1053,7 +1082,7 @@ struct turbo_sw_queue {\n \t\tprocess_dec_cb(q, op, c, k, kw, m_in, m_out, in_offset,\n \t\t\t\tout_offset, check_bit(dec->op_flags,\n \t\t\t\tRTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap,\n-\t\t\t\ttotal_left);\n+\t\t\t\ttotal_left, queue_stats);\n \t\t/* To keep CRC24 attached to end of Code block, use\n \t\t * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it\n \t\t * removed by default once verified.\n@@ -1075,12 +1104,15 @@ struct turbo_sw_queue {\n \n static inline uint16_t\n enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops,\n-\t\tuint16_t nb_ops)\n+\t\tuint16_t nb_ops, struct rte_bbdev_stats *queue_stats)\n {\n \tuint16_t i;\n+#ifdef RTE_BBDEV_OFFLOAD_COST\n+\tqueue_stats->acc_offload_cycles = 0;\n+#endif\n \n \tfor (i = 0; i < nb_ops; ++i)\n-\t\tenqueue_dec_one_op(q, ops[i]);\n+\t\tenqueue_dec_one_op(q, ops[i], queue_stats);\n \n \treturn rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,\n \t\t\tNULL);\n@@ -1112,7 +1144,7 @@ struct turbo_sw_queue {\n \tstruct turbo_sw_queue *q = queue;\n \tuint16_t nb_enqueued = 0;\n \n-\tnb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops);\n+\tnb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops, &q_data->queue_stats);\n \n \tq_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;\n \tq_data->queue_stats.enqueued_count += nb_enqueued;\ndiff --git a/lib/librte_bbdev/rte_bbdev.h b/lib/librte_bbdev/rte_bbdev.h\nindex 25ef409..da8cf07 100644\n--- a/lib/librte_bbdev/rte_bbdev.h\n+++ b/lib/librte_bbdev/rte_bbdev.h\n@@ -239,8 +239,13 @@ struct rte_bbdev_stats {\n \tuint64_t enqueue_err_count;\n \t/** Total error count on operations dequeued */\n \tuint64_t dequeue_err_count;\n-\t/** Offload time */\n-\tuint64_t offload_time;\n+\t/** CPU cycles consumed by the (HW/SW) accelerator device to offload\n+\t *  the enqueue request to its internal queues.\n+\t *  - For a HW device this is the cycles consumed in MMIO write\n+\t *  - For a SW (vdev) device, this is the processing time of the\n+\t *     bbdev operation\n+\t */\n+\tuint64_t acc_offload_cycles;\n };\n \n /**\n",
    "prefixes": [
        "v2",
        "1/4"
    ]
}
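
Updating a patch (PUT or PATCH) is limited to authenticated users with maintainer rights on the project. A minimal sketch of a partial update with Python and requests, assuming a personal API token and assuming that "state" and "archived" are writable for this user (both assumptions; verify against the OPTIONS response of your instance):

import requests

API_TOKEN = "..."  # hypothetical token, generated from the Patchwork user profile page

# Partially update patch 48594: mark it accepted and archive it
resp = requests.patch(
    "https://patches.dpdk.org/api/patches/48594/",
    headers={"Authorization": f"Token {API_TOKEN}"},
    json={"state": "accepted", "archived": True},
)
resp.raise_for_status()
print(resp.json()["state"], resp.json()["archived"])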