get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/117074/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 117074,
    "url": "https://patches.dpdk.org/api/patches/117074/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/20220928191223.1630200-1-abdullah.sevincer@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20220928191223.1630200-1-abdullah.sevincer@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20220928191223.1630200-1-abdullah.sevincer@intel.com",
    "date": "2022-09-28T19:11:46",
    "name": "[v5,1/3] event/dlb2: add producer port probing optimization",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "380083ac60969c67b72e400e62091c03a67ee36a",
    "submitter": {
        "id": 2843,
        "url": "https://patches.dpdk.org/api/people/2843/?format=api",
        "name": "Sevincer, Abdullah",
        "email": "abdullah.sevincer@intel.com"
    },
    "delegate": null,
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/20220928191223.1630200-1-abdullah.sevincer@intel.com/mbox/",
    "series": [
        {
            "id": 24887,
            "url": "https://patches.dpdk.org/api/series/24887/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=24887",
            "date": "2022-09-28T19:11:46",
            "name": "[v5,1/3] event/dlb2: add producer port probing optimization",
            "version": 5,
            "mbox": "https://patches.dpdk.org/series/24887/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/117074/comments/",
    "check": "success",
    "checks": "https://patches.dpdk.org/api/patches/117074/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id F1870A00C4;\n\tWed, 28 Sep 2022 21:12:40 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id CD72A4113C;\n\tWed, 28 Sep 2022 21:12:40 +0200 (CEST)",
            "from mga14.intel.com (mga14.intel.com [192.55.52.115])\n by mails.dpdk.org (Postfix) with ESMTP id 89EA3410FA\n for <dev@dpdk.org>; Wed, 28 Sep 2022 21:12:38 +0200 (CEST)",
            "from orsmga002.jf.intel.com ([10.7.209.21])\n by fmsmga103.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 28 Sep 2022 12:12:37 -0700",
            "from txanpdk02.an.intel.com ([10.123.117.76])\n by orsmga002.jf.intel.com with ESMTP; 28 Sep 2022 12:12:36 -0700"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/simple;\n d=intel.com; i=@intel.com; q=dns/txt; s=Intel;\n t=1664392358; x=1695928358;\n h=from:to:cc:subject:date:message-id:in-reply-to:\n references:mime-version:content-transfer-encoding;\n bh=2M034ZcG2f+EiuDB01ffDbl+c7C9QylrccyVZEMF5pg=;\n b=MB38ru/FPc7gfaRD8UmdtUIGDXO98J2xc2MgxYU3T0lEGKXEbaCo+NOv\n Kng5IqJV3cLn/5Y+OSfxucIR11Gbv7nUSL/ufbBipmlIbGeM4eZGAyNhL\n mkbI1ljbjKl9hGNo0f2gXqYBmBjSoYIvcE7LUDr3x7RitRhm+Lf9Y4tKu\n brJW+pQsBLU97wZABNthub+WFd6ybG5XsMx2B9YmXPE/vSY0TSSQva8JQ\n Ll/TJs3j9KfDWwgDnat9YLUsUHUrZgFEkRmfuUUFdBZFk4SBYuajATg0u\n i+Keldr4Q8l7wmGGyHbl57Y0V3Xy5mAIrnXxXdXCbNvz4nGEszKB4Mmab w==;",
        "X-IronPort-AV": [
            "E=McAfee;i=\"6500,9779,10484\"; a=\"301667867\"",
            "E=Sophos;i=\"5.93,352,1654585200\"; d=\"scan'208\";a=\"301667867\"",
            "E=McAfee;i=\"6500,9779,10484\"; a=\"622040138\"",
            "E=Sophos;i=\"5.93,352,1654585200\"; d=\"scan'208\";a=\"622040138\""
        ],
        "X-ExtLoop1": "1",
        "From": "Abdullah Sevincer <abdullah.sevincer@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "jerinj@marvell.com,\n\tAbdullah Sevincer <abdullah.sevincer@intel.com>",
        "Subject": "[PATCH v5 1/3] event/dlb2: add producer port probing optimization",
        "Date": "Wed, 28 Sep 2022 14:11:46 -0500",
        "Message-Id": "<20220928191223.1630200-1-abdullah.sevincer@intel.com>",
        "X-Mailer": "git-send-email 2.25.1",
        "In-Reply-To": "<20220927014204.1401746-1-abdullah.sevincer@intel.com>",
        "References": "<20220820005957.2986689-2-timothy.mcdaniel@intel.com>\n <20220927014204.1401746-1-abdullah.sevincer@intel.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "For best performance, applications running on certain cores should use\nthe DLB device locally available on the same tile along with other\nresources. To allocate optimal resources, probing is done for each\nproducer port (PP) for a given CPU and the best performing ports are\nallocated to producers. The cpu used for probing is either the first\ncore of producer coremask (if present) or the second core of EAL\ncoremask. This will be extended later to probe for all CPUs in the\nproducer coremask or EAL coremask.\n\nProducer coremask can be passed along with the BDF of the DLB devices.\n\"-a xx:y.z,producer_coremask=<core_mask>\"\n\nApplications also need to pass RTE_EVENT_PORT_CFG_HINT_PRODUCER during\nrte_event_port_setup() for producer ports for optimal port allocation.\n\nFor optimal load balancing ports that map to one or more QIDs in common\nshould not be in numerical sequence. The port->QID mapping is application\ndependent, but the driver interleaves port IDs as much as possible to\nreduce the likelihood of sequential ports mapping to the same QID(s).\n\nHence, DLB uses an initial allocation of Port IDs to maximize the\naverage distance between an ID and its immediate neighbors. Using\nthe initialport allocation option can be passed through devarg\n\"default_port_allocation=y(or Y)\".\n\nWhen events are dropped by workers or consumers that use LDB ports,\ncompletions are sent which are just ENQs and may impact the latency.\nTo address this,  probing is done for LDB ports as well. Probing is\ndone on ports per 'cos'. When default cos is used, ports will be\nallocated from best ports from the best 'cos', else from best ports of\nthe specific cos.\n\nSigned-off-by: Abdullah Sevincer <abdullah.sevincer@intel.com>\n---\n doc/guides/eventdevs/dlb2.rst              |  36 +++\n drivers/event/dlb2/dlb2.c                  |  72 +++++-\n drivers/event/dlb2/dlb2_priv.h             |   7 +\n drivers/event/dlb2/dlb2_user.h             |   1 +\n drivers/event/dlb2/pf/base/dlb2_hw_types.h |   5 +\n drivers/event/dlb2/pf/base/dlb2_resource.c | 250 ++++++++++++++++++++-\n drivers/event/dlb2/pf/base/dlb2_resource.h |  15 +-\n drivers/event/dlb2/pf/dlb2_main.c          |   9 +-\n drivers/event/dlb2/pf/dlb2_main.h          |  23 +-\n drivers/event/dlb2/pf/dlb2_pf.c            |  23 +-\n 10 files changed, 413 insertions(+), 28 deletions(-)",
    "diff": "diff --git a/doc/guides/eventdevs/dlb2.rst b/doc/guides/eventdevs/dlb2.rst\nindex 5b21f13b68..f5bf5757c6 100644\n--- a/doc/guides/eventdevs/dlb2.rst\n+++ b/doc/guides/eventdevs/dlb2.rst\n@@ -414,3 +414,39 @@ Note that the weight may not exceed the maximum CQ depth.\n        --allow ea:00.0,cq_weight=all:<weight>\n        --allow ea:00.0,cq_weight=qidA-qidB:<weight>\n        --allow ea:00.0,cq_weight=qid:<weight>\n+\n+Producer Coremask\n+~~~~~~~~~~~~~~~~~\n+\n+For best performance, applications running on certain cores should use\n+the DLB device locally available on the same tile along with other\n+resources. To allocate optimal resources, probing is done for each\n+producer port (PP) for a given CPU and the best performing ports are\n+allocated to producers. The cpu used for probing is either the first\n+core of producer coremask (if present) or the second core of EAL\n+coremask. This will be extended later to probe for all CPUs in the\n+producer coremask or EAL coremask. Producer coremask can be passed\n+along with the BDF of the DLB devices.\n+\n+    .. code-block:: console\n+\n+       -a xx:y.z,producer_coremask=<core_mask>\n+\n+Default LDB Port Allocation\n+~~~~~~~~~~~~~~~~~~~~~~~~~~~\n+\n+For optimal load balancing ports that map to one or more QIDs in common\n+should not be in numerical sequence. The port->QID mapping is application\n+dependent, but the driver interleaves port IDs as much as possible to\n+reduce the likelihood of sequential ports mapping to the same QID(s).\n+\n+Hence, DLB uses an initial allocation of Port IDs to maximize the\n+average distance between an ID and its immediate neighbors. (i.e.the\n+distance from 1 to 0 and to 2, the distance from 2 to 1 and to 3, etc.).\n+Initial port allocation option can be passed through devarg. If y (or Y)\n+inial port allocation will be used, otherwise initial port allocation\n+won't be used.\n+\n+    .. code-block:: console\n+\n+       --allow ea:00.0,default_port_allocation=<y/Y>\ndiff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c\nindex 759578378f..6a9db4b642 100644\n--- a/drivers/event/dlb2/dlb2.c\n+++ b/drivers/event/dlb2/dlb2.c\n@@ -293,6 +293,23 @@ dlb2_string_to_int(int *result, const char *str)\n \treturn 0;\n }\n \n+static int\n+set_producer_coremask(const char *key __rte_unused,\n+\t\t      const char *value,\n+\t\t      void *opaque)\n+{\n+\tconst char **mask_str = opaque;\n+\n+\tif (value == NULL || opaque == NULL) {\n+\t\tDLB2_LOG_ERR(\"NULL pointer\\n\");\n+\t\treturn -EINVAL;\n+\t}\n+\n+\t*mask_str = value;\n+\n+\treturn 0;\n+}\n+\n static int\n set_numa_node(const char *key __rte_unused, const char *value, void *opaque)\n {\n@@ -617,6 +634,26 @@ set_vector_opts_enab(const char *key __rte_unused,\n \treturn 0;\n }\n \n+static int\n+set_default_ldb_port_allocation(const char *key __rte_unused,\n+\t\t      const char *value,\n+\t\t      void *opaque)\n+{\n+\tbool *default_ldb_port_allocation = opaque;\n+\n+\tif (value == NULL || opaque == NULL) {\n+\t\tDLB2_LOG_ERR(\"NULL pointer\\n\");\n+\t\treturn -EINVAL;\n+\t}\n+\n+\tif ((*value == 'y') || (*value == 'Y'))\n+\t\t*default_ldb_port_allocation = true;\n+\telse\n+\t\t*default_ldb_port_allocation = false;\n+\n+\treturn 0;\n+}\n+\n static int\n set_qid_depth_thresh(const char *key __rte_unused,\n \t\t     const char *value,\n@@ -1785,6 +1822,9 @@ dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,\n \t} else\n \t\tcredit_high_watermark = enqueue_depth;\n \n+\tif (ev_port->conf.event_port_cfg & RTE_EVENT_PORT_CFG_HINT_PRODUCER)\n+\t\tcfg.is_producer = 1;\n+\n \t/* Per QM values */\n \n \tret = dlb2_iface_dir_port_create(handle, &cfg,  dlb2->poll_mode);\n@@ -1979,6 +2019,10 @@ dlb2_eventdev_port_setup(struct rte_eventdev *dev,\n \t}\n \tev_port->enq_retries = port_conf->enqueue_depth / sw_credit_quanta;\n \n+\t/* Save off port config for reconfig */\n+\tev_port->conf = *port_conf;\n+\n+\n \t/*\n \t * Create port\n \t */\n@@ -2005,9 +2049,6 @@ dlb2_eventdev_port_setup(struct rte_eventdev *dev,\n \t\t}\n \t}\n \n-\t/* Save off port config for reconfig */\n-\tev_port->conf = *port_conf;\n-\n \tev_port->id = ev_port_id;\n \tev_port->enq_configured = true;\n \tev_port->setup_done = true;\n@@ -4700,6 +4741,8 @@ dlb2_parse_params(const char *params,\n \t\t\t\t\t     DLB2_CQ_WEIGHT,\n \t\t\t\t\t     DLB2_PORT_COS,\n \t\t\t\t\t     DLB2_COS_BW,\n+\t\t\t\t\t     DLB2_PRODUCER_COREMASK,\n+\t\t\t\t\t     DLB2_DEFAULT_LDB_PORT_ALLOCATION_ARG,\n \t\t\t\t\t     NULL };\n \n \tif (params != NULL && params[0] != '\\0') {\n@@ -4881,6 +4924,29 @@ dlb2_parse_params(const char *params,\n \t\t\t}\n \n \n+\t\t\tret = rte_kvargs_process(kvlist,\n+\t\t\t\t\t\t DLB2_PRODUCER_COREMASK,\n+\t\t\t\t\t\t set_producer_coremask,\n+\t\t\t\t\t\t &dlb2_args->producer_coremask);\n+\t\t\tif (ret != 0) {\n+\t\t\t\tDLB2_LOG_ERR(\n+\t\t\t\t\t\"%s: Error parsing producer coremask\",\n+\t\t\t\t\tname);\n+\t\t\t\trte_kvargs_free(kvlist);\n+\t\t\t\treturn ret;\n+\t\t\t}\n+\n+\t\t\tret = rte_kvargs_process(kvlist,\n+\t\t\t\t\t\t DLB2_DEFAULT_LDB_PORT_ALLOCATION_ARG,\n+\t\t\t\t\t\t set_default_ldb_port_allocation,\n+\t\t\t\t\t\t &dlb2_args->default_ldb_port_allocation);\n+\t\t\tif (ret != 0) {\n+\t\t\t\tDLB2_LOG_ERR(\"%s: Error parsing ldb default port allocation arg\",\n+\t\t\t\t\t     name);\n+\t\t\t\trte_kvargs_free(kvlist);\n+\t\t\t\treturn ret;\n+\t\t\t}\n+\n \t\t\trte_kvargs_free(kvlist);\n \t\t}\n \t}\ndiff --git a/drivers/event/dlb2/dlb2_priv.h b/drivers/event/dlb2/dlb2_priv.h\nindex db431f7d8b..9ef5bcb901 100644\n--- a/drivers/event/dlb2/dlb2_priv.h\n+++ b/drivers/event/dlb2/dlb2_priv.h\n@@ -51,6 +51,8 @@\n #define DLB2_CQ_WEIGHT \"cq_weight\"\n #define DLB2_PORT_COS \"port_cos\"\n #define DLB2_COS_BW \"cos_bw\"\n+#define DLB2_PRODUCER_COREMASK \"producer_coremask\"\n+#define DLB2_DEFAULT_LDB_PORT_ALLOCATION_ARG \"default_port_allocation\"\n \n /* Begin HW related defines and structs */\n \n@@ -386,6 +388,7 @@ struct dlb2_port {\n \tuint16_t hw_credit_quanta;\n \tbool use_avx512;\n \tuint32_t cq_weight;\n+\tbool is_producer; /* True if port is of type producer */\n };\n \n /* Per-process per-port mmio and memory pointers */\n@@ -669,6 +672,8 @@ struct dlb2_devargs {\n \tstruct dlb2_cq_weight cq_weight;\n \tstruct dlb2_port_cos port_cos;\n \tstruct dlb2_cos_bw cos_bw;\n+\tconst char *producer_coremask;\n+\tbool default_ldb_port_allocation;\n };\n \n /* End Eventdev related defines and structs */\n@@ -722,6 +727,8 @@ void dlb2_event_build_hcws(struct dlb2_port *qm_port,\n \t\t\t   uint8_t *sched_type,\n \t\t\t   uint8_t *queue_id);\n \n+/* Extern functions */\n+extern int rte_eal_parse_coremask(const char *coremask, int *cores);\n \n /* Extern globals */\n extern struct process_local_port_data dlb2_port[][DLB2_NUM_PORT_TYPES];\ndiff --git a/drivers/event/dlb2/dlb2_user.h b/drivers/event/dlb2/dlb2_user.h\nindex 901e2e0c66..28c6aaaf43 100644\n--- a/drivers/event/dlb2/dlb2_user.h\n+++ b/drivers/event/dlb2/dlb2_user.h\n@@ -498,6 +498,7 @@ struct dlb2_create_dir_port_args {\n \t__u16 cq_depth;\n \t__u16 cq_depth_threshold;\n \t__s32 queue_id;\n+\t__u8 is_producer;\n };\n \n /*\ndiff --git a/drivers/event/dlb2/pf/base/dlb2_hw_types.h b/drivers/event/dlb2/pf/base/dlb2_hw_types.h\nindex 9511521e67..87996ef621 100644\n--- a/drivers/event/dlb2/pf/base/dlb2_hw_types.h\n+++ b/drivers/event/dlb2/pf/base/dlb2_hw_types.h\n@@ -249,6 +249,7 @@ struct dlb2_hw_domain {\n \tstruct dlb2_list_head avail_ldb_queues;\n \tstruct dlb2_list_head avail_ldb_ports[DLB2_NUM_COS_DOMAINS];\n \tstruct dlb2_list_head avail_dir_pq_pairs;\n+\tstruct dlb2_list_head rsvd_dir_pq_pairs;\n \tu32 total_hist_list_entries;\n \tu32 avail_hist_list_entries;\n \tu32 hist_list_entry_base;\n@@ -347,6 +348,10 @@ struct dlb2_hw {\n \tstruct dlb2_function_resources vdev[DLB2_MAX_NUM_VDEVS];\n \tstruct dlb2_hw_domain domains[DLB2_MAX_NUM_DOMAINS];\n \tu8 cos_reservation[DLB2_NUM_COS_DOMAINS];\n+\tint prod_core_list[RTE_MAX_LCORE];\n+\tu8 num_prod_cores;\n+\tint dir_pp_allocations[DLB2_MAX_NUM_DIR_PORTS_V2_5];\n+\tint ldb_pp_allocations[DLB2_MAX_NUM_LDB_PORTS];\n \n \t/* Virtualization */\n \tint virt_mode;\ndiff --git a/drivers/event/dlb2/pf/base/dlb2_resource.c b/drivers/event/dlb2/pf/base/dlb2_resource.c\nindex 0731416a43..280a8e51b1 100644\n--- a/drivers/event/dlb2/pf/base/dlb2_resource.c\n+++ b/drivers/event/dlb2/pf/base/dlb2_resource.c\n@@ -51,6 +51,7 @@ static void dlb2_init_domain_rsrc_lists(struct dlb2_hw_domain *domain)\n \tdlb2_list_init_head(&domain->used_dir_pq_pairs);\n \tdlb2_list_init_head(&domain->avail_ldb_queues);\n \tdlb2_list_init_head(&domain->avail_dir_pq_pairs);\n+\tdlb2_list_init_head(&domain->rsvd_dir_pq_pairs);\n \n \tfor (i = 0; i < DLB2_NUM_COS_DOMAINS; i++)\n \t\tdlb2_list_init_head(&domain->used_ldb_ports[i]);\n@@ -106,8 +107,10 @@ void dlb2_resource_free(struct dlb2_hw *hw)\n  * Return:\n  * Returns 0 upon success, <0 otherwise.\n  */\n-int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver)\n+int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver, const void *probe_args)\n {\n+\tconst struct dlb2_devargs *args = (const struct dlb2_devargs *)probe_args;\n+\tbool ldb_port_default = args ? args->default_ldb_port_allocation : false;\n \tstruct dlb2_list_entry *list;\n \tunsigned int i;\n \tint ret;\n@@ -122,6 +125,7 @@ int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver)\n \t * the distance from 1 to 0 and to 2, the distance from 2 to 1 and to\n \t * 3, etc.).\n \t */\n+\n \tconst u8 init_ldb_port_allocation[DLB2_MAX_NUM_LDB_PORTS] = {\n \t\t0,  7,  14,  5, 12,  3, 10,  1,  8, 15,  6, 13,  4, 11,  2,  9,\n \t\t16, 23, 30, 21, 28, 19, 26, 17, 24, 31, 22, 29, 20, 27, 18, 25,\n@@ -164,7 +168,10 @@ int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver)\n \t\tint cos_id = i >> DLB2_NUM_COS_DOMAINS;\n \t\tstruct dlb2_ldb_port *port;\n \n-\t\tport = &hw->rsrcs.ldb_ports[init_ldb_port_allocation[i]];\n+\t\tif (ldb_port_default == true)\n+\t\t\tport = &hw->rsrcs.ldb_ports[init_ldb_port_allocation[i]];\n+\t\telse\n+\t\t\tport = &hw->rsrcs.ldb_ports[hw->ldb_pp_allocations[i]];\n \n \t\tdlb2_list_add(&hw->pf.avail_ldb_ports[cos_id],\n \t\t\t      &port->func_list);\n@@ -172,7 +179,8 @@ int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver)\n \n \thw->pf.num_avail_dir_pq_pairs = DLB2_MAX_NUM_DIR_PORTS(hw->ver);\n \tfor (i = 0; i < hw->pf.num_avail_dir_pq_pairs; i++) {\n-\t\tlist = &hw->rsrcs.dir_pq_pairs[i].func_list;\n+\t\tint index = hw->dir_pp_allocations[i];\n+\t\tlist = &hw->rsrcs.dir_pq_pairs[index].func_list;\n \n \t\tdlb2_list_add(&hw->pf.avail_dir_pq_pairs, list);\n \t}\n@@ -592,6 +600,7 @@ static int dlb2_attach_dir_ports(struct dlb2_hw *hw,\n \t\t\t\t u32 num_ports,\n \t\t\t\t struct dlb2_cmd_response *resp)\n {\n+\tint num_res = hw->num_prod_cores;\n \tunsigned int i;\n \n \tif (rsrcs->num_avail_dir_pq_pairs < num_ports) {\n@@ -611,12 +620,19 @@ static int dlb2_attach_dir_ports(struct dlb2_hw *hw,\n \t\t\treturn -EFAULT;\n \t\t}\n \n+\t\tif (num_res) {\n+\t\t\tdlb2_list_add(&domain->rsvd_dir_pq_pairs,\n+\t\t\t\t      &port->domain_list);\n+\t\t\tnum_res--;\n+\t\t} else {\n+\t\t\tdlb2_list_add(&domain->avail_dir_pq_pairs,\n+\t\t\t&port->domain_list);\n+\t\t}\n+\n \t\tdlb2_list_del(&rsrcs->avail_dir_pq_pairs, &port->func_list);\n \n \t\tport->domain_id = domain->id;\n \t\tport->owned = true;\n-\n-\t\tdlb2_list_add(&domain->avail_dir_pq_pairs, &port->domain_list);\n \t}\n \n \trsrcs->num_avail_dir_pq_pairs -= num_ports;\n@@ -739,6 +755,199 @@ static int dlb2_attach_ldb_queues(struct dlb2_hw *hw,\n \treturn 0;\n }\n \n+static int\n+dlb2_pp_profile(struct dlb2_hw *hw, int port, int cpu, bool is_ldb)\n+{\n+\tu64 cycle_start = 0ULL, cycle_end = 0ULL;\n+\tstruct dlb2_hcw hcw_mem[DLB2_HCW_MEM_SIZE], *hcw;\n+\tvoid __iomem *pp_addr;\n+\tcpu_set_t cpuset;\n+\tint i;\n+\n+\tCPU_ZERO(&cpuset);\n+\tCPU_SET(cpu, &cpuset);\n+\tsched_setaffinity(0, sizeof(cpuset), &cpuset);\n+\n+\tpp_addr = os_map_producer_port(hw, port, is_ldb);\n+\n+\t/* Point hcw to a 64B-aligned location */\n+\thcw = (struct dlb2_hcw *)((uintptr_t)&hcw_mem[DLB2_HCW_64B_OFF] &\n+\t      ~DLB2_HCW_ALIGN_MASK);\n+\n+\t/*\n+\t * Program the first HCW for a completion and token return and\n+\t * the other HCWs as NOOPS\n+\t */\n+\n+\tmemset(hcw, 0, (DLB2_HCW_MEM_SIZE - DLB2_HCW_64B_OFF) * sizeof(*hcw));\n+\thcw->qe_comp = 1;\n+\thcw->cq_token = 1;\n+\thcw->lock_id = 1;\n+\n+\tcycle_start = rte_get_tsc_cycles();\n+\tfor (i = 0; i < DLB2_NUM_PROBE_ENQS; i++)\n+\t\tdlb2_movdir64b(pp_addr, hcw);\n+\n+\tcycle_end = rte_get_tsc_cycles();\n+\n+\tos_unmap_producer_port(hw, pp_addr);\n+\treturn (int)(cycle_end - cycle_start);\n+}\n+\n+static void *\n+dlb2_pp_profile_func(void *data)\n+{\n+\tstruct dlb2_pp_thread_data *thread_data = data;\n+\tint cycles;\n+\n+\tcycles = dlb2_pp_profile(thread_data->hw, thread_data->pp,\n+\tthread_data->cpu, thread_data->is_ldb);\n+\n+\tthread_data->cycles = cycles;\n+\n+\treturn NULL;\n+}\n+\n+static int dlb2_pp_cycle_comp(const void *a, const void *b)\n+{\n+\tconst struct dlb2_pp_thread_data *x = a;\n+\tconst struct dlb2_pp_thread_data *y = b;\n+\n+\treturn x->cycles - y->cycles;\n+}\n+\n+\n+/* Probe producer ports from different CPU cores */\n+static void\n+dlb2_get_pp_allocation(struct dlb2_hw *hw, int cpu, int port_type, int cos_id)\n+{\n+\tstruct dlb2_dev *dlb2_dev = container_of(hw, struct dlb2_dev, hw);\n+\tint i, err, ver = DLB2_HW_DEVICE_FROM_PCI_ID(dlb2_dev->pdev);\n+\tbool is_ldb = (port_type == DLB2_LDB_PORT);\n+\tint num_ports = is_ldb ? DLB2_MAX_NUM_LDB_PORTS :\n+\tDLB2_MAX_NUM_DIR_PORTS(ver);\n+\tstruct dlb2_pp_thread_data dlb2_thread_data[num_ports];\n+\tint *port_allocations = is_ldb ? hw->ldb_pp_allocations :\n+\t\t\t\t\t hw->dir_pp_allocations;\n+\tint num_sort = is_ldb ? DLB2_NUM_COS_DOMAINS : 1;\n+\tstruct dlb2_pp_thread_data cos_cycles[num_sort];\n+\tint num_ports_per_sort = num_ports / num_sort;\n+\tpthread_t pthread;\n+\n+\tdlb2_dev->enqueue_four = dlb2_movdir64b;\n+\n+\tDLB2_LOG_INFO(\" for %s: cpu core used in pp profiling: %d\\n\",\n+\t\t      is_ldb ? \"LDB\" : \"DIR\", cpu);\n+\n+\tmemset(cos_cycles, 0, num_sort * sizeof(struct dlb2_pp_thread_data));\n+\tfor (i = 0; i < num_ports; i++) {\n+\t\tint cos = is_ldb ? (i >> DLB2_NUM_COS_DOMAINS) : 0;\n+\n+\t\tdlb2_thread_data[i].is_ldb = is_ldb;\n+\t\tdlb2_thread_data[i].pp = i;\n+\t\tdlb2_thread_data[i].cycles = 0;\n+\t\tdlb2_thread_data[i].hw = hw;\n+\t\tdlb2_thread_data[i].cpu = cpu;\n+\n+\t\terr = pthread_create(&pthread, NULL, &dlb2_pp_profile_func,\n+\t\t\t\t     &dlb2_thread_data[i]);\n+\t\tif (err) {\n+\t\t\tDLB2_LOG_ERR(\": thread creation failed! err=%d\", err);\n+\t\t\treturn;\n+\t\t}\n+\n+\t\terr = pthread_join(pthread, NULL);\n+\t\tif (err) {\n+\t\t\tDLB2_LOG_ERR(\": thread join failed! err=%d\", err);\n+\t\t\treturn;\n+\t\t}\n+\t\tcos_cycles[cos].cycles += dlb2_thread_data[i].cycles;\n+\n+\t\tif ((i + 1) % num_ports_per_sort == 0) {\n+\t\t\tint index = cos * num_ports_per_sort;\n+\n+\t\t\tcos_cycles[cos].pp = index;\n+\t\t\t/*\n+\t\t\t * For LDB ports first sort with in a cos. Later sort\n+\t\t\t * the best cos based on total cycles for the cos.\n+\t\t\t * For DIR ports, there is a single sort across all\n+\t\t\t * ports.\n+\t\t\t */\n+\t\t\tqsort(&dlb2_thread_data[index], num_ports_per_sort,\n+\t\t\t      sizeof(struct dlb2_pp_thread_data),\n+\t\t\t      dlb2_pp_cycle_comp);\n+\t\t}\n+\t}\n+\n+\t/*\n+\t * Re-arrange best ports by cos if default cos is used.\n+\t */\n+\tif (is_ldb && cos_id == DLB2_COS_DEFAULT)\n+\t\tqsort(cos_cycles, num_sort,\n+\t\t      sizeof(struct dlb2_pp_thread_data),\n+\t\t      dlb2_pp_cycle_comp);\n+\n+\tfor (i = 0; i < num_ports; i++) {\n+\t\tint start = is_ldb ? cos_cycles[i / num_ports_per_sort].pp : 0;\n+\t\tint index = i % num_ports_per_sort;\n+\n+\t\tport_allocations[i] = dlb2_thread_data[start + index].pp;\n+\t\tDLB2_LOG_INFO(\": pp %d cycles %d\", port_allocations[i],\n+\t\t\t     dlb2_thread_data[start + index].cycles);\n+\t}\n+}\n+\n+int\n+dlb2_resource_probe(struct dlb2_hw *hw, const void *probe_args)\n+{\n+\tconst struct dlb2_devargs *args = (const struct dlb2_devargs *)probe_args;\n+\tconst char *mask = NULL;\n+\tint cpu = 0, cnt = 0, cores[RTE_MAX_LCORE];\n+\tint i, cos_id = DLB2_COS_DEFAULT;\n+\n+\tif (args) {\n+\t\tmask = (const char *)args->producer_coremask;\n+\t\tcos_id = args->cos_id;\n+\t}\n+\n+\tif (mask && rte_eal_parse_coremask(mask, cores)) {\n+\t\tDLB2_LOG_ERR(\": Invalid producer coremask=%s\", mask);\n+\t\treturn -1;\n+\t}\n+\n+\thw->num_prod_cores = 0;\n+\tfor (i = 0; i < RTE_MAX_LCORE; i++) {\n+\t\tif (rte_lcore_is_enabled(i)) {\n+\t\t\tif (mask) {\n+\t\t\t\t/*\n+\t\t\t\t * Populate the producer cores from parsed\n+\t\t\t\t * coremask\n+\t\t\t\t */\n+\t\t\t\tif (cores[i] != -1) {\n+\t\t\t\t\thw->prod_core_list[cores[i]] = i;\n+\t\t\t\t\thw->num_prod_cores++;\n+\t\t\t\t}\n+\t\t\t} else if ((++cnt == DLB2_EAL_PROBE_CORE ||\n+\t\t\t   rte_lcore_count() < DLB2_EAL_PROBE_CORE)) {\n+\t\t\t\t/*\n+\t\t\t\t * If no producer coremask is provided, use the\n+\t\t\t\t * second EAL core to probe\n+\t\t\t\t */\n+\t\t\t\tcpu = i;\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t}\n+\t}\n+\t/* Use the first core in producer coremask to probe */\n+\tif (hw->num_prod_cores)\n+\t\tcpu = hw->prod_core_list[0];\n+\n+\tdlb2_get_pp_allocation(hw, cpu, DLB2_LDB_PORT, cos_id);\n+\tdlb2_get_pp_allocation(hw, cpu, DLB2_DIR_PORT, DLB2_COS_DEFAULT);\n+\n+\treturn 0;\n+}\n+\n static int\n dlb2_domain_attach_resources(struct dlb2_hw *hw,\n \t\t\t     struct dlb2_function_resources *rsrcs,\n@@ -4359,6 +4568,8 @@ dlb2_verify_create_ldb_port_args(struct dlb2_hw *hw,\n \t\treturn -EINVAL;\n \t}\n \n+\tDLB2_LOG_INFO(\": LDB: cos=%d port:%d\\n\", id, port->id.phys_id);\n+\n \t/* Check cache-line alignment */\n \tif ((cq_dma_base & 0x3F) != 0) {\n \t\tresp->status = DLB2_ST_INVALID_CQ_VIRT_ADDR;\n@@ -4568,13 +4779,25 @@ dlb2_verify_create_dir_port_args(struct dlb2_hw *hw,\n \t\t/*\n \t\t * If the port's queue is not configured, validate that a free\n \t\t * port-queue pair is available.\n+\t\t * First try the 'res' list if the port is producer OR if\n+\t\t * 'avail' list is empty else fall back to 'avail' list\n \t\t */\n-\t\tpq = DLB2_DOM_LIST_HEAD(domain->avail_dir_pq_pairs,\n-\t\t\t\t\ttypeof(*pq));\n+\t\tif (!dlb2_list_empty(&domain->rsvd_dir_pq_pairs) &&\n+\t\t    (args->is_producer ||\n+\t\t     dlb2_list_empty(&domain->avail_dir_pq_pairs)))\n+\t\t\tpq = DLB2_DOM_LIST_HEAD(domain->rsvd_dir_pq_pairs,\n+\t\t\t\t\t\ttypeof(*pq));\n+\t\telse\n+\t\t\tpq = DLB2_DOM_LIST_HEAD(domain->avail_dir_pq_pairs,\n+\t\t\t\t\t\ttypeof(*pq));\n+\n \t\tif (!pq) {\n \t\t\tresp->status = DLB2_ST_DIR_PORTS_UNAVAILABLE;\n \t\t\treturn -EINVAL;\n \t\t}\n+\t\tDLB2_LOG_INFO(\": DIR: port:%d is_producer=%d\\n\",\n+\t\t\t      pq->id.phys_id, args->is_producer);\n+\n \t}\n \n \t/* Check cache-line alignment */\n@@ -4875,11 +5098,18 @@ int dlb2_hw_create_dir_port(struct dlb2_hw *hw,\n \t\treturn ret;\n \n \t/*\n-\t * Configuration succeeded, so move the resource from the 'avail' to\n-\t * the 'used' list (if it's not already there).\n+\t * Configuration succeeded, so move the resource from the 'avail' or\n+\t * 'res' to the 'used' list (if it's not already there).\n \t */\n \tif (args->queue_id == -1) {\n-\t\tdlb2_list_del(&domain->avail_dir_pq_pairs, &port->domain_list);\n+\t\tstruct dlb2_list_head *res = &domain->rsvd_dir_pq_pairs;\n+\t\tstruct dlb2_list_head *avail = &domain->avail_dir_pq_pairs;\n+\n+\t\tif ((args->is_producer && !dlb2_list_empty(res)) ||\n+\t\t     dlb2_list_empty(avail))\n+\t\t\tdlb2_list_del(res, &port->domain_list);\n+\t\telse\n+\t\t\tdlb2_list_del(avail, &port->domain_list);\n \n \t\tdlb2_list_add(&domain->used_dir_pq_pairs, &port->domain_list);\n \t}\ndiff --git a/drivers/event/dlb2/pf/base/dlb2_resource.h b/drivers/event/dlb2/pf/base/dlb2_resource.h\nindex a7e6c90888..71bd6148f1 100644\n--- a/drivers/event/dlb2/pf/base/dlb2_resource.h\n+++ b/drivers/event/dlb2/pf/base/dlb2_resource.h\n@@ -23,7 +23,20 @@\n  * Return:\n  * Returns 0 upon success, <0 otherwise.\n  */\n-int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver);\n+int dlb2_resource_init(struct dlb2_hw *hw, enum dlb2_hw_ver ver, const void *probe_args);\n+\n+/**\n+ * dlb2_resource_probe() - probe hw resources\n+ * @hw: pointer to struct dlb2_hw.\n+ *\n+ * This function probes hw resources for best port allocation to producer\n+ * cores.\n+ *\n+ * Return:\n+ * Returns 0 upon success, <0 otherwise.\n+ */\n+int dlb2_resource_probe(struct dlb2_hw *hw, const void *probe_args);\n+\n \n /**\n  * dlb2_clr_pmcsr_disable() - power on bulk of DLB 2.0 logic\ndiff --git a/drivers/event/dlb2/pf/dlb2_main.c b/drivers/event/dlb2/pf/dlb2_main.c\nindex b6ec85b479..717aa4fc08 100644\n--- a/drivers/event/dlb2/pf/dlb2_main.c\n+++ b/drivers/event/dlb2/pf/dlb2_main.c\n@@ -147,7 +147,7 @@ static int dlb2_pf_wait_for_device_ready(struct dlb2_dev *dlb2_dev,\n }\n \n struct dlb2_dev *\n-dlb2_probe(struct rte_pci_device *pdev)\n+dlb2_probe(struct rte_pci_device *pdev, const void *probe_args)\n {\n \tstruct dlb2_dev *dlb2_dev;\n \tint ret = 0;\n@@ -208,6 +208,10 @@ dlb2_probe(struct rte_pci_device *pdev)\n \tif (ret)\n \t\tgoto wait_for_device_ready_fail;\n \n+\tret = dlb2_resource_probe(&dlb2_dev->hw, probe_args);\n+\tif (ret)\n+\t\tgoto resource_probe_fail;\n+\n \tret = dlb2_pf_reset(dlb2_dev);\n \tif (ret)\n \t\tgoto dlb2_reset_fail;\n@@ -216,7 +220,7 @@ dlb2_probe(struct rte_pci_device *pdev)\n \tif (ret)\n \t\tgoto init_driver_state_fail;\n \n-\tret = dlb2_resource_init(&dlb2_dev->hw, dlb_version);\n+\tret = dlb2_resource_init(&dlb2_dev->hw, dlb_version, probe_args);\n \tif (ret)\n \t\tgoto resource_init_fail;\n \n@@ -227,6 +231,7 @@ dlb2_probe(struct rte_pci_device *pdev)\n init_driver_state_fail:\n dlb2_reset_fail:\n pci_mmap_bad_addr:\n+resource_probe_fail:\n wait_for_device_ready_fail:\n \trte_free(dlb2_dev);\n dlb2_dev_malloc_fail:\ndiff --git a/drivers/event/dlb2/pf/dlb2_main.h b/drivers/event/dlb2/pf/dlb2_main.h\nindex 5aa51b1616..4c64d72e9c 100644\n--- a/drivers/event/dlb2/pf/dlb2_main.h\n+++ b/drivers/event/dlb2/pf/dlb2_main.h\n@@ -15,7 +15,11 @@\n #include \"base/dlb2_hw_types.h\"\n #include \"../dlb2_user.h\"\n \n-#define DLB2_DEFAULT_UNREGISTER_TIMEOUT_S 5\n+#define DLB2_EAL_PROBE_CORE 2\n+#define DLB2_NUM_PROBE_ENQS 1000\n+#define DLB2_HCW_MEM_SIZE 8\n+#define DLB2_HCW_64B_OFF 4\n+#define DLB2_HCW_ALIGN_MASK 0x3F\n \n struct dlb2_dev;\n \n@@ -31,15 +35,30 @@ struct dlb2_dev {\n \t/* struct list_head list; */\n \tstruct device *dlb2_device;\n \tbool domain_reset_failed;\n+\t/* The enqueue_four function enqueues four HCWs (one cache-line worth)\n+\t * to the HQM, using whichever mechanism is supported by the platform\n+\t * on which this driver is running.\n+\t */\n+\tvoid (*enqueue_four)(void *qe4, void *pp_addr);\n \t/* The resource mutex serializes access to driver data structures and\n \t * hardware registers.\n \t */\n \trte_spinlock_t resource_mutex;\n \tbool worker_launched;\n \tu8 revision;\n+\tu8 version;\n+};\n+\n+struct dlb2_pp_thread_data {\n+\tstruct dlb2_hw *hw;\n+\tint pp;\n+\tint cpu;\n+\tbool is_ldb;\n+\tint cycles;\n };\n \n-struct dlb2_dev *dlb2_probe(struct rte_pci_device *pdev);\n+struct dlb2_dev *dlb2_probe(struct rte_pci_device *pdev, const void *probe_args);\n+\n \n int dlb2_pf_reset(struct dlb2_dev *dlb2_dev);\n int dlb2_pf_create_sched_domain(struct dlb2_hw *hw,\ndiff --git a/drivers/event/dlb2/pf/dlb2_pf.c b/drivers/event/dlb2/pf/dlb2_pf.c\nindex 71ac141b66..3d15250e11 100644\n--- a/drivers/event/dlb2/pf/dlb2_pf.c\n+++ b/drivers/event/dlb2/pf/dlb2_pf.c\n@@ -702,6 +702,7 @@ dlb2_eventdev_pci_init(struct rte_eventdev *eventdev)\n \tstruct dlb2_devargs dlb2_args = {\n \t\t.socket_id = rte_socket_id(),\n \t\t.max_num_events = DLB2_MAX_NUM_LDB_CREDITS,\n+\t\t.producer_coremask = NULL,\n \t\t.num_dir_credits_override = -1,\n \t\t.qid_depth_thresholds = { {0} },\n \t\t.poll_interval = DLB2_POLL_INTERVAL_DEFAULT,\n@@ -713,6 +714,7 @@ dlb2_eventdev_pci_init(struct rte_eventdev *eventdev)\n \t};\n \tstruct dlb2_eventdev *dlb2;\n \tint q;\n+\tconst void *probe_args = NULL;\n \n \tDLB2_LOG_DBG(\"Enter with dev_id=%d socket_id=%d\",\n \t\t     eventdev->data->dev_id, eventdev->data->socket_id);\n@@ -728,16 +730,6 @@ dlb2_eventdev_pci_init(struct rte_eventdev *eventdev)\n \t\tdlb2 = dlb2_pmd_priv(eventdev); /* rte_zmalloc_socket mem */\n \t\tdlb2->version = DLB2_HW_DEVICE_FROM_PCI_ID(pci_dev);\n \n-\t\t/* Probe the DLB2 PF layer */\n-\t\tdlb2->qm_instance.pf_dev = dlb2_probe(pci_dev);\n-\n-\t\tif (dlb2->qm_instance.pf_dev == NULL) {\n-\t\t\tDLB2_LOG_ERR(\"DLB2 PF Probe failed with error %d\\n\",\n-\t\t\t\t     rte_errno);\n-\t\t\tret = -rte_errno;\n-\t\t\tgoto dlb2_probe_failed;\n-\t\t}\n-\n \t\t/* Were we invoked with runtime parameters? */\n \t\tif (pci_dev->device.devargs) {\n \t\t\tret = dlb2_parse_params(pci_dev->device.devargs->args,\n@@ -749,6 +741,17 @@ dlb2_eventdev_pci_init(struct rte_eventdev *eventdev)\n \t\t\t\t\t     ret, rte_errno);\n \t\t\t\tgoto dlb2_probe_failed;\n \t\t\t}\n+\t\t\tprobe_args = &dlb2_args;\n+\t\t}\n+\n+\t\t/* Probe the DLB2 PF layer */\n+\t\tdlb2->qm_instance.pf_dev = dlb2_probe(pci_dev, probe_args);\n+\n+\t\tif (dlb2->qm_instance.pf_dev == NULL) {\n+\t\t\tDLB2_LOG_ERR(\"DLB2 PF Probe failed with error %d\\n\",\n+\t\t\t\t     rte_errno);\n+\t\t\tret = -rte_errno;\n+\t\t\tgoto dlb2_probe_failed;\n \t\t}\n \n \t\tret = dlb2_primary_eventdev_probe(eventdev,\n",
    "prefixes": [
        "v5",
        "1/3"
    ]
}