get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/139199/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 139199,
    "url": "http://patches.dpdk.org/api/patches/139199/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20240408212615.416813-1-stephen@networkplumber.org/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20240408212615.416813-1-stephen@networkplumber.org>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20240408212615.416813-1-stephen@networkplumber.org",
    "date": "2024-04-08T21:26:15",
    "name": "[v2] latencystats: performance overhaul",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": false,
    "hash": "99193edd86226d8ddc35602d876908e1891e0131",
    "submitter": {
        "id": 27,
        "url": "http://patches.dpdk.org/api/people/27/?format=api",
        "name": "Stephen Hemminger",
        "email": "stephen@networkplumber.org"
    },
    "delegate": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/users/1/?format=api",
        "username": "tmonjalo",
        "first_name": "Thomas",
        "last_name": "Monjalon",
        "email": "thomas@monjalon.net"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20240408212615.416813-1-stephen@networkplumber.org/mbox/",
    "series": [
        {
            "id": 31708,
            "url": "http://patches.dpdk.org/api/series/31708/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=31708",
            "date": "2024-04-08T21:26:15",
            "name": "[v2] latencystats: performance overhaul",
            "version": 2,
            "mbox": "http://patches.dpdk.org/series/31708/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/139199/comments/",
    "check": "warning",
    "checks": "http://patches.dpdk.org/api/patches/139199/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 135AA43DD2;\n\tMon,  8 Apr 2024 23:26:20 +0200 (CEST)",
            "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 022994067A;\n\tMon,  8 Apr 2024 23:26:20 +0200 (CEST)",
            "from mail-pf1-f171.google.com (mail-pf1-f171.google.com\n [209.85.210.171])\n by mails.dpdk.org (Postfix) with ESMTP id D29724064A\n for <dev@dpdk.org>; Mon,  8 Apr 2024 23:26:18 +0200 (CEST)",
            "by mail-pf1-f171.google.com with SMTP id\n d2e1a72fcca58-6ed04c91c46so3151705b3a.0\n for <dev@dpdk.org>; Mon, 08 Apr 2024 14:26:18 -0700 (PDT)",
            "from hermes.lan (204-195-96-226.wavecable.com. [204.195.96.226])\n by smtp.gmail.com with ESMTPSA id\n x6-20020aa79a46000000b006eceaccaec9sm6947667pfj.131.2024.04.08.14.26.17\n (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);\n Mon, 08 Apr 2024 14:26:17 -0700 (PDT)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=networkplumber-org.20230601.gappssmtp.com; s=20230601; t=1712611578;\n x=1713216378; darn=dpdk.org;\n h=content-transfer-encoding:mime-version:references:in-reply-to\n :message-id:date:subject:cc:to:from:from:to:cc:subject:date\n :message-id:reply-to;\n bh=e8EM7gia0ZiCANGv+rhleXV1EfI5Fa+g6j/LmqjQQXo=;\n b=md1nK73kQKv0l/JxY9kfsPiLcyAGs6veU4+yQx+FmA9O1Hmuy1MlJk59AFVNjKDFpe\n 0rhBfhyctI9aLEnx9oDd35vHTIT1oQ9IYva6vcQueGw84aguj5JwaKpy5IaUZg9AVcoY\n W51QQ7So/Ypbc+VzZtherWEXosJCfsb0RK5luWq+TjCKyzeLHi/1ake4nut8DvYdc5gE\n o3VUCnXVKV2qqMcIInZShFXpXJ1qug3zY3NmNqnleU7EpAzGSEnDgBxNOKZLqrw+Ownp\n au2qSwe8hUxeMPUAHJlakeLjid+uNts9/FPr+F4BPPyqsxwOLgAI5jD6yFmJTZ2R7P6k\n vARw==",
        "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=1e100.net; s=20230601; t=1712611578; x=1713216378;\n h=content-transfer-encoding:mime-version:references:in-reply-to\n :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc\n :subject:date:message-id:reply-to;\n bh=e8EM7gia0ZiCANGv+rhleXV1EfI5Fa+g6j/LmqjQQXo=;\n b=fzPuBRph2nnxw1AQKLrKJoahXCepd0ydI6IRyerkZzRA6H9nAUVkZrxnBHW17X9YJH\n HHZj3WYcJz09HrmJacBnl3uD61RSUEHe77QWONkRcprXpFbXT7Z4f2zmNc/jDxQf2Kld\n tAQpnsTzMC6in6wvLAYXIxGUL/26ru5vL17PxXyAWsH04YUcQSHv+ZQo3Q8MdC7Vf8LC\n hYe7Si46lxr2Rok2aiesoeM9sOSwG8/tpUf8p/mmKICjFj78kKusFnB+XrWGYFmlTMO/\n hVnKWN/gNfhsq/C2g5u/sstptJPIROcZDwuPcZSV5J0zsfMzHJxqu8esvZRDoej/SUWH\n AWfg==",
        "X-Gm-Message-State": "AOJu0YyapvsI8XdJh8ohKeXmaqN6UJDZSutw3u4M5jZ5x4sCswR1YM9M\n seE/rEjTLG/KuzkmZLEDlm0eV+EHgLH79ZURja6VpdUtozCcCYA02Xj2u6hrfjWdKzurq7D8gIz\n z94s=",
        "X-Google-Smtp-Source": "\n AGHT+IEuwWhH0RlqJzpxf/Qj6H3nRwvoEJ43WsJD4SJXsLfcEFvItg4WrDqF1s9EAixzDIkH1DZN5g==",
        "X-Received": "by 2002:a05:6a20:9194:b0:1a7:4b33:7c6f with SMTP id\n v20-20020a056a20919400b001a74b337c6fmr9537344pzd.43.1712611577911;\n Mon, 08 Apr 2024 14:26:17 -0700 (PDT)",
        "From": "Stephen Hemminger <stephen@networkplumber.org>",
        "To": "dev@dpdk.org",
        "Cc": "Stephen Hemminger <stephen@networkplumber.org>,\n Reshma Pattan <reshma.pattan@intel.com>",
        "Subject": "[PATCH v2] latencystats: performance overhaul",
        "Date": "Mon,  8 Apr 2024 14:26:15 -0700",
        "Message-ID": "<20240408212615.416813-1-stephen@networkplumber.org>",
        "X-Mailer": "git-send-email 2.43.0",
        "In-Reply-To": "<20240408195036.182545-1-stephen@networkplumber.org>",
        "References": "<20240408195036.182545-1-stephen@networkplumber.org>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "The latencystats library had multiple performance issues.\n  - using floating point in the fast path of Tx\n  - global lock in fast path\n  - global stats cause cache thrashing\n  - divide instructions in fast path\n\nOther minor issues:\n  - message split across lines\n  - error messages not printed with correct priority\n\nSigned-off-by: Stephen Hemminger <stephen@networkplumber.org>\n---\n lib/latencystats/rte_latencystats.c | 334 ++++++++++++++++------------\n 1 file changed, 187 insertions(+), 147 deletions(-)",
    "diff": "diff --git a/lib/latencystats/rte_latencystats.c b/lib/latencystats/rte_latencystats.c\nindex 4ea9b0d75b..5154e0650a 100644\n--- a/lib/latencystats/rte_latencystats.c\n+++ b/lib/latencystats/rte_latencystats.c\n@@ -2,29 +2,18 @@\n  * Copyright(c) 2018 Intel Corporation\n  */\n \n-#include <math.h>\n-\n-#include <rte_string_fns.h>\n #include <rte_mbuf_dyn.h>\n #include <rte_log.h>\n+#include <rte_stdatomic.h>\n #include <rte_cycles.h>\n #include <rte_ethdev.h>\n #include <rte_metrics.h>\n #include <rte_memzone.h>\n #include <rte_lcore.h>\n+#include <rte_time.h>\n \n #include \"rte_latencystats.h\"\n \n-/** Nano seconds per second */\n-#define NS_PER_SEC 1E9\n-\n-/** Clock cycles per nano second */\n-static uint64_t\n-latencystat_cycles_per_ns(void)\n-{\n-\treturn rte_get_timer_hz() / NS_PER_SEC;\n-}\n-\n RTE_LOG_REGISTER_DEFAULT(latencystat_logtype, INFO);\n #define RTE_LOGTYPE_LATENCY_STATS latencystat_logtype\n #define LATENCY_STATS_LOG(level, ...) \\\n@@ -40,28 +29,32 @@ timestamp_dynfield(struct rte_mbuf *mbuf)\n \t\t\ttimestamp_dynfield_offset, rte_mbuf_timestamp_t *);\n }\n \n-static const char *MZ_RTE_LATENCY_STATS = \"rte_latencystats\";\n+static const char MZ_RTE_LATENCY_STATS[] = \"rte_latencystats\";\n static int latency_stats_index;\n static uint64_t samp_intvl;\n-static uint64_t timer_tsc;\n-static uint64_t prev_tsc;\n \n+/* Per queue latency information (in cycles) */\n struct rte_latency_stats {\n-\tfloat min_latency; /**< Minimum latency in nano seconds */\n-\tfloat avg_latency; /**< Average latency in nano seconds */\n-\tfloat max_latency; /**< Maximum latency in nano seconds */\n-\tfloat jitter; /** Latency variation */\n-\trte_spinlock_t lock; /** Latency calculation lock */\n-};\n-\n-static struct rte_latency_stats *glob_stats;\n-\n-struct rxtx_cbs {\n+\tRTE_ATOMIC(uint64_t) min_latency; /* Minimum latency */\n+\tRTE_ATOMIC(uint64_t) avg_latency; /* Average latency */\n+\tRTE_ATOMIC(uint64_t) max_latency; /* Maximum latency */\n+\tRTE_ATOMIC(uint64_t) jitter;      /* Latency variation */\n+} __rte_cache_aligned;\n+\n+/* per queue info stored in memxone */\n+static struct {\n+\tstruct rte_latency_stats stats[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];\n+} *latency_stats;\n+\n+static struct {\n+\tuint64_t prev_tsc;\n \tconst struct rte_eth_rxtx_callback *cb;\n-};\n+} rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];\n \n-static struct rxtx_cbs rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];\n-static struct rxtx_cbs tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];\n+static struct {\n+\tuint64_t prev_latency;\n+\tconst struct rte_eth_rxtx_callback *cb;\n+} tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];\n \n struct latency_stats_nameoff {\n \tchar name[RTE_ETH_XSTATS_NAME_SIZE];\n@@ -75,51 +68,92 @@ static const struct latency_stats_nameoff lat_stats_strings[] = {\n \t{\"jitter_ns\", offsetof(struct rte_latency_stats, jitter)},\n };\n \n-#define NUM_LATENCY_STATS (sizeof(lat_stats_strings) / \\\n-\t\t\t\tsizeof(lat_stats_strings[0]))\n+#define NUM_LATENCY_STATS RTE_DIM(lat_stats_strings)\n \n-int32_t\n-rte_latencystats_update(void)\n+static inline uint64_t\n+cycles_to_ns(uint64_t cycles)\n {\n-\tunsigned int i;\n-\tfloat *stats_ptr = NULL;\n-\tuint64_t values[NUM_LATENCY_STATS] = {0};\n-\tint ret;\n-\n-\tfor (i = 0; i < NUM_LATENCY_STATS; i++) {\n-\t\tstats_ptr = RTE_PTR_ADD(glob_stats,\n-\t\t\t\tlat_stats_strings[i].offset);\n-\t\tvalues[i] = (uint64_t)floor((*stats_ptr)/\n-\t\t\t\tlatencystat_cycles_per_ns());\n-\t}\n+\treturn (cycles * NSEC_PER_SEC) / rte_get_tsc_hz();\n+}\n \n-\tret = rte_metrics_update_values(RTE_METRICS_GLOBAL,\n-\t\t\t\t\tlatency_stats_index,\n-\t\t\t\t\tvalues, NUM_LATENCY_STATS);\n-\tif (ret < 0)\n-\t\tLATENCY_STATS_LOG(INFO, \"Failed to push the stats\");\n+static inline uint64_t\n+latencystat_read_ns(__rte_atomic const uint64_t *stat_ptr)\n+{\n+\treturn cycles_to_ns(rte_atomic_load_explicit(stat_ptr, rte_memory_order_relaxed));\n+}\n \n-\treturn ret;\n+static inline void\n+latencystat_write(__rte_atomic uint64_t *stat_ptr, uint64_t value)\n+{\n+\trte_atomic_store_explicit(stat_ptr, value, rte_memory_order_relaxed);\n }\n \n+/* aggregate data across all ports and queues */\n static void\n-rte_latencystats_fill_values(struct rte_metric_value *values)\n+latencystats_collect(uint64_t *values)\n {\n-\tunsigned int i;\n-\tfloat *stats_ptr = NULL;\n+\tunsigned int i, samples = 0;\n+\tuint16_t pid, qid;\n+\tint ret;\n+\tstruct rte_latency_stats sum = { };\n+\n+\tRTE_ETH_FOREACH_DEV(pid) {\n+\t\tstruct rte_eth_dev_info dev_info;\n+\n+\t\tret = rte_eth_dev_info_get(pid, &dev_info);\n+\t\tif (ret != 0) {\n+\t\t\tLATENCY_STATS_LOG(ERR,\n+\t\t\t\t\"Error during getting device (port %u) info: %s\",\n+\t\t\t\tpid, strerror(-ret));\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\tfor (qid = 0; qid < dev_info.nb_tx_queues; qid++) {\n+\t\t\tconst struct rte_latency_stats *stats = &latency_stats->stats[pid][qid];\n+\t\t\tuint64_t l;\n+\n+\t\t\tl = latencystat_read_ns(&stats->min_latency);\n+\t\t\tif (l != 0 && (sum.min_latency == 0 || l < sum.min_latency))\n+\t\t\t\tsum.min_latency = l;\n+\n+\t\t\tl = latencystat_read_ns(&stats->max_latency);\n+\t\t\tif (l < sum.max_latency)\n+\t\t\t\tsum.max_latency = l;\n+\n+\t\t\tsum.avg_latency += latencystat_read_ns(&stats->avg_latency);\n+\t\t\tsum.jitter += latencystat_read_ns(&stats->jitter);\n+\t\t\t++samples;\n+\t\t}\n+\n+\t}\n \n+\t/* adjust averages based on number of samples */\n+\tif (samples > 0) {\n+\t\tsum.avg_latency /= samples;\n+\t\tsum.jitter /= samples;\n+\t}\n+\n+\t/* convert cycle counts to ns */\n \tfor (i = 0; i < NUM_LATENCY_STATS; i++) {\n-\t\tstats_ptr = RTE_PTR_ADD(glob_stats,\n-\t\t\t\tlat_stats_strings[i].offset);\n-\t\tvalues[i].key = i;\n-\t\tvalues[i].value = (uint64_t)floor((*stats_ptr)/\n-\t\t\t\t\t\tlatencystat_cycles_per_ns());\n+\t\tuint64_t *stats_ptr = RTE_PTR_ADD(&sum, lat_stats_strings[i].offset);\n+\n+\t\tvalues[i] = *stats_ptr;\n \t}\n }\n \n+int32_t\n+rte_latencystats_update(void)\n+{\n+\tuint64_t values[NUM_LATENCY_STATS] = { 0 };\n+\n+\tlatencystats_collect(values);\n+\n+\treturn rte_metrics_update_values(RTE_METRICS_GLOBAL, latency_stats_index,\n+\t\t\t\t\tvalues, NUM_LATENCY_STATS);\n+}\n+\n static uint16_t\n-add_time_stamps(uint16_t pid __rte_unused,\n-\t\tuint16_t qid __rte_unused,\n+add_time_stamps(uint16_t pid, uint16_t qid,\n \t\tstruct rte_mbuf **pkts,\n \t\tuint16_t nb_pkts,\n \t\tuint16_t max_pkts __rte_unused,\n@@ -127,55 +161,49 @@ add_time_stamps(uint16_t pid __rte_unused,\n {\n \tunsigned int i;\n \tuint64_t diff_tsc, now;\n+\tuint64_t *prev_tsc = &rx_cbs[pid][qid].prev_tsc;\n \n \t/*\n \t * For every sample interval,\n \t * time stamp is marked on one received packet.\n \t */\n-\tnow = rte_rdtsc();\n \tfor (i = 0; i < nb_pkts; i++) {\n-\t\tdiff_tsc = now - prev_tsc;\n-\t\ttimer_tsc += diff_tsc;\n+\t\tif ((pkts[i]->ol_flags & timestamp_dynflag) != 0)\n+\t\t\tcontinue;\n \n-\t\tif ((pkts[i]->ol_flags & timestamp_dynflag) == 0\n-\t\t\t\t&& (timer_tsc >= samp_intvl)) {\n+\t\tnow = rte_rdtsc();\n+\t\tdiff_tsc = now - *prev_tsc;\n+\t\tif (diff_tsc >= samp_intvl) {\n \t\t\t*timestamp_dynfield(pkts[i]) = now;\n \t\t\tpkts[i]->ol_flags |= timestamp_dynflag;\n-\t\t\ttimer_tsc = 0;\n+\t\t\t*prev_tsc = now;\n+\t\t\tbreak;\n \t\t}\n-\t\tprev_tsc = now;\n-\t\tnow = rte_rdtsc();\n \t}\n \n \treturn nb_pkts;\n }\n \n static uint16_t\n-calc_latency(uint16_t pid __rte_unused,\n-\t\tuint16_t qid __rte_unused,\n-\t\tstruct rte_mbuf **pkts,\n-\t\tuint16_t nb_pkts,\n-\t\tvoid *_ __rte_unused)\n+calc_latency(uint16_t pid, uint16_t qid,\n+\t     struct rte_mbuf **pkts, uint16_t nb_pkts,\n+\t     void *user_cb __rte_unused)\n {\n-\tunsigned int i, cnt = 0;\n-\tuint64_t now;\n-\tfloat latency[nb_pkts];\n-\tstatic float prev_latency;\n-\t/*\n-\t * Alpha represents degree of weighting decrease in EWMA,\n-\t * a constant smoothing factor between 0 and 1. The value\n-\t * is used below for measuring average latency.\n-\t */\n-\tconst float alpha = 0.2;\n+\tstruct rte_latency_stats *stats = &latency_stats->stats[pid][qid];\n+\tunsigned int i;\n+\tuint64_t now, *prev_latency;\n \n+\tprev_latency = &tx_cbs[pid][qid].prev_latency;\n \tnow = rte_rdtsc();\n \tfor (i = 0; i < nb_pkts; i++) {\n-\t\tif (pkts[i]->ol_flags & timestamp_dynflag)\n-\t\t\tlatency[cnt++] = now - *timestamp_dynfield(pkts[i]);\n-\t}\n+\t\tuint64_t latency;\n+\t\tint64_t delta;\n+\n+\t\tif ((pkts[i]->ol_flags & timestamp_dynflag) == 0)\n+\t\t\tcontinue;\n+\n+\t\tlatency = now - *timestamp_dynfield(pkts[i]);\n \n-\trte_spinlock_lock(&glob_stats->lock);\n-\tfor (i = 0; i < cnt; i++) {\n \t\t/*\n \t\t * The jitter is calculated as statistical mean of interpacket\n \t\t * delay variation. The \"jitter estimate\" is computed by taking\n@@ -187,24 +215,25 @@ calc_latency(uint16_t pid __rte_unused,\n \t\t * Reference: Calculated as per RFC 5481, sec 4.1,\n \t\t * RFC 3393 sec 4.5, RFC 1889 sec.\n \t\t */\n-\t\tglob_stats->jitter +=  (fabsf(prev_latency - latency[i])\n-\t\t\t\t\t- glob_stats->jitter)/16;\n-\t\tif (glob_stats->min_latency == 0)\n-\t\t\tglob_stats->min_latency = latency[i];\n-\t\telse if (latency[i] < glob_stats->min_latency)\n-\t\t\tglob_stats->min_latency = latency[i];\n-\t\telse if (latency[i] > glob_stats->max_latency)\n-\t\t\tglob_stats->max_latency = latency[i];\n+\t\tdelta = *prev_latency - latency;\n+\t\t*prev_latency = latency;\n+\t\tlatencystat_write(&stats->jitter,\n+\t\t\t\t  stats->jitter + (delta - stats->jitter) / 16);\n+\n+\t\tif (stats->min_latency == 0 || latency < stats->min_latency)\n+\t\t\tlatencystat_write(&stats->min_latency, latency);\n+\t\telse if (latency > stats->max_latency)\n+\t\t\tlatencystat_write(&stats->max_latency, latency);\n+\n \t\t/*\n \t\t * The average latency is measured using exponential moving\n \t\t * average, i.e. using EWMA\n \t\t * https://en.wikipedia.org/wiki/Moving_average\n \t\t */\n-\t\tglob_stats->avg_latency +=\n-\t\t\talpha * (latency[i] - glob_stats->avg_latency);\n-\t\tprev_latency = latency[i];\n+\t\tdelta = latency - stats->avg_latency;\n+\t\tlatency = (delta + 3 * stats->avg_latency) / 4;\n+\t\tlatencystat_write(&stats->avg_latency, latency);\n \t}\n-\trte_spinlock_unlock(&glob_stats->lock);\n \n \treturn nb_pkts;\n }\n@@ -214,38 +243,34 @@ rte_latencystats_init(uint64_t app_samp_intvl,\n \t\trte_latency_stats_flow_type_fn user_cb)\n {\n \tunsigned int i;\n-\tuint16_t pid;\n-\tuint16_t qid;\n-\tstruct rxtx_cbs *cbs = NULL;\n-\tconst char *ptr_strings[NUM_LATENCY_STATS] = {0};\n-\tconst struct rte_memzone *mz = NULL;\n-\tconst unsigned int flags = 0;\n+\tuint16_t pid, qid;\n+\tconst char *ptr_strings[NUM_LATENCY_STATS];\n+\tconst struct rte_memzone *mz;\n \tint ret;\n \n \tif (rte_memzone_lookup(MZ_RTE_LATENCY_STATS))\n \t\treturn -EEXIST;\n \n-\t/** Allocate stats in shared memory fo multi process support */\n-\tmz = rte_memzone_reserve(MZ_RTE_LATENCY_STATS, sizeof(*glob_stats),\n-\t\t\t\t\trte_socket_id(), flags);\n+\t/** Allocate stats in shared memory for multi process support */\n+\tmz = rte_memzone_reserve(MZ_RTE_LATENCY_STATS, sizeof(*latency_stats),\n+\t\t\t\t\trte_socket_id(), 0);\n \tif (mz == NULL) {\n \t\tLATENCY_STATS_LOG(ERR, \"Cannot reserve memory: %s:%d\",\n \t\t\t__func__, __LINE__);\n \t\treturn -ENOMEM;\n \t}\n \n-\tglob_stats = mz->addr;\n-\trte_spinlock_init(&glob_stats->lock);\n-\tsamp_intvl = app_samp_intvl * latencystat_cycles_per_ns();\n+\tlatency_stats = mz->addr;\n+\tsamp_intvl = (app_samp_intvl * NSEC_PER_SEC) / rte_get_tsc_hz();\n \n-\t/** Register latency stats with stats library */\n+\t/* Register latency stats with stats library */\n \tfor (i = 0; i < NUM_LATENCY_STATS; i++)\n \t\tptr_strings[i] = lat_stats_strings[i].name;\n \n \tlatency_stats_index = rte_metrics_reg_names(ptr_strings,\n \t\t\t\t\t\t\tNUM_LATENCY_STATS);\n \tif (latency_stats_index < 0) {\n-\t\tLATENCY_STATS_LOG(DEBUG,\n+\t\tLATENCY_STATS_LOG(ERR,\n \t\t\t\"Failed to register latency stats names\");\n \t\treturn -1;\n \t}\n@@ -262,10 +287,11 @@ rte_latencystats_init(uint64_t app_samp_intvl,\n \t/** Register Rx/Tx callbacks */\n \tRTE_ETH_FOREACH_DEV(pid) {\n \t\tstruct rte_eth_dev_info dev_info;\n+\t\tconst struct rte_eth_rxtx_callback *cb;\n \n \t\tret = rte_eth_dev_info_get(pid, &dev_info);\n \t\tif (ret != 0) {\n-\t\t\tLATENCY_STATS_LOG(INFO,\n+\t\t\tLATENCY_STATS_LOG(NOTICE,\n \t\t\t\t\"Error during getting device (port %u) info: %s\",\n \t\t\t\tpid, strerror(-ret));\n \n@@ -273,23 +299,25 @@ rte_latencystats_init(uint64_t app_samp_intvl,\n \t\t}\n \n \t\tfor (qid = 0; qid < dev_info.nb_rx_queues; qid++) {\n-\t\t\tcbs = &rx_cbs[pid][qid];\n-\t\t\tcbs->cb = rte_eth_add_first_rx_callback(pid, qid,\n-\t\t\t\t\tadd_time_stamps, user_cb);\n-\t\t\tif (!cbs->cb)\n-\t\t\t\tLATENCY_STATS_LOG(INFO, \"Failed to \"\n-\t\t\t\t\t\"register Rx callback for pid=%d, \"\n-\t\t\t\t\t\"qid=%d\", pid, qid);\n+\t\t\tcb = rte_eth_add_first_rx_callback(pid, qid, add_time_stamps, user_cb);\n+\t\t\tif (cb)\n+\t\t\t\trx_cbs[pid][qid].cb = cb;\n+\t\t\telse\n+\t\t\t\tLATENCY_STATS_LOG(NOTICE,\n+\t\t\t\t\t\t  \"Failed to register Rx callback for pid=%d, qid=%d\",\n+\t\t\t\t\t\t  pid, qid);\n \t\t}\n+\n \t\tfor (qid = 0; qid < dev_info.nb_tx_queues; qid++) {\n-\t\t\tcbs = &tx_cbs[pid][qid];\n-\t\t\tcbs->cb =  rte_eth_add_tx_callback(pid, qid,\n-\t\t\t\t\tcalc_latency, user_cb);\n-\t\t\tif (!cbs->cb)\n-\t\t\t\tLATENCY_STATS_LOG(INFO, \"Failed to \"\n-\t\t\t\t\t\"register Tx callback for pid=%d, \"\n-\t\t\t\t\t\"qid=%d\", pid, qid);\n+\t\t\tcb = rte_eth_add_tx_callback(pid, qid, calc_latency, user_cb);\n+\t\t\tif (cb)\n+\t\t\t\ttx_cbs[pid][qid].cb = cb;\n+\t\t\telse\n+\t\t\t\tLATENCY_STATS_LOG(NOTICE,\n+\t\t\t\t\t\t  \"Failed to register Tx callback for pid=%d, qid=%d\",\n+\t\t\t\t\t\t  pid, qid);\n \t\t}\n+\n \t}\n \treturn 0;\n }\n@@ -297,19 +325,18 @@ rte_latencystats_init(uint64_t app_samp_intvl,\n int\n rte_latencystats_uninit(void)\n {\n-\tuint16_t pid;\n-\tuint16_t qid;\n-\tint ret = 0;\n-\tstruct rxtx_cbs *cbs = NULL;\n-\tconst struct rte_memzone *mz = NULL;\n+\tconst struct rte_memzone *mz;\n+\tuint16_t pid, qid;\n+\tint ret;\n \n \t/** De register Rx/Tx callbacks */\n \tRTE_ETH_FOREACH_DEV(pid) {\n \t\tstruct rte_eth_dev_info dev_info;\n+\t\tconst struct rte_eth_rxtx_callback *cb;\n \n \t\tret = rte_eth_dev_info_get(pid, &dev_info);\n \t\tif (ret != 0) {\n-\t\t\tLATENCY_STATS_LOG(INFO,\n+\t\t\tLATENCY_STATS_LOG(NOTICE,\n \t\t\t\t\"Error during getting device (port %u) info: %s\",\n \t\t\t\tpid, strerror(-ret));\n \n@@ -317,20 +344,23 @@ rte_latencystats_uninit(void)\n \t\t}\n \n \t\tfor (qid = 0; qid < dev_info.nb_rx_queues; qid++) {\n-\t\t\tcbs = &rx_cbs[pid][qid];\n-\t\t\tret = rte_eth_remove_rx_callback(pid, qid, cbs->cb);\n+\t\t\tcb = rx_cbs[pid][qid].cb;\n+\t\t\tif (cb == NULL)\n+\t\t\t\tcontinue;\n+\n+\t\t\tret = rte_eth_remove_rx_callback(pid, qid, cb);\n \t\t\tif (ret)\n-\t\t\t\tLATENCY_STATS_LOG(INFO, \"failed to \"\n-\t\t\t\t\t\"remove Rx callback for pid=%d, \"\n-\t\t\t\t\t\"qid=%d\", pid, qid);\n+\t\t\t\tLATENCY_STATS_LOG(NOTICE, \"Failed to remove Rx callback\");\n \t\t}\n+\n \t\tfor (qid = 0; qid < dev_info.nb_tx_queues; qid++) {\n-\t\t\tcbs = &tx_cbs[pid][qid];\n-\t\t\tret = rte_eth_remove_tx_callback(pid, qid, cbs->cb);\n+\t\t\tcb = tx_cbs[pid][qid].cb;\n+\t\t\tif (cb == NULL)\n+\t\t\t\tcontinue;\n+\n+\t\t\tret = rte_eth_remove_tx_callback(pid, qid, cb);\n \t\t\tif (ret)\n-\t\t\t\tLATENCY_STATS_LOG(INFO, \"failed to \"\n-\t\t\t\t\t\"remove Tx callback for pid=%d, \"\n-\t\t\t\t\t\"qid=%d\", pid, qid);\n+\t\t\t\tLATENCY_STATS_LOG(NOTICE, \"Failed to remove Tx callback\");\n \t\t}\n \t}\n \n@@ -360,6 +390,9 @@ rte_latencystats_get_names(struct rte_metric_name *names, uint16_t size)\n int\n rte_latencystats_get(struct rte_metric_value *values, uint16_t size)\n {\n+\tunsigned int i;\n+\tuint64_t stats[NUM_LATENCY_STATS];\n+\n \tif (size < NUM_LATENCY_STATS || values == NULL)\n \t\treturn NUM_LATENCY_STATS;\n \n@@ -371,11 +404,18 @@ rte_latencystats_get(struct rte_metric_value *values, uint16_t size)\n \t\t\t\t\"Latency stats memzone not found\");\n \t\t\treturn -ENOMEM;\n \t\t}\n-\t\tglob_stats =  mz->addr;\n+\n+\t\tlatency_stats = mz->addr;\n \t}\n \n \t/* Retrieve latency stats */\n-\trte_latencystats_fill_values(values);\n+\tlatencystats_collect(stats);\n+\n+\tfor (i = 0; i < NUM_LATENCY_STATS; i++) {\n+\t\tvalues[i].key = i;\n+\t\tvalues[i].value = stats[i];\n+\t}\n+\n \n \treturn NUM_LATENCY_STATS;\n }\n",
    "prefixes": [
        "v2"
    ]
}