Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/52276/?format=api
http://patches.dpdk.org/api/patches/52276/?format=api", "web_url": "http://patches.dpdk.org/project/dpdk/patch/20190404100127.29381-5-gage.eads@intel.com/", "project": { "id": 1, "url": "http://patches.dpdk.org/api/projects/1/?format=api", "name": "DPDK", "link_name": "dpdk", "list_id": "dev.dpdk.org", "list_email": "dev@dpdk.org", "web_url": "http://core.dpdk.org", "scm_url": "git://dpdk.org/dpdk", "webscm_url": "http://git.dpdk.org/dpdk", "list_archive_url": "https://inbox.dpdk.org/dev", "list_archive_url_format": "https://inbox.dpdk.org/dev/{}", "commit_url_format": "" }, "msgid": "<20190404100127.29381-5-gage.eads@intel.com>", "list_archive_url": "https://inbox.dpdk.org/dev/20190404100127.29381-5-gage.eads@intel.com", "date": "2019-04-04T10:01:23", "name": "[v10,4/8] test/stack: add stack perf test", "commit_ref": null, "pull_url": null, "state": "accepted", "archived": true, "hash": "4d917ea12e5457aa1d19ce0077dca24dd187e7bb", "submitter": { "id": 586, "url": "http://patches.dpdk.org/api/people/586/?format=api", "name": "Eads, Gage", "email": "gage.eads@intel.com" }, "delegate": { "id": 1, "url": "http://patches.dpdk.org/api/users/1/?format=api", "username": "tmonjalo", "first_name": "Thomas", "last_name": "Monjalon", "email": "thomas@monjalon.net" }, "mbox": "http://patches.dpdk.org/project/dpdk/patch/20190404100127.29381-5-gage.eads@intel.com/mbox/", "series": [ { "id": 4111, "url": "http://patches.dpdk.org/api/series/4111/?format=api", "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=4111", "date": "2019-04-04T10:01:19", "name": "Add stack library and new mempool handler", "version": 10, "mbox": "http://patches.dpdk.org/series/4111/mbox/" } ], "comments": "http://patches.dpdk.org/api/patches/52276/comments/", "check": "success", "checks": "http://patches.dpdk.org/api/patches/52276/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<dev-bounces@dpdk.org>", "X-Original-To": "patchwork@dpdk.org", "Delivered-To": "patchwork@dpdk.org", "Received": [ "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 4B9D41B150;\n\tThu, 4 Apr 2019 12:02:25 +0200 (CEST)", "from mga07.intel.com (mga07.intel.com [134.134.136.100])\n\tby dpdk.org (Postfix) with ESMTP id 0F9A21B120\n\tfor <dev@dpdk.org>; Thu, 4 Apr 2019 12:02:14 +0200 (CEST)", "from orsmga006.jf.intel.com ([10.7.209.51])\n\tby orsmga105.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t04 Apr 2019 03:02:14 -0700", "from txasoft-yocto.an.intel.com ([10.123.72.192])\n\tby orsmga006.jf.intel.com with ESMTP; 04 Apr 2019 03:02:13 -0700" ], "X-Amp-Result": "SKIPPED(no attachment in message)", "X-Amp-File-Uploaded": "False", "X-ExtLoop1": "1", "X-IronPort-AV": "E=Sophos;i=\"5.60,308,1549958400\"; d=\"scan'208\";a=\"132898902\"", "From": "Gage Eads <gage.eads@intel.com>", "To": "dev@dpdk.org", "Cc": "olivier.matz@6wind.com, arybchenko@solarflare.com,\n\tbruce.richardson@intel.com, konstantin.ananyev@intel.com,\n\tgavin.hu@arm.com, \n\tHonnappa.Nagarahalli@arm.com, nd@arm.com, thomas@monjalon.net", "Date": "Thu, 4 Apr 2019 05:01:23 -0500", "Message-Id": "<20190404100127.29381-5-gage.eads@intel.com>", "X-Mailer": "git-send-email 2.13.6", "In-Reply-To": "<20190404100127.29381-1-gage.eads@intel.com>", "References": "<20190403232020.12784-1-gage.eads@intel.com>\n\t<20190404100127.29381-1-gage.eads@intel.com>", "Subject": "[dpdk-dev] [PATCH v10 4/8] test/stack: add stack perf test", "X-BeenThere": "dev@dpdk.org", "X-Mailman-Version": "2.1.15", "Precedence": "list", "List-Id": "DPDK patches and discussions <dev.dpdk.org>", "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>", "List-Archive": "<http://mails.dpdk.org/archives/dev/>", "List-Post": "<mailto:dev@dpdk.org>", "List-Help": "<mailto:dev-request@dpdk.org?subject=help>", "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>", "Errors-To": "dev-bounces@dpdk.org", "Sender": "\"dev\" <dev-bounces@dpdk.org>" }, "content": "stack_perf_autotest tests the following with one lcore:\n- Cycles to attempt to pop an empty stack\n- Cycles to push then pop a single object\n- Cycles to push then pop a burst of 32 objects\n\nIt also tests the cycles to push then pop a burst of 8 and 32 objects with\nthe following lcore combinations (if possible):\n- Two hyperthreads\n- Two physical cores\n- Two physical cores on separate NUMA nodes\n- All available lcores\n\nSigned-off-by: Gage Eads <gage.eads@intel.com>\nReviewed-by: Olivier Matz <olivier.matz@6wind.com>\n---\n app/test/Makefile | 1 +\n app/test/meson.build | 2 +\n app/test/test_stack_perf.c | 345 +++++++++++++++++++++++++++++++++++++++++++++\n 3 files changed, 348 insertions(+)\n create mode 100644 app/test/test_stack_perf.c", "diff": "diff --git a/app/test/Makefile b/app/test/Makefile\nindex e5bde81af..b28bed2d4 100644\n--- a/app/test/Makefile\n+++ b/app/test/Makefile\n@@ -91,6 +91,7 @@ endif\n SRCS-y += test_rwlock.c\n \n SRCS-$(CONFIG_RTE_LIBRTE_STACK) += test_stack.c\n+SRCS-$(CONFIG_RTE_LIBRTE_STACK) += test_stack_perf.c\n \n SRCS-$(CONFIG_RTE_LIBRTE_TIMER) += test_timer.c\n SRCS-$(CONFIG_RTE_LIBRTE_TIMER) += test_timer_perf.c\ndiff --git a/app/test/meson.build b/app/test/meson.build\nindex 56ea13f53..02eb788a4 100644\n--- a/app/test/meson.build\n+++ b/app/test/meson.build\n@@ -96,6 +96,7 @@ test_sources = files('commands.c',\n \t'test_service_cores.c',\n \t'test_spinlock.c',\n \t'test_stack.c',\n+\t'test_stack_perf.c',\n \t'test_string_fns.c',\n \t'test_table.c',\n \t'test_table_acl.c',\n@@ -241,6 +242,7 @@ perf_test_names = [\n 'distributor_perf_autotest',\n 'ring_pmd_perf_autotest',\n 'pmd_perf_autotest',\n+ 'stack_perf_autotest',\n ]\n \n # All test cases in driver_test_names list are non-parallel\ndiff --git a/app/test/test_stack_perf.c b/app/test/test_stack_perf.c\nnew file mode 100644\nindex 000000000..a44fbb73e\n--- /dev/null\n+++ b/app/test/test_stack_perf.c\n@@ -0,0 +1,345 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2019 Intel Corporation\n+ */\n+\n+\n+#include <stdio.h>\n+#include <inttypes.h>\n+\n+#include <rte_atomic.h>\n+#include <rte_cycles.h>\n+#include <rte_launch.h>\n+#include <rte_pause.h>\n+#include <rte_stack.h>\n+\n+#include \"test.h\"\n+\n+#define STACK_NAME \"STACK_PERF\"\n+#define MAX_BURST 32\n+#define STACK_SIZE (RTE_MAX_LCORE * MAX_BURST)\n+\n+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))\n+\n+/*\n+ * Push/pop bulk sizes, marked volatile so they aren't treated as compile-time\n+ * constants.\n+ */\n+static volatile unsigned int bulk_sizes[] = {8, MAX_BURST};\n+\n+static rte_atomic32_t lcore_barrier;\n+\n+struct lcore_pair {\n+\tunsigned int c1;\n+\tunsigned int c2;\n+};\n+\n+static int\n+get_two_hyperthreads(struct lcore_pair *lcp)\n+{\n+\tunsigned int socket[2];\n+\tunsigned int core[2];\n+\tunsigned int id[2];\n+\n+\tRTE_LCORE_FOREACH(id[0]) {\n+\t\tRTE_LCORE_FOREACH(id[1]) {\n+\t\t\tif (id[0] == id[1])\n+\t\t\t\tcontinue;\n+\t\t\tcore[0] = lcore_config[id[0]].core_id;\n+\t\t\tcore[1] = lcore_config[id[1]].core_id;\n+\t\t\tsocket[0] = lcore_config[id[0]].socket_id;\n+\t\t\tsocket[1] = lcore_config[id[1]].socket_id;\n+\t\t\tif ((core[0] == core[1]) && (socket[0] == socket[1])) {\n+\t\t\t\tlcp->c1 = id[0];\n+\t\t\t\tlcp->c2 = id[1];\n+\t\t\t\treturn 0;\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\treturn 1;\n+}\n+\n+static int\n+get_two_cores(struct lcore_pair *lcp)\n+{\n+\tunsigned int socket[2];\n+\tunsigned int core[2];\n+\tunsigned int id[2];\n+\n+\tRTE_LCORE_FOREACH(id[0]) {\n+\t\tRTE_LCORE_FOREACH(id[1]) {\n+\t\t\tif (id[0] == id[1])\n+\t\t\t\tcontinue;\n+\t\t\tcore[0] = lcore_config[id[0]].core_id;\n+\t\t\tcore[1] = lcore_config[id[1]].core_id;\n+\t\t\tsocket[0] = lcore_config[id[0]].socket_id;\n+\t\t\tsocket[1] = lcore_config[id[1]].socket_id;\n+\t\t\tif ((core[0] != core[1]) && (socket[0] == socket[1])) {\n+\t\t\t\tlcp->c1 = id[0];\n+\t\t\t\tlcp->c2 = id[1];\n+\t\t\t\treturn 0;\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\treturn 1;\n+}\n+\n+static int\n+get_two_sockets(struct lcore_pair *lcp)\n+{\n+\tunsigned int socket[2];\n+\tunsigned int id[2];\n+\n+\tRTE_LCORE_FOREACH(id[0]) {\n+\t\tRTE_LCORE_FOREACH(id[1]) {\n+\t\t\tif (id[0] == id[1])\n+\t\t\t\tcontinue;\n+\t\t\tsocket[0] = lcore_config[id[0]].socket_id;\n+\t\t\tsocket[1] = lcore_config[id[1]].socket_id;\n+\t\t\tif (socket[0] != socket[1]) {\n+\t\t\t\tlcp->c1 = id[0];\n+\t\t\t\tlcp->c2 = id[1];\n+\t\t\t\treturn 0;\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\treturn 1;\n+}\n+\n+/* Measure the cycle cost of popping an empty stack. */\n+static void\n+test_empty_pop(struct rte_stack *s)\n+{\n+\tunsigned int iterations = 100000000;\n+\tvoid *objs[MAX_BURST];\n+\tunsigned int i;\n+\n+\tuint64_t start = rte_rdtsc();\n+\n+\tfor (i = 0; i < iterations; i++)\n+\t\trte_stack_pop(s, objs, bulk_sizes[0]);\n+\n+\tuint64_t end = rte_rdtsc();\n+\n+\tprintf(\"Stack empty pop: %.2F\\n\",\n+\t (double)(end - start) / iterations);\n+}\n+\n+struct thread_args {\n+\tstruct rte_stack *s;\n+\tunsigned int sz;\n+\tdouble avg;\n+};\n+\n+/* Measure the average per-pointer cycle cost of stack push and pop */\n+static int\n+bulk_push_pop(void *p)\n+{\n+\tunsigned int iterations = 1000000;\n+\tstruct thread_args *args = p;\n+\tvoid *objs[MAX_BURST] = {0};\n+\tunsigned int size, i;\n+\tstruct rte_stack *s;\n+\n+\ts = args->s;\n+\tsize = args->sz;\n+\n+\trte_atomic32_sub(&lcore_barrier, 1);\n+\twhile (rte_atomic32_read(&lcore_barrier) != 0)\n+\t\trte_pause();\n+\n+\tuint64_t start = rte_rdtsc();\n+\n+\tfor (i = 0; i < iterations; i++) {\n+\t\trte_stack_push(s, objs, size);\n+\t\trte_stack_pop(s, objs, size);\n+\t}\n+\n+\tuint64_t end = rte_rdtsc();\n+\n+\targs->avg = ((double)(end - start))/(iterations * size);\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * Run bulk_push_pop() simultaneously on pairs of cores, to measure stack\n+ * perf when between hyperthread siblings, cores on the same socket, and cores\n+ * on different sockets.\n+ */\n+static void\n+run_on_core_pair(struct lcore_pair *cores, struct rte_stack *s,\n+\t\t lcore_function_t fn)\n+{\n+\tstruct thread_args args[2];\n+\tunsigned int i;\n+\n+\tfor (i = 0; i < ARRAY_SIZE(bulk_sizes); i++) {\n+\t\trte_atomic32_set(&lcore_barrier, 2);\n+\n+\t\targs[0].sz = args[1].sz = bulk_sizes[i];\n+\t\targs[0].s = args[1].s = s;\n+\n+\t\tif (cores->c1 == rte_get_master_lcore()) {\n+\t\t\trte_eal_remote_launch(fn, &args[1], cores->c2);\n+\t\t\tfn(&args[0]);\n+\t\t\trte_eal_wait_lcore(cores->c2);\n+\t\t} else {\n+\t\t\trte_eal_remote_launch(fn, &args[0], cores->c1);\n+\t\t\trte_eal_remote_launch(fn, &args[1], cores->c2);\n+\t\t\trte_eal_wait_lcore(cores->c1);\n+\t\t\trte_eal_wait_lcore(cores->c2);\n+\t\t}\n+\n+\t\tprintf(\"Average cycles per object push/pop (bulk size: %u): %.2F\\n\",\n+\t\t bulk_sizes[i], (args[0].avg + args[1].avg) / 2);\n+\t}\n+}\n+\n+/* Run bulk_push_pop() simultaneously on 1+ cores. */\n+static void\n+run_on_n_cores(struct rte_stack *s, lcore_function_t fn, int n)\n+{\n+\tstruct thread_args args[RTE_MAX_LCORE];\n+\tunsigned int i;\n+\n+\tfor (i = 0; i < ARRAY_SIZE(bulk_sizes); i++) {\n+\t\tunsigned int lcore_id;\n+\t\tint cnt = 0;\n+\t\tdouble avg;\n+\n+\t\trte_atomic32_set(&lcore_barrier, n);\n+\n+\t\tRTE_LCORE_FOREACH_SLAVE(lcore_id) {\n+\t\t\tif (++cnt >= n)\n+\t\t\t\tbreak;\n+\n+\t\t\targs[lcore_id].s = s;\n+\t\t\targs[lcore_id].sz = bulk_sizes[i];\n+\n+\t\t\tif (rte_eal_remote_launch(fn, &args[lcore_id],\n+\t\t\t\t\t\t lcore_id))\n+\t\t\t\trte_panic(\"Failed to launch lcore %d\\n\",\n+\t\t\t\t\t lcore_id);\n+\t\t}\n+\n+\t\tlcore_id = rte_lcore_id();\n+\n+\t\targs[lcore_id].s = s;\n+\t\targs[lcore_id].sz = bulk_sizes[i];\n+\n+\t\tfn(&args[lcore_id]);\n+\n+\t\trte_eal_mp_wait_lcore();\n+\n+\t\tavg = args[rte_lcore_id()].avg;\n+\n+\t\tcnt = 0;\n+\t\tRTE_LCORE_FOREACH_SLAVE(lcore_id) {\n+\t\t\tif (++cnt >= n)\n+\t\t\t\tbreak;\n+\t\t\tavg += args[lcore_id].avg;\n+\t\t}\n+\n+\t\tprintf(\"Average cycles per object push/pop (bulk size: %u): %.2F\\n\",\n+\t\t bulk_sizes[i], avg / n);\n+\t}\n+}\n+\n+/*\n+ * Measure the cycle cost of pushing and popping a single pointer on a single\n+ * lcore.\n+ */\n+static void\n+test_single_push_pop(struct rte_stack *s)\n+{\n+\tunsigned int iterations = 16000000;\n+\tvoid *obj = NULL;\n+\tunsigned int i;\n+\n+\tuint64_t start = rte_rdtsc();\n+\n+\tfor (i = 0; i < iterations; i++) {\n+\t\trte_stack_push(s, &obj, 1);\n+\t\trte_stack_pop(s, &obj, 1);\n+\t}\n+\n+\tuint64_t end = rte_rdtsc();\n+\n+\tprintf(\"Average cycles per single object push/pop: %.2F\\n\",\n+\t ((double)(end - start)) / iterations);\n+}\n+\n+/* Measure the cycle cost of bulk pushing and popping on a single lcore. */\n+static void\n+test_bulk_push_pop(struct rte_stack *s)\n+{\n+\tunsigned int iterations = 8000000;\n+\tvoid *objs[MAX_BURST];\n+\tunsigned int sz, i;\n+\n+\tfor (sz = 0; sz < ARRAY_SIZE(bulk_sizes); sz++) {\n+\t\tuint64_t start = rte_rdtsc();\n+\n+\t\tfor (i = 0; i < iterations; i++) {\n+\t\t\trte_stack_push(s, objs, bulk_sizes[sz]);\n+\t\t\trte_stack_pop(s, objs, bulk_sizes[sz]);\n+\t\t}\n+\n+\t\tuint64_t end = rte_rdtsc();\n+\n+\t\tdouble avg = ((double)(end - start) /\n+\t\t\t (iterations * bulk_sizes[sz]));\n+\n+\t\tprintf(\"Average cycles per object push/pop (bulk size: %u): %.2F\\n\",\n+\t\t bulk_sizes[sz], avg);\n+\t}\n+}\n+\n+static int\n+test_stack_perf(void)\n+{\n+\tstruct lcore_pair cores;\n+\tstruct rte_stack *s;\n+\n+\trte_atomic32_init(&lcore_barrier);\n+\n+\ts = rte_stack_create(STACK_NAME, STACK_SIZE, rte_socket_id(), 0);\n+\tif (s == NULL) {\n+\t\tprintf(\"[%s():%u] failed to create a stack\\n\",\n+\t\t __func__, __LINE__);\n+\t\treturn -1;\n+\t}\n+\n+\tprintf(\"### Testing single element push/pop ###\\n\");\n+\ttest_single_push_pop(s);\n+\n+\tprintf(\"\\n### Testing empty pop ###\\n\");\n+\ttest_empty_pop(s);\n+\n+\tprintf(\"\\n### Testing using a single lcore ###\\n\");\n+\ttest_bulk_push_pop(s);\n+\n+\tif (get_two_hyperthreads(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing using two hyperthreads ###\\n\");\n+\t\trun_on_core_pair(&cores, s, bulk_push_pop);\n+\t}\n+\tif (get_two_cores(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing using two physical cores ###\\n\");\n+\t\trun_on_core_pair(&cores, s, bulk_push_pop);\n+\t}\n+\tif (get_two_sockets(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing using two NUMA nodes ###\\n\");\n+\t\trun_on_core_pair(&cores, s, bulk_push_pop);\n+\t}\n+\n+\tprintf(\"\\n### Testing on all %u lcores ###\\n\", rte_lcore_count());\n+\trun_on_n_cores(s, bulk_push_pop, rte_lcore_count());\n+\n+\trte_stack_free(s);\n+\treturn 0;\n+}\n+\n+REGISTER_TEST_COMMAND(stack_perf_autotest, test_stack_perf);\n", "prefixes": [ "v10", "4/8" ] }{ "id": 52276, "url": "