@@ -353,6 +353,7 @@ endforeach
# set other values pulled from the build options
dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
+dpdk_conf.set('RTE_USE_CC_MEMCPY', get_option('use_cc_memcpy'))
dpdk_conf.set('RTE_ENABLE_STDATOMIC', get_option('enable_stdatomic'))
dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp'))
dpdk_conf.set('RTE_PKTMBUF_HEADROOM', get_option('pkt_mbuf_headroom'))
@@ -24,6 +24,27 @@ DPDK Release 24.07
New Features
------------
+* **Compiler memcpy replaces custom DPDK implementation.**
+
+ The memory copy functions of ``<rte_memcpy.h>`` now delegate to the
+ standard memcpy() function, implemented by the compiler and the C
+ runtime (e.g., libc).
+
+ In this release of DPDK, the handcrafted, per-architecture memory
+ copy implementations are still available, and may be reactivated by
+ setting the new ``use_cc_memcpy`` build option to false.
+
+ The performance benefits of the custom DPDK rte_memcpy()
+ implementations have been diminishing with every new compiler
+ release, and with current toolchains the use of a custom memcpy()
+ implementation may even result in worse performance than the
+ standard memcpy().
+
+ An additional benefit of this change is that compilers and static
+ analysis tools have an easier time detecting incorrect usage of
+ rte_memcpy() (e.g., buffer overruns, or overlapping source and
+ destination buffers).
+
.. This section should contain new features added in this release.
Sample format:
@@ -5,10 +5,20 @@
#ifndef _RTE_MEMCPY_ARM_H_
#define _RTE_MEMCPY_ARM_H_
+#include <rte_config.h>
+
+#ifdef RTE_USE_CC_MEMCPY
+
+#include <generic/rte_memcpy.h>
+
+#else
+
#ifdef RTE_ARCH_64
#include <rte_memcpy_64.h>
#else
#include <rte_memcpy_32.h>
#endif
+#endif /* RTE_USE_CC_MEMCPY */
+
#endif /* _RTE_MEMCPY_ARM_H_ */
@@ -5,12 +5,19 @@
#ifndef _RTE_MEMCPY_H_
#define _RTE_MEMCPY_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/**
* @file
*
* Functions for vectorised implementation of memcpy().
*/
+#include <stdint.h>
+#include <string.h>
+
/**
* Copy 16 bytes from one location to another using optimised
* instructions. The locations should not overlap.
@@ -35,8 +42,6 @@ rte_mov16(uint8_t *dst, const uint8_t *src);
static inline void
rte_mov32(uint8_t *dst, const uint8_t *src);
-#ifdef __DOXYGEN__
-
/**
* Copy 48 bytes from one location to another using optimised
* instructions. The locations should not overlap.
@@ -49,8 +54,6 @@ rte_mov32(uint8_t *dst, const uint8_t *src);
static inline void
rte_mov48(uint8_t *dst, const uint8_t *src);
-#endif /* __DOXYGEN__ */
-
/**
* Copy 64 bytes from one location to another using optimised
* instructions. The locations should not overlap.
@@ -87,8 +90,6 @@ rte_mov128(uint8_t *dst, const uint8_t *src);
static inline void
rte_mov256(uint8_t *dst, const uint8_t *src);
-#ifdef __DOXYGEN__
-
/**
* Copy bytes from one location to another. The locations must not overlap.
*
@@ -111,6 +112,52 @@ rte_mov256(uint8_t *dst, const uint8_t *src);
static void *
rte_memcpy(void *dst, const void *src, size_t n);
-#endif /* __DOXYGEN__ */
+#ifdef RTE_USE_CC_MEMCPY
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 16); /* fixed 16-byte copy from src to dst, delegated to the standard memcpy() */
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 32); /* fixed 32-byte copy from src to dst, delegated to the standard memcpy() */
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 48); /* fixed 48-byte copy from src to dst, delegated to the standard memcpy() */
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 64); /* fixed 64-byte copy from src to dst, delegated to the standard memcpy() */
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 128); /* fixed 128-byte copy from src to dst, delegated to the standard memcpy() */
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 256); /* fixed 256-byte copy from src to dst, delegated to the standard memcpy() */
+}
+
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+ return memcpy(dst, src, n); /* copies n bytes from src to dst and hands back memcpy()'s return value (the destination pointer) */
+}
+#endif /* RTE_USE_CC_MEMCPY */
+
+#ifdef __cplusplus
+}
+#endif
#endif /* _RTE_MEMCPY_H_ */
@@ -5,57 +5,12 @@
#ifndef RTE_MEMCPY_LOONGARCH_H
#define RTE_MEMCPY_LOONGARCH_H
-#include <stdint.h>
-#include <string.h>
+#include <rte_config.h>
-#include "rte_common.h"
-
-#ifdef __cplusplus
-extern "C" {
+#ifndef RTE_USE_CC_MEMCPY
+#define RTE_USE_CC_MEMCPY
#endif
-#include "generic/rte_memcpy.h"
-
-static inline void
-rte_mov16(uint8_t *dst, const uint8_t *src)
-{
- memcpy(dst, src, 16);
-}
-
-static inline void
-rte_mov32(uint8_t *dst, const uint8_t *src)
-{
- memcpy(dst, src, 32);
-}
-
-static inline void
-rte_mov48(uint8_t *dst, const uint8_t *src)
-{
- memcpy(dst, src, 48);
-}
-
-static inline void
-rte_mov64(uint8_t *dst, const uint8_t *src)
-{
- memcpy(dst, src, 64);
-}
-
-static inline void
-rte_mov128(uint8_t *dst, const uint8_t *src)
-{
- memcpy(dst, src, 128);
-}
-
-static inline void
-rte_mov256(uint8_t *dst, const uint8_t *src)
-{
- memcpy(dst, src, 256);
-}
-
-#define rte_memcpy(d, s, n) memcpy((d), (s), (n))
-
-#ifdef __cplusplus
-}
-#endif
+#include <generic/rte_memcpy.h>
#endif /* RTE_MEMCPY_LOONGARCH_H */
@@ -6,6 +6,14 @@
#ifndef _RTE_MEMCPY_PPC_64_H_
#define _RTE_MEMCPY_PPC_64_H_
+#include <rte_config.h>
+
+#ifdef RTE_USE_CC_MEMCPY
+
+#include <generic/rte_memcpy.h>
+
+#else
+
#include <stdint.h>
#include <string.h>
@@ -215,4 +223,6 @@ rte_memcpy_func(void *dst, const void *src, size_t n)
}
#endif
+#endif /* RTE_USE_CC_MEMCPY */
+
#endif /* _RTE_MEMCPY_PPC_64_H_ */
@@ -7,57 +7,12 @@
#ifndef RTE_MEMCPY_RISCV_H
#define RTE_MEMCPY_RISCV_H
-#include <stdint.h>
-#include <string.h>
+#include <rte_config.h>
-#include "rte_common.h"
-
-#ifdef __cplusplus
-extern "C" {
+#ifndef RTE_USE_CC_MEMCPY
+#define RTE_USE_CC_MEMCPY
#endif
-#include "generic/rte_memcpy.h"
-
-static inline void
-rte_mov16(uint8_t *dst, const uint8_t *src)
-{
- memcpy(dst, src, 16);
-}
-
-static inline void
-rte_mov32(uint8_t *dst, const uint8_t *src)
-{
- memcpy(dst, src, 32);
-}
-
-static inline void
-rte_mov48(uint8_t *dst, const uint8_t *src)
-{
- memcpy(dst, src, 48);
-}
-
-static inline void
-rte_mov64(uint8_t *dst, const uint8_t *src)
-{
- memcpy(dst, src, 64);
-}
-
-static inline void
-rte_mov128(uint8_t *dst, const uint8_t *src)
-{
- memcpy(dst, src, 128);
-}
-
-static inline void
-rte_mov256(uint8_t *dst, const uint8_t *src)
-{
- memcpy(dst, src, 256);
-}
-
-#define rte_memcpy(d, s, n) memcpy((d), (s), (n))
-
-#ifdef __cplusplus
-}
-#endif
+#include <generic/rte_memcpy.h>
#endif /* RTE_MEMCPY_RISCV_H */
@@ -16,6 +16,7 @@ arch_headers = files(
'rte_spinlock.h',
'rte_vect.h',
)
+
arch_indirect_headers = files(
'rte_atomic_32.h',
'rte_atomic_64.h',
@@ -11,12 +11,19 @@
* Functions for SSE/AVX/AVX2/AVX512 implementation of memcpy().
*/
+#include <rte_config.h>
+
+#ifdef RTE_USE_CC_MEMCPY
+
+#include <generic/rte_memcpy.h>
+
+#else
+
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <rte_vect.h>
#include <rte_common.h>
-#include <rte_config.h>
#ifdef __cplusplus
extern "C" {
@@ -878,4 +885,6 @@ rte_memcpy(void *dst, const void *src, size_t n)
}
#endif
+#endif /* RTE_USE_CC_MEMCPY */
+
#endif /* _RTE_MEMCPY_X86_64_H_ */
@@ -60,3 +60,5 @@ option('tests', type: 'boolean', value: true, description:
'build unit tests')
option('use_hpet', type: 'boolean', value: false, description:
'use HPET timer in EAL')
+option('use_cc_memcpy', type: 'boolean', value: true, description:
+ 'Have the functions of <rte_memcpy.h> delegate to compiler/libc memcpy() instead of using custom implementation.')