@@ -46,7 +46,7 @@ ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
INC += rte_warnings.h
endif
-ARCH_INC := rte_atomic.h rte_atomic_arch.h rte_byteorder_arch.h rte_cycles_arch.h rte_prefetch_arch.h rte_spinlock_arch.h
+ARCH_INC := rte_atomic.h rte_atomic_arch.h rte_byteorder_arch.h rte_cycles_arch.h rte_prefetch_arch.h rte_spinlock_arch.h rte_memcpy_arch.h
SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include := $(addprefix include/,$(INC))
SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include/arch := \
new file mode 100644
@@ -0,0 +1,199 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCPY_ARCH_H_
+#define _RTE_MEMCPY_ARCH_H_
+
+#include <stdint.h>
+#include <string.h>
+#include <emmintrin.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:593) /* Stop unused variable warning (reg_a etc). */
+#endif
+
+/**
+ * Copy 16 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_arch_mov16(uint8_t *dst, const uint8_t *src)
+{
+ __m128i reg_a;
+ asm volatile (
+ "movdqu (%[src]), %[reg_a]\n\t"
+ "movdqu %[reg_a], (%[dst])\n\t"
+ : [reg_a] "=x" (reg_a)
+ : [src] "r" (src),
+ [dst] "r"(dst)
+ : "memory"
+ );
+}
+
+/**
+ * Copy 32 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_arch_mov32(uint8_t *dst, const uint8_t *src)
+{
+ __m128i reg_a, reg_b;
+ asm volatile (
+ "movdqu (%[src]), %[reg_a]\n\t"
+ "movdqu 16(%[src]), %[reg_b]\n\t"
+ "movdqu %[reg_a], (%[dst])\n\t"
+ "movdqu %[reg_b], 16(%[dst])\n\t"
+ : [reg_a] "=x" (reg_a),
+ [reg_b] "=x" (reg_b)
+ : [src] "r" (src),
+ [dst] "r"(dst)
+ : "memory"
+ );
+}
+
+/**
+ * Copy 48 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_arch_mov48(uint8_t *dst, const uint8_t *src)
+{
+ __m128i reg_a, reg_b, reg_c;
+ asm volatile (
+ "movdqu (%[src]), %[reg_a]\n\t"
+ "movdqu 16(%[src]), %[reg_b]\n\t"
+ "movdqu 32(%[src]), %[reg_c]\n\t"
+ "movdqu %[reg_a], (%[dst])\n\t"
+ "movdqu %[reg_b], 16(%[dst])\n\t"
+ "movdqu %[reg_c], 32(%[dst])\n\t"
+ : [reg_a] "=x" (reg_a),
+ [reg_b] "=x" (reg_b),
+ [reg_c] "=x" (reg_c)
+ : [src] "r" (src),
+ [dst] "r"(dst)
+ : "memory"
+ );
+}
+
+/**
+ * Copy 64 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_arch_mov64(uint8_t *dst, const uint8_t *src)
+{
+ __m128i reg_a, reg_b, reg_c, reg_d;
+ asm volatile (
+ "movdqu (%[src]), %[reg_a]\n\t"
+ "movdqu 16(%[src]), %[reg_b]\n\t"
+ "movdqu 32(%[src]), %[reg_c]\n\t"
+ "movdqu 48(%[src]), %[reg_d]\n\t"
+ "movdqu %[reg_a], (%[dst])\n\t"
+ "movdqu %[reg_b], 16(%[dst])\n\t"
+ "movdqu %[reg_c], 32(%[dst])\n\t"
+ "movdqu %[reg_d], 48(%[dst])\n\t"
+ : [reg_a] "=x" (reg_a),
+ [reg_b] "=x" (reg_b),
+ [reg_c] "=x" (reg_c),
+ [reg_d] "=x" (reg_d)
+ : [src] "r" (src),
+ [dst] "r"(dst)
+ : "memory"
+ );
+}
+
+/**
+ * Copy 128 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_arch_mov128(uint8_t *dst, const uint8_t *src)
+{
+ __m128i reg_a, reg_b, reg_c, reg_d, reg_e, reg_f, reg_g, reg_h;
+ asm volatile (
+ "movdqu (%[src]), %[reg_a]\n\t"
+ "movdqu 16(%[src]), %[reg_b]\n\t"
+ "movdqu 32(%[src]), %[reg_c]\n\t"
+ "movdqu 48(%[src]), %[reg_d]\n\t"
+ "movdqu 64(%[src]), %[reg_e]\n\t"
+ "movdqu 80(%[src]), %[reg_f]\n\t"
+ "movdqu 96(%[src]), %[reg_g]\n\t"
+ "movdqu 112(%[src]), %[reg_h]\n\t"
+ "movdqu %[reg_a], (%[dst])\n\t"
+ "movdqu %[reg_b], 16(%[dst])\n\t"
+ "movdqu %[reg_c], 32(%[dst])\n\t"
+ "movdqu %[reg_d], 48(%[dst])\n\t"
+ "movdqu %[reg_e], 64(%[dst])\n\t"
+ "movdqu %[reg_f], 80(%[dst])\n\t"
+ "movdqu %[reg_g], 96(%[dst])\n\t"
+ "movdqu %[reg_h], 112(%[dst])\n\t"
+ : [reg_a] "=x" (reg_a),
+ [reg_b] "=x" (reg_b),
+ [reg_c] "=x" (reg_c),
+ [reg_d] "=x" (reg_d),
+ [reg_e] "=x" (reg_e),
+ [reg_f] "=x" (reg_f),
+ [reg_g] "=x" (reg_g),
+ [reg_h] "=x" (reg_h)
+ : [src] "r" (src),
+ [dst] "r"(dst)
+ : "memory"
+ );
+}
+
+#endif /* _RTE_MEMCPY_ARCH_H_ */
\ No newline at end of file
@@ -37,12 +37,10 @@
/**
* @file
*
- * Functions for SSE implementation of memcpy().
+ * Functions for vector instruction implementation of memcpy().
*/
-#include <stdint.h>
-#include <string.h>
-#include <emmintrin.h>
+#include "arch/rte_memcpy_arch.h"
#ifdef __cplusplus
extern "C" {
@@ -64,15 +62,7 @@ extern "C" {
static inline void
rte_mov16(uint8_t *dst, const uint8_t *src)
{
- __m128i reg_a;
- asm volatile (
- "movdqu (%[src]), %[reg_a]\n\t"
- "movdqu %[reg_a], (%[dst])\n\t"
- : [reg_a] "=x" (reg_a)
- : [src] "r" (src),
- [dst] "r"(dst)
- : "memory"
- );
+ rte_arch_mov16(dst, src);
}
/**
@@ -87,18 +77,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src)
static inline void
rte_mov32(uint8_t *dst, const uint8_t *src)
{
- __m128i reg_a, reg_b;
- asm volatile (
- "movdqu (%[src]), %[reg_a]\n\t"
- "movdqu 16(%[src]), %[reg_b]\n\t"
- "movdqu %[reg_a], (%[dst])\n\t"
- "movdqu %[reg_b], 16(%[dst])\n\t"
- : [reg_a] "=x" (reg_a),
- [reg_b] "=x" (reg_b)
- : [src] "r" (src),
- [dst] "r"(dst)
- : "memory"
- );
+ rte_arch_mov32(dst, src);
}
/**
@@ -113,21 +92,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src)
static inline void
rte_mov48(uint8_t *dst, const uint8_t *src)
{
- __m128i reg_a, reg_b, reg_c;
- asm volatile (
- "movdqu (%[src]), %[reg_a]\n\t"
- "movdqu 16(%[src]), %[reg_b]\n\t"
- "movdqu 32(%[src]), %[reg_c]\n\t"
- "movdqu %[reg_a], (%[dst])\n\t"
- "movdqu %[reg_b], 16(%[dst])\n\t"
- "movdqu %[reg_c], 32(%[dst])\n\t"
- : [reg_a] "=x" (reg_a),
- [reg_b] "=x" (reg_b),
- [reg_c] "=x" (reg_c)
- : [src] "r" (src),
- [dst] "r"(dst)
- : "memory"
- );
+ rte_arch_mov48(dst, src);
}
/**
@@ -142,24 +107,7 @@ rte_mov48(uint8_t *dst, const uint8_t *src)
static inline void
rte_mov64(uint8_t *dst, const uint8_t *src)
{
- __m128i reg_a, reg_b, reg_c, reg_d;
- asm volatile (
- "movdqu (%[src]), %[reg_a]\n\t"
- "movdqu 16(%[src]), %[reg_b]\n\t"
- "movdqu 32(%[src]), %[reg_c]\n\t"
- "movdqu 48(%[src]), %[reg_d]\n\t"
- "movdqu %[reg_a], (%[dst])\n\t"
- "movdqu %[reg_b], 16(%[dst])\n\t"
- "movdqu %[reg_c], 32(%[dst])\n\t"
- "movdqu %[reg_d], 48(%[dst])\n\t"
- : [reg_a] "=x" (reg_a),
- [reg_b] "=x" (reg_b),
- [reg_c] "=x" (reg_c),
- [reg_d] "=x" (reg_d)
- : [src] "r" (src),
- [dst] "r"(dst)
- : "memory"
- );
+ rte_arch_mov64(dst, src);
}
/**
@@ -174,36 +122,7 @@ rte_mov64(uint8_t *dst, const uint8_t *src)
static inline void
rte_mov128(uint8_t *dst, const uint8_t *src)
{
- __m128i reg_a, reg_b, reg_c, reg_d, reg_e, reg_f, reg_g, reg_h;
- asm volatile (
- "movdqu (%[src]), %[reg_a]\n\t"
- "movdqu 16(%[src]), %[reg_b]\n\t"
- "movdqu 32(%[src]), %[reg_c]\n\t"
- "movdqu 48(%[src]), %[reg_d]\n\t"
- "movdqu 64(%[src]), %[reg_e]\n\t"
- "movdqu 80(%[src]), %[reg_f]\n\t"
- "movdqu 96(%[src]), %[reg_g]\n\t"
- "movdqu 112(%[src]), %[reg_h]\n\t"
- "movdqu %[reg_a], (%[dst])\n\t"
- "movdqu %[reg_b], 16(%[dst])\n\t"
- "movdqu %[reg_c], 32(%[dst])\n\t"
- "movdqu %[reg_d], 48(%[dst])\n\t"
- "movdqu %[reg_e], 64(%[dst])\n\t"
- "movdqu %[reg_f], 80(%[dst])\n\t"
- "movdqu %[reg_g], 96(%[dst])\n\t"
- "movdqu %[reg_h], 112(%[dst])\n\t"
- : [reg_a] "=x" (reg_a),
- [reg_b] "=x" (reg_b),
- [reg_c] "=x" (reg_c),
- [reg_d] "=x" (reg_d),
- [reg_e] "=x" (reg_e),
- [reg_f] "=x" (reg_f),
- [reg_g] "=x" (reg_g),
- [reg_h] "=x" (reg_h)
- : [src] "r" (src),
- [dst] "r"(dst)
- : "memory"
- );
+ rte_arch_mov128(dst, src);
}
#ifdef __INTEL_COMPILER
new file mode 100644
@@ -0,0 +1,199 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCPY_ARCH_H_
+#define _RTE_MEMCPY_ARCH_H_
+
+#include <stdint.h>
+#include <string.h>
+#include <emmintrin.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:593) /* Stop unused variable warning (reg_a etc). */
+#endif
+
+/**
+ * Copy 16 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_arch_mov16(uint8_t *dst, const uint8_t *src)
+{
+ __m128i reg_a;
+ asm volatile (
+ "movdqu (%[src]), %[reg_a]\n\t"
+ "movdqu %[reg_a], (%[dst])\n\t"
+ : [reg_a] "=x" (reg_a)
+ : [src] "r" (src),
+ [dst] "r"(dst)
+ : "memory"
+ );
+}
+
+/**
+ * Copy 32 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_arch_mov32(uint8_t *dst, const uint8_t *src)
+{
+ __m128i reg_a, reg_b;
+ asm volatile (
+ "movdqu (%[src]), %[reg_a]\n\t"
+ "movdqu 16(%[src]), %[reg_b]\n\t"
+ "movdqu %[reg_a], (%[dst])\n\t"
+ "movdqu %[reg_b], 16(%[dst])\n\t"
+ : [reg_a] "=x" (reg_a),
+ [reg_b] "=x" (reg_b)
+ : [src] "r" (src),
+ [dst] "r"(dst)
+ : "memory"
+ );
+}
+
+/**
+ * Copy 48 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_arch_mov48(uint8_t *dst, const uint8_t *src)
+{
+ __m128i reg_a, reg_b, reg_c;
+ asm volatile (
+ "movdqu (%[src]), %[reg_a]\n\t"
+ "movdqu 16(%[src]), %[reg_b]\n\t"
+ "movdqu 32(%[src]), %[reg_c]\n\t"
+ "movdqu %[reg_a], (%[dst])\n\t"
+ "movdqu %[reg_b], 16(%[dst])\n\t"
+ "movdqu %[reg_c], 32(%[dst])\n\t"
+ : [reg_a] "=x" (reg_a),
+ [reg_b] "=x" (reg_b),
+ [reg_c] "=x" (reg_c)
+ : [src] "r" (src),
+ [dst] "r"(dst)
+ : "memory"
+ );
+}
+
+/**
+ * Copy 64 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_arch_mov64(uint8_t *dst, const uint8_t *src)
+{
+ __m128i reg_a, reg_b, reg_c, reg_d;
+ asm volatile (
+ "movdqu (%[src]), %[reg_a]\n\t"
+ "movdqu 16(%[src]), %[reg_b]\n\t"
+ "movdqu 32(%[src]), %[reg_c]\n\t"
+ "movdqu 48(%[src]), %[reg_d]\n\t"
+ "movdqu %[reg_a], (%[dst])\n\t"
+ "movdqu %[reg_b], 16(%[dst])\n\t"
+ "movdqu %[reg_c], 32(%[dst])\n\t"
+ "movdqu %[reg_d], 48(%[dst])\n\t"
+ : [reg_a] "=x" (reg_a),
+ [reg_b] "=x" (reg_b),
+ [reg_c] "=x" (reg_c),
+ [reg_d] "=x" (reg_d)
+ : [src] "r" (src),
+ [dst] "r"(dst)
+ : "memory"
+ );
+}
+
+/**
+ * Copy 128 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_arch_mov128(uint8_t *dst, const uint8_t *src)
+{
+ __m128i reg_a, reg_b, reg_c, reg_d, reg_e, reg_f, reg_g, reg_h;
+ asm volatile (
+ "movdqu (%[src]), %[reg_a]\n\t"
+ "movdqu 16(%[src]), %[reg_b]\n\t"
+ "movdqu 32(%[src]), %[reg_c]\n\t"
+ "movdqu 48(%[src]), %[reg_d]\n\t"
+ "movdqu 64(%[src]), %[reg_e]\n\t"
+ "movdqu 80(%[src]), %[reg_f]\n\t"
+ "movdqu 96(%[src]), %[reg_g]\n\t"
+ "movdqu 112(%[src]), %[reg_h]\n\t"
+ "movdqu %[reg_a], (%[dst])\n\t"
+ "movdqu %[reg_b], 16(%[dst])\n\t"
+ "movdqu %[reg_c], 32(%[dst])\n\t"
+ "movdqu %[reg_d], 48(%[dst])\n\t"
+ "movdqu %[reg_e], 64(%[dst])\n\t"
+ "movdqu %[reg_f], 80(%[dst])\n\t"
+ "movdqu %[reg_g], 96(%[dst])\n\t"
+ "movdqu %[reg_h], 112(%[dst])\n\t"
+ : [reg_a] "=x" (reg_a),
+ [reg_b] "=x" (reg_b),
+ [reg_c] "=x" (reg_c),
+ [reg_d] "=x" (reg_d),
+ [reg_e] "=x" (reg_e),
+ [reg_f] "=x" (reg_f),
+ [reg_g] "=x" (reg_g),
+ [reg_h] "=x" (reg_h)
+ : [src] "r" (src),
+ [dst] "r"(dst)
+ : "memory"
+ );
+}
+
+#endif /* _RTE_MEMCPY_ARCH_H_ */