[dpdk-dev,02/12] Add atomic operations for IBM Power architecture

Message ID 1411724186-8036-3-git-send-email-bjzhuc@cn.ibm.com (mailing list archive)
State Superseded, archived

Commit Message

Chao Zhu Sept. 26, 2014, 9:36 a.m. UTC
  The atomic operations implemented with assembly code in DPDK only
support x86. This patch adds architecture-specific atomic operations for
the IBM Power architecture.

Signed-off-by: Chao Zhu <bjzhuc@cn.ibm.com>
---
 .../common/include/powerpc/arch/rte_atomic.h       |  387 ++++++++++++++++++++
 .../common/include/powerpc/arch/rte_atomic_arch.h  |  318 ++++++++++++++++
 2 files changed, 705 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_eal/common/include/powerpc/arch/rte_atomic.h
 create mode 100644 lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
  

Comments

Hemant Agrawal Sept. 29, 2014, 6:16 a.m. UTC | #1
Hi Chao,

This patch seems to be incomplete. You may also need to patch librte_eal/common/include/rte_atomic.h,
e.g.
#if !(defined RTE_ARCH_X86_64) || !(defined RTE_ARCH_I686)
#include <arch/rte_atomic.h>
#else /* if Intel */

Otherwise you will get compilation errors for "_mm_mfence".

The same is true for the other common header files as well.
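
A minimal sketch of the kind of dispatch being suggested (illustrative only: the include paths follow the snippet above, and the test uses && since both x86 macros must be absent before falling back to the arch-specific header):

/* librte_eal/common/include/rte_atomic.h -- illustrative sketch */
#if !(defined RTE_ARCH_X86_64) && !(defined RTE_ARCH_I686)
/* non-x86: pull in the architecture-specific implementation */
#include <arch/rte_atomic.h>
#else
/* x86: keep the existing SSE-based code, e.g. _mm_mfence() */
#include <emmintrin.h>
#endif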


Regards,
Hemant

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Chao Zhu
> Sent: 26/Sep/2014 3:06 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH 02/12] Add atomic operations for IBM Power
> architecture
> 
> The atomic operations implemented with assembly code in DPDK only support
> x86. This patch add architecture specific atomic operations for IBM Power
> architecture.
> 
> Signed-off-by: Chao Zhu <bjzhuc@cn.ibm.com>
> ---
>  .../common/include/powerpc/arch/rte_atomic.h       |  387
> ++++++++++++++++++++
>  .../common/include/powerpc/arch/rte_atomic_arch.h  |  318
> ++++++++++++++++
>  2 files changed, 705 insertions(+), 0 deletions(-)  create mode 100644
> lib/librte_eal/common/include/powerpc/arch/rte_atomic.h
>  create mode 100644
> lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
> 
> diff --git a/lib/librte_eal/common/include/powerpc/arch/rte_atomic.h
> b/lib/librte_eal/common/include/powerpc/arch/rte_atomic.h
> new file mode 100644
> index 0000000..7f5214e
> --- /dev/null
> +++ b/lib/librte_eal/common/include/powerpc/arch/rte_atomic.h
> @@ -0,0 +1,387 @@
> +/*
> + *   BSD LICENSE
> + *
> + *   Copyright (C) IBM Corporation 2014.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of IBM Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> +*/
> +
> +/*
> + * Inspired from FreeBSD src/sys/powerpc/include/atomic.h
> + * Copyright (c) 2008 Marcel Moolenaar
> + * Copyright (c) 2001 Benno Rice
> + * Copyright (c) 2001 David E. O'Brien
> + * Copyright (c) 1998 Doug Rabson
> + * All rights reserved.
> + */
> +
> +#ifndef _RTE_ATOMIC_H_
> +#error "don't include this file directly, please include generic <rte_atomic.h>"
> +#endif
> +
> +#ifndef _RTE_POWERPC_64_ATOMIC_H_
> +#define _RTE_POWERPC_64_ATOMIC_H_
> +
> +/*------------------------- 64 bit atomic operations
> +-------------------------*/
> +
> +/**
> + * An atomic compare and set function used by the mutex functions.
> + * (atomic) equivalent to:
> + *   if (*dst == exp)
> + *     *dst = src (all 64-bit words)
> + *
> + * @param dst
> + *   The destination into which the value will be written.
> + * @param exp
> + *   The expected value.
> + * @param src
> + *   The new value.
> + * @return
> + *   Non-zero on success; 0 on failure.
> + */
> +static inline int
> +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
> +{
> +	unsigned int ret = 0;
> +
> +	asm volatile (
> +			"\tlwsync\n"
> +			"1: ldarx %[ret], 0, %[dst]\n"
> +			"cmpld %[exp], %[ret]\n"
> +			"bne 2f\n"
> +			"stdcx. %[src], 0, %[dst]\n"
> +			"bne- 1b\n"
> +			"li %[ret], 1\n"
> +			"b 3f\n"
> +			"2:\n"
> +			"stdcx. %[ret], 0, %[dst]\n"
> +			"li %[ret], 0\n"
> +			"3:\n"
> +			"isync\n"
> +			: [ret] "=&r" (ret), "=m" (*dst)
> +			: [dst] "r" (dst), [exp] "r" (exp), [src] "r" (src), "m" (*dst)
> +			: "cc", "memory");
> +	return ret;
> +}
> +
> +/**
> + * The atomic counter structure.
> + */
> +typedef struct {
> +	volatile int64_t cnt;  /**< Internal counter value. */ }
> +rte_atomic64_t;
> +
> +/**
> + * Static initializer for an atomic counter.
> + */
> +#define RTE_ATOMIC64_INIT(val) { (val) }
> +
> +/**
> + * Initialize the atomic counter.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + */
> +static inline void
> +rte_atomic64_init(rte_atomic64_t *v)
> +{
> +	v->cnt = 0;
> +}
> +
> +/**
> + * Atomically read a 64-bit counter.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + * @return
> + *   The value of the counter.
> + */
> +static inline int64_t
> +rte_atomic64_read(rte_atomic64_t *v)
> +{
> +	long ret;
> +
> +	asm volatile("ld%U1%X1 %[ret],%[cnt]" : [ret] "=r"(ret) : [cnt]
> +"m"(v->cnt));
> +
> +	return ret;
> +}
> +
> +/**
> + * Atomically set a 64-bit counter.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + * @param new_value
> + *   The new value of the counter.
> + */
> +static inline void
> +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) {
> +	asm volatile("std%U0%X0 %[new_value],%[cnt]" : [cnt] "=m"(v->cnt) :
> +[new_value] "r"(new_value)); }
> +
> +/**
> + * Atomically add a 64-bit value to a counter.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + * @param inc
> + *   The value to be added to the counter.
> + */
> +static inline void
> +rte_atomic64_add(rte_atomic64_t *v, int64_t inc) {
> +	long t;
> +
> +	asm volatile(
> +			"1: ldarx %[t],0,%[cnt]\n"
> +			"add %[t],%[inc],%[t]\n"
> +			"stdcx. %[t],0,%[cnt]\n"
> +			"bne- 1b\n"
> +			: [t] "=&r" (t), "=m" (v->cnt)
> +			: [cnt] "r" (&v->cnt), [inc] "r" (inc), "m" (v->cnt)
> +			: "cc", "memory");
> +}
> +
> +/**
> + * Atomically subtract a 64-bit value from a counter.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + * @param dec
> + *   The value to be subtracted from the counter.
> + */
> +static inline void
> +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) {
> +	long t;
> +
> +	asm volatile(
> +			"1: ldarx %[t],0,%[cnt]\n"
> +			"subf %[t],%[dec],%[t]\n"
> +			"stdcx. %[t],0,%[cnt]\n"
> +			"bne- 1b\n"
> +			: [t] "=&r" (t), "+m" (v->cnt)
> +			: [cnt] "r" (&v->cnt), [dec] "r" (dec), "m" (v->cnt)
> +			: "cc", "memory");
> +}
> +
> +/**
> + * Atomically increment a 64-bit counter by one and test.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + */
> +static inline void
> +rte_atomic64_inc(rte_atomic64_t *v)
> +{
> +	long t;
> +
> +	asm volatile(
> +			"1: ldarx %[t],0,%[cnt]\n"
> +			"addic %[t],%[t],1\n"
> +			"stdcx. %[t],0,%[cnt] \n"
> +			"bne- 1b\n"
> +			: [t] "=&r" (t), "+m" (v->cnt)
> +			: [cnt] "r" (&v->cnt), "m" (v->cnt)
> +			: "cc", "xer", "memory");
> +}
> +
> +/**
> + * Atomically decrement a 64-bit counter by one and test.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + */
> +static inline void
> +rte_atomic64_dec(rte_atomic64_t *v)
> +{
> +	long t;
> +
> +	asm volatile(
> +			"1: ldarx %[t],0,%[cnt]\n"
> +			"addic %[t],%[t],-1\n"
> +			"stdcx. %[t],0,%[cnt]\n"
> +			"bne- 1b\n"
> +			: [t] "=&r" (t), "+m" (v->cnt)
> +			: [cnt] "r" (&v->cnt), "m" (v->cnt)
> +			: "cc", "xer", "memory");
> +}
> +
> +/**
> + * Add a 64-bit value to an atomic counter and return the result.
> + *
> + * Atomically adds the 64-bit value (inc) to the atomic counter (v) and
> + * returns the value of v after the addition.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + * @param inc
> + *   The value to be added to the counter.
> + * @return
> + *   The value of v after the addition.
> + */
> +static inline int64_t
> +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) {
> +	long ret;
> +
> +	asm volatile(
> +			"\n\tlwsync\n"
> +			"1: ldarx %[ret],0,%[cnt]\n"
> +			"add %[ret],%[inc],%[ret]\n"
> +			"stdcx. %[ret],0,%[cnt]\n"
> +			"bne- 1b\n"
> +			"isync\n"
> +			: [ret] "=&r" (ret)
> +			: [inc] "r" (inc), [cnt] "r" (&v->cnt)
> +			: "cc", "memory");
> +
> +	return ret;
> +}
> +
> +/**
> + * Subtract a 64-bit value from an atomic counter and return the result.
> + *
> + * Atomically subtracts the 64-bit value (dec) from the atomic counter
> +(v)
> + * and returns the value of v after the subtraction.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + * @param dec
> + *   The value to be subtracted from the counter.
> + * @return
> + *   The value of v after the subtraction.
> + */
> +static inline int64_t
> +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) {
> +	long ret;
> +
> +	asm volatile(
> +			"\n\tlwsync\n"
> +			"1: ldarx %[ret],0,%[cnt]\n"
> +			"subf %[ret],%[dec],%[ret]\n"
> +			"stdcx. %[ret],0,%[cnt] \n"
> +			"bne- 1b\n"
> +			"isync\n"
> +			: [ret] "=&r" (ret)
> +			: [dec] "r" (dec), [cnt] "r" (&v->cnt)
> +			: "cc", "memory");
> +
> +	return ret;
> +}
> +
> +static __inline__ long rte_atomic64_inc_return(rte_atomic64_t *v) {
> +	long ret;
> +
> +	asm volatile(
> +			"\n\tlwsync\n"
> +			"1: ldarx %[ret],0,%[cnt]\n"
> +			"addic %[ret],%[ret],1\n"
> +			"stdcx. %[ret],0,%[cnt]\n"
> +			"bne- 1b\n"
> +			"isync\n"
> +			: [ret] "=&r" (ret)
> +			: [cnt] "r" (&v->cnt)
> +			: "cc", "xer", "memory");
> +
> +	return ret;
> +}
> +/**
> + * Atomically increment a 64-bit counter by one and test.
> + *
> + * Atomically increments the atomic counter (v) by one and returns
> + * true if the result is 0, or false in all other cases.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + * @return
> + *   True if the result after the addition is 0; false otherwise.
> + */
> +#define rte_atomic64_inc_and_test(v) (rte_atomic64_inc_return(v) == 0)
> +
> +static __inline__ long rte_atomic64_dec_return(rte_atomic64_t *v) {
> +	long ret;
> +
> +	asm volatile(
> +			"\n\tlwsync\n"
> +			"1: ldarx %[ret],0,%[cnt]\n"
> +			"addic %[ret],%[ret],-1\n"
> +			"stdcx. %[ret],0,%[cnt]\n"
> +			"bne- 1b\n"
> +			"isync\n"
> +			: [ret] "=&r" (ret)
> +			: [cnt] "r" (&v->cnt)
> +			: "cc", "xer", "memory");
> +
> +	return ret;
> +}
> +/**
> + * Atomically decrement a 64-bit counter by one and test.
> + *
> + * Atomically decrements the atomic counter (v) by one and returns true
> +if
> + * the result is 0, or false in all other cases.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + * @return
> + *   True if the result after subtraction is 0; false otherwise.
> + */
> +#define rte_atomic64_dec_and_test(v)    (rte_atomic64_dec_return((v)) ==
> 0)
> +
> +/**
> + * Atomically test and set a 64-bit atomic counter.
> + *
> + * If the counter value is already set, return 0 (failed). Otherwise,
> +set
> + * the counter value to 1 and return 1 (success).
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + * @return
> + *   0 if failed; else 1, success.
> + */
> +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) {
> +	return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); }
> +
> +/**
> + * Atomically set a 64-bit counter to 0.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + */
> +static inline void rte_atomic64_clear(rte_atomic64_t *v) {
> +	v->cnt = 0;
> +}
> +
> +#endif /* _RTE_POWERPC_64_ATOMIC_H_ */
> +
> diff --git a/lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
> b/lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
> new file mode 100644
> index 0000000..fe5666e
> --- /dev/null
> +++ b/lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
> @@ -0,0 +1,318 @@
> +/*
> + *   BSD LICENSE
> + *
> + *   Copyright (C) IBM Corporation 2014.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of IBM Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> +*/
> +
> +/*
> + * Inspired from FreeBSD src/sys/powerpc/include/atomic.h
> + * Copyright (c) 2008 Marcel Moolenaar
> + * Copyright (c) 2001 Benno Rice
> + * Copyright (c) 2001 David E. O'Brien
> + * Copyright (c) 1998 Doug Rabson
> + * All rights reserved.
> + */
> +
> +#ifndef _RTE_ATOMIC_H_
> +#error "don't include this file directly, please include generic <rte_atomic.h>"
> +#endif
> +
> +#ifndef _RTE_ATOMIC_ARCH_H_
> +#define _RTE_ATOMIC_ARCH_H_
> +
> +#include <stdint.h>
> +
> +/**
> + * General memory barrier.
> + *
> + * Guarantees that the LOAD and STORE operations generated before the
> + * barrier occur before the LOAD and STORE operations generated after.
> + */
> +#define	rte_arch_mb()  asm volatile("sync" : : : "memory")
> +
> +/**
> + * Write memory barrier.
> + *
> + * Guarantees that the STORE operations generated before the barrier
> + * occur before the STORE operations generated after.
> + */
> +#define	rte_arch_wmb() asm volatile("sync" : : : "memory")
> +
> +/**
> + * Read memory barrier.
> + *
> + * Guarantees that the LOAD operations generated before the barrier
> + * occur before the LOAD operations generated after.
> + */
> +#define	rte_arch_rmb() asm volatile("sync" : : : "memory")
> +
> +#define	rte_arch_compiler_barrier() do {		\
> +	asm volatile ("" : : : "memory");	\
> +} while(0)
> +
> +/*------------------------- 16 bit atomic operations
> +-------------------------*/
> +
> +/**
> + * The atomic counter structure.
> + */
> +typedef struct {
> +	volatile int16_t cnt; /**< An internal counter value. */ }
> +rte_atomic16_t;
> +
> +/**
> + * Atomic compare and set.
> + *
> + * (atomic) equivalent to:
> + *   if (*dst == exp)
> + *     *dst = src (all 16-bit words)
> + *
> + * @param dst
> + *   The destination location into which the value will be written.
> + * @param exp
> + *   The expected value.
> + * @param src
> + *   The new value.
> + * @return
> + *   Non-zero on success; 0 on failure.
> + */
> +static inline int
> +rte_arch_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t
> +src) {
> +	return __atomic_compare_exchange(dst, &exp, &src, 0,
> __ATOMIC_ACQUIRE,
> +__ATOMIC_ACQUIRE) ? 1 : 0; }
> +
> +/**
> + * Atomically increment a counter by one.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + */
> +static inline void
> +rte_arch_atomic16_inc(rte_atomic16_t *v) {
> +	__atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE); }
> +
> +/**
> + * Atomically decrement a counter by one.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + */
> +static inline void
> +rte_arch_atomic16_dec(rte_atomic16_t *v) {
> +	__atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE); }
> +
> +/**
> + * Atomically increment a 16-bit counter by one and test.
> + *
> + * Atomically increments the atomic counter (v) by one and returns true
> +if
> + * the result is 0, or false in all other cases.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + * @return
> + *   True if the result after the increment operation is 0; false otherwise.
> + */
> +static inline int rte_arch_atomic16_inc_and_test(rte_atomic16_t *v) {
> +	return (__atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0); }
> +
> +/**
> + * Atomically decrement a 16-bit counter by one and test.
> + *
> + * Atomically decrements the atomic counter (v) by one and returns true
> +if
> + * the result is 0, or false in all other cases.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + * @return
> + *   True if the result after the decrement operation is 0; false otherwise.
> + */
> +static inline int rte_arch_atomic16_dec_and_test(rte_atomic16_t *v) {
> +	return (__atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0); }
> +
> +/*------------------------- 32 bit atomic operations
> +-------------------------*/
> +
> +/**
> + * The atomic counter structure.
> + */
> +typedef struct {
> +	volatile int32_t cnt; /**< An internal counter value. */ }
> +rte_atomic32_t;
> +
> +/**
> + * Atomic compare and set.
> + *
> + * (atomic) equivalent to:
> + *   if (*dst == exp)
> + *     *dst = src (all 32-bit words)
> + *
> + * @param dst
> + *   The destination location into which the value will be written.
> + * @param exp
> + *   The expected value.
> + * @param src
> + *   The new value.
> + * @return
> + *   Non-zero on success; 0 on failure.
> + */
> +static inline int
> +rte_arch_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t
> +src) {
> +	unsigned int ret = 0;
> +
> +	asm volatile(
> +			"\tlwsync\n"
> +			"1:\tlwarx %[ret], 0, %[dst]\n"
> +			"cmplw %[exp], %[ret]\n"
> +			"bne 2f\n"
> +			"stwcx. %[src], 0, %[dst]\n"
> +			"bne- 1b\n"
> +			"li %[ret], 1\n"
> +			"b 3f\n"
> +			"2:\n"
> +			"stwcx. %[ret], 0, %[dst]\n"
> +			"li %[ret], 0\n"
> +			"3:\n"
> +			"isync\n"
> +			: [ret] "=&r" (ret), "=m" (*dst)
> +			: [dst] "r" (dst), [exp] "r" (exp), [src] "r" (src), "m" (*dst)
> +			: "cc", "memory");
> +
> +	return ret;
> +}
> +
> +/**
> + * Atomically increment a counter by one.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + */
> +static inline void
> +rte_arch_atomic32_inc(rte_atomic32_t *v) {
> +	int t;
> +
> +	asm volatile(
> +			"1: lwarx %[t],0,%[cnt]\n"
> +			"addic %[t],%[t],1\n"
> +			"stwcx. %[t],0,%[cnt]\n"
> +			"bne- 1b\n"
> +			: [t] "=&r" (t), "=m" (v->cnt)
> +			: [cnt] "r" (&v->cnt), "m" (v->cnt)
> +			: "cc", "xer", "memory");
> +}
> +
> +/**
> + * Atomically decrement a counter by one.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + */
> +static inline void
> +rte_arch_atomic32_dec(rte_atomic32_t *v) {
> +	int t;
> +
> +	asm volatile(
> +			"1: lwarx %[t],0,%[cnt]\n"
> +			"addic %[t],%[t],-1\n"
> +			"stwcx. %[t],0,%[cnt]\n"
> +			"bne- 1b\n"
> +			: [t] "=&r" (t), "=m" (v->cnt)
> +			: [cnt] "r" (&v->cnt), "m" (v->cnt)
> +			: "cc", "xer", "memory");
> +}
> +
> +/**
> + * Atomically increment a 32-bit counter by one and test.
> + *
> + * Atomically increments the atomic counter (v) by one and returns true
> +if
> + * the result is 0, or false in all other cases.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + * @return
> + *   True if the result after the increment operation is 0; false otherwise.
> + */
> +static inline int rte_arch_atomic32_inc_and_test(rte_atomic32_t *v) {
> +	int ret;
> +
> +	asm volatile(
> +			"\n\tlwsync\n"
> +			"1: lwarx %[ret],0,%[cnt]\n"
> +			"addic	%[ret],%[ret],1\n"
> +			"stwcx. %[ret],0,%[cnt]\n"
> +			"bne- 1b\n"
> +			"isync\n"
> +			: [ret] "=&r" (ret)
> +			: [cnt] "r" (&v->cnt)
> +			: "cc", "xer", "memory");
> +
> +	return (ret == 0);
> +}
> +
> +/**
> + * Atomically decrement a 32-bit counter by one and test.
> + *
> + * Atomically decrements the atomic counter (v) by one and returns true
> +if
> + * the result is 0, or false in all other cases.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + * @return
> + *   True if the result after the decrement operation is 0; false otherwise.
> + */
> +static inline int rte_arch_atomic32_dec_and_test(rte_atomic32_t *v) {
> +	int ret;
> +
> +	asm volatile(
> +			"\n\tlwsync\n"
> +			"1: lwarx %[ret],0,%[cnt]\n"
> +			"addic %[ret],%[ret],-1\n"
> +			"stwcx. %[ret],0,%[cnt]\n"
> +			"bne- 1b\n"
> +			"isync\n"
> +			: [ret] "=&r" (ret)
> +			: [cnt] "r" (&v->cnt)
> +			: "cc", "xer", "memory");
> +
> +	return (ret == 0);
> +}
> +
> +#endif /* _RTE_ATOMIC_ARCH_H_ */
> +
> --
> 1.7.1
  
Chao Zhu Sept. 29, 2014, 6:41 a.m. UTC | #2
Hi, Hemant 

Actually, I submitted another set of patches to split out the architecture-specific
operations, which includes the patch to
librte_eal/common/include/rte_atomic.h. Please refer to the previous
email.

Best Regards!
------------------------------
Chao Zhu (祝超)
Research Staff Member
Cloud Infrastructure and Technology Group
IBM China Research Lab
Building 19 Zhongguancun Software Park
8 Dongbeiwang West Road, Haidian District,
Beijing, PRC. 100193
Tel: +86-10-58748711
Email: bjzhuc@cn.ibm.com




From:   "Hemant@freescale.com" <Hemant@freescale.com>
To:     Chao CH Zhu/China/IBM@IBMCN, "dev@dpdk.org" <dev@dpdk.org>
Date:   2014/09/29 14:15
Subject:        RE: [dpdk-dev] [PATCH 02/12] Add atomic operations for IBM Power architecture

Hi Chao,

This Patch seems to be incomplete. You may also need to patch the 
librte_eal\common\include\rte_atomic.h 
e.g.
#if !(defined RTE_ARCH_X86_64) || !(defined RTE_ARCH_I686)
#include <arch/rte_atomic.h>
#else /* if Intel*/

Otherwise you shall be getting compilation errors for "_mm_mfence"

Similar is true for other common header files as well.


Regards,
Hemant

  
Ananyev, Konstantin Oct. 16, 2014, 12:39 a.m. UTC | #3
Hi,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Chao Zhu
> Sent: Friday, September 26, 2014 10:36 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH 02/12] Add atomic operations for IBM Power architecture
> 
> The atomic operations implemented with assembly code in DPDK only
> support x86. This patch add architecture specific atomic operations for
> IBM Power architecture.
> 
> Signed-off-by: Chao Zhu <bjzhuc@cn.ibm.com>
> ---
>  .../common/include/powerpc/arch/rte_atomic.h       |  387 ++++++++++++++++++++
>  .../common/include/powerpc/arch/rte_atomic_arch.h  |  318 ++++++++++++++++
>  2 files changed, 705 insertions(+), 0 deletions(-)
>  create mode 100644 lib/librte_eal/common/include/powerpc/arch/rte_atomic.h
>  create mode 100644 lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
> 
...
> +
> diff --git a/lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
> b/lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
> new file mode 100644
> index 0000000..fe5666e
> --- /dev/null
> +
...
>+#define	rte_arch_rmb() asm volatile("sync" : : : "memory")
>+
> +#define	rte_arch_compiler_barrier() do {		\
> +	asm volatile ("" : : : "memory");	\
> +} while(0)

I don't know much about PPC architecture, but as I remember it uses a weakly-ordered memory model.
Is that correct?
If so, then you probably need rte_arch_compiler_barrier() to be a "sync" instruction (like the mb()s above).
The reason is that IA has a much stronger memory-ordering model, and there are a lot of places in the code that rely on that ordering.
For example - the ring enqueue/dequeue functions.
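
As a rough illustration of the kind of ordering the ring code relies on (a simplified producer-side sketch, not the actual DPDK source):

#include <stdint.h>

/* store an object, then publish it to a consumer on another core */
static inline void
publish(volatile uintptr_t *slots, volatile uint32_t *prod_tail,
        uintptr_t obj, uint32_t head)
{
	slots[head] = obj;              /* 1: store the object                */
	rte_arch_compiler_barrier();    /* enough on IA (strong HW ordering); */
	                                /* PPC would need a real barrier      */
	                                /* ("sync"/"lwsync") so that store 1  */
	                                /* is visible before store 2          */
	*prod_tail = head + 1;          /* 2: publish the new tail index      */
}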

Konstantin
  
Chao Zhu Oct. 16, 2014, 3:14 a.m. UTC | #4
Konstantin,

In my understanding, a compiler barrier is a kind of software barrier which
prevents the compiler from moving memory accesses across the barrier. This
should be architecture-independent. The "sync" instruction, by contrast, is a
hardware barrier that depends on the PowerPC architecture. So I think the
compiler barrier should be the same on x86 and PowerPC. Any comments?
Please correct me if I was wrong.
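
For reference, the two kinds of barrier being contrasted, as a minimal sketch (macro names here are illustrative, not the ones from the patch):

/* compiler barrier: emits no instruction; it only stops the compiler
 * from reordering memory accesses across this point */
#define compiler_barrier()  asm volatile("" : : : "memory")

/* hardware barrier on PowerPC: the "sync" instruction additionally
 * orders the loads/stores actually issued by the CPU */
#define hw_barrier()        asm volatile("sync" : : : "memory")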

Thanks a lot! 
 
Best Regards!
------------------------------
Chao Zhu 




From:   "Ananyev, Konstantin" <konstantin.ananyev@intel.com>
To:     Chao CH Zhu/China/IBM@IBMCN, "dev@dpdk.org" <dev@dpdk.org>
Date:   2014/10/16 08:38
Subject:        RE: [dpdk-dev] [PATCH 02/12] Add atomic operations for IBM 
Power   architecture




Hi,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Chao Zhu
> Sent: Friday, September 26, 2014 10:36 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH 02/12] Add atomic operations for IBM Power 
architecture
> 
> The atomic operations implemented with assembly code in DPDK only
> support x86. This patch add architecture specific atomic operations for
> IBM Power architecture.
> 
> Signed-off-by: Chao Zhu <bjzhuc@cn.ibm.com>
> ---
>  .../common/include/powerpc/arch/rte_atomic.h       |  387 
++++++++++++++++++++
>  .../common/include/powerpc/arch/rte_atomic_arch.h  |  318 
++++++++++++++++
>  2 files changed, 705 insertions(+), 0 deletions(-)
>  create mode 100644 
lib/librte_eal/common/include/powerpc/arch/rte_atomic.h
>  create mode 100644 
lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
> 
...
> +
> diff --git 
a/lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
> b/lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
> new file mode 100644
> index 0000000..fe5666e
> --- /dev/null
> +
...
>+#define                rte_arch_rmb() asm volatile("sync" : : : 
"memory")
>+
> +#define               rte_arch_compiler_barrier() do {        \
> +              asm volatile ("" : : : "memory");               \
> +} while(0)

I don't know much about PPC architecture, but as I remember it uses a 
weakly-ordering memory model.
Is that correct?
If so, then you probably need rte_arch_compiler_barrier() to be "sync" 
instruction (like mb()s above) .
The reason is that IA has much stronger memory ordering model and there 
are a lot of places in the code where it implies that  ordering.
For example - ring enqueue/dequeue functions. 

Konstantin
  
Bruce Richardson Oct. 16, 2014, 9:42 a.m. UTC | #5
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Chao CH Zhu
> Sent: Thursday, October 16, 2014 4:14 AM
> To: Ananyev, Konstantin
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 02/12] Add atomic operations for IBM Power
> architecture
> 
> Konstantin,
> 
> In my understanding, compiler barrier is a kind of software barrier which
> prevents the compiler from moving memory accesses across the barrier. This
> should be architecture-independent. And the "sync" instruction is a
> hardware barrier which depends on PowerPC architecture. So I think the
> compiler barrier should be the same on x86 and PowerPC. Any comments?
> Please correct me if I was wrong.
> 
I would agree with that assessment, as far as it goes, in that a compiler barrier is going to be the same on both architectures. However, we also need to start thinking about actual use cases - how do we specify the barriers in a piece of code where we need a full memory barrier on PPC and only a compiler barrier on IA?
My suggestion would be to first do as you propose and have proper primitives for the different barrier types defined correctly for each platform - with the compiler barrier being, presumably, common across each one. Then, as a second step, we probably need to look at defining "logical" barrier types (for want of a better term) that can then be used in the code and which would be different across platforms.

Does it make sense to do it this way? Is it the best solution? Do we want to define the basic primitives or are we only ever likely to need the logical barrier types?
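
To make the second step a bit more concrete, the kind of call site where this matters is a producer writing data and then publishing an index, as in the ring enqueue path (a hand-written sketch with a placeholder barrier name, not the actual ring code):

	/* producer side of a ring-style queue (sketch) */
	ring[prod_head & mask] = obj;      /* 1. write the payload */
	rte_logical_wmb();                 /* 2. placeholder "logical" barrier: must be a
	                                    *    real "sync" on PPC, while a compiler
	                                    *    barrier is sufficient on IA */
	r->prod_tail = prod_head + 1;      /* 3. publish the new index to the consumer */

With a logical barrier type, the same source line gives the right (and cheapest) behaviour on each architecture.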

/Bruce
  
Ananyev, Konstantin Oct. 16, 2014, 10:59 a.m. UTC | #6
> 
> 
> From: Chao CH Zhu [mailto:bjzhuc@cn.ibm.com]
> Sent: Thursday, October 16, 2014 4:14 AM
> To: Ananyev, Konstantin
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH 02/12] Add atomic operations for IBM Power architecture
> 
> Konstantin,
> 
> In my understanding, compiler barrier is a kind of software barrier which prevents the compiler from moving memory accesses across
> the barrier.

Yes, compiler_barrier() right now only guarantees that the compiler wouldn't reorder instructions across it while emitting the code.

> This should be architecture-independent. And the "sync" instruction is a hardware barrier which depends on PowerPC
> architecture.

I understand what "sync" does.

>So I think the compiler barrier should be the same on x86 and PowerPC. Any comments? Please correct me if I was
> wrong.

The thing is that the current DPDK code will not work correctly on a system with weak memory ordering -
IA has quite a strict memory ordering model, and there is code inside DPDK that relies on the fact that the CPU follows that model.
For such places in the code, a compiler barrier is enough for IA, but it is not enough for PPC.

Are you worried about the naming here - that the compiler barrier would become a HW one? :)
In that case, what you probably can do is:
Create a new architecture-dependent macro: rte_barrier().
That would expand into rte_compiler_barrier() for IA and into rte_mb() for PPC.
Go through all references to rte_compiler_barrier() inside DPDK and replace them with rte_barrier().
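
Something along these lines (just a sketch; the arch test below is a placeholder for whatever config flag the PPC port ends up defining):

	/* "logical" barrier: architecture-dependent strength */
	#ifdef RTE_ARCH_PPC_64                        /* placeholder arch flag */
	#define rte_barrier() rte_mb()                /* full "sync" on PPC    */
	#else
	#define rte_barrier() rte_compiler_barrier()  /* enough on IA          */
	#endif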

Konstantin

> 
> Thanks a lot!
> 
> Best Regards!
> ------------------------------
> Chao Zhu
> 
> 
> 
> 
> From:        "Ananyev, Konstantin" <konstantin.ananyev@intel.com>
> To:        Chao CH Zhu/China/IBM@IBMCN, "dev@dpdk.org" <dev@dpdk.org>
> Date:        2014/10/16 08:38
> Subject:        RE: [dpdk-dev] [PATCH 02/12] Add atomic operations for IBM Power architecture
> ________________________________________
> 
> 
> 
> 
> Hi,
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Chao Zhu
> > Sent: Friday, September 26, 2014 10:36 AM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH 02/12] Add atomic operations for IBM Power architecture
> >
> > The atomic operations implemented with assembly code in DPDK only
> > support x86. This patch add architecture specific atomic operations for
> > IBM Power architecture.
> >
> > Signed-off-by: Chao Zhu <bjzhuc@cn.ibm.com>
> > ---
> >  .../common/include/powerpc/arch/rte_atomic.h       |  387 ++++++++++++++++++++
> >  .../common/include/powerpc/arch/rte_atomic_arch.h  |  318 ++++++++++++++++
> >  2 files changed, 705 insertions(+), 0 deletions(-)
> >  create mode 100644 lib/librte_eal/common/include/powerpc/arch/rte_atomic.h
> >  create mode 100644 lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
> >
> ...
> > +
> > diff --git a/lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
> > b/lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
> > new file mode 100644
> > index 0000000..fe5666e
> > --- /dev/null
> > +
> ...
> >+#define                 rte_arch_rmb() asm volatile("sync" : : : "memory")
> >+
> > +#define                 rte_arch_compiler_barrier() do {                                  \
> > +                 asm volatile ("" : : : "memory");                 \
> > +} while(0)
> 
> I don't know much about the PPC architecture, but as I remember it uses a weakly-ordered memory model.
> Is that correct?
> If so, then you probably need rte_arch_compiler_barrier() to be a "sync" instruction (like the mb()s above).
> The reason is that IA has a much stronger memory ordering model and there are a lot of places in the code that implicitly rely on
> that ordering.
> For example - the ring enqueue/dequeue functions.
> 
> Konstantin
  
Ananyev, Konstantin Oct. 16, 2014, 11:04 a.m. UTC | #7
> -----Original Message-----
> From: Richardson, Bruce
> Sent: Thursday, October 16, 2014 10:43 AM
> To: Chao CH Zhu; Ananyev, Konstantin
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH 02/12] Add atomic operations for IBM Power architecture
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Chao CH Zhu
> > Sent: Thursday, October 16, 2014 4:14 AM
> > To: Ananyev, Konstantin
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH 02/12] Add atomic operations for IBM Power
> > architecture
> >
> > Konstantin,
> >
> > In my understanding, compiler barrier is a kind of software barrier which
> > prevents the compiler from moving memory accesses across the barrier. This
> > should be architecture-independent. And the "sync" instruction is a
> > hardware barrier which depends on PowerPC architecture. So I think the
> > compiler barrier should be the same on x86 and PowerPC. Any comments?
> > Please correct me if I was wrong.
> >
> I would agree with that assessment, as far as it goes, in that a compiler barrier is going to be the same on both architectures. However,
> we also need to start thinking about actual use cases - how do we specify the barriers in a piece of code where we need a full memory
> barrier on PPC and only a compiler barrier on IA?
> My suggestion would be to do first as you propose and have proper primitives for the different barrier types defined correctly for
> each platform - with the compiler barrier being, presumably, common across each one. Then, as a second step, we probably need to
> look at defining "logical" barrier types (for want of a better term) that can then be used in the code and which would be different
> across platforms.

Yeah, as I said in the other mail, what we probably can do is:

Create a new architecture-dependent macro: rte_barrier().
That would expand into rte_compiler_barrier() for IA and into rte_mb() for PPC.
Go through all references to rte_compiler_barrier() inside DPDK and replace them with rte_barrier().

BTW, out of my own curiosity:
Is there any good use for compiler_barrier() on systems with a weakly ordered memory model?

> 
> Does it make sense to do it this way? Is it the best solution? Do we want to define the basic primitives or are we only ever likely to
> need the logical barrier types?
> 
> /Bruce
  

Patch

diff --git a/lib/librte_eal/common/include/powerpc/arch/rte_atomic.h b/lib/librte_eal/common/include/powerpc/arch/rte_atomic.h
new file mode 100644
index 0000000..7f5214e
--- /dev/null
+++ b/lib/librte_eal/common/include/powerpc/arch/rte_atomic.h
@@ -0,0 +1,387 @@ 
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+ * Inspired from FreeBSD src/sys/powerpc/include/atomic.h
+ * Copyright (c) 2008 Marcel Moolenaar
+ * Copyright (c) 2001 Benno Rice
+ * Copyright (c) 2001 David E. O'Brien
+ * Copyright (c) 1998 Doug Rabson
+ * All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_H_
+#error "don't include this file directly, please include generic <rte_atomic.h>"
+#endif
+
+#ifndef _RTE_POWERPC_64_ATOMIC_H_
+#define _RTE_POWERPC_64_ATOMIC_H_
+
+/*------------------------- 64 bit atomic operations -------------------------*/
+
+/**
+ * An atomic compare and set function used by the mutex functions.
+ * (atomic) equivalent to:
+ *   if (*dst == exp)
+ *     *dst = src (all 64-bit words)
+ *
+ * @param dst
+ *   The destination into which the value will be written.
+ * @param exp
+ *   The expected value.
+ * @param src
+ *   The new value.
+ * @return
+ *   Non-zero on success; 0 on failure.
+ */
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+	unsigned int ret = 0;
+
+	asm volatile (
+			"\tlwsync\n"
+			"1: ldarx %[ret], 0, %[dst]\n"
+			"cmpld %[exp], %[ret]\n"
+			"bne 2f\n"
+			"stdcx. %[src], 0, %[dst]\n"
+			"bne- 1b\n"
+			"li %[ret], 1\n"
+			"b 3f\n"
+			"2:\n"
+			"stdcx. %[ret], 0, %[dst]\n"
+			"li %[ret], 0\n"
+			"3:\n"
+			"isync\n"
+			: [ret] "=&r" (ret), "=m" (*dst)
+			: [dst] "r" (dst), [exp] "r" (exp), [src] "r" (src), "m" (*dst)
+			: "cc", "memory");
+	return ret;
+}
+
+/**
+ * The atomic counter structure.
+ */
+typedef struct {
+	volatile int64_t cnt;  /**< Internal counter value. */
+} rte_atomic64_t;
+
+/**
+ * Static initializer for an atomic counter.
+ */
+#define RTE_ATOMIC64_INIT(val) { (val) }
+
+/**
+ * Initialize the atomic counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic64_init(rte_atomic64_t *v)
+{
+	v->cnt = 0;
+}
+
+/**
+ * Atomically read a 64-bit counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   The value of the counter.
+ */
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v)
+{
+	long ret;
+
+	asm volatile("ld%U1%X1 %[ret],%[cnt]" : [ret] "=r"(ret) : [cnt] "m"(v->cnt));
+
+	return ret;
+}
+
+/**
+ * Atomically set a 64-bit counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param new_value
+ *   The new value of the counter.
+ */
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
+{
+	asm volatile("std%U0%X0 %[new_value],%[cnt]" : [cnt] "=m"(v->cnt) : [new_value] "r"(new_value));
+}
+
+/**
+ * Atomically add a 64-bit value to a counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param inc
+ *   The value to be added to the counter.
+ */
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
+{
+	long t;
+
+	asm volatile(
+			"1: ldarx %[t],0,%[cnt]\n"
+			"add %[t],%[inc],%[t]\n"
+			"stdcx. %[t],0,%[cnt]\n"
+			"bne- 1b\n"
+			: [t] "=&r" (t), "=m" (v->cnt)
+			: [cnt] "r" (&v->cnt), [inc] "r" (inc), "m" (v->cnt)
+			: "cc", "memory");
+}
+
+/**
+ * Atomically subtract a 64-bit value from a counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param dec
+ *   The value to be subtracted from the counter.
+ */
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
+{
+	long t;
+
+	asm volatile(
+			"1: ldarx %[t],0,%[cnt]\n"
+			"subf %[t],%[dec],%[t]\n"
+			"stdcx. %[t],0,%[cnt]\n"
+			"bne- 1b\n"
+			: [t] "=&r" (t), "+m" (v->cnt)
+			: [cnt] "r" (&v->cnt), [dec] "r" (dec), "m" (v->cnt)
+			: "cc", "memory");
+}
+
+/**
+ * Atomically increment a 64-bit counter by one and test.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v)
+{
+	long t;
+
+	asm volatile(
+			"1: ldarx %[t],0,%[cnt]\n"
+			"addic %[t],%[t],1\n"
+			"stdcx. %[t],0,%[cnt] \n"
+			"bne- 1b\n"
+			: [t] "=&r" (t), "+m" (v->cnt)
+			: [cnt] "r" (&v->cnt), "m" (v->cnt)
+			: "cc", "xer", "memory");
+}
+
+/**
+ * Atomically decrement a 64-bit counter by one and test.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v)
+{
+	long t;
+
+	asm volatile(
+			"1: ldarx %[t],0,%[cnt]\n"
+			"addic %[t],%[t],-1\n"
+			"stdcx. %[t],0,%[cnt]\n"
+			"bne- 1b\n"
+			: [t] "=&r" (t), "+m" (v->cnt)
+			: [cnt] "r" (&v->cnt), "m" (v->cnt)
+			: "cc", "xer", "memory");
+}
+
+/**
+ * Add a 64-bit value to an atomic counter and return the result.
+ *
+ * Atomically adds the 64-bit value (inc) to the atomic counter (v) and
+ * returns the value of v after the addition.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param inc
+ *   The value to be added to the counter.
+ * @return
+ *   The value of v after the addition.
+ */
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
+{
+	long ret;
+
+	asm volatile(
+			"\n\tlwsync\n"
+			"1: ldarx %[ret],0,%[cnt]\n"
+			"add %[ret],%[inc],%[ret]\n"
+			"stdcx. %[ret],0,%[cnt]\n"
+			"bne- 1b\n"
+			"isync\n"
+			: [ret] "=&r" (ret)
+			: [inc] "r" (inc), [cnt] "r" (&v->cnt)
+			: "cc", "memory");
+
+	return ret;
+}
+
+/**
+ * Subtract a 64-bit value from an atomic counter and return the result.
+ *
+ * Atomically subtracts the 64-bit value (dec) from the atomic counter (v)
+ * and returns the value of v after the subtraction.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param dec
+ *   The value to be subtracted from the counter.
+ * @return
+ *   The value of v after the subtraction.
+ */
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
+{
+	long ret;
+
+	asm volatile(
+			"\n\tlwsync\n"
+			"1: ldarx %[ret],0,%[cnt]\n"
+			"subf %[ret],%[dec],%[ret]\n"
+			"stdcx. %[ret],0,%[cnt] \n"
+			"bne- 1b\n"
+			"isync\n"
+			: [ret] "=&r" (ret)
+			: [dec] "r" (dec), [cnt] "r" (&v->cnt)
+			: "cc", "memory");
+
+	return ret;
+}
+
+static __inline__ long rte_atomic64_inc_return(rte_atomic64_t *v)
+{
+	long ret;
+
+	asm volatile(
+			"\n\tlwsync\n"
+			"1: ldarx %[ret],0,%[cnt]\n"
+			"addic %[ret],%[ret],1\n"
+			"stdcx. %[ret],0,%[cnt]\n"
+			"bne- 1b\n"
+			"isync\n"
+			: [ret] "=&r" (ret)
+			: [cnt] "r" (&v->cnt)
+			: "cc", "xer", "memory");
+
+	return ret;
+}
+/**
+ * Atomically increment a 64-bit counter by one and test.
+ *
+ * Atomically increments the atomic counter (v) by one and returns
+ * true if the result is 0, or false in all other cases.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   True if the result after the addition is 0; false otherwise.
+ */
+#define rte_atomic64_inc_and_test(v) (rte_atomic64_inc_return(v) == 0)
+
+static __inline__ long rte_atomic64_dec_return(rte_atomic64_t *v)
+{
+	long ret;
+
+	asm volatile(
+			"\n\tlwsync\n"
+			"1: ldarx %[ret],0,%[cnt]\n"
+			"addic %[ret],%[ret],-1\n"
+			"stdcx. %[ret],0,%[cnt]\n"
+			"bne- 1b\n"
+			"isync\n"
+			: [ret] "=&r" (ret)
+			: [cnt] "r" (&v->cnt)
+			: "cc", "xer", "memory");
+
+	return ret;
+}
+/**
+ * Atomically decrement a 64-bit counter by one and test.
+ *
+ * Atomically decrements the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   True if the result after subtraction is 0; false otherwise.
+ */
+#define rte_atomic64_dec_and_test(v)    (rte_atomic64_dec_return((v)) == 0)
+
+/**
+ * Atomically test and set a 64-bit atomic counter.
+ *
+ * If the counter value is already set, return 0 (failed). Otherwise, set
+ * the counter value to 1 and return 1 (success).
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   0 if failed; else 1, success.
+ */
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
+{
+	return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
+}
+
+/**
+ * Atomically set a 64-bit counter to 0.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void rte_atomic64_clear(rte_atomic64_t *v)
+{
+	v->cnt = 0;
+}
+
+#endif /* _RTE_POWERPC_64_ATOMIC_H_ */
+
diff --git a/lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h b/lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
new file mode 100644
index 0000000..fe5666e
--- /dev/null
+++ b/lib/librte_eal/common/include/powerpc/arch/rte_atomic_arch.h
@@ -0,0 +1,318 @@ 
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+ * Inspired from FreeBSD src/sys/powerpc/include/atomic.h
+ * Copyright (c) 2008 Marcel Moolenaar
+ * Copyright (c) 2001 Benno Rice
+ * Copyright (c) 2001 David E. O'Brien
+ * Copyright (c) 1998 Doug Rabson
+ * All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_H_
+#error "don't include this file directly, please include generic <rte_atomic.h>"
+#endif
+
+#ifndef _RTE_ATOMIC_ARCH_H_
+#define _RTE_ATOMIC_ARCH_H_
+
+#include <stdint.h>
+
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ */
+#define	rte_arch_mb()  asm volatile("sync" : : : "memory")
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ */
+#define	rte_arch_wmb() asm volatile("sync" : : : "memory")
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ */
+#define	rte_arch_rmb() asm volatile("sync" : : : "memory")
+
+#define	rte_arch_compiler_barrier() do {		\
+	asm volatile ("" : : : "memory");	\
+} while(0)
+
+/*------------------------- 16 bit atomic operations -------------------------*/
+
+/**
+ * The atomic counter structure.
+ */
+typedef struct {
+	volatile int16_t cnt; /**< An internal counter value. */
+} rte_atomic16_t;
+
+/**
+ * Atomic compare and set.
+ *
+ * (atomic) equivalent to:
+ *   if (*dst == exp)
+ *     *dst = src (all 16-bit words)
+ *
+ * @param dst
+ *   The destination location into which the value will be written.
+ * @param exp
+ *   The expected value.
+ * @param src
+ *   The new value.
+ * @return
+ *   Non-zero on success; 0 on failure.
+ */
+static inline int
+rte_arch_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
+{
+	return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) ? 1 : 0;
+}
+
+/**
+ * Atomically increment a counter by one.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_arch_atomic16_inc(rte_atomic16_t *v)
+{
+	__atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE);
+}
+
+/**
+ * Atomically decrement a counter by one.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_arch_atomic16_dec(rte_atomic16_t *v)
+{
+	__atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE);
+}
+
+/**
+ * Atomically increment a 16-bit counter by one and test.
+ *
+ * Atomically increments the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   True if the result after the increment operation is 0; false otherwise.
+ */
+static inline int rte_arch_atomic16_inc_and_test(rte_atomic16_t *v)
+{
+	return (__atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0);
+}
+
+/**
+ * Atomically decrement a 16-bit counter by one and test.
+ *
+ * Atomically decrements the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   True if the result after the decrement operation is 0; false otherwise.
+ */
+static inline int rte_arch_atomic16_dec_and_test(rte_atomic16_t *v)
+{
+	return (__atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0);
+}
+
+/*------------------------- 32 bit atomic operations -------------------------*/
+
+/**
+ * The atomic counter structure.
+ */
+typedef struct {
+	volatile int32_t cnt; /**< An internal counter value. */
+} rte_atomic32_t;
+
+/**
+ * Atomic compare and set.
+ *
+ * (atomic) equivalent to:
+ *   if (*dst == exp)
+ *     *dst = src (all 32-bit words)
+ *
+ * @param dst
+ *   The destination location into which the value will be written.
+ * @param exp
+ *   The expected value.
+ * @param src
+ *   The new value.
+ * @return
+ *   Non-zero on success; 0 on failure.
+ */
+static inline int
+rte_arch_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+	unsigned int ret = 0;
+
+	asm volatile(
+			"\tlwsync\n"
+			"1:\tlwarx %[ret], 0, %[dst]\n"
+			"cmplw %[exp], %[ret]\n"
+			"bne 2f\n"
+			"stwcx. %[src], 0, %[dst]\n"
+			"bne- 1b\n"
+			"li %[ret], 1\n"
+			"b 3f\n"
+			"2:\n"
+			"stwcx. %[ret], 0, %[dst]\n"   
+			"li %[ret], 0\n"
+			"3:\n"
+			"isync\n"
+			: [ret] "=&r" (ret), "=m" (*dst)
+			: [dst] "r" (dst), [exp] "r" (exp), [src] "r" (src), "m" (*dst)
+			: "cc", "memory");
+
+	return ret;
+}
+
+/**
+ * Atomically increment a counter by one.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_arch_atomic32_inc(rte_atomic32_t *v)
+{
+	int t;
+
+	asm volatile(
+			"1: lwarx %[t],0,%[cnt]\n"
+			"addic %[t],%[t],1\n"
+			"stwcx. %[t],0,%[cnt]\n"
+			"bne- 1b\n"
+			: [t] "=&r" (t), "=m" (v->cnt)
+			: [cnt] "r" (&v->cnt), "m" (v->cnt)
+			: "cc", "xer", "memory");
+}
+
+/**
+ * Atomically decrement a counter by one.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_arch_atomic32_dec(rte_atomic32_t *v)
+{
+	int t;
+
+	asm volatile(
+			"1: lwarx %[t],0,%[cnt]\n"
+			"addic %[t],%[t],-1\n"
+			"stwcx. %[t],0,%[cnt]\n"
+			"bne- 1b\n"
+			: [t] "=&r" (t), "=m" (v->cnt)
+			: [cnt] "r" (&v->cnt), "m" (v->cnt)
+			: "cc", "xer", "memory");
+}
+
+/**
+ * Atomically increment a 32-bit counter by one and test.
+ *
+ * Atomically increments the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   True if the result after the increment operation is 0; false otherwise.
+ */
+static inline int rte_arch_atomic32_inc_and_test(rte_atomic32_t *v)
+{
+	int ret;
+
+	asm volatile(
+			"\n\tlwsync\n"
+			"1: lwarx %[ret],0,%[cnt]\n"
+			"addic	%[ret],%[ret],1\n"
+			"stwcx. %[ret],0,%[cnt]\n"
+			"bne- 1b\n"
+			"isync\n"
+			: [ret] "=&r" (ret)
+			: [cnt] "r" (&v->cnt)
+			: "cc", "xer", "memory");
+
+	return (ret == 0);
+}
+
+/**
+ * Atomically decrement a 32-bit counter by one and test.
+ *
+ * Atomically decrements the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   True if the result after the decrement operation is 0; false otherwise.
+ */
+static inline int rte_arch_atomic32_dec_and_test(rte_atomic32_t *v)
+{
+	int ret;
+
+	asm volatile(
+			"\n\tlwsync\n"
+			"1: lwarx %[ret],0,%[cnt]\n"
+			"addic %[ret],%[ret],-1\n"
+			"stwcx. %[ret],0,%[cnt]\n"
+			"bne- 1b\n"
+			"isync\n"
+			: [ret] "=&r" (ret)
+			: [cnt] "r" (&v->cnt)
+			: "cc", "xer", "memory");
+
+	return (ret == 0);
+}
+
+#endif /* _RTE_ATOMIC_ARCH_H_ */
+
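
For reference, a quick sketch of how the 64-bit counter API defined in this patch is intended to be used (illustrative example only, not part of the patch):

	#include <rte_atomic.h>

	/* statically initialized atomic packet counter */
	static rte_atomic64_t pkt_count = RTE_ATOMIC64_INIT(0);

	static void on_packet(void)
	{
		rte_atomic64_inc(&pkt_count);          /* atomic increment */
	}

	static int64_t snapshot(void)
	{
		return rte_atomic64_read(&pkt_count);  /* atomic read of the counter */
	}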