Add support for IBM Z s390x

Message ID 20221028215240.103365-1-dmiller423@gmail.com (mailing list archive)
State Changes Requested, archived
Delegated to: Thomas Monjalon
Headers
Series Add support for IBM Z s390x |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation warning apply issues
ci/iol-testing warning apply patch failure

Commit Message

David Miller Oct. 28, 2022, 9:52 p.m. UTC
  Signed-off-by: David Miller <dmiller423@gmail.com>
Reviewed-by: Mathew S Thoennes <tardis@us.ibm.com>
---
 app/test-acl/main.c                          |   4 +
 app/test-pmd/config.c                        |  12 +-
 app/test/test_acl.c                          |   1 +
 app/test/test_atomic.c                       |   7 +-
 app/test/test_cmdline.c                      |   6 +-
 app/test/test_cmdline_ipaddr.c               |  11 +
 app/test/test_cmdline_num.c                  | 110 ++++
 app/test/test_hash_functions.c               |  29 +
 app/test/test_xmmt_ops.h                     |  14 +
 buildtools/pmdinfogen.py                     |  11 +-
 config/meson.build                           |   2 +
 config/s390x/meson.build                     |  51 ++
 config/s390x/s390x_linux_clang_ubuntu        |  19 +
 doc/guides/nics/features/i40e.ini            |   1 +
 drivers/common/mlx5/mlx5_common.h            |   9 +
 drivers/net/i40e/i40e_rxtx_vec_s390x.c       | 630 +++++++++++++++++++
 drivers/net/i40e/meson.build                 |   2 +
 drivers/net/ixgbe/ixgbe_rxtx.c               |   2 +-
 drivers/net/memif/rte_eth_memif.h            |   2 +
 drivers/net/mlx5/mlx5_rx.c                   |  22 +-
 drivers/net/octeontx/base/octeontx_pki_var.h |   6 +
 examples/l3fwd-acl/main.c                    |   4 +
 examples/l3fwd/l3fwd_em.c                    |   8 +
 examples/l3fwd/l3fwd_lpm_s390x.h             | 137 ++++
 examples/l3fwd/l3fwd_s390x.h                 | 259 ++++++++
 lib/acl/acl_bld.c                            |   3 +
 lib/acl/acl_gen.c                            |   9 +
 lib/acl/acl_run_scalar.c                     |   8 +
 lib/acl/rte_acl.c                            |  27 +
 lib/acl/rte_acl.h                            |   5 +-
 lib/eal/s390x/include/meson.build            |  16 +
 lib/eal/s390x/include/rte_atomic.h           |  47 ++
 lib/eal/s390x/include/rte_byteorder.h        |  43 ++
 lib/eal/s390x/include/rte_cpuflags.h         |  42 ++
 lib/eal/s390x/include/rte_cycles.h           |  44 ++
 lib/eal/s390x/include/rte_io.h               | 184 ++++++
 lib/eal/s390x/include/rte_mcslock.h          |  18 +
 lib/eal/s390x/include/rte_memcpy.h           |  55 ++
 lib/eal/s390x/include/rte_pause.h            |  22 +
 lib/eal/s390x/include/rte_power_intrinsics.h |  20 +
 lib/eal/s390x/include/rte_prefetch.h         |  46 ++
 lib/eal/s390x/include/rte_rwlock.h           |  42 ++
 lib/eal/s390x/include/rte_spinlock.h         |  85 +++
 lib/eal/s390x/include/rte_ticketlock.h       |  18 +
 lib/eal/s390x/include/rte_vect.h             |  35 ++
 lib/eal/s390x/meson.build                    |  16 +
 lib/eal/s390x/rte_cpuflags.c                 |  91 +++
 lib/eal/s390x/rte_cycles.c                   |  11 +
 lib/eal/s390x/rte_hypervisor.c               |  11 +
 lib/eal/s390x/rte_power_intrinsics.c         |  51 ++
 lib/hash/rte_fbk_hash.h                      |   7 +
 lib/lpm/meson.build                          |   1 +
 lib/lpm/rte_lpm.h                            |   2 +
 lib/lpm/rte_lpm6.c                           |  18 +
 lib/lpm/rte_lpm_s390x.h                      | 130 ++++
 meson.build                                  |   2 +
 56 files changed, 2450 insertions(+), 18 deletions(-)
 create mode 100644 config/s390x/meson.build
 create mode 100644 config/s390x/s390x_linux_clang_ubuntu
 create mode 100644 drivers/net/i40e/i40e_rxtx_vec_s390x.c
 create mode 100644 examples/l3fwd/l3fwd_lpm_s390x.h
 create mode 100644 examples/l3fwd/l3fwd_s390x.h
 create mode 100644 lib/eal/s390x/include/meson.build
 create mode 100644 lib/eal/s390x/include/rte_atomic.h
 create mode 100644 lib/eal/s390x/include/rte_byteorder.h
 create mode 100644 lib/eal/s390x/include/rte_cpuflags.h
 create mode 100644 lib/eal/s390x/include/rte_cycles.h
 create mode 100644 lib/eal/s390x/include/rte_io.h
 create mode 100644 lib/eal/s390x/include/rte_mcslock.h
 create mode 100644 lib/eal/s390x/include/rte_memcpy.h
 create mode 100644 lib/eal/s390x/include/rte_pause.h
 create mode 100644 lib/eal/s390x/include/rte_power_intrinsics.h
 create mode 100644 lib/eal/s390x/include/rte_prefetch.h
 create mode 100644 lib/eal/s390x/include/rte_rwlock.h
 create mode 100644 lib/eal/s390x/include/rte_spinlock.h
 create mode 100644 lib/eal/s390x/include/rte_ticketlock.h
 create mode 100644 lib/eal/s390x/include/rte_vect.h
 create mode 100644 lib/eal/s390x/meson.build
 create mode 100644 lib/eal/s390x/rte_cpuflags.c
 create mode 100644 lib/eal/s390x/rte_cycles.c
 create mode 100644 lib/eal/s390x/rte_hypervisor.c
 create mode 100644 lib/eal/s390x/rte_power_intrinsics.c
 create mode 100644 lib/lpm/rte_lpm_s390x.h
  

Comments

Stephen Hemminger Oct. 28, 2022, 10:45 p.m. UTC | #1
On Fri, 28 Oct 2022 17:52:40 -0400
David Miller <dmiller423@gmail.com> wrote:

> diff --git a/app/test/test_cmdline.c b/app/test/test_cmdline.c
> index 115bee966d..e0720ff345 100644
> --- a/app/test/test_cmdline.c
> +++ b/app/test/test_cmdline.c
> @@ -10,21 +10,21 @@
>  static int
>  test_cmdline(void)
>  {
> -	printf("Testind parsing ethernet addresses...\n");
> +	printf("Testing parsing ethernet addresses...\n");
>  	if (test_parse_etheraddr_valid() < 0)
>  		return -1;
>  	if (test_parse_etheraddr_invalid_data() < 0)
>  		return -1;
>  	if (test_parse_etheraddr_invalid_param() < 0)
>  		return -1;
> -	printf("Testind parsing port lists...\n");
> +	printf("Testing parsing port lists...\n");
>  	if (test_parse_portlist_valid() < 0)
>  		return -1;
>  	if (test_parse_portlist_invalid_data() < 0)
>  		return -1;
>  	if (test_parse_portlist_invalid_param() < 0)
>  		return -1;
> -	printf("Testind parsing numbers...\n");
> +	printf("Testing parsing numbers...\n");
>  	if (test_parse_num_valid() < 0)
>  		return -1;
>  	if (test_parse_num_invalid_data() < 0)

This spelling fix should be its own patch, not related to s390x
  
Stephen Hemminger Oct. 28, 2022, 10:51 p.m. UTC | #2
On Fri, 28 Oct 2022 17:52:40 -0400
David Miller <dmiller423@gmail.com> wrote:

> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> +            switch (type) {
> +                case RTE_UINT8:
> +                {
> +                    uint8_t *temp = (uint8_t *)&result;
> +                    result = *temp;
> +                    break;
> +                }
> +                case RTE_UINT16:
> +                {
> +                    uint16_t *temp = (uint16_t *)&result;
> +                    result = *temp;
> +                    break;
> +                }
> +                case RTE_UINT32:
> +                {
> +                    uint32_t *temp = (uint32_t *)&result;
> +                    result = *temp;
> +                    break;
> +                }
> +                case RTE_INT8:
> +                {
> +                    int8_t *temp = (int8_t *)&result;
> +                    result = *temp;
> +                    break;
> +                }
> +                case RTE_INT16:
> +                {
> +                    int16_t *temp = (int16_t *)&result;
> +                    result = *temp;
> +                    break;
> +                }
> +                case RTE_INT32:
> +                {
> +                    int32_t *temp = (int32_t *)&result;
> +                    result = *temp;
> +                    break;
> +                }
> +                default:
> +                    break;
> +            }
> +#endif

You are indenting with 4 spaces.
The DPDK uses tab indentation like the Linux kernel.
Surprised that checkpatch isn't catching this.
  
David Miller Oct. 28, 2022, 10:54 p.m. UTC | #3
Will move to a new patch,  and likely I opened in IDE again and it's
set up for the previous OSS project.
I got an email full of complaints from automated checkpatch,  will
resubmit again early next week.

Best Regards
  - David Miller

On Fri, Oct 28, 2022 at 6:45 PM Stephen Hemminger
<stephen@networkplumber.org> wrote:
>
> On Fri, 28 Oct 2022 17:52:40 -0400
> David Miller <dmiller423@gmail.com> wrote:
>
> > diff --git a/app/test/test_cmdline.c b/app/test/test_cmdline.c
> > index 115bee966d..e0720ff345 100644
> > --- a/app/test/test_cmdline.c
> > +++ b/app/test/test_cmdline.c
> > @@ -10,21 +10,21 @@
> >  static int
> >  test_cmdline(void)
> >  {
> > -     printf("Testind parsing ethernet addresses...\n");
> > +     printf("Testing parsing ethernet addresses...\n");
> >       if (test_parse_etheraddr_valid() < 0)
> >               return -1;
> >       if (test_parse_etheraddr_invalid_data() < 0)
> >               return -1;
> >       if (test_parse_etheraddr_invalid_param() < 0)
> >               return -1;
> > -     printf("Testind parsing port lists...\n");
> > +     printf("Testing parsing port lists...\n");
> >       if (test_parse_portlist_valid() < 0)
> >               return -1;
> >       if (test_parse_portlist_invalid_data() < 0)
> >               return -1;
> >       if (test_parse_portlist_invalid_param() < 0)
> >               return -1;
> > -     printf("Testind parsing numbers...\n");
> > +     printf("Testing parsing numbers...\n");
> >       if (test_parse_num_valid() < 0)
> >               return -1;
> >       if (test_parse_num_invalid_data() < 0)
>
> This spelling fix should be its own patch, not related to s390x
  
Stephen Hemminger July 6, 2023, 10:47 p.m. UTC | #4
On Fri, 28 Oct 2022 17:52:40 -0400
David Miller <dmiller423@gmail.com> wrote:

> diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
> index cc8e7aa138..2a863f3d39 100644
> --- a/app/test-pmd/config.c
> +++ b/app/test-pmd/config.c
> @@ -245,9 +245,9 @@ nic_stats_display(portid_t port_id)
>  	static uint64_t prev_bytes_tx[RTE_MAX_ETHPORTS];
>  	static uint64_t prev_ns[RTE_MAX_ETHPORTS];
>  	struct timespec cur_time;
> -	uint64_t diff_pkts_rx, diff_pkts_tx, diff_bytes_rx, diff_bytes_tx,
> -								diff_ns;
> -	uint64_t mpps_rx, mpps_tx, mbps_rx, mbps_tx;
> +    __uint128_t diff_pkts_rx, diff_pkts_tx, diff_bytes_rx, diff_bytes_tx,
> +            diff_ns;
> +    __uint128_t mpps_rx, mpps_tx, mbps_rx, mbps_tx

Not all platforms DPDK supports are likely to handle 128 bit numbers.
And unsigned long long is not 128 bit on many platforms.

Better to leave this part alone.
  
Stephen Hemminger July 6, 2023, 10:49 p.m. UTC | #5
On Fri, 28 Oct 2022 17:52:40 -0400
David Miller <dmiller423@gmail.com> wrote:

> Signed-off-by: David Miller <dmiller423@gmail.com>
> Reviewed-by: Mathew S Thoennes <tardis@us.ibm.com>
> ---
>  app/test-acl/main.c                          |   4 +
>  app/test-pmd/config.c                        |  12 +-
>  app/test/test_acl.c                          |   1 +
>  app/test/test_atomic.c                       |   7 +-
>  app/test/test_cmdline.c                      |   6 +-
>  app/test/test_cmdline_ipaddr.c               |  11 +
>  app/test/test_cmdline_num.c                  | 110 ++++
>  app/test/test_hash_functions.c               |  29 +
>  app/test/test_xmmt_ops.h                     |  14 +
>  buildtools/pmdinfogen.py                     |  11 +-
>  config/meson.build                           |   2 +
>  config/s390x/meson.build                     |  51 ++
>  config/s390x/s390x_linux_clang_ubuntu        |  19 +
>  doc/guides/nics/features/i40e.ini            |   1 +
>  drivers/common/mlx5/mlx5_common.h            |   9 +
>  drivers/net/i40e/i40e_rxtx_vec_s390x.c       | 630 +++++++++++++++++++
>  drivers/net/i40e/meson.build                 |   2 +
>  drivers/net/ixgbe/ixgbe_rxtx.c               |   2 +-
>  drivers/net/memif/rte_eth_memif.h            |   2 +
>  drivers/net/mlx5/mlx5_rx.c                   |  22 +-
>  drivers/net/octeontx/base/octeontx_pki_var.h |   6 +
>  examples/l3fwd-acl/main.c                    |   4 +
>  examples/l3fwd/l3fwd_em.c                    |   8 +
>  examples/l3fwd/l3fwd_lpm_s390x.h             | 137 ++++
>  examples/l3fwd/l3fwd_s390x.h                 | 259 ++++++++
>  lib/acl/acl_bld.c                            |   3 +
>  lib/acl/acl_gen.c                            |   9 +
>  lib/acl/acl_run_scalar.c                     |   8 +
>  lib/acl/rte_acl.c                            |  27 +
>  lib/acl/rte_acl.h                            |   5 +-
>  lib/eal/s390x/include/meson.build            |  16 +
>  lib/eal/s390x/include/rte_atomic.h           |  47 ++
>  lib/eal/s390x/include/rte_byteorder.h        |  43 ++
>  lib/eal/s390x/include/rte_cpuflags.h         |  42 ++
>  lib/eal/s390x/include/rte_cycles.h           |  44 ++
>  lib/eal/s390x/include/rte_io.h               | 184 ++++++
>  lib/eal/s390x/include/rte_mcslock.h          |  18 +
>  lib/eal/s390x/include/rte_memcpy.h           |  55 ++
>  lib/eal/s390x/include/rte_pause.h            |  22 +
>  lib/eal/s390x/include/rte_power_intrinsics.h |  20 +
>  lib/eal/s390x/include/rte_prefetch.h         |  46 ++
>  lib/eal/s390x/include/rte_rwlock.h           |  42 ++
>  lib/eal/s390x/include/rte_spinlock.h         |  85 +++
>  lib/eal/s390x/include/rte_ticketlock.h       |  18 +
>  lib/eal/s390x/include/rte_vect.h             |  35 ++
>  lib/eal/s390x/meson.build                    |  16 +
>  lib/eal/s390x/rte_cpuflags.c                 |  91 +++
>  lib/eal/s390x/rte_cycles.c                   |  11 +
>  lib/eal/s390x/rte_hypervisor.c               |  11 +
>  lib/eal/s390x/rte_power_intrinsics.c         |  51 ++
>  lib/hash/rte_fbk_hash.h                      |   7 +
>  lib/lpm/meson.build                          |   1 +
>  lib/lpm/rte_lpm.h                            |   2 +
>  lib/lpm/rte_lpm6.c                           |  18 +
>  lib/lpm/rte_lpm_s390x.h                      | 130 ++++
>  meson.build                                  |   2 +
>  56 files changed, 2450 insertions(+), 18 deletions(-)
>  create mode 100644 config/s390x/meson.build
>  create mode 100644 config/s390x/s390x_linux_clang_ubuntu
>  create mode 100644 drivers/net/i40e/i40e_rxtx_vec_s390x.c
>  create mode 100644 examples/l3fwd/l3fwd_lpm_s390x.h
>  create mode 100644 examples/l3fwd/l3fwd_s390x.h
>  create mode 100644 lib/eal/s390x/include/meson.build
>  create mode 100644 lib/eal/s390x/include/rte_atomic.h
>  create mode 100644 lib/eal/s390x/include/rte_byteorder.h
>  create mode 100644 lib/eal/s390x/include/rte_cpuflags.h
>  create mode 100644 lib/eal/s390x/include/rte_cycles.h
>  create mode 100644 lib/eal/s390x/include/rte_io.h
>  create mode 100644 lib/eal/s390x/include/rte_mcslock.h
>  create mode 100644 lib/eal/s390x/include/rte_memcpy.h
>  create mode 100644 lib/eal/s390x/include/rte_pause.h
>  create mode 100644 lib/eal/s390x/include/rte_power_intrinsics.h
>  create mode 100644 lib/eal/s390x/include/rte_prefetch.h
>  create mode 100644 lib/eal/s390x/include/rte_rwlock.h
>  create mode 100644 lib/eal/s390x/include/rte_spinlock.h
>  create mode 100644 lib/eal/s390x/include/rte_ticketlock.h
>  create mode 100644 lib/eal/s390x/include/rte_vect.h
>  create mode 100644 lib/eal/s390x/meson.build
>  create mode 100644 lib/eal/s390x/rte_cpuflags.c
>  create mode 100644 lib/eal/s390x/rte_cycles.c
>  create mode 100644 lib/eal/s390x/rte_hypervisor.c
>  create mode 100644 lib/eal/s390x/rte_power_intrinsics.c
>  create mode 100644 lib/lpm/rte_lpm_s390x.h

Quite a large patch, and never saw V2 of it.
Also, who is maintainer of s390 port?
  
David Miller July 7, 2023, 12:04 a.m. UTC | #6
I updated the original patches to newer versions of DPDK (up to 22.xx).
Mathew S. Thoennes will be the long term maintainer however.

The uint128 was part of the original patch sent to me,  I'm happy to remove it.

Best Regards
  - David Miller

On Thu, Jul 6, 2023 at 5:49 PM Stephen Hemminger
<stephen@networkplumber.org> wrote:
>
> On Fri, 28 Oct 2022 17:52:40 -0400
> David Miller <dmiller423@gmail.com> wrote:
>
> > Signed-off-by: David Miller <dmiller423@gmail.com>
> > Reviewed-by: Mathew S Thoennes <tardis@us.ibm.com>
> > ---
> >  app/test-acl/main.c                          |   4 +
> >  app/test-pmd/config.c                        |  12 +-
> >  app/test/test_acl.c                          |   1 +
> >  app/test/test_atomic.c                       |   7 +-
> >  app/test/test_cmdline.c                      |   6 +-
> >  app/test/test_cmdline_ipaddr.c               |  11 +
> >  app/test/test_cmdline_num.c                  | 110 ++++
> >  app/test/test_hash_functions.c               |  29 +
> >  app/test/test_xmmt_ops.h                     |  14 +
> >  buildtools/pmdinfogen.py                     |  11 +-
> >  config/meson.build                           |   2 +
> >  config/s390x/meson.build                     |  51 ++
> >  config/s390x/s390x_linux_clang_ubuntu        |  19 +
> >  doc/guides/nics/features/i40e.ini            |   1 +
> >  drivers/common/mlx5/mlx5_common.h            |   9 +
> >  drivers/net/i40e/i40e_rxtx_vec_s390x.c       | 630 +++++++++++++++++++
> >  drivers/net/i40e/meson.build                 |   2 +
> >  drivers/net/ixgbe/ixgbe_rxtx.c               |   2 +-
> >  drivers/net/memif/rte_eth_memif.h            |   2 +
> >  drivers/net/mlx5/mlx5_rx.c                   |  22 +-
> >  drivers/net/octeontx/base/octeontx_pki_var.h |   6 +
> >  examples/l3fwd-acl/main.c                    |   4 +
> >  examples/l3fwd/l3fwd_em.c                    |   8 +
> >  examples/l3fwd/l3fwd_lpm_s390x.h             | 137 ++++
> >  examples/l3fwd/l3fwd_s390x.h                 | 259 ++++++++
> >  lib/acl/acl_bld.c                            |   3 +
> >  lib/acl/acl_gen.c                            |   9 +
> >  lib/acl/acl_run_scalar.c                     |   8 +
> >  lib/acl/rte_acl.c                            |  27 +
> >  lib/acl/rte_acl.h                            |   5 +-
> >  lib/eal/s390x/include/meson.build            |  16 +
> >  lib/eal/s390x/include/rte_atomic.h           |  47 ++
> >  lib/eal/s390x/include/rte_byteorder.h        |  43 ++
> >  lib/eal/s390x/include/rte_cpuflags.h         |  42 ++
> >  lib/eal/s390x/include/rte_cycles.h           |  44 ++
> >  lib/eal/s390x/include/rte_io.h               | 184 ++++++
> >  lib/eal/s390x/include/rte_mcslock.h          |  18 +
> >  lib/eal/s390x/include/rte_memcpy.h           |  55 ++
> >  lib/eal/s390x/include/rte_pause.h            |  22 +
> >  lib/eal/s390x/include/rte_power_intrinsics.h |  20 +
> >  lib/eal/s390x/include/rte_prefetch.h         |  46 ++
> >  lib/eal/s390x/include/rte_rwlock.h           |  42 ++
> >  lib/eal/s390x/include/rte_spinlock.h         |  85 +++
> >  lib/eal/s390x/include/rte_ticketlock.h       |  18 +
> >  lib/eal/s390x/include/rte_vect.h             |  35 ++
> >  lib/eal/s390x/meson.build                    |  16 +
> >  lib/eal/s390x/rte_cpuflags.c                 |  91 +++
> >  lib/eal/s390x/rte_cycles.c                   |  11 +
> >  lib/eal/s390x/rte_hypervisor.c               |  11 +
> >  lib/eal/s390x/rte_power_intrinsics.c         |  51 ++
> >  lib/hash/rte_fbk_hash.h                      |   7 +
> >  lib/lpm/meson.build                          |   1 +
> >  lib/lpm/rte_lpm.h                            |   2 +
> >  lib/lpm/rte_lpm6.c                           |  18 +
> >  lib/lpm/rte_lpm_s390x.h                      | 130 ++++
> >  meson.build                                  |   2 +
> >  56 files changed, 2450 insertions(+), 18 deletions(-)
> >  create mode 100644 config/s390x/meson.build
> >  create mode 100644 config/s390x/s390x_linux_clang_ubuntu
> >  create mode 100644 drivers/net/i40e/i40e_rxtx_vec_s390x.c
> >  create mode 100644 examples/l3fwd/l3fwd_lpm_s390x.h
> >  create mode 100644 examples/l3fwd/l3fwd_s390x.h
> >  create mode 100644 lib/eal/s390x/include/meson.build
> >  create mode 100644 lib/eal/s390x/include/rte_atomic.h
> >  create mode 100644 lib/eal/s390x/include/rte_byteorder.h
> >  create mode 100644 lib/eal/s390x/include/rte_cpuflags.h
> >  create mode 100644 lib/eal/s390x/include/rte_cycles.h
> >  create mode 100644 lib/eal/s390x/include/rte_io.h
> >  create mode 100644 lib/eal/s390x/include/rte_mcslock.h
> >  create mode 100644 lib/eal/s390x/include/rte_memcpy.h
> >  create mode 100644 lib/eal/s390x/include/rte_pause.h
> >  create mode 100644 lib/eal/s390x/include/rte_power_intrinsics.h
> >  create mode 100644 lib/eal/s390x/include/rte_prefetch.h
> >  create mode 100644 lib/eal/s390x/include/rte_rwlock.h
> >  create mode 100644 lib/eal/s390x/include/rte_spinlock.h
> >  create mode 100644 lib/eal/s390x/include/rte_ticketlock.h
> >  create mode 100644 lib/eal/s390x/include/rte_vect.h
> >  create mode 100644 lib/eal/s390x/meson.build
> >  create mode 100644 lib/eal/s390x/rte_cpuflags.c
> >  create mode 100644 lib/eal/s390x/rte_cycles.c
> >  create mode 100644 lib/eal/s390x/rte_hypervisor.c
> >  create mode 100644 lib/eal/s390x/rte_power_intrinsics.c
> >  create mode 100644 lib/lpm/rte_lpm_s390x.h
>
> Quite a large patch, and never saw V2 of it.
> Also, who is maintainer of s390 port?
  

Patch

diff --git a/app/test-acl/main.c b/app/test-acl/main.c
index 06e3847ab9..1f567c5359 100644
--- a/app/test-acl/main.c
+++ b/app/test-acl/main.c
@@ -83,6 +83,10 @@  static const struct acl_alg acl_alg[] = {
 		.name = "altivec",
 		.alg = RTE_ACL_CLASSIFY_ALTIVEC,
 	},
+	{
+		.name = "s390x",
+		.alg = RTE_ACL_CLASSIFY_S390X,
+	},
 	{
 		.name = "avx512x16",
 		.alg = RTE_ACL_CLASSIFY_AVX512X16,
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index cc8e7aa138..2a863f3d39 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -245,9 +245,9 @@  nic_stats_display(portid_t port_id)
 	static uint64_t prev_bytes_tx[RTE_MAX_ETHPORTS];
 	static uint64_t prev_ns[RTE_MAX_ETHPORTS];
 	struct timespec cur_time;
-	uint64_t diff_pkts_rx, diff_pkts_tx, diff_bytes_rx, diff_bytes_tx,
-								diff_ns;
-	uint64_t mpps_rx, mpps_tx, mbps_rx, mbps_tx;
+    __uint128_t diff_pkts_rx, diff_pkts_tx, diff_bytes_rx, diff_bytes_tx,
+            diff_ns;
+    __uint128_t mpps_rx, mpps_tx, mbps_rx, mbps_tx;
 	struct rte_eth_stats stats;
 
 	static const char *nic_stats_border = "########################";
@@ -302,9 +302,9 @@  nic_stats_display(portid_t port_id)
 		(double)diff_bytes_tx / diff_ns * NS_PER_SEC : 0;
 
 	printf("\n  Throughput (since last show)\n");
-	printf("  Rx-pps: %12"PRIu64"          Rx-bps: %12"PRIu64"\n  Tx-pps: %12"
-	       PRIu64"          Tx-bps: %12"PRIu64"\n", mpps_rx, mbps_rx * 8,
-	       mpps_tx, mbps_tx * 8);
+    printf("  Rx-pps: %12llu          Rx-bps: %12llu \n  Tx-pps: %12llu          Tx-bps: %12llu \n",
+           (unsigned long long) mpps_rx, (unsigned long long) mbps_rx * 8,
+           (unsigned long long) mpps_tx, (unsigned long long) mbps_tx * 8);
 
 	if (xstats_display_num > 0)
 		nic_xstats_display_periodic(port_id);
diff --git a/app/test/test_acl.c b/app/test/test_acl.c
index 4d51098925..da16365294 100644
--- a/app/test/test_acl.c
+++ b/app/test/test_acl.c
@@ -351,6 +351,7 @@  test_classify_run(struct rte_acl_ctx *acx, struct ipv4_7tuple test_data[],
 		RTE_ACL_CLASSIFY_AVX2,
 		RTE_ACL_CLASSIFY_NEON,
 		RTE_ACL_CLASSIFY_ALTIVEC,
+        RTE_ACL_CLASSIFY_S390X,
 		RTE_ACL_CLASSIFY_AVX512X16,
 		RTE_ACL_CLASSIFY_AVX512X32,
 	};
diff --git a/app/test/test_atomic.c b/app/test/test_atomic.c
index e4b997827e..37ece78425 100644
--- a/app/test/test_atomic.c
+++ b/app/test/test_atomic.c
@@ -17,6 +17,7 @@ 
 #include <rte_lcore.h>
 #include <rte_random.h>
 #include <rte_hash_crc.h>
+#include <rte_byteorder.h>
 
 #include "test.h"
 
@@ -351,6 +352,7 @@  volatile uint16_t token16;
 volatile uint32_t token32;
 volatile uint64_t token64;
 
+#ifndef RTE_ARCH_S390X
 static void
 build_crc8_table(void)
 {
@@ -441,6 +443,8 @@  test_atomic_exchange(__rte_unused void *arg)
 
 	return 0;
 }
+#endif
+
 static int
 test_atomic(void)
 {
@@ -597,6 +601,7 @@  test_atomic(void)
 	}
 #endif
 
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 	/*
 	 * Test 16/32/64bit atomic exchange.
 	 */
@@ -628,7 +633,7 @@  test_atomic(void)
 		printf("Atomic exchange test failed\n");
 		return -1;
 	}
-
+#endif
 	return 0;
 }
 REGISTER_TEST_COMMAND(atomic_autotest, test_atomic);
diff --git a/app/test/test_cmdline.c b/app/test/test_cmdline.c
index 115bee966d..e0720ff345 100644
--- a/app/test/test_cmdline.c
+++ b/app/test/test_cmdline.c
@@ -10,21 +10,21 @@ 
 static int
 test_cmdline(void)
 {
-	printf("Testind parsing ethernet addresses...\n");
+	printf("Testing parsing ethernet addresses...\n");
 	if (test_parse_etheraddr_valid() < 0)
 		return -1;
 	if (test_parse_etheraddr_invalid_data() < 0)
 		return -1;
 	if (test_parse_etheraddr_invalid_param() < 0)
 		return -1;
-	printf("Testind parsing port lists...\n");
+	printf("Testing parsing port lists...\n");
 	if (test_parse_portlist_valid() < 0)
 		return -1;
 	if (test_parse_portlist_invalid_data() < 0)
 		return -1;
 	if (test_parse_portlist_invalid_param() < 0)
 		return -1;
-	printf("Testind parsing numbers...\n");
+	printf("Testing parsing numbers...\n");
 	if (test_parse_num_valid() < 0)
 		return -1;
 	if (test_parse_num_invalid_data() < 0)
diff --git a/app/test/test_cmdline_ipaddr.c b/app/test/test_cmdline_ipaddr.c
index f540063508..d950383e10 100644
--- a/app/test/test_cmdline_ipaddr.c
+++ b/app/test/test_cmdline_ipaddr.c
@@ -6,12 +6,14 @@ 
 #include <inttypes.h>
 
 #include <rte_string_fns.h>
+#include <rte_byteorder.h>
 
 #include <cmdline_parse.h>
 #include <cmdline_parse_ipaddr.h>
 
 #include "test_cmdline.h"
 
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #define IP4(a,b,c,d) {.s_addr = (uint32_t)(((a) & 0xff) | \
 					   (((b) & 0xff) << 8) | \
 					   (((c) & 0xff) << 16)  | \
@@ -19,6 +21,15 @@ 
 
 #define U16_SWAP(x) \
 		(((x & 0xFF) << 8) | ((x & 0xFF00) >> 8))
+#else
+#define IP4(a,b,c,d) {((uint32_t)(((a) & 0xff) << 24) | \
+					   (((b) & 0xff) << 16) | \
+					   (((c) & 0xff) << 8)  | \
+					   ((d) & 0xff))}
+
+#define U16_SWAP(x) x
+
+#endif
 
 /* create IPv6 address, swapping bytes where needed */
 #ifndef s6_addr16
diff --git a/app/test/test_cmdline_num.c b/app/test/test_cmdline_num.c
index 9276de59bd..a710109707 100644
--- a/app/test/test_cmdline_num.c
+++ b/app/test/test_cmdline_num.c
@@ -10,6 +10,7 @@ 
 
 #include <cmdline_parse.h>
 #include <cmdline_parse_num.h>
+#include <rte_byteorder.h>
 
 #include "test_cmdline.h"
 
@@ -438,6 +439,48 @@  test_parse_num_valid(void)
 			/* check if result matches what it should have matched
 			 * since unsigned numbers don't care about number of bits, we can just convert
 			 * everything to uint64_t without any worries. */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+            switch (type) {
+                case RTE_UINT8:
+                {
+                    uint8_t *temp = (uint8_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_UINT16:
+                {
+                    uint16_t *temp = (uint16_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_UINT32:
+                {
+                    uint32_t *temp = (uint32_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_INT8:
+                {
+                    int8_t *temp = (int8_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_INT16:
+                {
+                    int16_t *temp = (int16_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_INT32:
+                {
+                    int32_t *temp = (int32_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                default:
+                    break;
+            }
+#endif
 			if (ret > 0 && num_valid_positive_strs[i].result != result) {
 				printf("Error: parsing %s as %s failed: result mismatch!\n",
 						num_valid_positive_strs[i].str, buf);
@@ -467,6 +510,7 @@  test_parse_num_valid(void)
 			 * the result is signed in this case, so we have to account for that */
 			if (ret > 0) {
 				/* detect negative */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 				switch (type) {
 				case RTE_INT8:
 					result = (int8_t) result;
@@ -480,6 +524,30 @@  test_parse_num_valid(void)
 				default:
 					break;
 				}
+#else
+                switch (type) {
+				case RTE_INT8:
+				{
+					int8_t *temp = (int8_t *)&result;
+					result = *temp;
+					break;
+				}
+				case RTE_INT16:
+				{
+					int16_t *temp = (int16_t *)&result;
+					result = *temp;
+					break;
+				}
+				case RTE_INT32:
+				{
+					int32_t *temp = (int32_t *)&result;
+					result = *temp;
+					break;
+				}
+				default:
+					break;
+				}
+#endif
 				if (num_valid_negative_strs[i].result == (int64_t) result)
 					continue;
 				printf("Error: parsing %s as %s failed: result mismatch!\n",
@@ -516,6 +584,48 @@  test_parse_num_valid(void)
 			/* check if result matches what it should have matched
 			 * since unsigned numbers don't care about number of bits, we can just convert
 			 * everything to uint64_t without any worries. */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+            switch (type) {
+                case RTE_UINT8:
+                {
+                    uint8_t *temp = (uint8_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_UINT16:
+                {
+                    uint16_t *temp = (uint16_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_UINT32:
+                {
+                    uint32_t *temp = (uint32_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_INT8:
+                {
+                    int8_t *temp = (int8_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_INT16:
+                {
+                    int16_t *temp = (int16_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                case RTE_INT32:
+                {
+                    int32_t *temp = (int32_t *)&result;
+                    result = *temp;
+                    break;
+                }
+                default:
+                    break;
+            }
+#endif
 			if (ret > 0 && num_garbage_positive_strs[i].result != result) {
 				printf("Error: parsing %s as %s failed: result mismatch!\n",
 						num_garbage_positive_strs[i].str, buf);
diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index 76d51b6e71..b387d0eabb 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -25,6 +25,7 @@ 
  * e.g.: key size = 4, key = 0x03020100
  *       key size = 8, key = 0x0706050403020100
  */
+#if !defined(RTE_ARCH_S390X)
 static uint32_t hash_values_jhash[2][12] = {{
 	0x8ba9414b, 0xdf0d39c9,
 	0xe4cf1d42, 0xd4ccb93c, 0x5e84eafc, 0x21362cfe,
@@ -51,6 +52,34 @@  static uint32_t hash_values_crc[2][12] = {{
 	0x789c104f, 0x53028d3e
 }
 };
+#else
+static uint32_t hash_values_jhash[2][12] = {{
+	0x8ba9414b, 0x8a2f8eb,
+	0x55dcd60b, 0xf0b95bfe, 0x1a28d94c, 0x003d8f00,
+	0x84c90b2c, 0x24b83acf, 0x5e16af2f, 0x751c9f59,
+	0x665b8254, 0x6e347c81
+},
+{
+	0x5c62c303, 0xb21d4b7b,
+	0xa33cdfcf, 0x47cf3d14, 0x1cae829f, 0x1253a9ea,
+	0x7171efd1, 0xcef21db0, 0x3df3f5fe, 0x35fd67d2,
+	0x2922cbc4, 0xeaee5c5c
+}
+};
+static uint32_t hash_values_crc[2][12] = {{
+	0x00000000, 0x13a29877,
+	0x3eef4343, 0xb6719589, 0x938d3d79, 0xed93196b,
+	0xe710a46c, 0x81f7ab71, 0x702bc9ee, 0x26c72488,
+	0x2e7092a9, 0xf2fbc80b
+},
+{
+	0xbdfd3980, 0x91e95e36,
+	0x37765e57, 0x6559eb17, 0x49c8a164, 0x18daa0d3,
+	0x67065980, 0x62f966d0, 0x4e28a2a0, 0xe342d18f,
+	0x1518c680, 0xebe8026b
+}
+};
+#endif
 
 /*******************************************************************************
  * Hash function performance test configuration section. Each performance test
diff --git a/app/test/test_xmmt_ops.h b/app/test/test_xmmt_ops.h
index 3a82d5ecac..a11f759af4 100644
--- a/app/test/test_xmmt_ops.h
+++ b/app/test/test_xmmt_ops.h
@@ -49,6 +49,20 @@  vect_set_epi32(int i3, int i2, int i1, int i0)
 	return data;
 }
 
+#elif defined(RTE_ARCH_S390X)
+
+/* loads the xmm_t value from address p(does not need to be 16-byte aligned)*/
+#define vect_loadu_sil128(p) vec_xld2(0, (signed int *)p)
+
+/* sets the 4 signed 32-bit integer values and returns the xmm_t variable */
+static __rte_always_inline xmm_t
+vect_set_epi32(int i3, int i2, int i1, int i0)
+{
+	xmm_t data = (xmm_t){i0, i1, i2, i3};
+
+	return data;
+}
+
 #endif
 
 #endif /* _TEST_XMMT_OPS_H_ */
diff --git a/buildtools/pmdinfogen.py b/buildtools/pmdinfogen.py
index 2a44f17bda..10467c1a3e 100755
--- a/buildtools/pmdinfogen.py
+++ b/buildtools/pmdinfogen.py
@@ -16,8 +16,15 @@ 
 except ImportError:
     pass
 
-import coff
+try:
+    import coff
+except TypeError:
+    pass
 
+def decode_asciiz(data):
+    index = data.find(b'\x00')
+    end = index if index >= 0 else len(data)
+    return data[:end].decode()
 
 class ELFSymbol:
     def __init__(self, image, symbol):
@@ -28,7 +35,7 @@  def __init__(self, image, symbol):
     def string_value(self):
         size = self._symbol["st_size"]
         value = self.get_value(0, size)
-        return coff.decode_asciiz(value)  # not COFF-specific
+        return decode_asciiz(value)  # not COFF-specific
 
     def get_value(self, offset, size):
         section = self._symbol["st_shndx"]
diff --git a/config/meson.build b/config/meson.build
index 7134e80e8d..407aa1483d 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -121,6 +121,8 @@  if cpu_instruction_set == 'generic'
         cpu_instruction_set = 'generic'
     elif host_machine.cpu_family().startswith('ppc')
         cpu_instruction_set = 'power8'
+	elif host_machine.cpu_family().startswith('s390x')
+		machine = 'z13'
     endif
 endif
 
diff --git a/config/s390x/meson.build b/config/s390x/meson.build
new file mode 100644
index 0000000000..b15e74ba44
--- /dev/null
+++ b/config/s390x/meson.build
@@ -0,0 +1,51 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# (c) Copyright IBM Corp. 2019, 2020
+
+if not dpdk_conf.get('RTE_ARCH_64')
+	error('Only 64-bit compiles are supported for this platform type')
+endif
+dpdk_conf.set('RTE_ARCH', 's390x')
+dpdk_conf.set('RTE_ARCH_S390X', 1)
+dpdk_conf.set('RTE_FORCE_INTRINSICS', 1)
+
+# overrides specific to s390x
+dpdk_conf.set('RTE_MAX_LCORE', 256)
+dpdk_conf.set('RTE_MAX_NUMA_NODES', 32)
+dpdk_conf.set('RTE_CACHE_LINE_SIZE', 128)
+
+
+
+# default to z13
+cpu_instruction_set = 'z13'
+
+# test compiler support
+cc_march_z14 = cc.has_argument('-march=z14')
+cc_march_z15 = cc.has_argument('-march=z15')
+
+
+machine_args = ['-march=' + cpu_instruction_set, '-mtune=' + cpu_instruction_set]
+
+dpdk_conf.set('RTE_MACHINE','s390x')
+dpdk_conf.set('RTE_MACHINE_CPUFLAG_ZARCH', 1)   # should this be z# 13 ?
+#dpdk_conf.set('RTE_MACHINE', cpu_instruction_set)
+
+if (cc.get_define('__s390x__', args: machine_args) != '')
+    compile_time_cpuflags += ['RTE_MACHINE_CPUFLAG_ZARCH']
+endif
+
+
+# Suppress the gcc warning "note: the layout of aggregates containing
+# vectors with 4-byte alignment has changed in GCC 5".
+if (cc.get_id() == 'gcc' and cc.version().version_compare('>=10.0') and
+        cc.version().version_compare('<12.0') and cc.has_argument('-Wno-psabi'))
+    add_project_arguments('-Wno-psabi', language: 'c')
+endif
+
+
+
+
+
+
+
+
+
diff --git a/config/s390x/s390x_linux_clang_ubuntu b/config/s390x/s390x_linux_clang_ubuntu
new file mode 100644
index 0000000000..952d1ce460
--- /dev/null
+++ b/config/s390x/s390x_linux_clang_ubuntu
@@ -0,0 +1,19 @@ 
+[binaries]
+c = 'clang'
+cpp = 'clang++'
+ar = 'llvm-ar'
+strip = 'llvm-strip'
+llvm-config = 'llvm-config'
+pcap-config = 'llvm-config'
+pkgconfig = 'pkg-config'
+
+[host_machine]
+system = 'linux'
+cpu_family = 's390x'
+cpu = 'z13'
+endian = 'big'
+
+[properties]
+platform = 'generic'
+c_args = ['-target', 'aarch64-linux-gnu', '--sysroot', '/usr/aarch64-linux-gnu']
+c_link_args = ['-target', 'aarch64-linux-gnu', '-fuse-ld=lld', '--gcc-toolchain=/usr']
diff --git a/doc/guides/nics/features/i40e.ini b/doc/guides/nics/features/i40e.ini
index dd18fec217..bc0c8b1969 100644
--- a/doc/guides/nics/features/i40e.ini
+++ b/doc/guides/nics/features/i40e.ini
@@ -50,6 +50,7 @@  x86-32               = Y
 x86-64               = Y
 ARMv8                = Y
 Power8               = Y
+s390x                = Y
 
 [rte_flow items]
 ah                   = Y
diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index 63f31437da..61fd6afa02 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -20,6 +20,11 @@ 
 #include <rte_spinlock.h>
 #include <rte_os_shim.h>
 
+/* s390x pci implemenation. */
+#ifdef RTE_MACHINE_CPUFLAG_ZARCH
+#include <rte_io.h>
+#endif
+
 #include "mlx5_prm.h"
 #include "mlx5_devx_cmds.h"
 #include "mlx5_common_os.h"
@@ -358,7 +363,11 @@  mlx5_doorbell_ring(struct mlx5_uar_data *uar, uint64_t val, uint32_t index,
 	/* Ensure ordering between DB record actual update and UAR access. */
 	rte_wmb();
 #ifdef RTE_ARCH_64
+# ifndef RTE_MACHINE_CPUFLAG_ZARCH
 	*uar->db = val;
+# else
+    rte_write64_relaxed(val, uar->db);
+# endif
 #else /* !RTE_ARCH_64 */
 	rte_spinlock_lock(uar->sl_p);
 	*(volatile uint32_t *)uar->db = val;
diff --git a/drivers/net/i40e/i40e_rxtx_vec_s390x.c b/drivers/net/i40e/i40e_rxtx_vec_s390x.c
new file mode 100644
index 0000000000..1cee842ad8
--- /dev/null
+++ b/drivers/net/i40e/i40e_rxtx_vec_s390x.c
@@ -0,0 +1,630 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * (c) Copyright IBM Corp. 2017, 2019
+ */
+
+#include <stdint.h>
+#include <vecintrin.h>
+#include <rte_ethdev_driver.h>
+#include <rte_malloc.h>
+
+#include "base/i40e_prototype.h"
+#include "base/i40e_type.h"
+#include "i40e_ethdev.h"
+#include "i40e_rxtx.h"
+#include "i40e_rxtx_vec_common.h"
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+typedef unsigned long long vector_unsigned_long_long
+	__attribute__((vector_size(2 * sizeof(unsigned long long))));
+typedef unsigned int vector_unsigned_int
+	__attribute__((vector_size(4 * sizeof(unsigned int))));
+typedef unsigned short vector_unsigned_short
+	__attribute__((vector_size(8 * sizeof(unsigned short))));
+typedef unsigned char vector_unsigned_char
+	__attribute__((vector_size(16 * sizeof(unsigned char))));
+
+
+static inline void
+i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+{
+	int i;
+	uint16_t rx_id;
+	volatile union i40e_rx_desc *rxdp;
+
+	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct rte_mbuf *mb0, *mb1;
+
+	vector_unsigned_long_long hdr_room = (vector_unsigned_long_long){
+						RTE_PKTMBUF_HEADROOM,
+						RTE_PKTMBUF_HEADROOM};
+	vector_unsigned_long_long dma_addr0, dma_addr1;
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+	/* Pull 'n' more MBUFs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)rxep,
+				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			dma_addr0 = (vector_unsigned_long_long){};
+			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+				rxep[i].mbuf = &rxq->fake_mbuf;
+				vec_xstd2(dma_addr0, 0,
+					(unsigned long long *)&rxdp[i].read);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			RTE_I40E_RXQ_REARM_THRESH;
+		return;
+	}
+
+	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
+	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+		vector_unsigned_long_long vaddr0, vaddr1;
+		uintptr_t p0, p1;
+
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
+
+		 /* Flush mbuf with pkt template.
+		  * Data to be rearmed is 6 bytes long.
+		  * Though, RX will overwrite ol_flags that are coming next
+		  * anyway. So overwrite whole 8 bytes with one load:
+		  * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
+		  */
+		p0 = (uintptr_t)&mb0->rearm_data;
+		*(uint64_t *)p0 = rxq->mbuf_initializer;
+		p1 = (uintptr_t)&mb1->rearm_data;
+		*(uint64_t *)p1 = rxq->mbuf_initializer;
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		vaddr0 = vec_xld2(0, (unsigned long long *)&mb0->buf_addr);
+		vaddr1 = vec_xld2(0, (unsigned long long *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = vec_mergel(vaddr0, vaddr0);
+		dma_addr1 = vec_mergel(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = dma_addr0 + hdr_room;
+		dma_addr1 = dma_addr1 + hdr_room;
+
+		/* flush desc with pa dma_addr */
+		vec_xstd2(dma_addr0, 0, (unsigned long long *)&rxdp++->read);
+		vec_xstd2(dma_addr1, 0, (unsigned long long *)&rxdp++->read);
+	}
+
+	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+
+	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Update the tail pointer on the NIC */
+	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+}
+
+static inline void
+desc_to_olflags_v(vector_unsigned_long_long descs[4], struct rte_mbuf **rx_pkts)
+{
+	vector_unsigned_int vlan0, vlan1, rss, l3_l4e;
+
+	/* mask everything except RSS, flow director and VLAN flags
+	 * bit2 is for VLAN tag, bit11 for flow director indication
+	 * bit13:12 for RSS indication.
+	 */
+	const vector_unsigned_int rss_vlan_msk = (vector_unsigned_int){
+			(int32_t)0x1c03804, (int32_t)0x1c03804,
+			(int32_t)0x1c03804, (int32_t)0x1c03804};
+
+	/* map rss and vlan type to rss hash and vlan flag */
+	const vector_unsigned_char vlan_flags = (vector_unsigned_char){
+			0, 0, 0, 0,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0};
+
+	const vector_unsigned_char rss_flags = (vector_unsigned_char){
+			0, PKT_RX_FDIR, 0, 0,
+			0, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH | PKT_RX_FDIR,
+			0, 0, 0, 0,
+			0, 0, 0, 0};
+
+	const vector_unsigned_char l3_l4e_flags = (vector_unsigned_char){
+			0,
+			PKT_RX_IP_CKSUM_BAD,
+			PKT_RX_L4_CKSUM_BAD,
+			PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
+			PKT_RX_IP_CKSUM_BAD,
+			PKT_RX_IP_CKSUM_BAD,
+			PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+			PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+			0, 0, 0, 0, 0, 0, 0, 0};
+
+	vlan0 = (vector_unsigned_int)vec_mergel(descs[0], descs[1]);
+	vlan1 = (vector_unsigned_int)vec_mergel(descs[2], descs[3]);
+	vlan0 = (vector_unsigned_int)vec_mergeh(vlan0, vlan1);
+
+	vlan1 = vec_and(vlan0, rss_vlan_msk);
+	vlan0 = (vector_unsigned_int)vec_perm(vlan_flags,
+					(vector_unsigned_char){},
+					*(vector_unsigned_char *)&vlan1);
+
+	rss[0] = (uint32_t)vlan1[0] >> 11;
+	rss[1] = (uint32_t)vlan1[1] >> 11;
+	rss[2] = (uint32_t)vlan1[2] >> 11;
+	rss[3] = (uint32_t)vlan1[3] >> 11;
+	rss = (vector_unsigned_int)vec_perm(rss_flags, (vector_unsigned_char){},
+					*(vector_unsigned_char *)&rss);
+
+	l3_l4e[0] = (uint32_t)vlan1[0] >> 22;
+	l3_l4e[1] = (uint32_t)vlan1[1] >> 22;
+	l3_l4e[2] = (uint32_t)vlan1[2] >> 22;
+	l3_l4e[3] = (uint32_t)vlan1[3] >> 22;
+
+	l3_l4e = (vector_unsigned_int)vec_perm(l3_l4e_flags,
+					(vector_unsigned_char){},
+					*(vector_unsigned_char *)&l3_l4e);
+
+	vlan0 = vec_or(vlan0, rss);
+	vlan0 = vec_or(vlan0, l3_l4e);
+
+	rx_pkts[0]->ol_flags = (uint64_t)vlan0[2];
+	rx_pkts[1]->ol_flags = (uint64_t)vlan0[3];
+	rx_pkts[2]->ol_flags = (uint64_t)vlan0[0];
+	rx_pkts[3]->ol_flags = (uint64_t)vlan0[1];
+}
+
+#define PKTLEN_SHIFT     10
+
+static inline void
+desc_to_ptype_v(vector_unsigned_long_long descs[4], struct rte_mbuf **rx_pkts,
+		uint32_t *ptype_tbl)
+{
+	vector_unsigned_long_long ptype0 = vec_mergel(descs[0], descs[1]);
+	vector_unsigned_long_long ptype1 = vec_mergel(descs[2], descs[3]);
+
+	ptype0[0] = ptype0[0] >> 30;
+	ptype0[1] = ptype0[1] >> 30;
+
+	ptype1[0] = ptype1[0] >> 30;
+	ptype1[1] = ptype1[1] >> 30;
+
+	rx_pkts[0]->packet_type =
+		ptype_tbl[(*(vector_unsigned_char *)&ptype0)[0]];
+	rx_pkts[1]->packet_type =
+		ptype_tbl[(*(vector_unsigned_char *)&ptype0)[8]];
+	rx_pkts[2]->packet_type =
+		ptype_tbl[(*(vector_unsigned_char *)&ptype1)[0]];
+	rx_pkts[3]->packet_type =
+		ptype_tbl[(*(vector_unsigned_char *)&ptype1)[8]];
+}
+
+ /* Notice:
+  * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+  * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
+  *   numbers of DD bits
+  */
+static inline uint16_t
+_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+		   uint16_t nb_pkts, uint8_t *split_packet)
+{
+	volatile union i40e_rx_desc *rxdp;
+	struct i40e_rx_entry *sw_ring;
+	uint16_t nb_pkts_recd;
+	int pos;
+	uint64_t var;
+	vector_unsigned_char shuf_msk;
+	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	vector_unsigned_short crc_adjust = (vector_unsigned_short){
+		0, 0,         /* ignore pkt_type field */
+		rxq->crc_len, /* sub crc on pkt_len */
+		0,            /* ignore high-16bits of pkt_len */
+		rxq->crc_len, /* sub crc on data_len */
+		0, 0, 0       /* ignore non-length fields */
+		};
+	vector_unsigned_long_long dd_check, eop_check;
+
+	/* nb_pkts shall be less equal than RTE_I40E_MAX_RX_BURST */
+	nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST);
+
+	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);
+
+	/* Just the act of getting into the function from the application is
+	 * going to cost about 7 cycles
+	 */
+	rxdp = rxq->rx_ring + rxq->rx_tail;
+
+	rte_prefetch0(rxdp);
+
+	/* See if we need to rearm the RX queue - gives the prefetch a bit
+	 * of time to act
+	 */
+	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
+		i40e_rxq_rearm(rxq);
+
+	/* Before we start moving massive data around, check to see if
+	 * there is actually a packet available
+	 */
+	if (!(rxdp->wb.qword1.status_error_len &
+			rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
+		return 0;
+
+	/* 4 packets DD mask */
+	dd_check = (vector_unsigned_long_long){0x0000000100000001ULL,
+					  0x0000000100000001ULL};
+
+	/* 4 packets EOP mask */
+	eop_check = (vector_unsigned_long_long){0x0000000200000002ULL,
+					   0x0000000200000002ULL};
+
+	/* mask to shuffle from desc. to mbuf */
+	shuf_msk = (vector_unsigned_char){
+		0xFF, 0xFF,   /* pkt_type set as unknown */
+		0xFF, 0xFF,   /* pkt_type set as unknown */
+		14, 15,       /* octet 15~14, low 16 bits pkt_len */
+		0xFF, 0xFF,   /* skip high 16 bits pkt_len, zero out */
+		14, 15,       /* octet 15~14, 16 bits data_len */
+		2, 3,         /* octet 2~3, low 16 bits vlan_macip */
+		4, 5, 6, 7    /* octet 4~7, 32bits rss */
+		};
+
+	/* Cache is empty -> need to scan the buffer rings, but first move
+	 * the next 'n' mbufs into the cache
+	 */
+	sw_ring = &rxq->sw_ring[rxq->rx_tail];
+
+	/* A. load 4 packet in one loop
+	 * [A*. mask out 4 unused dirty field in desc]
+	 * B. copy 4 mbuf point from swring to rx_pkts
+	 * C. calc the number of DD bits among the 4 packets
+	 * [C*. extract the end-of-packet bit, if requested]
+	 * D. fill info. from desc to mbuf
+	 */
+
+	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
+			pos += RTE_I40E_DESCS_PER_LOOP,
+			rxdp += RTE_I40E_DESCS_PER_LOOP) {
+		vector_unsigned_long_long descs[RTE_I40E_DESCS_PER_LOOP];
+		vector_unsigned_char pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		vector_unsigned_short staterr, sterr_tmp1, sterr_tmp2;
+		vector_unsigned_long_long mbp1, mbp2;  /* two mbuf pointer
+							* in one XMM reg.
+							*/
+
+		/* B.1 load 1 mbuf point */
+		mbp1 = *(vector_unsigned_long_long *)&sw_ring[pos];
+		/* Read desc statuses backwards to avoid race condition */
+		/* A.1 load 4 pkts desc */
+		descs[3] = *(vector_unsigned_long_long *)(rxdp + 3);
+		rte_compiler_barrier();
+
+		/* B.2 copy 2 mbuf point into rx_pkts  */
+		*(vector_unsigned_long_long *)&rx_pkts[pos] = mbp1;
+
+		/* B.1 load 1 mbuf point */
+		mbp2 = *(vector_unsigned_long_long *)&sw_ring[pos + 2];
+
+		descs[2] = *(vector_unsigned_long_long *)(rxdp + 2);
+		rte_compiler_barrier();
+		/* B.1 load 2 mbuf point */
+		descs[1] = *(vector_unsigned_long_long *)(rxdp + 1);
+		rte_compiler_barrier();
+		descs[0] = *(vector_unsigned_long_long *)(rxdp);
+
+		/* B.2 copy 2 mbuf point into rx_pkts  */
+		*(vector_unsigned_long_long *)&rx_pkts[pos + 2] =  mbp2;
+
+		if (split_packet) {
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+		}
+
+		/* avoid compiler reorder optimization */
+		rte_compiler_barrier();
+
+		/* pkt 3,4 shift the pktlen field to be 16-bit aligned*/
+		vector_unsigned_int len3_temp = vec_xld2(0,
+				(unsigned int *)&descs[3]);
+		len3_temp[3] = len3_temp[3] << PKTLEN_SHIFT;
+		const vector_unsigned_int len3 = len3_temp;
+
+		vector_unsigned_int len2_temp = vec_xld2(0,
+				(unsigned int *)&descs[2]);
+		len2_temp[3] = len2_temp[3] << PKTLEN_SHIFT;
+		const vector_unsigned_int len2 = len2_temp;
+
+		/* merge the now-aligned packet length fields back in */
+		descs[3] = (vector_unsigned_long_long)len3;
+		descs[2] = (vector_unsigned_long_long)len2;
+
+		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
+		pkt_mb4 = vec_perm((vector_unsigned_char)descs[3],
+				  (vector_unsigned_char){}, shuf_msk);
+		pkt_mb3 = vec_perm((vector_unsigned_char)descs[2],
+				  (vector_unsigned_char){}, shuf_msk);
+
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp2 = vec_mergel((vector_unsigned_short)descs[3],
+					(vector_unsigned_short)descs[2]);
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp1 = vec_mergel((vector_unsigned_short)descs[1],
+					(vector_unsigned_short)descs[0]);
+		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
+		pkt_mb4 = (vector_unsigned_char)((vector_unsigned_short)pkt_mb4
+				- crc_adjust);
+		pkt_mb3 = (vector_unsigned_char)((vector_unsigned_short)pkt_mb3
+				- crc_adjust);
+
+		/* pkt 1,2 shift the pktlen field to be 16-bit aligned*/
+		const vector_unsigned_int len1 =
+			vec_sll(vec_xld2(0, (unsigned int *)&descs[1]),
+			(vector_unsigned_int){0, 0, 0, PKTLEN_SHIFT});
+		const vector_unsigned_int len0 =
+			vec_sll(vec_xld2(0, (unsigned int *)&descs[0]),
+			(vector_unsigned_int){0, 0, 0, PKTLEN_SHIFT});
+
+		/* merge the now-aligned packet length fields back in */
+		descs[1] = (vector_unsigned_long_long)len1;
+		descs[0] = (vector_unsigned_long_long)len0;
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb2 = vec_perm((vector_unsigned_char)descs[1],
+				   (vector_unsigned_char){}, shuf_msk);
+		pkt_mb1 = vec_perm((vector_unsigned_char)descs[0],
+				   (vector_unsigned_char){}, shuf_msk);
+
+		/* C.2 get 4 pkts staterr value  */
+		staterr = (vector_unsigned_short)vec_mergeh(sterr_tmp1,
+				sterr_tmp2);
+
+		/* D.3 copy final 3,4 data to rx_pkts */
+		vec_xstd2(pkt_mb4, 0, (unsigned char *)&rx_pkts[pos + 3]
+			->rx_descriptor_fields1);
+		vec_xstd2(pkt_mb3, 0, (unsigned char *)&rx_pkts[pos + 2]
+			->rx_descriptor_fields1);
+
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb2 = (vector_unsigned_char)((vector_unsigned_short)pkt_mb2
+				- crc_adjust);
+		pkt_mb1 = (vector_unsigned_char)((vector_unsigned_short)pkt_mb1
+				- crc_adjust);
+
+		/* C* extract and record EOP bit */
+		if (split_packet) {
+			vector_unsigned_char eop_shuf_mask =
+				(vector_unsigned_char){
+					0xFF, 0xFF, 0xFF, 0xFF,
+					0xFF, 0xFF, 0xFF, 0xFF,
+					0xFF, 0xFF, 0xFF, 0xFF,
+					0x04, 0x0C, 0x00, 0x08
+				};
+
+			/* and with mask to extract bits, flipping 1-0 */
+			vector_unsigned_char eop_bits =
+				vec_and((vector_unsigned_char)vec_nor(staterr,
+				staterr), (vector_unsigned_char)eop_check);
+			/* the staterr values are not in order, as the count
+			 * count of dd bits doesn't care. However, for end of
+			 * packet tracking, we do care, so shuffle. This also
+			 * compresses the 32-bit values to 8-bit
+			 */
+			eop_bits = vec_perm(eop_bits, (vector_unsigned_char){},
+					    eop_shuf_mask);
+			/* store the resulting 32-bit value */
+			*split_packet = (vec_xld2(0,
+					 (unsigned int *)&eop_bits))[0];
+			split_packet += RTE_I40E_DESCS_PER_LOOP;
+
+			/* zero-out next pointers */
+			rx_pkts[pos]->next = NULL;
+			rx_pkts[pos + 1]->next = NULL;
+			rx_pkts[pos + 2]->next = NULL;
+			rx_pkts[pos + 3]->next = NULL;
+		}
+
+		/* C.3 calc available number of desc */
+		staterr = vec_and(staterr, (vector_unsigned_short)dd_check);
+
+		/* D.3 copy final 1,2 data to rx_pkts */
+		vec_xstd2(pkt_mb2, 0, (unsigned char *)&rx_pkts[pos + 1]
+			->rx_descriptor_fields1);
+		vec_xstd2(pkt_mb1, 0, (unsigned char *)&rx_pkts[pos]
+			->rx_descriptor_fields1);
+
+		desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
+		desc_to_olflags_v(descs, &rx_pkts[pos]);
+
+		/* C.4 calc avaialbe number of desc */
+		var = __builtin_popcountll((vec_xld2(0,
+			(unsigned long long *)&staterr)[0]));
+		nb_pkts_recd += var;
+		if (likely(var != RTE_I40E_DESCS_PER_LOOP))
+			break;
+	}
+
+	/* Update our internal tail pointer */
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
+	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);
+
+	return nb_pkts_recd;
+}
+
+ /* Notice:
+  * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+  * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
+  *   numbers of DD bits
+  */
+uint16_t
+i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+		   uint16_t nb_pkts)
+{
+	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
+}
+
+ /* vPMD receive routine that reassembles scattered packets
+  * Notice:
+  * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
+  * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
+  *   numbers of DD bits
+  */
+uint16_t
+i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+			     uint16_t nb_pkts)
+{
+	struct i40e_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
+			split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+	if (rxq->pkt_first_seg == NULL &&
+	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
+	    split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly*/
+	unsigned int i = 0;
+
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble then*/
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+		&split_flags[i]);
+}
+
+static inline void
+vtx1(volatile struct i40e_tx_desc *txdp,
+	struct rte_mbuf *pkt, uint64_t flags)
+{
+	uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
+		((uint64_t)flags  << I40E_TXD_QW1_CMD_SHIFT) |
+		((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
+
+	vector_unsigned_long_long descriptor = (vector_unsigned_long_long){
+		pkt->buf_iova + pkt->data_off, high_qw};
+	*(vector_unsigned_long_long *)txdp = descriptor;
+}
+
+static inline void
+vtx(volatile struct i40e_tx_desc *txdp,
+	struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
+{
+	int i;
+
+	for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
+		vtx1(txdp, *pkt, flags);
+}
+
+uint16_t
+i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
+			  uint16_t nb_pkts)
+{
+	struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue;
+	volatile struct i40e_tx_desc *txdp;
+	struct i40e_tx_entry *txep;
+	uint16_t n, nb_commit, tx_id;
+	uint64_t flags = I40E_TD_CMD;
+	uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
+	int i;
+
+	/* cross rx_thresh boundary is not allowed */
+	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
+
+	if (txq->nb_tx_free < txq->tx_free_thresh)
+		i40e_tx_free_bufs(txq);
+
+	nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
+	nb_commit = nb_pkts;
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	tx_id = txq->tx_tail;
+	txdp = &txq->tx_ring[tx_id];
+	txep = &txq->sw_ring[tx_id];
+
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
+
+	n = (uint16_t)(txq->nb_tx_desc - tx_id);
+	if (nb_commit >= n) {
+		tx_backlog_entry(txep, tx_pkts, n);
+
+		for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
+			vtx1(txdp, *tx_pkts, flags);
+
+		vtx1(txdp, *tx_pkts++, rs);
+
+		nb_commit = (uint16_t)(nb_commit - n);
+
+		tx_id = 0;
+		txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
+
+		/* avoid reach the end of ring */
+		txdp = &txq->tx_ring[tx_id];
+		txep = &txq->sw_ring[tx_id];
+	}
+
+	tx_backlog_entry(txep, tx_pkts, nb_commit);
+
+	vtx(txdp, tx_pkts, nb_commit, flags);
+
+	tx_id = (uint16_t)(tx_id + nb_commit);
+	if (tx_id > txq->tx_next_rs) {
+		txq->tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+			rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
+						I40E_TXD_QW1_CMD_SHIFT);
+		txq->tx_next_rs =
+			(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
+	}
+
+	txq->tx_tail = tx_id;
+
+	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+
+	return nb_pkts;
+}
+
+void __attribute__((cold))
+i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+{
+	_i40e_rx_queue_release_mbufs_vec(rxq);
+}
+
+int __attribute__((cold))
+i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+{
+	return i40e_rxq_vec_setup_default(rxq);
+}
+
+int __attribute__((cold))
+i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused * txq)
+{
+	return 0;
+}
+
+int __attribute__((cold))
+i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
+{
+	return i40e_rx_vec_dev_conf_condition_check_default(dev);
+}
diff --git a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build
index efc5f93e35..88fac6fc2c 100644
--- a/drivers/net/i40e/meson.build
+++ b/drivers/net/i40e/meson.build
@@ -73,6 +73,8 @@  if arch_subdir == 'x86'
     endif
 elif arch_subdir == 'ppc'
        sources += files('i40e_rxtx_vec_altivec.c')
+elif arch_subdir == 's390x'
+       sources += files('i40e_rxtx_vec_s390x.c')
 elif arch_subdir == 'arm'
        sources += files('i40e_rxtx_vec_neon.c')
 endif
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 9e8ea366a5..98d8eb93eb 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -5958,7 +5958,7 @@  ixgbe_config_rss_filter(struct rte_eth_dev *dev,
 }
 
 /* Stubs needed for linkage when RTE_ARCH_PPC_64 is set */
-#if defined(RTE_ARCH_PPC_64)
+#if defined(RTE_ARCH_PPC_64) || defined(RTE_ARCH_S390X)
 int
 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
 {
diff --git a/drivers/net/memif/rte_eth_memif.h b/drivers/net/memif/rte_eth_memif.h
index a5ee23d42e..0270e7859a 100644
--- a/drivers/net/memif/rte_eth_memif.h
+++ b/drivers/net/memif/rte_eth_memif.h
@@ -178,6 +178,8 @@  const char *memif_version(void);
 #define __NR_memfd_create 279
 #elif defined __powerpc__
 #define __NR_memfd_create 360
+#elif defined __s390x__
+#define __NR_memfd_create 350
 #elif defined __i386__
 #define __NR_memfd_create 356
 #else
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index e5eea0ad94..7618a68c4c 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -209,6 +209,8 @@  mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
 		snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon");
 #elif defined RTE_ARCH_PPC_64
 		snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec");
+#elif defined RTE_ARCH_S390X
+        snprintf(mode->info, sizeof(mode->info), "%s", "Vector S390X");
 #else
 		return -EINVAL;
 #endif
@@ -219,6 +221,8 @@  mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
 		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon");
 #elif defined RTE_ARCH_PPC_64
 		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec");
+#elif defined RTE_ARCH_S390X
+        snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector S390X");
 #else
 		return -EINVAL;
 #endif
@@ -313,12 +317,24 @@  rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 	uint8_t ptype;
 	uint8_t pinfo = (cqe->pkt_info & 0x3) << 6;
 
+    /*
+     * hdr_type_etc is from the cqe thus it is BE
+     * the logic below did not convert BE -> LE prior
+     * to using the value of it.  So the logic below
+     * is written for LE thus the value of hdr_type_etc has
+     * to be converted from LE to BE for the logic to work
+    */
+    uint16_t cqe_t_le  = rte_le_to_cpu_16(cqe->hdr_type_etc);
+    uint16_t mcqe_t_le;
+
 	/* Get l3/l4 header from mini-CQE in case L3/L4 format*/
 	if (mcqe == NULL ||
 	    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
-		ptype = (cqe->hdr_type_etc & 0xfc00) >> 10;
-	else
-		ptype = mcqe->hdr_type >> 2;
+		ptype = (cqe_t_le & 0xfc00) >> 10;
+	else {
+        mcqe_t_le = rte_le_to_cpu_16(mcqe->hdr_type);
+		ptype = mcqe_t_le >> 2;
+    }
 	/*
 	 * The index to the array should have:
 	 * bit[1:0] = l3_hdr_type
diff --git a/drivers/net/octeontx/base/octeontx_pki_var.h b/drivers/net/octeontx/base/octeontx_pki_var.h
index 4445369ce7..b37d79eb83 100644
--- a/drivers/net/octeontx/base/octeontx_pki_var.h
+++ b/drivers/net/octeontx/base/octeontx_pki_var.h
@@ -157,6 +157,12 @@  typedef union octtx_wqe_s {
 			uint64_t	lbptr : 8;
 			uint64_t	laptr : 8;
 		} w4;
+
+		struct {
+			uint64_t	size  :16;
+			uint64_t	dwd   : 1;
+			uint64_t	rsvd0 :47;
+		} w5;
 #endif
 	} s;
 
diff --git a/examples/l3fwd-acl/main.c b/examples/l3fwd-acl/main.c
index 2d2ecc7635..e39153f16c 100644
--- a/examples/l3fwd-acl/main.c
+++ b/examples/l3fwd-acl/main.c
@@ -170,6 +170,10 @@  static const struct {
 		.name = "altivec",
 		.alg = RTE_ACL_CLASSIFY_ALTIVEC,
 	},
+    {
+        .name = "s390x",
+        .alg = RTE_ACL_CLASSIFY_S390X,
+    }
 	{
 		.name = "avx512x16",
 		.alg = RTE_ACL_CLASSIFY_AVX512X16,
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 24d0910fe0..dc6c53dc12 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -239,6 +239,14 @@  em_mask_key(void *key, xmm_t mask)
 
 	return vec_and(data, mask);
 }
+#elif defined(__s390x__)
+static inline xmm_t
+em_mask_key(void *key, xmm_t mask)
+{
+	xmm_t data = (xmm_t) vec_xld2(0, (unsigned int *)(key));
+
+	return data + mask;
+}
 #else
 #error No vector engine (SSE, NEON, ALTIVEC) available, check your toolchain
 #endif
diff --git a/examples/l3fwd/l3fwd_lpm_s390x.h b/examples/l3fwd/l3fwd_lpm_s390x.h
new file mode 100644
index 0000000000..858f696ba9
--- /dev/null
+++ b/examples/l3fwd/l3fwd_lpm_s390x.h
@@ -0,0 +1,137 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2016 Intel Corporation.
+ * (c) Copyright IBM Corp. 2017, 2019
+ */
+#ifndef __L3FWD_LPM_S390X_H__
+#define __L3FWD_LPM_S390X_H__
+
+#include "l3fwd_s390x.h"
+
+typedef unsigned char vector_unsigned_char
+	__attribute__((vector_size(16*sizeof(unsigned char))));
+
+/*
+ * Read packet_type and destination IPV4 addresses from 4 mbufs.
+ */
+static inline void
+processx4_step1(struct rte_mbuf *pkt[FWDSTEP],
+		vector_unsigned_int *dip,
+		uint32_t *ipv4_flag)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct ether_hdr *eth_hdr;
+	uint32_t x0, x1, x2, x3;
+
+	eth_hdr = rte_pktmbuf_mtod(pkt[0], struct ether_hdr *);
+	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+	x0 = ipv4_hdr->dst_addr;
+	ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4;
+
+	rte_compiler_barrier();
+	eth_hdr = rte_pktmbuf_mtod(pkt[1], struct ether_hdr *);
+	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+	x1 = ipv4_hdr->dst_addr;
+	ipv4_flag[0] &= pkt[1]->packet_type;
+
+	rte_compiler_barrier();
+	eth_hdr = rte_pktmbuf_mtod(pkt[2], struct ether_hdr *);
+	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+	x2 = ipv4_hdr->dst_addr;
+	ipv4_flag[0] &= pkt[2]->packet_type;
+
+	rte_compiler_barrier();
+	eth_hdr = rte_pktmbuf_mtod(pkt[3], struct ether_hdr *);
+	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+	x3 = ipv4_hdr->dst_addr;
+	ipv4_flag[0] &= pkt[3]->packet_type;
+
+	rte_compiler_barrier();
+	dip[0] = (vector_unsigned_int){x0, x1, x2, x3};
+}
+
+/*
+ * Lookup into LPM for destination port.
+ * If lookup fails, use incoming port (portid) as destination port.
+ */
+static inline void
+processx4_step2(const struct lcore_conf *qconf,
+		vector_unsigned_int dip,
+		uint32_t ipv4_flag,
+		uint8_t portid,
+		struct rte_mbuf *pkt[FWDSTEP],
+		uint16_t dprt[FWDSTEP])
+{
+	rte_xmm_t dst;
+	const vector_unsigned_char bswap_mask = (vector_unsigned_char){
+							3, 2, 1, 0,
+							7, 6, 5, 4,
+							11, 10, 9, 8,
+							15, 14, 13, 12};
+
+	/* Byte swap 4 IPV4 addresses. */
+	dip = (vector_unsigned_int)vec_perm(*(vector_unsigned_char *)&dip,
+					(vector_unsigned_char){}, bswap_mask);
+
+	/* if all 4 packets are IPV4. */
+	if (likely(ipv4_flag)) {
+		rte_lpm_lookupx4(qconf->ipv4_lookup_struct, (xmm_t)dip,
+			(uint32_t *)&dst, portid);
+		/* get rid of unused upper 16 bit for each dport. */
+		dst.x = (xmm_t)vec_packs(dst.x, dst.x);
+		*(uint64_t *)dprt = dst.u64[0];
+	} else {
+		dst.x = (xmm_t)dip;
+		dprt[0] = lpm_get_dst_port_with_ipv4(qconf, pkt[0],
+							dst.u32[0], portid);
+		dprt[1] = lpm_get_dst_port_with_ipv4(qconf, pkt[1],
+							dst.u32[1], portid);
+		dprt[2] = lpm_get_dst_port_with_ipv4(qconf, pkt[2],
+							dst.u32[2], portid);
+		dprt[3] = lpm_get_dst_port_with_ipv4(qconf, pkt[3],
+							dst.u32[3], portid);
+	}
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+			uint8_t portid, struct lcore_conf *qconf)
+{
+	int32_t j;
+	uint16_t dst_port[MAX_PKT_BURST];
+	vector_unsigned_int dip[MAX_PKT_BURST / FWDSTEP];
+	uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
+	const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
+
+	for (j = 0; j != k; j += FWDSTEP)
+		processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP],
+				&ipv4_flag[j / FWDSTEP]);
+
+	for (j = 0; j != k; j += FWDSTEP)
+		processx4_step2(qconf, dip[j / FWDSTEP],
+				ipv4_flag[j / FWDSTEP],
+				portid, &pkts_burst[j], &dst_port[j]);
+
+	/* Classify last up to 3 packets one by one */
+	switch (nb_rx % FWDSTEP) {
+	case 3:
+		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		j++;
+		/* fall-through */
+	case 2:
+		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		j++;
+		/* fall-through */
+	case 1:
+		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		j++;
+		/* fall-through */
+	}
+
+	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+}
+
+#endif /* __L3FWD_LPM_S390X_H__ */
diff --git a/examples/l3fwd/l3fwd_s390x.h b/examples/l3fwd/l3fwd_s390x.h
new file mode 100644
index 0000000000..d027092a49
--- /dev/null
+++ b/examples/l3fwd/l3fwd_s390x.h
@@ -0,0 +1,259 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Intel Corporation.
+ * (c) Copyright IBM Corp. 2017, 2019
+ */
+#ifndef _L3FWD_S390X_H_
+#define _L3FWD_S390X_H_
+
+#include "l3fwd.h"
+#include "l3fwd_common.h"
+
+#define vec_sro(a, b) vec_srb(a, (b) << 64) // Vector Shift Right by Octet
+typedef unsigned int vector_unsigned_int
+	__attribute__((vector_size(4*sizeof(unsigned int))));
+typedef unsigned short vector_unsigned_short
+	__attribute__((vector_size(8*sizeof(unsigned short))));
+
+/*
+ * Update source and destination MAC addresses in the ethernet header.
+ * Perform RFC1812 checks and updates for IPV4 packets.
+ */
+static inline void
+processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
+{
+	vector_unsigned_int te[FWDSTEP];
+	vector_unsigned_int ve[FWDSTEP];
+	vector_unsigned_int *p[FWDSTEP];
+
+	p[0] = rte_pktmbuf_mtod(pkt[0], vector_unsigned_int *);
+	p[1] = rte_pktmbuf_mtod(pkt[1], vector_unsigned_int *);
+	p[2] = rte_pktmbuf_mtod(pkt[2], vector_unsigned_int *);
+	p[3] = rte_pktmbuf_mtod(pkt[3], vector_unsigned_int *);
+
+	ve[0] = (vector_unsigned_int)val_eth[dst_port[0]];
+	te[0] = *p[0];
+
+	ve[1] = (vector_unsigned_int)val_eth[dst_port[1]];
+	te[1] = *p[1];
+
+	ve[2] = (vector_unsigned_int)val_eth[dst_port[2]];
+	te[2] = *p[2];
+
+	ve[3] = (vector_unsigned_int)val_eth[dst_port[3]];
+	te[3] = *p[3];
+
+	/* Update first 12 bytes, keep rest bytes intact. */
+	te[0] = (vector_unsigned_int)vec_sel(
+			(vector_unsigned_short)ve[0],
+			(vector_unsigned_short)te[0],
+			(vector_unsigned_short) {0, 0, 0, 0,
+						0, 0, 0xffff, 0xffff});
+
+	te[1] = (vector_unsigned_int)vec_sel(
+			(vector_unsigned_short)ve[1],
+			(vector_unsigned_short)te[1],
+			(vector_unsigned_short) {0, 0, 0, 0,
+						0, 0, 0xffff, 0xffff});
+
+	te[2] = (vector_unsigned_int)vec_sel(
+			(vector_unsigned_short)ve[2],
+			(vector_unsigned_short)te[2],
+			(vector_unsigned_short) {0, 0, 0, 0, 0,
+						0, 0xffff, 0xffff});
+
+	te[3] = (vector_unsigned_int)vec_sel(
+			(vector_unsigned_short)ve[3],
+			(vector_unsigned_short)te[3],
+			(vector_unsigned_short) {0, 0, 0, 0,
+						0, 0, 0xffff, 0xffff});
+
+	*p[0] = te[0];
+	*p[1] = te[1];
+	*p[2] = te[2];
+	*p[3] = te[3];
+
+	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1),
+		&dst_port[0], pkt[0]->packet_type);
+	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1),
+		&dst_port[1], pkt[1]->packet_type);
+	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1),
+		&dst_port[2], pkt[2]->packet_type);
+	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1),
+		&dst_port[3], pkt[3]->packet_type);
+}
+
+/*
+ * Group consecutive packets with the same destination port in bursts of 4.
+ * Suppose we have array of destination ports:
+ * dst_port[] = {a, b, c, d,, e, ... }
+ * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
+ * We doing 4 comparisons at once and the result is 4 bit mask.
+ * This mask is used as an index into prebuild array of pnum values.
+ */
+static inline uint16_t *
+port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, vector_unsigned_short dp1,
+	vector_unsigned_short dp2)
+{
+	union {
+		uint16_t u16[FWDSTEP + 1];
+		uint64_t u64;
+	} *pnum = (void *)pn;
+
+	int32_t v;
+
+	v = vec_any_eq(dp1, dp2);
+
+
+	/* update last port counter. */
+	lp[0] += gptbl[v].lpv;
+
+	/* if dest port value has changed. */
+	if (v != GRPMSK) {
+		pnum->u64 = gptbl[v].pnum;
+		pnum->u16[FWDSTEP] = 1;
+		lp = pnum->u16 + gptbl[v].idx;
+	}
+
+	return lp;
+}
+
+/**
+ * Process one packet:
+ * Update source and destination MAC addresses in the ethernet header.
+ * Perform RFC1812 checks and updates for IPV4 packets.
+ */
+static inline void
+process_packet(struct rte_mbuf *pkt, uint16_t *dst_port)
+{
+	struct ether_hdr *eth_hdr;
+	vector_unsigned_int te, ve;
+
+	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+
+	te = *(vector_unsigned_int *)eth_hdr;
+	ve = (vector_unsigned_int)val_eth[dst_port[0]];
+
+	rfc1812_process((struct ipv4_hdr *)(eth_hdr + 1), dst_port,
+			pkt->packet_type);
+
+	/* dynamically vec_sel te and ve for MASK_ETH (0x3f) */
+	te = (vector_unsigned_int)vec_sel(
+		(vector_unsigned_short)ve,
+		(vector_unsigned_short)te,
+		(vector_unsigned_short){0, 0, 0, 0,
+					0, 0, 0xffff, 0xffff});
+
+	*(vector_unsigned_int *)eth_hdr = te;
+}
+
+/**
+ * Send packets burst from pkts_burst to the ports in dst_port array
+ */
+static __rte_always_inline void
+send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
+		uint16_t dst_port[MAX_PKT_BURST], int nb_rx)
+{
+	int32_t k;
+	int j = 0;
+	uint16_t dlp;
+	uint16_t *lp;
+	uint16_t pnum[MAX_PKT_BURST + 1];
+
+	/*
+	 * Finish packet processing and group consecutive
+	 * packets with the same destination port.
+	 */
+	k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
+	if (k != 0) {
+		vector_unsigned_short dp1, dp2;
+
+		lp = pnum;
+		lp[0] = 1;
+
+		processx4_step3(pkts_burst, dst_port);
+
+		/* dp1: <d[0], d[1], d[2], d[3], ... > */
+		dp1 = *(vector_unsigned_short *)dst_port;
+
+		for (j = FWDSTEP; j != k; j += FWDSTEP) {
+			processx4_step3(&pkts_burst[j], &dst_port[j]);
+
+			/*
+			 * dp2:
+			 * <d[j-3], d[j-2], d[j-1], d[j], ... >
+			 */
+			dp2 = *((vector_unsigned_short *)
+					&dst_port[j - FWDSTEP + 1]);
+			lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
+
+			/*
+			 * dp1:
+			 * <d[j], d[j+1], d[j+2], d[j+3], ... >
+			 */
+			dp1 = vec_sro(dp2, (vector unsigned char) {
+				0, 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, (FWDSTEP - 1) * sizeof(dst_port[0])});
+		}
+
+		/*
+		 * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... >
+		 */
+		dp2 = vec_perm(dp1, (vector_unsigned_short){},
+				(vector unsigned char) {0xf9});
+		lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
+
+		/*
+		 * remove values added by the last repeated
+		 * dst port.
+		 */
+		lp[0]--;
+		dlp = dst_port[j - 1];
+	} else {
+		/* set dlp and lp to the never used values. */
+		dlp = BAD_PORT - 1;
+		lp = pnum + MAX_PKT_BURST;
+	}
+
+	/* Process up to last 3 packets one by one. */
+	switch (nb_rx % FWDSTEP) {
+	case 3:
+		process_packet(pkts_burst[j], dst_port + j);
+		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+		j++;
+		/* fall-through */
+	case 2:
+		process_packet(pkts_burst[j], dst_port + j);
+		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+		j++;
+		/* fall-through */
+	case 1:
+		process_packet(pkts_burst[j], dst_port + j);
+		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+		j++;
+	}
+
+	/*
+	 * Send packets out, through destination port.
+	 * Consecutive packets with the same destination port
+	 * are already grouped together.
+	 * If destination port for the packet equals BAD_PORT,
+	 * then free the packet without sending it out.
+	 */
+	for (j = 0; j < nb_rx; j += k) {
+
+		int32_t m;
+		uint16_t pn;
+
+		pn = dst_port[j];
+		k = pnum[j];
+
+		if (likely(pn != BAD_PORT))
+			send_packetsx4(qconf, pn, pkts_burst + j, k);
+		else
+			for (m = j; m != j + k; m++)
+				rte_pktmbuf_free(pkts_burst[m]);
+
+	}
+}
+
+#endif /* _L3FWD_S390X_H_ */
diff --git a/lib/acl/acl_bld.c b/lib/acl/acl_bld.c
index 7ea30f4186..04f5f0a820 100644
--- a/lib/acl/acl_bld.c
+++ b/lib/acl/acl_bld.c
@@ -777,6 +777,9 @@  acl_build_reset(struct rte_acl_ctx *ctx)
 		sizeof(*ctx) - offsetof(struct rte_acl_ctx, num_categories));
 }
 
+
+
+
 static void
 acl_gen_full_range(struct acl_build_context *context, struct rte_acl_node *root,
 	struct rte_acl_node *end, int size, int level)
diff --git a/lib/acl/acl_gen.c b/lib/acl/acl_gen.c
index e759a2ca15..a3c31b0dc9 100644
--- a/lib/acl/acl_gen.c
+++ b/lib/acl/acl_gen.c
@@ -360,7 +360,16 @@  acl_gen_node(struct rte_acl_node *node, uint64_t *node_array,
 		array_ptr = &node_array[index->quad_index];
 		acl_add_ptrs(node, array_ptr, no_match, 0);
 		qtrp = (uint32_t *)node->transitions;
+
+		/* Swap qtrp on big endian that transitions[0]
+		 * is at least signifcant byte.
+		 */
+#if __BYTE_ORDER == __ORDER_BIG_ENDIAN__
+		node->node_index = __bswap_32(qtrp[0]);
+#else
 		node->node_index = qtrp[0];
+#endif
+
 		node->node_index <<= sizeof(index->quad_index) * CHAR_BIT;
 		node->node_index |= index->quad_index | node->node_type;
 		index->quad_index += node->fanout;
diff --git a/lib/acl/acl_run_scalar.c b/lib/acl/acl_run_scalar.c
index 3d61e79409..9f01ef8d8c 100644
--- a/lib/acl/acl_run_scalar.c
+++ b/lib/acl/acl_run_scalar.c
@@ -141,6 +141,14 @@  rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data,
 		input0 = GET_NEXT_4BYTES(parms, 0);
 		input1 = GET_NEXT_4BYTES(parms, 1);
 
+		/* input needs to be swapped because the rules get
+		 * swapped while building the trie.
+		 */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+		input0 = __bswap_32(input0);
+		input1 = __bswap_32(input1);
+#endif
+
 		for (n = 0; n < 4; n++) {
 
 			transition0 = scalar_transition(flows.trans,
diff --git a/lib/acl/rte_acl.c b/lib/acl/rte_acl.c
index a61c3ba188..ae42ea5b54 100644
--- a/lib/acl/rte_acl.c
+++ b/lib/acl/rte_acl.c
@@ -101,6 +101,8 @@  static const rte_acl_classify_t classify_fns[] = {
 	[RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2,
 	[RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon,
 	[RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec,
+	/* use scalar for s390x for now */
+	[RTE_ACL_CLASSIFY_S390X] = rte_acl_classify_scalar,
 	[RTE_ACL_CLASSIFY_AVX512X16] = rte_acl_classify_avx512x16,
 	[RTE_ACL_CLASSIFY_AVX512X32] = rte_acl_classify_avx512x32,
 };
@@ -145,6 +147,27 @@  acl_check_alg_ppc(enum rte_acl_classify_alg alg)
 	return -EINVAL;
 }
 
+
+
+/*
+ * Helper function for acl_check_alg.
+ * Check support for PPC specific classify methods.
+ */
+static int
+acl_check_alg_s390x(enum rte_acl_classify_alg alg)
+{
+    if (alg == RTE_ACL_CLASSIFY_S390X) {
+#if defined(RTE_ARCH_S390X)
+        if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) 
+			return 0;
+#endif
+        return -ENOTSUP;
+    }
+
+    return -EINVAL;
+}
+
+
 #ifdef CC_AVX512_SUPPORT
 static int
 acl_check_avx512_cpu_flags(void)
@@ -216,6 +239,8 @@  acl_check_alg(enum rte_acl_classify_alg alg)
 		return acl_check_alg_arm(alg);
 	case RTE_ACL_CLASSIFY_ALTIVEC:
 		return acl_check_alg_ppc(alg);
+    case RTE_ACL_CLASSIFY_S390X:
+        return acl_check_alg_s390x(alg);
 	case RTE_ACL_CLASSIFY_AVX512X32:
 	case RTE_ACL_CLASSIFY_AVX512X16:
 	case RTE_ACL_CLASSIFY_AVX2:
@@ -244,6 +269,8 @@  acl_get_best_alg(void)
 		RTE_ACL_CLASSIFY_NEON,
 #elif defined(RTE_ARCH_PPC_64)
 		RTE_ACL_CLASSIFY_ALTIVEC,
+#elif defined(RTE_ARCH_S390X)
+        RTE_ACL_CLASSIFY_S390X,
 #elif defined(RTE_ARCH_X86)
 		RTE_ACL_CLASSIFY_AVX512X32,
 		RTE_ACL_CLASSIFY_AVX512X16,
diff --git a/lib/acl/rte_acl.h b/lib/acl/rte_acl.h
index f7f5f08701..307a78ceac 100644
--- a/lib/acl/rte_acl.h
+++ b/lib/acl/rte_acl.h
@@ -241,8 +241,9 @@  enum rte_acl_classify_alg {
 	RTE_ACL_CLASSIFY_AVX2 = 3,    /**< requires AVX2 support. */
 	RTE_ACL_CLASSIFY_NEON = 4,    /**< requires NEON support. */
 	RTE_ACL_CLASSIFY_ALTIVEC = 5,    /**< requires ALTIVEC support. */
-	RTE_ACL_CLASSIFY_AVX512X16 = 6,  /**< requires AVX512 support. */
-	RTE_ACL_CLASSIFY_AVX512X32 = 7,  /**< requires AVX512 support. */
+    RTE_ACL_CLASSIFY_S390X = 6,    /**< requires s390x z13 support. */
+	RTE_ACL_CLASSIFY_AVX512X16 = 7,  /**< requires AVX512 support. */
+	RTE_ACL_CLASSIFY_AVX512X32 = 8,  /**< requires AVX512 support. */
 };
 
 /**
diff --git a/lib/eal/s390x/include/meson.build b/lib/eal/s390x/include/meson.build
new file mode 100644
index 0000000000..b4561d6a82
--- /dev/null
+++ b/lib/eal/s390x/include/meson.build
@@ -0,0 +1,16 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# (c) Copyright IBM Corp. 2018, 2019
+
+install_headers(
+	'rte_atomic.h',
+	'rte_byteorder.h',
+	'rte_cpuflags.h',
+	'rte_cycles.h',
+	'rte_io.h',
+	'rte_memcpy.h',
+	'rte_pause.h',
+	'rte_prefetch.h',
+	'rte_rwlock.h',
+	'rte_spinlock.h',
+	'rte_vect.h',
+	subdir: get_option('include_subdir_arch'))
diff --git a/lib/eal/s390x/include/rte_atomic.h b/lib/eal/s390x/include/rte_atomic.h
new file mode 100644
index 0000000000..5fce6d5f07
--- /dev/null
+++ b/lib/eal/s390x/include/rte_atomic.h
@@ -0,0 +1,47 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_ATOMIC_S390X_H_
+#define _RTE_ATOMIC_S390X_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+#  error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+
+//#define dsb(opt) asm volatile("" : : : "memory")
+//#define dmb(opt) asm volatile("" : : : "memory")
+
+#define rte_mb() rte_compiler_barrier() //asm volatile("" : : : "memory")
+
+#define rte_wmb() rte_mb()
+
+#define rte_rmb() rte_mb()
+
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_wmb()
+
+#define rte_smp_rmb() rte_rmb()
+
+#define rte_io_mb() rte_mb()
+
+#define rte_io_wmb() rte_wmb()
+
+#define rte_io_rmb() rte_rmb()
+
+#define rte_cio_wmb() rte_wmb()
+
+#define rte_cio_rmb() rte_rmb()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_byteorder.h b/lib/eal/s390x/include/rte_byteorder.h
new file mode 100644
index 0000000000..de6e410b4b
--- /dev/null
+++ b/lib/eal/s390x/include/rte_byteorder.h
@@ -0,0 +1,43 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+/* Inspired from FreeBSD src/sys/powerpc/include/endian.h
+ * Copyright (c) 1987, 1991, 1993
+ * The Regents of the University of California.  All rights reserved.
+ */
+
+#ifndef _RTE_BYTEORDER_S390X_H_
+#define _RTE_BYTEORDER_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include "generic/rte_byteorder.h"
+
+/* s390x is big endian
+ */
+
+#define rte_cpu_to_le_16(x) rte_bswap16(x)
+#define rte_cpu_to_le_32(x) rte_bswap32(x)
+#define rte_cpu_to_le_64(x) rte_bswap64(x)
+
+#define rte_cpu_to_be_16(x) (x)
+#define rte_cpu_to_be_32(x) (x)
+#define rte_cpu_to_be_64(x) (x)
+
+#define rte_le_to_cpu_16(x) rte_bswap16(x)
+#define rte_le_to_cpu_32(x) rte_bswap32(x)
+#define rte_le_to_cpu_64(x) rte_bswap64(x)
+
+#define rte_be_to_cpu_16(x) (x)
+#define rte_be_to_cpu_32(x) (x)
+#define rte_be_to_cpu_64(x) (x)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BYTEORDER_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_cpuflags.h b/lib/eal/s390x/include/rte_cpuflags.h
new file mode 100644
index 0000000000..bfeff3f98b
--- /dev/null
+++ b/lib/eal/s390x/include/rte_cpuflags.h
@@ -0,0 +1,42 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_CPUFLAGS_S390X_H_
+#define _RTE_CPUFLAGS_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+	RTE_CPUFLAG_ESAN3 = 0,
+	RTE_CPUFLAG_ZARCH,
+	RTE_CPUFLAG_STFLE,
+	RTE_CPUFLAG_MSA,
+	RTE_CPUFLAG_LDISP,
+	RTE_CPUFLAG_EIMM,
+	RTE_CPUFLAG_DFP,
+	RTE_CPUFLAG_HPAGE, //from elf.h
+	//RTE_CPUFLAG_EDAT, //from hwcap.h
+	RTE_CPUFLAG_ETF3EH,
+	RTE_CPUFLAG_HIGH_GPRS,
+	RTE_CPUFLAG_TE,
+	RTE_CPUFLAG_VXRS,
+	RTE_CPUFLAG_VXRS_BCD,
+	RTE_CPUFLAG_VXRS_EXT,
+	RTE_CPUFLAG_GS,
+	/* The last item */
+	RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_cycles.h b/lib/eal/s390x/include/rte_cycles.h
new file mode 100644
index 0000000000..7a430e06a8
--- /dev/null
+++ b/lib/eal/s390x/include/rte_cycles.h
@@ -0,0 +1,44 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_CYCLES_S390X_H_
+#define _RTE_CYCLES_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+#include <rte_common.h>
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ *   The time base for this lcore.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+	uint64_t tsc;
+	asm volatile("stckf %0" : "=Q"(tsc) : : "cc");
+	return tsc;
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+	rte_mb();
+	return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_io.h b/lib/eal/s390x/include/rte_io.h
new file mode 100644
index 0000000000..9cb3c1ca7c
--- /dev/null
+++ b/lib/eal/s390x/include/rte_io.h
@@ -0,0 +1,184 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_IO_S390X_H_
+#define _RTE_IO_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#define RTE_OVERRIDE_IO_H
+
+#include "generic/rte_io.h"
+
+#include <unistd.h>
+#include <sys/syscall.h>
+
+union register_pair {
+	__int128_t pair;
+	struct {
+		unsigned long even;
+		unsigned long odd;
+	} even_odd;
+};
+
+/* s390 requires special instructions to access IO memory. */
+static inline uint64_t pcilgi(const volatile void *ioaddr, size_t len)
+{
+        union register_pair ioaddr_len =
+                {.even_odd.even = (uint64_t)ioaddr, .even_odd.odd = len};
+	uint64_t val;
+	int cc = -1;
+
+	asm volatile (
+		"       .insn   rre,0xb9d60000,%[val],%[ioaddr_len]\n"
+		"       ipm     %[cc]\n"
+		"       srl     %[cc],28\n"
+		: [cc] "+d" (cc), [val] "=d" (val),
+		  [ioaddr_len] "+&d" (ioaddr_len.pair) :: "cc");
+	return val;
+}
+
+static inline void pcistgi(volatile void *ioaddr, uint64_t val, size_t len)
+{
+        union register_pair ioaddr_len =
+                {.even_odd.even = (uint64_t)ioaddr, .even_odd.odd = len};
+	int cc = -1;
+
+	asm volatile (
+		"       .insn   rre,0xb9d40000,%[val],%[ioaddr_len]\n"
+		"       ipm     %[cc]\n"
+		"       srl     %[cc],28\n"
+		: [cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
+		: [val] "d" (val)
+		: "cc", "memory");
+}
+
+/* fall back to syscall on old machines ? */
+static __rte_always_inline uint8_t
+rte_read8_relaxed(const volatile void *addr)
+{
+	return pcilgi(addr, 1);
+}
+
+static __rte_always_inline uint16_t
+rte_read16_relaxed(const volatile void *addr)
+{
+	return pcilgi(addr, 2);
+}
+
+static __rte_always_inline uint32_t
+rte_read32_relaxed(const volatile void *addr)
+{
+	return pcilgi(addr, 4);
+}
+
+static __rte_always_inline uint64_t
+rte_read64_relaxed(const volatile void *addr)
+{
+	return pcilgi(addr, 8);
+}
+
+static __rte_always_inline void
+rte_write8_relaxed(uint8_t value, volatile void *addr)
+{
+	pcistgi(addr, value, sizeof(value));
+}
+
+static __rte_always_inline void
+rte_write16_relaxed(uint16_t value, volatile void *addr)
+{
+	pcistgi(addr, value, sizeof(value));
+}
+
+static __rte_always_inline void
+rte_write32_relaxed(uint32_t value, volatile void *addr)
+{
+	pcistgi(addr, value, sizeof(value));
+}
+
+static __rte_always_inline void
+rte_write64_relaxed(uint64_t value, volatile void *addr)
+{
+	pcistgi(addr, value, sizeof(value));
+}
+
+static __rte_always_inline uint8_t
+rte_read8(const volatile void *addr)
+{
+	uint8_t val;
+	val = rte_read8_relaxed(addr);
+	rte_io_rmb();
+	return val;
+}
+
+static __rte_always_inline uint16_t
+rte_read16(const volatile void *addr)
+{
+	uint16_t val;
+	val = rte_read16_relaxed(addr);
+	rte_io_rmb();
+	return val;
+}
+
+static __rte_always_inline uint32_t
+rte_read32(const volatile void *addr)
+{
+	uint32_t val;
+	val = rte_read32_relaxed(addr);
+	rte_io_rmb();
+	return val;
+}
+
+static __rte_always_inline uint64_t
+rte_read64(const volatile void *addr)
+{
+	uint64_t val;
+	val = rte_read64_relaxed(addr);
+	rte_io_rmb();
+	return val;
+}
+
+static __rte_always_inline void
+rte_write8(uint8_t value, volatile void *addr)
+{
+	rte_io_wmb();
+	rte_write8_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write16(uint16_t value, volatile void *addr)
+{
+	rte_io_wmb();
+	rte_write16_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write32(uint32_t value, volatile void *addr)
+{
+	rte_io_wmb();
+	rte_write32_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write32_wc(uint32_t value, volatile void *addr)
+{
+    rte_write32(value, addr);
+}
+
+static __rte_always_inline void
+rte_write64(uint64_t value, volatile void *addr)
+{
+	rte_io_wmb();
+	rte_write64_relaxed(value, addr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IO_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_mcslock.h b/lib/eal/s390x/include/rte_mcslock.h
new file mode 100644
index 0000000000..9125237dfd
--- /dev/null
+++ b/lib/eal/s390x/include/rte_mcslock.h
@@ -0,0 +1,18 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_MCSLOCK_S390X_H_
+#define _RTE_MCSLOCK_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_mcslock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MCSLOCK_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_memcpy.h b/lib/eal/s390x/include/rte_memcpy.h
new file mode 100644
index 0000000000..1135b1af6f
--- /dev/null
+++ b/lib/eal/s390x/include/rte_memcpy.h
@@ -0,0 +1,55 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_MEMCPY_S390X_H_
+#define _RTE_MEMCPY_S390X_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcpy.h"
+
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 16);
+}
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 32);
+}
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 48);
+}
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 64);
+}
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 128);
+}
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 256);
+}
+#define rte_memcpy(d, s, n)	memcpy((d), (s), (n))
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_pause.h b/lib/eal/s390x/include/rte_pause.h
new file mode 100644
index 0000000000..be90ce6a1f
--- /dev/null
+++ b/lib/eal/s390x/include/rte_pause.h
@@ -0,0 +1,22 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_PAUSE_S390X_H_
+#define _RTE_PAUSE_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_pause.h"
+
+static inline void rte_pause(void)
+{
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PAUSE_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_power_intrinsics.h b/lib/eal/s390x/include/rte_power_intrinsics.h
new file mode 100644
index 0000000000..c0e9ac279f
--- /dev/null
+++ b/lib/eal/s390x/include/rte_power_intrinsics.h
@@ -0,0 +1,20 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _RTE_POWER_INTRINSIC_PPC_H_
+#define _RTE_POWER_INTRINSIC_PPC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+
+#include "generic/rte_power_intrinsics.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_POWER_INTRINSIC_PPC_H_ */
diff --git a/lib/eal/s390x/include/rte_prefetch.h b/lib/eal/s390x/include/rte_prefetch.h
new file mode 100644
index 0000000000..4a2e73116d
--- /dev/null
+++ b/lib/eal/s390x/include/rte_prefetch.h
@@ -0,0 +1,46 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_PREFETCH_S390X_H_
+#define _RTE_PREFETCH_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+	asm volatile ("pfd 1, 0(%[p])" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+	asm volatile ("pfd 1, 0(%[p])" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+	asm volatile ("pfd 1, 0(%[p])" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	/* non-temporal version not available, fallback to rte_prefetch0 */
+	rte_prefetch0(p);
+}
+
+__rte_experimental
+static inline void rte_cldemote(const volatile void *p)
+{
+    RTE_SET_USED(p);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_rwlock.h b/lib/eal/s390x/include/rte_rwlock.h
new file mode 100644
index 0000000000..f649484f35
--- /dev/null
+++ b/lib/eal/s390x/include/rte_rwlock.h
@@ -0,0 +1,42 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_RWLOCK_S390X_H_
+#define _RTE_RWLOCK_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_rwlock.h"
+
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_read_lock(rwl);
+}
+
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_read_unlock(rwl);
+}
+
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_write_lock(rwl);
+}
+
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_write_unlock(rwl);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_spinlock.h b/lib/eal/s390x/include/rte_spinlock.h
new file mode 100644
index 0000000000..0434864fbc
--- /dev/null
+++ b/lib/eal/s390x/include/rte_spinlock.h
@@ -0,0 +1,85 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_SPINLOCK_S390X_H_
+#define _RTE_SPINLOCK_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
+#ifndef RTE_FORCE_INTRINSICS
+
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+	while (__sync_lock_test_and_set(&sl->locked, 1))
+		while (sl->locked)
+			rte_pause();
+}
+
+static inline void
+rte_spinlock_unlock(rte_spinlock_t *sl)
+{
+	__sync_lock_release(&sl->locked);
+}
+
+static inline int
+rte_spinlock_trylock(rte_spinlock_t *sl)
+{
+	return __sync_lock_test_and_set(&sl->locked, 1) == 0;
+}
+
+#endif
+
+
+static inline int rte_tm_supported(void)
+{
+	return 0;
+}
+
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl)
+{
+	rte_spinlock_lock(sl); /* fall-back */
+}
+
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl)
+{
+	return rte_spinlock_trylock(sl);
+}
+
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl)
+{
+	rte_spinlock_unlock(sl);
+}
+
+static inline void
+rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
+{
+	rte_spinlock_recursive_lock(slr); /* fall-back */
+}
+
+static inline void
+rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
+{
+	rte_spinlock_recursive_unlock(slr);
+}
+
+static inline int
+rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
+{
+	return rte_spinlock_recursive_trylock(slr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_ticketlock.h b/lib/eal/s390x/include/rte_ticketlock.h
new file mode 100644
index 0000000000..0785363c94
--- /dev/null
+++ b/lib/eal/s390x/include/rte_ticketlock.h
@@ -0,0 +1,18 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2019
+ */
+
+#ifndef _RTE_TICKETLOCK_S390X_H_
+#define _RTE_TICKETLOCK_S390X_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_ticketlock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TICKETLOCK_S390X_H_ */
diff --git a/lib/eal/s390x/include/rte_vect.h b/lib/eal/s390x/include/rte_vect.h
new file mode 100644
index 0000000000..8fe3535965
--- /dev/null
+++ b/lib/eal/s390x/include/rte_vect.h
@@ -0,0 +1,35 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#ifndef _RTE_VECT_S390X_H_
+#define _RTE_VECT_S390X_H_
+
+#include <vecintrin.h>
+#include "generic/rte_vect.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_256
+
+typedef int xmm_t __attribute__((vector_size(4*sizeof(int))));
+
+#define	XMM_SIZE	(sizeof(xmm_t))
+#define	XMM_MASK	(XMM_SIZE - 1)
+
+typedef union rte_xmm {
+	xmm_t    x;
+	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
+	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
+	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
+	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
+	double   pd[XMM_SIZE / sizeof(double)];
+} __attribute__((aligned(16))) rte_xmm_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_VECT_S390X_H_ */
diff --git a/lib/eal/s390x/meson.build b/lib/eal/s390x/meson.build
new file mode 100644
index 0000000000..c8cc8d1f3d
--- /dev/null
+++ b/lib/eal/s390x/meson.build
@@ -0,0 +1,16 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# (c) Copyright IBM Corp. 2018, 2019
+
+subdir('include')
+
+# 19.xx zarch patches lib/librte_eal/common/arch/s390x/meson.build:
+# var was: eal_common_arch_sources
+#
+sources += files(
+        'rte_cpuflags.c',
+        'rte_cycles.c',
+        'rte_hypervisor.c',
+        'rte_power_intrinsics.c',
+)
+
+
diff --git a/lib/eal/s390x/rte_cpuflags.c b/lib/eal/s390x/rte_cpuflags.c
new file mode 100644
index 0000000000..d57a51d267
--- /dev/null
+++ b/lib/eal/s390x/rte_cpuflags.c
@@ -0,0 +1,91 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#include "rte_cpuflags.h"
+
+#include <elf.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <unistd.h>
+#include <string.h>
+
+/* Symbolic values for the entries in the auxiliary table */
+#define AT_HWCAP  16
+#define AT_HWCAP2 26
+
+/* software based registers */
+enum cpu_register_t {
+	REG_NONE = 0,
+	REG_HWCAP,
+	REG_HWCAP2,
+	REG_MAX
+};
+
+typedef uint32_t hwcap_registers_t[REG_MAX];
+
+struct feature_entry {
+	uint32_t reg;
+	uint32_t bit;
+#define CPU_FLAG_NAME_MAX_LEN 64
+	char name[CPU_FLAG_NAME_MAX_LEN];
+};
+
+#define FEAT_DEF(name, reg, bit) \
+	[RTE_CPUFLAG_##name] = {reg, bit, #name},
+
+const struct feature_entry rte_cpu_feature_table[] = {
+	FEAT_DEF(ESAN3,                  REG_HWCAP,   0)
+	FEAT_DEF(ZARCH,                  REG_HWCAP,   1)
+	FEAT_DEF(STFLE,                  REG_HWCAP,   2)
+	FEAT_DEF(MSA,                    REG_HWCAP,   3)
+	FEAT_DEF(LDISP,                  REG_HWCAP,   4)
+	FEAT_DEF(EIMM,                   REG_HWCAP,   5)
+	FEAT_DEF(DFP,                    REG_HWCAP,   6)
+	FEAT_DEF(HPAGE,                  REG_HWCAP,   7)
+	FEAT_DEF(ETF3EH,                 REG_HWCAP,   8)
+	FEAT_DEF(HIGH_GPRS,              REG_HWCAP,   9)
+	FEAT_DEF(TE,                     REG_HWCAP,  10)
+	FEAT_DEF(VXRS,                   REG_HWCAP,  11)
+	FEAT_DEF(VXRS_BCD,               REG_HWCAP,  12)
+	FEAT_DEF(VXRS_EXT,               REG_HWCAP,  13)
+	FEAT_DEF(GS,                     REG_HWCAP,  14)
+};
+
+/*
+ * Read AUXV software register and get cpu features for Power
+ */
+static void
+rte_cpu_get_features(hwcap_registers_t out)
+{
+	out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP);
+	out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2);
+}
+
+/*
+ * Checks if a particular flag is available on current machine.
+ */
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
+{
+	const struct feature_entry *feat;
+	hwcap_registers_t regs = {0};
+
+	if (feature >= RTE_CPUFLAG_NUMFLAGS)
+		return -ENOENT;
+
+	feat = &rte_cpu_feature_table[feature];
+	if (feat->reg == REG_NONE)
+		return -EFAULT;
+
+	rte_cpu_get_features(regs);
+	return (regs[feat->reg] >> feat->bit) & 1;
+}
+
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature)
+{
+	if (feature >= RTE_CPUFLAG_NUMFLAGS)
+		return NULL;
+	return rte_cpu_feature_table[feature].name;
+}
diff --git a/lib/eal/s390x/rte_cycles.c b/lib/eal/s390x/rte_cycles.c
new file mode 100644
index 0000000000..b29c4454a1
--- /dev/null
+++ b/lib/eal/s390x/rte_cycles.c
@@ -0,0 +1,11 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#include "eal_private.h"
+
+uint64_t
+get_tsc_freq_arch(void)
+{
+	return 0;
+}
diff --git a/lib/eal/s390x/rte_hypervisor.c b/lib/eal/s390x/rte_hypervisor.c
new file mode 100644
index 0000000000..22b0c5cc47
--- /dev/null
+++ b/lib/eal/s390x/rte_hypervisor.c
@@ -0,0 +1,11 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2018, 2019
+ */
+
+#include "rte_hypervisor.h"
+
+enum rte_hypervisor
+rte_hypervisor_get(void)
+{
+	return RTE_HYPERVISOR_UNKNOWN;
+}
diff --git a/lib/eal/s390x/rte_power_intrinsics.c b/lib/eal/s390x/rte_power_intrinsics.c
new file mode 100644
index 0000000000..f00b58ade5
--- /dev/null
+++ b/lib/eal/s390x/rte_power_intrinsics.c
@@ -0,0 +1,51 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include "rte_power_intrinsics.h"
+
+/**
+ * This function is not supported on PPC64.
+ */
+int
+rte_power_monitor(const struct rte_power_monitor_cond *pmc,
+		const uint64_t tsc_timestamp)
+{
+	RTE_SET_USED(pmc);
+	RTE_SET_USED(tsc_timestamp);
+
+	return -ENOTSUP;
+}
+
+/**
+ * This function is not supported on PPC64.
+ */
+int
+rte_power_pause(const uint64_t tsc_timestamp)
+{
+	RTE_SET_USED(tsc_timestamp);
+
+	return -ENOTSUP;
+}
+
+/**
+ * This function is not supported on PPC64.
+ */
+int
+rte_power_monitor_wakeup(const unsigned int lcore_id)
+{
+	RTE_SET_USED(lcore_id);
+
+	return -ENOTSUP;
+}
+
+int
+rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
+		const uint32_t num, const uint64_t tsc_timestamp)
+{
+	RTE_SET_USED(pmc);
+	RTE_SET_USED(num);
+	RTE_SET_USED(tsc_timestamp);
+
+	return -ENOTSUP;
+}
diff --git a/lib/hash/rte_fbk_hash.h b/lib/hash/rte_fbk_hash.h
index b01126999b..956d3f90f9 100644
--- a/lib/hash/rte_fbk_hash.h
+++ b/lib/hash/rte_fbk_hash.h
@@ -123,9 +123,16 @@  rte_fbk_hash_add_key_with_bucket(struct rte_fbk_hash_table *ht,
 	 * corrupted due to race conditions, but it's still possible to
 	 * overwrite entries that have just been made valid.
 	 */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 	const uint64_t new_entry = ((uint64_t)(key) << 32) |
 			((uint64_t)(value) << 16) |
 			1;  /* 1 = is_entry bit. */
+	#else
+	const uint64_t new_entry =
+			((uint64_t)(1) << 48) | /* 1 = is_entry bit. */
+			((uint64_t)(value) << 32) |
+			(uint64_t)(key);
+	#endif
 	uint32_t i;
 
 	for (i = 0; i < ht->entries_per_bucket; i++) {
diff --git a/lib/lpm/meson.build b/lib/lpm/meson.build
index 78d91d3421..20f76368fa 100644
--- a/lib/lpm/meson.build
+++ b/lib/lpm/meson.build
@@ -13,6 +13,7 @@  headers = files('rte_lpm.h', 'rte_lpm6.h')
 # without worrying about which architecture we actually need
 indirect_headers += files(
         'rte_lpm_altivec.h',
+        'rte_lpm_s390x.h',
         'rte_lpm_neon.h',
         'rte_lpm_sse.h',
         'rte_lpm_sve.h',
diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h
index eb91960e81..b9ee616c1d 100644
--- a/lib/lpm/rte_lpm.h
+++ b/lib/lpm/rte_lpm.h
@@ -405,6 +405,8 @@  rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 #endif
 #elif defined(RTE_ARCH_PPC_64)
 #include "rte_lpm_altivec.h"
+#elif defined(RTE_ARCH_S390X)
+#include "rte_lpm_s390x.h"
 #else
 #include "rte_lpm_sse.h"
 #endif
diff --git a/lib/lpm/rte_lpm6.c b/lib/lpm/rte_lpm6.c
index 8d21aeddb8..4a0f5740a2 100644
--- a/lib/lpm/rte_lpm6.c
+++ b/lib/lpm/rte_lpm6.c
@@ -18,6 +18,7 @@ 
 #include <assert.h>
 #include <rte_jhash.h>
 #include <rte_tailq.h>
+#include <rte_byteorder.h>
 
 #include "rte_lpm6.h"
 
@@ -52,6 +53,8 @@  static struct rte_tailq_elem rte_lpm6_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_lpm6_tailq)
 
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
 /** Tbl entry structure. It is the same for both tbl24 and tbl8 */
 struct rte_lpm6_tbl_entry {
 	uint32_t next_hop:	21;  /**< Next hop / next table to be checked. */
@@ -63,6 +66,21 @@  struct rte_lpm6_tbl_entry {
 	uint32_t ext_entry :1;   /**< External entry. */
 };
 
+#else
+
+struct rte_lpm6_tbl_entry {
+
+	/* Flags. */
+	uint32_t ext_entry :1;   /**< External entry. */
+	uint32_t valid_group :1; /**< Group validation flag. */
+	uint32_t valid     :1;   /**< Validation flag. */
+
+	uint32_t depth	:8;      /**< Rule depth. */
+	uint32_t next_hop:	21;  /**< Next hop / next table to be checked. */
+};
+
+#endif
+
 /** Rules tbl entry structure. */
 struct rte_lpm6_rule {
 	uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */
diff --git a/lib/lpm/rte_lpm_s390x.h b/lib/lpm/rte_lpm_s390x.h
new file mode 100644
index 0000000000..eb1fdd4509
--- /dev/null
+++ b/lib/lpm/rte_lpm_s390x.h
@@ -0,0 +1,130 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * (c) Copyright IBM Corp. 2016, 2018
+ */
+
+#ifndef _RTE_LPM_S390X_H_
+#define _RTE_LPM_S390X_H_
+
+#include <rte_branch_prediction.h>
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_vect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+	uint32_t defv)
+{
+	typedef int vector_signed_int
+		__attribute__((vector_size(4*sizeof(int))));
+	vector_signed_int i24;
+	rte_xmm_t i8;
+	uint32_t tbl[4];
+	uint64_t idx, pt, pt2;
+	const uint32_t *ptbl;
+
+	const uint32_t mask = UINT8_MAX;
+	const vector_signed_int mask8 = (xmm_t){mask, mask, mask, mask};
+
+	/*
+	 * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 2 LPM entries
+	 * as one 64-bit value (0x0300000003000000).
+	 */
+	const uint64_t mask_xv =
+		((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK |
+		(uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32);
+
+	/*
+	 * RTE_LPM_LOOKUP_SUCCESS for 2 LPM entries
+	 * as one 64-bit value (0x0100000001000000).
+	 */
+	const uint64_t mask_v =
+		((uint64_t)RTE_LPM_LOOKUP_SUCCESS |
+		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32);
+
+	/* get 4 indexes for tbl24[]. */
+	i24[0] = (uint32_t)ip[0] >> 8;
+	i24[1] = (uint32_t)ip[1] >> 8;
+	i24[2] = (uint32_t)ip[2] >> 8;
+	i24[3] = (uint32_t)ip[3] >> 8;
+
+	/* extract values from tbl24[] */
+	idx = (uint32_t)i24[0];
+	idx = idx < (1<<24) ? idx : (1<<24)-1;
+	ptbl = (const uint32_t *)&lpm->tbl24[idx];
+	tbl[0] = *ptbl;
+
+	idx = (uint32_t) i24[1];
+	idx = idx < (1<<24) ? idx : (1<<24)-1;
+	ptbl = (const uint32_t *)&lpm->tbl24[idx];
+	tbl[1] = *ptbl;
+
+	idx = (uint32_t) i24[2];
+	idx = idx < (1<<24) ? idx : (1<<24)-1;
+	ptbl = (const uint32_t *)&lpm->tbl24[idx];
+	tbl[2] = *ptbl;
+
+	idx = (uint32_t) i24[3];
+	idx = idx < (1<<24) ? idx : (1<<24)-1;
+	ptbl = (const uint32_t *)&lpm->tbl24[idx];
+	tbl[3] = *ptbl;
+
+	/* get 4 indexes for tbl8[]. */
+	i8.x = vec_and(ip, mask8);
+
+	pt = (uint64_t)tbl[0] |
+		(uint64_t)tbl[1] << 32;
+	pt2 = (uint64_t)tbl[2] |
+		(uint64_t)tbl[3] << 32;
+
+	/* search successfully finished for all 4 IP addresses. */
+	if (likely((pt & mask_xv) == mask_v) &&
+			likely((pt2 & mask_xv) == mask_v)) {
+		*(uint64_t *)hop = pt & RTE_LPM_MASKX4_RES;
+		*(uint64_t *)(hop + 2) = pt2 & RTE_LPM_MASKX4_RES;
+		return;
+	}
+
+	if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[0] = i8.u32[0] +
+			(uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]];
+		tbl[0] = *ptbl;
+	}
+	if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[1] = i8.u32[1] +
+			(uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]];
+		tbl[1] = *ptbl;
+	}
+	if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[2] = i8.u32[2] +
+			(uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]];
+		tbl[2] = *ptbl;
+	}
+	if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[3] = i8.u32[3] +
+			(uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]];
+		tbl[3] = *ptbl;
+	}
+
+	hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[0] & 0x00FFFFFF : defv;
+	hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[1] & 0x00FFFFFF : defv;
+	hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[2] & 0x00FFFFFF : defv;
+	hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[3] & 0x00FFFFFF : defv;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LPM_S390X_H_ */
diff --git a/meson.build b/meson.build
index 937f6110c0..8c8d673609 100644
--- a/meson.build
+++ b/meson.build
@@ -50,6 +50,8 @@  elif host_machine.cpu_family().startswith('arm') or host_machine.cpu_family().st
     arch_subdir = 'arm'
 elif host_machine.cpu_family().startswith('ppc')
     arch_subdir = 'ppc'
+elif host_machine.cpu_family().startswith('s390x')
+	arch_subdir = 's390x'
 endif
 
 # configure the build, and make sure configs here and in config folder are