[v4,1/4] hash: add kv hash library

Message ID c4de6f3ac323a83807342807ba9e9bba1c207635.1588967562.git.vladimir.medvedkin@intel.com
State New
Delegated to: Thomas Monjalon
Series
  • add new kv hash table

Checks

Context Check Description
ci/iol-testing fail Testing issues
ci/iol-mellanox-Performance success Performance Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-nxp-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/checkpatch success coding style OK

Commit Message

Medvedkin, Vladimir May 8, 2020, 7:58 p.m. UTC
KV hash is a specially optimized key-value storage for fixed
key and value sizes. At the moment it supports 32-bit keys
and 64-bit values. The table is hash-function agnostic, so the
user must provide a precalculated hash signature for
add/delete/lookup operations.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 lib/Makefile                           |   2 +-
 lib/librte_hash/Makefile               |  14 +-
 lib/librte_hash/k32v64_hash.c          | 277 +++++++++++++++++++++++++++++++++
 lib/librte_hash/k32v64_hash.h          |  98 ++++++++++++
 lib/librte_hash/k32v64_hash_avx512vl.c |  59 +++++++
 lib/librte_hash/meson.build            |  17 +-
 lib/librte_hash/rte_hash_version.map   |   6 +-
 lib/librte_hash/rte_kv_hash.c          | 184 ++++++++++++++++++++++
 lib/librte_hash/rte_kv_hash.h          | 169 ++++++++++++++++++++
 9 files changed, 821 insertions(+), 5 deletions(-)
 create mode 100644 lib/librte_hash/k32v64_hash.c
 create mode 100644 lib/librte_hash/k32v64_hash.h
 create mode 100644 lib/librte_hash/k32v64_hash_avx512vl.c
 create mode 100644 lib/librte_hash/rte_kv_hash.c
 create mode 100644 lib/librte_hash/rte_kv_hash.h
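
For readers unfamiliar with the API introduced here, a minimal usage sketch
(illustrative only, error handling omitted; rte_hash_crc_4byte() stands in
for any user-supplied hash function):

	#include <rte_kv_hash.h>
	#include <rte_hash_crc.h>

	struct rte_kv_hash_params p = {
		.name = "example",
		.entries = 1024,
		.socket_id = SOCKET_ID_ANY,
		.type = RTE_KV_HASH_K32V64,
	};
	struct rte_kv_hash_table *t = rte_kv_hash_create(&p);
	uint32_t key = 42;
	uint32_t sig = rte_hash_crc_4byte(key, 0);
	uint64_t value = 100, old;
	int found;

	/* add key; *found tells whether an old value was replaced */
	rte_kv_hash_add(t, &key, sig, &value, &found);
	/* bulk lookup of one key; returns the number of hits */
	rte_kv_hash_bulk_lookup(t, &key, &sig, &value, 1);
	/* delete; the old value is written back through the pointer */
	rte_kv_hash_delete(t, &key, sig, &old);
	rte_kv_hash_free(t);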

Comments

Ananyev, Konstantin June 23, 2020, 3:44 p.m. UTC | #1
Hi Vladimir,

> --- /dev/null
> +++ b/lib/librte_hash/k32v64_hash.c
> @@ -0,0 +1,277 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include <string.h>
> +
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_memory.h>
> +
> +#include "k32v64_hash.h"
> +
> +static inline int
> +k32v64_hash_lookup(struct k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value)
> +{
> +	return __k32v64_hash_lookup(table, key, hash, value, __kv_cmp_keys);
> +}
> +
> +static int
> +k32v64_hash_bulk_lookup(struct rte_kv_hash_table *ht, void *keys_p,
> +	uint32_t *hashes, void *values_p, unsigned int n)
> +{
> +	struct k32v64_hash_table *table = (struct k32v64_hash_table *)ht;
> +	uint32_t *keys = keys_p;
> +	uint64_t *values = values_p;
> +	int ret, cnt = 0;
> +	unsigned int i;
> +
> +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
> +			(values == NULL)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < n; i++) {
> +		ret = k32v64_hash_lookup(table, keys[i], hashes[i],
> +			&values[i]);
> +		if (ret == 0)
> +			cnt++;
> +	}
> +	return cnt;
> +}
> +
> +#ifdef CC_AVX512VL_SUPPORT
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht,
> +	void *keys_p, uint32_t *hashes, void *values_p, unsigned int n);
> +#endif
> +
> +static rte_kv_hash_bulk_lookup_t
> +get_lookup_bulk_fn(void)
> +{
> +#ifdef CC_AVX512VL_SUPPORT
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL))
> +		return k32v64_hash_bulk_lookup_avx512vl;
> +#endif
> +	return k32v64_hash_bulk_lookup;
> +}
> +
> +static int
> +k32v64_hash_add(struct k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t value, uint64_t *old_value, int *found)
> +{
> +	uint32_t bucket;
> +	int i, idx, ret;
> +	uint8_t msk;
> +	struct k32v64_ext_ent *tmp, *ent, *prev = NULL;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	bucket = hash & table->bucket_msk;
> +	/* Search key in table. Update value if exists */
> +	for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == table->t[bucket].key[i]) &&
> +				(table->t[bucket].key_mask & (1 << i))) {
> +			*old_value = table->t[bucket].val[i];
> +			*found = 1;
> +			__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +				__ATOMIC_ACQUIRE);
> +			table->t[bucket].val[i] = value;
> +			__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +				__ATOMIC_RELEASE);
> +			return 0;
> +		}
> +	}
> +
> +	if (!SLIST_EMPTY(&table->t[bucket].head)) {
> +		SLIST_FOREACH(ent, &table->t[bucket].head, next) {
> +			if (ent->key == key) {
> +				*old_value = ent->val;
> +				*found = 1;
> +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +					__ATOMIC_ACQUIRE);
> +				ent->val = value;
> +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +					__ATOMIC_RELEASE);
> +				return 0;
> +			}
> +		}
> +	}
> +
> +	msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
> +	if (msk) {
> +		idx = __builtin_ctz(msk);
> +		table->t[bucket].key[idx] = key;
> +		table->t[bucket].val[idx] = value;
> +		__atomic_or_fetch(&table->t[bucket].key_mask, 1 << idx,
> +			__ATOMIC_RELEASE);

I think this case also has to be guarded with table->t[bucket].cnt updates.

> +		table->nb_ent++;
> +		*found = 0;
> +		return 0;
> +	}
> +
> +	ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
> +	if (ret < 0)
> +		return ret;
> +
> +	SLIST_NEXT(ent, next) = NULL;
> +	ent->key = key;
> +	ent->val = value;
> +	rte_smp_wmb();

__atomic_thread_fence(__ATOMIC_RELEASE);
?

> +	SLIST_FOREACH(tmp, &table->t[bucket].head, next)
> +		prev = tmp;
> +
> +	if (prev == NULL)
> +		SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
> +	else
> +		SLIST_INSERT_AFTER(prev, ent, next);
> +
> +	table->nb_ent++;
> +	table->nb_ext_ent++;
> +	*found = 0;
> +	return 0;
> +}
> +
> +static int
> +k32v64_hash_delete(struct k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *old_value)
> +{
> +	uint32_t bucket;
> +	int i;
> +	struct k32v64_ext_ent *ent;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	bucket = hash & table->bucket_msk;
> +
> +	for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == table->t[bucket].key[i]) &&
> +				(table->t[bucket].key_mask & (1 << i))) {
> +			*old_value = table->t[bucket].val[i];
> +			ent = SLIST_FIRST(&table->t[bucket].head);
> +			if (ent) {
> +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +					__ATOMIC_ACQUIRE);
> +				table->t[bucket].key[i] = ent->key;
> +				table->t[bucket].val[i] = ent->val;
> +				SLIST_REMOVE_HEAD(&table->t[bucket].head, next);
> +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +					__ATOMIC_RELEASE);
> +				table->nb_ext_ent--;
> +			} else
> +				__atomic_and_fetch(&table->t[bucket].key_mask,
> +					~(1 << i), __ATOMIC_RELEASE);

Same thought as above - it is safer to guard this mask update with a cnt update.

> +			if (ent)
> +				rte_mempool_put(table->ext_ent_pool, ent);

Can't this 'if(ent)' be merged with previous 'if (ent) {...}' above? 
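
For illustration, the merged branch could look like this (untested sketch,
not part of the patch):

	if (ent) {
		__atomic_fetch_add(&table->t[bucket].cnt, 1,
			__ATOMIC_ACQUIRE);
		table->t[bucket].key[i] = ent->key;
		table->t[bucket].val[i] = ent->val;
		SLIST_REMOVE_HEAD(&table->t[bucket].head, next);
		__atomic_fetch_add(&table->t[bucket].cnt, 1,
			__ATOMIC_RELEASE);
		table->nb_ext_ent--;
		/* put the entry back only after it is unlinked */
		rte_mempool_put(table->ext_ent_pool, ent);
	} else
		__atomic_and_fetch(&table->t[bucket].key_mask,
			~(1 << i), __ATOMIC_RELEASE);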

> +			table->nb_ent--;
> +			return 0;
> +		}
> +	}
> +
> +	SLIST_FOREACH(ent, &table->t[bucket].head, next)
> +		if (ent->key == key)
> +			break;
> +
> +	if (ent == NULL)
> +		return -ENOENT;
> +
> +	*old_value = ent->val;
> +
> +	__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_ACQUIRE);
> +	SLIST_REMOVE(&table->t[bucket].head, ent, k32v64_ext_ent, next);
> +	__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_RELEASE);
> +	rte_mempool_put(table->ext_ent_pool, ent);
> +
> +	table->nb_ext_ent--;
> +	table->nb_ent--;
> +
> +	return 0;
> +}
> +
Ananyev, Konstantin June 23, 2020, 11:06 p.m. UTC | #2
> Hi Vladimir,
> 
> > --- /dev/null
> > +++ b/lib/librte_hash/k32v64_hash.c
> > @@ -0,0 +1,277 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Intel Corporation
> > + */
> > +
> > +#include <string.h>
> > +
> > +#include <rte_errno.h>
> > +#include <rte_malloc.h>
> > +#include <rte_memory.h>
> > +
> > +#include "k32v64_hash.h"
> > +
> > +static inline int
> > +k32v64_hash_lookup(struct k32v64_hash_table *table, uint32_t key,
> > +	uint32_t hash, uint64_t *value)
> > +{
> > +	return __k32v64_hash_lookup(table, key, hash, value, __kv_cmp_keys);
> > +}
> > +
> > +static int
> > +k32v64_hash_bulk_lookup(struct rte_kv_hash_table *ht, void *keys_p,
> > +	uint32_t *hashes, void *values_p, unsigned int n)
> > +{
> > +	struct k32v64_hash_table *table = (struct k32v64_hash_table *)ht;
> > +	uint32_t *keys = keys_p;
> > +	uint64_t *values = values_p;
> > +	int ret, cnt = 0;
> > +	unsigned int i;
> > +
> > +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
> > +			(values == NULL)))
> > +		return -EINVAL;


As a nit - this formal parameter checking is better done in the public function
(rte_kv_hash_bulk_lookup) before dereferencing the table and calling the actual lookup().
Same story for modify() - formal parameter checking can be done at the top of the public function.
BTW, why unite add/delete into modify(), if internally you have 2 different functions
(for add/delete) anyway?
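
For example, the checks could be hoisted into the public inline wrapper
(sketch only, reusing the names from this patch):

	static inline int
	rte_kv_hash_bulk_lookup(struct rte_kv_hash_table *table,
		void *keys, uint32_t *hashes, void *values, unsigned int n)
	{
		if (unlikely(table == NULL || keys == NULL ||
				hashes == NULL || values == NULL))
			return -EINVAL;
		/* the backend lookup can then skip the NULL checks */
		return table->lookup(table, keys, hashes, values, n);
	}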

> > +
> > +	for (i = 0; i < n; i++) {
> > +		ret = k32v64_hash_lookup(table, keys[i], hashes[i],
> > +			&values[i]);
> > +		if (ret == 0)
> > +			cnt++;
> > +	}
> > +	return cnt;
> > +}
> > +
> > +#ifdef CC_AVX512VL_SUPPORT
> > +int
> > +k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht,
> > +	void *keys_p, uint32_t *hashes, void *values_p, unsigned int n);
> > +#endif
> > +
> > +static rte_kv_hash_bulk_lookup_t
> > +get_lookup_bulk_fn(void)
> > +{
> > +#ifdef CC_AVX512VL_SUPPORT
> > +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL))
> > +		return k32v64_hash_bulk_lookup_avx512vl;
> > +#endif
> > +	return k32v64_hash_bulk_lookup;
> > +}
> > +
> > +static int
> > +k32v64_hash_add(struct k32v64_hash_table *table, uint32_t key,
> > +	uint32_t hash, uint64_t value, uint64_t *old_value, int *found)
> > +{
> > +	uint32_t bucket;
> > +	int i, idx, ret;
> > +	uint8_t msk;
> > +	struct k32v64_ext_ent *tmp, *ent, *prev = NULL;
> > +
> > +	if (table == NULL)
> > +		return -EINVAL;
> > +
> > +	bucket = hash & table->bucket_msk;
> > +	/* Search key in table. Update value if exists */
> > +	for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
> > +		if ((key == table->t[bucket].key[i]) &&
> > +				(table->t[bucket].key_mask & (1 << i))) {
> > +			*old_value = table->t[bucket].val[i];
> > +			*found = 1;
> > +			__atomic_fetch_add(&table->t[bucket].cnt, 1,
> > +				__ATOMIC_ACQUIRE);

On further thought - atomic add is probably overkill here.
Something like:
/* mark the bucket as being updated before touching it */
void update_start(struct k32v64_hash_bucket *bkt)
{
	bkt->cnt++;
	__atomic_thread_fence(__ATOMIC_ACQ_REL);
}

/* mark the update as finished; readers retry if cnt changed */
void update_finish(struct k32v64_hash_bucket *bkt)
{
	__atomic_thread_fence(__ATOMIC_ACQ_REL);
	bkt->cnt++;
}

I think this should be sufficient here.
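
For example, the in-place value update above would then read (sketch):

	update_start(&table->t[bucket]);
	table->t[bucket].val[i] = value;
	update_finish(&table->t[bucket]);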

> > +			table->t[bucket].val[i] = value;
> > +			__atomic_fetch_add(&table->t[bucket].cnt, 1,
> > +				__ATOMIC_RELEASE);
> > +			return 0;
> > +		}
> > +	}
> > +
> > +	if (!SLIST_EMPTY(&table->t[bucket].head)) {
> > +		SLIST_FOREACH(ent, &table->t[bucket].head, next) {
> > +			if (ent->key == key) {
> > +				*old_value = ent->val;
> > +				*found = 1;
> > +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> > +					__ATOMIC_ACQUIRE);
> > +				ent->val = value;
> > +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> > +					__ATOMIC_RELEASE);
> > +				return 0;
> > +			}
> > +		}
> > +	}
> > +
> > +	msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
> > +	if (msk) {
> > +		idx = __builtin_ctz(msk);
> > +		table->t[bucket].key[idx] = key;
> > +		table->t[bucket].val[idx] = value;
> > +		__atomic_or_fetch(&table->t[bucket].key_mask, 1 << idx,
> > +			__ATOMIC_RELEASE);
> 
> I think this case also has to guarded with table->t[bucket].cnt updates.
> 
> > +		table->nb_ent++;
> > +		*found = 0;
> > +		return 0;
> > +	}
> > +
> > +	ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
> > +	if (ret < 0)
> > +		return ret;
> > +
> > +	SLIST_NEXT(ent, next) = NULL;
> > +	ent->key = key;
> > +	ent->val = value;
> > +	rte_smp_wmb();
> 
> __atomic_thread_fence(__ATOMIC_RELEASE);
> ?
> 
> > +	SLIST_FOREACH(tmp, &table->t[bucket].head, next)
> > +		prev = tmp;
> > +
> > +	if (prev == NULL)
> > +		SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
> > +	else
> > +		SLIST_INSERT_AFTER(prev, ent, next);
> > +
> > +	table->nb_ent++;
> > +	table->nb_ext_ent++;
> > +	*found = 0;
> > +	return 0;
> > +}
> > +
> > +static int
> > +k32v64_hash_delete(struct k32v64_hash_table *table, uint32_t key,
> > +	uint32_t hash, uint64_t *old_value)
> > +{
> > +	uint32_t bucket;
> > +	int i;
> > +	struct k32v64_ext_ent *ent;
> > +
> > +	if (table == NULL)
> > +		return -EINVAL;
> > +
> > +	bucket = hash & table->bucket_msk;
> > +
> > +	for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
> > +		if ((key == table->t[bucket].key[i]) &&
> > +				(table->t[bucket].key_mask & (1 << i))) {
> > +			*old_value = table->t[bucket].val[i];
> > +			ent = SLIST_FIRST(&table->t[bucket].head);
> > +			if (ent) {
> > +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> > +					__ATOMIC_ACQUIRE);
> > +				table->t[bucket].key[i] = ent->key;
> > +				table->t[bucket].val[i] = ent->val;
> > +				SLIST_REMOVE_HEAD(&table->t[bucket].head, next);
> > +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> > +					__ATOMIC_RELEASE);
> > +				table->nb_ext_ent--;
> > +			} else
> > +				__atomic_and_fetch(&table->t[bucket].key_mask,
> > +					~(1 << i), __ATOMIC_RELEASE);
> 
> Same thought as above - safer to guard this mask update with cnt update.
> 
> > +			if (ent)
> > +				rte_mempool_put(table->ext_ent_pool, ent);
> 
> Can't this 'if(ent)' be merged with previous 'if (ent) {...}' above?
> 
> > +			table->nb_ent--;
> > +			return 0;
> > +		}
> > +	}
> > +
> > +	SLIST_FOREACH(ent, &table->t[bucket].head, next)
> > +		if (ent->key == key)
> > +			break;
> > +
> > +	if (ent == NULL)
> > +		return -ENOENT;
> > +
> > +	*old_value = ent->val;
> > +
> > +	__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_ACQUIRE);
> > +	SLIST_REMOVE(&table->t[bucket].head, ent, k32v64_ext_ent, next);
> > +	__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_RELEASE);
> > +	rte_mempool_put(table->ext_ent_pool, ent);
> > +
> > +	table->nb_ext_ent--;
> > +	table->nb_ent--;
> > +
> > +	return 0;
> > +}
> > +
Wang, Yipeng1 June 24, 2020, 1:19 a.m. UTC | #3
> -----Original Message-----
> From: Medvedkin, Vladimir <vladimir.medvedkin@intel.com>
> Sent: Friday, May 8, 2020 12:59 PM
> To: dev@dpdk.org
> Cc: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Wang, Yipeng1
> <yipeng1.wang@intel.com>; Gobriel, Sameh <sameh.gobriel@intel.com>;
> Richardson, Bruce <bruce.richardson@intel.com>
> Subject: [PATCH v4 1/4] hash: add kv hash library
> 
> KV hash is a special optimized key-value storage for fixed key and value sizes.
> At the moment it supports 32 bit keys and 64 bit values. This table is hash
> function agnostic so user must provide precalculated hash signature for
> add/delete/lookup operations.
> 
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> ---
>  lib/Makefile                           |   2 +-
>  lib/librte_hash/Makefile               |  14 +-
>  lib/librte_hash/k32v64_hash.c          | 277 +++++++++++++++++++++++++++++++++
>  lib/librte_hash/k32v64_hash.h          |  98 ++++++++++++
>  lib/librte_hash/k32v64_hash_avx512vl.c |  59 +++++++
>  lib/librte_hash/meson.build            |  17 +-
>  lib/librte_hash/rte_hash_version.map   |   6 +-
>  lib/librte_hash/rte_kv_hash.c          | 184 ++++++++++++++++++++++
>  lib/librte_hash/rte_kv_hash.h          | 169 ++++++++++++++++++++
>  9 files changed, 821 insertions(+), 5 deletions(-)
>  create mode 100644 lib/librte_hash/k32v64_hash.c
>  create mode 100644 lib/librte_hash/k32v64_hash.h
>  create mode 100644 lib/librte_hash/k32v64_hash_avx512vl.c
>  create mode 100644 lib/librte_hash/rte_kv_hash.c
>  create mode 100644 lib/librte_hash/rte_kv_hash.h
> 
> diff --git a/lib/Makefile b/lib/Makefile
> index 9d24609..42769e9 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -48,7 +48,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
>  DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ethdev \
>  			librte_net librte_hash librte_cryptodev
>  DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
> -DEPDIRS-librte_hash := librte_eal librte_ring
> +DEPDIRS-librte_hash := librte_eal librte_ring librte_mempool
>  DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
>  DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
>  DIRS-$(CONFIG_RTE_LIBRTE_RIB) += librte_rib
> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
> index ec9f864..a0cdee9 100644
> --- a/lib/librte_hash/Makefile
> +++ b/lib/librte_hash/Makefile
> @@ -8,13 +8,15 @@ LIB = librte_hash.a
> 
>  CFLAGS += -O3
>  CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
> -LDLIBS += -lrte_eal -lrte_ring
> +LDLIBS += -lrte_eal -lrte_ring -lrte_mempool
> 
>  EXPORT_MAP := rte_hash_version.map
> 
>  # all source are stored in SRCS-y
>  SRCS-$(CONFIG_RTE_LIBRTE_HASH) := rte_cuckoo_hash.c
>  SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
> +SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_kv_hash.c
> +SRCS-$(CONFIG_RTE_LIBRTE_HASH) += k32v64_hash.c
> 
>  # install this header file
>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
> @@ -27,5 +29,15 @@ endif
>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_kv_hash.h
> +
> +CC_AVX512VL_SUPPORT=$(shell $(CC) -mavx512vl -dM -E - </dev/null 2>&1 | \
> +grep -q __AVX512VL__ && echo 1)
> +
> +ifeq ($(CC_AVX512VL_SUPPORT), 1)
> +	SRCS-$(CONFIG_RTE_LIBRTE_HASH) += k32v64_hash_avx512vl.c
> +	CFLAGS_k32v64_hash_avx512vl.o += -mavx512vl
> +	CFLAGS_k32v64_hash.o += -DCC_AVX512VL_SUPPORT
> +endif
> 
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_hash/k32v64_hash.c b/lib/librte_hash/k32v64_hash.c
> new file mode 100644
> index 0000000..24cd63a
> --- /dev/null
> +++ b/lib/librte_hash/k32v64_hash.c
> @@ -0,0 +1,277 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include <string.h>
> +
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_memory.h>
> +
> +#include "k32v64_hash.h"
> +
> +static inline int
> +k32v64_hash_lookup(struct k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value)
> +{
> +	return __k32v64_hash_lookup(table, key, hash, value,
> +		__kv_cmp_keys);
> +}
> +
> +static int
> +k32v64_hash_bulk_lookup(struct rte_kv_hash_table *ht, void *keys_p,
> +	uint32_t *hashes, void *values_p, unsigned int n)
> +{
> +	struct k32v64_hash_table *table = (struct k32v64_hash_table *)ht;
> +	uint32_t *keys = keys_p;
> +	uint64_t *values = values_p;
> +	int ret, cnt = 0;
> +	unsigned int i;
> +
> +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
> +			(values == NULL)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < n; i++) {
> +		ret = k32v64_hash_lookup(table, keys[i], hashes[i],
> +			&values[i]);
[Wang, Yipeng] You don't need to start a new line for values.
> +		if (ret == 0)
> +			cnt++;
> +	}
> +	return cnt;
> +}
> +
> +#ifdef CC_AVX512VL_SUPPORT
[Wang, Yipeng] Why not use the already provided macros, e.g. #if defined(RTE_MACHINE_CPUFLAG_SSE2), like rte_hash does?
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht,
> +	void *keys_p, uint32_t *hashes, void *values_p, unsigned int n);
> +#endif
> +
> +static rte_kv_hash_bulk_lookup_t
> +get_lookup_bulk_fn(void)
> +{
> +#ifdef CC_AVX512VL_SUPPORT
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL))
> +		return k32v64_hash_bulk_lookup_avx512vl;
> +#endif
> +	return k32v64_hash_bulk_lookup;
> +}
> +
> +static int
> +k32v64_hash_add(struct k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t value, uint64_t *old_value, int *found)
> +{
> +	uint32_t bucket;
> +	int i, idx, ret;
> +	uint8_t msk;
> +	struct k32v64_ext_ent *tmp, *ent, *prev = NULL;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	bucket = hash & table->bucket_msk;
> +	/* Search key in table. Update value if exists */
> +	for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == table->t[bucket].key[i]) &&
> +				(table->t[bucket].key_mask & (1 << i))) {
> +			*old_value = table->t[bucket].val[i];
> +			*found = 1;
> +			__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +				__ATOMIC_ACQUIRE);
> +			table->t[bucket].val[i] = value;
> +			__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +				__ATOMIC_RELEASE);
> +			return 0;
> +		}
> +	}
> +
> +	if (!SLIST_EMPTY(&table->t[bucket].head)) {
> +		SLIST_FOREACH(ent, &table->t[bucket].head, next) {
> +			if (ent->key == key) {
> +				*old_value = ent->val;
> +				*found = 1;
> +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +					__ATOMIC_ACQUIRE);
> +				ent->val = value;
> +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +					__ATOMIC_RELEASE);
> +				return 0;
> +			}
> +		}
> +	}
> +
> +	msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
> +	if (msk) {
> +		idx = __builtin_ctz(msk);
> +		table->t[bucket].key[idx] = key;
> +		table->t[bucket].val[idx] = value;
> +		__atomic_or_fetch(&table->t[bucket].key_mask, 1 << idx,
> +			__ATOMIC_RELEASE);
> +		table->nb_ent++;
> +		*found = 0;
> +		return 0;
> +	}
> +
> +	ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
> +	if (ret < 0)
> +		return ret;
> +
> +	SLIST_NEXT(ent, next) = NULL;
> +	ent->key = key;
> +	ent->val = value;
> +	rte_smp_wmb();
> +	SLIST_FOREACH(tmp, &table->t[bucket].head, next)
> +		prev = tmp;
> +
> +	if (prev == NULL)
> +		SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
> +	else
> +		SLIST_INSERT_AFTER(prev, ent, next);
> +
> +	table->nb_ent++;
> +	table->nb_ext_ent++;
> +	*found = 0;
> +	return 0;
> +}
> +
> +static int
> +k32v64_hash_delete(struct k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *old_value)
> +{
> +	uint32_t bucket;
> +	int i;
> +	struct k32v64_ext_ent *ent;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	bucket = hash & table->bucket_msk;
> +
> +	for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == table->t[bucket].key[i]) &&
> +				(table->t[bucket].key_mask & (1 << i))) {
> +			*old_value = table->t[bucket].val[i];
> +			ent = SLIST_FIRST(&table->t[bucket].head);
> +			if (ent) {
> +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +					__ATOMIC_ACQUIRE);
> +				table->t[bucket].key[i] = ent->key;
> +				table->t[bucket].val[i] = ent->val;
> +				SLIST_REMOVE_HEAD(&table->t[bucket].head, next);
> +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +					__ATOMIC_RELEASE);
> +				table->nb_ext_ent--;
> +			} else
> +				__atomic_and_fetch(&table->t[bucket].key_mask,
> +					~(1 << i), __ATOMIC_RELEASE);
> +			if (ent)
> +				rte_mempool_put(table->ext_ent_pool, ent);
> +			table->nb_ent--;
> +			return 0;
> +		}
> +	}
> +
> +	SLIST_FOREACH(ent, &table->t[bucket].head, next)
> +		if (ent->key == key)
> +			break;
> +
> +	if (ent == NULL)
> +		return -ENOENT;
> +
> +	*old_value = ent->val;
> +
> +	__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_ACQUIRE);
> +	SLIST_REMOVE(&table->t[bucket].head, ent, k32v64_ext_ent, next);
> +	__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_RELEASE);
> +	rte_mempool_put(table->ext_ent_pool, ent);
> +
[Wang, Yipeng] I am not sure if delete can be called safely while other lookup threads are running.
The item could be recycled and reused by another add while a lookup thread is traversing the linked list.
This is similar to why we need the RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL flag and related API in rte_hash.
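
To illustrate the race (hypothetical timeline, not code from the patch):

	/* reader: walks the bucket list and holds a pointer */
	ent = SLIST_FIRST(&table->t[bucket].head);
	/* writer: rte_kv_hash_delete() -> rte_mempool_put(ent) */
	/* writer: rte_kv_hash_add() -> rte_mempool_get() may return the
	 * same object and rewrite ent->key/ent->val */
	/* reader: dereferences ent and sees a recycled entry */

rte_hash handles this with RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL plus
rte_hash_free_key_with_position(), so the application frees the key only
after readers have quiesced.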
> +	table->nb_ext_ent--;
> +	table->nb_ent--;
> +
> +	return 0;
> +}
> +
> +static int
> +k32v64_modify(struct rte_kv_hash_table *table, void *key_p, uint32_t hash,
> +	enum rte_kv_modify_op op, void *value_p, int *found)
> +{
> +	struct k32v64_hash_table *ht = (struct k32v64_hash_table *)table;
> +	uint32_t *key = key_p;
> +	uint64_t value;
> +
> +	if ((ht == NULL) || (key == NULL) || (value_p == NULL) ||
> +			(found == NULL) || (op >= RTE_KV_MODIFY_OP_MAX)) {
> +		return -EINVAL;
> +	}
> +
> +	value = *(uint64_t *)value_p;
> +	switch (op) {
> +	case RTE_KV_MODIFY_ADD:
> +		return k32v64_hash_add(ht, *key, hash, value, value_p, found);
> +	case RTE_KV_MODIFY_DEL:
> +		return k32v64_hash_delete(ht, *key, hash, value_p);
> +	default:
> +		break;
> +	}
[Wang, Yipeng] A question would be: why put del and add inside another modify wrapper?
If we don't wrap del and add like this, we could get rid of this branch.
> +
> +	return -EINVAL;
> +}
> +
> +struct rte_kv_hash_table *
> +k32v64_hash_create(const struct rte_kv_hash_params *params)
> +{
> +	char hash_name[RTE_KV_HASH_NAMESIZE];
> +	struct k32v64_hash_table *ht = NULL;
> +	uint32_t mem_size, nb_buckets, max_ent;
> +	int ret;
> +	struct rte_mempool *mp;
> +
> +	if ((params == NULL) || (params->name == NULL) ||
> +			(params->entries == 0)) {
[Wang, Yipeng] Should we check if the entry count is larger than keys_per_bucket as well?
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	ret = snprintf(hash_name, sizeof(hash_name), "KV_%s", params->name);
> +	if (ret < 0 || ret >= RTE_KV_HASH_NAMESIZE) {
> +		rte_errno = ENAMETOOLONG;
> +		return NULL;
> +	}
> +
> +	max_ent = rte_align32pow2(params->entries);
> +	nb_buckets = max_ent / K32V64_KEYS_PER_BUCKET;
[Wang, Yipeng] A macro is needed to check that keys_per_bucket is a power of 2.
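
A compile-time guard would be enough here, e.g. (sketch, assuming the
rte_common.h helpers are acceptable in this file):

	RTE_BUILD_BUG_ON(!RTE_IS_POWER_OF_2(K32V64_KEYS_PER_BUCKET));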
> +	mem_size = sizeof(struct k32v64_hash_table) +
> +		sizeof(struct k32v64_hash_bucket) * nb_buckets;
> +
> +	mp = rte_mempool_create(hash_name, max_ent,
> +		sizeof(struct k32v64_ext_ent), 0, 0, NULL, NULL, NULL, NULL,
> +		params->socket_id, 0);
> +
> +	if (mp == NULL)
> +		return NULL;
> +
> +	ht = rte_zmalloc_socket(hash_name, mem_size,
> +		RTE_CACHE_LINE_SIZE, params->socket_id);
> +	if (ht == NULL) {
> +		rte_mempool_free(mp);
> +		return NULL;
> +	}
> +
> +	memcpy(ht->pub.name, hash_name, sizeof(ht->pub.name));
> +	ht->max_ent = max_ent;
> +	ht->bucket_msk = nb_buckets - 1;
> +	ht->ext_ent_pool = mp;
> +	ht->pub.lookup = get_lookup_bulk_fn();
[Wang, Yipeng] Inside the function, we also need to check the CPUID at runtime to decide if AVX can be used, not only at compile time.
You could refer to the example from rte_hash: ... if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) ...

> +	ht->pub.modify = k32v64_modify;
> +
> +	return (struct rte_kv_hash_table *)ht;
> +}
> +
> +void
> +k32v64_hash_free(struct rte_kv_hash_table *ht)
> +{
> +	if (ht == NULL)
> +		return;
> +
> +	rte_mempool_free(((struct k32v64_hash_table *)ht)->ext_ent_pool);
> +	rte_free(ht);
> +}
> diff --git a/lib/librte_hash/k32v64_hash.h b/lib/librte_hash/k32v64_hash.h
> new file mode 100644 index 0000000..10061a5
> --- /dev/null
> +++ b/lib/librte_hash/k32v64_hash.h
> @@ -0,0 +1,98 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include <rte_kv_hash.h>
> +
> +#define K32V64_KEYS_PER_BUCKET		4
> +#define K32V64_WRITE_IN_PROGRESS	1
> +#define VALID_KEY_MSK           ((1 << K32V64_KEYS_PER_BUCKET) - 1)
> +
> +struct k32v64_ext_ent {
> +	SLIST_ENTRY(k32v64_ext_ent) next;
> +	uint32_t	key;
> +	uint64_t	val;
> +};
> +
> +struct k32v64_hash_bucket {
> +	uint32_t	key[K32V64_KEYS_PER_BUCKET];
> +	uint64_t	val[K32V64_KEYS_PER_BUCKET];
> +	uint8_t		key_mask;
> +	uint32_t	cnt;
> +	SLIST_HEAD(k32v64_list_head, k32v64_ext_ent) head;
> +} __rte_cache_aligned;
> +
> +struct k32v64_hash_table {
> +	struct rte_kv_hash_table pub;	/**< Public part */
[Wang, Yipeng] Do we need to have the pub field? Could we just init the public part in the public create function?
> +	uint32_t	nb_ent;		/**< Number of entities in the table */
> +	uint32_t	nb_ext_ent;	/**< Number of extended entities */
> +	uint32_t	max_ent;	/**< Maximum number of entities */
> +	uint32_t	bucket_msk;
> +	struct rte_mempool	*ext_ent_pool;
> +	__extension__ struct k32v64_hash_bucket	t[0];
> +};
> +
> +typedef int (*k32v64_cmp_fn_t)
> +(struct k32v64_hash_bucket *bucket, uint32_t key, uint64_t *val);
> +
> +static inline int
> +__kv_cmp_keys(struct k32v64_hash_bucket *bucket, uint32_t key,
> +	uint64_t *val)
> +{
> +	int i;
> +
> +	for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == bucket->key[i]) &&
> +				(bucket->key_mask & (1 << i))) {
> +			*val = bucket->val[i];
> +			return 1;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static inline int
> +__k32v64_hash_lookup(struct k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value, k32v64_cmp_fn_t cmp_f)
> +{
> +	uint64_t	val = 0;
> +	struct k32v64_ext_ent *ent;
> +	uint32_t	cnt;
> +	int found = 0;
> +	uint32_t bucket = hash & table->bucket_msk;
> +
> +	do {
> +
> +		do {
> +			cnt = __atomic_load_n(&table->t[bucket].cnt,
> +				__ATOMIC_ACQUIRE);
> +		} while (unlikely(cnt & K32V64_WRITE_IN_PROGRESS));
> +
> +		found = cmp_f(&table->t[bucket], key, &val);
> +		if (unlikely((found == 0) &&
> +				(!SLIST_EMPTY(&table->t[bucket].head)))) {
> +			SLIST_FOREACH(ent, &table->t[bucket].head, next) {
> +				if (ent->key == key) {
> +					val = ent->val;
> +					found = 1;
> +					break;
> +				}
> +			}
> +		}
> +		__atomic_thread_fence(__ATOMIC_RELEASE);
> +	} while (unlikely(cnt != __atomic_load_n(&table->t[bucket].cnt,
> +			 __ATOMIC_RELAXED)));
> +
> +	if (found == 1) {
> +		*value = val;
> +		return 0;
> +	} else
> +		return -ENOENT;
> +}
> +
> +struct rte_kv_hash_table *
> +k32v64_hash_create(const struct rte_kv_hash_params *params);
> +
> +void
> +k32v64_hash_free(struct rte_kv_hash_table *ht);
> diff --git a/lib/librte_hash/k32v64_hash_avx512vl.c
> b/lib/librte_hash/k32v64_hash_avx512vl.c
> new file mode 100644
> index 0000000..75cede5
> --- /dev/null
> +++ b/lib/librte_hash/k32v64_hash_avx512vl.c
> @@ -0,0 +1,59 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include "k32v64_hash.h"
> +
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht, void *keys_p,
> +	uint32_t *hashes, void *values_p, unsigned int n);
> +
> +static inline int
> +k32v64_cmp_keys_avx512vl(struct k32v64_hash_bucket *bucket, uint32_t key,
> +	uint64_t *val)
> +{
> +	__m128i keys, srch_key;
> +	__mmask8 msk;
> +
> +	keys = _mm_load_si128((void *)bucket);
> +	srch_key = _mm_set1_epi32(key);
> +
> +	msk = _mm_mask_cmpeq_epi32_mask(bucket->key_mask, keys, srch_key);
> +	if (msk) {
> +		*val = bucket->val[__builtin_ctz(msk)];
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static inline int
> +k32v64_hash_lookup_avx512vl(struct k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value)
> +{
> +	return __k32v64_hash_lookup(table, key, hash, value,
> +		k32v64_cmp_keys_avx512vl);
> +}
> +
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht, void *keys_p,
> +	uint32_t *hashes, void *values_p, unsigned int n)
> +{
> +	struct k32v64_hash_table *table = (struct k32v64_hash_table *)ht;
> +	uint32_t *keys = keys_p;
> +	uint64_t *values = values_p;
> +	int ret, cnt = 0;
> +	unsigned int i;
> +
> +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
> +			(values == NULL)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < n; i++) {
> +		ret = k32v64_hash_lookup_avx512vl(table, keys[i], hashes[i],
> +			&values[i]);
> +		if (ret == 0)
> +			cnt++;
> +	}
> +	return cnt;
> +}
> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
> index 6ab46ae..0d014ea 100644
> --- a/lib/librte_hash/meson.build
> +++ b/lib/librte_hash/meson.build
> @@ -3,10 +3,23 @@
> 
>  headers = files('rte_crc_arm64.h',
>  	'rte_fbk_hash.h',
> +	'rte_kv_hash.h',
>  	'rte_hash_crc.h',
>  	'rte_hash.h',
>  	'rte_jhash.h',
>  	'rte_thash.h')
> 
> -sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c')
> -deps += ['ring']
> +sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c', 'rte_kv_hash.c',
> +	'k32v64_hash.c')
> +deps += ['ring', 'mempool']
> +
> +if dpdk_conf.has('RTE_ARCH_X86')
> +	if cc.has_argument('-mavx512vl')
> +		avx512_tmplib = static_library('avx512_tmp',
> +                                'k32v64_hash_avx512vl.c',
> +                                dependencies: static_rte_mempool,
> +                                c_args: cflags + ['-mavx512vl'])
> +                objs += avx512_tmplib.extract_objects('k32v64_hash_avx512vl.c')
> +                cflags += '-DCC_AVX512VL_SUPPORT'
> +
> +	endif
> +endif
> diff --git a/lib/librte_hash/rte_hash_version.map
> b/lib/librte_hash/rte_hash_version.map
> index c2a9094..614e0a5 100644
> --- a/lib/librte_hash/rte_hash_version.map
> +++ b/lib/librte_hash/rte_hash_version.map
> @@ -36,5 +36,9 @@ EXPERIMENTAL {
>  	rte_hash_lookup_with_hash_bulk;
>  	rte_hash_lookup_with_hash_bulk_data;
>  	rte_hash_max_key_id;
> -
> +	rte_kv_hash_create;
> +	rte_kv_hash_find_existing;
> +	rte_kv_hash_free;
> +	rte_kv_hash_add;
> +	rte_kv_hash_delete;
>  };
> diff --git a/lib/librte_hash/rte_kv_hash.c b/lib/librte_hash/rte_kv_hash.c
> new file mode 100644
> index 0000000..03df8db
> --- /dev/null
> +++ b/lib/librte_hash/rte_kv_hash.c
> @@ -0,0 +1,184 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include <string.h>
> +
> +#include <rte_eal_memconfig.h>
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_memory.h>
> +#include <rte_tailq.h>
> +
> +#include <rte_kv_hash.h>
[Wang, Yipeng] I think for this header we should use quotes "" instead of angle brackets.
> +#include "k32v64_hash.h"
> +
> +TAILQ_HEAD(rte_kv_hash_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem rte_kv_hash_tailq = {
> +	.name = "RTE_KV_HASH",
> +};
> +
> +EAL_REGISTER_TAILQ(rte_kv_hash_tailq);
> +
> +int
> +rte_kv_hash_add(struct rte_kv_hash_table *table, void *key,
> +	uint32_t hash, void *value, int *found)
> +{
> +	if (table == NULL)
[Wang, Yipeng] To be more consistent, I think we should either also check key/value/found here, or not check at all and leave it to the next functions.
> +		return -EINVAL;
> +
> +	return table->modify(table, key, hash, RTE_KV_MODIFY_ADD,
> +		value, found);
> +}
> +
> +int
> +rte_kv_hash_delete(struct rte_kv_hash_table *table, void *key,
> +	uint32_t hash, void *value)
> +{
> +	int found;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	return table->modify(table, key, hash, RTE_KV_MODIFY_DEL,
> +		value, &found);
> +}
> +
> +struct rte_kv_hash_table *
> +rte_kv_hash_find_existing(const char *name)
> +{
> +	struct rte_kv_hash_table *h = NULL;
> +	struct rte_tailq_entry *te;
> +	struct rte_kv_hash_list *kv_hash_list;
> +
> +	kv_hash_list = RTE_TAILQ_CAST(rte_kv_hash_tailq.head,
> +			rte_kv_hash_list);
> +
> +	rte_mcfg_tailq_read_lock();
> +	TAILQ_FOREACH(te, kv_hash_list, next) {
> +		h = (struct rte_kv_hash_table *) te->data;
> +		if (strncmp(name, h->name, RTE_KV_HASH_NAMESIZE) == 0)
> +			break;
> +	}
> +	rte_mcfg_tailq_read_unlock();
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +	return h;
> +}
> +
> +struct rte_kv_hash_table *
> +rte_kv_hash_create(const struct rte_kv_hash_params *params)
> +{
> +	char hash_name[RTE_KV_HASH_NAMESIZE];
> +	struct rte_kv_hash_table *ht, *tmp_ht = NULL;
> +	struct rte_tailq_entry *te;
> +	struct rte_kv_hash_list *kv_hash_list;
> +	int ret;
> +
> +	if ((params == NULL) || (params->name == NULL) ||
> +			(params->entries == 0) ||
> +			(params->type >= RTE_KV_HASH_MAX)) {
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	kv_hash_list = RTE_TAILQ_CAST(rte_kv_hash_tailq.head,
> +		rte_kv_hash_list);
> +
> +	ret = snprintf(hash_name, sizeof(hash_name), "KV_%s", params->name);
> +	if (ret < 0 || ret >= RTE_KV_HASH_NAMESIZE) {
> +		rte_errno = ENAMETOOLONG;
> +		return NULL;
> +	}
> +
> +	switch (params->type) {
> +	case RTE_KV_HASH_K32V64:
> +		ht = k32v64_hash_create(params);
> +		break;
> +	default:
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +	if (ht == NULL)
> +		return ht;
> +
> +	rte_mcfg_tailq_write_lock();
> +	TAILQ_FOREACH(te, kv_hash_list, next) {
> +		tmp_ht = (struct rte_kv_hash_table *) te->data;
> +		if (strncmp(params->name, tmp_ht->name,
> +				RTE_KV_HASH_NAMESIZE) == 0)
> +			break;
> +	}
> +	if (te != NULL) {
> +		rte_errno = EEXIST;
> +		goto exit;
> +	}
> +
> +	te = rte_zmalloc("KV_HASH_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL) {
> +		RTE_LOG(ERR, HASH, "Failed to allocate tailq entry\n");
> +		goto exit;
> +	}
> +
> +	ht->type = params->type;
> +	te->data = (void *)ht;
> +	TAILQ_INSERT_TAIL(kv_hash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ht;
> +
> +exit:
> +	rte_mcfg_tailq_write_unlock();
> +	switch (params->type) {
> +	case RTE_KV_HASH_K32V64:
> +		k32v64_hash_free(ht);
> +		break;
> +	default:
> +		break;
> +	}
> +	return NULL;
> +}
> +
> +void
> +rte_kv_hash_free(struct rte_kv_hash_table *ht)
> +{
> +	struct rte_tailq_entry *te;
> +	struct rte_kv_hash_list *kv_hash_list;
> +
> +	if (ht == NULL)
> +		return;
> +
> +	kv_hash_list = RTE_TAILQ_CAST(rte_kv_hash_tailq.head,
> +				rte_kv_hash_list);
> +
> +	rte_mcfg_tailq_write_lock();
> +
> +	/* find out tailq entry */
> +	TAILQ_FOREACH(te, kv_hash_list, next) {
> +		if (te->data == (void *) ht)
> +			break;
> +	}
> +
> +
> +	if (te == NULL) {
> +		rte_mcfg_tailq_write_unlock();
> +		return;
> +	}
> +
> +	TAILQ_REMOVE(kv_hash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +
> +	switch (ht->type) {
> +	case RTE_KV_HASH_K32V64:
> +		k32v64_hash_free(ht);
> +		break;
> +	default:
> +		break;
> +	}
> +	rte_free(te);
> +}
> diff --git a/lib/librte_hash/rte_kv_hash.h b/lib/librte_hash/rte_kv_hash.h
> new file mode 100644
> index 0000000..c0375d1
> --- /dev/null
> +++ b/lib/librte_hash/rte_kv_hash.h
> @@ -0,0 +1,169 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#ifndef _RTE_KV_HASH_H_
> +#define _RTE_KV_HASH_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <rte_compat.h>
> +#include <rte_atomic.h>
> +#include <rte_mempool.h>
> +
> +#define RTE_KV_HASH_NAMESIZE		32
> +
> +enum rte_kv_hash_type {
> +	RTE_KV_HASH_K32V64,
> +	RTE_KV_HASH_MAX
> +};
> +
> +enum rte_kv_modify_op {
> +	RTE_KV_MODIFY_ADD,
> +	RTE_KV_MODIFY_DEL,
> +	RTE_KV_MODIFY_OP_MAX
> +};
[Wang, Yipeng] Again, is there any particular reason why you combine add and del into an additional modify operation?
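
If add and delete were kept separate, the dispatch table could simply hold
two function pointers instead (sketch; the rte_kv_hash_add_t/del_t typedefs
are hypothetical, not part of this patch):

	struct rte_kv_hash_table {
		char name[RTE_KV_HASH_NAMESIZE];
		rte_kv_hash_bulk_lookup_t	lookup;
		rte_kv_hash_add_t		add;	/* hypothetical */
		rte_kv_hash_del_t		del;	/* hypothetical */
		enum rte_kv_hash_type		type;
	};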
> +
> +struct rte_kv_hash_params {
> +	const char *name;
> +	uint32_t entries;
> +	int socket_id;
> +	enum rte_kv_hash_type type;
> +};
> +
> +struct rte_kv_hash_table;
> +
> +typedef int (*rte_kv_hash_bulk_lookup_t)
> +(struct rte_kv_hash_table *table, void *keys, uint32_t *hashes,
> +	void *values, unsigned int n);
> +
> +typedef int (*rte_kv_hash_modify_t)
> +(struct rte_kv_hash_table *table, void *key, uint32_t hash,
> +	enum rte_kv_modify_op op, void *value, int *found);
> +
> +struct rte_kv_hash_table {
> +	char name[RTE_KV_HASH_NAMESIZE];	/**< Name of the hash. */
> +	rte_kv_hash_bulk_lookup_t	lookup;
> +	rte_kv_hash_modify_t		modify;
> +	enum rte_kv_hash_type		type;
> +};
> +
> +/**
> + * Lookup bulk of keys.
> + * This function is multi-thread safe with regarding to other lookup threads.
> + *
> + * @param table
> + *   Hash table to add the key to.
> + * @param keys
> + *   Pointer to array of keys
> + * @param hashes
> + *   Pointer to array of hash values associated with keys.
> + * @param values
> + *   Pointer to array of value corresponded to keys
> + *   If the key was not found the corresponding value remains intact.
> + * @param n
> + *   Number of keys to lookup in batch.
> + * @return
> + *   -EINVAL if there's an error, otherwise number of successful lookups.
> + */
[Wang, Yipeng] The __rte_experimental tag is missing here.
> +static inline int
> +rte_kv_hash_bulk_lookup(struct rte_kv_hash_table *table,
> +	void *keys, uint32_t *hashes, void *values, unsigned int n)
> +{
> +	return table->lookup(table, keys, hashes, values, n);
> +}
> +
> +/**
> + * Add a key to an existing hash table with hash value.
> + * This operation is not multi-thread safe regarding to add/delete functions
> + * and should only be called from one thread.
> + * However it is safe to call it along with lookup.
> + *
> + * @param table
> + *   Hash table to add the key to.
> + * @param key
> + *   Key to add to the hash table.
> + * @param value
> + *   Value to associate with key.
> + * @param hash
> + *   Hash value associated with key.
> + * @param found
> + *   0 if no previously added key was found
> + *   1 previously added key was found, old value associated with the key
> + *   was written to *value
> + * @return
> + *   0 if ok, or negative value on error.
> + */
> +__rte_experimental
> +int
> +rte_kv_hash_add(struct rte_kv_hash_table *table, void *key,
> +	uint32_t hash, void *value, int *found);
> +
> +/**
> + * Remove a key with a given hash value from an existing hash table.
> + * This operation is not multi-thread safe regarding to add/delete functions
> + * and should only be called from one thread.
> + * However it is safe to call it along with lookup.
> + *
> + * @param table
> + *   Hash table to remove the key from.
> + * @param key
> + *   Key to remove from the hash table.
> + * @param hash
> + *   hash value associated with key.
> + * @param value
> + *   pointer to memory where the old value will be written to on success
> + * @return
> + *   0 if ok, or negative value on error.
> + */
> +__rte_experimental
> +int
> +rte_kv_hash_delete(struct rte_kv_hash_table *table, void *key,
> +	uint32_t hash, void *value);
> +
> +/**
> + * Performs a lookup for an existing hash table, and returns a pointer to
> + * the table if found.
> + *
> + * @param name
> + *   Name of the hash table to find
> + *
> + * @return
> + *   pointer to hash table structure or NULL on error with rte_errno
> + *   set appropriately.
> + */
> +__rte_experimental
> +struct rte_kv_hash_table *
> +rte_kv_hash_find_existing(const char *name);
> +
> +/**
> + * Create a new hash table for use with four byte keys.
[Wang, Yipeng] Inaccurate comment - this generic create is not limited to four byte keys.
> + *
> + * @param params
> + *   Parameters used in creation of hash table.
> + *
> + * @return
> + *   Pointer to hash table structure that is used in future hash table
> + *   operations, or NULL on error with rte_errno set appropriately.
> + */
> +__rte_experimental
> +struct rte_kv_hash_table *
> +rte_kv_hash_create(const struct rte_kv_hash_params *params);
> +
> +/**
> + * Free all memory used by a hash table.
> + *
> + * @param table
> + *   Hash table to deallocate.
> + */
> +__rte_experimental
> +void
> +rte_kv_hash_free(struct rte_kv_hash_table *table);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_KV_HASH_H_ */
> --
> 2.7.4
Honnappa Nagarahalli June 25, 2020, 4:27 a.m. UTC | #4
Hi Vladimir,
	A few comments inline.

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Vladimir Medvedkin
> Sent: Friday, May 8, 2020 2:59 PM
> To: dev@dpdk.org
> Cc: konstantin.ananyev@intel.com; yipeng1.wang@intel.com;
> sameh.gobriel@intel.com; bruce.richardson@intel.com
> Subject: [dpdk-dev] [PATCH v4 1/4] hash: add kv hash library
> 
> KV hash is a special optimized key-value storage for fixed key and value sizes.
> At the moment it supports 32 bit keys and 64 bit values. This table is hash
> function agnostic so user must provide precalculated hash signature for
> add/delete/lookup operations.
> 
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> ---
>  lib/Makefile                           |   2 +-
>  lib/librte_hash/Makefile               |  14 +-
>  lib/librte_hash/k32v64_hash.c          | 277 +++++++++++++++++++++++++++++++++
>  lib/librte_hash/k32v64_hash.h          |  98 ++++++++++++
>  lib/librte_hash/k32v64_hash_avx512vl.c |  59 +++++++
>  lib/librte_hash/meson.build            |  17 +-
>  lib/librte_hash/rte_hash_version.map   |   6 +-
>  lib/librte_hash/rte_kv_hash.c          | 184 ++++++++++++++++++++++
>  lib/librte_hash/rte_kv_hash.h          | 169 ++++++++++++++++++++
>  9 files changed, 821 insertions(+), 5 deletions(-)
>  create mode 100644 lib/librte_hash/k32v64_hash.c
>  create mode 100644 lib/librte_hash/k32v64_hash.h
>  create mode 100644 lib/librte_hash/k32v64_hash_avx512vl.c
>  create mode 100644 lib/librte_hash/rte_kv_hash.c
>  create mode 100644 lib/librte_hash/rte_kv_hash.h
> 
> diff --git a/lib/Makefile b/lib/Makefile
> index 9d24609..42769e9 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -48,7 +48,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
>  DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ethdev \
>  			librte_net librte_hash librte_cryptodev
>  DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
> -DEPDIRS-librte_hash := librte_eal librte_ring
> +DEPDIRS-librte_hash := librte_eal librte_ring librte_mempool
>  DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
>  DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
>  DIRS-$(CONFIG_RTE_LIBRTE_RIB) += librte_rib
> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
> index ec9f864..a0cdee9 100644
> --- a/lib/librte_hash/Makefile
> +++ b/lib/librte_hash/Makefile
> @@ -8,13 +8,15 @@ LIB = librte_hash.a
> 
>  CFLAGS += -O3
>  CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
> -LDLIBS += -lrte_eal -lrte_ring
> +LDLIBS += -lrte_eal -lrte_ring -lrte_mempool
> 
>  EXPORT_MAP := rte_hash_version.map
> 
>  # all source are stored in SRCS-y
>  SRCS-$(CONFIG_RTE_LIBRTE_HASH) := rte_cuckoo_hash.c
>  SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
> +SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_kv_hash.c
> +SRCS-$(CONFIG_RTE_LIBRTE_HASH) += k32v64_hash.c
> 
>  # install this header file
>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h @@ -27,5
> +29,15 @@ endif  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
> rte_jhash.h  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
> SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_kv_hash.h
> +
> +CC_AVX512VL_SUPPORT=$(shell $(CC) -mavx512vl -dM -E - </dev/null 2>&1 |
> +\ grep -q __AVX512VL__ && echo 1)
> +
> +ifeq ($(CC_AVX512VL_SUPPORT), 1)
> +	SRCS-$(CONFIG_RTE_LIBRTE_HASH) += k32v64_hash_avx512vl.c
> +	CFLAGS_k32v64_hash_avx512vl.o += -mavx512vl
> +	CFLAGS_k32v64_hash.o += -DCC_AVX512VL_SUPPORT endif
> 
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_hash/k32v64_hash.c b/lib/librte_hash/k32v64_hash.c
> new file mode 100644
> index 0000000..24cd63a
> --- /dev/null
> +++ b/lib/librte_hash/k32v64_hash.c
> @@ -0,0 +1,277 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include <string.h>
> +
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_memory.h>
> +
> +#include "k32v64_hash.h"
> +
> +static inline int
> +k32v64_hash_lookup(struct k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value)
> +{
> +	return __k32v64_hash_lookup(table, key, hash, value,
> +		__kv_cmp_keys);
> +}
Since this is an inline function, would it be better to push it to the header file?


> +
> +static int
> +k32v64_hash_bulk_lookup(struct rte_kv_hash_table *ht, void *keys_p,
> +	uint32_t *hashes, void *values_p, unsigned int n)
> +{
> +	struct k32v64_hash_table *table = (struct k32v64_hash_table *)ht;
> +	uint32_t *keys = keys_p;
> +	uint64_t *values = values_p;
> +	int ret, cnt = 0;
> +	unsigned int i;
> +
> +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
> +			(values == NULL)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < n; i++) {
> +		ret = k32v64_hash_lookup(table, keys[i], hashes[i],
> +			&values[i]);
> +		if (ret == 0)
> +			cnt++;
> +	}
> +	return cnt;
> +}
> +
> +#ifdef CC_AVX512VL_SUPPORT
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht,
> +	void *keys_p, uint32_t *hashes, void *values_p, unsigned int n);
> +#endif
> +
> +static rte_kv_hash_bulk_lookup_t
> +get_lookup_bulk_fn(void)
> +{
> +#ifdef CC_AVX512VL_SUPPORT
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL))
> +		return k32v64_hash_bulk_lookup_avx512vl;
> +#endif
> +	return k32v64_hash_bulk_lookup;
> +}
> +
> +static int
> +k32v64_hash_add(struct k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t value, uint64_t *old_value, int *found)
> +{
> +	uint32_t bucket;
> +	int i, idx, ret;
> +	uint8_t msk;
> +	struct k32v64_ext_ent *tmp, *ent, *prev = NULL;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	bucket = hash & table->bucket_msk;
> +	/* Search key in table. Update value if exists */
> +	for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == table->t[bucket].key[i]) &&
> +				(table->t[bucket].key_mask & (1 << i))) {
> +			*old_value = table->t[bucket].val[i];
> +			*found = 1;
> +			__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +				__ATOMIC_ACQUIRE);
> +			table->t[bucket].val[i] = value;
Suggest using a C11 builtin to store the value. As far as I know, all the platforms supported by DPDK have a 64b atomic store in 32b mode.
With this we will be able to avoid incrementing the counter. The reader will get either the old or the new value.
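
A sketch of what is being suggested, replacing the two cnt increments
around the store:

	/* a single relaxed 64b atomic store; readers observe either the
	 * old or the new value, never a torn one */
	__atomic_store_n(&table->t[bucket].val[i], value, __ATOMIC_RELAXED);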

> +			__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +				__ATOMIC_RELEASE);
> +			return 0;
> +		}
> +	}
> +
> +	if (!SLIST_EMPTY(&table->t[bucket].head)) {
> +		SLIST_FOREACH(ent, &table->t[bucket].head, next) {
> +			if (ent->key == key) {
> +				*old_value = ent->val;
> +				*found = 1;
> +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +					__ATOMIC_ACQUIRE);
> +				ent->val = value;
Same here: __atomic_store_n(&ent->val, value, __ATOMIC_RELAXED).

> +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +					__ATOMIC_RELEASE);
> +				return 0;
> +			}
> +		}
> +	}
> +
> +	msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
> +	if (msk) {
> +		idx = __builtin_ctz(msk);
> +		table->t[bucket].key[idx] = key;
> +		table->t[bucket].val[idx] = value;
> +		__atomic_or_fetch(&table->t[bucket].key_mask, 1 << idx,
> +			__ATOMIC_RELEASE);
> +		table->nb_ent++;
Looks like bucket counter logic is needed for this case too. Please see the comments below in k32v64_hash_delete.

> +		*found = 0;
> +		return 0;
> +	}
> +
> +	ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
> +	if (ret < 0)
> +		return ret;
> +
> +	SLIST_NEXT(ent, next) = NULL;
> +	ent->key = key;
> +	ent->val = value;
> +	rte_smp_wmb();
We need to avoid using rte_smp_* barriers as we are adopting C11 built-in atomics. See the below comment.

> +	SLIST_FOREACH(tmp, &table->t[bucket].head, next)
> +		prev = tmp;
> +
> +	if (prev == NULL)
> +		SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
> +	else
> +		SLIST_INSERT_AFTER(prev, ent, next);
Both inserts need to use release order when 'ent' is inserted. I am not sure where the SLIST is being picked up from, but looking at the SLIST implementation in 'lib/librte_eal/windows/include/sys/queue.h', it is not implemented using C11. I think we could move queue.h from the windows directory to a common directory and change the SLIST implementation.
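
A minimal sketch of what a C11-friendly insert could look like, assuming
the slh_first/sle_next field names from sys/queue.h:

	#define SLIST_INSERT_HEAD_REL(head, elm, field) do {		\
		(elm)->field.sle_next = (head)->slh_first;		\
		/* release store publishes elm->key/elm->val */		\
		__atomic_store_n(&(head)->slh_first, (elm),		\
			__ATOMIC_RELEASE);				\
	} while (0)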

> +
> +	table->nb_ent++;
> +	table->nb_ext_ent++;
> +	*found = 0;
> +	return 0;
> +}
> +
> +static int
> +k32v64_hash_delete(struct k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *old_value)
> +{
> +	uint32_t bucket;
> +	int i;
> +	struct k32v64_ext_ent *ent;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	bucket = hash & table->bucket_msk;
> +
> +	for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == table->t[bucket].key[i]) &&
> +				(table->t[bucket].key_mask & (1 << i))) {
> +			*old_value = table->t[bucket].val[i];
> +			ent = SLIST_FIRST(&table->t[bucket].head);
> +			if (ent) {
> +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +					__ATOMIC_ACQUIRE);
> +				table->t[bucket].key[i] = ent->key;
> +				table->t[bucket].val[i] = ent->val;
> +				SLIST_REMOVE_HEAD(&table->t[bucket].head, next);
> +				__atomic_fetch_add(&table->t[bucket].cnt, 1,
> +					__ATOMIC_RELEASE);
> +				table->nb_ext_ent--;
> +			} else
> +				__atomic_and_fetch(&table->t[bucket].key_mask,
> +					~(1 << i), __ATOMIC_RELEASE);
(taking note from your responses to my comments in v3)
It is possible that the reader might match the old key but get the new value.
1) Reader: successful key match
2) Writer: k32v64_hash_delete followed by k32v64_hash_add
3) Reader: reads the value

IMO, there are 2 ways to solve this issue:
1) Use the bucket count logic while adding an entry in the non-extended bucket (marked in k32v64_hash_add); see the sketch below.
2) Do not use the entry in the bucket until the readers indicate that they have stopped using the entry.
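
A sketch of option 1 - guarding the empty-slot insert in k32v64_hash_add
with the same bucket counter (untested):

	__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_ACQUIRE);
	table->t[bucket].key[idx] = key;
	table->t[bucket].val[idx] = value;
	table->t[bucket].key_mask |= 1 << idx;
	__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_RELEASE);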

> +			if (ent)
> +				rte_mempool_put(table->ext_ent_pool, ent);
> +			table->nb_ent--;
> +			return 0;
> +		}
> +	}
> +
> +	SLIST_FOREACH(ent, &table->t[bucket].head, next)
> +		if (ent->key == key)
> +			break;
> +
> +	if (ent == NULL)
> +		return -ENOENT;
> +
> +	*old_value = ent->val;
> +
> +	__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_ACQUIRE);
> +	SLIST_REMOVE(&table->t[bucket].head, ent, k32v64_ext_ent, next);
> +	__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_RELEASE);
> +	rte_mempool_put(table->ext_ent_pool, ent);
> +
> +	table->nb_ext_ent--;
> +	table->nb_ent--;
> +
> +	return 0;
> +}
> +
> +static int
> +k32v64_modify(struct rte_kv_hash_table *table, void *key_p, uint32_t hash,
> +	enum rte_kv_modify_op op, void *value_p, int *found)
> +{
> +	struct k32v64_hash_table *ht = (struct k32v64_hash_table *)table;
> +	uint32_t *key = key_p;
> +	uint64_t value;
> +
> +	if ((ht == NULL) || (key == NULL) || (value_p == NULL) ||
> +			(found == NULL) || (op >= RTE_KV_MODIFY_OP_MAX)) {
> +		return -EINVAL;
> +	}
Suggest doing this check in rte_kv_hash_add/rte_kv_hash_delete, so that every implementation does not have to repeat it.

> +
> +	value = *(uint64_t *)value_p;
In the API 'rte_kv_hash_add', value_p is 'void *', which does not convey that it is a pointer to 64b data. What happens when running on 32b systems?

> +	switch (op) {
> +	case RTE_KV_MODIFY_ADD:
> +		return k32v64_hash_add(ht, *key, hash, value, value_p, found);
> +	case RTE_KV_MODIFY_DEL:
> +		return k32v64_hash_delete(ht, *key, hash, value_p);
> +	default:
> +		break;
> +	}
> +
> +	return -EINVAL;
> +}
> +
> +struct rte_kv_hash_table *
> +k32v64_hash_create(const struct rte_kv_hash_params *params)
> +{
This is a private symbol, I think it needs to have a '__rte' prefix?

> +	char hash_name[RTE_KV_HASH_NAMESIZE];
> +	struct k32v64_hash_table *ht = NULL;
> +	uint32_t mem_size, nb_buckets, max_ent;
> +	int ret;
> +	struct rte_mempool *mp;
> +
> +	if ((params == NULL) || (params->name == NULL) ||
> +			(params->entries == 0)) {
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
nit: these checks were already done in 'rte_kv_hash_create', so they can be skipped here.

> +
> +	ret = snprintf(hash_name, sizeof(hash_name), "KV_%s", params->name);
> +	if (ret < 0 || ret >= RTE_KV_HASH_NAMESIZE) {
> +		rte_errno = ENAMETOOLONG;
> +		return NULL;
> +	}
Same here, this is checked in the calling function.

> +
> +	max_ent = rte_align32pow2(params->entries);
> +	nb_buckets = max_ent / K32V64_KEYS_PER_BUCKET;
> +	mem_size = sizeof(struct k32v64_hash_table) +
> +		sizeof(struct k32v64_hash_bucket) * nb_buckets;
> +
> +	mp = rte_mempool_create(hash_name, max_ent,
> +		sizeof(struct k32v64_ext_ent), 0, 0, NULL, NULL, NULL, NULL,
> +		params->socket_id, 0);
> +
> +	if (mp == NULL)
> +		return NULL;
> +
> +	ht = rte_zmalloc_socket(hash_name, mem_size,
> +		RTE_CACHE_LINE_SIZE, params->socket_id);
> +	if (ht == NULL) {
> +		rte_mempool_free(mp);
> +		return NULL;
> +	}
> +
> +	memcpy(ht->pub.name, hash_name, sizeof(ht->pub.name));
> +	ht->max_ent = max_ent;
> +	ht->bucket_msk = nb_buckets - 1;
> +	ht->ext_ent_pool = mp;
> +	ht->pub.lookup = get_lookup_bulk_fn();
> +	ht->pub.modify = k32v64_modify;
> +
> +	return (struct rte_kv_hash_table *)ht;
> +}
> +
> +void
> +k32v64_hash_free(struct rte_kv_hash_table *ht)
> +{
This is a private symbol, I think it needs to have a '__rte' prefix?

> +	if (ht == NULL)
> +		return;
> +
> +	rte_mempool_free(((struct k32v64_hash_table *)ht)->ext_ent_pool);
> +	rte_free(ht);
> +}
> diff --git a/lib/librte_hash/k32v64_hash.h b/lib/librte_hash/k32v64_hash.h
> new file mode 100644
> index 0000000..10061a5
> --- /dev/null
> +++ b/lib/librte_hash/k32v64_hash.h
> @@ -0,0 +1,98 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include <rte_kv_hash.h>
> +
> +#define K32V64_KEYS_PER_BUCKET		4
> +#define K32V64_WRITE_IN_PROGRESS	1
> +#define VALID_KEY_MSK           ((1 << K32V64_KEYS_PER_BUCKET) - 1)
> +
> +struct k32v64_ext_ent {
> +	SLIST_ENTRY(k32v64_ext_ent) next;
> +	uint32_t	key;
> +	uint64_t	val;
> +};
> +
> +struct k32v64_hash_bucket {
> +	uint32_t	key[K32V64_KEYS_PER_BUCKET];
> +	uint64_t	val[K32V64_KEYS_PER_BUCKET];
> +	uint8_t		key_mask;
> +	uint32_t	cnt;
> +	SLIST_HEAD(k32v64_list_head, k32v64_ext_ent) head;
> +} __rte_cache_aligned;
> +
> +struct k32v64_hash_table {
> +	struct rte_kv_hash_table pub;	/**< Public part */
> +	uint32_t	nb_ent;		/**< Number of entities in the table*/
> +	uint32_t	nb_ext_ent;	/**< Number of extended entities */
> +	uint32_t	max_ent;	/**< Maximum number of entities */
> +	uint32_t	bucket_msk;
> +	struct rte_mempool	*ext_ent_pool;
> +	__extension__ struct k32v64_hash_bucket	t[0];
> +};
> +
> +typedef int (*k32v64_cmp_fn_t)
> +(struct k32v64_hash_bucket *bucket, uint32_t key, uint64_t *val);
> +
> +static inline int
> +__kv_cmp_keys(struct k32v64_hash_bucket *bucket, uint32_t key,
> +	uint64_t *val)
Changing __kv_cmp_keys to __k32v64_cmp_keys would be more consistent with the rest of the file.

> +{
> +	int i;
> +
> +	for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == bucket->key[i]) &&
> +				(bucket->key_mask & (1 << i))) {
> +			*val = bucket->val[i];
> +			return 1;
> +		}
> +	}
You have to load-acquire 'key_mask' (corresponding to store-release on 'key_mask' in add). Suggest changing this as follows:

uint8_t key_mask;

__atomic_load(&bucket->key_mask, &key_mask, __ATOMIC_ACQUIRE);
for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
	if ((key == bucket->key[i]) && (key_mask & (1 << i))) {
		*val = bucket->val[i];
		return 1;
	}
}

> +
> +	return 0;
> +}
> +
> +static inline int
> +__k32v64_hash_lookup(struct k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value, k32v64_cmp_fn_t cmp_f)
> +{
> +	uint64_t	val = 0;
> +	struct k32v64_ext_ent *ent;
> +	uint32_t	cnt;
> +	int found = 0;
> +	uint32_t bucket = hash & table->bucket_msk;
> +
> +	do {
> +
> +		do {
> +			cnt = __atomic_load_n(&table->t[bucket].cnt,
> +				__ATOMIC_ACQUIRE);
> +		} while (unlikely(cnt & K32V64_WRITE_IN_PROGRESS));
Agree that it is a small section, but the issue can happen. This also makes the algorithm unacceptable in many use cases. IMHO, since we have identified the issue, we should fix it.
The issue is mainly due to not following the reader-writer concurrency design principles, i.e. the data that we want to communicate from writer to reader (key and value in this case) is not communicated atomically. For example, in the rte_hash/cuckoo hash library, you can see that the key and pData are communicated atomically using the key store index.

I might be wrong, but I do not think we can make this block-free (readers move forward even when the writer is not scheduled) using a bucket counter.

This problem does not exist in the existing rte_hash library. It might not perform as well as this one, but it is block-free.
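
To illustrate the concern (hypothetical interleaving, not code from the
patch):

	/*
	 * writer: cnt++          (K32V64_WRITE_IN_PROGRESS becomes set)
	 * writer: <preempted by the scheduler before the closing cnt++>
	 * reader: do {
	 *                 cnt = load-acquire(cnt);
	 *         } while (cnt & K32V64_WRITE_IN_PROGRESS);
	 *         -> spins until the writer is scheduled again
	 */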

> +
> +		found = cmp_f(&table->t[bucket], key, &val);
> +		if (unlikely((found == 0) &&
> +				(!SLIST_EMPTY(&table->t[bucket].head)))) {
> +			SLIST_FOREACH(ent, &table->t[bucket].head, next) {
> +				if (ent->key == key) {
> +					val = ent->val;
> +					found = 1;
> +					break;
> +				}
> +			}
> +		}
> +		__atomic_thread_fence(__ATOMIC_RELEASE);
> +	} while (unlikely(cnt != __atomic_load_n(&table->t[bucket].cnt,
> +			 __ATOMIC_RELAXED)));
> +
> +	if (found == 1) {
> +		*value = val;
> +		return 0;
> +	} else
> +		return -ENOENT;
> +}
> +
> +struct rte_kv_hash_table *
> +k32v64_hash_create(const struct rte_kv_hash_params *params);
> +
> +void
> +k32v64_hash_free(struct rte_kv_hash_table *ht);
> diff --git a/lib/librte_hash/k32v64_hash_avx512vl.c
> b/lib/librte_hash/k32v64_hash_avx512vl.c
> new file mode 100644
> index 0000000..75cede5
> --- /dev/null
> +++ b/lib/librte_hash/k32v64_hash_avx512vl.c
> @@ -0,0 +1,59 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include "k32v64_hash.h"
> +
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht, void *keys_p,
> +	uint32_t *hashes, void *values_p, unsigned int n);
> +
> +static inline int
> +k32v64_cmp_keys_avx512vl(struct k32v64_hash_bucket *bucket, uint32_t key,
> +	uint64_t *val)
> +{
> +	__m128i keys, srch_key;
> +	__mmask8 msk;
> +
> +	keys = _mm_load_si128((void *)bucket);
> +	srch_key = _mm_set1_epi32(key);
> +
> +	msk = _mm_mask_cmpeq_epi32_mask(bucket->key_mask, keys, srch_key);
> +	if (msk) {
> +		*val = bucket->val[__builtin_ctz(msk)];
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static inline int
> +k32v64_hash_lookup_avx512vl(struct k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value)
> +{
> +	return __k32v64_hash_lookup(table, key, hash, value,
> +		k32v64_cmp_keys_avx512vl);
> +}
> +
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht, void *keys_p,
> +	uint32_t *hashes, void *values_p, unsigned int n)
> +{
> +	struct k32v64_hash_table *table = (struct k32v64_hash_table *)ht;
> +	uint32_t *keys = keys_p;
> +	uint64_t *values = values_p;
> +	int ret, cnt = 0;
> +	unsigned int i;
> +
> +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
> +			(values == NULL)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < n; i++) {
> +		ret = k32v64_hash_lookup_avx512vl(table, keys[i], hashes[i],
> +			&values[i]);
> +		if (ret == 0)
> +			cnt++;
> +	}
> +	return cnt;
> +}
> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
> index 6ab46ae..0d014ea 100644
> --- a/lib/librte_hash/meson.build
> +++ b/lib/librte_hash/meson.build
> @@ -3,10 +3,23 @@
> 
>  headers = files('rte_crc_arm64.h',
>  	'rte_fbk_hash.h',
> +	'rte_kv_hash.h',
>  	'rte_hash_crc.h',
>  	'rte_hash.h',
>  	'rte_jhash.h',
>  	'rte_thash.h')
> 
> -sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c')
> -deps += ['ring']
> +sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c', 'rte_kv_hash.c',
> +		'k32v64_hash.c')
> +deps += ['ring', 'mempool']
> +
> +if dpdk_conf.has('RTE_ARCH_X86')
> +	if cc.has_argument('-mavx512vl')
> +		avx512_tmplib = static_library('avx512_tmp',
> +                                'k32v64_hash_avx512vl.c',
> +                                dependencies: static_rte_mempool,
> +                                c_args: cflags + ['-mavx512vl'])
> +                objs += avx512_tmplib.extract_objects('k32v64_hash_avx512vl.c')
> +                cflags += '-DCC_AVX512VL_SUPPORT'
> +
> +	endif
> +endif
> diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
> index c2a9094..614e0a5 100644
> --- a/lib/librte_hash/rte_hash_version.map
> +++ b/lib/librte_hash/rte_hash_version.map
> @@ -36,5 +36,9 @@ EXPERIMENTAL {
>  	rte_hash_lookup_with_hash_bulk;
>  	rte_hash_lookup_with_hash_bulk_data;
>  	rte_hash_max_key_id;
> -
> +	rte_kv_hash_create;
> +	rte_kv_hash_find_existing;
> +	rte_kv_hash_free;
> +	rte_kv_hash_add;
> +	rte_kv_hash_delete;
>  };
> diff --git a/lib/librte_hash/rte_kv_hash.c b/lib/librte_hash/rte_kv_hash.c
> new file mode 100644
> index 0000000..03df8db
> --- /dev/null
> +++ b/lib/librte_hash/rte_kv_hash.c
> @@ -0,0 +1,184 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include <string.h>
> +
> +#include <rte_eal_memconfig.h>
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_memory.h>
> +#include <rte_tailq.h>
> +
> +#include <rte_kv_hash.h>
> +#include "k32v64_hash.h"
> +
> +TAILQ_HEAD(rte_kv_hash_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem rte_kv_hash_tailq = {
> +	.name = "RTE_KV_HASH",
> +};
> +
> +EAL_REGISTER_TAILQ(rte_kv_hash_tailq);
> +
> +int
> +rte_kv_hash_add(struct rte_kv_hash_table *table, void *key,
> +	uint32_t hash, void *value, int *found)
> +{
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	return table->modify(table, key, hash, RTE_KV_MODIFY_ADD,
> +		value, found);
> +}
> +
> +int
> +rte_kv_hash_delete(struct rte_kv_hash_table *table, void *key,
> +	uint32_t hash, void *value)
> +{
> +	int found;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	return table->modify(table, key, hash, RTE_KV_MODIFY_DEL,
> +		value, &found);
> +}
> +
> +struct rte_kv_hash_table *
> +rte_kv_hash_find_existing(const char *name)
> +{
I did not see a test case for this. Please add a test case for 'rte_kv_hash_find_existing'.
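
E.g. a minimal case along these lines (sketch; variable names hypothetical):

	ht = rte_kv_hash_create(&params);
	tmp_ht = rte_kv_hash_find_existing(params.name);
	RTE_TEST_ASSERT(tmp_ht == ht, "find_existing failed");

Such a test would also expose the "KV_" prefix mismatch noted below.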

> +	struct rte_kv_hash_table *h = NULL;
> +	struct rte_tailq_entry *te;
> +	struct rte_kv_hash_list *kv_hash_list;
> +
> +	kv_hash_list = RTE_TAILQ_CAST(rte_kv_hash_tailq.head,
> +			rte_kv_hash_list);
> +
> +	rte_mcfg_tailq_read_lock();
> +	TAILQ_FOREACH(te, kv_hash_list, next) {
> +		h = (struct rte_kv_hash_table *) te->data;
> +		if (strncmp(name, h->name, RTE_KV_HASH_NAMESIZE) == 0)
> +			break;
> +	}
> +	rte_mcfg_tailq_read_unlock();
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +	return h;
> +}
> +
> +struct rte_kv_hash_table *
> +rte_kv_hash_create(const struct rte_kv_hash_params *params)
> +{
> +	char hash_name[RTE_KV_HASH_NAMESIZE];
> +	struct rte_kv_hash_table *ht, *tmp_ht = NULL;
> +	struct rte_tailq_entry *te;
> +	struct rte_kv_hash_list *kv_hash_list;
> +	int ret;
> +
> +	if ((params == NULL) || (params->name == NULL) ||
> +			(params->entries == 0) ||
> +			(params->type >= RTE_KV_HASH_MAX)) {
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	kv_hash_list = RTE_TAILQ_CAST(rte_kv_hash_tailq.head,
> +		rte_kv_hash_list);
> +
> +	ret = snprintf(hash_name, sizeof(hash_name), "KV_%s", params->name);
RTE_KV_HASH_NAMESIZE is a public #define. It is natural for the user to use this to define the size of the hash table name. Now, we are taking 3 characters from that space. I think it is better to increase the size of 'hash_name' by 3 characters or skip adding 'KV_' to the name.

This also has an impact on 'rte_kv_hash_find_existing' where the user passed string is being used as is without adding 'KV_'. Suggest adding a test case for 'rte_kv_hash_find_existing'.
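
E.g. (sketch):

	/* leave room for the "KV_" prefix on top of the public name size */
	char hash_name[RTE_KV_HASH_NAMESIZE + 3];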

> +	if (ret < 0 || ret >= RTE_KV_HASH_NAMESIZE) {
> +		rte_errno = ENAMETOOLONG;
> +		return NULL;
> +	}
> +
> +	switch (params->type) {
> +	case RTE_KV_HASH_K32V64:
> +		ht = k32v64_hash_create(params);
> +		break;
> +	default:
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +	if (ht == NULL)
> +		return ht;
> +
> +	rte_mcfg_tailq_write_lock();
> +	TAILQ_FOREACH(te, kv_hash_list, next) {
> +		tmp_ht = (struct rte_kv_hash_table *) te->data;
> +		if (strncmp(params->name, tmp_ht->name,
> +				RTE_KV_HASH_NAMESIZE) == 0)
> +			break;
> +	}
> +	if (te != NULL) {
> +		rte_errno = EEXIST;
> +		goto exit;
> +	}
> +
> +	te = rte_zmalloc("KV_HASH_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL) {
> +		RTE_LOG(ERR, HASH, "Failed to allocate tailq entry\n");
> +		goto exit;
> +	}
> +
> +	ht->type = params->type;
> +	te->data = (void *)ht;
> +	TAILQ_INSERT_TAIL(kv_hash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ht;
> +
> +exit:
> +	rte_mcfg_tailq_write_unlock();
> +	switch (params->type) {
> +	case RTE_KV_HASH_K32V64:
> +		k32v64_hash_free(ht);
> +		break;
> +	default:
> +		break;
> +	}
> +	return NULL;
> +}
> +
> +void
> +rte_kv_hash_free(struct rte_kv_hash_table *ht)
> +{
> +	struct rte_tailq_entry *te;
> +	struct rte_kv_hash_list *kv_hash_list;
> +
> +	if (ht == NULL)
> +		return;
> +
> +	kv_hash_list = RTE_TAILQ_CAST(rte_kv_hash_tailq.head,
> +				rte_kv_hash_list);
> +
> +	rte_mcfg_tailq_write_lock();
> +
> +	/* find out tailq entry */
> +	TAILQ_FOREACH(te, kv_hash_list, next) {
> +		if (te->data == (void *) ht)
> +			break;
> +	}
> +
> +
> +	if (te == NULL) {
> +		rte_mcfg_tailq_write_unlock();
> +		return;
> +	}
> +
> +	TAILQ_REMOVE(kv_hash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
I understand that the free is not thread safe. But it might be safer if the unlock happens after the call to 'k32v64_hash_free'.
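
I.e. something like (sketch):

	TAILQ_REMOVE(kv_hash_list, te, next);

	switch (ht->type) {
	case RTE_KV_HASH_K32V64:
		k32v64_hash_free(ht);
		break;
	default:
		break;
	}

	rte_mcfg_tailq_write_unlock();
	rte_free(te);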

> +
> +	switch (ht->type) {
> +	case RTE_KV_HASH_K32V64:
> +		k32v64_hash_free(ht);
> +		break;
> +	default:
> +		break;
> +	}
> +	rte_free(te);
> +}
> diff --git a/lib/librte_hash/rte_kv_hash.h b/lib/librte_hash/rte_kv_hash.h
> new file mode 100644
> index 0000000..c0375d1
> --- /dev/null
> +++ b/lib/librte_hash/rte_kv_hash.h
> @@ -0,0 +1,169 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#ifndef _RTE_KV_HASH_H_
> +#define _RTE_KV_HASH_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <rte_compat.h>
> +#include <rte_atomic.h>
> +#include <rte_mempool.h>
> +
> +#define RTE_KV_HASH_NAMESIZE		32
> +
> +enum rte_kv_hash_type {
> +	RTE_KV_HASH_K32V64,
> +	RTE_KV_HASH_MAX
> +};
> +
> +enum rte_kv_modify_op {
> +	RTE_KV_MODIFY_ADD,
> +	RTE_KV_MODIFY_DEL,
> +	RTE_KV_MODIFY_OP_MAX
> +};
This could be in a private header file.

> +
> +struct rte_kv_hash_params {
> +	const char *name;
> +	uint32_t entries;
> +	int socket_id;
> +	enum rte_kv_hash_type type;
> +};
Since this is a public structure, suggest adding some reserved or flags fields which are ignored now but could be used in the future for enhancements.
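
For example (sketch; the exact fields are just placeholders):

struct rte_kv_hash_params {
	const char *name;
	uint32_t entries;
	int socket_id;
	enum rte_kv_hash_type type;
	uint64_t flags;		/**< must be 0, reserved for future use */
	uint64_t reserved[4];	/**< reserved for future use, must be 0 */
};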

> +
> +struct rte_kv_hash_table;
> +
> +typedef int (*rte_kv_hash_bulk_lookup_t) (struct rte_kv_hash_table
> +*table, void *keys, uint32_t *hashes,
> +	void *values, unsigned int n);
> +
> +typedef int (*rte_kv_hash_modify_t)
> +(struct rte_kv_hash_table *table, void *key, uint32_t hash,
> +	enum rte_kv_modify_op op, void *value, int *found);
> +
> +struct rte_kv_hash_table {
> +	char name[RTE_KV_HASH_NAMESIZE];	/**< Name of the hash. */
> +	rte_kv_hash_bulk_lookup_t	lookup;
> +	rte_kv_hash_modify_t		modify;
There are separate APIs provided for add and delete. Is there any advantage in combining add/delete into a single function pointer in the backend?
If we keep 2 separate pointers, we can get rid of 'enum rte_kv_modify_op'. It is simpler to understand as well.
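
I.e. (sketch):

typedef int (*rte_kv_hash_add_t)(struct rte_kv_hash_table *table,
	void *key, uint32_t hash, void *value, int *found);

typedef int (*rte_kv_hash_delete_t)(struct rte_kv_hash_table *table,
	void *key, uint32_t hash, void *value);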

> +	enum rte_kv_hash_type		type;
> +};
> +
> +/**
> + * Lookup bulk of keys.
> + * This function is multi-thread safe with regarding to other lookup threads.
It is also safe with respect to the writer (reader-writer concurrency); please capture this in the comment as well.

> + *
> + * @param table
> + *   Hash table to add the key to.
nit, 'Hash table to lookup the keys'

> + * @param keys
> + *   Pointer to array of keys
> + * @param hashes
> + *   Pointer to array of hash values associated with keys.
> + * @param values
> + *   Pointer to array of value corresponded to keys
> + *   If the key was not found the corresponding value remains intact.
> + * @param n
> + *   Number of keys to lookup in batch.
> + * @return
> + *   -EINVAL if there's an error, otherwise number of successful lookups.
> + */
> +static inline int
> +rte_kv_hash_bulk_lookup(struct rte_kv_hash_table *table,
> +	void *keys, uint32_t *hashes, void *values, unsigned int n)
> +{
> +	return table->lookup(table, keys, hashes, values, n);
> +}
Consider making this a non-inline function. This is a bulk lookup, and I think the cost of the function call should be amortized well.
This will also allow hiding 'struct rte_kv_hash_table' from the user, which is better for ABI. You can move the definition of 'struct rte_kv_hash_table' and the function pointer declarations to a private header file.
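
E.g. in rte_kv_hash.c (sketch):

int
rte_kv_hash_bulk_lookup(struct rte_kv_hash_table *table,
	void *keys, uint32_t *hashes, void *values, unsigned int n)
{
	if (table == NULL)
		return -EINVAL;

	return table->lookup(table, keys, hashes, values, n);
}

The public header would then only need a forward declaration of
'struct rte_kv_hash_table'.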

> +
> +/**
> + * Add a key to an existing hash table with hash value.
> + * This operation is not multi-thread safe regarding to add/delete
> +functions
> + * and should only be called from one thread.
> + * However it is safe to call it along with lookup.
> + *
> + * @param table
> + *   Hash table to add the key to.
> + * @param key
> + *   Key to add to the hash table.
> + * @param value
> + *   Value to associate with key.
I think it needs to be called out here that the data is 64b in size (even on a 32b system) because of the implementation in function 'k32v64_modify'. Why not make 'value' of type 'uint64_t *'?
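
I.e. (sketch):

__rte_experimental
int
rte_kv_hash_add(struct rte_kv_hash_table *table, void *key,
	uint32_t hash, uint64_t *value, int *found);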

> + * @param hash
> + *   Hash value associated with key.
> + * @found
> + *   0 if no previously added key was found
> + *   1 previously added key was found, old value associated with the key
> + *   was written to *value
> + * @return
> + *   0 if ok, or negative value on error.
> + */
> +__rte_experimental
> +int
> +rte_kv_hash_add(struct rte_kv_hash_table *table, void *key,
> +	uint32_t hash, void *value, int *found);
> +
> +/**
> + * Remove a key with a given hash value from an existing hash table.
> + * This operation is not multi-thread safe regarding to add/delete
> +functions
> + * and should only be called from one thread.
> + * However it is safe to call it along with lookup.
> + *
> + * @param table
> + *   Hash table to remove the key from.
> + * @param key
> + *   Key to remove from the hash table.
> + * @param hash
> + *   hash value associated with key.
> + * @param value
> + *   pointer to memory where the old value will be written to on success
> + * @return
> + *   0 if ok, or negative value on error.
> + */
> +__rte_experimental
> +int
> +rte_kv_hash_delete(struct rte_kv_hash_table *table, void *key,
> +	uint32_t hash, void *value);
> +
> +/**
> + * Performs a lookup for an existing hash table, and returns a pointer
> +to
> + * the table if found.
> + *
> + * @param name
> + *   Name of the hash table to find
> + *
> + * @return
> + *   pointer to hash table structure or NULL on error with rte_errno
> + *   set appropriately.
> + */
> +__rte_experimental
> +struct rte_kv_hash_table *
> +rte_kv_hash_find_existing(const char *name);
> +
> +/**
> + * Create a new hash table for use with four byte keys.
> + *
> + * @param params
> + *   Parameters used in creation of hash table.
> + *
> + * @return
> + *   Pointer to hash table structure that is used in future hash table
> + *   operations, or NULL on error with rte_errno set appropriately.
> + */
> +__rte_experimental
> +struct rte_kv_hash_table *
> +rte_kv_hash_create(const struct rte_kv_hash_params *params);
> +
> +/**
> + * Free all memory used by a hash table.
> + *
> + * @param table
> + *   Hash table to deallocate.
> + */
> +__rte_experimental
> +void
> +rte_kv_hash_free(struct rte_kv_hash_table *table);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_KV_HASH_H_ */
> --
> 2.7.4
Medvedkin, Vladimir June 25, 2020, 7:49 p.m. UTC | #5
Hi Konstantin,

Thanks for the review. See below


On 23/06/2020 16:44, Ananyev, Konstantin wrote:
> Hi Vladimir,
>
>> --- /dev/null
>> +++ b/lib/librte_hash/k32v64_hash.c
>> @@ -0,0 +1,277 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2020 Intel Corporation
>> + */
>> +
>> +#include <string.h>
>> +
>> +#include <rte_errno.h>
>> +#include <rte_malloc.h>
>> +#include <rte_memory.h>
>> +
>> +#include "k32v64_hash.h"
>> +
>> +static inline int
>> +k32v64_hash_lookup(struct k32v64_hash_table *table, uint32_t key,
>> +uint32_t hash, uint64_t *value)
>> +{
>> +return __k32v64_hash_lookup(table, key, hash, value, __kv_cmp_keys);
>> +}
>> +
>> +static int
>> +k32v64_hash_bulk_lookup(struct rte_kv_hash_table *ht, void *keys_p,
>> +uint32_t *hashes, void *values_p, unsigned int n)
>> +{
>> +struct k32v64_hash_table *table = (struct k32v64_hash_table *)ht;
>> +uint32_t *keys = keys_p;
>> +uint64_t *values = values_p;
>> +int ret, cnt = 0;
>> +unsigned int i;
>> +
>> +if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
>> +(values == NULL)))
>> +return -EINVAL;
>> +
>> +for (i = 0; i < n; i++) {
>> +ret = k32v64_hash_lookup(table, keys[i], hashes[i],
>> +&values[i]);
>> +if (ret == 0)
>> +cnt++;
>> +}
>> +return cnt;
>> +}
>> +
>> +#ifdef CC_AVX512VL_SUPPORT
>> +int
>> +k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht,
>> +void *keys_p, uint32_t *hashes, void *values_p, unsigned int n);
>> +#endif
>> +
>> +static rte_kv_hash_bulk_lookup_t
>> +get_lookup_bulk_fn(void)
>> +{
>> +#ifdef CC_AVX512VL_SUPPORT
>> +if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL))
>> +return k32v64_hash_bulk_lookup_avx512vl;
>> +#endif
>> +return k32v64_hash_bulk_lookup;
>> +}
>> +
>> +static int
>> +k32v64_hash_add(struct k32v64_hash_table *table, uint32_t key,
>> +uint32_t hash, uint64_t value, uint64_t *old_value, int *found)
>> +{
>> +uint32_t bucket;
>> +int i, idx, ret;
>> +uint8_t msk;
>> +struct k32v64_ext_ent *tmp, *ent, *prev = NULL;
>> +
>> +if (table == NULL)
>> +return -EINVAL;
>> +
>> +bucket = hash & table->bucket_msk;
>> +/* Search key in table. Update value if exists */
>> +for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
>> +if ((key == table->t[bucket].key[i]) &&
>> +(table->t[bucket].key_mask & (1 << i))) {
>> +*old_value = table->t[bucket].val[i];
>> +*found = 1;
>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>> +__ATOMIC_ACQUIRE);
>> +table->t[bucket].val[i] = value;
>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>> +__ATOMIC_RELEASE);
>> +return 0;
>> +}
>> +}
>> +
>> +if (!SLIST_EMPTY(&table->t[bucket].head)) {
>> +SLIST_FOREACH(ent, &table->t[bucket].head, next) {
>> +if (ent->key == key) {
>> +*old_value = ent->val;
>> +*found = 1;
>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>> +__ATOMIC_ACQUIRE);
>> +ent->val = value;
>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>> +__ATOMIC_RELEASE);
>> +return 0;
>> +}
>> +}
>> +}
>> +
>> +msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
>> +if (msk) {
>> +idx = __builtin_ctz(msk);
>> +table->t[bucket].key[idx] = key;
>> +table->t[bucket].val[idx] = value;
>> +__atomic_or_fetch(&table->t[bucket].key_mask, 1 << idx,
>> +__ATOMIC_RELEASE);
> I think this case also has to be guarded with table->t[bucket].cnt updates.


Agree, will change it.
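
E.g. something along these lines (untested sketch):

	__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_ACQUIRE);
	table->t[bucket].key[idx] = key;
	table->t[bucket].val[idx] = value;
	__atomic_or_fetch(&table->t[bucket].key_mask, 1 << idx,
		__ATOMIC_RELEASE);
	__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_RELEASE);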


>
>> +table->nb_ent++;
>> +*found = 0;
>> +return 0;
>> +}
>> +
>> +ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
>> +if (ret < 0)
>> +return ret;
>> +
>> +SLIST_NEXT(ent, next) = NULL;
>> +ent->key = key;
>> +ent->val = value;
>> +rte_smp_wmb();
> __atomic_thread_fence(__ATOMIC_RELEASE);
> ?


Sure, looks like I missed it, will fix


>
>> +SLIST_FOREACH(tmp, &table->t[bucket].head, next)
>> +prev = tmp;
>> +
>> +if (prev == NULL)
>> +SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
>> +else
>> +SLIST_INSERT_AFTER(prev, ent, next);
>> +
>> +table->nb_ent++;
>> +table->nb_ext_ent++;
>> +*found = 0;
>> +return 0;
>> +}
>> +
>> +static int
>> +k32v64_hash_delete(struct k32v64_hash_table *table, uint32_t key,
>> +uint32_t hash, uint64_t *old_value)
>> +{
>> +uint32_t bucket;
>> +int i;
>> +struct k32v64_ext_ent *ent;
>> +
>> +if (table == NULL)
>> +return -EINVAL;
>> +
>> +bucket = hash & table->bucket_msk;
>> +
>> +for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
>> +if ((key == table->t[bucket].key[i]) &&
>> +(table->t[bucket].key_mask & (1 << i))) {
>> +*old_value = table->t[bucket].val[i];
>> +ent = SLIST_FIRST(&table->t[bucket].head);
>> +if (ent) {
>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>> +__ATOMIC_ACQUIRE);
>> +table->t[bucket].key[i] = ent->key;
>> +table->t[bucket].val[i] = ent->val;
>> +SLIST_REMOVE_HEAD(&table->t[bucket].head, next);
>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>> +__ATOMIC_RELEASE);
>> +table->nb_ext_ent--;
>> +} else
>> +__atomic_and_fetch(&table->t[bucket].key_mask,
>> +~(1 << i), __ATOMIC_RELEASE);
> Same thought as above - safer to guard this mask update with cnt update.


Agree


>
>> +if (ent)
>> +rte_mempool_put(table->ext_ent_pool, ent);
> Can't this 'if(ent)' be merged with previous 'if (ent) {...}' above?


Yes, will merge


>
>> +table->nb_ent--;
>> +return 0;
>> +}
>> +}
>> +
>> +SLIST_FOREACH(ent, &table->t[bucket].head, next)
>> +if (ent->key == key)
>> +break;
>> +
>> +if (ent == NULL)
>> +return -ENOENT;
>> +
>> +*old_value = ent->val;
>> +
>> +__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_ACQUIRE);
>> +SLIST_REMOVE(&table->t[bucket].head, ent, k32v64_ext_ent, next);
>> +__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_RELEASE);
>> +rte_mempool_put(table->ext_ent_pool, ent);
>> +
>> +table->nb_ext_ent--;
>> +table->nb_ent--;
>> +
>> +return 0;
>> +}
>> +
Medvedkin, Vladimir June 25, 2020, 7:56 p.m. UTC | #6
On 24/06/2020 00:06, Ananyev, Konstantin wrote:
>> Hi Vladimir,
>>
>>> --- /dev/null
>>> +++ b/lib/librte_hash/k32v64_hash.c
>>> @@ -0,0 +1,277 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright(c) 2020 Intel Corporation
>>> + */
>>> +
>>> +#include <string.h>
>>> +
>>> +#include <rte_errno.h>
>>> +#include <rte_malloc.h>
>>> +#include <rte_memory.h>
>>> +
>>> +#include "k32v64_hash.h"
>>> +
>>> +static inline int
>>> +k32v64_hash_lookup(struct k32v64_hash_table *table, uint32_t key,
>>> +uint32_t hash, uint64_t *value)
>>> +{
>>> +return __k32v64_hash_lookup(table, key, hash, value, __kv_cmp_keys);
>>> +}
>>> +
>>> +static int
>>> +k32v64_hash_bulk_lookup(struct rte_kv_hash_table *ht, void *keys_p,
>>> +uint32_t *hashes, void *values_p, unsigned int n)
>>> +{
>>> +struct k32v64_hash_table *table = (struct k32v64_hash_table *)ht;
>>> +uint32_t *keys = keys_p;
>>> +uint64_t *values = values_p;
>>> +int ret, cnt = 0;
>>> +unsigned int i;
>>> +
>>> +if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
>>> +(values == NULL)))
>>> +return -EINVAL;
>
> As a nit - this formal parameter checking is better done in the public function
> (rte_kv_hash_bulk_lookup) before dereferencing table and calling the actual lookup().
> Same story for modify() - formal parameter checking can be done at the top of public function.

Agree, will move checking in public part


> BTW, why to unite add/delete into modify(), if internally you have 2 different functions
> (for add/delete) anyway?


This was done for future extensibility, in case we want to add some 
additional control plane features in the future, other than add or 
delete. If you think it's unnecessary I'll replace it with an add()/del() pair.


>
>>> +
>>> +for (i = 0; i < n; i++) {
>>> +ret = k32v64_hash_lookup(table, keys[i], hashes[i],
>>> +&values[i]);
>>> +if (ret == 0)
>>> +cnt++;
>>> +}
>>> +return cnt;
>>> +}
>>> +
>>> +#ifdef CC_AVX512VL_SUPPORT
>>> +int
>>> +k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht,
>>> +void *keys_p, uint32_t *hashes, void *values_p, unsigned int n);
>>> +#endif
>>> +
>>> +static rte_kv_hash_bulk_lookup_t
>>> +get_lookup_bulk_fn(void)
>>> +{
>>> +#ifdef CC_AVX512VL_SUPPORT
>>> +if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL))
>>> +return k32v64_hash_bulk_lookup_avx512vl;
>>> +#endif
>>> +return k32v64_hash_bulk_lookup;
>>> +}
>>> +
>>> +static int
>>> +k32v64_hash_add(struct k32v64_hash_table *table, uint32_t key,
>>> +uint32_t hash, uint64_t value, uint64_t *old_value, int *found)
>>> +{
>>> +uint32_t bucket;
>>> +int i, idx, ret;
>>> +uint8_t msk;
>>> +struct k32v64_ext_ent *tmp, *ent, *prev = NULL;
>>> +
>>> +if (table == NULL)
>>> +return -EINVAL;
>>> +
>>> +bucket = hash & table->bucket_msk;
>>> +/* Search key in table. Update value if exists */
>>> +for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
>>> +if ((key == table->t[bucket].key[i]) &&
>>> +(table->t[bucket].key_mask & (1 << i))) {
>>> +*old_value = table->t[bucket].val[i];
>>> +*found = 1;
>>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>>> +__ATOMIC_ACQUIRE);
> After another thought - atomic add is probably overkill here.
> Something like:
> void update_start(struct k32v64_hash_bucket *bkt)
> {
> 	bkt->cnt++;
> 	__atomic_thread_fence(__ATOMIC_ACQ_REL);
> }
>
> void update_finish(struct k32v64_hash_bucket *bkt)
> {
> 	__atomic_thread_fence(__ATOMIC_ACQ_REL);
> 	bkt->cnt++;
> }
>
> I think this should be sufficient here.


Good point


>
>>> +table->t[bucket].val[i] = value;
>>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>>> +__ATOMIC_RELEASE);
>>> +return 0;
>>> +}
>>> +}
>>> +
>>> +if (!SLIST_EMPTY(&table->t[bucket].head)) {
>>> +SLIST_FOREACH(ent, &table->t[bucket].head, next) {
>>> +if (ent->key == key) {
>>> +*old_value = ent->val;
>>> +*found = 1;
>>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>>> +__ATOMIC_ACQUIRE);
>>> +ent->val = value;
>>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>>> +__ATOMIC_RELEASE);
>>> +return 0;
>>> +}
>>> +}
>>> +}
>>> +
>>> +msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
>>> +if (msk) {
>>> +idx = __builtin_ctz(msk);
>>> +table->t[bucket].key[idx] = key;
>>> +table->t[bucket].val[idx] = value;
>>> +__atomic_or_fetch(&table->t[bucket].key_mask, 1 << idx,
>>> +__ATOMIC_RELEASE);
>> I think this case also has to be guarded with table->t[bucket].cnt updates.
>>
>>> +table->nb_ent++;
>>> +*found = 0;
>>> +return 0;
>>> +}
>>> +
>>> +ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
>>> +if (ret < 0)
>>> +return ret;
>>> +
>>> +SLIST_NEXT(ent, next) = NULL;
>>> +ent->key = key;
>>> +ent->val = value;
>>> +rte_smp_wmb();
>> __atomic_thread_fence(__ATOMIC_RELEASE);
>> ?
>>
>>> +SLIST_FOREACH(tmp, &table->t[bucket].head, next)
>>> +prev = tmp;
>>> +
>>> +if (prev == NULL)
>>> +SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
>>> +else
>>> +SLIST_INSERT_AFTER(prev, ent, next);
>>> +
>>> +table->nb_ent++;
>>> +table->nb_ext_ent++;
>>> +*found = 0;
>>> +return 0;
>>> +}
>>> +
>>> +static int
>>> +k32v64_hash_delete(struct k32v64_hash_table *table, uint32_t key,
>>> +uint32_t hash, uint64_t *old_value)
>>> +{
>>> +uint32_t bucket;
>>> +int i;
>>> +struct k32v64_ext_ent *ent;
>>> +
>>> +if (table == NULL)
>>> +return -EINVAL;
>>> +
>>> +bucket = hash & table->bucket_msk;
>>> +
>>> +for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
>>> +if ((key == table->t[bucket].key[i]) &&
>>> +(table->t[bucket].key_mask & (1 << i))) {
>>> +*old_value = table->t[bucket].val[i];
>>> +ent = SLIST_FIRST(&table->t[bucket].head);
>>> +if (ent) {
>>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>>> +__ATOMIC_ACQUIRE);
>>> +table->t[bucket].key[i] = ent->key;
>>> +table->t[bucket].val[i] = ent->val;
>>> +SLIST_REMOVE_HEAD(&table->t[bucket].head, next);
>>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>>> +__ATOMIC_RELEASE);
>>> +table->nb_ext_ent--;
>>> +} else
>>> +__atomic_and_fetch(&table->t[bucket].key_mask,
>>> +~(1 << i), __ATOMIC_RELEASE);
>> Same thought as above - safer to guard this mask update with cnt update.
>>
>>> +if (ent)
>>> +rte_mempool_put(table->ext_ent_pool, ent);
>> Can't this 'if(ent)' be merged with previous 'if (ent) {...}' above?
>>
>>> +table->nb_ent--;
>>> +return 0;
>>> +}
>>> +}
>>> +
>>> +SLIST_FOREACH(ent, &table->t[bucket].head, next)
>>> +if (ent->key == key)
>>> +break;
>>> +
>>> +if (ent == NULL)
>>> +return -ENOENT;
>>> +
>>> +*old_value = ent->val;
>>> +
>>> +__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_ACQUIRE);
>>> +SLIST_REMOVE(&table->t[bucket].head, ent, k32v64_ext_ent, next);
>>> +__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_RELEASE);
>>> +rte_mempool_put(table->ext_ent_pool, ent);
>>> +
>>> +table->nb_ext_ent--;
>>> +table->nb_ent--;
>>> +
>>> +return 0;
>>> +}
>>> +
Medvedkin, Vladimir June 25, 2020, 8:26 p.m. UTC | #7
Hi Yipeng,

Thanks for the review. See below


On 24/06/2020 02:19, Wang, Yipeng1 wrote:
>> -----Original Message-----
>> From: Medvedkin, Vladimir <vladimir.medvedkin@intel.com>
>> Sent: Friday, May 8, 2020 12:59 PM
>> To: dev@dpdk.org
>> Cc: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Wang, Yipeng1
>> <yipeng1.wang@intel.com>; Gobriel, Sameh <sameh.gobriel@intel.com>;
>> Richardson, Bruce <bruce.richardson@intel.com>
>> Subject: [PATCH v4 1/4] hash: add kv hash library
>>
>> KV hash is a special optimized key-value storage for fixed key and value sizes.
>> At the moment it supports 32 bit keys and 64 bit values. This table is hash
>> function agnostic so user must provide precalculated hash signature for
>> add/delete/lookup operations.
>>
>> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
>> ---
>>   lib/Makefile                           |   2 +-
>>   lib/librte_hash/Makefile               |  14 +-
>>   lib/librte_hash/k32v64_hash.c          | 277
>> +++++++++++++++++++++++++++++++++
>>   lib/librte_hash/k32v64_hash.h          |  98 ++++++++++++
>>   lib/librte_hash/k32v64_hash_avx512vl.c |  59 +++++++
>>   lib/librte_hash/meson.build            |  17 +-
>>   lib/librte_hash/rte_hash_version.map   |   6 +-
>>   lib/librte_hash/rte_kv_hash.c          | 184 ++++++++++++++++++++++
>>   lib/librte_hash/rte_kv_hash.h          | 169 ++++++++++++++++++++
>>   9 files changed, 821 insertions(+), 5 deletions(-)  create mode 100644
>> lib/librte_hash/k32v64_hash.c  create mode 100644
>> lib/librte_hash/k32v64_hash.h  create mode 100644
>> lib/librte_hash/k32v64_hash_avx512vl.c
>>   create mode 100644 lib/librte_hash/rte_kv_hash.c  create mode 100644
>> lib/librte_hash/rte_kv_hash.h
>>
>> diff --git a/lib/Makefile b/lib/Makefile
>> index 9d24609..42769e9 100644
>> --- a/lib/Makefile
>> +++ b/lib/Makefile
>> @@ -48,7 +48,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
>>  DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ethdev \
>>  			librte_net librte_hash librte_cryptodev
>>  DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
>> -DEPDIRS-librte_hash := librte_eal librte_ring
>> +DEPDIRS-librte_hash := librte_eal librte_ring librte_mempool
>>  DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
>>  DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
>>  DIRS-$(CONFIG_RTE_LIBRTE_RIB) += librte_rib
>> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
>> index ec9f864..a0cdee9 100644
>> --- a/lib/librte_hash/Makefile
>> +++ b/lib/librte_hash/Makefile
>> @@ -8,13 +8,15 @@ LIB = librte_hash.a
>>
>>   CFLAGS += -O3
>>   CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
>> -LDLIBS += -lrte_eal -lrte_ring
>> +LDLIBS += -lrte_eal -lrte_ring -lrte_mempool
>>
>>   EXPORT_MAP := rte_hash_version.map
>>
>>   # all source are stored in SRCS-y
>>   SRCS-$(CONFIG_RTE_LIBRTE_HASH) := rte_cuckoo_hash.c
>>   SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
>> +SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_kv_hash.c
>> +SRCS-$(CONFIG_RTE_LIBRTE_HASH) += k32v64_hash.c
>>
>>   # install this header file
>>   SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
>> @@ -27,5 +29,15 @@ endif
>>   SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
>>   SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
>>   SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
>> +SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_kv_hash.h
>> +
>> +CC_AVX512VL_SUPPORT=$(shell $(CC) -mavx512vl -dM -E - </dev/null 2>&1 | \
>> +grep -q __AVX512VL__ && echo 1)
>> +
>> +ifeq ($(CC_AVX512VL_SUPPORT), 1)
>> +SRCS-$(CONFIG_RTE_LIBRTE_HASH) += k32v64_hash_avx512vl.c
>> +CFLAGS_k32v64_hash_avx512vl.o += -mavx512vl
>> +CFLAGS_k32v64_hash.o += -DCC_AVX512VL_SUPPORT
>> +endif
>>
>>   include $(RTE_SDK)/mk/rte.lib.mk
>> diff --git a/lib/librte_hash/k32v64_hash.c b/lib/librte_hash/k32v64_hash.c
>> new file mode 100644
>> index 0000000..24cd63a
>> --- /dev/null
>> +++ b/lib/librte_hash/k32v64_hash.c
>> @@ -0,0 +1,277 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2020 Intel Corporation
>> + */
>> +
>> +#include <string.h>
>> +
>> +#include <rte_errno.h>
>> +#include <rte_malloc.h>
>> +#include <rte_memory.h>
>> +
>> +#include "k32v64_hash.h"
>> +
>> +static inline int
>> +k32v64_hash_lookup(struct k32v64_hash_table *table, uint32_t key,
>> +uint32_t hash, uint64_t *value)
>> +{
>> +	return __k32v64_hash_lookup(table, key, hash, value,
>> +		__kv_cmp_keys);
>> +}
>> +
>> +static int
>> +k32v64_hash_bulk_lookup(struct rte_kv_hash_table *ht, void *keys_p,
>> +uint32_t *hashes, void *values_p, unsigned int n) {
>> +struct k32v64_hash_table *table = (struct k32v64_hash_table *)ht;
>> +uint32_t *keys = keys_p;
>> +uint64_t *values = values_p;
>> +int ret, cnt = 0;
>> +unsigned int i;
>> +
>> +if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
>> +(values == NULL)))
>> +return -EINVAL;
>> +
>> +for (i = 0; i < n; i++) {
>> +ret = k32v64_hash_lookup(table, keys[i], hashes[i],
>> +&values[i]);
> [Wang, Yipeng] You don't need to start a new line for values.


Then the line would be too long (81 characters), and checkpatch warns about it.


>> +if (ret == 0)
>> +cnt++;
>> +}
>> +return cnt;
>> +}
>> +
>> +#ifdef CC_AVX512VL_SUPPORT
> [Wang, Yipeng] Why not use the already provided macros, e.g. #if defined(RTE_MACHINE_CPUFLAG_SSE2), like rte_hash does?


For the AVX512 implementation we are using avx512vl intrinsics, which are not 
supported by some compilers. In this case support for AVX512F is not 
enough (there is only the RTE_MACHINE_CPUFLAG_AVX512F flag).


>> +int
>> +k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht,
>> +void *keys_p, uint32_t *hashes, void *values_p, unsigned int n);
>> +#endif
>> +
>> +static rte_kv_hash_bulk_lookup_t
>> +get_lookup_bulk_fn(void)
>> +{
>> +#ifdef CC_AVX512VL_SUPPORT
>> +if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL))
>> +		return k32v64_hash_bulk_lookup_avx512vl;
>> +#endif
>> +return k32v64_hash_bulk_lookup;
>> +}
>> +
>> +static int
>> +k32v64_hash_add(struct k32v64_hash_table *table, uint32_t key,
>> +uint32_t hash, uint64_t value, uint64_t *old_value, int *found) {
>> +uint32_t bucket;
>> +int i, idx, ret;
>> +uint8_t msk;
>> +struct k32v64_ext_ent *tmp, *ent, *prev = NULL;
>> +
>> +if (table == NULL)
>> +return -EINVAL;
>> +
>> +bucket = hash & table->bucket_msk;
>> +/* Search key in table. Update value if exists */
>> +for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
>> +if ((key == table->t[bucket].key[i]) &&
>> +(table->t[bucket].key_mask & (1 << i))) {
>> +*old_value = table->t[bucket].val[i];
>> +*found = 1;
>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>> +__ATOMIC_ACQUIRE);
>> +table->t[bucket].val[i] = value;
>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>> +__ATOMIC_RELEASE);
>> +return 0;
>> +}
>> +}
>> +
>> +if (!SLIST_EMPTY(&table->t[bucket].head)) {
>> +SLIST_FOREACH(ent, &table->t[bucket].head, next) {
>> +if (ent->key == key) {
>> +*old_value = ent->val;
>> +*found = 1;
>> +__atomic_fetch_add(&table->t[bucket].cnt,
>> 1,
>> +__ATOMIC_ACQUIRE);
>> +ent->val = value;
>> +__atomic_fetch_add(&table->t[bucket].cnt,
>> 1,
>> +__ATOMIC_RELEASE);
>> +return 0;
>> +}
>> +}
>> +}
>> +
>> +msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
>> +if (msk) {
>> +idx = __builtin_ctz(msk);
>> +table->t[bucket].key[idx] = key;
>> +table->t[bucket].val[idx] = value;
>> +__atomic_or_fetch(&table->t[bucket].key_mask, 1 << idx,
>> +__ATOMIC_RELEASE);
>> +table->nb_ent++;
>> +*found = 0;
>> +return 0;
>> +}
>> +
>> +ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
>> +if (ret < 0)
>> +return ret;
>> +
>> +SLIST_NEXT(ent, next) = NULL;
>> +ent->key = key;
>> +ent->val = value;
>> +rte_smp_wmb();
>> +SLIST_FOREACH(tmp, &table->t[bucket].head, next)
>> +prev = tmp;
>> +
>> +if (prev == NULL)
>> +SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
>> +else
>> +SLIST_INSERT_AFTER(prev, ent, next);
>> +
>> +table->nb_ent++;
>> +table->nb_ext_ent++;
>> +*found = 0;
>> +return 0;
>> +}
>> +
>> +static int
>> +k32v64_hash_delete(struct k32v64_hash_table *table, uint32_t key,
>> +uint32_t hash, uint64_t *old_value)
>> +{
>> +uint32_t bucket;
>> +int i;
>> +struct k32v64_ext_ent *ent;
>> +
>> +if (table == NULL)
>> +return -EINVAL;
>> +
>> +bucket = hash & table->bucket_msk;
>> +
>> +for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
>> +if ((key == table->t[bucket].key[i]) &&
>> +(table->t[bucket].key_mask & (1 << i))) {
>> +*old_value = table->t[bucket].val[i];
>> +ent = SLIST_FIRST(&table->t[bucket].head);
>> +if (ent) {
>> +__atomic_fetch_add(&table->t[bucket].cnt,
>> 1,
>> +__ATOMIC_ACQUIRE);
>> +table->t[bucket].key[i] = ent->key;
>> +table->t[bucket].val[i] = ent->val;
>> +SLIST_REMOVE_HEAD(&table-
>>> t[bucket].head, next);
>> +__atomic_fetch_add(&table->t[bucket].cnt,
>> 1,
>> +__ATOMIC_RELEASE);
>> +table->nb_ext_ent--;
>> +} else
>> +__atomic_and_fetch(&table-
>>> t[bucket].key_mask,
>> +~(1 << i), __ATOMIC_RELEASE);
>> +if (ent)
>> +rte_mempool_put(table->ext_ent_pool,
>> ent);
>> +table->nb_ent--;
>> +return 0;
>> +}
>> +}
>> +
>> +SLIST_FOREACH(ent, &table->t[bucket].head, next)
>> +if (ent->key == key)
>> +break;
>> +
>> +if (ent == NULL)
>> +return -ENOENT;
>> +
>> +*old_value = ent->val;
>> +
>> +__atomic_fetch_add(&table->t[bucket].cnt, 1,
>> __ATOMIC_ACQUIRE);
>> +SLIST_REMOVE(&table->t[bucket].head, ent, k32v64_ext_ent, next);
>> +__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_RELEASE);
>> +rte_mempool_put(table->ext_ent_pool, ent);
>> +
> [Wang, Yipeng] I am not sure if delete can be called safely with other lookup threads.
> The item could be recycled to be used by another add while a lookup thread is traversing the linked list.
> This is similar to why we need the RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL flag and related API in rte_hash.


We discussed this with Konstantin previously. In this (very rare) case 
the lookup thread will traverse a linked list which belongs to a 
different bucket, will not find the proper key, and will restart the 
lookup (because the bucket cnt was changed). So it will never encounter 
a linked list entry with an inconsistent next pointer that does not 
point to some other list entry.
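
In other words (illustrative sequence, not code from the patch):

	/*
	 * 1. reader walks bucket B1's ext list, holds a pointer to entry E
	 * 2. writer deletes E from B1 (bumping B1's cnt) and the mempool
	 *    recycles E into bucket B2's list
	 * 3. reader follows E->next into B2's list and finds no match
	 * 4. reader re-checks B1's cnt, sees it changed, retries the lookup
	 */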


>> +table->nb_ext_ent--;
>> +table->nb_ent--;
>> +
>> +return 0;
>> +}
>> +
>> +static int
>> +k32v64_modify(struct rte_kv_hash_table *table, void *key_p, uint32_t
>> hash,
>> +enum rte_kv_modify_op op, void *value_p, int *found) {
>> +struct k32v64_hash_table *ht = (struct k32v64_hash_table *)table;
>> +uint32_t *key = key_p;
>> +uint64_t value;
>> +
>> +if ((ht == NULL) || (key == NULL) || (value_p == NULL) ||
>> +(found == NULL) || (op >=
>> RTE_KV_MODIFY_OP_MAX)) {
>> +return -EINVAL;
>> +}
>> +
>> +value = *(uint64_t *)value_p;
>> +switch (op) {
>> +case RTE_KV_MODIFY_ADD:
>> +return k32v64_hash_add(ht, *key, hash, value, value_p,
>> found);
>> +case RTE_KV_MODIFY_DEL:
>> +return k32v64_hash_delete(ht, *key, hash, value_p);
>> +default:
>> +break;
>> +}
> [Wang, Yipeng] A question would be why put del and add inside another mod wrapper.
> If we don't wrap del and add like this, we could get rid of this branch.


This was done for future extensibility, in case we want to add some 
additional control plane features in the future, other than add or 
delete. It is a control plane operation, so a branch here doesn't make 
any difference. If you think it's unnecessary I'll replace it with an 
add()/del() pair.


>> +
>> +return -EINVAL;
>> +}
>> +
>> +struct rte_kv_hash_table *
>> +k32v64_hash_create(const struct rte_kv_hash_params *params) {
>> +char hash_name[RTE_KV_HASH_NAMESIZE];
>> +struct k32v64_hash_table *ht = NULL;
>> +uint32_t mem_size, nb_buckets, max_ent;
>> +int ret;
>> +struct rte_mempool *mp;
>> +
>> +if ((params == NULL) || (params->name == NULL) ||
>> +(params->entries == 0)) {
> [Wang, Yipeng] Should we check if the entry count is larger than keys_per_bucket as well?


No, because in general we are not limited by the number of keys in a 
bucket. keys_per_bucket reflects the number of keys in the open 
addressing part of a bucket.


>> +rte_errno = EINVAL;
>> +return NULL;
>> +}
>> +
>> +	ret = snprintf(hash_name, sizeof(hash_name), "KV_%s", params->name);
>> +if (ret < 0 || ret >= RTE_KV_HASH_NAMESIZE) {
>> +rte_errno = ENAMETOOLONG;
>> +return NULL;
>> +}
>> +
>> +max_ent = rte_align32pow2(params->entries);
>> +nb_buckets = max_ent / K32V64_KEYS_PER_BUCKET;
> [Wang, Yipeng] Macro to check whether keys_per_bucket needs to be a power of 2?


In general it doesn't have to be a power of 2.


>> +mem_size = sizeof(struct k32v64_hash_table) +
>> +sizeof(struct k32v64_hash_bucket) * nb_buckets;
>> +
>> +mp = rte_mempool_create(hash_name, max_ent,
>> +sizeof(struct k32v64_ext_ent), 0, 0, NULL, NULL, NULL, NULL,
>> +params->socket_id, 0);
>> +
>> +if (mp == NULL)
>> +return NULL;
>> +
>> +ht = rte_zmalloc_socket(hash_name, mem_size,
>> +RTE_CACHE_LINE_SIZE, params->socket_id);
>> +if (ht == NULL) {
>> +rte_mempool_free(mp);
>> +return NULL;
>> +}
>> +
>> +memcpy(ht->pub.name, hash_name, sizeof(ht->pub.name));
>> +ht->max_ent = max_ent;
>> +ht->bucket_msk = nb_buckets - 1;
>> +ht->ext_ent_pool = mp;
>> +ht->pub.lookup = get_lookup_bulk_fn();
> [Wang, Yipeng] Inside the function, we also need to check the CPUID during runtime to decide if AVX can be used, not only at compile time.
> You could refer to example from rte_hash: ... if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) ...


That runtime check is already done here, in get_lookup_bulk_fn():
...
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL))
...


>
>> +ht->pub.modify = k32v64_modify;
>> +
>> +return (struct rte_kv_hash_table *)ht; }
>> +
>> +void
>> +k32v64_hash_free(struct rte_kv_hash_table *ht) {
>> +if (ht == NULL)
>> +return;
>> +
>> +rte_mempool_free(((struct k32v64_hash_table *)ht)-
>>> ext_ent_pool);
>> +rte_free(ht);
>> +}
>> diff --git a/lib/librte_hash/k32v64_hash.h b/lib/librte_hash/k32v64_hash.h
>> new file mode 100644
>> index 0000000..10061a5
>> --- /dev/null
>> +++ b/lib/librte_hash/k32v64_hash.h
>> @@ -0,0 +1,98 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2020 Intel Corporation
>> + */
>> +
>> +#include <rte_kv_hash.h>
>> +
>> +#define K32V64_KEYS_PER_BUCKET		4
>> +#define K32V64_WRITE_IN_PROGRESS	1
>> +#define VALID_KEY_MSK           ((1 << K32V64_KEYS_PER_BUCKET) - 1)
>> +
>> +struct k32v64_ext_ent {
>> +	SLIST_ENTRY(k32v64_ext_ent) next;
>> +	uint32_t	key;
>> +	uint64_t	val;
>> +};
>> +
>> +struct k32v64_hash_bucket {
>> +	uint32_t	key[K32V64_KEYS_PER_BUCKET];
>> +	uint64_t	val[K32V64_KEYS_PER_BUCKET];
>> +	uint8_t		key_mask;
>> +	uint32_t	cnt;
>> +	SLIST_HEAD(k32v64_list_head, k32v64_ext_ent) head;
>> +} __rte_cache_aligned;
>> +
>> +struct k32v64_hash_table {
>> +	struct rte_kv_hash_table pub;	/**< Public part */
> [Wang, Yipeng] Do we need to have the pub field? Could we just init the public part in the public create function?


In my opinion it is better to init it inside the algorithm-specific 
part. Otherwise we would need to expose our internal algo-specific 
lookup_bulk and modify functions to the kv_hash layer.


>> +	uint32_t	nb_ent;		/**< Number of entities in the table*/
>> +	uint32_t	nb_ext_ent;	/**< Number of extended entities */
>> +	uint32_t	max_ent;	/**< Maximum number of entities */
>> +	uint32_t	bucket_msk;
>> +	struct rte_mempool	*ext_ent_pool;
>> +	__extension__ struct k32v64_hash_bucket	t[0];
>> +};
>> +
>> +typedef int (*k32v64_cmp_fn_t)
>> +(struct k32v64_hash_bucket *bucket, uint32_t key, uint64_t *val);
>> +
>> +static inline int
>> +__kv_cmp_keys(struct k32v64_hash_bucket *bucket, uint32_t key,
>> +uint64_t *val)
>> +{
>> +int i;
>> +
>> +for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
>> +if ((key == bucket->key[i]) &&
>> +(bucket->key_mask & (1 << i))) {
>> +*val = bucket->val[i];
>> +return 1;
>> +}
>> +}
>> +
>> +return 0;
>> +}
>> +
>> +static inline int
>> +__k32v64_hash_lookup(struct k32v64_hash_table *table, uint32_t key,
>> +	uint32_t hash, uint64_t *value, k32v64_cmp_fn_t cmp_f)
>> +{
>> +	uint64_t	val = 0;
>> +	struct k32v64_ext_ent *ent;
>> +	uint32_t	cnt;
>> +int found = 0;
>> +uint32_t bucket = hash & table->bucket_msk;
>> +
>> +do {
>> +
>> +do {
>> +cnt = __atomic_load_n(&table->t[bucket].cnt,
>> +__ATOMIC_ACQUIRE);
>> +} while (unlikely(cnt & K32V64_WRITE_IN_PROGRESS));
>> +
>> +found = cmp_f(&table->t[bucket], key, &val);
>> +if (unlikely((found == 0) &&
>> +(!SLIST_EMPTY(&table->t[bucket].head)))) {
>> +SLIST_FOREACH(ent, &table->t[bucket].head, next) {
>> +if (ent->key == key) {
>> +val = ent->val;
>> +found = 1;
>> +break;
>> +}
>> +}
>> +}
>> +__atomic_thread_fence(__ATOMIC_RELEASE);
>> +} while (unlikely(cnt != __atomic_load_n(&table->t[bucket].cnt,
>> + __ATOMIC_RELAXED)));
>> +
>> +if (found == 1) {
>> +*value = val;
>> +return 0;
>> +} else
>> +return -ENOENT;
>> +}
>> +
>> +struct rte_kv_hash_table *
>> +k32v64_hash_create(const struct rte_kv_hash_params *params);
>> +
>> +void
>> +k32v64_hash_free(struct rte_kv_hash_table *ht);
>> diff --git a/lib/librte_hash/k32v64_hash_avx512vl.c b/lib/librte_hash/k32v64_hash_avx512vl.c
>> new file mode 100644
>> index 0000000..75cede5
>> --- /dev/null
>> +++ b/lib/librte_hash/k32v64_hash_avx512vl.c
>> @@ -0,0 +1,59 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2020 Intel Corporation
>> + */
>> +
>> +#include "k32v64_hash.h"
>> +
>> +int
>> +k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht, void
>> *keys_p,
>> +uint32_t *hashes, void *values_p, unsigned int n);
>> +
>> +static inline int
>> +k32v64_cmp_keys_avx512vl(struct k32v64_hash_bucket *bucket, uint32_t
>> key,
>> +uint64_t *val)
>> +{
>> +__m128i keys, srch_key;
>> +__mmask8 msk;
>> +
>> +keys = _mm_load_si128((void *)bucket);
>> +srch_key = _mm_set1_epi32(key);
>> +
>> +msk = _mm_mask_cmpeq_epi32_mask(bucket->key_mask, keys,
>> srch_key);
>> +if (msk) {
>> +*val = bucket->val[__builtin_ctz(msk)];
>> +return 1;
>> +}
>> +
>> +return 0;
>> +}
>> +
>> +static inline int
>> +k32v64_hash_lookup_avx512vl(struct k32v64_hash_table *table, uint32_t
>> key,
>> +uint32_t hash, uint64_t *value)
>> +{
>> +return __k32v64_hash_lookup(table, key, hash, value,
>> +k32v64_cmp_keys_avx512vl);
>> +}
>> +
>> +int
>> +k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht, void
>> *keys_p,
>> +uint32_t *hashes, void *values_p, unsigned int n) {
>> +struct k32v64_hash_table *table = (struct k32v64_hash_table *)ht;
>> +uint32_t *keys = keys_p;
>> +uint64_t *values = values_p;
>> +int ret, cnt = 0;
>> +unsigned int i;
>> +
>> +if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
>> +(values == NULL)))
>> +return -EINVAL;
>> +
>> +for (i = 0; i < n; i++) {
>> +ret = k32v64_hash_lookup_avx512vl(table, keys[i], hashes[i],
>> +&values[i]);
>> +if (ret == 0)
>> +cnt++;
>> +}
>> +return cnt;
>> +}
>> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
>> index 6ab46ae..0d014ea 100644
>> --- a/lib/librte_hash/meson.build
>> +++ b/lib/librte_hash/meson.build
>> @@ -3,10 +3,23 @@
>>
>>   headers = files('rte_crc_arm64.h',
>>   'rte_fbk_hash.h',
>> +'rte_kv_hash.h',
>>   'rte_hash_crc.h',
>>   'rte_hash.h',
>>   'rte_jhash.h',
>>   'rte_thash.h')
>>
>> -sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c')
>> -deps += ['ring']
>> +sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c', 'rte_kv_hash.c',
>> +		'k32v64_hash.c')
>> +deps += ['ring', 'mempool']
>> +
>> +if dpdk_conf.has('RTE_ARCH_X86')
>> +if cc.has_argument('-mavx512vl')
>> +avx512_tmplib = static_library('avx512_tmp',
>> +                                'k32v64_hash_avx512vl.c',
>> +                                dependencies: static_rte_mempool,
>> +                                c_args: cflags + ['-mavx512vl'])
>> +                objs += avx512_tmplib.extract_objects('k32v64_hash_avx512vl.c')
>> +                cflags += '-DCC_AVX512VL_SUPPORT'
>> +
>> +endif
>> +endif
>> diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
>> index c2a9094..614e0a5 100644
>> --- a/lib/librte_hash/rte_hash_version.map
>> +++ b/lib/librte_hash/rte_hash_version.map
>> @@ -36,5 +36,9 @@ EXPERIMENTAL {
>>   rte_hash_lookup_with_hash_bulk;
>>   rte_hash_lookup_with_hash_bulk_data;
>>   rte_hash_max_key_id;
>> -
>> +rte_kv_hash_create;
>> +rte_kv_hash_find_existing;
>> +rte_kv_hash_free;
>> +rte_kv_hash_add;
>> +rte_kv_hash_delete;
>>   };
>> diff --git a/lib/librte_hash/rte_kv_hash.c b/lib/librte_hash/rte_kv_hash.c
>> new file mode 100644
>> index 0000000..03df8db
>> --- /dev/null
>> +++ b/lib/librte_hash/rte_kv_hash.c
>> @@ -0,0 +1,184 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2020 Intel Corporation
>> + */
>> +
>> +#include <string.h>
>> +
>> +#include <rte_eal_memconfig.h>
>> +#include <rte_errno.h>
>> +#include <rte_malloc.h>
>> +#include <rte_memory.h>
>> +#include <rte_tailq.h>
>> +
>> +#include <rte_kv_hash.h>
> [Wang, Yipeng] I think for this header we should use quotes ""


rte_kv_hash.h is a public header file, why should we do that?


>> +#include "k32v64_hash.h"
>> +
>> +TAILQ_HEAD(rte_kv_hash_list, rte_tailq_entry);
>> +
>> +static struct rte_tailq_elem rte_kv_hash_tailq = {
>> +.name = "RTE_KV_HASH",
>> +};
>> +
>> +EAL_REGISTER_TAILQ(rte_kv_hash_tailq);
>> +
>> +int
>> +rte_kv_hash_add(struct rte_kv_hash_table *table, void *key,
>> +uint32_t hash, void *value, int *found) {
>> +if (table == NULL)
> [Wang, Yipeng] To be more consistent,
>   I think we should either also check key/value/found here, or not check at all and leave it to the next functions.


Agree, I will change it next version


>> +return -EINVAL;
>> +
>> +return table->modify(table, key, hash, RTE_KV_MODIFY_ADD,
>> +value, found);
>> +}
>> +
>> +int
>> +rte_kv_hash_delete(struct rte_kv_hash_table *table, void *key,
>> +uint32_t hash, void *value)
>> +{
>> +int found;
>> +
>> +if (table == NULL)
>> +return -EINVAL;
>> +
>> +return table->modify(table, key, hash, RTE_KV_MODIFY_DEL,
>> +value, &found);
>> +}
>> +
>> +struct rte_kv_hash_table *
>> +rte_kv_hash_find_existing(const char *name) {
>> +struct rte_kv_hash_table *h = NULL;
>> +struct rte_tailq_entry *te;
>> +struct rte_kv_hash_list *kv_hash_list;
>> +
>> +kv_hash_list = RTE_TAILQ_CAST(rte_kv_hash_tailq.head,
>> +rte_kv_hash_list);
>> +
>> +rte_mcfg_tailq_read_lock();
>> +TAILQ_FOREACH(te, kv_hash_list, next) {
>> +h = (struct rte_kv_hash_table *) te->data;
>> +if (strncmp(name, h->name, RTE_KV_HASH_NAMESIZE) == 0)
>> +break;
>> +}
>> +rte_mcfg_tailq_read_unlock();
>> +if (te == NULL) {
>> +rte_errno = ENOENT;
>> +return NULL;
>> +}
>> +return h;
>> +}
>> +
>> +struct rte_kv_hash_table *
>> +rte_kv_hash_create(const struct rte_kv_hash_params *params) {
>> +char hash_name[RTE_KV_HASH_NAMESIZE];
>> +struct rte_kv_hash_table *ht, *tmp_ht = NULL;
>> +struct rte_tailq_entry *te;
>> +struct rte_kv_hash_list *kv_hash_list;
>> +int ret;
>> +
>> +if ((params == NULL) || (params->name == NULL) ||
>> +(params->entries == 0) ||
>> +(params->type >= RTE_KV_HASH_MAX)) {
>> +rte_errno = EINVAL;
>> +return NULL;
>> +}
>> +
>> +kv_hash_list = RTE_TAILQ_CAST(rte_kv_hash_tailq.head,
>> +rte_kv_hash_list);
>> +
>> +	ret = snprintf(hash_name, sizeof(hash_name), "KV_%s", params->name);
>> +if (ret < 0 || ret >= RTE_KV_HASH_NAMESIZE) {
>> +rte_errno = ENAMETOOLONG;
>> +return NULL;
>> +}
>> +
>> +switch (params->type) {
>> +case RTE_KV_HASH_K32V64:
>> +ht = k32v64_hash_create(params);
>> +break;
>> +default:
>> +rte_errno = EINVAL;
>> +return NULL;
>> +}
>> +if (ht == NULL)
>> +return ht;
>> +
>> +rte_mcfg_tailq_write_lock();
>> +TAILQ_FOREACH(te, kv_hash_list, next) {
>> +tmp_ht = (struct rte_kv_hash_table *) te->data;
>> +if (strncmp(params->name, tmp_ht->name,
>> +RTE_KV_HASH_NAMESIZE) == 0)
>> +break;
>> +}
>> +if (te != NULL) {
>> +rte_errno = EEXIST;
>> +goto exit;
>> +}
>> +
>> +te = rte_zmalloc("KV_HASH_TAILQ_ENTRY", sizeof(*te), 0);
>> +if (te == NULL) {
>> +RTE_LOG(ERR, HASH, "Failed to allocate tailq entry\n");
>> +goto exit;
>> +}
>> +
>> +ht->type = params->type;
>> +te->data = (void *)ht;
>> +TAILQ_INSERT_TAIL(kv_hash_list, te, next);
>> +
>> +rte_mcfg_tailq_write_unlock();
>> +
>> +return ht;
>> +
>> +exit:
>> +rte_mcfg_tailq_write_unlock();
>> +switch (params->type) {
>> +case RTE_KV_HASH_K32V64:
>> +k32v64_hash_free(ht);
>> +break;
>> +default:
>> +break;
>> +}
>> +return NULL;
>> +}
>> +
>> +void
>> +rte_kv_hash_free(struct rte_kv_hash_table *ht) {
>> +struct rte_tailq_entry *te;
>> +struct rte_kv_hash_list *kv_hash_list;
>> +
>> +if (ht == NULL)
>> +return;
>> +
>> +kv_hash_list = RTE_TAILQ_CAST(rte_kv_hash_tailq.head,
>> +rte_kv_hash_list);
>> +
>> +rte_mcfg_tailq_write_lock();
>> +
>> +/* find out tailq entry */
>> +TAILQ_FOREACH(te, kv_hash_list, next) {
>> +if (te->data == (void *) ht)
>> +break;
>> +}
>> +
>> +
>> +if (te == NULL) {
>> +rte_mcfg_tailq_write_unlock();
>> +return;
>> +}
>> +
>> +TAILQ_REMOVE(kv_hash_list, te, next);
>> +
>> +rte_mcfg_tailq_write_unlock();
>> +
>> +switch (ht->type) {
>> +case RTE_KV_HASH_K32V64:
>> +k32v64_hash_free(ht);
>> +break;
>> +default:
>> +break;
>> +}
>> +rte_free(te);
>> +}
>> diff --git a/lib/librte_hash/rte_kv_hash.h b/lib/librte_hash/rte_kv_hash.h
>> new file mode 100644
>> index 0000000..c0375d1
>> --- /dev/null
>> +++ b/lib/librte_hash/rte_kv_hash.h
>> @@ -0,0 +1,169 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2020 Intel Corporation
>> + */
>> +
>> +#ifndef _RTE_KV_HASH_H_
>> +#define _RTE_KV_HASH_H_
>> +
>> +#ifdef __cplusplus
>> +extern "C" {
>> +#endif
>> +
>> +#include <rte_compat.h>
>> +#include <rte_atomic.h>
>> +#include <rte_mempool.h>
>> +
>> +#define RTE_KV_HASH_NAMESIZE	32
>> +
>> +enum rte_kv_hash_type {
>> +RTE_KV_HASH_K32V64,
>> +RTE_KV_HASH_MAX
>> +};
>> +
>> +enum rte_kv_modify_op {
>> +RTE_KV_MODIFY_ADD,
>> +RTE_KV_MODIFY_DEL,
>> +RTE_KV_MODIFY_OP_MAX
>> +};
> [Wang, Yipeng] Again, any particular reason that you combine add and del into an additional struct mod?
>> +
>> +struct rte_kv_hash_params {
>> +const char *name;
>> +uint32_t entries;
>> +int socket_id;
>> +enum rte_kv_hash_type type;
>> +};
>> +
>> +struct rte_kv_hash_table;
>> +
>> +typedef int (*rte_kv_hash_bulk_lookup_t) (struct rte_kv_hash_table
>> +*table, void *keys, uint32_t *hashes,
>> +void *values, unsigned int n);
>> +
>> +typedef int (*rte_kv_hash_modify_t)
>> +(struct rte_kv_hash_table *table, void *key, uint32_t hash,
>> +enum rte_kv_modify_op op, void *value, int *found);
>> +
>> +struct rte_kv_hash_table {
>> +	char name[RTE_KV_HASH_NAMESIZE];	/**< Name of the hash. */
>> +	rte_kv_hash_bulk_lookup_t	lookup;
>> +	rte_kv_hash_modify_t		modify;
>> +	enum rte_kv_hash_type		type;
>> +};
>> +
>> +/**
>> + * Look up a bulk of keys.
>> + * This function is multi-thread safe with regard to other lookup threads.
>> + *
>> + * @param table
>> + *   Hash table to look up the keys in.
>> + * @param keys
>> + *   Pointer to array of keys.
>> + * @param hashes
>> + *   Pointer to array of hash values associated with keys.
>> + * @param values
>> + *   Pointer to array of values corresponding to keys.
>> + *   If a key was not found, the corresponding value remains intact.
>> + * @param n
>> + *   Number of keys to lookup in batch.
>> + * @return
>> + *   -EINVAL if there's an error, otherwise number of successful lookups.
>> + */
> [Wang, Yipeng] experimental tag


Yes, will add


>> +static inline int
>> +rte_kv_hash_bulk_lookup(struct rte_kv_hash_table *table,
>> +	void *keys, uint32_t *hashes, void *values, unsigned int n)
>> +{
>> +	return table->lookup(table, keys, hashes, values, n);
>> +}
>> +
>> +/**
>> + * Add a key to an existing hash table with a precalculated hash value.
>> + * This operation is not multi-thread safe with regard to other add/delete
>> + * operations and should only be called from one writer thread.
>> + * However, it is safe to call it concurrently with lookups.
>> + *
>> + * @param table
>> + *   Hash table to add the key to.
>> + * @param key
>> + *   Key to add to the hash table.
>> + * @param value
>> + *   Value to associate with key.
>> + * @param hash
>> + *   Hash value associated with key.
>> + * @param found
>> + *   0 if no previously added key was found,
>> + *   1 if a previously added key was found; the old value associated with
>> + *   the key is written to *value.
>> + * @return
>> + *   0 if ok, or negative value on error.
>> + */
>> +__rte_experimental
>> +int
>> +rte_kv_hash_add(struct rte_kv_hash_table *table, void *key,
>> +	uint32_t hash, void *value, int *found);
>> +
>> +/**
>> + * Remove a key with a given hash value from an existing hash table.
>> + * This operation is not multi-thread safe with regard to other add/delete
>> + * operations and should only be called from one writer thread.
>> + * However, it is safe to call it concurrently with lookups.
>> + *
>> + * @param table
>> + *   Hash table to remove the key from.
>> + * @param key
>> + *   Key to remove from the hash table.
>> + * @param hash
>> + *   Hash value associated with key.
>> + * @param value
>> + *   Pointer to memory where the old value will be written on success.
>> + * @return
>> + *   0 if ok, or negative value on error.
>> + */
>> +__rte_experimental
>> +int
>> +rte_kv_hash_delete(struct rte_kv_hash_table *table, void *key,
>> +	uint32_t hash, void *value);
>> +
>> +/**
>> + * Performs a lookup for an existing hash table, and returns a pointer to
>> + * the table if found.
>> + *
>> + * @param name
>> + *   Name of the hash table to find
>> + *
>> + * @return
>> + *   Pointer to hash table structure or NULL on error with rte_errno
>> + *   set appropriately.
>> + */
>> +__rte_experimental
>> +struct rte_kv_hash_table *
>> +rte_kv_hash_find_existing(const char *name);
>> +
>> +/**
>> + * Create a new hash table for use with four byte keys.
> [Wang, Yipeng] inaccurate comment, not four byte keys


Yes, artifact from previous implementation


>> + *
>> + * @param params
>> + *   Parameters used in creation of hash table.
>> + *
>> + * @return
>> + *   Pointer to hash table structure that is used in future hash table
>> + *   operations, or NULL on error with rte_errno set appropriately.
>> + */
>> +__rte_experimental
>> +struct rte_kv_hash_table *
>> +rte_kv_hash_create(const struct rte_kv_hash_params *params);
>> +
>> +/**
>> + * Free all memory used by a hash table.
>> + *
>> + * @param table
>> + *   Hash table to deallocate.
>> + */
>> +__rte_experimental
>> +void
>> +rte_kv_hash_free(struct rte_kv_hash_table *table);
>> +
>> +#ifdef __cplusplus
>> +}
>> +#endif
>> +
>> +#endif /* _RTE_KV_HASH_H_ */
>> --
>> 2.7.4

Patch
diff mbox series

diff --git a/lib/Makefile b/lib/Makefile
index 9d24609..42769e9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -48,7 +48,7 @@  DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
 DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ethdev \
 			librte_net librte_hash librte_cryptodev
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
-DEPDIRS-librte_hash := librte_eal librte_ring
+DEPDIRS-librte_hash := librte_eal librte_ring librte_mempool
 DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
 DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_RIB) += librte_rib
diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
index ec9f864..a0cdee9 100644
--- a/lib/librte_hash/Makefile
+++ b/lib/librte_hash/Makefile
@@ -8,13 +8,15 @@  LIB = librte_hash.a
 
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
-LDLIBS += -lrte_eal -lrte_ring
+LDLIBS += -lrte_eal -lrte_ring -lrte_mempool
 
 EXPORT_MAP := rte_hash_version.map
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) := rte_cuckoo_hash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_kv_hash.c
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) += k32v64_hash.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
@@ -27,5 +29,15 @@  endif
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_kv_hash.h
+
+CC_AVX512VL_SUPPORT=$(shell $(CC) -mavx512vl -dM -E - </dev/null 2>&1 | \
+grep -q __AVX512VL__ && echo 1)
+
+ifeq ($(CC_AVX512VL_SUPPORT), 1)
+	SRCS-$(CONFIG_RTE_LIBRTE_HASH) += k32v64_hash_avx512vl.c
+	CFLAGS_k32v64_hash_avx512vl.o += -mavx512vl
+	CFLAGS_k32v64_hash.o += -DCC_AVX512VL_SUPPORT
+endif
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_hash/k32v64_hash.c b/lib/librte_hash/k32v64_hash.c
new file mode 100644
index 0000000..24cd63a
--- /dev/null
+++ b/lib/librte_hash/k32v64_hash.c
@@ -0,0 +1,277 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <string.h>
+
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+
+#include "k32v64_hash.h"
+
+static inline int
+k32v64_hash_lookup(struct k32v64_hash_table *table, uint32_t key,
+	uint32_t hash, uint64_t *value)
+{
+	return __k32v64_hash_lookup(table, key, hash, value, __kv_cmp_keys);
+}
+
+static int
+k32v64_hash_bulk_lookup(struct rte_kv_hash_table *ht, void *keys_p,
+	uint32_t *hashes, void *values_p, unsigned int n)
+{
+	struct k32v64_hash_table *table = (struct k32v64_hash_table *)ht;
+	uint32_t *keys = keys_p;
+	uint64_t *values = values_p;
+	int ret, cnt = 0;
+	unsigned int i;
+
+	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
+			(values == NULL)))
+		return -EINVAL;
+
+	for (i = 0; i < n; i++) {
+		ret = k32v64_hash_lookup(table, keys[i], hashes[i],
+			&values[i]);
+		if (ret == 0)
+			cnt++;
+	}
+	return cnt;
+}
+
+#ifdef CC_AVX512VL_SUPPORT
+int
+k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht,
+	void *keys_p, uint32_t *hashes, void *values_p, unsigned int n);
+#endif
+
+static rte_kv_hash_bulk_lookup_t
+get_lookup_bulk_fn(void)
+{
+#ifdef CC_AVX512VL_SUPPORT
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL))
+		return k32v64_hash_bulk_lookup_avx512vl;
+#endif
+	return k32v64_hash_bulk_lookup;
+}
+
+static int
+k32v64_hash_add(struct k32v64_hash_table *table, uint32_t key,
+	uint32_t hash, uint64_t value, uint64_t *old_value, int *found)
+{
+	uint32_t bucket;
+	int i, idx, ret;
+	uint8_t msk;
+	struct k32v64_ext_ent *tmp, *ent, *prev = NULL;
+
+	if (table == NULL)
+		return -EINVAL;
+
+	bucket = hash & table->bucket_msk;
+	/* Search key in table. Update value if exists */
+	for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
+		if ((key == table->t[bucket].key[i]) &&
+				(table->t[bucket].key_mask & (1 << i))) {
+			*old_value = table->t[bucket].val[i];
+			*found = 1;
+			__atomic_fetch_add(&table->t[bucket].cnt, 1,
+				__ATOMIC_ACQUIRE);
+			table->t[bucket].val[i] = value;
+			__atomic_fetch_add(&table->t[bucket].cnt, 1,
+				__ATOMIC_RELEASE);
+			return 0;
+		}
+	}
+
+	if (!SLIST_EMPTY(&table->t[bucket].head)) {
+		SLIST_FOREACH(ent, &table->t[bucket].head, next) {
+			if (ent->key == key) {
+				*old_value = ent->val;
+				*found = 1;
+				__atomic_fetch_add(&table->t[bucket].cnt, 1,
+					__ATOMIC_ACQUIRE);
+				ent->val = value;
+				__atomic_fetch_add(&table->t[bucket].cnt, 1,
+					__ATOMIC_RELEASE);
+				return 0;
+			}
+		}
+	}
+
+	msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
+	if (msk) {
+		idx = __builtin_ctz(msk);
+		table->t[bucket].key[idx] = key;
+		table->t[bucket].val[idx] = value;
+		__atomic_or_fetch(&table->t[bucket].key_mask, 1 << idx,
+			__ATOMIC_RELEASE);
+		table->nb_ent++;
+		*found = 0;
+		return 0;
+	}
+
+	ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
+	if (ret < 0)
+		return ret;
+
+	SLIST_NEXT(ent, next) = NULL;
+	ent->key = key;
+	ent->val = value;
+	rte_smp_wmb();
+	SLIST_FOREACH(tmp, &table->t[bucket].head, next)
+		prev = tmp;
+
+	if (prev == NULL)
+		SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
+	else
+		SLIST_INSERT_AFTER(prev, ent, next);
+
+	table->nb_ent++;
+	table->nb_ext_ent++;
+	*found = 0;
+	return 0;
+}
+
+static int
+k32v64_hash_delete(struct k32v64_hash_table *table, uint32_t key,
+	uint32_t hash, uint64_t *old_value)
+{
+	uint32_t bucket;
+	int i;
+	struct k32v64_ext_ent *ent;
+
+	if (table == NULL)
+		return -EINVAL;
+
+	bucket = hash & table->bucket_msk;
+
+	for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
+		if ((key == table->t[bucket].key[i]) &&
+				(table->t[bucket].key_mask & (1 << i))) {
+			*old_value = table->t[bucket].val[i];
+			ent = SLIST_FIRST(&table->t[bucket].head);
+			if (ent) {
+				__atomic_fetch_add(&table->t[bucket].cnt, 1,
+					__ATOMIC_ACQUIRE);
+				table->t[bucket].key[i] = ent->key;
+				table->t[bucket].val[i] = ent->val;
+				SLIST_REMOVE_HEAD(&table->t[bucket].head, next);
+				__atomic_fetch_add(&table->t[bucket].cnt, 1,
+					__ATOMIC_RELEASE);
+				table->nb_ext_ent--;
+			} else
+				__atomic_and_fetch(&table->t[bucket].key_mask,
+					~(1 << i), __ATOMIC_RELEASE);
+			if (ent)
+				rte_mempool_put(table->ext_ent_pool, ent);
+			table->nb_ent--;
+			return 0;
+		}
+	}
+
+	SLIST_FOREACH(ent, &table->t[bucket].head, next)
+		if (ent->key == key)
+			break;
+
+	if (ent == NULL)
+		return -ENOENT;
+
+	*old_value = ent->val;
+
+	__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_ACQUIRE);
+	SLIST_REMOVE(&table->t[bucket].head, ent, k32v64_ext_ent, next);
+	__atomic_fetch_add(&table->t[bucket].cnt, 1, __ATOMIC_RELEASE);
+	rte_mempool_put(table->ext_ent_pool, ent);
+
+	table->nb_ext_ent--;
+	table->nb_ent--;
+
+	return 0;
+}
+
+static int
+k32v64_modify(struct rte_kv_hash_table *table, void *key_p, uint32_t hash,
+	enum rte_kv_modify_op op, void *value_p, int *found)
+{
+	struct k32v64_hash_table *ht = (struct k32v64_hash_table *)table;
+	uint32_t *key = key_p;
+	uint64_t value;
+
+	if ((ht == NULL) || (key == NULL) || (value_p == NULL) ||
+			(found == NULL) || (op >= RTE_KV_MODIFY_OP_MAX)) {
+		return -EINVAL;
+	}
+
+	value = *(uint64_t *)value_p;
+	switch (op) {
+	case RTE_KV_MODIFY_ADD:
+		return k32v64_hash_add(ht, *key, hash, value, value_p, found);
+	case RTE_KV_MODIFY_DEL:
+		return k32v64_hash_delete(ht, *key, hash, value_p);
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+struct rte_kv_hash_table *
+k32v64_hash_create(const struct rte_kv_hash_params *params)
+{
+	char hash_name[RTE_KV_HASH_NAMESIZE];
+	struct k32v64_hash_table *ht = NULL;
+	uint32_t mem_size, nb_buckets, max_ent;
+	int ret;
+	struct rte_mempool *mp;
+
+	if ((params == NULL) || (params->name == NULL) ||
+			(params->entries == 0)) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	ret = snprintf(hash_name, sizeof(hash_name), "KV_%s", params->name);
+	if (ret < 0 || ret >= RTE_KV_HASH_NAMESIZE) {
+		rte_errno = ENAMETOOLONG;
+		return NULL;
+	}
+
+	max_ent = rte_align32pow2(params->entries);
+	nb_buckets = max_ent / K32V64_KEYS_PER_BUCKET;
+	mem_size = sizeof(struct k32v64_hash_table) +
+		sizeof(struct k32v64_hash_bucket) * nb_buckets;
+
+	mp = rte_mempool_create(hash_name, max_ent,
+		sizeof(struct k32v64_ext_ent), 0, 0, NULL, NULL, NULL, NULL,
+		params->socket_id, 0);
+
+	if (mp == NULL)
+		return NULL;
+
+	ht = rte_zmalloc_socket(hash_name, mem_size,
+		RTE_CACHE_LINE_SIZE, params->socket_id);
+	if (ht == NULL) {
+		rte_mempool_free(mp);
+		return NULL;
+	}
+
+	memcpy(ht->pub.name, hash_name, sizeof(ht->pub.name));
+	ht->max_ent = max_ent;
+	ht->bucket_msk = nb_buckets - 1;
+	ht->ext_ent_pool = mp;
+	ht->pub.lookup = get_lookup_bulk_fn();
+	ht->pub.modify = k32v64_modify;
+
+	return (struct rte_kv_hash_table *)ht;
+}
+
+void
+k32v64_hash_free(struct rte_kv_hash_table *ht)
+{
+	if (ht == NULL)
+		return;
+
+	rte_mempool_free(((struct k32v64_hash_table *)ht)->ext_ent_pool);
+	rte_free(ht);
+}
diff --git a/lib/librte_hash/k32v64_hash.h b/lib/librte_hash/k32v64_hash.h
new file mode 100644
index 0000000..10061a5
--- /dev/null
+++ b/lib/librte_hash/k32v64_hash.h
@@ -0,0 +1,98 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <rte_kv_hash.h>
+
+#define K32V64_KEYS_PER_BUCKET		4
+#define K32V64_WRITE_IN_PROGRESS	1
+#define VALID_KEY_MSK           ((1 << K32V64_KEYS_PER_BUCKET) - 1)
+
+struct k32v64_ext_ent {
+	SLIST_ENTRY(k32v64_ext_ent) next;
+	uint32_t	key;
+	uint64_t	val;
+};
+
+struct k32v64_hash_bucket {
+	uint32_t	key[K32V64_KEYS_PER_BUCKET];
+	uint64_t	val[K32V64_KEYS_PER_BUCKET];
+	uint8_t		key_mask;
+	uint32_t	cnt;
+	SLIST_HEAD(k32v64_list_head, k32v64_ext_ent) head;
+} __rte_cache_aligned;
+
+struct k32v64_hash_table {
+	struct rte_kv_hash_table pub;	/**< Public part */
+	uint32_t	nb_ent;		/**< Number of entries in the table */
+	uint32_t	nb_ext_ent;	/**< Number of extended entries */
+	uint32_t	max_ent;	/**< Maximum number of entries */
+	uint32_t	bucket_msk;
+	struct rte_mempool	*ext_ent_pool;
+	__extension__ struct k32v64_hash_bucket	t[0];
+};
+
+typedef int (*k32v64_cmp_fn_t)
+(struct k32v64_hash_bucket *bucket, uint32_t key, uint64_t *val);
+
+static inline int
+__kv_cmp_keys(struct k32v64_hash_bucket *bucket, uint32_t key,
+	uint64_t *val)
+{
+	int i;
+
+	for (i = 0; i < K32V64_KEYS_PER_BUCKET; i++) {
+		if ((key == bucket->key[i]) &&
+				(bucket->key_mask & (1 << i))) {
+			*val = bucket->val[i];
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static inline int
+__k32v64_hash_lookup(struct k32v64_hash_table *table, uint32_t key,
+	uint32_t hash, uint64_t *value, k32v64_cmp_fn_t cmp_f)
+{
+	uint64_t	val = 0;
+	struct k32v64_ext_ent *ent;
+	uint32_t	cnt;
+	int found = 0;
+	uint32_t bucket = hash & table->bucket_msk;
+
+	do {
+
+		do {
+			cnt = __atomic_load_n(&table->t[bucket].cnt,
+				__ATOMIC_ACQUIRE);
+		} while (unlikely(cnt & K32V64_WRITE_IN_PROGRESS));
+
+		found = cmp_f(&table->t[bucket], key, &val);
+		if (unlikely((found == 0) &&
+				(!SLIST_EMPTY(&table->t[bucket].head)))) {
+			SLIST_FOREACH(ent, &table->t[bucket].head, next) {
+				if (ent->key == key) {
+					val = ent->val;
+					found = 1;
+					break;
+				}
+			}
+		}
+		__atomic_thread_fence(__ATOMIC_RELEASE);
+	} while (unlikely(cnt != __atomic_load_n(&table->t[bucket].cnt,
+			 __ATOMIC_RELAXED)));
+
+	if (found == 1) {
+		*value = val;
+		return 0;
+	} else
+		return -ENOENT;
+}
+
+struct rte_kv_hash_table *
+k32v64_hash_create(const struct rte_kv_hash_params *params);
+
+void
+k32v64_hash_free(struct rte_kv_hash_table *ht);
diff --git a/lib/librte_hash/k32v64_hash_avx512vl.c b/lib/librte_hash/k32v64_hash_avx512vl.c
new file mode 100644
index 0000000..75cede5
--- /dev/null
+++ b/lib/librte_hash/k32v64_hash_avx512vl.c
@@ -0,0 +1,59 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include "k32v64_hash.h"
+
+int
+k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht, void *keys_p,
+	uint32_t *hashes, void *values_p, unsigned int n);
+
+static inline int
+k32v64_cmp_keys_avx512vl(struct k32v64_hash_bucket *bucket, uint32_t key,
+	uint64_t *val)
+{
+	__m128i keys, srch_key;
+	__mmask8 msk;
+
+	keys = _mm_load_si128((void *)bucket);
+	srch_key = _mm_set1_epi32(key);
+
+	msk = _mm_mask_cmpeq_epi32_mask(bucket->key_mask, keys, srch_key);
+	if (msk) {
+		*val = bucket->val[__builtin_ctz(msk)];
+		return 1;
+	}
+
+	return 0;
+}
+
+static inline int
+k32v64_hash_lookup_avx512vl(struct k32v64_hash_table *table, uint32_t key,
+	uint32_t hash, uint64_t *value)
+{
+	return __k32v64_hash_lookup(table, key, hash, value,
+		k32v64_cmp_keys_avx512vl);
+}
+
+int
+k32v64_hash_bulk_lookup_avx512vl(struct rte_kv_hash_table *ht, void *keys_p,
+	uint32_t *hashes, void *values_p, unsigned int n)
+{
+	struct k32v64_hash_table *table = (struct k32v64_hash_table *)ht;
+	uint32_t *keys = keys_p;
+	uint64_t *values = values_p;
+	int ret, cnt = 0;
+	unsigned int i;
+
+	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
+			(values == NULL)))
+		return -EINVAL;
+
+	for (i = 0; i < n; i++) {
+		ret = k32v64_hash_lookup_avx512vl(table, keys[i], hashes[i],
+			&values[i]);
+		if (ret == 0)
+			cnt++;
+	}
+	return cnt;
+}
diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
index 6ab46ae..0d014ea 100644
--- a/lib/librte_hash/meson.build
+++ b/lib/librte_hash/meson.build
@@ -3,10 +3,23 @@ 
 
 headers = files('rte_crc_arm64.h',
 	'rte_fbk_hash.h',
+	'rte_kv_hash.h',
 	'rte_hash_crc.h',
 	'rte_hash.h',
 	'rte_jhash.h',
 	'rte_thash.h')
 
-sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c')
-deps += ['ring']
+sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c', 'rte_kv_hash.c', 'k32v64_hash.c')
+deps += ['ring', 'mempool']
+
+if dpdk_conf.has('RTE_ARCH_X86')
+	if cc.has_argument('-mavx512vl')
+		avx512_tmplib = static_library('avx512_tmp',
+				'k32v64_hash_avx512vl.c',
+				dependencies: static_rte_mempool,
+				c_args: cflags + ['-mavx512vl'])
+		objs += avx512_tmplib.extract_objects('k32v64_hash_avx512vl.c')
+		cflags += '-DCC_AVX512VL_SUPPORT'
+
+	endif
+endif
diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
index c2a9094..614e0a5 100644
--- a/lib/librte_hash/rte_hash_version.map
+++ b/lib/librte_hash/rte_hash_version.map
@@ -36,5 +36,9 @@  EXPERIMENTAL {
 	rte_hash_lookup_with_hash_bulk;
 	rte_hash_lookup_with_hash_bulk_data;
 	rte_hash_max_key_id;
-
+	rte_kv_hash_create;
+	rte_kv_hash_find_existing;
+	rte_kv_hash_free;
+	rte_kv_hash_add;
+	rte_kv_hash_delete;
 };
diff --git a/lib/librte_hash/rte_kv_hash.c b/lib/librte_hash/rte_kv_hash.c
new file mode 100644
index 0000000..03df8db
--- /dev/null
+++ b/lib/librte_hash/rte_kv_hash.c
@@ -0,0 +1,184 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <string.h>
+
+#include <rte_eal_memconfig.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_tailq.h>
+
+#include <rte_kv_hash.h>
+#include "k32v64_hash.h"
+
+TAILQ_HEAD(rte_kv_hash_list, rte_tailq_entry);
+
+static struct rte_tailq_elem rte_kv_hash_tailq = {
+	.name = "RTE_KV_HASH",
+};
+
+EAL_REGISTER_TAILQ(rte_kv_hash_tailq);
+
+int
+rte_kv_hash_add(struct rte_kv_hash_table *table, void *key,
+	uint32_t hash, void *value, int *found)
+{
+	if (table == NULL)
+		return -EINVAL;
+
+	return table->modify(table, key, hash, RTE_KV_MODIFY_ADD,
+		value, found);
+}
+
+int
+rte_kv_hash_delete(struct rte_kv_hash_table *table, void *key,
+	uint32_t hash, void *value)
+{
+	int found;
+
+	if (table == NULL)
+		return -EINVAL;
+
+	return table->modify(table, key, hash, RTE_KV_MODIFY_DEL,
+		value, &found);
+}
+
+struct rte_kv_hash_table *
+rte_kv_hash_find_existing(const char *name)
+{
+	struct rte_kv_hash_table *h = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_kv_hash_list *kv_hash_list;
+
+	kv_hash_list = RTE_TAILQ_CAST(rte_kv_hash_tailq.head,
+			rte_kv_hash_list);
+
+	rte_mcfg_tailq_read_lock();
+	TAILQ_FOREACH(te, kv_hash_list, next) {
+		h = (struct rte_kv_hash_table *) te->data;
+		if (strncmp(name, h->name, RTE_KV_HASH_NAMESIZE) == 0)
+			break;
+	}
+	rte_mcfg_tailq_read_unlock();
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+	return h;
+}
+
+struct rte_kv_hash_table *
+rte_kv_hash_create(const struct rte_kv_hash_params *params)
+{
+	char hash_name[RTE_KV_HASH_NAMESIZE];
+	struct rte_kv_hash_table *ht, *tmp_ht = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_kv_hash_list *kv_hash_list;
+	int ret;
+
+	if ((params == NULL) || (params->name == NULL) ||
+			(params->entries == 0) ||
+			(params->type >= RTE_KV_HASH_MAX)) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	kv_hash_list = RTE_TAILQ_CAST(rte_kv_hash_tailq.head,
+		rte_kv_hash_list);
+
+	ret = snprintf(hash_name, sizeof(hash_name), "KV_%s", params->name);
+	if (ret < 0 || ret >= RTE_KV_HASH_NAMESIZE) {
+		rte_errno = ENAMETOOLONG;
+		return NULL;
+	}
+
+	switch (params->type) {
+	case RTE_KV_HASH_K32V64:
+		ht = k32v64_hash_create(params);
+		break;
+	default:
+		rte_errno = EINVAL;
+		return NULL;
+	}
+	if (ht == NULL)
+		return ht;
+
+	rte_mcfg_tailq_write_lock();
+	TAILQ_FOREACH(te, kv_hash_list, next) {
+		tmp_ht = (struct rte_kv_hash_table *) te->data;
+		if (strncmp(params->name, tmp_ht->name,
+				RTE_KV_HASH_NAMESIZE) == 0)
+			break;
+	}
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		goto exit;
+	}
+
+	te = rte_zmalloc("KV_HASH_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, HASH, "Failed to allocate tailq entry\n");
+		goto exit;
+	}
+
+	ht->type = params->type;
+	te->data = (void *)ht;
+	TAILQ_INSERT_TAIL(kv_hash_list, te, next);
+
+	rte_mcfg_tailq_write_unlock();
+
+	return ht;
+
+exit:
+	rte_mcfg_tailq_write_unlock();
+	switch (params->type) {
+	case RTE_KV_HASH_K32V64:
+		k32v64_hash_free(ht);
+		break;
+	default:
+		break;
+	}
+	return NULL;
+}
+
+void
+rte_kv_hash_free(struct rte_kv_hash_table *ht)
+{
+	struct rte_tailq_entry *te;
+	struct rte_kv_hash_list *kv_hash_list;
+
+	if (ht == NULL)
+		return;
+
+	kv_hash_list = RTE_TAILQ_CAST(rte_kv_hash_tailq.head,
+				rte_kv_hash_list);
+
+	rte_mcfg_tailq_write_lock();
+
+	/* find the tailq entry */
+	TAILQ_FOREACH(te, kv_hash_list, next) {
+		if (te->data == (void *) ht)
+			break;
+	}
+
+
+	if (te == NULL) {
+		rte_mcfg_tailq_write_unlock();
+		return;
+	}
+
+	TAILQ_REMOVE(kv_hash_list, te, next);
+
+	rte_mcfg_tailq_write_unlock();
+
+	switch (ht->type) {
+	case RTE_KV_HASH_K32V64:
+		k32v64_hash_free(ht);
+		break;
+	default:
+		break;
+	}
+	rte_free(te);
+}
diff --git a/lib/librte_hash/rte_kv_hash.h b/lib/librte_hash/rte_kv_hash.h
new file mode 100644
index 0000000..c0375d1
--- /dev/null
+++ b/lib/librte_hash/rte_kv_hash.h
@@ -0,0 +1,169 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _RTE_KV_HASH_H_
+#define _RTE_KV_HASH_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_compat.h>
+#include <rte_atomic.h>
+#include <rte_mempool.h>
+
+#define RTE_KV_HASH_NAMESIZE		32
+
+enum rte_kv_hash_type {
+	RTE_KV_HASH_K32V64,
+	RTE_KV_HASH_MAX
+};
+
+enum rte_kv_modify_op {
+	RTE_KV_MODIFY_ADD,
+	RTE_KV_MODIFY_DEL,
+	RTE_KV_MODIFY_OP_MAX
+};
+
+struct rte_kv_hash_params {
+	const char *name;
+	uint32_t entries;
+	int socket_id;
+	enum rte_kv_hash_type type;
+};
+
+struct rte_kv_hash_table;
+
+typedef int (*rte_kv_hash_bulk_lookup_t)
+(struct rte_kv_hash_table *table, void *keys, uint32_t *hashes,
+	void *values, unsigned int n);
+
+typedef int (*rte_kv_hash_modify_t)
+(struct rte_kv_hash_table *table, void *key, uint32_t hash,
+	enum rte_kv_modify_op op, void *value, int *found);
+
+struct rte_kv_hash_table {
+	char name[RTE_KV_HASH_NAMESIZE];	/**< Name of the hash. */
+	rte_kv_hash_bulk_lookup_t	lookup;
+	rte_kv_hash_modify_t		modify;
+	enum rte_kv_hash_type		type;
+};
+
+/**
+ * Look up a bulk of keys.
+ * This function is multi-thread safe with regard to other lookup threads.
+ *
+ * @param table
+ *   Hash table to look up the keys in.
+ * @param keys
+ *   Pointer to array of keys.
+ * @param hashes
+ *   Pointer to array of hash values associated with keys.
+ * @param values
+ *   Pointer to array of values corresponding to keys.
+ *   If a key was not found, the corresponding value remains intact.
+ * @param n
+ *   Number of keys to lookup in batch.
+ * @return
+ *   -EINVAL if there's an error, otherwise number of successful lookups.
+ */
+static inline int
+rte_kv_hash_bulk_lookup(struct rte_kv_hash_table *table,
+	void *keys, uint32_t *hashes, void *values, unsigned int n)
+{
+	return table->lookup(table, keys, hashes, values, n);
+}
+
+/**
+ * Add a key to an existing hash table with a precalculated hash value.
+ * This operation is not multi-thread safe with regard to other add/delete
+ * operations and should only be called from one writer thread.
+ * However, it is safe to call it concurrently with lookups.
+ *
+ * @param table
+ *   Hash table to add the key to.
+ * @param key
+ *   Key to add to the hash table.
+ * @param value
+ *   Value to associate with key.
+ * @param hash
+ *   Hash value associated with key.
+ * @param found
+ *   0 if no previously added key was found,
+ *   1 if a previously added key was found; the old value associated with
+ *   the key is written to *value.
+ * @return
+ *   0 if ok, or negative value on error.
+ */
+__rte_experimental
+int
+rte_kv_hash_add(struct rte_kv_hash_table *table, void *key,
+	uint32_t hash, void *value, int *found);
+
+/**
+ * Remove a key with a given hash value from an existing hash table.
+ * This operation is not multi-thread safe with regard to other add/delete
+ * operations and should only be called from one writer thread.
+ * However, it is safe to call it concurrently with lookups.
+ *
+ * @param table
+ *   Hash table to remove the key from.
+ * @param key
+ *   Key to remove from the hash table.
+ * @param hash
+ *   Hash value associated with key.
+ * @param value
+ *   Pointer to memory where the old value will be written on success.
+ * @return
+ *   0 if ok, or negative value on error.
+ */
+__rte_experimental
+int
+rte_kv_hash_delete(struct rte_kv_hash_table *table, void *key,
+	uint32_t hash, void *value);
+
+/**
+ * Performs a lookup for an existing hash table, and returns a pointer to
+ * the table if found.
+ *
+ * @param name
+ *   Name of the hash table to find
+ *
+ * @return
+ *   Pointer to hash table structure or NULL on error with rte_errno
+ *   set appropriately.
+ */
+__rte_experimental
+struct rte_kv_hash_table *
+rte_kv_hash_find_existing(const char *name);
+
+/**
+ * Create a new kv hash table of the type given in params.
+ *
+ * @param params
+ *   Parameters used in creation of hash table.
+ *
+ * @return
+ *   Pointer to hash table structure that is used in future hash table
+ *   operations, or NULL on error with rte_errno set appropriately.
+ */
+__rte_experimental
+struct rte_kv_hash_table *
+rte_kv_hash_create(const struct rte_kv_hash_params *params);
+
+/**
+ * Free all memory used by a hash table.
+ *
+ * @param table
+ *   Hash table to deallocate.
+ */
+__rte_experimental
+void
+rte_kv_hash_free(struct rte_kv_hash_table *table);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_KV_HASH_H_ */
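
A note for readers evaluating the API: the sketch below strings the public
calls together the way the doc comments describe them, with a single writer
thread performing add/delete and hashes precalculated by the caller. It is
illustrative only and not part of the patch; the table name and the choice
of rte_jhash_1word() as the hash function are assumptions (the library is
hash function agnostic).

#include <rte_errno.h>
#include <rte_jhash.h>
#include <rte_memory.h>
#include <rte_kv_hash.h>

static int
kv_hash_example(void)
{
	struct rte_kv_hash_params params = {
		.name = "example",	/* the library prefixes it with "KV_" */
		.entries = 1 << 16,
		.socket_id = SOCKET_ID_ANY,
		.type = RTE_KV_HASH_K32V64,	/* 32-bit keys, 64-bit values */
	};
	struct rte_kv_hash_table *t;
	uint32_t key = 42, hash;
	uint64_t in = 0xdeadbeef, out = 0;
	int found = 0, ret;

	t = rte_kv_hash_create(&params);
	if (t == NULL)
		return -rte_errno;

	/* The table stores no hash function; the caller supplies the hash. */
	hash = rte_jhash_1word(key, 0);

	/* When an existing key is updated, *found is set to 1 and the old
	 * value is written back through the value pointer. */
	ret = rte_kv_hash_add(t, &key, hash, &in, &found);
	if (ret < 0)
		goto out;

	/* Returns the number of keys found, here 1 on success. */
	ret = rte_kv_hash_bulk_lookup(t, &key, &hash, &out, 1);

	/* The removed key's value is returned through the last argument. */
	ret = rte_kv_hash_delete(t, &key, hash, &out);
out:
	rte_kv_hash_free(t);
	return ret;
}

Since rte_kv_hash_create() registers the table in a shared tailq under the
prefixed name, another part of the application can attach to it later with
rte_kv_hash_find_existing("KV_example").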
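
The reader/writer protocol behind __k32v64_hash_lookup() and
k32v64_hash_add() is a per-bucket sequence counter: the writer increments
cnt before and after mutating a bucket, and a reader retries when it sees
the write-in-progress bit or a counter change across its reads. A distilled,
self-contained sketch of that pattern (generic payload, hypothetical names,
single writer assumed):

#include <stdint.h>

struct bucket {
	uint32_t cnt;		/* sequence counter, odd while a write runs */
	uint64_t payload;	/* stands in for the key/value arrays */
};

static void
bucket_write(struct bucket *b, uint64_t v)
{
	/* Odd counter tells readers a write is in progress. */
	__atomic_fetch_add(&b->cnt, 1, __ATOMIC_ACQUIRE);
	b->payload = v;
	/* Even again; the release publishes the new payload. */
	__atomic_fetch_add(&b->cnt, 1, __ATOMIC_RELEASE);
}

static uint64_t
bucket_read(struct bucket *b)
{
	uint32_t cnt;
	uint64_t v;

	do {
		/* Wait out any in-flight write. */
		do {
			cnt = __atomic_load_n(&b->cnt, __ATOMIC_ACQUIRE);
		} while (cnt & 1);

		v = b->payload;	/* speculative read */

		/* Order the payload read before re-checking the counter,
		 * mirroring the release fence used in the patch. */
		__atomic_thread_fence(__ATOMIC_RELEASE);
	} while (cnt != __atomic_load_n(&b->cnt, __ATOMIC_RELAXED));

	return v;
}

With a single writer this keeps lookups lock-free: a torn read is detected
by the counter mismatch and simply retried.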
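
Finally, the Makefile and meson changes compile k32v64_hash_avx512vl.c only
when the compiler accepts -mavx512vl, while get_lookup_bulk_fn() selects an
implementation at run time. A generic sketch of that compile-time plus
run-time dispatch idiom (function names here are illustrative, not from the
patch):

#include <rte_cpuflags.h>

typedef int (*lookup_fn_t)(const void *tbl);

/* Scalar fallback, always compiled. */
static int
lookup_scalar(const void *tbl)
{
	(void)tbl;
	return 0;
}

#ifdef CC_AVX512VL_SUPPORT
/* Vector path, defined in a separate object built with -mavx512vl. */
int lookup_avx512vl(const void *tbl);
#endif

static lookup_fn_t
select_lookup(void)
{
#ifdef CC_AVX512VL_SUPPORT
	/* Take the vector path only if the running CPU reports AVX512VL;
	 * the same binary must still run on older machines. */
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL))
		return lookup_avx512vl;
#endif
	return lookup_scalar;
}

Keeping the AVX512 code in its own translation unit is what makes this
safe: only that object is built with -mavx512vl, so the compiler cannot
emit AVX512 instructions on paths that always execute.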