@@ -284,6 +284,12 @@ CONFIG_RTE_LIBRTE_HASH=y
CONFIG_RTE_LIBRTE_HASH_DEBUG=n
#
+# Compile librte_tshash
+#
+CONFIG_RTE_LIBRTE_THREAD_SAFE_HASH=y
+CONFIG_RTE_LIBRTE_THREAD_SAFE_HASH_STATS=y
+
+#
# Compile librte_lpm
#
CONFIG_RTE_LIBRTE_LPM=y
@@ -312,6 +312,12 @@ CONFIG_RTE_LIBRTE_HASH=y
CONFIG_RTE_LIBRTE_HASH_DEBUG=n
#
+# Compile librte_tshash
+#
+CONFIG_RTE_LIBRTE_THREAD_SAFE_HASH=y
+CONFIG_RTE_LIBRTE_THREAD_SAFE_HASH_STATS=y
+
+#
# Compile librte_lpm
#
CONFIG_RTE_LIBRTE_LPM=y
@@ -51,6 +51,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio
DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3
DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt
DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
+DIRS-$(CONFIG_RTE_LIBRTE_THREAD_SAFE_HASH) += librte_tshash
DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net
@@ -68,6 +68,7 @@ extern struct rte_logs rte_logs;
#define RTE_LOGTYPE_TIMER 0x00000010 /**< Log related to timers. */
#define RTE_LOGTYPE_PMD 0x00000020 /**< Log related to poll mode driver. */
#define RTE_LOGTYPE_HASH 0x00000040 /**< Log related to hash table. */
+#define RTE_LOGTYPE_TSHASH 0x00000040 /**< Log related to thread safe hash table. */
#define RTE_LOGTYPE_LPM 0x00000080 /**< Log related to LPM. */
#define RTE_LOGTYPE_KNI 0x00000100 /**< Log related to KNI. */
#define RTE_LOGTYPE_ACL 0x00000200 /**< Log related to ACL. */
@@ -72,6 +72,8 @@ rte_tailq_elem(RTE_TAILQ_RING, "RTE_RING")
rte_tailq_elem(RTE_TAILQ_HASH, "RTE_HASH")
+rte_tailq_elem(RTE_TAILQ_TSHASH, "RTE_TSHASH")
+
rte_tailq_elem(RTE_TAILQ_FBK_HASH, "RTE_FBK_HASH")
rte_tailq_elem(RTE_TAILQ_LPM, "RTE_LPM")
new file mode 100644
@@ -0,0 +1,49 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_tshash.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_THREAD_SAFE_HASH) := rte_tshash.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_THREAD_SAFE_HASH)-include := rte_tshash.h
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_THREAD_SAFE_HASH) += lib/librte_eal lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
new file mode 100644
@@ -0,0 +1,580 @@
+/**
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <x86intrin.h>
+
+#include <rte_tailq.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_eal_memconfig.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+#include <rte_memcpy.h>
+#include <rte_rwlock.h>
+#include <rte_log.h>
+
+#include "rte_tshash.h"
+
+#define DEFAULT_LOAD_FACTOR (0.5)
+
+/* Hash function used if none is specified */
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+#include <rte_hash_crc.h>
+#define DEFAULT_TSHASH_FUNC rte_hash_crc
+#else
+#include <rte_jhash.h>
+#define DEFAULT_TSHASH_FUNC rte_jhash
+#endif
+
+TAILQ_HEAD(rte_tshash_list, rte_tshash);
+
+static int rte_tshash_k16_cmp_eq(const void *key1, const void *key2,
+ __rte_unused uint8_t key_size);
+static int rte_tshash_k32_cmp_eq(const void *key1, const void *key2,
+ uint8_t key_size);
+static int rte_tshash_k48_cmp_eq(const void *key1, const void *key2,
+ uint8_t key_size);
+static int rte_tshash_k64_cmp_eq(const void *key1, const void *key2,
+ uint8_t key_size);
+static int rte_tshash_k96_cmp_eq(const void *key1, const void *key2,
+ uint8_t key_size);
+static int rte_tshash_k128_cmp_eq(const void *key1, const void *key2,
+ uint8_t key_size);
+
+struct rte_tshash *
+rte_tshash_create(const char *name, unsigned max_entries,
+ unsigned key_size, int socket_id,
+ const struct rte_tshash_extra_args *e_args)
+{
+ const int noflags = 0;
+ const unsigned anyalignment = 0;
+
+ struct rte_tshash *h = NULL;
+ struct rte_tshash_list *tshash_list;
+ void *k = NULL;
+ struct rte_ring *r = NULL;
+ char ring_name[RTE_RING_NAMESIZE];
+ unsigned use_malloc = 0;
+ unsigned update_add = 0;
+ double load_factor = DEFAULT_LOAD_FACTOR;
+ rte_tshash_cmp_eq_t tshash_cmp_eq_param = NULL;
+ rte_tshash_function_t tshash_func_param = NULL;
+
+ uint32_t num_hashes;
+ uint32_t num_buckets;
+ unsigned key_entry_size;
+ unsigned i;
+
+ RTE_BUILD_BUG_ON(sizeof(struct rte_tshash_bucket) != CACHE_LINE_SIZE);
+
+ if (name == NULL || max_entries < RTE_TSHASH_MIN_ENTRIES ||
+ socket_id >= RTE_MAX_NUMA_NODES)
+ goto param_err;
+
+ if (e_args != NULL) {
+ use_malloc = e_args->malloc_mem;
+ load_factor = e_args->max_load_factor;
+ tshash_cmp_eq_param = e_args->rte_tshash_cmp_eq;
+ tshash_func_param = e_args->hash_func;
+ update_add = e_args->update_add;
+
+ if (load_factor > RTE_TSHASH_MAXIMUM_LOAD_FACTOR)
+ goto param_err;
+ }
+
+ /* check the key-size is a supported value */
+ if (key_size != 16 && key_size != 32 && key_size != 48 &&
+ key_size != 64 && key_size != 96 && key_size != 128)
+ if (!tshash_cmp_eq_param)
+ goto param_err;
+
+ if (key_size == 0)
+ goto param_err;
+
+ /* check that we have an initialised tail queue */
+ tshash_list = RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_TSHASH, rte_tshash_list);
+ if (tshash_list == NULL) {
+ rte_errno = E_RTE_NO_TAILQ;
+ return NULL;
+ }
+
+ /* calculate number of hash values entries to be used. Floating point
+ * can be inaccurate, so subtract a bit so that we don't accidently
+ * overflow a power of 2, and then scale up to the next one unnecessarily
+ */
+ num_hashes = ((double)max_entries / load_factor) - 2;
+ num_hashes = rte_align32pow2(num_hashes);
+ num_buckets = num_hashes / RTE_TSHASH_BUCKET_SIZE;
+
+
+ /* whatever number of entries for keys we want, we need 1 more as a dummy
+ * entry, which is used in case of a miss on a bucket scan.
+ */
+ max_entries++;
+
+ key_entry_size = sizeof(struct rte_tshash_key) + key_size;
+
+ if (use_malloc) {
+ h = rte_zmalloc_socket(NULL, sizeof(*h) +
+ (sizeof(h->bkts[1]) * num_buckets),
+ anyalignment, socket_id);
+ if (h == NULL)
+ goto memory_err;
+
+ k = rte_zmalloc_socket(NULL, key_entry_size * max_entries,
+ anyalignment, socket_id);
+ if (k == NULL)
+ goto memory_err;
+
+ } else {
+ char mz_name[RTE_MEMZONE_NAMESIZE];
+ char key_mz_name[RTE_MEMZONE_NAMESIZE];
+ const struct rte_memzone *mz;
+
+ snprintf(mz_name, sizeof(mz_name), "HSH_%s", name);
+ snprintf(key_mz_name, sizeof(key_mz_name), "HSH_KEYS_%s", name);
+
+ mz = rte_memzone_reserve(mz_name, sizeof(*h) +
+ (sizeof(h->bkts[1]) * num_buckets),
+ socket_id, noflags);
+ if (mz == NULL)
+ goto memory_err;
+ h = mz->addr;
+
+ mz = rte_memzone_reserve(key_mz_name, key_entry_size * max_entries,
+ socket_id, noflags);
+ if (mz == NULL)
+ goto memory_err;
+ k = mz->addr;
+ }
+ snprintf(ring_name, sizeof(ring_name), "HSH_%s", name);
+ r = rte_ring_create(ring_name, rte_align32pow2(max_entries), socket_id,
+ noflags);
+ if (r == NULL)
+ goto memory_err;
+
+ RTE_LOG(INFO, TSHASH, "Created Hash Table\n");
+ RTE_LOG(INFO, TSHASH, "-> Hash value slots: %u\n", num_hashes);
+ RTE_LOG(INFO, TSHASH, "-> Hash key slots : %u\n", max_entries);
+ RTE_LOG(INFO, TSHASH, "-> Free ring size : %u\n",
+ rte_align32pow2(max_entries + 1));
+ RTE_LOG(INFO, TSHASH, "-> Buckets : %u\n", num_buckets);
+
+ /* add the free slots ring to the hash table, and do other assignments */
+ h->free_slots = r;
+ h->key_store = k;
+ h->socket_id = socket_id;
+ h->bucket_mask = (num_buckets - 1);
+ h->key_entry_size = key_entry_size;
+ h->key_size = key_size;
+ h->max_items = max_entries;
+ h->rte_tshash_cmp_eq = tshash_cmp_eq_param;
+
+ if (h->rte_tshash_cmp_eq == NULL)
+ switch (h->key_size) {
+ case 16:
+ h->rte_tshash_cmp_eq = rte_tshash_k16_cmp_eq;
+ break;
+ case 32:
+ h->rte_tshash_cmp_eq = rte_tshash_k32_cmp_eq;
+ break;
+ case 48:
+ h->rte_tshash_cmp_eq = rte_tshash_k48_cmp_eq;
+ break;
+ case 64:
+ h->rte_tshash_cmp_eq = rte_tshash_k64_cmp_eq;
+ break;
+ case 96:
+ h->rte_tshash_cmp_eq = rte_tshash_k96_cmp_eq;
+ break;
+ case 128:
+ h->rte_tshash_cmp_eq = rte_tshash_k128_cmp_eq;
+ break;
+ }
+
+ if (tshash_func_param)
+ h->tshash_func = tshash_func_param;
+ else
+ h->tshash_func = DEFAULT_TSHASH_FUNC;
+
+ h->update_add = update_add;
+
+ snprintf(h->name, sizeof(h->name), "%s", name);
+
+ /* populate the free slots ring. Entry zero is reserved for key misses */
+ for (i = 1; i < max_entries; i++)
+ rte_ring_sp_enqueue(r, (void *)((uintptr_t)i));
+
+ TAILQ_INSERT_TAIL(tshash_list, h, next);
+ return h;
+
+param_err:
+ rte_errno = EINVAL;
+ return NULL;
+
+memory_err:
+ if (use_malloc) {
+ if (h != NULL)
+ rte_free(h);
+ if (k != NULL)
+ rte_free(k);
+ }
+ rte_errno = ENOMEM;
+ return NULL;
+}
+
+void
+rte_tshash_reset(struct rte_tshash *h)
+{
+ unsigned i;
+ void *ptr;
+ struct rte_tshash_bucket *next_bucket_to_delete, *bucket_to_delete;
+
+ /* first: rte_free the extra buckets */
+ for (i = 0; i < h->bucket_mask+1; i++) {
+ next_bucket_to_delete = h->bkts[i].next;
+ while (next_bucket_to_delete) {
+ bucket_to_delete = next_bucket_to_delete;
+ next_bucket_to_delete = bucket_to_delete->next;
+ rte_free(bucket_to_delete);
+ }
+ }
+ /* zero the buckets */
+ memset(h->bkts, 0, (h->bucket_mask+1)*sizeof(h->bkts[0]));
+
+ /* clear the free ring */
+ while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
+ rte_pause();
+
+ /* zero key slots */
+ memset(h->key_store, 0, h->key_entry_size * h->max_items);
+ /* Repopulate the free ring. Entry zero is reserved for key misses */
+ for (i = 1; i < h->max_items; i++)
+ rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t)i));
+#ifdef RTE_LIBRTE_THREAD_SAFE_HASH_STATS
+ /* zero the stats */
+ h->stats.num_extra_buckets = h->stats.used_slots = 0;
+#endif
+}
+
+struct rte_tshash *
+rte_tshash_find_existing(const char *name)
+{
+ struct rte_tshash *h;
+ struct rte_tshash_list *tshash_list;
+
+ /* check that we have an initialised tail queue */
+ tshash_list = RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_TSHASH, rte_tshash_list);
+ if (tshash_list == NULL) {
+ rte_errno = E_RTE_NO_TAILQ;
+ return NULL;
+ }
+
+ rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+ TAILQ_FOREACH(h, tshash_list, next) {
+ if (strncmp(name, h->name, RTE_TSHASH_NAMESIZE) == 0)
+ break;
+ }
+ rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+ if (h == NULL)
+ rte_errno = ENOENT;
+ return h;
+}
+
+static int
+rte_tshash_k16_cmp_eq(const void *key1, const void *key2,
+ __rte_unused uint8_t key_size)
+{
+ const __m128i k1 = _mm_load_si128((const __m128i *) key1);
+ const __m128i k2 = _mm_load_si128((const __m128i *) key2);
+ const __m128i x = _mm_xor_si128(k1, k2);
+ return _mm_test_all_zeros(x, x);
+}
+
+static int
+rte_tshash_k32_cmp_eq(const void *key1, const void *key2, uint8_t key_size)
+{
+ return rte_tshash_k16_cmp_eq(key1, key2, key_size) &&
+ rte_tshash_k16_cmp_eq((const char *)key1+16,
+ (const char *)key2+16, key_size);
+}
+
+static int
+rte_tshash_k48_cmp_eq(const void *key1, const void *key2, uint8_t key_size)
+{
+ return rte_tshash_k16_cmp_eq(key1, key2, key_size) &&
+ rte_tshash_k16_cmp_eq((const char *)key1+16,
+ (const char *)key2+16, key_size) &&
+ rte_tshash_k16_cmp_eq((const char *)key1+32,
+ (const char *)key2+32, key_size);
+}
+
+static int
+rte_tshash_k64_cmp_eq(const void *key1, const void *key2, uint8_t key_size)
+{
+ return rte_tshash_k32_cmp_eq(key1, key2, key_size) &&
+ rte_tshash_k32_cmp_eq((const char *)key1+32,
+ (const char *)key2+32, key_size);
+}
+
+static int
+rte_tshash_k96_cmp_eq(const void *key1, const void *key2, uint8_t key_size)
+{
+ return rte_tshash_k64_cmp_eq(key1, key2, key_size) &&
+ rte_tshash_k32_cmp_eq((const char *)key1+64,
+ (const char *)key2+64, key_size);
+}
+
+static int
+rte_tshash_k128_cmp_eq(const void *key1, const void *key2, uint8_t key_size)
+{
+ return rte_tshash_k64_cmp_eq(key1, key2, key_size) &&
+ rte_tshash_k64_cmp_eq((const char *)key1+64,
+ (const char *)key2+64, key_size);
+}
+
+int
+rte_tshash_add_key_with_hash(struct rte_tshash *h, uint64_t hash_val,
+ const void *key, uintptr_t idata)
+{
+ struct rte_tshash_bucket *bkt = &h->bkts[hash_val & h->bucket_mask];
+ struct rte_tshash_bucket *add_bkt = NULL;
+ struct rte_tshash_key *key_slot, *keys = h->key_store;
+ void *slot_id;
+ unsigned pos = 0;
+ int free_pos = -1;
+
+ rte_prefetch0(bkt);
+
+ if (rte_ring_mc_dequeue(h->free_slots, &slot_id) != 0)
+ return -ENOSPC;
+
+ key_slot = (struct rte_tshash_key *)((char *)keys +
+ (uintptr_t)slot_id * h->key_entry_size);
+ rte_prefetch0(key_slot);
+
+ do {
+ for (pos = 0; pos < RTE_TSHASH_BUCKET_SIZE; pos++) {
+ /*
+ * If free slot has not been found yet and current one
+ * is empty, make it invalid for later use
+ */
+ if (free_pos == -1 && bkt->hash_val[pos] == RTE_TSHASH_EMPTY) {
+ if (__sync_bool_compare_and_swap(&bkt->hash_val[pos],
+ RTE_TSHASH_EMPTY, RTE_TSHASH_INVALID)) {
+ free_pos = pos;
+ add_bkt = bkt;
+ } else
+ continue;
+ } else if (bkt->hash_val[pos] == (hash_val | RTE_TSHASH_VALID)) {
+ /* check key for match */
+ struct rte_tshash_key *k = (struct rte_tshash_key *)
+ ((char *)keys + bkt->key_idx[pos] * h->key_entry_size);
+ if (h->rte_tshash_cmp_eq((const void *)k->key,
+ key, h->key_size)) {
+ rte_ring_mp_enqueue(h->free_slots, slot_id);
+ if (free_pos >= 0)
+ bkt->hash_val[free_pos] = RTE_TSHASH_EMPTY;
+ if (!h->update_add)
+ return -EEXIST;
+ /* Update just data */
+ rte_atomic64_set((rte_atomic64_t *)&k->idata, idata);
+ return 0;
+ }
+ }
+ }
+ if (pos == RTE_TSHASH_BUCKET_SIZE) {
+ if (bkt->next == NULL && free_pos < 0) {
+ void *new_bkt = rte_zmalloc(NULL,
+ sizeof(struct rte_tshash_bucket), 0);
+ if (new_bkt == NULL) {
+ rte_ring_mp_enqueue(h->free_slots, slot_id);
+ return -ENOMEM;
+ }
+ if (!__sync_bool_compare_and_swap(&bkt->next, NULL, new_bkt)) {
+ /* if we can't add the new bucket, tell user to retry */
+ rte_ring_mp_enqueue(h->free_slots, slot_id);
+ rte_free(new_bkt);
+ return -EAGAIN;
+ }
+#ifdef RTE_LIBRTE_THREAD_SAFE_HASH_STATS
+ __sync_fetch_and_add(&h->stats.num_extra_buckets, 1);
+#endif
+ }
+ bkt = bkt->next;
+ rte_prefetch0(bkt);
+ pos = 0;
+ }
+ } while (bkt);
+
+ __sync_fetch_and_add(&key_slot->counter, 1);
+ rte_compiler_barrier();
+ rte_memcpy(key_slot->key, key, h->key_size);
+ key_slot->idata = idata;
+
+ add_bkt->key_idx[free_pos] = (uint32_t)((uintptr_t)slot_id);
+ add_bkt->counter[free_pos] = key_slot->counter;
+ rte_compiler_barrier();
+ add_bkt->hash_val[free_pos] = hash_val | RTE_TSHASH_VALID;
+#ifdef RTE_LIBRTE_THREAD_SAFE_HASH_STATS
+ __sync_fetch_and_add(&h->stats.used_slots, 1);
+#endif
+ return 0;
+}
+
+int
+rte_tshash_add_key(struct rte_tshash *h, const void *key, uintptr_t idata)
+{
+ return rte_tshash_add_key_with_hash(h, rte_tshash_hash(h, key), key, idata);
+}
+
+int
+rte_tshash_del_key_with_hash(struct rte_tshash *h,
+ uint64_t hash_val, const void *key, uintptr_t *value)
+{
+ struct rte_tshash_key *keys[RTE_TSHASH_BUCKET_SIZE] = {0};
+ unsigned i;
+ struct rte_tshash_bucket *bkt = &h->bkts[hash_val & h->bucket_mask];
+ struct rte_tshash_key *key_store = h->key_store;
+ _mm_prefetch((const char *)bkt, 0);
+
+ do {
+ rte_prefetch0(bkt->next);
+
+ /* Check if there is a hash value match */
+ for (i = 0; i < RTE_TSHASH_BUCKET_SIZE; i++)
+ if (bkt->hash_val[i] == (hash_val | RTE_TSHASH_VALID)) {
+ keys[i] = (struct rte_tshash_key *)((char *)key_store +
+ (bkt->key_idx[i] * h->key_entry_size));
+ rte_prefetch0((const char *)&keys[i]);
+ }
+
+ for (i = 0; i < RTE_TSHASH_BUCKET_SIZE; i++) {
+ if (keys[i] != 0 && h->rte_tshash_cmp_eq((const void *) keys[i]->key,
+ key, h->key_size)) {
+ if (!__sync_bool_compare_and_swap(&bkt->hash_val[i],
+ hash_val | RTE_TSHASH_VALID, RTE_TSHASH_EMPTY))
+ /* Already deleted */
+ return 0;
+
+ rte_compiler_barrier();
+
+ /*
+ * If user wants to free the data associated to this entry,
+ * the pointer shall be returned
+ */
+ if (value != NULL)
+ *value = keys[i]->idata;
+
+ /* TODO: Remove bucket if all entries in it are empty */
+ rte_ring_mp_enqueue(h->free_slots,
+ (void *)((uintptr_t)bkt->key_idx[i]));
+#ifdef RTE_LIBRTE_THREAD_SAFE_HASH_STATS
+ __sync_fetch_and_sub(&h->stats.used_slots, 1);
+#endif
+ return 0;
+ }
+ }
+
+ bkt = bkt->next;
+
+ } while (bkt);
+
+ /* No such key stored */
+ return -1;
+}
+
+int
+rte_tshash_del_key(struct rte_tshash *h, const void *key, uintptr_t *value)
+{
+ return rte_tshash_del_key_with_hash(h, rte_tshash_hash(h, key), key, value);
+}
+
+int
+rte_tshash_lookup_with_hash(const struct rte_tshash *h,
+ uint64_t hash_val, const void *key, uintptr_t *value)
+{
+ const struct rte_tshash_key *keys[RTE_TSHASH_BUCKET_SIZE] = {0};
+ unsigned num_slots;
+ unsigned i;
+ const struct rte_tshash_bucket *bkt = &h->bkts[hash_val & h->bucket_mask];
+ const struct rte_tshash_key *key_store = h->key_store;
+ uint8_t counters[RTE_TSHASH_BUCKET_SIZE] = {0};
+
+ rte_prefetch0((const char *)bkt);
+
+ do {
+ rte_prefetch0(bkt->next);
+ num_slots = 0;
+
+ /* Check if there is a hash value match */
+ for (i = 0; i < RTE_TSHASH_BUCKET_SIZE; i++)
+ if (bkt->hash_val[i] == (hash_val | RTE_TSHASH_VALID)) {
+ keys[num_slots] = (const struct rte_tshash_key *)
+ ((const char *)key_store +
+ (bkt->key_idx[i] * h->key_entry_size));
+ rte_prefetch0((const char *)&keys[num_slots]);
+ counters[num_slots] = bkt->counter[i];
+ num_slots++;
+ }
+
+ for (i = 0; i < num_slots; i++) {
+ if (h->rte_tshash_cmp_eq((const void *) keys[i]->key,
+ key, h->key_size)) {
+ *value = keys[i]->idata;
+ rte_compiler_barrier();
+ /* Check counter in key slot is same as counter in bucket */
+ if (unlikely(counters[i] != keys[i]->counter))
+ /* Tell user to retry */
+ return -EAGAIN;
+ return 0;
+ }
+ }
+
+ bkt = bkt->next;
+
+ } while (bkt);
+
+ return -1;
+}
+
+int
+rte_tshash_lookup(const struct rte_tshash *h, const void *key,
+ uintptr_t *value)
+{
+ return rte_tshash_lookup_with_hash(h, rte_tshash_hash(h, key), key, value);
+}
+
+
new file mode 100644
@@ -0,0 +1,533 @@
+/**
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_RTE_TSHASH_H_
+#define _RTE_RTE_TSHASH_H_
+
+#include <stdint.h>
+
+#include <rte_memory.h>
+#include <rte_ring.h>
+#include <rte_prefetch.h>
+#include <rte_branch_prediction.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Max number of characters in hash name.*/
+#define RTE_TSHASH_NAMESIZE 32
+
+/** Number of elements in each bucket */
+#define RTE_TSHASH_BUCKET_SIZE 4
+
+#define RTE_TSHASH_MAX_BURST 64
+#define RTE_TSHASH_MIN_ENTRIES 32
+
+#define RTE_TSHASH_MAXIMUM_LOAD_FACTOR (1.0)
+
+typedef uint32_t (*rte_tshash_function_t)(const void *key, uint32_t key_len,
+ uint32_t init_val);
+typedef int (*rte_tshash_cmp_eq_t)(const void *key1, const void *key2,
+ uint8_t key_len);
+
+enum {
+ RTE_TSHASH_EMPTY = 0,
+ RTE_TSHASH_INVALID = 1,
+ RTE_TSHASH_VALID = 3
+};
+
+struct rte_tshash_extra_args {
+ double max_load_factor;
+ unsigned malloc_mem;
+ rte_tshash_cmp_eq_t rte_tshash_cmp_eq;
+ rte_tshash_function_t hash_func;
+ unsigned update_add;
+};
+
+struct rte_tshash_bucket {
+ uint64_t hash_val[RTE_TSHASH_BUCKET_SIZE];
+ uint32_t key_idx[RTE_TSHASH_BUCKET_SIZE + 1]; /* have dummy entry on end,
+ always points to entry zero */
+
+ uint8_t counter[RTE_TSHASH_BUCKET_SIZE];
+
+ struct rte_tshash_bucket *next;
+} __rte_cache_aligned;
+
+struct rte_tshash_key {
+ union {
+ uintptr_t idata;
+ void *pdata;
+ };
+ uint8_t counter;
+ /* Variable key size */
+ char key[] __attribute__((aligned(16)));
+};
+
+#ifdef RTE_LIBRTE_THREAD_SAFE_HASH_STATS
+struct rte_tshash_stats {
+ unsigned used_slots;
+ unsigned num_extra_buckets;
+};
+#endif
+
+struct rte_tshash {
+ TAILQ_ENTRY(rte_tshash) next;/**< Next in list. */
+ char name[RTE_TSHASH_NAMESIZE];
+
+ unsigned key_size __rte_cache_aligned;
+ unsigned key_entry_size;
+ unsigned bucket_mask;
+ unsigned socket_id;
+ unsigned max_items;
+#ifdef RTE_LIBRTE_THREAD_SAFE_HASH_STATS
+ struct rte_tshash_stats stats;
+#endif
+
+ struct rte_ring *free_slots;
+ void *key_store;
+
+ rte_tshash_cmp_eq_t rte_tshash_cmp_eq;
+ rte_tshash_function_t tshash_func;
+ uint32_t tshash_func_init_val;
+
+ unsigned update_add;
+
+ struct rte_tshash_bucket bkts[];
+} __rte_cache_aligned;
+
+
+struct rte_tshash *
+rte_tshash_create(const char *name, unsigned max_entries, unsigned key_size,
+ int socket_id, const struct rte_tshash_extra_args *e_args);
+
+struct rte_tshash *
+rte_tshash_find_existing(const char *name);
+
+void
+rte_tshash_reset(struct rte_tshash *h);
+
+static inline uint64_t
+rte_tshash_hash(const struct rte_tshash *h, const void *key)
+{
+ /* calc hash result by key */
+ return h->tshash_func(key, h->key_size, h->tshash_func_init_val);
+}
+
+int
+rte_tshash_add_key_with_hash(struct rte_tshash *h, uint64_t hash_val,
+ const void *key, uintptr_t idata);
+int
+rte_tshash_add_key(struct rte_tshash *h, const void *key, uintptr_t idata);
+
+int
+rte_tshash_del_key_with_hash(struct rte_tshash *h, uint64_t hash_val,
+ const void *key, uintptr_t *value);
+
+int
+rte_tshash_del_key(struct rte_tshash *h, const void *key, uintptr_t *value);
+
+int
+rte_tshash_lookup_with_hash(const struct rte_tshash *h,
+ uint64_t hash_val, const void *key, uintptr_t *value);
+
+int
+rte_tshash_lookup(const struct rte_tshash *h, const void *key, uintptr_t *value);
+
+
+/* Lookup bulk stage 0: Prefetch next hash value */
+static inline void
+lookup_stage0(unsigned *idx, uint64_t *lookup_mask,
+ uint64_t *const *hash_vals __rte_unused)
+{
+ *idx = __builtin_ctzl(*lookup_mask);
+ if (*lookup_mask == 0)
+ *idx = 0;
+ rte_prefetch0(hash_vals[*idx]);
+ *lookup_mask &= ~(1llu << *idx);
+}
+
+/* Lookup bulk stage 0: Calculate next hash value (from new key) */
+static inline void
+lookup_stage0_key(unsigned *idx, uint64_t *lookup_mask, uint64_t *calc_hash,
+ const void * const *keys, const struct rte_tshash *h)
+{
+ *idx = __builtin_ctzl(*lookup_mask);
+ if (*lookup_mask == 0)
+ *idx = 0;
+
+ *calc_hash = rte_tshash_hash(h, keys[*idx]);
+ *lookup_mask &= ~(1llu << *idx);
+}
+
+
+/* Lookup bulk stage 1: Get next hash value and prefetch associated bucket */
+static inline void
+lookup_stage1(unsigned idx, uint64_t *hash, const struct rte_tshash_bucket **bkt,
+ uint64_t *const *hash_vals, const struct rte_tshash *h)
+{
+ *hash = *hash_vals[idx];
+ *bkt = &h->bkts[*hash & h->bucket_mask];
+ rte_prefetch0(*bkt);
+
+ *hash |= RTE_TSHASH_VALID;
+}
+
+
+/* Lookup bulk stage 1: Prefetch associated bucket */
+static inline void
+lookup_stage1_key(uint64_t *hash, const struct rte_tshash_bucket **bkt,
+ const struct rte_tshash *h)
+{
+ *bkt = &h->bkts[*hash & h->bucket_mask];
+ rte_prefetch0(*bkt);
+
+ *hash |= RTE_TSHASH_VALID;
+}
+
+
+/*
+ * Lookup bulk stage 2: Store pointer to next bucket (if any). Search for match
+ * hashes in first level and prefetch first key slot
+ */
+static inline void
+lookup_stage2(unsigned idx, uint64_t hash, const struct rte_tshash_bucket *bkt,
+ const struct rte_tshash_key **key_slot, uint8_t *counter,
+ uint64_t *next_mask, uint64_t *extra_hits_mask,
+ const struct rte_tshash_bucket *next_bkts[],
+ const struct rte_tshash_key *key_store, const struct rte_tshash *h)
+{
+ unsigned hash_matches, i;
+
+ *next_mask |= (uint64_t)(!!bkt->next) << idx;
+ next_bkts[idx] = bkt->next;
+
+ hash_matches = 1 << RTE_TSHASH_BUCKET_SIZE;
+ for (i = 0; i < RTE_TSHASH_BUCKET_SIZE; i++)
+ hash_matches |= ((hash == bkt->hash_val[i]) << i);
+
+ unsigned key_idx = bkt->key_idx[__builtin_ctzl(hash_matches)];
+ *key_slot = (const struct rte_tshash_key *)((const char *)key_store +
+ key_idx * h->key_entry_size);
+ rte_prefetch0(*key_slot);
+ *counter = bkt->counter[__builtin_ctz(hash_matches)];
+
+ *extra_hits_mask |= (uint64_t)(__builtin_popcount(hash_matches) > 2) << idx;
+}
+
+
+/*
+ * Lookup bulk stage 3: Check if key matches, update hit mask
+ * and store data in values[]
+ */
+static inline void
+lookup_stage3(unsigned idx, const struct rte_tshash_key *key_slot,
+ uint8_t counter, uintptr_t values[], const void * const *keys,
+ uint64_t *hits, const struct rte_tshash *h)
+{
+ values[idx] = key_slot->idata;
+ unsigned hit = h->rte_tshash_cmp_eq((const void *)key_slot->key,
+ keys[idx], h->key_size);
+ rte_compiler_barrier();
+ *hits |= (uint64_t)(hit & (key_slot->counter == counter)) << idx;
+}
+
+
+static inline int
+rte_tshash_lookup_bulk_with_hash(const struct rte_tshash *h,
+ uint64_t lookup_mask, uint64_t *const *hash_vals,
+ const void * const *keys, uintptr_t values[], uint64_t *hit_mask)
+{
+ uint64_t hits = 0;
+ uint64_t next_mask = 0;
+ uint64_t extra_hits_mask = 0;
+ const struct rte_tshash_key *key_store = h->key_store;
+ const struct rte_tshash_bucket *next_bkts[RTE_TSHASH_MAX_BURST];
+
+ unsigned idx00, idx01, idx10, idx11, idx20, idx21, idx30, idx31;
+ const struct rte_tshash_bucket *bkt10, *bkt11, *bkt20, *bkt21;
+ const struct rte_tshash_key *k_slot20, *k_slot21, *k_slot30, *k_slot31;
+ uint64_t hash10, hash11, hash20, hash21;
+ uint8_t counter20, counter21, counter30, counter31;
+
+ lookup_stage0(&idx00, &lookup_mask, hash_vals);
+ lookup_stage0(&idx01, &lookup_mask, hash_vals);
+
+ idx10 = idx00, idx11 = idx01;
+ lookup_stage0(&idx00, &lookup_mask, hash_vals);
+ lookup_stage0(&idx01, &lookup_mask, hash_vals);
+ lookup_stage1(idx10, &hash10, &bkt10, hash_vals, h);
+ lookup_stage1(idx11, &hash11, &bkt11, hash_vals, h);
+
+ bkt20 = bkt10, bkt21 = bkt11;
+ hash20 = hash10, hash21 = hash11;
+ idx20 = idx10, idx21 = idx11;
+ idx10 = idx00, idx11 = idx01;
+
+ lookup_stage0(&idx00, &lookup_mask, hash_vals);
+ lookup_stage0(&idx01, &lookup_mask, hash_vals);
+ lookup_stage1(idx10, &hash10, &bkt10, hash_vals, h);
+ lookup_stage1(idx11, &hash11, &bkt11, hash_vals, h);
+ lookup_stage2(idx20, hash20, bkt20, &k_slot20, &counter20,
+ &next_mask, &extra_hits_mask,
+ next_bkts, key_store, h);
+ lookup_stage2(idx21, hash21, bkt21, &k_slot21, &counter21,
+ &next_mask, &extra_hits_mask,
+ next_bkts, key_store, h);
+
+ while (lookup_mask) {
+ k_slot30 = k_slot20, k_slot31 = k_slot21;
+ idx30 = idx20, idx31 = idx21;
+ counter30 = counter20, counter31 = counter21;
+ bkt20 = bkt10, bkt21 = bkt11;
+ hash20 = hash10, hash21 = hash11;
+ idx20 = idx10, idx21 = idx11;
+ idx10 = idx00, idx11 = idx01;
+
+ lookup_stage0(&idx00, &lookup_mask, hash_vals);
+ lookup_stage0(&idx01, &lookup_mask, hash_vals);
+ lookup_stage1(idx10, &hash10, &bkt10, hash_vals, h);
+ lookup_stage1(idx11, &hash11, &bkt11, hash_vals, h);
+ lookup_stage2(idx20, hash20, bkt20, &k_slot20, &counter20,
+ &next_mask, &extra_hits_mask, next_bkts, key_store, h);
+ lookup_stage2(idx21, hash21, bkt21, &k_slot21, &counter21,
+ &next_mask, &extra_hits_mask, next_bkts, key_store, h);
+ lookup_stage3(idx30, k_slot30, counter30, values, keys, &hits, h);
+ lookup_stage3(idx31, k_slot31, counter31, values, keys, &hits, h);
+ }
+
+ k_slot30 = k_slot20, k_slot31 = k_slot21;
+ idx30 = idx20, idx31 = idx21;
+ counter30 = counter20, counter31 = counter21;
+ bkt20 = bkt10, bkt21 = bkt11;
+ hash20 = hash10, hash21 = hash11;
+ idx20 = idx10, idx21 = idx11;
+ idx10 = idx00, idx11 = idx01;
+ lookup_stage1(idx10, &hash10, &bkt10, hash_vals, h);
+ lookup_stage1(idx11, &hash11, &bkt11, hash_vals, h);
+ lookup_stage2(idx20, hash20, bkt20,
+ &k_slot20, &counter20, &next_mask, &extra_hits_mask,
+ next_bkts, key_store, h);
+ lookup_stage2(idx21, hash21, bkt21,
+ &k_slot21, &counter21, &next_mask, &extra_hits_mask,
+ next_bkts, key_store, h);
+ lookup_stage3(idx30, k_slot30, counter30, values, keys, &hits, h);
+ lookup_stage3(idx31, k_slot31, counter31, values, keys, &hits, h);
+
+ k_slot30 = k_slot20, k_slot31 = k_slot21;
+ idx30 = idx20, idx31 = idx21;
+ counter30 = counter20, counter31 = counter21;
+ bkt20 = bkt10, bkt21 = bkt11;
+ hash20 = hash10, hash21 = hash11;
+ idx20 = idx10, idx21 = idx11;
+ lookup_stage2(idx20, hash20, bkt20, &k_slot20, &counter20, &next_mask,
+ &extra_hits_mask, next_bkts, key_store, h);
+ lookup_stage2(idx21, hash21, bkt21, &k_slot21, &counter21, &next_mask,
+ &extra_hits_mask, next_bkts, key_store, h);
+ lookup_stage3(idx30, k_slot30, counter30, values, keys, &hits, h);
+ lookup_stage3(idx31, k_slot31, counter31, values, keys, &hits, h);
+
+ k_slot30 = k_slot20, k_slot31 = k_slot21;
+ idx30 = idx20, idx31 = idx21;
+ counter30 = counter20, counter31 = counter21;
+ lookup_stage3(idx30, k_slot30, counter30, values, keys, &hits, h);
+ lookup_stage3(idx31, k_slot31, counter31, values, keys, &hits, h);
+
+ /* handle extra_hits_mask */
+ next_mask |= extra_hits_mask;
+
+ /* ignore any items we have already found */
+ next_mask &= ~hits;
+
+ if (unlikely(next_mask)) {
+ /* run a single search for each remaining item */
+ do {
+ unsigned idx = __builtin_ctzl(next_mask);
+ rte_prefetch0(next_bkts[idx]);
+ hits |= (uint64_t)(!rte_tshash_lookup_with_hash(h,
+ *hash_vals[idx], keys[idx], &values[idx])) << idx;
+ next_mask &= ~(1llu << idx);
+ } while (next_mask);
+ }
+
+ *hit_mask = hits;
+ return __builtin_popcountl(hits);
+}
+
+static inline int
+rte_tshash_lookup_bulk(const struct rte_tshash *h,
+ uint64_t lookup_mask, const void * const *keys,
+ uintptr_t values[], uint64_t *hit_mask)
+{
+ uint64_t hits = 0;
+ uint64_t next_mask = 0;
+ uint64_t extra_hits_mask = 0;
+ unsigned idx;
+
+ const struct rte_tshash_key *key_store = h->key_store;
+ const struct rte_tshash_bucket *next_bkts[RTE_TSHASH_MAX_BURST];
+
+ unsigned idx00, idx01, idx10, idx11, idx20, idx21, idx30, idx31;
+ const struct rte_tshash_bucket *bkt10, *bkt11, *bkt20, *bkt21;
+ const struct rte_tshash_key *k_slot20, *k_slot21, *k_slot30, *k_slot31;
+ uint64_t hash00, hash01, hash10, hash11, hash20, hash21;
+ uint8_t counter20, counter21, counter30, counter31;
+
+ lookup_stage0_key(&idx00, &lookup_mask, &hash00, keys, h);
+ lookup_stage0_key(&idx01, &lookup_mask, &hash01, keys, h);
+
+ hash10 = hash00, hash11 = hash01;
+ idx10 = idx00, idx11 = idx01;
+ lookup_stage0_key(&idx00, &lookup_mask, &hash00, keys, h);
+ lookup_stage0_key(&idx01, &lookup_mask, &hash01, keys, h);
+ lookup_stage1_key(&hash10, &bkt10, h);
+ lookup_stage1_key(&hash11, &bkt11, h);
+
+ bkt20 = bkt10, bkt21 = bkt11;
+ hash20 = hash10, hash21 = hash11;
+ idx20 = idx10, idx21 = idx11;
+ hash10 = hash00, hash11 = hash01;
+ idx10 = idx00, idx11 = idx01;
+
+ lookup_stage0_key(&idx00, &lookup_mask, &hash00, keys, h);
+ lookup_stage0_key(&idx01, &lookup_mask, &hash01, keys, h);
+ lookup_stage1_key(&hash10, &bkt10, h);
+ lookup_stage1_key(&hash11, &bkt11, h);
+ lookup_stage2(idx20, hash20, bkt20, &k_slot20, &counter20,
+ &next_mask, &extra_hits_mask,
+ next_bkts, key_store, h);
+ lookup_stage2(idx21, hash21, bkt21, &k_slot21, &counter21,
+ &next_mask, &extra_hits_mask,
+ next_bkts, key_store, h);
+
+ while (lookup_mask) {
+ k_slot30 = k_slot20, k_slot31 = k_slot21;
+ idx30 = idx20, idx31 = idx21;
+ counter30 = counter20, counter31 = counter21;
+ bkt20 = bkt10, bkt21 = bkt11;
+ hash20 = hash10, hash21 = hash11;
+ idx20 = idx10, idx21 = idx11;
+ hash10 = hash00, hash11 = hash01;
+ idx10 = idx00, idx11 = idx01;
+
+ lookup_stage0_key(&idx00, &lookup_mask, &hash00, keys, h);
+ lookup_stage0_key(&idx01, &lookup_mask, &hash01, keys, h);
+ lookup_stage1_key(&hash10, &bkt10, h);
+ lookup_stage1_key(&hash11, &bkt11, h);
+ lookup_stage2(idx20, hash20, bkt20, &k_slot20, &counter20,
+ &next_mask, &extra_hits_mask, next_bkts,
+ key_store, h);
+ lookup_stage2(idx21, hash21, bkt21, &k_slot21, &counter21,
+ &next_mask, &extra_hits_mask, next_bkts,
+ key_store, h);
+ lookup_stage3(idx30, k_slot30, counter30, values, keys, &hits, h);
+ lookup_stage3(idx31, k_slot31, counter31, values, keys, &hits, h);
+ }
+
+ k_slot30 = k_slot20, k_slot31 = k_slot21;
+ idx30 = idx20, idx31 = idx21;
+ counter30 = counter20, counter31 = counter21;
+ bkt20 = bkt10, bkt21 = bkt11;
+ hash20 = hash10, hash21 = hash11;
+ idx20 = idx10, idx21 = idx11;
+ hash10 = hash00, hash11 = hash01;
+ idx10 = idx00, idx11 = idx01;
+ lookup_stage1_key(&hash10, &bkt10, h);
+ lookup_stage1_key(&hash11, &bkt11, h);
+ lookup_stage2(idx20, hash20, bkt20,
+ &k_slot20, &counter20, &next_mask, &extra_hits_mask,
+ next_bkts, key_store, h);
+ lookup_stage2(idx21, hash21, bkt21,
+ &k_slot21, &counter21, &next_mask, &extra_hits_mask,
+ next_bkts, key_store, h);
+ lookup_stage3(idx30, k_slot30, counter30, values, keys, &hits, h);
+ lookup_stage3(idx31, k_slot31, counter31, values, keys, &hits, h);
+
+ k_slot30 = k_slot20, k_slot31 = k_slot21;
+ idx30 = idx20, idx31 = idx21;
+ counter30 = counter20, counter31 = counter21;
+ bkt20 = bkt10, bkt21 = bkt11;
+ hash20 = hash10, hash21 = hash11;
+ idx20 = idx10, idx21 = idx11;
+ lookup_stage2(idx20, hash20, bkt20, &k_slot20, &counter20, &next_mask,
+ &extra_hits_mask, next_bkts, key_store, h);
+ lookup_stage2(idx21, hash21, bkt21, &k_slot21, &counter21, &next_mask,
+ &extra_hits_mask, next_bkts, key_store, h);
+ lookup_stage3(idx30, k_slot30, counter30, values, keys, &hits, h);
+ lookup_stage3(idx31, k_slot31, counter31, values, keys, &hits, h);
+
+ k_slot30 = k_slot20, k_slot31 = k_slot21;
+ idx30 = idx20, idx31 = idx21;
+ counter30 = counter20, counter31 = counter21;
+ lookup_stage3(idx30, k_slot30, counter30, values, keys, &hits, h);
+ lookup_stage3(idx31, k_slot31, counter31, values, keys, &hits, h);
+
+ /* handle extra_hits_mask */
+ next_mask |= extra_hits_mask;
+
+ /* ignore any items we have already found */
+ next_mask &= ~hits;
+
+ if (unlikely(next_mask)) {
+ /* run a single search for each remaining item */
+ do {
+ idx = __builtin_ctzl(next_mask);
+ rte_prefetch0(next_bkts[idx]);
+ hits |= (uint64_t)(!rte_tshash_lookup(h, keys[idx],
+ &values[idx])) << idx;
+ next_mask &= ~(1llu << idx);
+ } while (next_mask);
+ }
+
+ *hit_mask = hits;
+ return __builtin_popcountl(hits);
+}
+
+
+#ifdef RTE_LIBRTE_THREAD_SAFE_HASH_STATS
+static inline const struct rte_tshash_stats *
+rte_tshash_get_stats(const struct rte_tshash *h)
+{ return &h->stats; }
+#endif
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
new file mode 100644
@@ -0,0 +1,332 @@
+/**
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _RTE_VJHASH_H
+#define _RTE_VJHASH_H
+
+#ifndef RTE_LIBRTE_HASH
+#error "Need librte_hash enabled"
+#endif
+
+/**
+ * @file
+ *
+ * jhash, vector versions functions.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <x86intrin.h>
+
+#include <rte_jhash.h>
+
+#define __rte_jhash_mix4(a, b, c) do { \
+ a = _mm_sub_epi32(a, b); a = _mm_sub_epi32(a, c); \
+ a = _mm_xor_si128(a, _mm_srli_epi32(c, 13)); \
+ b = _mm_sub_epi32(b, c); b = _mm_sub_epi32(b, a); \
+ b = _mm_xor_si128(b, _mm_slli_epi32(a, 8)); \
+ c = _mm_sub_epi32(c, a); c = _mm_sub_epi32(c, b); \
+ c = _mm_xor_si128(c, _mm_srli_epi32(b, 13)); \
+ a = _mm_sub_epi32(a, b); a = _mm_sub_epi32(a, c); \
+ a = _mm_xor_si128(a, _mm_srli_epi32(c, 12)); \
+ b = _mm_sub_epi32(b, c); b = _mm_sub_epi32(b, a); \
+ b = _mm_xor_si128(b, _mm_slli_epi32(a, 16)); \
+ c = _mm_sub_epi32(c, a); c = _mm_sub_epi32(c, b); \
+ c = _mm_xor_si128(c, _mm_srli_epi32(b, 5)); \
+ a = _mm_sub_epi32(a, b); a = _mm_sub_epi32(a, c); \
+ a = _mm_xor_si128(a, _mm_srli_epi32(c, 3)); \
+ b = _mm_sub_epi32(b, c); b = _mm_sub_epi32(b, a); \
+ b = _mm_xor_si128(b, _mm_slli_epi32(a, 10)); \
+ c = _mm_sub_epi32(c, a); c = _mm_sub_epi32(c, b); \
+ c = _mm_xor_si128(c, _mm_srli_epi32(b, 15)); \
+} while (0)
+
+#define BURST 4
+
+static inline void
+_jhash_do_burst(const void *key[], uint32_t key_length,
+ uint32_t initval, uint32_t results[])
+{
+ uint32_t i = 0, len = key_length;
+ const uint32_t **k32 = (void *)key;
+ __m128i a, b, c;
+ __m128i lens;
+
+ a = b = _mm_set1_epi32(RTE_JHASH_GOLDEN_RATIO);
+ c = _mm_set1_epi32(initval);
+ lens = _mm_set1_epi32(key_length);
+
+ while (len >= 12) {
+ const __m128i a1 = _mm_set_epi32(k32[3][i+0], k32[2][i+0],
+ k32[1][i+0], k32[0][i+0]);
+ const __m128i b1 = _mm_set_epi32(k32[3][i+1], k32[2][i+1],
+ k32[1][i+1], k32[0][i+1]);
+ const __m128i c1 = _mm_set_epi32(k32[3][i+2], k32[2][i+2],
+ k32[1][i+2], k32[0][i+2]);
+
+ a = _mm_add_epi32(a, a1);
+ b = _mm_add_epi32(b, b1);
+ c = _mm_add_epi32(c, c1);
+
+ __rte_jhash_mix4(a, b, c);
+
+ len -= 12, i += 3;
+ }
+
+ c = _mm_add_epi32(c, lens);
+
+ /* now switch based on remaining bits */
+
+ switch (len) {
+ case 1:
+ do {
+ const __m128i a1 = _mm_set_epi32((uint8_t)k32[3][i],
+ (uint8_t)k32[2][i],
+ (uint8_t)k32[1][i],
+ (uint8_t)k32[0][i]);
+ a = _mm_add_epi32(a, a1);
+ } while (0);
+ break;
+ case 2:
+ do {
+ const __m128i a1 = _mm_set_epi32((uint16_t)k32[3][i],
+ (uint16_t)k32[2][i],
+ (uint16_t)k32[1][i],
+ (uint16_t)k32[0][i]);
+ a = _mm_add_epi32(a, a1);
+ } while (0);
+ break;
+ case 3:
+ do {
+ const __m128i a1 = _mm_set_epi32(k32[3][i] & 0xFFFFFF,
+ k32[2][i] & 0xFFFFFF,
+ k32[1][i] & 0xFFFFFF,
+ k32[0][i] & 0xFFFFFF);
+ a = _mm_add_epi32(a, a1);
+ } while (0);
+ break;
+ case 4:
+ do {
+ const __m128i a1 = _mm_set_epi32(k32[3][i], k32[2][i],
+ k32[1][i], k32[0][i]);
+ a = _mm_add_epi32(a, a1);
+ } while (0);
+ break;
+ case 5:
+ do {
+ const __m128i a1 = _mm_set_epi32(k32[3][i], k32[2][i],
+ k32[1][i], k32[0][i]);
+ const __m128i b1 = _mm_set_epi32((uint8_t)k32[3][i+1],
+ (uint8_t)k32[2][i+1],
+ (uint8_t)k32[1][i+1],
+ (uint8_t)k32[0][i+1]);
+ a = _mm_add_epi32(a, a1);
+ b = _mm_add_epi32(b, b1);
+ } while (0);
+ break;
+ case 6:
+ do {
+ const __m128i a1 = _mm_set_epi32(k32[3][i], k32[2][i],
+ k32[1][i], k32[0][i]);
+ const __m128i b1 = _mm_set_epi32((uint16_t)k32[3][i+1],
+ (uint16_t)k32[2][i+1],
+ (uint16_t)k32[1][i+1],
+ (uint16_t)k32[0][i+1]);
+ a = _mm_add_epi32(a, a1);
+ b = _mm_add_epi32(b, b1);
+ } while (0);
+ break;
+ case 7:
+ do {
+ const __m128i a1 = _mm_set_epi32(k32[3][i], k32[2][i],
+ k32[1][i], k32[0][i]);
+ const __m128i b1 = _mm_set_epi32(k32[3][i+1] & 0xFFFFFF,
+ k32[2][i+1] & 0xFFFFFF,
+ k32[1][i+1] & 0xFFFFFF,
+ k32[0][i+1] & 0xFFFFFF);
+ a = _mm_add_epi32(a, a1);
+ b = _mm_add_epi32(b, b1);
+ } while (0);
+ break;
+ case 8:
+ do {
+ const __m128i a1 = _mm_set_epi32(k32[3][i], k32[2][i],
+ k32[1][i], k32[0][i]);
+ const __m128i b1 = _mm_set_epi32(k32[3][i+1], k32[2][i+1],
+ k32[1][i+1], k32[0][i+1]);
+ a = _mm_add_epi32(a, a1);
+ b = _mm_add_epi32(b, b1);
+ } while (0);
+ break;
+ case 9:
+ do {
+ const __m128i a1 = _mm_set_epi32(k32[3][i], k32[2][i],
+ k32[1][i], k32[0][i]);
+ const __m128i b1 = _mm_set_epi32(k32[3][i+1], k32[2][i+1],
+ k32[1][i+1], k32[0][i+1]);
+ const __m128i c1 = _mm_set_epi32((k32[3][i+2] & 0xFF) << 8,
+ (k32[2][i+2] & 0xFF) << 8,
+ (k32[1][i+2] & 0xFF) << 8,
+ (k32[0][i+2] & 0xFF) << 8);
+ a = _mm_add_epi32(a, a1);
+ b = _mm_add_epi32(b, b1);
+ c = _mm_add_epi32(c, c1);
+ } while (0);
+ break;
+ case 10:
+ do {
+ const __m128i a1 = _mm_set_epi32(k32[3][i], k32[2][i],
+ k32[1][i], k32[0][i]);
+ const __m128i b1 = _mm_set_epi32(k32[3][i+1], k32[2][i+1],
+ k32[1][i+1], k32[0][i+1]);
+ const __m128i c1 = _mm_set_epi32((k32[3][i+2] & 0xFFFF) << 8,
+ (k32[2][i+2] & 0xFFFF) << 8,
+ (k32[1][i+2] & 0xFFFF) << 8,
+ (k32[0][i+2] & 0xFFFF) << 8);
+ a = _mm_add_epi32(a, a1);
+ b = _mm_add_epi32(b, b1);
+ c = _mm_add_epi32(c, c1);
+ } while (0);
+ break;
+ case 11:
+ do {
+ const __m128i a1 = _mm_set_epi32(k32[3][i], k32[2][i],
+ k32[1][i], k32[0][i]);
+ const __m128i b1 = _mm_set_epi32(k32[3][i+1], k32[2][i+1],
+ k32[1][i+1], k32[0][i+1]);
+ const __m128i c1 = _mm_set_epi32((k32[3][i+2] & 0xFFFFFF) << 8,
+ (k32[2][i+2] & 0xFFFFFF) << 8,
+ (k32[1][i+2] & 0xFFFFFF) << 8,
+ (k32[0][i+2] & 0xFFFFFF) << 8);
+ a = _mm_add_epi32(a, a1);
+ b = _mm_add_epi32(b, b1);
+ c = _mm_add_epi32(c, c1);
+ } while (0);
+ break;
+ default:
+ break;
+ }
+
+ __rte_jhash_mix4(a, b, c);
+
+ _mm_storeu_si128((__m128i *)results, c);
+}
+
+static inline uint32_t
+rte_jhash_multi(const void *key[], uint32_t num_keys, uint32_t key_length,
+ uint32_t initval, uint32_t results[])
+{
+ uint32_t i;
+ for (i = 0; i < (num_keys & ~(BURST-1)); i += BURST)
+ _jhash_do_burst(&key[i], key_length, initval, &results[i]);
+
+ for (; i < num_keys; i++)
+ results[i] = rte_jhash(key[i], key_length, initval);
+ return 0;
+}
+
+static inline void
+_jhash2_do_burst(const uint32_t *k, const size_t buf_len,
+ const uint32_t key_len, const uint32_t initval, uint32_t results[])
+{
+ const uint32_t off0 = 0, off1 = buf_len,
+ off2 = buf_len * 2, off3 = buf_len * 3;
+ uint32_t len = key_len;
+ __m128i a, b, c;
+ __m128i lens;
+
+ a = b = _mm_set1_epi32(RTE_JHASH_GOLDEN_RATIO);
+ c = _mm_set1_epi32(initval);
+ lens = _mm_set1_epi32(key_len * sizeof(*k));
+
+ while (len >= 3) {
+ const __m128i a1 = _mm_set_epi32(k[off3], k[off2],
+ k[off1], k[off0]);
+ const __m128i b1 = _mm_set_epi32(k[off3+1], k[off2+1],
+ k[off1+1], k[off0+1]);
+ const __m128i c1 = _mm_set_epi32(k[off3+2], k[off2+2],
+ k[off1+2], k[off0+2]);
+
+ a = _mm_add_epi32(a, a1);
+ b = _mm_add_epi32(b, b1);
+ c = _mm_add_epi32(c, c1);
+
+ __rte_jhash_mix4(a, b, c);
+
+ len -= 3, k += 3;
+ }
+
+ c = _mm_add_epi32(c, lens);
+
+ switch (len) {
+ case 2: {
+ const __m128i b1 = _mm_set_epi32(k[off3+1], k[off2+1],
+ k[off1+1], k[off0+1]);
+ b = _mm_add_epi32(b, b1);
+ }
+ case 1: {
+ const __m128i a1 = _mm_set_epi32(k[off3], k[off2],
+ k[off1], k[off0]);
+ a = _mm_add_epi32(a, a1);
+ }
+ default:
+ break;
+ }
+
+ __rte_jhash_mix4(a, b, c);
+ _mm_storeu_si128((__m128i *)results, c);
+}
+
+static inline uint32_t
+rte_jhash2_multi(const uint32_t *key, size_t num_keys, size_t buf_len,
+ size_t key_len, uint32_t initval, uint32_t results[])
+{
+ uint32_t i;
+ for (i = 0; i < (num_keys & ~(BURST-1)); i += BURST)
+ _jhash2_do_burst(&key[i*buf_len], buf_len, key_len, initval,
+ &results[i]);
+ for (; i < num_keys; i++)
+ results[i] = rte_jhash2(&key[i*buf_len], key_len, initval);
+ return 0;
+}
+
+
+#undef BURST
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_VJHASH_H */
@@ -97,6 +97,10 @@ ifeq ($(CONFIG_RTE_LIBRTE_HASH),y)
LDLIBS += -lrte_hash
endif
+ifeq ($(CONFIG_RTE_LIBRTE_THREAD_SAFE_HASH),y)
+LDLIBS += -lrte_tshash
+endif
+
ifeq ($(CONFIG_RTE_LIBRTE_LPM),y)
LDLIBS += -lrte_lpm
endif