From patchwork Thu Sep 6 17:09:05 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Wang, Yipeng1" X-Patchwork-Id: 44371 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id B0AC25689; Fri, 7 Sep 2018 02:13:58 +0200 (CEST) Received: from mga04.intel.com (mga04.intel.com [192.55.52.120]) by dpdk.org (Postfix) with ESMTP id 8E6AF4C77 for ; Fri, 7 Sep 2018 02:13:50 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga005.jf.intel.com ([10.7.209.41]) by fmsmga104.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 06 Sep 2018 17:13:47 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.53,340,1531810800"; d="scan'208";a="255176893" Received: from skx-yipeng.jf.intel.com ([10.54.81.175]) by orsmga005.jf.intel.com with ESMTP; 06 Sep 2018 17:13:47 -0700 From: Yipeng Wang To: pablo.de.lara.guarch@intel.com, bruce.richardson@intel.com Cc: dev@dpdk.org, yipeng1.wang@intel.com, michel@digirati.com.br, honnappa.nagarahalli@arm.com Date: Thu, 6 Sep 2018 10:09:05 -0700 Message-Id: <1536253745-133104-6-git-send-email-yipeng1.wang@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1536253745-133104-1-git-send-email-yipeng1.wang@intel.com> References: <1536253745-133104-1-git-send-email-yipeng1.wang@intel.com> Subject: [dpdk-dev] [PATCH v1 5/5] hash: use partial-key hashing X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This commit changes the hashing mechanism to "partial-key hashing" to calculate bucket index and signature of key. 
This is proposed in Bin Fan, et al's paper "MemC3: Compact and Concurrent MemCache with Dumber Caching and Smarter Hashing". Basically the idea is to use "xor" to derive alternative bucket from current bucket index and signature. With "partial-key hashing", it reduces the bucket memory requirement from two cache lines to one cache line, which improves the memory efficiency and thus the lookup speed. Signed-off-by: Yipeng Wang --- lib/librte_hash/rte_cuckoo_hash.c | 225 ++++++++++++++++++-------------------- lib/librte_hash/rte_cuckoo_hash.h | 6 +- 2 files changed, 108 insertions(+), 123 deletions(-) diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index ff380bb..ace47ad 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -92,6 +92,26 @@ rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h) return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len); } +static inline void +get_buckets_index(const struct rte_hash *h, const hash_sig_t hash, + uint32_t *prim_bkt, uint32_t *sec_bkt, uint16_t *sig) +{ + /* + * We use higher 16 bits of hash as the signature value stored in table. + * We use the lower bits for the primary bucket + * location. Then we xor primary bucket location and the signature + * to get the secondary bucket location. This is same as + * proposed in B. Fan, et al's paper + * "Cuckoo Filter: Practically Better Than Bloom". The benefit to use + * xor is that one could derive the alternative bucket location + * by only using the current bucket location and the signature. 
+ */ + *sig = hash >> 16; + + *prim_bkt = hash & h->bucket_bitmask; + *sec_bkt = (*prim_bkt ^ *sig) & h->bucket_bitmask; +} + struct rte_hash * rte_hash_create(const struct rte_hash_parameters *params) { @@ -329,9 +349,7 @@ rte_hash_create(const struct rte_hash_parameters *params) h->ext_table_support = ext_table_support; #if defined(RTE_ARCH_X86) - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) - h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2; - else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) h->sig_cmp_fn = RTE_HASH_COMPARE_SSE; else #endif @@ -418,18 +436,6 @@ rte_hash_hash(const struct rte_hash *h, const void *key) return h->hash_func(key, h->key_len, h->hash_func_init_val); } -/* Calc the secondary hash value from the primary hash value of a given key */ -static inline hash_sig_t -rte_hash_secondary_hash(const hash_sig_t primary_hash) -{ - static const unsigned all_bits_shift = 12; - static const unsigned alt_bits_xor = 0x5bd1e995; - - uint32_t tag = primary_hash >> all_bits_shift; - - return primary_hash ^ ((tag + 1) * alt_bits_xor); -} - int32_t rte_hash_count(const struct rte_hash *h) { @@ -561,14 +567,13 @@ enqueue_slot_back(const struct rte_hash *h, /* Search a key from bucket and update its data */ static inline int32_t search_and_update(const struct rte_hash *h, void *data, const void *key, - struct rte_hash_bucket *bkt, hash_sig_t sig, hash_sig_t alt_hash) + struct rte_hash_bucket *bkt, uint16_t sig) { int i; struct rte_hash_key *k, *keys = h->key_store; for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->sig_current[i] == sig && - bkt->sig_alt[i] == alt_hash) { + if (bkt->sig_current[i] == sig) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -595,7 +600,7 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h, struct rte_hash_bucket *prim_bkt, struct rte_hash_bucket *sec_bkt, const struct rte_hash_key *key, void 
*data, - hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx, + uint16_t sig, uint32_t new_idx, int32_t *ret_val) { unsigned int i; @@ -606,7 +611,7 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h, /* Check if key was inserted after last check but before this * protected region in case of inserting duplicated keys. */ - ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash); + ret = search_and_update(h, data, key, prim_bkt, sig); if (ret != -1) { __hash_rw_writer_unlock(h); *ret_val = ret; @@ -614,7 +619,7 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h, } FOR_EACH_BUCKET(cur_bkt, sec_bkt) { - ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig); + ret = search_and_update(h, data, key, cur_bkt, sig); if (ret != -1) { __hash_rw_writer_unlock(h); *ret_val = ret; @@ -629,7 +634,6 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h, /* Check if slot is available */ if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) { prim_bkt->sig_current[i] = sig; - prim_bkt->sig_alt[i] = alt_hash; prim_bkt->key_idx[i] = new_idx; break; } @@ -654,7 +658,7 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h, struct rte_hash_bucket *alt_bkt, const struct rte_hash_key *key, void *data, struct queue_node *leaf, uint32_t leaf_slot, - hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx, + uint16_t sig, uint32_t new_idx, int32_t *ret_val) { uint32_t prev_alt_bkt_idx; @@ -675,7 +679,7 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h, /* Check if key was inserted after last check but before this * protected region. 
*/ - ret = search_and_update(h, data, key, bkt, sig, alt_hash); + ret = search_and_update(h, data, key, bkt, sig); if (ret != -1) { __hash_rw_writer_unlock(h); *ret_val = ret; @@ -683,7 +687,7 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h, } FOR_EACH_BUCKET(cur_bkt, alt_bkt) { - ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig); + ret = search_and_update(h, data, key, cur_bkt, sig); if (ret != -1) { __hash_rw_writer_unlock(h); *ret_val = ret; @@ -696,8 +700,9 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h, prev_bkt = prev_node->bkt; prev_slot = curr_node->prev_slot; - prev_alt_bkt_idx = - prev_bkt->sig_alt[prev_slot] & h->bucket_bitmask; + prev_alt_bkt_idx = (prev_node->cur_bkt_idx ^ + prev_bkt->sig_current[prev_slot]) & + h->bucket_bitmask; if (unlikely(&h->buckets[prev_alt_bkt_idx] != curr_bkt)) { @@ -711,10 +716,8 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h, * Cuckoo insert to move elements back to its * primary bucket if available */ - curr_bkt->sig_alt[curr_slot] = - prev_bkt->sig_current[prev_slot]; curr_bkt->sig_current[curr_slot] = - prev_bkt->sig_alt[prev_slot]; + prev_bkt->sig_current[prev_slot]; curr_bkt->key_idx[curr_slot] = prev_bkt->key_idx[prev_slot]; @@ -724,7 +727,6 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h, } curr_bkt->sig_current[curr_slot] = sig; - curr_bkt->sig_alt[curr_slot] = alt_hash; curr_bkt->key_idx[curr_slot] = new_idx; __hash_rw_writer_unlock(h); @@ -742,39 +744,44 @@ rte_hash_cuckoo_make_space_mw(const struct rte_hash *h, struct rte_hash_bucket *bkt, struct rte_hash_bucket *sec_bkt, const struct rte_hash_key *key, void *data, - hash_sig_t sig, hash_sig_t alt_hash, + uint16_t sig, uint32_t bucket_idx, uint32_t new_idx, int32_t *ret_val) { unsigned int i; struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN]; struct queue_node *tail, *head; struct rte_hash_bucket *curr_bkt, *alt_bkt; + uint32_t cur_idx, alt_idx; tail = queue; head = queue + 1; tail->bkt = bkt; tail->prev 
= NULL; tail->prev_slot = -1; + tail->cur_bkt_idx = bucket_idx; /* Cuckoo bfs Search */ while (likely(tail != head && head < queue + RTE_HASH_BFS_QUEUE_MAX_LEN - RTE_HASH_BUCKET_ENTRIES)) { curr_bkt = tail->bkt; + cur_idx = tail->cur_bkt_idx; for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { if (curr_bkt->key_idx[i] == EMPTY_SLOT) { int32_t ret = rte_hash_cuckoo_move_insert_mw(h, bkt, sec_bkt, key, data, - tail, i, sig, alt_hash, + tail, i, sig, new_idx, ret_val); if (likely(ret != -1)) return ret; } /* Enqueue new node and keep prev node info */ - alt_bkt = &(h->buckets[curr_bkt->sig_alt[i] - & h->bucket_bitmask]); + alt_idx = (curr_bkt->sig_current[i] ^ cur_idx) & + h->bucket_bitmask; + alt_bkt = &(h->buckets[alt_idx]); head->bkt = alt_bkt; + head->cur_bkt_idx = alt_idx; head->prev = tail; head->prev_slot = i; head++; @@ -789,7 +796,7 @@ static inline int32_t __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig, void *data) { - hash_sig_t alt_hash; + uint16_t short_sig; uint32_t prim_bucket_idx, sec_bucket_idx; struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt; struct rte_hash_key *new_k, *keys = h->key_store; @@ -804,18 +811,15 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, int32_t ret_val; struct rte_hash_bucket *last; - prim_bucket_idx = sig & h->bucket_bitmask; + get_buckets_index(h, sig, &prim_bucket_idx, &sec_bucket_idx, &short_sig); prim_bkt = &h->buckets[prim_bucket_idx]; - rte_prefetch0(prim_bkt); - - alt_hash = rte_hash_secondary_hash(sig); - sec_bucket_idx = alt_hash & h->bucket_bitmask; sec_bkt = &h->buckets[sec_bucket_idx]; + rte_prefetch0(prim_bkt); rte_prefetch0(sec_bkt); /* Check if key is already inserted in primary location */ __hash_rw_writer_lock(h); - ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash); + ret = search_and_update(h, data, key, prim_bkt, short_sig); if (ret != -1) { __hash_rw_writer_unlock(h); return ret; @@ -823,12 +827,13 @@ 
__rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is already inserted in secondary location */ FOR_EACH_BUCKET(cur_bkt, sec_bkt) { - ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig); + ret = search_and_update(h, data, key, cur_bkt, short_sig); if (ret != -1) { __hash_rw_writer_unlock(h); return ret; } } + __hash_rw_writer_unlock(h); /* Did not find a match, so get a new slot for storing the new key */ @@ -866,7 +871,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Find an empty slot and insert */ ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data, - sig, alt_hash, new_idx, &ret_val); + short_sig, new_idx, &ret_val); if (ret == 0) return new_idx - 1; else if (ret == 1) { @@ -876,7 +881,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Primary bucket full, need to make space for new entry */ ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data, - sig, alt_hash, new_idx, &ret_val); + short_sig, prim_bucket_idx, new_idx, &ret_val); if (ret == 0) return new_idx - 1; else if (ret == 1) { @@ -886,7 +891,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Also search secondary bucket to get better occupancy */ ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data, - alt_hash, sig, new_idx, &ret_val); + short_sig, sec_bucket_idx, new_idx, &ret_val); if (ret == 0) return new_idx - 1; @@ -907,14 +912,14 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, __hash_rw_writer_lock(h); /* We check of duplicates again since could be added before the lock */ /* Check if key is already inserted in primary location */ - ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash); + ret = search_and_update(h, data, key, prim_bkt, short_sig); if (ret != -1) { enqueue_slot_back(h, cached_free_slots, slot_id); goto failure; } FOR_EACH_BUCKET(cur_bkt, sec_bkt) { - ret = 
search_and_update(h, data, key, cur_bkt, alt_hash, sig); + ret = search_and_update(h, data, key, cur_bkt, short_sig); if (ret != -1) { enqueue_slot_back(h, cached_free_slots, slot_id); goto failure; @@ -927,8 +932,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { /* Check if slot is available */ if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) { - cur_bkt->sig_current[i] = alt_hash; - cur_bkt->sig_alt[i] = sig; + cur_bkt->sig_current[i] = short_sig; cur_bkt->key_idx[i] = new_idx; __hash_rw_writer_unlock(h); return new_idx - 1; @@ -946,8 +950,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, bkt_id = (uint32_t)((uintptr_t) ext_bkt_id) - 1; /* Use the first location of the new bucket */ - (h->buckets_ext[bkt_id]).sig_current[0] = alt_hash; - (h->buckets_ext[bkt_id]).sig_alt[0] = sig; + (h->buckets_ext[bkt_id]).sig_current[0] = short_sig; (h->buckets_ext[bkt_id]).key_idx[0] = new_idx; /* Link the new bucket to sec bucket linked list */ last = rte_hash_get_last_bkt(sec_bkt); @@ -1006,7 +1009,7 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data) /* Search one bucket to find the match key */ static inline int32_t -search_one_bucket(const struct rte_hash *h, const void *key, hash_sig_t sig, +search_one_bucket(const struct rte_hash *h, const void *key, uint16_t sig, void **data, const struct rte_hash_bucket *bkt) { int i; @@ -1035,31 +1038,29 @@ static inline int32_t __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig, void **data) { - uint32_t bucket_idx; - hash_sig_t alt_hash; + uint32_t prim_bucket_idx, sec_bucket_idx; struct rte_hash_bucket *bkt, *cur_bkt; int ret; + uint16_t short_sig; - bucket_idx = sig & h->bucket_bitmask; - bkt = &h->buckets[bucket_idx]; + get_buckets_index(h, sig, &prim_bucket_idx, &sec_bucket_idx, &short_sig); + bkt = &h->buckets[prim_bucket_idx]; __hash_rw_reader_lock(h); /* Check if 
key is in primary location */ - ret = search_one_bucket(h, key, sig, data, bkt); + ret = search_one_bucket(h, key, short_sig, data, bkt); if (ret != -1) { __hash_rw_reader_unlock(h); return ret; } /* Calculate secondary hash */ - alt_hash = rte_hash_secondary_hash(sig); - bucket_idx = alt_hash & h->bucket_bitmask; - bkt = &h->buckets[bucket_idx]; + bkt = &h->buckets[sec_bucket_idx]; /* Check if key is in secondary location */ FOR_EACH_BUCKET(cur_bkt, bkt) { - ret = search_one_bucket(h, key, alt_hash, data, cur_bkt); + ret = search_one_bucket(h, key, short_sig, data, cur_bkt); if (ret != -1) { __hash_rw_reader_unlock(h); return ret; @@ -1106,7 +1107,6 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) struct lcore_cache *cached_free_slots; bkt->sig_current[i] = NULL_SIGNATURE; - bkt->sig_alt[i] = NULL_SIGNATURE; if (h->multi_writer_support) { lcore_id = rte_lcore_id(); cached_free_slots = &h->local_free_slots[lcore_id]; @@ -1131,7 +1131,7 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) /* Search one bucket and remove the matched key */ static inline int32_t search_and_remove(const struct rte_hash *h, const void *key, - struct rte_hash_bucket *bkt, hash_sig_t sig) + struct rte_hash_bucket *bkt, uint16_t sig) { struct rte_hash_key *k, *keys = h->key_store; unsigned int i; @@ -1163,31 +1163,29 @@ static inline int32_t __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig) { - uint32_t bucket_idx; - hash_sig_t alt_hash; + uint32_t prim_bucket_idx, sec_bucket_idx; struct rte_hash_bucket *prim_bkt, *sec_bkt; struct rte_hash_bucket *cur_bkt, *prev_bkt, *next_bkt; int32_t ret, i; struct rte_hash_bucket *tobe_removed_bkt = NULL; + uint16_t short_sig; - bucket_idx = sig & h->bucket_bitmask; - prim_bkt = &h->buckets[bucket_idx]; + get_buckets_index(h, sig, &prim_bucket_idx, &sec_bucket_idx, &short_sig); + prim_bkt = &h->buckets[prim_bucket_idx]; __hash_rw_writer_lock(h); /* look 
for key in primary bucket */ - ret = search_and_remove(h, key, prim_bkt, sig); + ret = search_and_remove(h, key, prim_bkt, short_sig); if (ret != -1) { __hash_rw_writer_unlock(h); return ret; } /* Calculate secondary hash */ - alt_hash = rte_hash_secondary_hash(sig); - bucket_idx = alt_hash & h->bucket_bitmask; - sec_bkt = &h->buckets[bucket_idx]; + sec_bkt = &h->buckets[sec_bucket_idx]; /* look for key in secondary bucket */ - ret = search_and_remove(h, key, sec_bkt, alt_hash); + ret = search_and_remove(h, key, sec_bkt, short_sig); if (ret != -1) { __hash_rw_writer_unlock(h); return ret; @@ -1197,7 +1195,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, if (h->ext_table_support) { next_bkt = sec_bkt->next; FOR_EACH_BUCKET(cur_bkt, next_bkt) { - ret = search_and_remove(h, key, cur_bkt, alt_hash); + ret = search_and_remove(h, key, cur_bkt, short_sig); if (ret != -1) goto return_bkt; } @@ -1272,52 +1270,32 @@ static inline void compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches, const struct rte_hash_bucket *prim_bkt, const struct rte_hash_bucket *sec_bkt, - hash_sig_t prim_hash, hash_sig_t sec_hash, + uint16_t sig, enum rte_hash_sig_compare_function sig_cmp_fn) { unsigned int i; switch (sig_cmp_fn) { -#ifdef RTE_MACHINE_CPUFLAG_AVX2 - case RTE_HASH_COMPARE_AVX2: - *prim_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32( - _mm256_load_si256( - (__m256i const *)prim_bkt->sig_current), - _mm256_set1_epi32(prim_hash))); - *sec_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32( - _mm256_load_si256( - (__m256i const *)sec_bkt->sig_current), - _mm256_set1_epi32(sec_hash))); - break; -#endif #ifdef RTE_MACHINE_CPUFLAG_SSE2 case RTE_HASH_COMPARE_SSE: - /* Compare the first 4 signatures in the bucket */ - *prim_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16( + /* Compare all signatures in the bucket */ + *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16( _mm_load_si128( (__m128i const 
*)prim_bkt->sig_current), - _mm_set1_epi32(prim_hash))); - *prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16( - _mm_load_si128( - (__m128i const *)&prim_bkt->sig_current[4]), - _mm_set1_epi32(prim_hash)))) << 4; - /* Compare the first 4 signatures in the bucket */ - *sec_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_set1_epi16(sig))); + /* Compare all signatures in the bucket */ + *sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16( _mm_load_si128( (__m128i const *)sec_bkt->sig_current), - _mm_set1_epi32(sec_hash))); - *sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16( - _mm_load_si128( - (__m128i const *)&sec_bkt->sig_current[4]), - _mm_set1_epi32(sec_hash)))) << 4; + _mm_set1_epi16(sig))); break; #endif default: for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { *prim_hash_matches |= - ((prim_hash == prim_bkt->sig_current[i]) << i); + ((sig == prim_bkt->sig_current[i]) << (i << 1)); *sec_hash_matches |= - ((sec_hash == sec_bkt->sig_current[i]) << i); + ((sig == sec_bkt->sig_current[i]) << (i << 1)); } } @@ -1333,7 +1311,9 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, int32_t i; int32_t ret; uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX]; - uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX]; + uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX]; const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX]; const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX]; uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0}; @@ -1351,10 +1331,11 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, rte_prefetch0(keys[i + PREFETCH_OFFSET]); prim_hash[i] = rte_hash_hash(h, keys[i]); - sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]); + get_buckets_index(h, prim_hash[i], + &prim_index[i], &sec_index[i], &sig[i]); - primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask]; - secondary_bkt[i] = 
&h->buckets[sec_hash[i] & h->bucket_bitmask]; + primary_bkt[i] = &h->buckets[prim_index[i]]; + secondary_bkt[i] = &h->buckets[sec_index[i]]; rte_prefetch0(primary_bkt[i]); rte_prefetch0(secondary_bkt[i]); @@ -1363,10 +1344,12 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, /* Calculate and prefetch rest of the buckets */ for (; i < num_keys; i++) { prim_hash[i] = rte_hash_hash(h, keys[i]); - sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]); - primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask]; - secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask]; + get_buckets_index(h, prim_hash[i], + &prim_index[i], &sec_index[i], &sig[i]); + + primary_bkt[i] = &h->buckets[prim_index[i]]; + secondary_bkt[i] = &h->buckets[sec_index[i]]; rte_prefetch0(primary_bkt[i]); rte_prefetch0(secondary_bkt[i]); @@ -1377,10 +1360,11 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, for (i = 0; i < num_keys; i++) { compare_signatures(&prim_hitmask[i], &sec_hitmask[i], primary_bkt[i], secondary_bkt[i], - prim_hash[i], sec_hash[i], h->sig_cmp_fn); + sig[i], h->sig_cmp_fn); if (prim_hitmask[i]) { - uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]); + uint32_t first_hit = + __builtin_ctzl(prim_hitmask[i]) >> 1; uint32_t key_idx = primary_bkt[i]->key_idx[first_hit]; const struct rte_hash_key *key_slot = (const struct rte_hash_key *)( @@ -1391,7 +1375,8 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, } if (sec_hitmask[i]) { - uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]); + uint32_t first_hit = + __builtin_ctzl(sec_hitmask[i]) >> 1; uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit]; const struct rte_hash_key *key_slot = (const struct rte_hash_key *)( @@ -1405,7 +1390,8 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, for (i = 0; i < num_keys; i++) { positions[i] = -ENOENT; while (prim_hitmask[i]) { - uint32_t hit_index = __builtin_ctzl(prim_hitmask[i]); + uint32_t 
hit_index = + __builtin_ctzl(prim_hitmask[i]) >> 1; uint32_t key_idx = primary_bkt[i]->key_idx[hit_index]; const struct rte_hash_key *key_slot = @@ -1424,11 +1410,12 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, positions[i] = key_idx - 1; goto next_key; } - prim_hitmask[i] &= ~(1 << (hit_index)); + prim_hitmask[i] &= ~(3ULL << (hit_index << 1)); } while (sec_hitmask[i]) { - uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]); + uint32_t hit_index = + __builtin_ctzl(sec_hitmask[i]) >> 1; uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index]; const struct rte_hash_key *key_slot = @@ -1448,7 +1435,7 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, positions[i] = key_idx - 1; goto next_key; } - sec_hitmask[i] &= ~(1 << (hit_index)); + sec_hitmask[i] &= ~(3ULL << (hit_index << 1)); } next_key: @@ -1472,10 +1459,10 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, FOR_EACH_BUCKET(cur_bkt, next_bkt) { if (data != NULL) ret = search_one_bucket(h, keys[i], - sec_hash[i], &data[i], cur_bkt); + sig[i], &data[i], cur_bkt); else ret = search_one_bucket(h, keys[i], - sec_hash[i], NULL, cur_bkt); + sig[i], NULL, cur_bkt); if (ret != -1) { positions[i] = ret; hits |= 1ULL << i; diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h index f190b04..775b93f 100644 --- a/lib/librte_hash/rte_cuckoo_hash.h +++ b/lib/librte_hash/rte_cuckoo_hash.h @@ -131,18 +131,15 @@ struct rte_hash_key { enum rte_hash_sig_compare_function { RTE_HASH_COMPARE_SCALAR = 0, RTE_HASH_COMPARE_SSE, - RTE_HASH_COMPARE_AVX2, RTE_HASH_COMPARE_NUM }; /** Bucket structure */ struct rte_hash_bucket { - hash_sig_t sig_current[RTE_HASH_BUCKET_ENTRIES]; + uint16_t sig_current[RTE_HASH_BUCKET_ENTRIES]; uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES]; - hash_sig_t sig_alt[RTE_HASH_BUCKET_ENTRIES]; - uint8_t flag[RTE_HASH_BUCKET_ENTRIES]; void *next; @@ -195,6 +192,7 @@ struct rte_hash { struct queue_node { struct rte_hash_bucket 
*bkt; /* Current bucket on the bfs search */ + uint32_t cur_bkt_idx; struct queue_node *prev; /* Parent(bucket) in search path */ int prev_slot; /* Parent(slot) in search path */