[v2] hash table: add an iterator over conflicting entries

Message ID: B12F9F5F-2F3B-4E26-B3B6-0680075C2938@bu.edu (mailing list archive)
State: Superseded, archived
Delegated to: Thomas Monjalon
Series: [v2] hash table: add an iterator over conflicting entries

Checks

Context               Check   Description
ci/Intel-compilation  fail    apply issues

Commit Message

Fu, Qiaobin Aug. 16, 2018, 7:30 a.m. UTC
  Function rte_hash_iterate_conflict_entries() iterates over
the entries that conflict with an incoming entry.

Iterating over conflicting entries enables one to decide
if the incoming entry is more valuable than the entries already
in the hash table. This is particularly useful after
an insertion failure.
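
For illustration, a minimal usage sketch of the two new calls after a failed
insert follows. The helper is_more_valuable() and the single-retry policy are
application-side placeholders, not part of this patch:

#include <errno.h>
#include <rte_hash.h>

/* Application policy: decide whether the new key should replace an
 * existing conflicting entry.  Placeholder only. */
extern int is_more_valuable(const void *new_key, const void *old_key);

static int
try_insert_or_evict(const struct rte_hash *h, const void *key, void *data)
{
	hash_sig_t sig = rte_hash_hash(h, key);
	int ret = rte_hash_add_key_with_hash_data(h, key, sig, data);
	struct rte_conflict_iterator_state state;
	const void *ckey, *cdata;

	if (ret != -ENOSPC)
		return ret;	/* Added, or failed for another reason. */

	/* The insert failed: walk the entries the new key conflicts with. */
	if (rte_hash_iterator_conflict_entries_init(h, sig, &state) < 0)
		return -EINVAL;

	while (rte_hash_iterate_conflict_entries(&state, &ckey, &cdata) >= 0) {
		if (is_more_valuable(key, ckey)) {
			/* Evict the less valuable entry and retry once. */
			rte_hash_del_key(h, ckey);
			return rte_hash_add_key_with_hash_data(h, key, sig,
							       data);
		}
	}

	return -ENOSPC;	/* No conflicting entry was worth evicting. */
}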

v2:
* Fix the style issue

* Make the API more universal

Signed-off-by: Qiaobin Fu <qiaobinf@bu.edu>
Reviewed-by: Cody Doucette <doucette@bu.edu>
Reviewed-by: Michel Machado <michel@digirati.com.br>
Reviewed-by: Keith Wiles <keith.wiles@intel.com>
Reviewed-by: Yipeng Wang <yipeng1.wang@intel.com>
---
 lib/librte_hash/rte_cuckoo_hash.c    | 81 ++++++++++++++++++++++++++++
 lib/librte_hash/rte_hash.h           | 41 ++++++++++++++
 lib/librte_hash/rte_hash_version.map |  7 +++
 3 files changed, 129 insertions(+)
  

Comments

Honnappa Nagarahalli Aug. 17, 2018, 2:33 a.m. UTC | #1
Hi Fu,
	Thank you for the patch. I have a few comments below.

Thank you,
Honnappa

-----Original Message-----
From: dev <dev-bounces@dpdk.org> On Behalf Of Fu, Qiaobin
Sent: Thursday, August 16, 2018 2:30 AM
To: Richardson, Bruce <bruce.richardson@intel.com>; De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Cc: dev@dpdk.org; Michel Machado <michel@digirati.com.br>; Doucette, Cody, Joseph <doucette@bu.edu>; Wang, Yipeng1 <yipeng1.wang@intel.com>; Wiles, Keith <keith.wiles@intel.com>; Gobriel, Sameh <sameh.gobriel@intel.com>; Tai, Charlie <charlie.tai@intel.com>; Stephen Hemminger <stephen@networkplumber.org>; Fu, Qiaobin <qiaobinf@bu.edu>
Subject: [dpdk-dev] [PATCH v2] hash table: add an iterator over conflicting entries

Function rte_hash_iterate_conflict_entries() iterates over the entries that conflict with an incoming entry.

Iterating over conflicting entries enables one to decide if the incoming entry is more valuable than the entries already in the hash table. This is particularly useful after an insertion failure.

v2:
* Fix the style issue

* Make the API more universal

Signed-off-by: Qiaobin Fu <qiaobinf@bu.edu>
Reviewed-by: Cody Doucette <doucette@bu.edu>
Reviewed-by: Michel Machado <michel@digirati.com.br>
Reviewed-by: Keith Wiles <keith.wiles@intel.com>
Reviewed-by: Yipeng Wang <yipeng1.wang@intel.com>
---
 lib/librte_hash/rte_cuckoo_hash.c    | 81 ++++++++++++++++++++++++++++
 lib/librte_hash/rte_hash.h           | 41 ++++++++++++++
 lib/librte_hash/rte_hash_version.map |  7 +++
 3 files changed, 129 insertions(+)

diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index a07543a29..de69f9966 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -42,6 +42,13 @@ static struct rte_tailq_elem rte_hash_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_hash_tailq)
 
+struct rte_hash_iterator_conflict_entries_state {
+	const struct rte_hash *h;
+	uint32_t              vnext;
+	uint32_t              primary_bidx;
+	uint32_t              secondary_bidx;
+};
+
 struct rte_hash *
 rte_hash_find_existing(const char *name)
 {
@@ -1160,3 +1167,77 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
 
 	return position - 1;
 }
+
+/* Get the primary bucket index given the precomputed hash value. */
+static inline uint32_t
+rte_hash_get_primary_bucket(const struct rte_hash *h, hash_sig_t sig)
+{
+	return sig & h->bucket_bitmask;
+}
+
+/* Get the secondary bucket index given the precomputed hash value. */
+static inline uint32_t
+rte_hash_get_secondary_bucket(const struct rte_hash *h, hash_sig_t sig)
+{
+	return rte_hash_secondary_hash(sig) & h->bucket_bitmask;
+}
+
IMO, to keep the code consistent, we do not need to have the above 2 functions.

+int32_t __rte_experimental
+rte_hash_iterator_conflict_entries_init(const struct rte_hash *h,
+	hash_sig_t sig, struct rte_conflict_iterator_state *state)
+{
+	struct rte_hash_iterator_conflict_entries_state *__state;
+
+	RETURN_IF_TRUE(((h == NULL) || (state == NULL)), -EINVAL);
+
+	__state = (struct rte_hash_iterator_conflict_entries_state *)state;
+	__state->h = h;
+	__state->vnext = 0;
+	__state->primary_bidx = rte_hash_get_primary_bucket(h, sig);
+	__state->secondary_bidx = rte_hash_get_secondary_bucket(h, sig);
+
+	return 0;
+}
+
+int32_t __rte_experimental
+rte_hash_iterate_conflict_entries(struct rte_conflict_iterator_state *state,
+	const void **key, const void **data)
+{
+	struct rte_hash_iterator_conflict_entries_state *__state;
+
+	RETURN_IF_TRUE(((state == NULL) || (key == NULL) ||
+		(data == NULL)), -EINVAL);
+
+	__state = (struct rte_hash_iterator_conflict_entries_state *)state;
+
+	while (__state->vnext < RTE_HASH_BUCKET_ENTRIES * 2) {
+		uint32_t bidx = (__state->vnext < RTE_HASH_BUCKET_ENTRIES) ?
+			__state->primary_bidx : __state->secondary_bidx;
+		uint32_t next = __state->vnext & (RTE_HASH_BUCKET_ENTRIES - 1);
+		uint32_t position = __state->h->buckets[bidx].key_idx[next];
+		struct rte_hash_key *next_key;
+		/*
+		 * The test below is unlikely because this iterator is meant
+		 * to be used after a failed insert.
+		 * */
+		if (unlikely(position == EMPTY_SLOT))
+			goto next;
+
+		/* Get the entry in key table. */
+		next_key = (struct rte_hash_key *) (
+			(char *)__state->h->key_store +
+			position * __state->h->key_entry_size);
+		/* Return key and data. */
+		*key = next_key->key;
+		*data = next_key->pdata;
+
+next:
+		/* Increment iterator. */
+		__state->vnext++;
+
+		if (likely(position != EMPTY_SLOT))
+			return position - 1;
+	}
+
+	return -ENOENT;
+}


I think, we can make this API similar to 'rte_hash_iterate'. I suggest the following API signature:

int32_t
rte_hash_iterate_conflict_entries (const struct rte_hash *h, const void **key, void **data, hash_sig_t sig, uint32_t *next)

primary and secondary bucket indices can be calculated from 'sig', 'next' is the iterator for the entries in the bucket (or conflicted entries). 'next' can go across the primary and secondary buckets. This will avoid creating 'rte_hash_iterator_conflict_entries_init' API.
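
For concreteness, the suggested form would be used roughly as below. This is
only a sketch of the signature proposed in this thread, not an existing API,
and 'h' and 'sig' are assumed to come from the failed insert:

const void *key;
void *data;
uint32_t next = 0;	/* Iterator; must start at 0, as with rte_hash_iterate(). */
int32_t pos;

while ((pos = rte_hash_iterate_conflict_entries(h, &key, &data, sig,
		&next)) >= 0) {
	/* 'pos' is the stored position of one conflicting entry. */
}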

I also suggest to change the API name to ' rte_hash_iterate_bucket_entries' - 'bucket' is a well understood term in the context of hash algorithms.

Do we also need to have 'rte_hash_iterate_conflict_entries_with_hash' API?

diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index f71ca9fbf..7ecb6a7eb 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -61,6 +61,11 @@ struct rte_hash_parameters {
 /** @internal A hash table structure. */
 struct rte_hash;
 
+/** @internal A hash table conflict iterator state structure. */
+struct rte_conflict_iterator_state {
+	uint8_t space[64];
+};
+

The size depends on the current size of the state, which is subject to change with the algorithm used.

 /**
  * Create a new hash table.
  *
@@ -419,6 +424,42 @@ rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
  */
 int32_t
 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next);
+
+/**
+ * Initialize the iterator over entries that conflict with a new entry.
+ *
+ * @param h
+ *   Hash table to iterate
+ * @param sig
+ *   Precomputed hash value for the new entry.
+ * @return
+ *   - 0 if successful.
+ *   - -EINVAL if the parameters are invalid.
+ */
+int32_t __rte_experimental
+rte_hash_iterator_conflict_entries_init(const struct rte_hash *h,
+	hash_sig_t sig, struct rte_conflict_iterator_state *state);
+
+/**
+ * Iterate over entries that conflict with a new entry.
+ *
+ * @param state
+ *   Pointer to the iterator state.
+ * @param key
+ *   Output containing the key where current iterator
+ *   was pointing at.
+ * @param data
+ *   Output containing the data associated with key.
+ *   Returns NULL if data was not stored.
+ * @return
+ *   Position where key was stored, if successful.
+ *   - -EINVAL if the parameters are invalid.
+ *   - -ENOENT if there is no more conflicting entries.
+ */
+int32_t __rte_experimental
+rte_hash_iterate_conflict_entries(struct rte_conflict_iterator_state *state,
+	const void **key, const void **data);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
index 52a2576f9..c1c343e52 100644
--- a/lib/librte_hash/rte_hash_version.map
+++ b/lib/librte_hash/rte_hash_version.map
@@ -45,3 +45,10 @@ DPDK_16.07 {
 	rte_hash_get_key_with_position;
 
 } DPDK_2.2;
+
+EXPERIMENTAL {
+	global:
+
+	rte_hash_iterator_conflict_entries_init;
+	rte_hash_iterate_conflict_entries;
+};
--
2.17.1
  
Michel Machado Aug. 17, 2018, 1:34 p.m. UTC | #2
On 08/16/2018 10:33 PM, Honnappa Nagarahalli wrote:
> +/* Get the primary bucket index given the precomputed hash value. */
> +static inline uint32_t rte_hash_get_primary_bucket(const struct
> +rte_hash *h, hash_sig_t sig) {
> +	return sig & h->bucket_bitmask;
> +}
> +
> +/* Get the secondary bucket index given the precomputed hash value. */
> +static inline uint32_t rte_hash_get_secondary_bucket(const struct
> +rte_hash *h, hash_sig_t sig) {
> +	return rte_hash_secondary_hash(sig) & h->bucket_bitmask; }
> +
> IMO, to keep the code consistent, we do not need to have the above 2 functions.

    Ok.

> +int32_t __rte_experimental
> +rte_hash_iterate_conflict_entries(struct rte_conflict_iterator_state *state,
> +	const void **key, const void **data)
> +{
> +	struct rte_hash_iterator_conflict_entries_state *__state;
> +
> +	RETURN_IF_TRUE(((state == NULL) || (key == NULL) ||
> +		(data == NULL)), -EINVAL);
> +
> +	__state = (struct rte_hash_iterator_conflict_entries_state *)state;
> +
> +	while (__state->vnext < RTE_HASH_BUCKET_ENTRIES * 2) {
> +		uint32_t bidx = (__state->vnext < RTE_HASH_BUCKET_ENTRIES) ?
> +			__state->primary_bidx : __state->secondary_bidx;
> +		uint32_t next = __state->vnext & (RTE_HASH_BUCKET_ENTRIES - 1);
> +		uint32_t position = __state->h->buckets[bidx].key_idx[next];
> +		struct rte_hash_key *next_key;
> +		/*
> +		 * The test below is unlikely because this iterator is meant
> +		 * to be used after a failed insert.
> +		 * */
> +		if (unlikely(position == EMPTY_SLOT))
> +			goto next;
> +
> +		/* Get the entry in key table. */
> +		next_key = (struct rte_hash_key *) (
> +			(char *)__state->h->key_store +
> +			position * __state->h->key_entry_size);
> +		/* Return key and data. */
> +		*key = next_key->key;
> +		*data = next_key->pdata;
> +
> +next:
> +		/* Increment iterator. */
> +		__state->vnext++;
> +
> +		if (likely(position != EMPTY_SLOT))
> +			return position - 1;
> +	}
> +
> +	return -ENOENT;
> +}
> 
> 
> I think, we can make this API similar to 'rte_hash_iterate'. I suggest the following API signature:
> 
> int32_t
> rte_hash_iterate_conflict_entries (const struct rte_hash *h, const void **key, void **data, hash_sig_t sig, uint32_t *next)

    The goal of our interface is to support changing the underlying hash 
table algorithm without requiring changes in applications. As Yipeng1 
Wang exemplified in the discussion of the first version of this patch, 
"in future, rte_hash may use three hash functions, or as I mentioned 
each bucket may have an additional linked list or even a second level 
hash table, or if the hopscotch hash replaces cuckoo hash as the new 
algorithm." These new algorithms may require more state than sig and 
next can efficiently provide in order to browse the conflicting entries.

> I also suggest to change the API name to ' rte_hash_iterate_bucket_entries' - 'bucket' is a well understood term in the context of hash algorithms.

    It's a matter of semantics here. rte_hash_iterate_conflict_entries() 
may cross more than one bucket. In fact, the first version of this patch 
tried to do exactly that, but it exposes the underlying algorithm. In 
addition, future algorithms may stretch what is being browsed even further.

> Do we also need to have 'rte_hash_iterate_conflict_entries_with_hash' API?

    I may have not understood the question. We are already working with 
the hash (i.e. sig). Did you mean something else?

> diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h index f71ca9fbf..7ecb6a7eb 100644
> --- a/lib/librte_hash/rte_hash.h
> +++ b/lib/librte_hash/rte_hash.h
> @@ -61,6 +61,11 @@ struct rte_hash_parameters {
>   /** @internal A hash table structure. */  struct rte_hash;
>   
> +/** @internal A hash table conflict iterator state structure. */ struct
> +rte_conflict_iterator_state {
> +	uint8_t space[64];
> +};
> +
> 
> The size depends on the current size of the state, which is subject to change with the algorithm used.

    We chose a size that should be robust for any future underlying 
algorithm. Do you have a suggestion on how to go about it? We chose to 
have a simple struct to enable applications to allocate a state as a 
local variable and avoid a memory allocation.
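
One way to make that choice safer against future algorithms, shown here only
as a possible guard and not something present in the patch, is a build-time
size check inside rte_hash_iterator_conflict_entries_init():

/* Fails the build if the internal state ever outgrows the public
 * 64-byte opaque struct. */
RTE_BUILD_BUG_ON(sizeof(struct rte_hash_iterator_conflict_entries_state) >
		 sizeof(struct rte_conflict_iterator_state));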

[ ]'s
Michel Machado
  
Honnappa Nagarahalli Aug. 17, 2018, 7:41 p.m. UTC | #3
-----Original Message-----
From: Michel Machado <michel@digirati.com.br> 
Sent: Friday, August 17, 2018 8:35 AM
To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Fu, Qiaobin <qiaobinf@bu.edu>; Richardson, Bruce <bruce.richardson@intel.com>; De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Cc: dev@dpdk.org; Doucette, Cody, Joseph <doucette@bu.edu>; Wang, Yipeng1 <yipeng1.wang@intel.com>; Wiles, Keith <keith.wiles@intel.com>; Gobriel, Sameh <sameh.gobriel@intel.com>; Tai, Charlie <charlie.tai@intel.com>; Stephen Hemminger <stephen@networkplumber.org>; nd <nd@arm.com>
Subject: Re: [dpdk-dev] [PATCH v2] hash table: add an iterator over conflicting entries

On 08/16/2018 10:33 PM, Honnappa Nagarahalli wrote:
> +/* Get the primary bucket index given the precomputed hash value. */ 
> +static inline uint32_t rte_hash_get_primary_bucket(const struct 
> +rte_hash *h, hash_sig_t sig) {
> +	return sig & h->bucket_bitmask;
> +}
> +
> +/* Get the secondary bucket index given the precomputed hash value. 
> +*/ static inline uint32_t rte_hash_get_secondary_bucket(const struct 
> +rte_hash *h, hash_sig_t sig) {
> +	return rte_hash_secondary_hash(sig) & h->bucket_bitmask; }
> +
> IMO, to keep the code consistent, we do not need to have the above 2 functions.

    Ok.

> +int32_t __rte_experimental
> +rte_hash_iterate_conflict_entries(struct rte_conflict_iterator_state *state,
> +	const void **key, const void **data) {
> +	struct rte_hash_iterator_conflict_entries_state *__state;
> +
> +	RETURN_IF_TRUE(((state == NULL) || (key == NULL) ||
> +		(data == NULL)), -EINVAL);
> +
> +	__state = (struct rte_hash_iterator_conflict_entries_state *)state;
> +
> +	while (__state->vnext < RTE_HASH_BUCKET_ENTRIES * 2) {
> +		uint32_t bidx = (__state->vnext < RTE_HASH_BUCKET_ENTRIES) ?
> +			__state->primary_bidx : __state->secondary_bidx;
> +		uint32_t next = __state->vnext & (RTE_HASH_BUCKET_ENTRIES - 1);
> +		uint32_t position = __state->h->buckets[bidx].key_idx[next];
> +		struct rte_hash_key *next_key;
> +		/*
> +		 * The test below is unlikely because this iterator is meant
> +		 * to be used after a failed insert.
> +		 * */
> +		if (unlikely(position == EMPTY_SLOT))
> +			goto next;
> +
> +		/* Get the entry in key table. */
> +		next_key = (struct rte_hash_key *) (
> +			(char *)__state->h->key_store +
> +			position * __state->h->key_entry_size);
> +		/* Return key and data. */
> +		*key = next_key->key;
> +		*data = next_key->pdata;
> +
> +next:
> +		/* Increment iterator. */
> +		__state->vnext++;
> +
> +		if (likely(position != EMPTY_SLOT))
> +			return position - 1;
> +	}
> +
> +	return -ENOENT;
> +}
> 
> 
> I think, we can make this API similar to 'rte_hash_iterate'. I suggest the following API signature:
> 
> int32_t
> rte_hash_iterate_conflict_entries (const struct rte_hash *h, const 
> void **key, void **data, hash_sig_t sig, uint32_t *next)

    The goal of our interface is to support changing the underlying hash table algorithm without requiring changes in applications. As Yipeng1 Wang exemplified in the discussion of the first version of this patch, "in future, rte_hash may use three hash functions, or as I mentioned each bucket may have an additional linked list or even a second level hash table, or if the hopscotch hash replaces cuckoo hash as the new algorithm." These new algorithms may require more state than sig and next can efficiently provide in order to browse the conflicting entries.

Thank you for your explanation. I think, 64B for the size of the state is good. This should apply for 'rte_hash_iterate' API as well. It currently has 4B of state (if the 'sig' is kept out) and is dependent on current hash algorithm.

Can you elaborate more on using ' struct rte_conflict_iterator_state' as the argument for the API?

If the API signature is changed to: rte_hash_iterate_conflict_entries (const struct rte_hash *h, void **key, void **data, const hash_sig_t sig, struct rte_conflict_iterator_state *state) - it will be inline with the existing APIs. Contents of 'state' must be initialized to 0 for the first call. This will also avoid creating 'rte_hash_iterator_conflict_entries_init' API. 
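
A sketch of that calling convention, assuming the signature above (a proposal
in this thread, not an existing API, with 'h' and 'sig' already in scope):

struct rte_conflict_iterator_state state;
void *key, *data;
int32_t pos;

memset(&state, 0, sizeof(state));	/* First call requires a zeroed state. */
while ((pos = rte_hash_iterate_conflict_entries(h, &key, &data, sig,
		&state)) >= 0) {
	/* One conflicting entry per successful call. */
}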


> I also suggest to change the API name to ' rte_hash_iterate_bucket_entries' - 'bucket' is a well understood term in the context of hash algorithms.

    It's a matter of semantics here. rte_hash_iterate_conflict_entries()
may cross more than one bucket. In fact, the first version of this patch tried to do exactly that, but it exposes the underlying algorithm. In addition, future algorithms may stretch what is being browsed even further.

I agree it is a matter of semantics. From the user/application point of view, the algorithm implemented should not matter. 'conflict_entries' definitely conveys the meaning, I think this is nothing but 'entries in a bucket' in the context of hash. May be, Yipeng can reconsider his comment?

> Do we also need to have 'rte_hash_iterate_conflict_entries_with_hash' API?

    I may have not understood the question. We are already working with the hash (i.e. sig). Did you mean something else?

Let me elaborate. For the API 'rte_hash_lookup', there are multiple variations such as 'rte_hash_lookup_with_hash', 'rte_hash_lookup_data', 'rte_hash_lookup_with_hash_data' etc. We do not need to create similar variations for 'rte_hash_iterate_conflict_entries' API right now. But the naming of the API should be such that these variations can be created in the future.

> diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h 
> index f71ca9fbf..7ecb6a7eb 100644
> --- a/lib/librte_hash/rte_hash.h
> +++ b/lib/librte_hash/rte_hash.h
> @@ -61,6 +61,11 @@ struct rte_hash_parameters {
>   /** @internal A hash table structure. */  struct rte_hash;
>   
> +/** @internal A hash table conflict iterator state structure. */ 
> +struct rte_conflict_iterator_state {
> +	uint8_t space[64];
> +};
> +
Needs aligning to cache line.

> 
> The size depends on the current size of the state, which is subject to change with the algorithm used.

    We chose a size that should be robust for any future underlying algorithm. Do you have a suggestion on how to go about it? We chose to have a simple struct to enable applications to allocate a state as a local variable and avoid a memory allocation.

This looks fine after your explanation. The structure name can be changed to 'rte_iterator_state' so that it can be used in other iterator APIs too.

[ ]'s
Michel Machado
  
Michel Machado Aug. 18, 2018, 10:45 p.m. UTC | #4
On 08/17/2018 03:41 PM, Honnappa Nagarahalli wrote:
>> Do we also need to have 'rte_hash_iterate_conflict_entries_with_hash' API?
> 
>      I may have not understood the question. We are already working with the hash (i.e. sig). Did you mean something else?
> 
> Let me elaborate. For the API 'rte_hash_lookup', there are multiple variations such as 'rte_hash_lookup_with_hash', 'rte_hash_lookup_data', 'rte_hash_lookup_with_hash_data' etc. We do not need to create similar variations for 'rte_hash_iterate_conflict_entries' API right now. But the naming of the API should be such that these variations can be created in the future.

    So you mean that we should actually name 
rte_hash_iterator_conflict_entries_init() as 
rte_hash_iterator_conflict_entries_init_with_hash()? I'd be fine with this.

>> diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
>> index f71ca9fbf..7ecb6a7eb 100644
>> --- a/lib/librte_hash/rte_hash.h
>> +++ b/lib/librte_hash/rte_hash.h
>> @@ -61,6 +61,11 @@ struct rte_hash_parameters {
>>    /** @internal A hash table structure. */  struct rte_hash;
>>    
>> +/** @internal A hash table conflict iterator state structure. */
>> +struct rte_conflict_iterator_state {
>> +	uint8_t space[64];
>> +};
>> +
> Needs aligning to cache line.

    Ok.

>> The size depends on the current size of the state, which is subject to change with the algorithm used.
> 
>      We chose a size that should be robust for any future underlying algorithm. Do you have a suggestion on how to go about it? We chose to have a simple struct to enable applications to allocate a state as a local variable and avoid a memory allocation.
> 
> This looks fine after your explanation. The structure name can be changed to 'rte_iterator_state' so that it can be used in other iterator APIs too.

    I like this suggestion. What about the name 
"rte_hash_iterator_state" to make it specific to the hash table?

[ ]'s
Michel Machado
  
Michel Machado Aug. 18, 2018, 11:08 p.m. UTC | #5
On 08/17/2018 03:41 PM, Honnappa Nagarahalli wrote:
> Can you elaborate more on using ' struct rte_conflict_iterator_state' as the argument for the API?
> 
> If the API signature is changed to: rte_hash_iterate_conflict_entries (const struct rte_hash *h, void **key, void **data, const hash_sig_t sig, struct rte_conflict_iterator_state *state) - it will be inline with the existing APIs. Contents of 'state' must be initialized to 0 for the first call. This will also avoid creating 'rte_hash_iterator_conflict_entries_init' API.

    Testing `state' every time rte_hash_iterate_conflict_entries() is 
called to find out if it's the first call of the iterator will possibly 
add some small, but unnecessary, overhead on 
rte_hash_iterate_conflict_entries() and constraints on struct 
rte_conflict_iterator_state. Moreover, 
rte_hash_iterator_conflict_entries_init() enables one to easily add 
variations of the init function to initialize the state (e.g. using a 
key instead of a sig) and still use the exactly same iterator.
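
As an example of such a variation, a key-based initializer could coexist with
the sig-based one while keeping the same iterate call; the prototype below is
purely illustrative and not part of this patch:

/* Hypothetical companion initializer: compute the buckets from a key
 * rather than a precomputed hash.  Illustrative only. */
int32_t __rte_experimental
rte_hash_iterator_conflict_entries_init_with_key(const struct rte_hash *h,
	const void *key, struct rte_conflict_iterator_state *state);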

[ ]'s
Michel Machado
  
Honnappa Nagarahalli Aug. 21, 2018, 5:10 a.m. UTC | #6
-----Original Message-----
From: Michel Machado <michel@digirati.com.br> 
Sent: Saturday, August 18, 2018 6:08 PM
To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Fu, Qiaobin <qiaobinf@bu.edu>; Richardson, Bruce <bruce.richardson@intel.com>; De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Cc: dev@dpdk.org; Doucette, Cody, Joseph <doucette@bu.edu>; Wang, Yipeng1 <yipeng1.wang@intel.com>; Wiles, Keith <keith.wiles@intel.com>; Gobriel, Sameh <sameh.gobriel@intel.com>; Tai, Charlie <charlie.tai@intel.com>; Stephen Hemminger <stephen@networkplumber.org>; nd <nd@arm.com>
Subject: Re: [dpdk-dev] [PATCH v2] hash table: add an iterator over conflicting entries

On 08/17/2018 03:41 PM, Honnappa Nagarahalli wrote:
> Can you elaborate more on using ' struct rte_conflict_iterator_state' as the argument for the API?
> 
> If the API signature is changed to: rte_hash_iterate_conflict_entries (const struct rte_hash *h, void **key, void **data, const hash_sig_t sig, struct rte_conflict_iterator_state *state) - it will be inline with the existing APIs. Contents of 'state' must be initialized to 0 for the first call. This will also avoid creating 'rte_hash_iterator_conflict_entries_init' API.

    Testing `state' every time rte_hash_iterate_conflict_entries() is called to find out if it's the first call of the iterator will possibly add some small, but unnecessary, overhead on
rte_hash_iterate_conflict_entries() and constraints on struct rte_conflict_iterator_state. Moreover,
rte_hash_iterator_conflict_entries_init() enables one to easily add variations of the init function to initialize the state (e.g. using a key instead of a sig) and still use the exactly same iterator.

IMO, I think, this over-head will be trivial. Looking at the function 'rte_hash_iterate_conflict_entries' the check for '(__state->vnext < RTE_HASH_BUCKET_ENTRIES * 2)' already exists. If the primary/secondary bucket indices are calculated as well in 'rte_hash_iterate_conflict_entries' API ('rte_hash_iterate' API does such calculations), storing them in the state can be avoided. I am wondering if it makes sense to benchmark with these changes and then take a decision?
 
[ ]'s
Michel Machado
  
Michel Machado Aug. 21, 2018, 12:41 p.m. UTC | #7
On 08/21/2018 01:10 AM, Honnappa Nagarahalli wrote:
> On 08/17/2018 03:41 PM, Honnappa Nagarahalli wrote:
>> Can you elaborate more on using ' struct rte_conflict_iterator_state' as the argument for the API?
>>
>> If the API signature is changed to: rte_hash_iterate_conflict_entries (const struct rte_hash *h, void **key, void **data, const hash_sig_t sig, struct rte_conflict_iterator_state *state) - it will be inline with the existing APIs. Contents of 'state' must be initialized to 0 for the first call. This will also avoid creating 'rte_hash_iterator_conflict_entries_init' API.
> 
>      Testing `state' every time rte_hash_iterate_conflict_entries() is called to find out if it's the first call of the iterator will possibly add some small, but unnecessary, overhead on
> rte_hash_iterate_conflict_entries() and constraints on struct rte_conflict_iterator_state. Moreover,
> rte_hash_iterator_conflict_entries_init() enables one to easily add variations of the init function to initialize the state (e.g. using a key instead of a sig) and still use the exactly same iterator.
> 
> IMO, I think, this over-head will be trivial. Looking at the function 'rte_hash_iterate_conflict_entries' the check for '(__state->vnext < RTE_HASH_BUCKET_ENTRIES * 2)' already exists. If the primary/secondary bucket indices are calculated as well in 'rte_hash_iterate_conflict_entries' API ('rte_hash_iterate' API does such calculations), storing them in the state can be avoided. I am wondering if it makes sense to benchmark with these changes and then take a decision?

    We have come up with the init function and struct 
rte_conflict_iterator_state in v2 to make the new iterator as future 
proof to a change of the underlying algorithm as possible. But going 
through your feedback, it seems to me that your top concern is to not 
deviate much of the current interface of rte_hash_iterate(). We are fine 
with pushing v3 using the interface you've suggested to avoid the init 
function and struct rte_conflict_iterator_state:

int32_t
rte_hash_iterate_conflict_entries__with_hash(const struct rte_hash *h, 
const void **key, void **data, hash_sig_t sig, uint32_t *next);

[ ]'s
Michel Machado
  
Honnappa Nagarahalli Aug. 21, 2018, 11:42 p.m. UTC | #8
-----Original Message-----
From: Michel Machado <michel@digirati.com.br> 
Sent: Tuesday, August 21, 2018 7:42 AM
To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Fu, Qiaobin <qiaobinf@bu.edu>; Richardson, Bruce <bruce.richardson@intel.com>; De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Cc: dev@dpdk.org; Doucette, Cody, Joseph <doucette@bu.edu>; Wang, Yipeng1 <yipeng1.wang@intel.com>; Wiles, Keith <keith.wiles@intel.com>; Gobriel, Sameh <sameh.gobriel@intel.com>; Tai, Charlie <charlie.tai@intel.com>; Stephen Hemminger <stephen@networkplumber.org>; nd <nd@arm.com>
Subject: Re: [dpdk-dev] [PATCH v2] hash table: add an iterator over conflicting entries

On 08/21/2018 01:10 AM, Honnappa Nagarahalli wrote:
> On 08/17/2018 03:41 PM, Honnappa Nagarahalli wrote:
>> Can you elaborate more on using ' struct rte_conflict_iterator_state' as the argument for the API?
>>
>> If the API signature is changed to: rte_hash_iterate_conflict_entries (const struct rte_hash *h, void **key, void **data, const hash_sig_t sig, struct rte_conflict_iterator_state *state) - it will be inline with the existing APIs. Contents of 'state' must be initialized to 0 for the first call. This will also avoid creating 'rte_hash_iterator_conflict_entries_init' API.
> 
>      Testing `state' every time rte_hash_iterate_conflict_entries() is 
> called to find out if it's the first call of the iterator will 
> possibly add some small, but unnecessary, overhead on
> rte_hash_iterate_conflict_entries() and constraints on struct 
> rte_conflict_iterator_state. Moreover,
> rte_hash_iterator_conflict_entries_init() enables one to easily add variations of the init function to initialize the state (e.g. using a key instead of a sig) and still use the exactly same iterator.
> 
> IMO, I think, this over-head will be trivial. Looking at the function 'rte_hash_iterate_conflict_entries' the check for '(__state->vnext < RTE_HASH_BUCKET_ENTRIES * 2)' already exists. If the primary/secondary bucket indices are calculated as well in 'rte_hash_iterate_conflict_entries' API ('rte_hash_iterate' API does such calculations), storing them in the state can be avoided. I am wondering if it makes sense to benchmark with these changes and then take a decision?

    We have come up with the init function and struct rte_conflict_iterator_state in v2 to make the new iterator as future proof to a change of the underlying algorithm as possible. But going through your feedback, it seems to me that your top concern is to not deviate much of the current interface of rte_hash_iterate(). We are fine with pushing v3 using the interface you've suggested to avoid the init function and struct rte_conflict_iterator_state:

int32_t
rte_hash_iterate_conflict_entries__with_hash(const struct rte_hash *h, const void **key, void **data, hash_sig_t sig, uint32_t *next);

Yes, this is my primary concern. Above signature will conform to what we have currently. If APIs have to change because of the change in underlying algorithm we can do it when it happens (with the better understanding of the situation at that time).

I have to add that I liked your idea of 64B state. But that also means it is not in line with rte_hash_iterate(). We should remember to do it in the future if we happen to change the API signature.

[ ]'s
Michel Machado
  
Wang, Yipeng1 Aug. 24, 2018, 12:33 a.m. UTC | #9
Thanks for the patch and sorry for the late review.
I believe this will be a good addition to the library and please
keep me cc'ed for V3 as well.

We have the linked list cuckoo hash on the roadmap for 18.11
http://mails.dpdk.org/archives/dev/2018-August/109986.html

I think with Honnappa suggested "uint32_t* next",
we may need a little bit tricks to make it work with the extra linked list.
The performance may not be optimal though comparing to your original approach.
Is this important to your use case?

I think we can work something out to satisfy everybody :)

Thanks
Yipeng

>-----Original Message-----
>From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]
>Sent: Tuesday, August 21, 2018 4:43 PM
>To: Michel Machado <michel@digirati.com.br>; Fu, Qiaobin <qiaobinf@bu.edu>; Richardson, Bruce <bruce.richardson@intel.com>;
>De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
>Cc: dev@dpdk.org; Doucette, Cody, Joseph <doucette@bu.edu>; Wang, Yipeng1 <yipeng1.wang@intel.com>; Wiles, Keith
><keith.wiles@intel.com>; Gobriel, Sameh <sameh.gobriel@intel.com>; Tai, Charlie <charlie.tai@intel.com>; Stephen Hemminger
><stephen@networkplumber.org>; nd <nd@arm.com>
>Subject: RE: [dpdk-dev] [PATCH v2] hash table: add an iterator over conflicting entries
>
>
>
>-----Original Message-----
>From: Michel Machado <michel@digirati.com.br>
>Sent: Tuesday, August 21, 2018 7:42 AM
>To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Fu, Qiaobin <qiaobinf@bu.edu>; Richardson, Bruce
><bruce.richardson@intel.com>; De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
>Cc: dev@dpdk.org; Doucette, Cody, Joseph <doucette@bu.edu>; Wang, Yipeng1 <yipeng1.wang@intel.com>; Wiles, Keith
><keith.wiles@intel.com>; Gobriel, Sameh <sameh.gobriel@intel.com>; Tai, Charlie <charlie.tai@intel.com>; Stephen Hemminger
><stephen@networkplumber.org>; nd <nd@arm.com>
>Subject: Re: [dpdk-dev] [PATCH v2] hash table: add an iterator over conflicting entries
>
>On 08/21/2018 01:10 AM, Honnappa Nagarahalli wrote:
>> On 08/17/2018 03:41 PM, Honnappa Nagarahalli wrote:
>>> Can you elaborate more on using ' struct rte_conflict_iterator_state' as the argument for the API?
>>>
>>> If the API signature is changed to: rte_hash_iterate_conflict_entries (const struct rte_hash *h, void **key, void **data, const
>hash_sig_t sig, struct rte_conflict_iterator_state *state) - it will be inline with the existing APIs. Contents of 'state' must be initialized
>to 0 for the first call. This will also avoid creating 'rte_hash_iterator_conflict_entries_init' API.
>>
>>      Testing `state' every time rte_hash_iterate_conflict_entries() is
>> called to find out if it's the first call of the iterator will
>> possibly add some small, but unnecessary, overhead on
>> rte_hash_iterate_conflict_entries() and constraints on struct
>> rte_conflict_iterator_state. Moreover,
>> rte_hash_iterator_conflict_entries_init() enables one to easily add variations of the init function to initialize the state (e.g. using a
>key instead of a sig) and still use the exactly same iterator.
>>
>> IMO, I think, this over-head will be trivial. Looking at the function 'rte_hash_iterate_conflict_entries' the check for '(__state->vnext
>< RTE_HASH_BUCKET_ENTRIES * 2)' already exists. If the primary/secondary bucket indices are calculated as well in
>'rte_hash_iterate_conflict_entries' API ('rte_hash_iterate' API does such calculations), storing them in the state can be avoided. I am
>wondering if it makes sense to benchmark with these changes and then take a decision?
>
>    We have come up with the init function and struct rte_conflict_iterator_state in v2 to make the new iterator as future proof to a
>change of the underlying algorithm as possible. But going through your feedback, it seems to me that your top concern is to not
>deviate much of the current interface of rte_hash_iterate(). We are fine with pushing v3 using the interface you've suggested to
>avoid the init function and struct rte_conflict_iterator_state:
>
>int32_t
>rte_hash_iterate_conflict_entries__with_hash(const struct rte_hash *h, const void **key, void **data, hash_sig_t sig, uint32_t
>*next);
>
>Yes, this is my primary concern. Above signature will conform to what we have currently. If APIs have to change because of the
>change in underlying algorithm we can do it when it happens (with the better understanding of the situation at that time).
>
>I have to add that I liked your idea of 64B state. But that also means it is not in line with rte_hash_iterate(). We should remember to
>do it in the future if we happen to change the API signature.
>
>[ ]'s
>Michel Machado
  
Michel Machado Aug. 24, 2018, 12:34 p.m. UTC | #10
On 08/23/2018 08:33 PM, Wang, Yipeng1 wrote:
> I think with Honnappa suggested "uint32_t* next",
> we may need a little bit tricks to make it work with the extra linked list.
> The performance may not be optimal though comparing to your original approach.
> Is this important to your use case?

    It is. We are developing a DDoS protection system, and have chosen 
DPDK because it was the fastest framework in the evaluations we 
considered. We need to find the conflicting entries when a critical flow 
table of our system is overloaded due to an ongoing attack, so the more 
efficient we can evaluate the merits of an incoming flow against the 
conflicting flows already in the table, the higher the chances we find 
the flows that should be in the flow table.

    We've compromised with Honnappa under the understanding that once 
the underlying algorithm changes, there would be a review of the 
interface since even rte_hash_iterate() may be affected. I still think 
that the v2 we proposed is the best approach here because it isolates 
the interface from the underlying algorithm.

[ ]'s
Michel Machado
  
Honnappa Nagarahalli Aug. 27, 2018, 3:12 a.m. UTC | #11
-----Original Message-----
From: Michel Machado <michel@digirati.com.br> 
Sent: Friday, August 24, 2018 7:34 AM
To: Wang, Yipeng1 <yipeng1.wang@intel.com>; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Fu, Qiaobin <qiaobinf@bu.edu>
Cc: dev@dpdk.org; Doucette, Cody, Joseph <doucette@bu.edu>; Wiles, Keith <keith.wiles@intel.com>; Gobriel, Sameh <sameh.gobriel@intel.com>; Tai, Charlie <charlie.tai@intel.com>; Stephen Hemminger <stephen@networkplumber.org>; nd <nd@arm.com>; Richardson, Bruce <bruce.richardson@intel.com>; De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Subject: Re: [dpdk-dev] [PATCH v2] hash table: add an iterator over conflicting entries


On 08/23/2018 08:33 PM, Wang, Yipeng1 wrote:
> I think with Honnappa suggested "uint32_t* next", we may need a little 
> bit tricks to make it work with the extra linked list.
> The performance may not be optimal though comparing to your original approach.
> Is this important to your use case?

    It is. We are developing a DDoS protection system, and have chosen DPDK because it was the fastest framework in the evaluations we considered. We need to find the conflicting entries when a critical flow table of our system is overloaded due to an ongoing attack, so the more efficient we can evaluate the merits of an incoming flow against the conflicting flows already in the table, the higher the chances we find the flows that should be in the flow table.

    We've compromised with Honnappa under the understanding that once the underlying algorithm changes, there would be a review of the interface since even rte_hash_iterate() may be affected. I still think that the v2 we proposed is the best approach here because it isolates the interface from the underlying algorithm.

My only concern was to do with keeping the interfaces across APIs consistent. I am fine with changing 'uint32_t *next' as long as we change 'rte_hash_iterate' API as well.

[ ]'s
Michel Machado
  
Michel Machado Aug. 27, 2018, 6:27 p.m. UTC | #12
On 08/26/2018 11:12 PM, Honnappa Nagarahalli wrote:
> On 08/23/2018 08:33 PM, Wang, Yipeng1 wrote:
>> I think with Honnappa suggested "uint32_t* next", we may need a little
>> bit tricks to make it work with the extra linked list.
>> The performance may not be optimal though comparing to your original approach.
>> Is this important to your use case?
> 
>      It is. We are developing a DDoS protection system, and have chosen DPDK because it was the fastest framework in the evaluations we considered. We need to find the conflicting entries when a critical flow table of our system is overloaded due to an ongoing attack, so the more efficient we can evaluate the merits of an incoming flow against the conflicting flows already in the table, the higher the chances we find the flows that should be in the flow table.
> 
>      We've compromised with Honnappa under the understanding that once the underlying algorithm changes, there would be a review of the interface since even rte_hash_iterate() may be affected. I still think that the v2 we proposed is the best approach here because it isolates the interface from the underlying algorithm.
> 
> My only concern was to do with keeping the interfaces across APIs consistent. I am fine with changing 'uint32_t *next' as long as we change 'rte_hash_iterate' API as well.

    We'll patch rte_hash_iterate() as well in v3.

[ ]'s
Michel Machado
  

Patch

diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index a07543a29..de69f9966 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -42,6 +42,13 @@  static struct rte_tailq_elem rte_hash_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_hash_tailq)
 
+struct rte_hash_iterator_conflict_entries_state {
+	const struct rte_hash *h;
+	uint32_t              vnext;
+	uint32_t              primary_bidx;
+	uint32_t              secondary_bidx;
+};
+
 struct rte_hash *
 rte_hash_find_existing(const char *name)
 {
@@ -1160,3 +1167,77 @@  rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
 
 	return position - 1;
 }
+
+/* Get the primary bucket index given the precomputed hash value. */
+static inline uint32_t
+rte_hash_get_primary_bucket(const struct rte_hash *h, hash_sig_t sig)
+{
+	return sig & h->bucket_bitmask;
+}
+
+/* Get the secondary bucket index given the precomputed hash value. */
+static inline uint32_t
+rte_hash_get_secondary_bucket(const struct rte_hash *h, hash_sig_t sig)
+{
+	return rte_hash_secondary_hash(sig) & h->bucket_bitmask;
+}
+
+int32_t __rte_experimental
+rte_hash_iterator_conflict_entries_init(const struct rte_hash *h,
+	hash_sig_t sig, struct rte_conflict_iterator_state *state)
+{
+	struct rte_hash_iterator_conflict_entries_state *__state;
+
+	RETURN_IF_TRUE(((h == NULL) || (state == NULL)), -EINVAL);
+
+	__state = (struct rte_hash_iterator_conflict_entries_state *)state;
+	__state->h = h;
+	__state->vnext = 0;
+	__state->primary_bidx = rte_hash_get_primary_bucket(h, sig);
+	__state->secondary_bidx = rte_hash_get_secondary_bucket(h, sig);
+
+	return 0;
+}
+
+int32_t __rte_experimental
+rte_hash_iterate_conflict_entries(struct rte_conflict_iterator_state *state,
+	const void **key, const void **data)
+{
+	struct rte_hash_iterator_conflict_entries_state *__state;
+
+	RETURN_IF_TRUE(((state == NULL) || (key == NULL) ||
+		(data == NULL)), -EINVAL);
+
+	__state = (struct rte_hash_iterator_conflict_entries_state *)state;
+
+	while (__state->vnext < RTE_HASH_BUCKET_ENTRIES * 2) {
+		uint32_t bidx = (__state->vnext < RTE_HASH_BUCKET_ENTRIES) ?
+			__state->primary_bidx : __state->secondary_bidx;
+		uint32_t next = __state->vnext & (RTE_HASH_BUCKET_ENTRIES - 1);
+		uint32_t position = __state->h->buckets[bidx].key_idx[next];
+		struct rte_hash_key *next_key;
+		/*
+		 * The test below is unlikely because this iterator is meant
+		 * to be used after a failed insert.
+		 * */
+		if (unlikely(position == EMPTY_SLOT))
+			goto next;
+
+		/* Get the entry in key table. */
+		next_key = (struct rte_hash_key *) (
+			(char *)__state->h->key_store +
+			position * __state->h->key_entry_size);
+		/* Return key and data. */
+		*key = next_key->key;
+		*data = next_key->pdata;
+
+next:
+		/* Increment iterator. */
+		__state->vnext++;
+
+		if (likely(position != EMPTY_SLOT))
+			return position - 1;
+	}
+
+	return -ENOENT;
+}
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index f71ca9fbf..7ecb6a7eb 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -61,6 +61,11 @@  struct rte_hash_parameters {
 /** @internal A hash table structure. */
 struct rte_hash;
 
+/** @internal A hash table conflict iterator state structure. */
+struct rte_conflict_iterator_state {
+	uint8_t space[64];
+};
+
 /**
  * Create a new hash table.
  *
@@ -419,6 +424,42 @@  rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
  */
 int32_t
 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next);
+
+/**
+ * Initialize the iterator over entries that conflict with a new entry.
+ *
+ * @param h
+ *   Hash table to iterate
+ * @param sig
+ *   Precomputed hash value for the new entry.
+ * @return
+ *   - 0 if successful.
+ *   - -EINVAL if the parameters are invalid.
+ */
+int32_t __rte_experimental
+rte_hash_iterator_conflict_entries_init(const struct rte_hash *h,
+	hash_sig_t sig, struct rte_conflict_iterator_state *state);
+
+/**
+ * Iterate over entries that conflict with a new entry.
+ *
+ * @param state
+ *   Pointer to the iterator state.
+ * @param key
+ *   Output containing the key where current iterator
+ *   was pointing at.
+ * @param data
+ *   Output containing the data associated with key.
+ *   Returns NULL if data was not stored.
+ * @return
+ *   Position where key was stored, if successful.
+ *   - -EINVAL if the parameters are invalid.
+ *   - -ENOENT if there is no more conflicting entries.
+ */
+int32_t __rte_experimental
+rte_hash_iterate_conflict_entries(struct rte_conflict_iterator_state *state,
+	const void **key, const void **data);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
index 52a2576f9..c1c343e52 100644
--- a/lib/librte_hash/rte_hash_version.map
+++ b/lib/librte_hash/rte_hash_version.map
@@ -45,3 +45,10 @@  DPDK_16.07 {
 	rte_hash_get_key_with_position;
 
 } DPDK_2.2;
+
+EXPERIMENTAL {
+	global:
+
+	rte_hash_iterator_conflict_entries_init;
+	rte_hash_iterate_conflict_entries;
+};