Message ID | CAHVfvh7ggGB_q1Rs1c3-9PRwDr_GKA+etaMXRSeKCfUKoUx8hQ@mail.gmail.com (mailing list archive) |
---|---|
State | Not Applicable, archived |
Headers | show |
On Fri, Nov 07, 2014 at 04:31:18PM +0200, jigsaw wrote: > Hi Bruce, > > Pls have a quick look at the diff to see if this is exactly what you mean > about the bitmask. > I just wrote it without even compiling, just to express the idea. So it may > leave some places unpatched. > If this is agreed, I will make a decent test to verify it before sending > the patch for RFC. > > diff --git a/lib/librte_distributor/rte_distributor.c > b/lib/librte_distributor/rte_di > index 585ff88..d606bcf 100644 > --- a/lib/librte_distributor/rte_distributor.c > +++ b/lib/librte_distributor/rte_distributor.c > @@ -92,6 +92,8 @@ struct rte_distributor { > unsigned num_workers; /**< Number of workers > polling */ > > uint32_t in_flight_tags[RTE_MAX_LCORE]; > + uint32_t in_flight_bitmask; > + > struct rte_distributor_backlog backlog[RTE_MAX_LCORE]; > > union rte_distributor_buffer bufs[RTE_MAX_LCORE]; > @@ -188,6 +190,7 @@ static inline void > handle_worker_shutdown(struct rte_distributor *d, unsigned wkr) > { > d->in_flight_tags[wkr] = 0; > + d->in_flight_mask &= ~(1 << wkr); > d->bufs[wkr].bufptr64 = 0; > if (unlikely(d->backlog[wkr].count != 0)) { > /* On return of a packet, we need to move the > @@ -241,6 +244,7 @@ process_returns(struct rte_distributor *d) > else { > d->bufs[wkr].bufptr64 = RTE_DISTRIB_GET_BUF; > d->in_flight_tags[wkr] = 0; > + d->in_flight_mask &= ~(1 << wkr); > } > oldbuf = data >> RTE_DISTRIB_FLAG_BITS; > } else if (data & RTE_DISTRIB_RETURN_BUF) { > @@ -282,12 +286,13 @@ rte_distributor_process(struct rte_distributor *d, > next_mb = mbufs[next_idx++]; > next_value = (((int64_t)(uintptr_t)next_mb) > << RTE_DISTRIB_FLAG_BITS); > - new_tag = (next_mb->hash.rss | 1); > + new_tag = next_mb->hash.rss; > > uint32_t match = 0; > unsigned i; > for (i = 0; i < d->num_workers; i++) > - match |= (!(d->in_flight_tags[i] ^ new_tag) > + match |= (((!(d->in_flight_tags[i] ^ > new_tag)) & > + (d->in_flight_bitmask >> i)) I would not do the bitmask comparison here, as that's extra 
instruction in the loop. Instead, because it's a bitmask, build up the match variable as it was before, and then just do a single AND operation afterwards, outside the loop body. /Bruce > << i); > > if (match) { > @@ -309,6 +314,7 @@ rte_distributor_process(struct rte_distributor *d, > else { > d->bufs[wkr].bufptr64 = next_value; > d->in_flight_tags[wkr] = new_tag; > + d->in_flight_bitmask |= 1 << wkr; > next_mb = NULL; > } > oldbuf = data >> RTE_DISTRIB_FLAG_BITS; > > >
Yeah that's better. As below, right? @@ -290,6 +294,7 @@ rte_distributor_process(struct rte_distributor *d, match |= (!(d->in_flight_tags[i] ^ new_tag) << i); + match &= d->in_flight_bitmask; if (match) { next_mb = NULL; unsigned worker = __builtin_ctz(match); On Fri, Nov 7, 2014 at 4:44 PM, Bruce Richardson <bruce.richardson@intel.com > wrote: > On Fri, Nov 07, 2014 at 04:31:18PM +0200, jigsaw wrote: > > Hi Bruce, > > > > Pls have a quick look at the diff to see if this is exactly what you mean > > about the bitmask. > > I just wrote it without even compiling, just to express the idea. So it > may > > leave some places unpatched. > > If this is agreed, I will make a decent test to verify it before sending > > the patch for RFC. > > > > diff --git a/lib/librte_distributor/rte_distributor.c > > b/lib/librte_distributor/rte_di > > index 585ff88..d606bcf 100644 > > --- a/lib/librte_distributor/rte_distributor.c > > +++ b/lib/librte_distributor/rte_distributor.c > > @@ -92,6 +92,8 @@ struct rte_distributor { > > unsigned num_workers; /**< Number of workers > > polling */ > > > > uint32_t in_flight_tags[RTE_MAX_LCORE]; > > + uint32_t in_flight_bitmask; > > + > > struct rte_distributor_backlog backlog[RTE_MAX_LCORE]; > > > > union rte_distributor_buffer bufs[RTE_MAX_LCORE]; > > @@ -188,6 +190,7 @@ static inline void > > handle_worker_shutdown(struct rte_distributor *d, unsigned wkr) > > { > > d->in_flight_tags[wkr] = 0; > > + d->in_flight_mask &= ~(1 << wkr); > > d->bufs[wkr].bufptr64 = 0; > > if (unlikely(d->backlog[wkr].count != 0)) { > > /* On return of a packet, we need to move the > > @@ -241,6 +244,7 @@ process_returns(struct rte_distributor *d) > > else { > > d->bufs[wkr].bufptr64 = > RTE_DISTRIB_GET_BUF; > > d->in_flight_tags[wkr] = 0; > > + d->in_flight_mask &= ~(1 << wkr); > > } > > oldbuf = data >> RTE_DISTRIB_FLAG_BITS; > > } else if (data & RTE_DISTRIB_RETURN_BUF) { > > @@ -282,12 +286,13 @@ rte_distributor_process(struct rte_distributor *d, > > next_mb = 
mbufs[next_idx++]; > > next_value = (((int64_t)(uintptr_t)next_mb) > > << RTE_DISTRIB_FLAG_BITS); > > - new_tag = (next_mb->hash.rss | 1); > > + new_tag = next_mb->hash.rss; > > > > uint32_t match = 0; > > unsigned i; > > for (i = 0; i < d->num_workers; i++) > > - match |= (!(d->in_flight_tags[i] ^ > new_tag) > > + match |= (((!(d->in_flight_tags[i] ^ > > new_tag)) & > > + (d->in_flight_bitmask >> > i)) > > I would not do the bitmask comparison here, as that's extra instruction in > the > loop. Instead, because its a bitmask, build up the match variable as it was > before, and then just do a single and operation afterwards, outside the > loop > body. > > /Bruce > > > << i); > > > > if (match) { > > @@ -309,6 +314,7 @@ rte_distributor_process(struct rte_distributor *d, > > else { > > d->bufs[wkr].bufptr64 = next_value; > > d->in_flight_tags[wkr] = new_tag; > > + d->in_flight_bitmask |= 1 << wkr; > > next_mb = NULL; > > } > > oldbuf = data >> RTE_DISTRIB_FLAG_BITS; > > > > > > >
On Fri, Nov 07, 2014 at 04:52:46PM +0200, jigsaw wrote: > Yeah that's better. As below, right? Yep. > > @@ -290,6 +294,7 @@ rte_distributor_process(struct rte_distributor *d, > match |= (!(d->in_flight_tags[i] ^ new_tag) > << i); > > + match &= d->in_flight_bitmask; > if (match) { > next_mb = NULL; > unsigned worker = __builtin_ctz(match); > > > On Fri, Nov 7, 2014 at 4:44 PM, Bruce Richardson <bruce.richardson@intel.com > > wrote: > > > On Fri, Nov 07, 2014 at 04:31:18PM +0200, jigsaw wrote: > > > Hi Bruce, > > > > > > Pls have a quick look at the diff to see if this is exactly what you mean > > > about the bitmask. > > > I just wrote it without even compiling, just to express the idea. So it > > may > > > leave some places unpatched. > > > If this is agreed, I will make a decent test to verify it before sending > > > the patch for RFC. > > > > > > diff --git a/lib/librte_distributor/rte_distributor.c > > > b/lib/librte_distributor/rte_di > > > index 585ff88..d606bcf 100644 > > > --- a/lib/librte_distributor/rte_distributor.c > > > +++ b/lib/librte_distributor/rte_distributor.c > > > @@ -92,6 +92,8 @@ struct rte_distributor { > > > unsigned num_workers; /**< Number of workers > > > polling */ > > > > > > uint32_t in_flight_tags[RTE_MAX_LCORE]; > > > + uint32_t in_flight_bitmask; > > > + > > > struct rte_distributor_backlog backlog[RTE_MAX_LCORE]; > > > > > > union rte_distributor_buffer bufs[RTE_MAX_LCORE]; > > > @@ -188,6 +190,7 @@ static inline void > > > handle_worker_shutdown(struct rte_distributor *d, unsigned wkr) > > > { > > > d->in_flight_tags[wkr] = 0; > > > + d->in_flight_mask &= ~(1 << wkr); > > > d->bufs[wkr].bufptr64 = 0; > > > if (unlikely(d->backlog[wkr].count != 0)) { > > > /* On return of a packet, we need to move the > > > @@ -241,6 +244,7 @@ process_returns(struct rte_distributor *d) > > > else { > > > d->bufs[wkr].bufptr64 = > > RTE_DISTRIB_GET_BUF; > > > d->in_flight_tags[wkr] = 0; > > > + d->in_flight_mask &= ~(1 << wkr); > > > } > > > oldbuf 
= data >> RTE_DISTRIB_FLAG_BITS; > > > } else if (data & RTE_DISTRIB_RETURN_BUF) { > > > @@ -282,12 +286,13 @@ rte_distributor_process(struct rte_distributor *d, > > > next_mb = mbufs[next_idx++]; > > > next_value = (((int64_t)(uintptr_t)next_mb) > > > << RTE_DISTRIB_FLAG_BITS); > > > - new_tag = (next_mb->hash.rss | 1); > > > + new_tag = next_mb->hash.rss; > > > > > > uint32_t match = 0; > > > unsigned i; > > > for (i = 0; i < d->num_workers; i++) > > > - match |= (!(d->in_flight_tags[i] ^ > > new_tag) > > > + match |= (((!(d->in_flight_tags[i] ^ > > > new_tag)) & > > > + (d->in_flight_bitmask >> > > i)) > > > > I would not do the bitmask comparison here, as that's extra instruction in > > the > > loop. Instead, because its a bitmask, build up the match variable as it was > > before, and then just do a single and operation afterwards, outside the > > loop > > body. > > > > /Bruce > > > > > << i); > > > > > > if (match) { > > > @@ -309,6 +314,7 @@ rte_distributor_process(struct rte_distributor *d, > > > else { > > > d->bufs[wkr].bufptr64 = next_value; > > > d->in_flight_tags[wkr] = new_tag; > > > + d->in_flight_bitmask |= 1 << wkr; > > > next_mb = NULL; > > > } > > > oldbuf = data >> RTE_DISTRIB_FLAG_BITS; > > > > > > > > > > >
OK thanks Bruce. I will get the patch done in coming week. -qinglai On Fri, Nov 7, 2014 at 5:04 PM, Bruce Richardson <bruce.richardson@intel.com > wrote: > On Fri, Nov 07, 2014 at 04:52:46PM +0200, jigsaw wrote: > > Yeah that's better. As below, right? > > Yep. > > > > > @@ -290,6 +294,7 @@ rte_distributor_process(struct rte_distributor *d, > > match |= (!(d->in_flight_tags[i] ^ > new_tag) > > << i); > > > > + match &= d->in_flight_bitmask; > > if (match) { > > next_mb = NULL; > > unsigned worker = __builtin_ctz(match); > > > > > > On Fri, Nov 7, 2014 at 4:44 PM, Bruce Richardson < > bruce.richardson@intel.com > > > wrote: > > > > > On Fri, Nov 07, 2014 at 04:31:18PM +0200, jigsaw wrote: > > > > Hi Bruce, > > > > > > > > Pls have a quick look at the diff to see if this is exactly what you > mean > > > > about the bitmask. > > > > I just wrote it without even compiling, just to express the idea. So > it > > > may > > > > leave some places unpatched. > > > > If this is agreed, I will make a decent test to verify it before > sending > > > > the patch for RFC. 
> > > > > > > > diff --git a/lib/librte_distributor/rte_distributor.c > > > > b/lib/librte_distributor/rte_di > > > > index 585ff88..d606bcf 100644 > > > > --- a/lib/librte_distributor/rte_distributor.c > > > > +++ b/lib/librte_distributor/rte_distributor.c > > > > @@ -92,6 +92,8 @@ struct rte_distributor { > > > > unsigned num_workers; /**< Number of workers > > > > polling */ > > > > > > > > uint32_t in_flight_tags[RTE_MAX_LCORE]; > > > > + uint32_t in_flight_bitmask; > > > > + > > > > struct rte_distributor_backlog backlog[RTE_MAX_LCORE]; > > > > > > > > union rte_distributor_buffer bufs[RTE_MAX_LCORE]; > > > > @@ -188,6 +190,7 @@ static inline void > > > > handle_worker_shutdown(struct rte_distributor *d, unsigned wkr) > > > > { > > > > d->in_flight_tags[wkr] = 0; > > > > + d->in_flight_mask &= ~(1 << wkr); > > > > d->bufs[wkr].bufptr64 = 0; > > > > if (unlikely(d->backlog[wkr].count != 0)) { > > > > /* On return of a packet, we need to move the > > > > @@ -241,6 +244,7 @@ process_returns(struct rte_distributor *d) > > > > else { > > > > d->bufs[wkr].bufptr64 = > > > RTE_DISTRIB_GET_BUF; > > > > d->in_flight_tags[wkr] = 0; > > > > + d->in_flight_mask &= ~(1 << wkr); > > > > } > > > > oldbuf = data >> RTE_DISTRIB_FLAG_BITS; > > > > } else if (data & RTE_DISTRIB_RETURN_BUF) { > > > > @@ -282,12 +286,13 @@ rte_distributor_process(struct rte_distributor > *d, > > > > next_mb = mbufs[next_idx++]; > > > > next_value = (((int64_t)(uintptr_t)next_mb) > > > > << RTE_DISTRIB_FLAG_BITS); > > > > - new_tag = (next_mb->hash.rss | 1); > > > > + new_tag = next_mb->hash.rss; > > > > > > > > uint32_t match = 0; > > > > unsigned i; > > > > for (i = 0; i < d->num_workers; i++) > > > > - match |= (!(d->in_flight_tags[i] ^ > > > new_tag) > > > > + match |= (((!(d->in_flight_tags[i] ^ > > > > new_tag)) & > > > > + > (d->in_flight_bitmask >> > > > i)) > > > > > > I would not do the bitmask comparison here, as that's extra > instruction in > > > the > > > loop. 
Instead, because its a bitmask, build up the match variable as > it was > > > before, and then just do a single and operation afterwards, outside the > > > loop > > > body. > > > > > > /Bruce > > > > > > > << i); > > > > > > > > if (match) { > > > > @@ -309,6 +314,7 @@ rte_distributor_process(struct rte_distributor > *d, > > > > else { > > > > d->bufs[wkr].bufptr64 = next_value; > > > > d->in_flight_tags[wkr] = new_tag; > > > > + d->in_flight_bitmask |= 1 << wkr; > > > > next_mb = NULL; > > > > } > > > > oldbuf = data >> RTE_DISTRIB_FLAG_BITS; > > > > > > > > > > > > > > > >
diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c index 585ff88..d606bcf 100644 --- a/lib/librte_distributor/rte_distributor.c +++ b/lib/librte_distributor/rte_distributor.c @@ -92,6 +92,8 @@ struct rte_distributor { unsigned num_workers; /**< Number of workers polling */ uint32_t in_flight_tags[RTE_MAX_LCORE]; + uint32_t in_flight_bitmask; + struct rte_distributor_backlog backlog[RTE_MAX_LCORE];