@@ -57,17 +57,6 @@
/* Maximum size for inline data. */
#define MLX4_PMD_MAX_INLINE 0
-/*
- * Maximum number of cached Memory Pools (MPs) per TX queue. Each RTE MP
- * from which buffers are to be transmitted will have to be mapped by this
- * driver to their own Memory Region (MR). This is a slow operation.
- *
- * This value is always 1 for RX queues.
- */
-#ifndef MLX4_PMD_TX_MP_CACHE
-#define MLX4_PMD_TX_MP_CACHE 8
-#endif
-
/* Interrupt alarm timeout value in microseconds. */
#define MLX4_INTR_ALARM_TIMEOUT 100000
@@ -51,6 +51,7 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
+#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_ethdev.h>
@@ -77,60 +78,63 @@
mlx4_rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n)
{
unsigned int i;
- struct rxq_elt (*elts)[elts_n] =
- rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
- rxq->socket);
+	/* Number of SGEs per WR (rxq->sge_n stores its log2). */
+	const unsigned int sge_n = 1 << rxq->sge_n;
+ struct rte_mbuf *(*elts)[elts_n] =
+ rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, rxq->socket);
if (elts == NULL) {
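+		/* Zero elts_n so the error path's cleanup loop is skipped. */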
+ elts_n = 0;
rte_errno = ENOMEM;
ERROR("%p: can't allocate packets array", (void *)rxq);
goto error;
}
- /* For each WR (packet). */
- for (i = 0; (i != elts_n); ++i) {
- struct rxq_elt *elt = &(*elts)[i];
- struct ibv_recv_wr *wr = &elt->wr;
- struct ibv_sge *sge = &(*elts)[i].sge;
- struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
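+	/* Publish the array right away so the error path can free it. */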
+ rxq->elts = elts;
+ for (i = 0; i != elts_n; ++i) {
+ struct rte_mbuf *buf;
+ volatile struct mlx4_wqe_data_seg *scat =
+ &(*rxq->hw.wqes)[i];
+ buf = rte_pktmbuf_alloc(rxq->mp);
if (buf == NULL) {
rte_errno = ENOMEM;
ERROR("%p: empty mbuf pool", (void *)rxq);
goto error;
}
- elt->buf = buf;
- wr->next = &(*elts)[(i + 1)].wr;
- wr->sg_list = sge;
- wr->num_sge = 1;
/* Headroom is reserved by rte_pktmbuf_alloc(). */
assert(buf->data_off == RTE_PKTMBUF_HEADROOM);
/* Buffer is supposed to be empty. */
assert(rte_pktmbuf_data_len(buf) == 0);
assert(rte_pktmbuf_pkt_len(buf) == 0);
- /* sge->addr must be able to store a pointer. */
- assert(sizeof(sge->addr) >= sizeof(uintptr_t));
- /* SGE keeps its headroom. */
- sge->addr = (uintptr_t)
- ((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
- sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
- sge->lkey = rxq->mr->lkey;
- /* Redundant check for tailroom. */
- assert(sge->length == rte_pktmbuf_tailroom(buf));
+ assert(!buf->next);
+ /* Only the first segment keeps headroom. */
+ if (i % sge_n)
+ buf->data_off = 0;
+ buf->port = rxq->port_id;
+ buf->data_len = rte_pktmbuf_tailroom(buf);
+ buf->pkt_len = rte_pktmbuf_tailroom(buf);
+ buf->nb_segs = 1;
+ /* scat->addr must be able to store a pointer. */
+ assert(sizeof(scat->addr) >= sizeof(uintptr_t));
+ *scat = (struct mlx4_wqe_data_seg){
+ .addr =
+ rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)),
+ .byte_count = rte_cpu_to_be_32(buf->data_len),
+ .lkey = rte_cpu_to_be_32(rxq->mr->lkey),
+ };
+ (*rxq->elts)[i] = buf;
}
- /* The last WR pointer must be NULL. */
- (*elts)[(i - 1)].wr.next = NULL;
- DEBUG("%p: allocated and configured %u single-segment WRs",
- (void *)rxq, elts_n);
- rxq->elts_n = elts_n;
- rxq->elts_head = 0;
- rxq->elts = elts;
+ DEBUG("%p: allocated and configured %u segments (max %u packets)",
+ (void *)rxq, elts_n, elts_n >> rxq->sge_n);
+ rxq->elts_n = log2above(elts_n);
return 0;
error:
- if (elts != NULL) {
- for (i = 0; (i != RTE_DIM(*elts)); ++i)
- rte_pktmbuf_free_seg((*elts)[i].buf);
- rte_free(elts);
+ for (i = 0; i != elts_n; ++i) {
+ if ((*rxq->elts)[i] != NULL)
+ rte_pktmbuf_free_seg((*rxq->elts)[i]);
+ (*rxq->elts)[i] = NULL;
}
+ rte_free(rxq->elts);
+ rxq->elts = NULL;
DEBUG("%p: failed, freed everything", (void *)rxq);
assert(rte_errno > 0);
return -rte_errno;
@@ -146,17 +150,18 @@
mlx4_rxq_free_elts(struct rxq *rxq)
{
unsigned int i;
- unsigned int elts_n = rxq->elts_n;
- struct rxq_elt (*elts)[elts_n] = rxq->elts;
DEBUG("%p: freeing WRs", (void *)rxq);
+ if (rxq->elts == NULL)
+ return;
+
+ for (i = 0; i != (1u << rxq->elts_n); ++i) {
+ if ((*rxq->elts)[i] != NULL)
+ rte_pktmbuf_free_seg((*rxq->elts)[i]);
+ }
+ rte_free(rxq->elts);
rxq->elts_n = 0;
rxq->elts = NULL;
- if (elts == NULL)
- return;
- for (i = 0; (i != RTE_DIM(*elts)); ++i)
- rte_pktmbuf_free_seg((*elts)[i].buf);
- rte_free(elts);
}
/**
@@ -198,7 +203,8 @@
* QP pointer or NULL in case of error and rte_errno is set.
*/
static struct ibv_qp *
-mlx4_rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc)
+mlx4_rxq_setup_qp(struct priv *priv, struct ibv_cq *cq,
+ uint16_t desc, unsigned int sge_n)
{
struct ibv_qp *qp;
struct ibv_qp_init_attr attr = {
@@ -212,7 +218,7 @@
priv->device_attr.max_qp_wr :
desc),
/* Max number of scatter/gather elements in a WR. */
- .max_recv_sge = 1,
+ .max_recv_sge = sge_n,
},
.qp_type = IBV_QPT_RAW_PACKET,
};
@@ -248,32 +254,43 @@
struct rte_mempool *mp)
{
struct priv *priv = dev->data->dev_private;
+ struct mlx4dv_obj mlxdv;
+ struct mlx4dv_qp dv_qp;
+ struct mlx4dv_cq dv_cq;
struct rxq tmpl = {
.priv = priv,
.mp = mp,
.socket = socket
};
struct ibv_qp_attr mod;
- struct ibv_recv_wr *bad_wr;
unsigned int mb_len;
int ret;
(void)conf; /* Thresholds configuration (ignored). */
mb_len = rte_pktmbuf_data_room_size(mp);
- if (desc == 0) {
- rte_errno = EINVAL;
- ERROR("%p: invalid number of Rx descriptors", (void *)dev);
- goto error;
- }
/* Enable scattered packets support for this queue if necessary. */
assert(mb_len >= RTE_PKTMBUF_HEADROOM);
if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
(mb_len - RTE_PKTMBUF_HEADROOM)) {
- ;
+ tmpl.sge_n = 0;
} else if (dev->data->dev_conf.rxmode.enable_scatter) {
- WARN("%p: scattered mode has been requested but is"
- " not supported, this may lead to packet loss",
- (void *)dev);
+ unsigned int sges_n;
+ unsigned int rx_pkt_len =
+ dev->data->dev_conf.rxmode.jumbo_frame ?
+ dev->data->dev_conf.rxmode.max_rx_pkt_len :
+ ETHER_MTU;
+
+ if (rx_pkt_len < ETHER_MTU)
+ rx_pkt_len = ETHER_MTU;
+		/* Only the first mbuf keeps headroom. */
+ rx_pkt_len = rx_pkt_len - mb_len + RTE_PKTMBUF_HEADROOM;
+ /*
+ * Determine the number of SGEs needed for a full packet
+ * and round it to the next power of two.
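+		 *
+		 * For example, assuming mb_len = 2048 and
+		 * RTE_PKTMBUF_HEADROOM = 128, a 9000-byte frame gives
+		 * rx_pkt_len = 7080 and sges_n = 3 + 1 + 1 = 5, rounded
+		 * up to 8 SGEs per WR (sge_n = 3).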
+ */
+ sges_n = (rx_pkt_len / mb_len) + !!(rx_pkt_len % mb_len) + 1;
+ tmpl.sge_n = log2above(sges_n);
+ desc >>= tmpl.sge_n;
} else {
WARN("%p: the requested maximum Rx packet size (%u) is"
" larger than a single mbuf (%u) and scattered"
@@ -282,6 +299,8 @@
dev->data->dev_conf.rxmode.max_rx_pkt_len,
mb_len - RTE_PKTMBUF_HEADROOM);
}
+	DEBUG("%p: number of SGEs %u (%u WRs)",
+ (void *)dev, 1 << tmpl.sge_n, desc);
/* Use the entire Rx mempool as the memory region. */
tmpl.mr = mlx4_mp2mr(priv->pd, mp);
if (tmpl.mr == NULL) {
@@ -317,7 +336,7 @@
priv->device_attr.max_qp_wr);
DEBUG("priv->device_attr.max_sge is %d",
priv->device_attr.max_sge);
- tmpl.qp = mlx4_rxq_setup_qp(priv, tmpl.cq, desc);
+ tmpl.qp = mlx4_rxq_setup_qp(priv, tmpl.cq, desc, 1 << tmpl.sge_n);
if (tmpl.qp == NULL) {
ERROR("%p: QP creation failure: %s",
(void *)dev, strerror(rte_errno));
@@ -336,21 +355,6 @@
(void *)dev, strerror(rte_errno));
goto error;
}
- ret = mlx4_rxq_alloc_elts(&tmpl, desc);
- if (ret) {
- ERROR("%p: RXQ allocation failed: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- ret = ibv_post_recv(tmpl.qp, &(*tmpl.elts)[0].wr, &bad_wr);
- if (ret) {
- rte_errno = ret;
- ERROR("%p: ibv_post_recv() failed for WR %p: %s",
- (void *)dev,
- (void *)bad_wr,
- strerror(rte_errno));
- goto error;
- }
mod = (struct ibv_qp_attr){
.qp_state = IBV_QPS_RTR
};
@@ -361,14 +365,44 @@
(void *)dev, strerror(rte_errno));
goto error;
}
+	/* Retrieve HW-dependent info through Direct Verbs (mlx4dv). */
+ mlxdv.cq.in = tmpl.cq;
+ mlxdv.cq.out = &dv_cq;
+ mlxdv.qp.in = tmpl.qp;
+ mlxdv.qp.out = &dv_qp;
+ ret = mlx4dv_init_obj(&mlxdv, MLX4DV_OBJ_QP | MLX4DV_OBJ_CQ);
+ if (ret) {
+		ERROR("%p: failed to retrieve device object info", (void *)dev);
+ goto error;
+ }
+	/* Initialize HW-dependent fields. */
+ tmpl.hw.wqes =
+ (volatile struct mlx4_wqe_data_seg (*)[])
+ ((char *)dv_qp.buf.buf + dv_qp.rq.offset);
+ tmpl.hw.rq_db = dv_qp.rdb;
+ tmpl.hw.rq_ci = 0;
+ tmpl.mcq.buf = dv_cq.buf.buf;
+ tmpl.mcq.cqe_cnt = dv_cq.cqe_cnt;
+ tmpl.mcq.set_ci_db = dv_cq.set_ci_db;
+ tmpl.mcq.cqe_64 = (dv_cq.cqe_size & 64) ? 1 : 0;
/* Save port ID. */
tmpl.port_id = dev->data->port_id;
DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id);
+ ret = mlx4_rxq_alloc_elts(&tmpl, desc << tmpl.sge_n);
+ if (ret) {
+ ERROR("%p: RXQ allocation failed: %s",
+ (void *)dev, strerror(rte_errno));
+ goto error;
+ }
/* Clean up rxq in case we're reinitializing it. */
DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
mlx4_rxq_cleanup(rxq);
*rxq = tmpl;
DEBUG("%p: rxq updated with %p", (void *)rxq, (void *)&tmpl);
+ /* Update doorbell counter. */
+ rxq->hw.rq_ci = desc;
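+	/* Ensure WQE writes reach memory before the doorbell update. */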
+ rte_wmb();
+ *rxq->hw.rq_db = rte_cpu_to_be_32(rxq->hw.rq_ci);
return 0;
error:
ret = rte_errno;
@@ -406,6 +440,12 @@
struct rxq *rxq = dev->data->rx_queues[idx];
int ret;
+ if (!rte_is_power_of_2(desc)) {
+ desc = 1 << log2above(desc);
+ WARN("%p: increased number of descriptors in RX queue %u"
+ " to the next power of two (%d)",
+ (void *)dev, idx, desc);
+ }
DEBUG("%p: configuring queue %u for %u descriptors",
(void *)dev, idx, desc);
if (idx >= dev->data->nb_rx_queues) {
@@ -521,9 +521,45 @@
}
/**
- * DPDK callback for Rx.
+ * Poll one CQE from CQ.
*
- * The following function doesn't manage scattered packets.
+ * @param rxq
+ * Pointer to the receive queue structure.
+ * @param[out] out
+ *   Just-polled CQE.
+ *
+ * @return
+ *   Byte count of the polled CQE, 0 if there is no completion.
+ */
+static unsigned int
+mlx4_cq_poll_one(struct rxq *rxq,
+ struct mlx4_cqe **out)
+{
+ int ret = 0;
+ struct mlx4_cqe *cqe = NULL;
+ struct mlx4_cq *cq = &rxq->mcq;
+
+ cqe = (struct mlx4_cqe *)mlx4_get_cqe(cq, cq->cons_index);
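+	/*
+	 * The owner bit written by HW flips on every pass over the CQ
+	 * ring; a CQE carries a new completion only when this bit
+	 * matches the parity of cons_index, otherwise HW still owns it.
+	 */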
+ if (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+ !!(cq->cons_index & cq->cqe_cnt))
+ goto out;
+ /*
+ * Make sure we read CQ entry contents after we've checked the
+ * ownership bit.
+ */
+ rte_rmb();
+ assert(!(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK));
+ assert((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) !=
+ MLX4_CQE_OPCODE_ERROR);
+ ret = rte_be_to_cpu_32(cqe->byte_cnt);
+ ++cq->cons_index;
+out:
+ *out = cqe;
+ return ret;
+}
+
+/**
+ * DPDK callback for Rx with scattered packets support.
*
* @param dpdk_rxq
* Generic pointer to Rx queue structure.
@@ -538,112 +574,109 @@
uint16_t
mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
- struct rxq *rxq = (struct rxq *)dpdk_rxq;
- struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts;
- const unsigned int elts_n = rxq->elts_n;
- unsigned int elts_head = rxq->elts_head;
- struct ibv_wc wcs[pkts_n];
- struct ibv_recv_wr *wr_head = NULL;
- struct ibv_recv_wr **wr_next = &wr_head;
- struct ibv_recv_wr *wr_bad = NULL;
- unsigned int i;
- unsigned int pkts_ret = 0;
- int ret;
+ struct rxq *rxq = dpdk_rxq;
+ const unsigned int wr_cnt = (1 << rxq->elts_n) - 1;
+ const unsigned int sge_n = rxq->sge_n;
+ struct rte_mbuf *pkt = NULL;
+ struct rte_mbuf *seg = NULL;
+ unsigned int i = 0;
+	/* Local rq_ci counts SGEs; rxq->hw.rq_ci counts whole strides. */
+	unsigned int rq_ci = (rxq->hw.rq_ci << sge_n);
+ int len = 0;
- ret = ibv_poll_cq(rxq->cq, pkts_n, wcs);
- if (unlikely(ret == 0))
- return 0;
- if (unlikely(ret < 0)) {
- DEBUG("rxq=%p, ibv_poll_cq() failed (wc_n=%d)",
- (void *)rxq, ret);
- return 0;
- }
- assert(ret <= (int)pkts_n);
- /* For each work completion. */
- for (i = 0; i != (unsigned int)ret; ++i) {
- struct ibv_wc *wc = &wcs[i];
- struct rxq_elt *elt = &(*elts)[elts_head];
- struct ibv_recv_wr *wr = &elt->wr;
- uint32_t len = wc->byte_len;
- struct rte_mbuf *seg = elt->buf;
- struct rte_mbuf *rep;
+ while (pkts_n) {
+ struct mlx4_cqe *cqe;
+ unsigned int idx = rq_ci & wr_cnt;
+ struct rte_mbuf *rep = (*rxq->elts)[idx];
+ volatile struct mlx4_wqe_data_seg *scat =
+ &(*rxq->hw.wqes)[idx];
- /* Sanity checks. */
- assert(wr->sg_list == &elt->sge);
- assert(wr->num_sge == 1);
- assert(elts_head < rxq->elts_n);
- assert(rxq->elts_head < rxq->elts_n);
- /*
- * Fetch initial bytes of packet descriptor into a
- * cacheline while allocating rep.
- */
- rte_mbuf_prefetch_part1(seg);
- rte_mbuf_prefetch_part2(seg);
- /* Link completed WRs together for repost. */
- *wr_next = wr;
- wr_next = &wr->next;
- if (unlikely(wc->status != IBV_WC_SUCCESS)) {
- /* Whatever, just repost the offending WR. */
- DEBUG("rxq=%p: bad work completion status (%d): %s",
- (void *)rxq, wc->status,
- ibv_wc_status_str(wc->status));
- /* Increment dropped packets counter. */
- ++rxq->stats.idropped;
- goto repost;
- }
+ /* Update the 'next' pointer of the previous segment. */
+ if (pkt)
+ seg->next = rep;
+ seg = rep;
+ rte_prefetch0(seg);
+ rte_prefetch0(scat);
rep = rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(rep == NULL)) {
- /*
- * Unable to allocate a replacement mbuf,
- * repost WR.
- */
- DEBUG("rxq=%p: can't allocate a new mbuf",
- (void *)rxq);
- /* Increase out of memory counters. */
++rxq->stats.rx_nombuf;
- ++rxq->priv->dev->data->rx_mbuf_alloc_failed;
- goto repost;
+ if (!pkt) {
+ /*
+ * No buffers before we even started,
+ * bail out silently.
+ */
+ break;
+ }
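+			/* Unchain and free all segments gathered so far. */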
+ while (pkt != seg) {
+ assert(pkt != (*rxq->elts)[idx]);
+ rep = pkt->next;
+ pkt->next = NULL;
+ pkt->nb_segs = 1;
+ rte_mbuf_raw_free(pkt);
+ pkt = rep;
+ }
+ break;
+ }
+ if (!pkt) {
+			/* Look for the start of a new packet. */
+ len = mlx4_cq_poll_one(rxq, &cqe);
+ if (!len) {
+ rte_mbuf_raw_free(rep);
+ break;
+ }
+ if (unlikely(len < 0)) {
+				/* Rx error, packet is likely too large. */
+ rte_mbuf_raw_free(rep);
+ ++rxq->stats.idropped;
+ goto skip;
+ }
+ pkt = seg;
+ pkt->packet_type = 0;
+ pkt->ol_flags = 0;
+ pkt->pkt_len = len;
}
- /* Reconfigure sge to use rep instead of seg. */
- elt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM;
- assert(elt->sge.lkey == rxq->mr->lkey);
- elt->buf = rep;
- /* Update seg information. */
- seg->data_off = RTE_PKTMBUF_HEADROOM;
- seg->nb_segs = 1;
- seg->port = rxq->port_id;
- seg->next = NULL;
- seg->pkt_len = len;
+ rep->nb_segs = 1;
+ rep->port = rxq->port_id;
+ rep->data_len = seg->data_len;
+ rep->data_off = seg->data_off;
+ (*rxq->elts)[idx] = rep;
+ /*
+ * Fill NIC descriptor with the new buffer. The lkey and size
+ * of the buffers are already known, only the buffer address
+ * changes.
+ */
+ scat->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
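+		/* If bytes remain, the packet continues in the next SGE. */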
+ if (len > seg->data_len) {
+ len -= seg->data_len;
+ ++pkt->nb_segs;
+ ++rq_ci;
+ continue;
+ }
+ /* The last segment. */
seg->data_len = len;
- seg->packet_type = 0;
- seg->ol_flags = 0;
+ /* Increment bytes counter. */
+ rxq->stats.ibytes += pkt->pkt_len;
/* Return packet. */
- *(pkts++) = seg;
- ++pkts_ret;
- /* Increase bytes counter. */
- rxq->stats.ibytes += len;
-repost:
- if (++elts_head >= elts_n)
- elts_head = 0;
- continue;
+ *(pkts++) = pkt;
+ pkt = NULL;
+ --pkts_n;
+ ++i;
+skip:
+ /* Align consumer index to the next stride. */
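+		/* E.g. with sge_n = 2, rq_ci = 5 becomes 8 (next stride). */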
+ rq_ci >>= sge_n;
+ ++rq_ci;
+ rq_ci <<= sge_n;
}
- if (unlikely(i == 0))
+ if (unlikely((i == 0) && ((rq_ci >> sge_n) == rxq->hw.rq_ci)))
return 0;
- /* Repost WRs. */
- *wr_next = NULL;
- assert(wr_head);
- ret = ibv_post_recv(rxq->qp, wr_head, &wr_bad);
- if (unlikely(ret)) {
- /* Inability to repost WRs is fatal. */
- DEBUG("%p: recv_burst(): failed (ret=%d)",
- (void *)rxq->priv,
- ret);
- abort();
- }
- rxq->elts_head = elts_head;
- /* Increase packets counter. */
- rxq->stats.ipackets += pkts_ret;
- return pkts_ret;
+ /* Update the consumer index. */
+ rxq->hw.rq_ci = rq_ci >> sge_n;
+ rte_wmb();
+ *rxq->hw.rq_db = rte_cpu_to_be_32(rxq->hw.rq_ci);
+ *rxq->mcq.set_ci_db =
+ rte_cpu_to_be_32(rxq->mcq.cons_index & 0xffffff);
+ /* Increment packets counter. */
+ rxq->stats.ipackets += i;
+ return i;
}
/**
@@ -62,13 +62,6 @@ struct mlx4_rxq_stats {
uint64_t rx_nombuf; /**< Total of Rx mbuf allocation failures. */
};
-/** Rx element. */
-struct rxq_elt {
- struct ibv_recv_wr wr; /**< Work request. */
- struct ibv_sge sge; /**< Scatter/gather element. */
- struct rte_mbuf *buf; /**< Buffer. */
-};
-
/** Rx queue descriptor. */
struct rxq {
struct priv *priv; /**< Back pointer to private data. */
@@ -78,9 +71,15 @@ struct rxq {
struct ibv_qp *qp; /**< Queue pair. */
struct ibv_comp_channel *channel; /**< Rx completion channel. */
unsigned int port_id; /**< Port ID for incoming packets. */
- unsigned int elts_n; /**< (*elts)[] length. */
- unsigned int elts_head; /**< Current index in (*elts)[]. */
- struct rxq_elt (*elts)[]; /**< Rx elements. */
+	unsigned int elts_n; /**< Log 2 of the number of mbufs. */
+ struct rte_mbuf *(*elts)[]; /**< Rx elements. */
+ struct {
+		volatile struct mlx4_wqe_data_seg (*wqes)[];
+		/**< RQ work queue entries (one data segment per SGE). */
+		volatile uint32_t *rq_db; /**< RQ doorbell record. */
+		uint16_t rq_ci; /**< Saved RQ consumer index. */
+	} hw; /**< HW-specific queue information. */
+ struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+	unsigned int sge_n; /**< Log 2 of SGEs per WR. */
struct mlx4_rxq_stats stats; /**< Rx queue counters. */
unsigned int socket; /**< CPU socket ID for allocations. */
};
@@ -108,4 +108,24 @@
int mlx4_fd_set_non_blocking(int fd);
+/**
+ * Return the logarithm of the nearest power of two above input value.
+ *
+ * @param v
+ * Input value.
+ *
+ * @return
+ *   Logarithm of the nearest power of two above the input value,
+ *   e.g. log2above(5) == 3, log2above(8) == 3, log2above(9) == 4.
+ */
+static inline unsigned int
+log2above(unsigned int v)
+{
+ unsigned int l;
+ unsigned int r;
+
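+	/* l counts floor(log2(v)); r records whether a lower bit was set. */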
+ for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
+ r |= (v & 1);
+ return l + r;
+}
+
#endif /* MLX4_UTILS_H_ */