[dpdk-dev,RFC,06/14] mbuf: reorder fields by time-of-use

Message ID 1407789890-17355-7-git-send-email-bruce.richardson@intel.com (mailing list archive)
State RFC, archived
Headers

Commit Message

Bruce Richardson Aug. 11, 2014, 8:44 p.m. UTC
  *  Reorder the fields in the mbuf so that we have fields that are used
together side-by-side in the structure. This means that we have a
contiguous block of 8-bytes in the mbuf which are used to reset an mbuf
of descriptor rearm.
*  Where needed add in a dummy fields to overwrite values 8 or 16 bytes
at a time, when doing RX or RX descriptor rearm. This avoids compiler
warnings when using uint64_t values to overwrite a set of smaller
values.
* At the end, place fields that are only used for TX or for the slower
RX path, and mark them as down to be moved to a second cache line.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 lib/librte_mbuf/rte_mbuf.c |  2 +-
 lib/librte_mbuf/rte_mbuf.h | 37 +++++++++++++++++++++----------------
 2 files changed, 22 insertions(+), 17 deletions(-)
  

Comments

Olivier Matz Aug. 12, 2014, 10:03 a.m. UTC | #1
Hi Bruce,

On 08/11/2014 10:44 PM, Bruce Richardson wrote:
> *  Reorder the fields in the mbuf so that we have fields that are used
> together side-by-side in the structure. This means that we have a
> contiguous block of 8-bytes in the mbuf which are used to reset an mbuf
> of descriptor rearm.
> *  Where needed add in a dummy fields to overwrite values 8 or 16 bytes
> at a time, when doing RX or RX descriptor rearm. This avoids compiler
> warnings when using uint64_t values to overwrite a set of smaller
> values.
> * At the end, place fields that are only used for TX or for the slower
> RX path, and mark them as down to be moved to a second cache line.
>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
>   lib/librte_mbuf/rte_mbuf.c |  2 +-
>   lib/librte_mbuf/rte_mbuf.h | 37 +++++++++++++++++++++----------------
>   2 files changed, 22 insertions(+), 17 deletions(-)
>
> diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
> index 64f1587..594b910 100644
> --- a/lib/librte_mbuf/rte_mbuf.c
> +++ b/lib/librte_mbuf/rte_mbuf.c
> @@ -161,7 +161,7 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
>
>   	fprintf(f, "dump mbuf at 0x%p, phys=%"PRIx64", buf_len=%u\n",
>   	       m, (uint64_t)m->buf_physaddr, (unsigned)m->buf_len);
> -	fprintf(f, "  pkt_len=%"PRIu32", ol_flags=%"PRIx16", nb_segs=%u, "
> +	fprintf(f, "  pkt_len=%"PRIu32", ol_flags=%"PRIx64", nb_segs=%u, "
>   	       "in_port=%u\n", m->pkt_len, m->ol_flags,
>   	       (unsigned)m->nb_segs, (unsigned)m->port);
>   	nb_segs = m->nb_segs;

I think this should not go in this patch. Another one "change ol_flags
to 64 bits" would be nice.


> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index e0981c9..566bb7e 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -132,22 +132,20 @@ union rte_vlan_macip {
>   /**< MAC+IP  length. */
>   #define TX_MACIP_LEN_CMP_MASK   (TX_MAC_LEN_CMP_MASK | TX_IP_LEN_CMP_MASK)
>
> +
>   /**

Garbage here.

>    * The generic rte_mbuf, containing a packet mbuf.
>    */
>   struct rte_mbuf {
> -	struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
>   	void *buf_addr;           /**< Virtual address of segment buffer. */
>   	phys_addr_t buf_physaddr; /**< Physical address of segment buffer. */
> -	uint16_t buf_len;         /**< Length of segment buffer. */
>
> -	/* valid for any segment */
> -	struct rte_mbuf *next;    /**< Next segment of scattered packet. */
> +	/* next 8 bytes are initialised on RX descriptor rearm */
> +	uint64_t rearm_data[0]; /**< dummy element so we can get uin64_t ptrs
> +	                         * to this part of the mbuf without alias error
> +	                         */
> +	uint16_t buf_len;       /**< Length of segment buffer. */
>   	uint16_t data_off;
> -	uint16_t data_len;        /**< Amount of data in segment buffer. */
> -	uint32_t pkt_len;         /**< Total pkt len: sum of all segments. */

What do you think about using an union instead? I'm not sure it's
clearer, but in case of:

union {
	uint64_t u64;
	struct {
		uint16_t buf_len;
		uint16_t data_off;
		...
	};
};


> -
> -#ifdef RTE_MBUF_REFCNT
>   	/**
>   	 * 16-bit Reference counter.
>   	 * It should only be accessed using the following functions:
> @@ -157,20 +155,23 @@ struct rte_mbuf {
>   	 * config option.
>   	 */
>   	union {
> +#ifdef RTE_MBUF_REFCNT
>   		rte_atomic16_t refcnt_atomic;   /**< Atomically accessed refcnt */
>   		uint16_t refcnt;                /**< Non-atomically accessed refcnt */
> -	};
> -#else
> -	uint16_t refcnt_reserved;     /**< Do not use this field */
>   #endif
> -
> -	/* these fields are valid for first segment only */
> +		uint16_t refcnt_reserved;     /**< Do not use this field */
> +	};

I think this should go in another cosmetic patch.


>   	uint8_t nb_segs;        /**< Number of segments. */
>   	uint8_t port;        /**< Input port. */
> -	uint16_t ol_flags;            /**< Offload features. */
> -	uint16_t reserved;             /**< Unused field. Required for padding. */
>
> -	/* offload features, valid for first segment only */
> +	/* remaining bytes are set on RX when pulling packet from descriptor */
> +	uint64_t ol_flags;        /**< Offload features. */

Should be moved to "change ol_flags to 64 bits".

> +
> +	__m128i rx_descriptor_fields1[0]; /**< dummy field used as marker for
> +	                                   * writes in a vector driver */

Is it a good idea to have a specific type in the generic mbuf structure?
Moreover it seems that later in your patch series it's replaced by
something else.

Also, the 2nd line of comment mixes tabs and spaces.

>
> +	/* second cache line, fields only used in slow path or on TX */
> +	struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
> +	struct rte_mbuf *next;    /**< Next segment of scattered packet. */

The comment is wrong, this is not a new cache line (introduced later).



Regards,
Olivier
  

Patch

diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 64f1587..594b910 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -161,7 +161,7 @@  rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
 
 	fprintf(f, "dump mbuf at 0x%p, phys=%"PRIx64", buf_len=%u\n",
 	       m, (uint64_t)m->buf_physaddr, (unsigned)m->buf_len);
-	fprintf(f, "  pkt_len=%"PRIu32", ol_flags=%"PRIx16", nb_segs=%u, "
+	fprintf(f, "  pkt_len=%"PRIu32", ol_flags=%"PRIx64", nb_segs=%u, "
 	       "in_port=%u\n", m->pkt_len, m->ol_flags,
 	       (unsigned)m->nb_segs, (unsigned)m->port);
 	nb_segs = m->nb_segs;
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index e0981c9..566bb7e 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -132,22 +132,20 @@  union rte_vlan_macip {
 /**< MAC+IP  length. */
 #define TX_MACIP_LEN_CMP_MASK   (TX_MAC_LEN_CMP_MASK | TX_IP_LEN_CMP_MASK)
 
+
 /**
  * The generic rte_mbuf, containing a packet mbuf.
  */
 struct rte_mbuf {
-	struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
 	void *buf_addr;           /**< Virtual address of segment buffer. */
 	phys_addr_t buf_physaddr; /**< Physical address of segment buffer. */
-	uint16_t buf_len;         /**< Length of segment buffer. */
 
-	/* valid for any segment */
-	struct rte_mbuf *next;    /**< Next segment of scattered packet. */
+	/* next 8 bytes are initialised on RX descriptor rearm */
+	uint64_t rearm_data[0]; /**< dummy element so we can get uin64_t ptrs
+	                         * to this part of the mbuf without alias error
+	                         */
+	uint16_t buf_len;       /**< Length of segment buffer. */
 	uint16_t data_off;
-	uint16_t data_len;        /**< Amount of data in segment buffer. */
-	uint32_t pkt_len;         /**< Total pkt len: sum of all segments. */
-
-#ifdef RTE_MBUF_REFCNT
 	/**
 	 * 16-bit Reference counter.
 	 * It should only be accessed using the following functions:
@@ -157,20 +155,23 @@  struct rte_mbuf {
 	 * config option.
 	 */
 	union {
+#ifdef RTE_MBUF_REFCNT
 		rte_atomic16_t refcnt_atomic;   /**< Atomically accessed refcnt */
 		uint16_t refcnt;                /**< Non-atomically accessed refcnt */
-	};
-#else
-	uint16_t refcnt_reserved;     /**< Do not use this field */
 #endif
-
-	/* these fields are valid for first segment only */
+		uint16_t refcnt_reserved;     /**< Do not use this field */
+	};
 	uint8_t nb_segs;        /**< Number of segments. */
 	uint8_t port;        /**< Input port. */
-	uint16_t ol_flags;            /**< Offload features. */
-	uint16_t reserved;             /**< Unused field. Required for padding. */
 
-	/* offload features, valid for first segment only */
+	/* remaining bytes are set on RX when pulling packet from descriptor */
+	uint64_t ol_flags;        /**< Offload features. */
+
+	__m128i rx_descriptor_fields1[0]; /**< dummy field used as marker for
+	                                   * writes in a vector driver */
+	uint16_t packet_type;     /**< Type of packet, e.g. protocols used */
+	uint16_t data_len;        /**< Amount of data in segment buffer. */
+	uint32_t pkt_len;         /**< Total pkt len: sum of all segments. */
 	union rte_vlan_macip vlan_macip;
 	union {
 		uint32_t rss;       /**< RSS hash result if RSS enabled */
@@ -181,6 +182,10 @@  struct rte_mbuf {
 		uint32_t sched;     /**< Hierarchical scheduler */
 	} hash;                 /**< hash information */
 
+	/* second cache line, fields only used in slow path or on TX */
+	struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
+	struct rte_mbuf *next;    /**< Next segment of scattered packet. */
+
 	union {
 		uint8_t metadata[0];
 		uint16_t metadata16[0];