[03/10] vhost: optimize broadcast rarp sync with c11 atomic
Checks
Commit Message
The RARP packet broadcast flag is synchronized with rte_atomic_XX APIs,
which are full barriers (DMB) on aarch64. This patch optimizes it by
using C11 atomic operations with one-way barriers instead.
Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Joyce Kong <joyce.kong@arm.com>
---
lib/librte_vhost/vhost.h | 2 +-
lib/librte_vhost/vhost_user.c | 7 +++----
lib/librte_vhost/virtio_net.c | 16 +++++++++-------
3 files changed, 13 insertions(+), 12 deletions(-)
@@ -350,7 +350,7 @@ struct virtio_net {
uint32_t flags;
uint16_t vhost_hlen;
/* to tell if we need broadcast rarp packet */
- rte_atomic16_t broadcast_rarp;
+ int16_t broadcast_rarp;
uint32_t nr_vring;
int dequeue_zero_copy;
int extbuf;
@@ -2145,11 +2145,10 @@ vhost_user_send_rarp(struct virtio_net **pdev, struct VhostUserMsg *msg,
* Set the flag to inject a RARP broadcast packet at
* rte_vhost_dequeue_burst().
*
- * rte_smp_wmb() is for making sure the mac is copied
- * before the flag is set.
+ * __ATOMIC_RELEASE ordering is for making sure the mac is
+ * copied before the flag is set.
*/
- rte_smp_wmb();
- rte_atomic16_set(&dev->broadcast_rarp, 1);
+ __atomic_store_n(&dev->broadcast_rarp, 1, __ATOMIC_RELEASE);
did = dev->vdpa_dev_id;
vdpa_dev = rte_vdpa_get_device(did);
if (vdpa_dev && vdpa_dev->ops->migration_done)
@@ -2203,6 +2203,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
struct virtio_net *dev;
struct rte_mbuf *rarp_mbuf = NULL;
struct vhost_virtqueue *vq;
+ int success = 1;
dev = get_device(vid);
if (!dev)
@@ -2249,16 +2250,17 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
*
* broadcast_rarp shares a cacheline in the virtio_net structure
* with some fields that are accessed during enqueue and
- * rte_atomic16_cmpset() causes a write if using cmpxchg. This could
- * result in false sharing between enqueue and dequeue.
+ * __atomic_compare_exchange_n causes a write when the compare and
+ * exchange is performed. This could result in false sharing between
+ * enqueue and dequeue.
*
* Prevent unnecessary false sharing by reading broadcast_rarp first
- * and only performing cmpset if the read indicates it is likely to
- * be set.
+ * and only performing compare and exchange if the read indicates it
+ * is likely to be set.
*/
- if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) &&
- rte_atomic16_cmpset((volatile uint16_t *)
- &dev->broadcast_rarp.cnt, 1, 0))) {
+ if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
+ __atomic_compare_exchange_n(&dev->broadcast_rarp,
+ &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {
rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
if (rarp_mbuf == NULL) {