@@ -145,6 +145,7 @@ struct vhost_virtqueue {
struct vring_used_elem_packed *shadow_used_packed;
};
uint16_t shadow_used_idx;
+	uint16_t enqueue_shadow_count;
struct vhost_vring_addr ring_addrs;
struct batch_copy_elem *batch_copy_elems;
@@ -158,6 +158,90 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
vhost_log_cache_sync(dev, vq);
}
+static __rte_always_inline void
+flush_enqueue_used_packed(struct virtio_net *dev,
+	struct vhost_virtqueue *vq)
+{
+	int i;
+	uint16_t used_idx = vq->last_used_idx;
+	uint16_t head_idx = vq->last_used_idx;
+	uint16_t head_flags = 0;
+
+	/* Split loop in two to save memory barriers */
+	for (i = 0; i < vq->shadow_used_idx; i++) {
+		vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id;
+		vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len;
+
+		used_idx += vq->shadow_used_packed[i].count;
+		if (used_idx >= vq->size)
+			used_idx -= vq->size;
+	}
+
+	rte_smp_wmb();
+
+	for (i = 0; i < vq->shadow_used_idx; i++) {
+		uint16_t flags;
+
+		if (vq->shadow_used_packed[i].len)
+			flags = VRING_DESC_F_WRITE;
+		else
+			flags = 0;
+
+		if (vq->used_wrap_counter) {
+			flags |= VRING_DESC_F_USED;
+			flags |= VRING_DESC_F_AVAIL;
+		} else {
+			flags &= ~VRING_DESC_F_USED;
+			flags &= ~VRING_DESC_F_AVAIL;
+		}
+
+		if (i > 0) {
+			vq->desc_packed[vq->last_used_idx].flags = flags;
+
+			vhost_log_cache_used_vring(dev, vq,
+					vq->last_used_idx *
+					sizeof(struct vring_packed_desc),
+					sizeof(struct vring_packed_desc));
+		} else {
+			head_idx = vq->last_used_idx;
+			head_flags = flags;
+		}
+
+		vq->last_used_idx += vq->shadow_used_packed[i].count;
+		if (vq->last_used_idx >= vq->size) {
+			vq->used_wrap_counter ^= 1;
+			vq->last_used_idx -= vq->size;
+		}
+	}
+
+	vq->desc_packed[head_idx].flags = head_flags;
+
+	vhost_log_cache_used_vring(dev, vq,
+				head_idx *
+				sizeof(struct vring_packed_desc),
+				sizeof(struct vring_packed_desc));
+
+	vq->shadow_used_idx = 0;
+	vhost_log_cache_sync(dev, vq);
+}
+
+static __rte_always_inline void
+update_enqueue_shadow_used_ring_packed(struct vhost_virtqueue *vq,
+	uint16_t desc_idx, uint32_t len,
+	uint16_t count)
+{
+	if (!vq->shadow_used_idx)
+		vq->enqueue_shadow_count = vq->last_used_idx & 0x3;
+
+	uint16_t i = vq->shadow_used_idx++;
+
+	vq->shadow_used_packed[i].id = desc_idx;
+	vq->shadow_used_packed[i].len = len;
+	vq->shadow_used_packed[i].count = count;
+
+	vq->enqueue_shadow_count += count;
+}
+
static __rte_always_inline void
update_shadow_used_ring_packed(struct vhost_virtqueue *vq,
uint16_t desc_idx, uint32_t len, uint16_t count)
@@ -198,6 +282,24 @@ do_data_copy_dequeue(struct vhost_virtqueue *vq)
vq->batch_copy_nb_elems = 0;
}
+static __rte_always_inline void
+flush_enqueue_shadow_used_packed(struct virtio_net *dev,
+	struct vhost_virtqueue *vq, uint32_t len[],
+	uint16_t id[], uint16_t count[], uint16_t num_buffers)
+{
+	int i;
+	for (i = 0; i < num_buffers; i++) {
+		update_enqueue_shadow_used_ring_packed(vq, id[i], len[i],
+				count[i]);
+
+		if (vq->enqueue_shadow_count >= PACKED_DESC_PER_CACHELINE) {
+			do_data_copy_enqueue(dev, vq);
+			flush_enqueue_used_packed(dev, vq);
+		}
+	}
+}
+
+
/* avoid write operation when necessary, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do { \
if ((var) != (val)) \
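The 0x3 mask in update_enqueue_shadow_used_ring_packed() and the PACKED_DESC_PER_CACHELINE threshold above encode the same assumption: a packed descriptor is 16 bytes (addr, len, id, flags), so four of them share a 64-byte cacheline, and the shadow ring is only flushed once the pending updates reach such a boundary, so each flush writes whole cachelines of the used ring. A minimal standalone sketch of that bookkeeping (illustrative only, not part of the patch; the constants, names and starting index below are made up):

/* sketch.c - model of the cacheline-aligned shadow flush (illustrative only) */
#include <stdint.h>
#include <stdio.h>

#define CACHELINE_SIZE     64
#define PACKED_DESC_SIZE   16  /* assumed sizeof(struct vring_packed_desc) */
#define DESC_PER_CACHELINE (CACHELINE_SIZE / PACKED_DESC_SIZE)   /* 4 */

int main(void)
{
	uint16_t last_used_idx = 6;   /* arbitrary, mid-cacheline start */
	uint16_t shadow_count = 0;    /* mirrors vq->enqueue_shadow_count */
	uint16_t shadow_used = 0;     /* mirrors vq->shadow_used_idx */
	int buf;

	for (buf = 0; buf < 6; buf++) {
		uint16_t desc_count = 1;  /* one descriptor per buffer here */

		/* First entry after a flush: seed with the offset inside the
		 * current cacheline, i.e. last_used_idx & 0x3. */
		if (!shadow_used)
			shadow_count = last_used_idx & (DESC_PER_CACHELINE - 1);

		shadow_used++;
		shadow_count += desc_count;

		if (shadow_count >= DESC_PER_CACHELINE) {
			printf("flush %d buffers, used idx %d..%d\n",
			       shadow_used, last_used_idx,
			       last_used_idx + shadow_used - 1);
			last_used_idx += shadow_used;   /* all counts are 1 */
			shadow_used = 0;
		}
	}
	return 0;
}

Starting from index 6, the first flush covers only slots 6-7 and lands the ring on a cacheline boundary; every later flush then writes exactly one full cacheline (8-11, 12-15, ...).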
@@ -800,6 +902,9 @@ vhost_enqueue_normal_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
max_tries = 1;
uint16_t num_buffers = 0;
+	uint32_t buffer_len[max_tries];
+	uint16_t buffer_buf_id[max_tries];
+	uint16_t buffer_desc_count[max_tries];
while (size > 0) {
/*
@@ -822,6 +927,10 @@ vhost_enqueue_normal_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
size -= len;
+		buffer_len[num_buffers] = len;
+		buffer_buf_id[num_buffers] = buf_id;
+		buffer_desc_count[num_buffers] = desc_count;
+
avail_idx += desc_count;
if (avail_idx >= vq->size)
avail_idx -= vq->size;
@@ -836,6 +945,9 @@ vhost_enqueue_normal_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
return 0;
}
+	flush_enqueue_shadow_used_packed(dev, vq, buffer_len, buffer_buf_id,
+				buffer_desc_count, num_buffers);
+
return 0;
}
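As for why flush_enqueue_used_packed() fills id/len in one loop, issues rte_smp_wmb(), and only then stores the flags, with the head descriptor handled last: the AVAIL/USED bits in the flags field are what tell the guest a used entry is valid, so they must not become visible before the payload, and the entry the guest polls first must be published after all the others. A rough standalone model of that ordering (illustrative only; it uses a GCC/Clang builtin fence in place of rte_smp_wmb() and ignores index wrap and dirty-page logging):

#include <stdint.h>

struct packed_desc {            /* same layout idea as struct vring_packed_desc */
	uint64_t addr;
	uint32_t len;
	uint16_t id;
	uint16_t flags;
};

#define F_WRITE (1 << 1)
#define F_AVAIL (1 << 7)
#define F_USED  (1 << 15)

/* Publish n used entries starting at head; entries are contiguous here. */
void
publish_used(struct packed_desc *ring, uint16_t head,
	     const uint16_t *ids, const uint32_t *lens, int n, int wrap)
{
	uint16_t flags = F_WRITE | (wrap ? (F_AVAIL | F_USED) : 0);
	int i;

	for (i = 0; i < n; i++) {               /* 1: payload (id/len) first */
		ring[head + i].id = ids[i];
		ring[head + i].len = lens[i];
	}

	__atomic_thread_fence(__ATOMIC_RELEASE); /* stands in for rte_smp_wmb() */

	for (i = 1; i < n; i++)                 /* 2: flags of non-head entries */
		ring[head + i].flags = flags;

	ring[head].flags = flags;               /* 3: head last, guest may consume */
}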