[v5,3/4] docs: add pointer compression to the EAL guide
Checks
Commit Message
Add documentation to the EAL guide for the new
pointer compression utility functions,
showing example code and potential use cases.
Signed-off-by: Paul Szczepanek <paul.szczepanek@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
.../prog_guide/env_abstraction_layer.rst | 142 ++++++++++++++++++
1 file changed, 142 insertions(+)
--
2.25.1
@@ -1192,3 +1192,145 @@ will not be deallocated.
Any successful deallocation event will trigger a callback, for which user
applications and other DPDK subsystems can register.
+
+.. _pointer_compression:
+
+Pointer Compression
+-------------------
+
+Use ``rte_ptr_compress_16()`` and ``rte_ptr_decompress_16()`` to compress and
+decompress pointers into 16-bit offsets. Use ``rte_ptr_compress_32()`` and
+``rte_ptr_decompress_32()`` to compress and decompress pointers into 32-bit
+offsets.
+
+Compression takes advantage of the fact that pointers usually point into a
+limited memory region (like a mempool). By converting them to offsets from a
+base memory address they can be stored in fewer bytes. How many bytes are needed
+to store the offset is dictated by the memory region size and the alignment of
+the objects the pointers point to.
+
+For example, a pointer into a 4GB memory pool can be stored as a 32-bit
+offset. If the pointed-to memory is 8-byte aligned then the 3 low bits of the
+offset are always zero and can be dropped, so a 32GB pool fits in 32 bits.
+
+For performance reasons these requirements are not enforced programmatically.
+The programmer is responsible for ensuring that the combination of distance
+from the base pointer and memory alignment allows the offset to be stored in
+the number of bits indicated by the function name (16 or 32). The start of the
+mempool memory is a good candidate for the base pointer. Otherwise any pointer
+that precedes all the pointers being compressed, is close enough to them and
+has the same alignment will work.
+
+.. note::
+
+ Performance gains depend on the batch size of pointers and CPU capabilities
+ such as vector extensions. It's important to measure the performance
+ increase on target hardware. A test called ``ring_perf_autotest`` in
+ ``dpdk-test`` can provide the measurements.
+
+Example usage
+~~~~~~~~~~~~~
+
+In this example we send pointers between two cores through a ring. While this
+is a realistic use case, the code is simplified for demonstration purposes and
+does not have error handling.
+
+.. code-block:: c
+
+ #include <rte_launch.h>
+ #include <rte_ring.h>
+ #include <rte_ring_elem.h>
+ #include <rte_ptr_compress.h>
+
+ #define ITEMS_ARRAY_SIZE (1024) /* number of elements in items[] */
+ #define BATCH_SIZE (128) /* pointers requested from the ring per burst */
+ #define ALIGN_EXPONENT (3) /* low bits dropped from every offset */
+ #define ITEM_ALIGN (1<<ALIGN_EXPONENT) /* 8-byte alignment of struct item */
+ #define CORE_SEND (1) /* core that runs send_compressed() */
+ #define CORE_RECV (2) /* core that runs recv_compressed() */
+
+ struct item {
+ int a;
+ } __rte_aligned(ITEM_ALIGN);
+
+ static struct item items[ITEMS_ARRAY_SIZE] = {0}; /* also the base pointer */
+ static struct rte_ring *ring = NULL; /* carries the 32-bit offsets */
+
+ static int
+ send_compressed(void *args) /* sender loop run on CORE_SEND; args is unused */
+ {
+ struct item *ptrs_send[BATCH_SIZE] = {0};
+ unsigned int n_send = 0;
+ struct rte_ring_zc_data zcd = {0};
+
+ /* fill ptrs_send once with the first BATCH_SIZE items; it is reused below */
+ for (;n_send < BATCH_SIZE; n_send++)
+ ptrs_send[n_send] = &items[n_send];
+
+ for(;;) {
+ n_send = rte_ring_enqueue_zc_burst_elem_start(
+ ring, sizeof(uint32_t), BATCH_SIZE, &zcd, NULL);
+
+ /* compress the first zcd.n1 pointers into 32-bit offsets at zcd.ptr1 */
+ rte_ptr_compress_32(items, /* base pointer */
+ ptrs_send, /* source array to be compressed */
+ zcd.ptr1, /* destination array to store offsets */
+ zcd.n1, /* how many pointers to compress */
+ ALIGN_EXPONENT /* how many bits can we drop from the offset */);
+
+ if (zcd.ptr2 != NULL) /* the rest goes to the secondary buffer */
+ rte_ptr_compress_32(items, ptrs_send + zcd.n1,
+ zcd.ptr2, n_send - zcd.n1, ALIGN_EXPONENT);
+
+ rte_ring_enqueue_zc_finish(ring, n_send);
+ }
+ return 1; /* not reached: the loop above never exits */
+ }
+
+ static int
+ recv_compressed(void *args) /* receiver loop run on CORE_RECV; args is unused */
+ {
+ struct item *ptrs_recv[BATCH_SIZE] = {0};
+ unsigned int n_recv;
+ struct rte_ring_zc_data zcd = {0};
+
+ for(;;) {
+ /* receive compressed pointers; n_recv counts both buffers together */
+ n_recv = rte_ring_dequeue_zc_burst_elem_start(
+ ring, sizeof(uint32_t), BATCH_SIZE, &zcd, NULL);
+
+ rte_ptr_decompress_32(items, /* base pointer */
+ zcd.ptr1, /* source array to decompress */
+ ptrs_recv, /* destination array to store pointers */
+ zcd.n1, /* how many pointers to decompress */
+ ALIGN_EXPONENT /* how many bits were dropped from the offset */);
+
+ /* handle the potential secondary buffer (caused by ring boundary) */
+ if (zcd.ptr2 != NULL)
+ rte_ptr_decompress_32(items,
+ zcd.ptr2,
+ ptrs_recv + zcd.n1,
+ n_recv - zcd.n1,
+ ALIGN_EXPONENT);
+
+ rte_ring_dequeue_zc_finish(ring, n_recv);
+
+ /* ptrs_recv contains what ptrs_send contained in the other thread */
+ /* (...) */
+ }
+ return 1; /* not reached: the loop above never exits */
+ }
+
+ void
+ compression_example(void)
+ {
+ ring = rte_ring_create_elem( /* result is not checked (no error handling) */
+ "COMPR_PTRS", sizeof(uint32_t), /* each element holds one 32-bit offset */
+ 1024, rte_socket_id(),
+ RING_F_SP_ENQ | RING_F_SC_DEQ); /* one sending and one receiving core */
+
+ rte_eal_remote_launch(send_compressed, NULL, CORE_SEND);
+ rte_eal_remote_launch(recv_compressed, NULL, CORE_RECV);
+
+ for(;;) {} /* spin forever; neither launched loop ever returns */
+ }