diff mbox series

[v10,5/7] bbdev: add new operation for FFT processing

Message ID 20220930184605.47655-6-nicolas.chautru@intel.com (mailing list archive)
State Accepted, archived
Delegated to: akhil goyal
Headers show
Series bbdev changes for 22.11 | expand

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Nicolas Chautru Sept. 30, 2022, 6:46 p.m. UTC
Extension of bbdev operation to support FFT based operations.

Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
Acked-by: Hemant Agrawal <hemant.agrawal@nxp.com>
Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 doc/guides/prog_guide/bbdev.rst | 103 ++++++++++++++++++++++
 lib/bbdev/rte_bbdev.c           |  10 ++-
 lib/bbdev/rte_bbdev.h           |  76 ++++++++++++++++
 lib/bbdev/rte_bbdev_op.h        | 149 ++++++++++++++++++++++++++++++++
 lib/bbdev/version.map           |   4 +
 5 files changed, 341 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/doc/guides/prog_guide/bbdev.rst b/doc/guides/prog_guide/bbdev.rst
index 70fa01ada5..1c7eb24148 100644
--- a/doc/guides/prog_guide/bbdev.rst
+++ b/doc/guides/prog_guide/bbdev.rst
@@ -1118,6 +1118,109 @@  Figure :numref:`figure_turbo_tb_decode` above
 showing the Turbo decoding of CBs using BBDEV interface in TB-mode
 is also valid for LDPC decode.
 
+BBDEV FFT Operation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This operation allows running a combination of DFT and/or IDFT and/or time-domain windowing.
+These can be used in a modular fashion (using bypass modes) or as a processing pipeline
+which can be used for FFT-based baseband signal processing.
+In more detail, it allows:
+- to process the data first through an IDFT of adjustable size and padding;
+- to perform the windowing as a programmable cyclic shift offset of the data followed by a
+pointwise multiplication by a time domain window;
+- to process the related data through a DFT of adjustable size and de-padding for each such cyclic
+shift output.
+
+A flexible number of Rx antennas are being processed in parallel with the same configuration.
+The API allows more generally for flexibility in what the PMD may support (capability flags) and
+flexibility to adjust some of the parameters of the processing.
+
+The operation/capability flags that can be set for each FFT operation are given below.
+
+  **NOTE:** The actual operation flags that may be used with a specific
+  BBDEV PMD are dependent on the driver capabilities as reported via
+  ``rte_bbdev_info_get()``, and may be a subset of those below.
+
++--------------------------------------------------------------------+
+|Description of FFT capability flags                                 |
++====================================================================+
+|RTE_BBDEV_FFT_WINDOWING                                             |
+| Set to enable/support windowing in time domain                     |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_CS_ADJUSTMENT                                         |
+| Set to enable/support  the cyclic shift time offset adjustment     |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_DFT_BYPASS                                            |
+| Set to bypass the DFT and use directly the IDFT as an option       |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_IDFT_BYPASS                                           |
+| Set to bypass the IDFT and use directly the DFT as an option       |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_WINDOWING_BYPASS                                      |
+| Set to bypass the time domain windowing  as an option              |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_POWER_MEAS                                            |
+| Set to provide an optional power measurement of the DFT output     |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_FP16_INPUT                                            |
+| Set if the input data shall use FP16 format instead of INT16       |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_FP16_OUTPUT                                           |
+| Set if the output data shall use FP16 format instead of INT16      |
++--------------------------------------------------------------------+
+
+The FFT parameters are set out in the table below.
+
++-------------------------+--------------------------------------------------------------+
+|Parameter                |Description                                                   |
++=========================+==============================================================+
+|base_input               |input data                                                    |
++-------------------------+--------------------------------------------------------------+
+|base_output              |output data                                                   |
++-------------------------+--------------------------------------------------------------+
+|power_meas_output        |optional output data with power measurement on DFT output     |
++-------------------------+--------------------------------------------------------------+
+|op_flags                 |bitmask of all active operation capabilities                  |
++-------------------------+--------------------------------------------------------------+
+|input_sequence_size      |size of the input sequence in 32-bits points per antenna      |
++-------------------------+--------------------------------------------------------------+
+|input_leading_padding    |number of points padded at the start of input data            |
++-------------------------+--------------------------------------------------------------+
+|output_sequence_size     |size of the output sequence per antenna and cyclic shift      |
++-------------------------+--------------------------------------------------------------+
+|output_leading_depadding |number of points de-padded at the start of output data        |
++-------------------------+--------------------------------------------------------------+
+|window_index             |optional windowing profile index used for each cyclic shift   |
++-------------------------+--------------------------------------------------------------+
+|cs_bitmap                |bitmap of the cyclic shift output requested (LSB for index 0) |
++-------------------------+--------------------------------------------------------------+
+|num_antennas_log2        |number of antennas as a log2 (10 maps to 1024...)             |
++-------------------------+--------------------------------------------------------------+
+|idft_log2                |iDFT size as a log2                                           |
++-------------------------+--------------------------------------------------------------+
+|dft_log2                 |DFT size as a log2                                            |
++-------------------------+--------------------------------------------------------------+
+|cs_time_adjustment       |adjustment of time position of all the cyclic shift output    |
++-------------------------+--------------------------------------------------------------+
+|idft_shift               |shift down of signal level post iDFT                          |
++-------------------------+--------------------------------------------------------------+
+|dft_shift                |shift down of signal level post DFT                           |
++-------------------------+--------------------------------------------------------------+
+|ncs_reciprocal           |inverse of max number of CS normalized to 15b (2731 for 12 CS)|
++-------------------------+--------------------------------------------------------------+
+|power_shift              |shift down of level of power measurement when enabled         |
++-------------------------+--------------------------------------------------------------+
+|fp16_exp_adjust          |value added to FP16 exponent at conversion from INT16         |
++-------------------------+--------------------------------------------------------------+
+
+The mbuf input ``base_input`` is mandatory for all BBDEV PMDs and is the
+incoming data for the processing. Its size may not fit into an actual mbuf, but the
+structure is used to pass the IOVA address.
+The mbuf output ``base_output`` is mandatory and is the output of the FFT processing chain.
+Each point is a complex number of 32 bits: either as 2 INT16 or as 2 FP16 when the option
+is supported.
+The data layout is based on contiguous concatenation of output data first by cyclic shift then
+by antenna.
 
 Sample code
 -----------
diff --git a/lib/bbdev/rte_bbdev.c b/lib/bbdev/rte_bbdev.c
index 38630a23f8..9d65ba8cd3 100644
--- a/lib/bbdev/rte_bbdev.c
+++ b/lib/bbdev/rte_bbdev.c
@@ -24,7 +24,7 @@ 
 #define DEV_NAME "BBDEV"
 
 /* Number of supported operation types */
-#define BBDEV_OP_TYPE_COUNT 5
+#define BBDEV_OP_TYPE_COUNT 6
 
 /* BBDev library logging ID */
 RTE_LOG_REGISTER_DEFAULT(bbdev_logtype, NOTICE);
@@ -852,6 +852,9 @@  get_bbdev_op_size(enum rte_bbdev_op_type type)
 	case RTE_BBDEV_OP_LDPC_ENC:
 		result = sizeof(struct rte_bbdev_enc_op);
 		break;
+	case RTE_BBDEV_OP_FFT:
+		result = sizeof(struct rte_bbdev_fft_op);
+		break;
 	default:
 		break;
 	}
@@ -875,6 +878,10 @@  bbdev_op_init(struct rte_mempool *mempool, void *arg, void *element,
 		struct rte_bbdev_enc_op *op = element;
 		memset(op, 0, mempool->elt_size);
 		op->mempool = mempool;
+	} else if (type == RTE_BBDEV_OP_FFT) {
+		struct rte_bbdev_fft_op *op = element;
+		memset(op, 0, mempool->elt_size);
+		op->mempool = mempool;
 	}
 }
 
@@ -1125,6 +1132,7 @@  rte_bbdev_op_type_str(enum rte_bbdev_op_type op_type)
 		"RTE_BBDEV_OP_TURBO_ENC",
 		"RTE_BBDEV_OP_LDPC_DEC",
 		"RTE_BBDEV_OP_LDPC_ENC",
+		"RTE_BBDEV_OP_FFT",
 	};
 
 	if (op_type < BBDEV_OP_TYPE_COUNT)
diff --git a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h
index d95049d44c..0ac863ce4d 100644
--- a/lib/bbdev/rte_bbdev.h
+++ b/lib/bbdev/rte_bbdev.h
@@ -401,6 +401,12 @@  typedef uint16_t (*rte_bbdev_enqueue_dec_ops_t)(
 		struct rte_bbdev_dec_op **ops,
 		uint16_t num);
 
+/** @internal Enqueue up to @p num FFT operations for processing on a queue of a device. */
+typedef uint16_t (*rte_bbdev_enqueue_fft_ops_t)(
+		struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_fft_op **ops,
+		uint16_t num);
+
 /** @internal Dequeue encode operations from a queue of a device. */
 typedef uint16_t (*rte_bbdev_dequeue_enc_ops_t)(
 		struct rte_bbdev_queue_data *q_data,
@@ -411,6 +417,11 @@  typedef uint16_t (*rte_bbdev_dequeue_dec_ops_t)(
 		struct rte_bbdev_queue_data *q_data,
 		struct rte_bbdev_dec_op **ops, uint16_t num);
 
+/** @internal Dequeue up to @p num FFT operations from a queue of a device. */
+typedef uint16_t (*rte_bbdev_dequeue_fft_ops_t)(
+		struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_fft_op **ops, uint16_t num);
+
 #define RTE_BBDEV_NAME_MAX_LEN  64  /**< Max length of device name */
 
 /**
@@ -459,6 +470,10 @@  struct __rte_cache_aligned rte_bbdev {
 	rte_bbdev_dequeue_enc_ops_t dequeue_ldpc_enc_ops;
 	/** Dequeue decode function */
 	rte_bbdev_dequeue_dec_ops_t dequeue_ldpc_dec_ops;
+	/** Enqueue FFT function */
+	rte_bbdev_enqueue_fft_ops_t enqueue_fft_ops;
+	/** Dequeue FFT function */
+	rte_bbdev_dequeue_fft_ops_t dequeue_fft_ops;
 	const struct rte_bbdev_ops *dev_ops;  /**< Functions exported by PMD */
 	struct rte_bbdev_data *data;  /**< Pointer to device data */
 	enum rte_bbdev_state state;  /**< If device is currently used or not */
@@ -591,6 +606,36 @@  rte_bbdev_enqueue_ldpc_dec_ops(uint16_t dev_id, uint16_t queue_id,
 	return dev->enqueue_ldpc_dec_ops(q_data, ops, num_ops);
 }
 
+/**
+ * Enqueue a burst of FFT operations to a queue of the device.
+ * This function only enqueues as many operations as currently possible and
+ * does not block until @p num_ops entries in the queue are available.
+ * This function does not provide any error notification to avoid the
+ * corresponding overhead.
+ *
+ * @param dev_id
+ *   The identifier of the device (used as an index without validation).
+ * @param queue_id
+ *   The index of the queue (used as an index without validation).
+ * @param ops
+ *   Pointer array containing operations to be enqueued. Must have at least
+ *   @p num_ops entries.
+ * @param num_ops
+ *   The maximum number of operations to enqueue.
+ *
+ * @return
+ *   The number of operations actually enqueued (this is the number of processed
+ *   entries in the @p ops array).
+ */
+__rte_experimental
+static inline uint16_t
+rte_bbdev_enqueue_fft_ops(uint16_t dev_id, uint16_t queue_id,
+		struct rte_bbdev_fft_op **ops, uint16_t num_ops)
+{
+	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
+	struct rte_bbdev_queue_data *q_data = &dev->data->queues[queue_id];
+	return dev->enqueue_fft_ops(q_data, ops, num_ops);
+}
 
 /**
  * Dequeue a burst of processed encode operations from a queue of the device.
@@ -716,6 +761,37 @@  rte_bbdev_dequeue_ldpc_dec_ops(uint16_t dev_id, uint16_t queue_id,
 	return dev->dequeue_ldpc_dec_ops(q_data, ops, num_ops);
 }
 
+/**
+ * Dequeue a burst of FFT operations from a queue of the device.
+ * This function returns only the current contents of the queue, and does not
+ * block until @p num_ops is available.
+ * This function does not provide any error notification to avoid the
+ * corresponding overhead.
+ *
+ * @param dev_id
+ *   The identifier of the device (used as an index without validation).
+ * @param queue_id
+ *   The index of the queue (used as an index without validation).
+ * @param ops
+ *   Pointer array where operations will be dequeued to. Must have at least
+ *   @p num_ops entries.
+ * @param num_ops
+ *   The maximum number of operations to dequeue.
+ *
+ * @return
+ *   The number of operations actually dequeued (this is the number of entries
+ *   copied into the @p ops array).
+ */
+__rte_experimental
+static inline uint16_t
+rte_bbdev_dequeue_fft_ops(uint16_t dev_id, uint16_t queue_id,
+		struct rte_bbdev_fft_op **ops, uint16_t num_ops)
+{
+	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
+	struct rte_bbdev_queue_data *q_data = &dev->data->queues[queue_id];
+	return dev->dequeue_fft_ops(q_data, ops, num_ops);
+}
+
 /** Definitions of device event types */
 enum rte_bbdev_event_type {
 	RTE_BBDEV_EVENT_UNKNOWN,  /**< unknown event type */
diff --git a/lib/bbdev/rte_bbdev_op.h b/lib/bbdev/rte_bbdev_op.h
index 4f1cff8412..73677188ab 100644
--- a/lib/bbdev/rte_bbdev_op.h
+++ b/lib/bbdev/rte_bbdev_op.h
@@ -47,6 +47,8 @@  extern "C" {
 #define RTE_BBDEV_TURBO_MAX_CODE_BLOCKS (64)
 /* LDPC:  Maximum number of Code Blocks in Transport Block.*/
 #define RTE_BBDEV_LDPC_MAX_CODE_BLOCKS (256)
+/* 12 CS maximum; NOTE(review): value is 6, presumably half of the max CS count - confirm */
+#define RTE_BBDEV_MAX_CS_2 (6)
 
 /*
  * Maximum size to be used to manage the enum rte_bbdev_op_type including padding for future
@@ -217,6 +219,26 @@  enum rte_bbdev_op_ldpcenc_flag_bitmasks {
 	RTE_BBDEV_LDPC_ENC_CONCATENATION = (1ULL << 7)
 };
 
+/** Flags for FFT operation and capability structure */
+enum rte_bbdev_op_fft_flag_bitmasks {
+	/** Flexible windowing capability */
+	RTE_BBDEV_FFT_WINDOWING = (1ULL << 0),
+	/** Flexible adjustment of Cyclic Shift time offset */
+	RTE_BBDEV_FFT_CS_ADJUSTMENT = (1ULL << 1),
+	/** Set to bypass the DFT and use the iDFT directly */
+	RTE_BBDEV_FFT_DFT_BYPASS = (1ULL << 2),
+	/** Set to bypass the iDFT and use the DFT directly */
+	RTE_BBDEV_FFT_IDFT_BYPASS = (1ULL << 3),
+	/** Set to bypass time domain windowing */
+	RTE_BBDEV_FFT_WINDOWING_BYPASS = (1ULL << 4),
+	/** Set for optional power measurement on DFT output */
+	RTE_BBDEV_FFT_POWER_MEAS = (1ULL << 5),
+	/** Set if the input data uses FP16 format */
+	RTE_BBDEV_FFT_FP16_INPUT = (1ULL << 6),
+	/** Set if the output data uses FP16 format */
+	RTE_BBDEV_FFT_FP16_OUTPUT = (1ULL << 7)
+};
+
 /** Flags for the Code Block/Transport block mode  */
 enum rte_bbdev_op_cb_mode {
 	/** One operation is one or fraction of one transport block  */
@@ -695,6 +717,55 @@  struct rte_bbdev_op_ldpc_enc {
 	};
 };
 
+/** Operation structure for FFT processing.
+ *
+ * The operation processes the data for multiple antennas in a single call
+ * (i.e. for all the REs belonging to a given SRS sequence for instance).
+ *
+ * The output mbuf data structure is expected to be allocated by the
+ * application with enough room for the output data.
+ */
+struct rte_bbdev_op_fft {
+	/** Input data starting from first antenna */
+	struct rte_bbdev_op_data base_input;
+	/** Output data starting from first antenna and first cyclic shift */
+	struct rte_bbdev_op_data base_output;
+	/** Optional power measurement output data */
+	struct rte_bbdev_op_data power_meas_output;
+	/** Flags from rte_bbdev_op_fft_flag_bitmasks */
+	uint32_t op_flags;
+	/** Input sequence size in 32-bits points */
+	uint16_t input_sequence_size;
+	/** Padding at the start of the sequence */
+	uint16_t input_leading_padding;
+	/** Output sequence size in 32-bits points */
+	uint16_t output_sequence_size;
+	/** Depadding at the start of the DFT output */
+	uint16_t output_leading_depadding;
+	/** Window index being used for each cyclic shift output */
+	uint8_t window_index[RTE_BBDEV_MAX_CS_2];
+	/** Bitmap of the cyclic shift output requested */
+	uint16_t cs_bitmap;
+	/** Number of antennas as a log2 - 8 to 128 */
+	uint8_t num_antennas_log2;
+	/** iDFT size as a log2 - 32 to 2048 */
+	uint8_t idft_log2;
+	/** DFT size as a log2 - 8 to 2048 */
+	uint8_t dft_log2;
+	/** Adjustment of position of the cyclic shifts - -31 to 31 */
+	int8_t cs_time_adjustment;
+	/** iDFT shift down */
+	int8_t idft_shift;
+	/** DFT shift down */
+	int8_t dft_shift;
+	/** NCS reciprocal factor */
+	uint16_t ncs_reciprocal;
+	/** Power measurement out shift down */
+	uint16_t power_shift;
+	/** Adjust the FP16 exponent for INT<->FP16 conversion */
+	uint16_t fp16_exp_adjust;
+};
+
 /** List of the capabilities for the Turbo Decoder */
 struct rte_bbdev_op_cap_turbo_dec {
 	/** Flags from rte_bbdev_op_td_flag_bitmasks */
@@ -747,6 +818,16 @@  struct rte_bbdev_op_cap_ldpc_enc {
 	uint16_t num_buffers_dst;
 };
 
+/** List of the capabilities for the FFT */
+struct rte_bbdev_op_cap_fft {
+	/** Flags from rte_bbdev_op_fft_flag_bitmasks */
+	uint32_t capability_flags;
+	/** Number of input buffers */
+	uint16_t num_buffers_src;
+	/** Number of output buffers */
+	uint16_t num_buffers_dst;
+};
+
 /** Different operation types supported by the device
  *  The related macro RTE_BBDEV_OP_TYPE_SIZE_MAX can be used as an absolute maximum for
  *  notably sizing array while allowing for future enumeration insertion.
@@ -757,6 +838,7 @@  enum rte_bbdev_op_type {
 	RTE_BBDEV_OP_TURBO_ENC,  /**< Turbo encode */
 	RTE_BBDEV_OP_LDPC_DEC,  /**< LDPC decode */
 	RTE_BBDEV_OP_LDPC_ENC,  /**< LDPC encode */
+	RTE_BBDEV_OP_FFT,  /**< FFT */
 };
 
 /** Bit indexes of possible errors reported through status field */
@@ -799,6 +881,18 @@  struct rte_bbdev_dec_op {
 	};
 };
 
+/** Structure specifying a single FFT operation */
+struct rte_bbdev_fft_op {
+	/** Status of operation that was performed */
+	int status;
+	/** Mempool which op instance is in */
+	struct rte_mempool *mempool;
+	/** Opaque pointer for user data */
+	void *opaque_data;
+	/** Contains FFT specific parameters */
+	struct rte_bbdev_op_fft fft;
+};
+
 /** Operation capabilities supported by a device */
 struct rte_bbdev_op_cap {
 	enum rte_bbdev_op_type type;  /**< Type of operation */
@@ -807,6 +901,7 @@  struct rte_bbdev_op_cap {
 		struct rte_bbdev_op_cap_turbo_enc turbo_enc;
 		struct rte_bbdev_op_cap_ldpc_dec ldpc_dec;
 		struct rte_bbdev_op_cap_ldpc_enc ldpc_enc;
+		struct rte_bbdev_op_cap_fft fft;
 	} cap;  /**< Operation-type specific capabilities */
 };
 
@@ -925,6 +1020,42 @@  rte_bbdev_dec_op_alloc_bulk(struct rte_mempool *mempool,
 	return 0;
 }
 
+/**
+ * Bulk allocate FFT operations from a mempool with parameter defaults reset.
+ *
+ * @param mempool
+ *   Operation mempool, created by rte_bbdev_op_pool_create().
+ * @param ops
+ *   Output array to place allocated operations
+ * @param num_ops
+ *   Number of operations to allocate
+ *
+ * @returns
+ *   - 0 on success
+ *   - -EINVAL if the mempool private type is not RTE_BBDEV_OP_FFT
+ */
+__rte_experimental
+static inline int
+rte_bbdev_fft_op_alloc_bulk(struct rte_mempool *mempool,
+		struct rte_bbdev_fft_op **ops, uint16_t num_ops)
+{
+	struct rte_bbdev_op_pool_private *priv;
+	int ret;
+
+	/* Check that the pool private data is tagged for FFT operations */
+	priv = (struct rte_bbdev_op_pool_private *)
+			rte_mempool_get_priv(mempool);
+	if (unlikely(priv->type != RTE_BBDEV_OP_FFT))
+		return -EINVAL;
+
+	/* Get elements; a negative rte_mempool_get_bulk() result is returned unchanged */
+	ret = rte_mempool_get_bulk(mempool, (void **)ops, num_ops);
+	if (unlikely(ret < 0))
+		return ret;
+
+	return 0;
+}
+
 /**
  * Free decode operation structures that were allocated by
  * rte_bbdev_dec_op_alloc_bulk().
@@ -959,6 +1090,24 @@  rte_bbdev_enc_op_free_bulk(struct rte_bbdev_enc_op **ops, unsigned int num_ops)
 		rte_mempool_put_bulk(ops[0]->mempool, (void **)ops, num_ops);
 }
 
+/**
+ * Free FFT operation structures that were allocated by
+ * *rte_bbdev_fft_op_alloc_bulk*.
+ * All structures must belong to the same mempool.
+ *
+ * @param ops
+ *   Operation structures; the mempool of ops[0] is used for the whole array
+ * @param num_ops
+ *   Number of structures; nothing is done when it is 0
+ */
+__rte_experimental
+static inline void
+rte_bbdev_fft_op_free_bulk(struct rte_bbdev_fft_op **ops, unsigned int num_ops)
+{
+	if (num_ops > 0)
+		rte_mempool_put_bulk(ops[0]->mempool, (void **)ops, num_ops);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/bbdev/version.map b/lib/bbdev/version.map
index f0a072ea00..0cbeab3d47 100644
--- a/lib/bbdev/version.map
+++ b/lib/bbdev/version.map
@@ -45,4 +45,8 @@  EXPERIMENTAL {
 
 	# added in 22.11
 	rte_bbdev_device_status_str;
+	rte_bbdev_enqueue_fft_ops;
+	rte_bbdev_dequeue_fft_ops;
+	rte_bbdev_fft_op_alloc_bulk;
+	rte_bbdev_fft_op_free_bulk;
 };