[v1] bbdev: add new operation for FFT processing

Message ID 1646956157-245769-2-git-send-email-nicolas.chautru@intel.com (mailing list archive)
State Superseded, archived
Delegated to: akhil goyal
Headers
Series [v1] bbdev: add new operation for FFT processing |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/iol-intel-Functional success Functional Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-mellanox-Performance success Performance Testing PASS
ci/intel-Testing success Testing PASS
ci/github-robot: build fail github build: failed
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-abi-testing warning Testing issues
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS

Commit Message

Chautru, Nicolas March 10, 2022, 11:49 p.m. UTC
  Extension of bbdev operation to support FFT based operations.

Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
---
 doc/guides/prog_guide/bbdev.rst | 130 +++++++++++++++++++++++++++++++++++
 lib/bbdev/rte_bbdev.c           |   8 +++
 lib/bbdev/rte_bbdev.h           |  76 ++++++++++++++++++++
 lib/bbdev/rte_bbdev_op.h        | 149 ++++++++++++++++++++++++++++++++++++++++
 lib/bbdev/version.map           |  10 +++
 5 files changed, 373 insertions(+)
  

Comments

Stephen Hemminger March 11, 2022, 1:12 a.m. UTC | #1
On Thu, 10 Mar 2022 15:49:17 -0800
Nicolas Chautru <nicolas.chautru@intel.com> wrote:

> diff --git a/lib/bbdev/rte_bbdev.c b/lib/bbdev/rte_bbdev.c
> index aaee7b7..a72ecba 100644
> --- a/lib/bbdev/rte_bbdev.c
> +++ b/lib/bbdev/rte_bbdev.c
> @@ -850,6 +850,9 @@ struct rte_bbdev *
>  	case RTE_BBDEV_OP_LDPC_ENC:
>  		result = sizeof(struct rte_bbdev_enc_op);
>  		break;
> +	case RTE_BBDEV_OP_FFT:
> +		result = sizeof(struct rte_bbdev_fft_op);
> +		break;
>  	default:
>  		break;
>  	}
> @@ -873,6 +876,10 @@ struct rte_bbdev *
>  		struct rte_bbdev_enc_op *op = element;
>  		memset(op, 0, mempool->elt_size);
>  		op->mempool = mempool;
> +	} else if (type == RTE_BBDEV_OP_FFT) {
> +		struct rte_bbdev_fft_op *op = element;
> +		memset(op, 0, mempool->elt_size);
> +		op->mempool = mempool;
>  	}
>  }
>  
> @@ -1123,6 +1130,7 @@ struct rte_mempool *
>  		"RTE_BBDEV_OP_TURBO_ENC",
>  		"RTE_BBDEV_OP_LDPC_DEC",
>  		"RTE_BBDEV_OP_LDPC_ENC",
> +		"RTE_BBDEV_OP_FFT",
>  	};
>  
>  	if (op_type < RTE_BBDEV_OP_TYPE_COUNT)
> diff --git a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h
> index b88c881..e9ca673 100644
> --- a/lib/bbdev/rte_bbdev.h
> +++ b/lib/bbdev/rte_bbdev.h
> @@ -380,6 +380,12 @@ typedef uint16_t (*rte_bbdev_enqueue_dec_ops_t)(
>  		struct rte_bbdev_dec_op **ops,
>  		uint16_t num);
>  
> +/** @internal Enqueue fft operations for processing on queue of a device. */
> +typedef uint16_t (*rte_bbdev_enqueue_fft_ops_t)(
> +		struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_fft_op **ops,
> +		uint16_t num);
> +
>  /** @internal Dequeue encode operations from a queue of a device. */
>  typedef uint16_t (*rte_bbdev_dequeue_enc_ops_t)(
>  		struct rte_bbdev_queue_data *q_data,
> @@ -390,6 +396,11 @@ typedef uint16_t (*rte_bbdev_dequeue_dec_ops_t)(
>  		struct rte_bbdev_queue_data *q_data,
>  		struct rte_bbdev_dec_op **ops, uint16_t num);
>  
> +/** @internal Dequeue fft operations from a queue of a device. */
> +typedef uint16_t (*rte_bbdev_dequeue_fft_ops_t)(
> +		struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_fft_op **ops, uint16_t num);
> +
>  #define RTE_BBDEV_NAME_MAX_LEN  64  /**< Max length of device name */
>  
>  /**
> @@ -438,6 +449,10 @@ struct __rte_cache_aligned rte_bbdev {
>  	rte_bbdev_dequeue_enc_ops_t dequeue_ldpc_enc_ops;
>  	/** Dequeue decode function */
>  	rte_bbdev_dequeue_dec_ops_t dequeue_ldpc_dec_ops;
> +	/** Enqueue FFT function */
> +	rte_bbdev_enqueue_fft_ops_t enqueue_fft_ops;
> +	/** Dequeue FFT function */
> +	rte_bbdev_dequeue_fft_ops_t dequeue_fft_ops;
>  	const struct rte_bbdev_ops *dev_ops;  /**< Functions exported by PMD */
>  	struct rte_bbdev_data *data;  /**< Pointer to device data */
>  	enum rte_bbdev_state state;  /**< If device is currently used or not */


Since rte_bbdev is exposed in rte_bbdev.h, it cannot be changed without
breaking the ABI. It would have been better if the data structure had been better hidden (hint).
But you can't change it now until 22.11.
  
Chautru, Nicolas March 17, 2022, 6:42 p.m. UTC | #2
Hi Stephen,

Yes, I am deferring this patch to 22.11 due to ABI breakage that cannot be resolved using versioning in a few places.
Still, that patch can be used in anticipation of 22.11 to get early comments on the API extension. I have marked it as deferred in patchwork.

For 22.07 I have pushed this notice to highlight the change coming in 22.11, so as to clean up some of this, make it more future-proof and extend the API, i.e. there is no actual change of API in 22.07.
=> https://patches.dpdk.org/project/dpdk/patch/1647542252-35727-2-git-send-email-nicolas.chautru@intel.com/

Thanks
Nic

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Thursday, March 10, 2022 5:13 PM
> To: Chautru, Nicolas <nicolas.chautru@intel.com>
> Cc: dev@dpdk.org; gakhil@marvell.com; trix@redhat.com;
> thomas@monjalon.net; hemant.agrawal@nxp.com; Zhang, Mingshan
> <mingshan.zhang@intel.com>; david.marchand@redhat.com
> Subject: Re: [PATCH v1] bbdev: add new operation for FFT processing
> 
> On Thu, 10 Mar 2022 15:49:17 -0800
> Nicolas Chautru <nicolas.chautru@intel.com> wrote:
> 
> > diff --git a/lib/bbdev/rte_bbdev.c b/lib/bbdev/rte_bbdev.c index
> > aaee7b7..a72ecba 100644
> > --- a/lib/bbdev/rte_bbdev.c
> > +++ b/lib/bbdev/rte_bbdev.c
> > @@ -850,6 +850,9 @@ struct rte_bbdev *
> >  	case RTE_BBDEV_OP_LDPC_ENC:
> >  		result = sizeof(struct rte_bbdev_enc_op);
> >  		break;
> > +	case RTE_BBDEV_OP_FFT:
> > +		result = sizeof(struct rte_bbdev_fft_op);
> > +		break;
> >  	default:
> >  		break;
> >  	}
> > @@ -873,6 +876,10 @@ struct rte_bbdev *
> >  		struct rte_bbdev_enc_op *op = element;
> >  		memset(op, 0, mempool->elt_size);
> >  		op->mempool = mempool;
> > +	} else if (type == RTE_BBDEV_OP_FFT) {
> > +		struct rte_bbdev_fft_op *op = element;
> > +		memset(op, 0, mempool->elt_size);
> > +		op->mempool = mempool;
> >  	}
> >  }
> >
> > @@ -1123,6 +1130,7 @@ struct rte_mempool *
> >  		"RTE_BBDEV_OP_TURBO_ENC",
> >  		"RTE_BBDEV_OP_LDPC_DEC",
> >  		"RTE_BBDEV_OP_LDPC_ENC",
> > +		"RTE_BBDEV_OP_FFT",
> >  	};
> >
> >  	if (op_type < RTE_BBDEV_OP_TYPE_COUNT) diff --git
> > a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h index b88c881..e9ca673
> > 100644
> > --- a/lib/bbdev/rte_bbdev.h
> > +++ b/lib/bbdev/rte_bbdev.h
> > @@ -380,6 +380,12 @@ typedef uint16_t
> (*rte_bbdev_enqueue_dec_ops_t)(
> >  		struct rte_bbdev_dec_op **ops,
> >  		uint16_t num);
> >
> > +/** @internal Enqueue fft operations for processing on queue of a
> > +device. */ typedef uint16_t (*rte_bbdev_enqueue_fft_ops_t)(
> > +		struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_fft_op **ops,
> > +		uint16_t num);
> > +
> >  /** @internal Dequeue encode operations from a queue of a device. */
> > typedef uint16_t (*rte_bbdev_dequeue_enc_ops_t)(
> >  		struct rte_bbdev_queue_data *q_data, @@ -390,6 +396,11
> @@ typedef
> > uint16_t (*rte_bbdev_dequeue_dec_ops_t)(
> >  		struct rte_bbdev_queue_data *q_data,
> >  		struct rte_bbdev_dec_op **ops, uint16_t num);
> >
> > +/** @internal Dequeue fft operations from a queue of a device. */
> > +typedef uint16_t (*rte_bbdev_dequeue_fft_ops_t)(
> > +		struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_fft_op **ops, uint16_t num);
> > +
> >  #define RTE_BBDEV_NAME_MAX_LEN  64  /**< Max length of device name
> */
> >
> >  /**
> > @@ -438,6 +449,10 @@ struct __rte_cache_aligned rte_bbdev {
> >  	rte_bbdev_dequeue_enc_ops_t dequeue_ldpc_enc_ops;
> >  	/** Dequeue decode function */
> >  	rte_bbdev_dequeue_dec_ops_t dequeue_ldpc_dec_ops;
> > +	/** Enqueue FFT function */
> > +	rte_bbdev_enqueue_fft_ops_t enqueue_fft_ops;
> > +	/** Dequeue FFT function */
> > +	rte_bbdev_dequeue_fft_ops_t dequeue_fft_ops;
> >  	const struct rte_bbdev_ops *dev_ops;  /**< Functions exported by
> PMD */
> >  	struct rte_bbdev_data *data;  /**< Pointer to device data */
> >  	enum rte_bbdev_state state;  /**< If device is currently used or not
> > */
> 
> 
> Since rte_bbdev is exposed in rte_bbdev.h it can not be changed without
> breaking ABI. It would have been better if data structure was better hidden
> (hint).
> But you can't change it now until 22.11
  
Chautru, Nicolas May 25, 2022, 10:07 p.m. UTC | #3
Hi Hemant,
Gentle reminder in case you can find the time. Could you please have a look at this patch for the bbdev API extension? This is targeting 22.11.
It adds a new operation type for FFT processing (on top of the 4 existing operation types). There is no impact on the la12xx PMD, but it would be good to get review/feedback.
This can be used for vanilla FFT and/or chained FFT/iFFT + point-wise multiplication making it applicable for SRS processing.
Thanks, 
Nic

> -----Original Message-----
> From: Chautru, Nicolas <nicolas.chautru@intel.com>
> Sent: Thursday, March 10, 2022 3:49 PM
> To: dev@dpdk.org; gakhil@marvell.com; trix@redhat.com
> Cc: thomas@monjalon.net; hemant.agrawal@nxp.com; Zhang, Mingshan
> <mingshan.zhang@intel.com>; david.marchand@redhat.com; Chautru,
> Nicolas <nicolas.chautru@intel.com>
> Subject: [PATCH v1] bbdev: add new operation for FFT processing
> 
> Extension of bbdev operation to support FFT based operations.
> 
> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> ---
>  doc/guides/prog_guide/bbdev.rst | 130
> +++++++++++++++++++++++++++++++++++
>  lib/bbdev/rte_bbdev.c           |   8 +++
>  lib/bbdev/rte_bbdev.h           |  76 ++++++++++++++++++++
>  lib/bbdev/rte_bbdev_op.h        | 149
> ++++++++++++++++++++++++++++++++++++++++
>  lib/bbdev/version.map           |  10 +++
>  5 files changed, 373 insertions(+)
> 
> diff --git a/doc/guides/prog_guide/bbdev.rst
> b/doc/guides/prog_guide/bbdev.rst index 70fa01a..2791286 100644
> --- a/doc/guides/prog_guide/bbdev.rst
> +++ b/doc/guides/prog_guide/bbdev.rst
> @@ -1118,6 +1118,136 @@ Figure :numref:`figure_turbo_tb_decode` above
> showing the Turbo decoding of CBs using BBDEV interface in TB-mode  is also
> valid for LDPC decode.
> 
> +BBDEV FFT Operation
> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> +
> +This operation allows to run a combination of DFT and/or IDFT and/or time-
> domain windowing.
> +These can be used in a modular fashion (using bypass modes) or as a
> +processing pipeline which can be used for FFT-based baseband signal
> processing.
> +In more details it allows :
> +- to process the data first through an IDFT of adjustable size and
> +padding;
> +- to perform the windowing as a programmable cyclic shift offset of the
> +data followed by a pointwise multiplication by a time domain window;
> +- to process the related data through a DFT of adjustable size and
> +depadding for each such cyclic shift output.
> +
> +A flexible number of Rx antennas are being processed in parallel with the
> same configuration.
> +The API allows more generally for flexibility in what the PMD may
> +support (capability flags) and flexibility to adjust some of the parameters of
> the processing.
> +
> +The operation/capability flags that can be set for each FFT operation are
> given below.
> +
> +  **NOTE:** The actual operation flags that may be used with a specific
> + BBDEV PMD are dependent on the driver capabilities as reported via
> + ``rte_bbdev_info_get()``, and may be a subset of those below.
> +
> ++--------------------------------------------------------------------+
> +|Description of FFT capability flags                                 |
> ++==============================================================
> ======+
> +|RTE_BBDEV_FFT_WINDOWING                                             |
> +| Set to enable/support windowing in time domain                     |
> ++--------------------------------------------------------------------+
> +|RTE_BBDEV_FFT_CS_ADJUSTMENT                                         |
> +| Set to enable/support  the cyclic shift time offset adjustment     |
> ++--------------------------------------------------------------------+
> +|RTE_BBDEV_FFT_DFT_BYPASS                                            |
> +| Set to bypass the DFT and use directly the IDFT as an option       |
> ++--------------------------------------------------------------------+
> +|RTE_BBDEV_FFT_IDFT_BYPASS                                           |
> +| Set to bypass the IDFT and use directly the DFT as an option       |
> ++--------------------------------------------------------------------+
> +|RTE_BBDEV_FFT_WINDOWING_BYPASS                                      |
> +| Set to bypass the time domain windowing  as an option              |
> ++--------------------------------------------------------------------+
> +|RTE_BBDEV_FFT_POWER_MEAS                                            |
> +| Set to provide an optional power measurement of the DFT output     |
> ++--------------------------------------------------------------------+
> +|RTE_BBDEV_FFT_FP16_INPUT                                            |
> +| Set if the input data shall use FP16 format instead of INT16       |
> ++--------------------------------------------------------------------+
> +|RTE_BBDEV_FFT_FP16_OUTPUT                                           |
> +| Set if the output data shall use FP16 format instead of INT16      |
> ++--------------------------------------------------------------------+
> +
> +The structure passed for each FFT operation is given below, with the
> +operation flags forming a bitmask in the ``op_flags`` field.
> +
> +.. code-block:: c
> +
> +    struct rte_bbdev_op_fft {
> +        struct rte_bbdev_op_data base_input;
> +        struct rte_bbdev_op_data base_output;
> +        struct rte_bbdev_op_data power_meas_output;
> +        uint32_t op_flags;
> +        uint16_t input_sequence_size;
> +        uint16_t input_leading_padding;
> +        uint16_t output_sequence_size;
> +        uint16_t output_leading_depadding;
> +        uint8_t window_index[RTE_BBDEV_MAX_CS_2];
> +        uint16_t cs_bitmap;
> +        uint8_t num_antennas_log2;
> +        uint8_t idft_log2;
> +        uint8_t dft_log2;
> +        int8_t cs_time_adjustment;
> +        int8_t idft_shift;
> +        int8_t dft_shift;
> +        uint16_t ncs_reciprocal;
> +        uint16_t power_shift;
> +        uint16_t fp16_exp_adjust;
> +    };
> +
> +The FFT parameters are set out in the table below.
> +
> ++----------------------+--------------------------------------------------------------+
> +|Parameter             |Description                                                   |
> ++======================+=======================================
> ========
> ++===============+
> +|base_input            |input data                                                    |
> ++----------------------+--------------------------------------------------------------+
> +|base_output           |output data                                                   |
> ++----------------------+--------------------------------------------------------------+
> +|power_meas_output     |optional output data with power measurement
> on DFT output     |                                                    |
> ++----------------------+--------------------------------------------------------------+
> +|op_flags              |bitmask of all active operation capabilities                  |
> ++----------------------+--------------------------------------------------------------+
> +|input_sequence_size   |size of the input sequence in 32-bits points per
> antenna      |
> ++----------------------+--------------------------------------------------------------+
> +|input_leading_padding |number of points padded at the start of input
> data            |
> ++----------------------+--------------------------------------------------------------+
> +|output_sequence_size  |size of the output sequence per antenna and
> cyclic shift      |
> ++----------------------+--------------------------------------------------------------+
> +|output_depadding      |number of points depadded at the start of output
> data         |
> ++----------------------+--------------------------------------------------------------+
> +|window_index          |optional windowing profile index used for each cyclic
> shift   |
> ++----------------------+--------------------------------------------------------------+
> +|cs_bitmap             |bitmap of the cyclic shift output requested (LSB for
> index 0) |
> ++----------------------+--------------------------------------------------------------+
> +|num_antennas_log2     |number of antennas as a log2 (10 maps to 1024...)
> |
> ++----------------------+--------------------------------------------------------------+
> +|idft_log2             |iDFT size as a log2                                           |
> ++----------------------+--------------------------------------------------------------+
> +|dft_log2              |DFT size as a log2                                            |
> ++----------------------+--------------------------------------------------------------+
> +|cs_time_adjustment    |adjustment of time position of all the cyclic shift
> output    |
> ++----------------------+--------------------------------------------------------------+
> +|idft_shift            |shift down of signal level post iDFT                          |
> ++----------------------+--------------------------------------------------------------+
> +|dft_shift             |shift down of signal level post DFT                           |
> ++----------------------+--------------------------------------------------------------+
> +|ncs_reciprocal        |inverse of max number of CS normalized to 15b (ie.
> 231 for 12)|
> ++----------------------+--------------------------------------------------------------+
> +|power_shift           |shift down of level of power measurement when
> enabled         |
> ++----------------------+--------------------------------------------------------------+
> +|fp16_exp_adjust       |value added to FP16 exponent at conversion from
> INT16         |
> ++----------------------+--------------------------------------------------------------+
> +
> +The mbuf input ``base_input`` is mandatory for all BBDEV PMDs and is
> +the incoming data for the processing. Its size may not fit into an
> +actual mbuf, but the structure is used to pass iova address.
> +The mbuf output ``base_output`` is mandatory and is output of the FFT processing
> chain.
> +Each point is a complex number of 32bits : either as 2 INT16 or as 2
> +FP16 based when the option supported.
> +The data layout is based on contiguous concatenation of output data
> +first by cyclic shift then by antenna.
> 
>  Sample code
>  -----------
> diff --git a/lib/bbdev/rte_bbdev.c b/lib/bbdev/rte_bbdev.c index
> aaee7b7..a72ecba 100644
> --- a/lib/bbdev/rte_bbdev.c
> +++ b/lib/bbdev/rte_bbdev.c
> @@ -850,6 +850,9 @@ struct rte_bbdev *
>  	case RTE_BBDEV_OP_LDPC_ENC:
>  		result = sizeof(struct rte_bbdev_enc_op);
>  		break;
> +	case RTE_BBDEV_OP_FFT:
> +		result = sizeof(struct rte_bbdev_fft_op);
> +		break;
>  	default:
>  		break;
>  	}
> @@ -873,6 +876,10 @@ struct rte_bbdev *
>  		struct rte_bbdev_enc_op *op = element;
>  		memset(op, 0, mempool->elt_size);
>  		op->mempool = mempool;
> +	} else if (type == RTE_BBDEV_OP_FFT) {
> +		struct rte_bbdev_fft_op *op = element;
> +		memset(op, 0, mempool->elt_size);
> +		op->mempool = mempool;
>  	}
>  }
> 
> @@ -1123,6 +1130,7 @@ struct rte_mempool *
>  		"RTE_BBDEV_OP_TURBO_ENC",
>  		"RTE_BBDEV_OP_LDPC_DEC",
>  		"RTE_BBDEV_OP_LDPC_ENC",
> +		"RTE_BBDEV_OP_FFT",
>  	};
> 
>  	if (op_type < RTE_BBDEV_OP_TYPE_COUNT) diff --git
> a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h index b88c881..e9ca673
> 100644
> --- a/lib/bbdev/rte_bbdev.h
> +++ b/lib/bbdev/rte_bbdev.h
> @@ -380,6 +380,12 @@ typedef uint16_t
> (*rte_bbdev_enqueue_dec_ops_t)(
>  		struct rte_bbdev_dec_op **ops,
>  		uint16_t num);
> 
> +/** @internal Enqueue fft operations for processing on queue of a
> +device. */ typedef uint16_t (*rte_bbdev_enqueue_fft_ops_t)(
> +		struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_fft_op **ops,
> +		uint16_t num);
> +
>  /** @internal Dequeue encode operations from a queue of a device. */
> typedef uint16_t (*rte_bbdev_dequeue_enc_ops_t)(
>  		struct rte_bbdev_queue_data *q_data,
> @@ -390,6 +396,11 @@ typedef uint16_t
> (*rte_bbdev_dequeue_dec_ops_t)(
>  		struct rte_bbdev_queue_data *q_data,
>  		struct rte_bbdev_dec_op **ops, uint16_t num);
> 
> +/** @internal Dequeue fft operations from a queue of a device. */
> +typedef uint16_t (*rte_bbdev_dequeue_fft_ops_t)(
> +		struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_fft_op **ops, uint16_t num);
> +
>  #define RTE_BBDEV_NAME_MAX_LEN  64  /**< Max length of device name
> */
> 
>  /**
> @@ -438,6 +449,10 @@ struct __rte_cache_aligned rte_bbdev {
>  	rte_bbdev_dequeue_enc_ops_t dequeue_ldpc_enc_ops;
>  	/** Dequeue decode function */
>  	rte_bbdev_dequeue_dec_ops_t dequeue_ldpc_dec_ops;
> +	/** Enqueue FFT function */
> +	rte_bbdev_enqueue_fft_ops_t enqueue_fft_ops;
> +	/** Dequeue FFT function */
> +	rte_bbdev_dequeue_fft_ops_t dequeue_fft_ops;
>  	const struct rte_bbdev_ops *dev_ops;  /**< Functions exported by
> PMD */
>  	struct rte_bbdev_data *data;  /**< Pointer to device data */
>  	enum rte_bbdev_state state;  /**< If device is currently used or not
> */ @@ -570,6 +585,36 @@ struct __rte_cache_aligned rte_bbdev {
>  	return dev->enqueue_ldpc_dec_ops(q_data, ops, num_ops);  }
> 
> +/**
> + * Enqueue a burst of fft operations to a queue of the device.
> + * This functions only enqueues as many operations as currently
> +possible and
> + * does not block until @p num_ops entries in the queue are available.
> + * This function does not provide any error notification to avoid the
> + * corresponding overhead.
> + *
> + * @param dev_id
> + *   The identifier of the device.
> + * @param queue_id
> + *   The index of the queue.
> + * @param ops
> + *   Pointer array containing operations to be enqueued Must have at least
> + *   @p num_ops entries
> + * @param num_ops
> + *   The maximum number of operations to enqueue.
> + *
> + * @return
> + *   The number of operations actually enqueued (this is the number of
> processed
> + *   entries in the @p ops array).
> + */
> +__rte_experimental
> +static inline uint16_t
> +rte_bbdev_enqueue_fft_ops(uint16_t dev_id, uint16_t queue_id,
> +		struct rte_bbdev_fft_op **ops, uint16_t num_ops) {
> +	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
> +	struct rte_bbdev_queue_data *q_data = &dev->data-
> >queues[queue_id];
> +	return dev->enqueue_fft_ops(q_data, ops, num_ops); }
> 
>  /**
>   * Dequeue a burst of processed encode operations from a queue of the
> device.
> @@ -695,6 +740,37 @@ struct __rte_cache_aligned rte_bbdev {
>  	return dev->dequeue_ldpc_dec_ops(q_data, ops, num_ops);  }
> 
> +/**
> + * Dequeue a burst of fft operations from a queue of the device.
> + * This functions returns only the current contents of the queue, and
> +does not
> + * block until @p num_ops is available.
> + * This function does not provide any error notification to avoid the
> + * corresponding overhead.
> + *
> + * @param dev_id
> + *   The identifier of the device.
> + * @param queue_id
> + *   The index of the queue.
> + * @param ops
> + *   Pointer array where operations will be dequeued to. Must have at least
> + *   @p num_ops entries
> + * @param num_ops
> + *   The maximum number of operations to dequeue.
> + *
> + * @return
> + *   The number of operations actually dequeued (this is the number of
> entries
> + *   copied into the @p ops array).
> + */
> +__rte_experimental
> +static inline uint16_t
> +rte_bbdev_dequeue_fft_ops(uint16_t dev_id, uint16_t queue_id,
> +		struct rte_bbdev_fft_op **ops, uint16_t num_ops) {
> +	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
> +	struct rte_bbdev_queue_data *q_data = &dev->data-
> >queues[queue_id];
> +	return dev->dequeue_fft_ops(q_data, ops, num_ops); }
> +
>  /** Definitions of device event types */  enum rte_bbdev_event_type {
>  	RTE_BBDEV_EVENT_UNKNOWN,  /**< unknown event type */ diff --
> git a/lib/bbdev/rte_bbdev_op.h b/lib/bbdev/rte_bbdev_op.h index
> 6d56133..57e35f4 100644
> --- a/lib/bbdev/rte_bbdev_op.h
> +++ b/lib/bbdev/rte_bbdev_op.h
> @@ -47,6 +47,8 @@
>  #define RTE_BBDEV_TURBO_MAX_CODE_BLOCKS (64)
>  /* LDPC:  Maximum number of Code Blocks in Transport Block.*/  #define
> RTE_BBDEV_LDPC_MAX_CODE_BLOCKS (256)
> +/* 12 CS maximum */
> +#define RTE_BBDEV_MAX_CS_2 (6)
> 
>  /** Flags for turbo decoder operation and capability structure */  enum
> rte_bbdev_op_td_flag_bitmasks { @@ -211,6 +213,26 @@ enum
> rte_bbdev_op_ldpcenc_flag_bitmasks {
>  	RTE_BBDEV_LDPC_ENC_CONCATENATION = (1ULL << 7)  };
> 
> +/** Flags for DFT operation and capability structure */ enum
> +rte_bbdev_op_fft_flag_bitmasks {
> +	/** Flexible windowing capability */
> +	RTE_BBDEV_FFT_WINDOWING = (1ULL << 0),
> +	/** Flexible adjustment of Cyclic Shift time offset */
> +	RTE_BBDEV_FFT_CS_ADJUSTMENT = (1ULL << 1),
> +	/** Set for bypass the DFT and get directly into iDFT input */
> +	RTE_BBDEV_FFT_DFT_BYPASS = (1ULL << 2),
> +	/** Set for bypass the IDFT and get directly the DFT output */
> +	RTE_BBDEV_FFT_IDFT_BYPASS = (1ULL << 3),
> +	/** Set for bypass time domain windowing */
> +	RTE_BBDEV_FFT_WINDOWING_BYPASS = (1ULL << 4),
> +	/** Set for optional power measurement on DFT output */
> +	RTE_BBDEV_FFT_POWER_MEAS = (1ULL << 5),
> +	/** Set if the input data uses FP16 format */
> +	RTE_BBDEV_FFT_FP16_INPUT = (1ULL << 6),
> +	/** Set if the output data uses FP16 format */
> +	RTE_BBDEV_FFT_FP16_OUTPUT = (1ULL << 7) };
> +
>  /** Flags for the Code Block/Transport block mode  */  enum
> rte_bbdev_op_cb_mode {
>  	/** One operation is one or fraction of one transport block  */ @@ -
> 689,6 +711,55 @@ struct rte_bbdev_op_ldpc_enc {
>  	};
>  };
> 
> +/** Operation structure for FFT processing.
> + *
> + * The operation processes the data for multiple antennas in a single
> +call
> + * (i.e. for all the REs belonging to a given SRS sequence for
> +instance)
> + *
> + * The output mbuf data structure is expected to be allocated by the
> + * application with enough room for the output data.
> + */
> +struct rte_bbdev_op_fft {
> +	/** Input data starting from first antenna */
> +	struct rte_bbdev_op_data base_input;
> +	/** Output data starting from first antenna and first cyclic shift */
> +	struct rte_bbdev_op_data base_output;
> +	/** Optional power measurement output data */
> +	struct rte_bbdev_op_data power_meas_output;
> +	/** Flags from rte_bbdev_op_fft_flag_bitmasks */
> +	uint32_t op_flags;
> +	/** Input sequence size in 32-bits points */
> +	uint16_t input_sequence_size;
> +	/** Padding at the start of the sequence */
> +	uint16_t input_leading_padding;
> +	/** Output sequence size in 32-bits points */
> +	uint16_t output_sequence_size;
> +	/** Depadding at the start of the DFT output */
> +	uint16_t output_leading_depadding;
> +	/** Window index being used for each cyclic shift output */
> +	uint8_t window_index[RTE_BBDEV_MAX_CS_2];
> +	/** Bitmap of the cyclic shift output requested */
> +	uint16_t cs_bitmap;
> +	/** Number of antennas as a log2 – 8 to 128 */
> +	uint8_t num_antennas_log2;
> +	/** iDFT size as a log2 - 32 to 2048 */
> +	uint8_t idft_log2;
> +	/** DFT size as a log2 - 8 to 2048 */
> +	uint8_t dft_log2;
> +	/** Adjustment of position of the cyclic shifts - -31 to 31 */
> +	int8_t cs_time_adjustment;
> +	/** iDFT shift down */
> +	int8_t idft_shift;
> +	/** DFT shift down */
> +	int8_t dft_shift;
> +	/** NCS reciprocal factor  */
> +	uint16_t ncs_reciprocal;
> +	/** power measurement out shift down */
> +	uint16_t power_shift;
> +	/** Adjust the FP16 exponent for INT<->FP16 conversion */
> +	uint16_t fp16_exp_adjust;
> +};
> +
>  /** List of the capabilities for the Turbo Decoder */  struct
> rte_bbdev_op_cap_turbo_dec {
>  	/** Flags from rte_bbdev_op_td_flag_bitmasks */ @@ -741,6
> +812,16 @@ struct rte_bbdev_op_cap_ldpc_enc {
>  	uint16_t num_buffers_dst;
>  };
> 
> +/** List of the capabilities for the FFT */ struct rte_bbdev_op_cap_fft
> +{
> +	/** Flags from rte_bbdev_op_fft_flag_bitmasks */
> +	uint32_t capability_flags;
> +	/** Num input code block buffers */
> +	uint16_t num_buffers_src;
> +	/** Num output code block buffers */
> +	uint16_t num_buffers_dst;
> +};
> +
>  /** Different operation types supported by the device */  enum
> rte_bbdev_op_type {
>  	RTE_BBDEV_OP_NONE,  /**< Dummy operation that does nothing */
> @@ -748,6 +829,7 @@ enum rte_bbdev_op_type {
>  	RTE_BBDEV_OP_TURBO_ENC,  /**< Turbo encode */
>  	RTE_BBDEV_OP_LDPC_DEC,  /**< LDPC decode */
>  	RTE_BBDEV_OP_LDPC_ENC,  /**< LDPC encode */
> +	RTE_BBDEV_OP_FFT,  /**< FFT */
>  	RTE_BBDEV_OP_TYPE_COUNT,  /**< Count of different op types */
> };
> 
> @@ -791,6 +873,18 @@ struct rte_bbdev_dec_op {
>  	};
>  };
> 
> +/** Structure specifying a single fft operation */ struct
> +rte_bbdev_fft_op {
> +	/** Status of operation that was performed */
> +	int status;
> +	/** Mempool which op instance is in */
> +	struct rte_mempool *mempool;
> +	/** Opaque pointer for user data */
> +	void *opaque_data;
> +	/** Contains FFT specific parameters */
> +	struct rte_bbdev_op_fft fft;
> +};
> +
>  /** Operation capabilities supported by a device */  struct
> rte_bbdev_op_cap {
>  	enum rte_bbdev_op_type type;  /**< Type of operation */ @@ -
> 799,6 +893,7 @@ struct rte_bbdev_op_cap {
>  		struct rte_bbdev_op_cap_turbo_enc turbo_enc;
>  		struct rte_bbdev_op_cap_ldpc_dec ldpc_dec;
>  		struct rte_bbdev_op_cap_ldpc_enc ldpc_enc;
> +		struct rte_bbdev_op_cap_fft fft;
>  	} cap;  /**< Operation-type specific capabilities */  };
> 
> @@ -918,6 +1013,42 @@ struct rte_mempool *  }
> 
>  /**
> + * Bulk allocate fft operations from a mempool with parameter defaults
> reset.
> + *
> + * @param mempool
> + *   Operation mempool, created by rte_bbdev_op_pool_create().
> + * @param ops
> + *   Output array to place allocated operations
> + * @param num_ops
> + *   Number of operations to allocate
> + *
> + * @returns
> + *   - 0 on success
> + *   - EINVAL if invalid mempool is provided
> + */
> +__rte_experimental
> +static inline int
> +rte_bbdev_fft_op_alloc_bulk(struct rte_mempool *mempool,
> +		struct rte_bbdev_fft_op **ops, uint16_t num_ops) {
> +	struct rte_bbdev_op_pool_private *priv;
> +	int ret;
> +
> +	/* Check type */
> +	priv = (struct rte_bbdev_op_pool_private *)
> +			rte_mempool_get_priv(mempool);
> +	if (unlikely(priv->type != RTE_BBDEV_OP_FFT))
> +		return -EINVAL;
> +
> +	/* Get elements */
> +	ret = rte_mempool_get_bulk(mempool, (void **)ops, num_ops);
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	return 0;
> +}
> +
> +/**
>   * Free decode operation structures that were allocated by
>   * rte_bbdev_dec_op_alloc_bulk().
>   * All structures must belong to the same mempool.
> @@ -951,6 +1082,24 @@ struct rte_mempool *
>  		rte_mempool_put_bulk(ops[0]->mempool, (void **)ops,
> num_ops);  }
> 
> +/**
> + * Free fft operation structures that were allocated by
> + * rte_bbdev_fft_op_alloc_bulk().
> + * All structures must belong to the same mempool.
> + *
> + * @param ops
> + *   Operation structures
> + * @param num_ops
> + *   Number of structures
> + */
> +__rte_experimental
> +static inline void
> +rte_bbdev_fft_op_free_bulk(struct rte_bbdev_fft_op **ops, unsigned int
> +num_ops) {
> +	if (num_ops > 0)
> +		rte_mempool_put_bulk(ops[0]->mempool, (void **)ops,
> num_ops); }
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/bbdev/version.map b/lib/bbdev/version.map index
> cce3f3c..16a16dc 100644
> --- a/lib/bbdev/version.map
> +++ b/lib/bbdev/version.map
> @@ -39,3 +39,13 @@ DPDK_22 {
> 
>  	local: *;
>  };
> +
> +EXPERIMENTAL {
> +	global:
> +
> +	# added in 22.06
> +	rte_bbdev_enqueue_fft_ops;
> +	rte_bbdev_dequeue_fft_ops;
> +	rte_bbdev_fft_op_alloc_bulk;
> +	rte_bbdev_fft_op_free_bulk;
> +};
> --
> 1.8.3.1
  
Hemant Agrawal May 26, 2022, 6:05 a.m. UTC | #4
On 5/26/2022 3:37 AM, Chautru, Nicolas wrote:
> Hi Hernant,
> Gentle reminder in case you can find the time. Could please have a look this patch for bbdev api extension. This is targeting 22.11.
> New operation type for FFT processing (On top of 4 existing operations types). There is no impact to the la12xx PMD but would be good to get review/feedback.
> This can be used for vanilla FFT and/or chained FFT/iFFT + point-wise multiplication making it applicable for SRS processing.
> Thanks,
> Nic

Acked-by:  Hemant Agrawal <hemant.agrawal@nxp.com>

HI Nicolas,

     Yes, I had a look into it.  I see no issue in it. This is a fairly
independent feature addition to BBDEV.


Regards,

Hemant

>> -----Original Message-----
>> From: Chautru, Nicolas <nicolas.chautru@intel.com>
>> Sent: Thursday, March 10, 2022 3:49 PM
>> To: dev@dpdk.org; gakhil@marvell.com; trix@redhat.com
>> Cc: thomas@monjalon.net; hemant.agrawal@nxp.com; Zhang, Mingshan
>> <mingshan.zhang@intel.com>; david.marchand@redhat.com; Chautru,
>> Nicolas <nicolas.chautru@intel.com>
>> Subject: [PATCH v1] bbdev: add new operation for FFT processing
>>
>> Extension of bbdev operation to support FFT based operations.
>>
>> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
>> ---
>>   doc/guides/prog_guide/bbdev.rst | 130
>> +++++++++++++++++++++++++++++++++++
>>   lib/bbdev/rte_bbdev.c           |   8 +++
>>   lib/bbdev/rte_bbdev.h           |  76 ++++++++++++++++++++
>>   lib/bbdev/rte_bbdev_op.h        | 149
>> ++++++++++++++++++++++++++++++++++++++++
>>   lib/bbdev/version.map           |  10 +++
>>   5 files changed, 373 insertions(+)
>>
>> diff --git a/doc/guides/prog_guide/bbdev.rst
>> b/doc/guides/prog_guide/bbdev.rst index 70fa01a..2791286 100644
>> --- a/doc/guides/prog_guide/bbdev.rst
>> +++ b/doc/guides/prog_guide/bbdev.rst
>> @@ -1118,6 +1118,136 @@ Figure :numref:`figure_turbo_tb_decode` above
>> showing the Turbo decoding of CBs using BBDEV interface in TB-mode  is also
>> valid for LDPC decode.
>>
>> +BBDEV FFT Operation
>> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> +
>> +This operation allows to run a combination of DFT and/or IDFT and/or time-
>> domain windowing.
>> +These can be used in a modular fashion (using bypass modes) or as a
>> +processing pipeline which can be used for FFT-based baseband signal
>> processing.
>> +In more details it allows :
>> +- to process the data first through an IDFT of adjustable size and
>> +padding;
>> +- to perform the windowing as a programmable cyclic shift offset of the
>> +data followed by a pointwise multiplication by a time domain window;
>> +- to process the related data through a DFT of adjustable size and
>> +depadding for each such cyclic shift output.
>> +
>> +A flexible number of Rx antennas are being processed in parallel with the
>> same configuration.
>> +The API allows more generally for flexibility in what the PMD may
>> +support (cabability flags) and flexibility to adjust some of the parameters of
>> the processing.
>> +
>> +The operation/capability flags that can be set for each FFT operation are
>> given below.
>> +
>> +  **NOTE:** The actual operation flags that may be used with a specific
>> + BBDEV PMD are dependent on the driver capabilities as reported via
>> + ``rte_bbdev_info_get()``, and may be a subset of those below.
>> +
>> ++--------------------------------------------------------------------+
>> +|Description of FFT capability flags                                 |
>> ++==============================================================
>> ======+
>> +|RTE_BBDEV_FFT_WINDOWING                                             |
>> +| Set to enable/support windowing in time domain                     |
>> ++--------------------------------------------------------------------+
>> +|RTE_BBDEV_FFT_CS_ADJUSTMENT                                         |
>> +| Set to enable/support  the cyclic shift time offset adjustment     |
>> ++--------------------------------------------------------------------+
>> +|RTE_BBDEV_FFT_DFT_BYPASS                                            |
>> +| Set to bypass the DFT and use directly the IDFT as an option       |
>> ++--------------------------------------------------------------------+
>> +|RTE_BBDEV_FFT_IDFT_BYPASS                                           |
>> +| Set to bypass the IDFT and use directly the DFT as an option       |
>> ++--------------------------------------------------------------------+
>> +|RTE_BBDEV_FFT_WINDOWING_BYPASS                                      |
>> +| Set to bypass the time domain windowing  as an option              |
>> ++--------------------------------------------------------------------+
>> +|RTE_BBDEV_FFT_POWER_MEAS                                            |
>> +| Set to provide an optional power measument of the DFT output       |
>> ++--------------------------------------------------------------------+
>> +|RTE_BBDEV_FFT_FP16_INPUT                                            |
>> +| Set if the input data shall use FP16 format instead of INT16       |
>> ++--------------------------------------------------------------------+
>> +|RTE_BBDEV_FFT_FP16_OUTPUT                                           |
>> +| Set if the output data shall use FP16 format instead of INT16      |
>> ++--------------------------------------------------------------------+
>> +
>> +The structure passed for each FFT operation is given below, with the
>> +operation flags forming a bitmask in the ``op_flags`` field.
>> +
>> +.. code-block:: c
>> +
>> +    struct rte_bbdev_op_fft {
>> +        struct rte_bbdev_op_data base_input;
>> +        struct rte_bbdev_op_data base_output;
>> +        struct rte_bbdev_op_data power_meas_output;
>> +        uint32_t op_flags;
>> +        uint16_t input_sequence_size;
>> +        uint16_t input_leading_padding;
>> +        uint16_t output_sequence_size;
>> +        uint16_t output_leading_depadding;
>> +        uint8_t window_index[RTE_BBDEV_MAX_CS_2];
>> +        uint16_t cs_bitmap;
>> +        uint8_t num_antennas_log2;
>> +        uint8_t idft_log2;
>> +        uint8_t dft_log2;
>> +        int8_t cs_time_adjustment;
>> +        int8_t idft_shift;
>> +        int8_t dft_shift;
>> +        uint16_t ncs_reciprocal;
>> +        uint16_t power_shift;
>> +        uint16_t fp16_exp_adjust;
>> +    };
>> +
>> +The FFT parameters are set out in the table below.
>> +
>> ++----------------------+--------------------------------------------------------------+
>> +|Parameter             |Description                                                   |
>> ++======================+=======================================
>> ========
>> ++===============+
>> +|base_input            |input data                                                    |
>> ++----------------------+--------------------------------------------------------------+
>> +|base_output           |output data                                                   |
>> ++----------------------+--------------------------------------------------------------+
>> +|power_meas_output     |optional output data with power measurement
>> on DFT output     |                                                    |
>> ++----------------------+--------------------------------------------------------------+
>> +|op_flags              |bitmask of all active operation capabilities                  |
>> ++----------------------+--------------------------------------------------------------+
>> +|input_sequence_size   |size of the input sequence in 32-bits points per
>> antenna      |
>> ++----------------------+--------------------------------------------------------------+
>> +|input_leading_padding |number of points padded at the start of input
>> data            |
>> ++----------------------+--------------------------------------------------------------+
>> +|output_sequence_size  |size of the output sequence per antenna and
>> cyclic shift      |
>> ++----------------------+--------------------------------------------------------------+
>> +|output_depadding      |number of points depadded at the start of output
>> data         |
>> ++----------------------+--------------------------------------------------------------+
>> +|window_index          |optional windowing profile index used for each cyclic
>> shift   |
>> ++----------------------+--------------------------------------------------------------+
>> +|cs_bitmap             |bitmap of the cyclic shift output requested (LSB for
>> index 0) |
>> ++----------------------+--------------------------------------------------------------+
>> +|num_antennas_log2     |number of antennas as a log2 (10 maps to 1024...)
>> |
>> ++----------------------+--------------------------------------------------------------+
>> +|idft_log2             |iDFT size as a log2                                           |
>> ++----------------------+--------------------------------------------------------------+
>> +|dft_log2              |DFT size as a log2                                            |
>> ++----------------------+--------------------------------------------------------------+
>> +|cs_time_adjustment    |adjustment of time position of all the cyclic shift
>> output    |
>> ++----------------------+--------------------------------------------------------------+
>> +|idft_shift            |shift down of signal level post iDFT                          |
>> ++----------------------+--------------------------------------------------------------+
>> +|dft_shift             |shift down of signal level post DFT                           |
>> ++----------------------+--------------------------------------------------------------+
>> +|ncs_reciprocal        |inverse of max number of CS normalized to 15b (ie.
>> 231 for 12)|
>> ++----------------------+--------------------------------------------------------------+
>> +|power_shift           |shift down of level of power measurement when
>> enabled         |
>> ++----------------------+--------------------------------------------------------------+
>> +|fp16_exp_adjust       |value added to FP16 exponent at conversion from
>> INT16         |
>> ++----------------------+--------------------------------------------------------------+
>> +
>> +The mbuf input ``base_input`` is mandatory for all BBDEV PMDs and is
>> +the incoming data for the processing. Its size may not fit into an
>> +actual mbuf, but the stucture is used to pass iova address.
>> +The mbuf output ``output`` is mandatory and is output of the FFT processing
>> chain.
>> +Each point is a complex number of 32bits : either as 2 INT16 or as 2
>> +FP16 based when the option supported.
>> +The data layout is based on contiguous concatenation of output data
>> +first by cyclic shift then by antenna.
>>
>>   Sample code
>>   -----------
>> diff --git a/lib/bbdev/rte_bbdev.c b/lib/bbdev/rte_bbdev.c index
>> aaee7b7..a72ecba 100644
>> --- a/lib/bbdev/rte_bbdev.c
>> +++ b/lib/bbdev/rte_bbdev.c
>> @@ -850,6 +850,9 @@ struct rte_bbdev *
>>   	case RTE_BBDEV_OP_LDPC_ENC:
>>   		result = sizeof(struct rte_bbdev_enc_op);
>>   		break;
>> +	case RTE_BBDEV_OP_FFT:
>> +		result = sizeof(struct rte_bbdev_fft_op);
>> +		break;
>>   	default:
>>   		break;
>>   	}
>> @@ -873,6 +876,10 @@ struct rte_bbdev *
>>   		struct rte_bbdev_enc_op *op = element;
>>   		memset(op, 0, mempool->elt_size);
>>   		op->mempool = mempool;
>> +	} else if (type == RTE_BBDEV_OP_FFT) {
>> +		struct rte_bbdev_fft_op *op = element;
>> +		memset(op, 0, mempool->elt_size);
>> +		op->mempool = mempool;
>>   	}
>>   }
>>
>> @@ -1123,6 +1130,7 @@ struct rte_mempool *
>>   		"RTE_BBDEV_OP_TURBO_ENC",
>>   		"RTE_BBDEV_OP_LDPC_DEC",
>>   		"RTE_BBDEV_OP_LDPC_ENC",
>> +		"RTE_BBDEV_OP_FFT",
>>   	};
>>
>>   	if (op_type < RTE_BBDEV_OP_TYPE_COUNT) diff --git
>> a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h index b88c881..e9ca673
>> 100644
>> --- a/lib/bbdev/rte_bbdev.h
>> +++ b/lib/bbdev/rte_bbdev.h
>> @@ -380,6 +380,12 @@ typedef uint16_t
>> (*rte_bbdev_enqueue_dec_ops_t)(
>>   		struct rte_bbdev_dec_op **ops,
>>   		uint16_t num);
>>
>> +/** @internal Enqueue fft operations for processing on queue of a
>> +device. */ typedef uint16_t (*rte_bbdev_enqueue_fft_ops_t)(
>> +		struct rte_bbdev_queue_data *q_data,
>> +		struct rte_bbdev_fft_op **ops,
>> +		uint16_t num);
>> +
>>   /** @internal Dequeue encode operations from a queue of a device. */
>> typedef uint16_t (*rte_bbdev_dequeue_enc_ops_t)(
>>   		struct rte_bbdev_queue_data *q_data,
>> @@ -390,6 +396,11 @@ typedef uint16_t
>> (*rte_bbdev_dequeue_dec_ops_t)(
>>   		struct rte_bbdev_queue_data *q_data,
>>   		struct rte_bbdev_dec_op **ops, uint16_t num);
>>
>> +/** @internal Dequeue fft operations from a queue of a device. */
>> +typedef uint16_t (*rte_bbdev_dequeue_fft_ops_t)(
>> +		struct rte_bbdev_queue_data *q_data,
>> +		struct rte_bbdev_fft_op **ops, uint16_t num);
>> +
>>   #define RTE_BBDEV_NAME_MAX_LEN  64  /**< Max length of device name
>> */
>>
>>   /**
>> @@ -438,6 +449,10 @@ struct __rte_cache_aligned rte_bbdev {
>>   	rte_bbdev_dequeue_enc_ops_t dequeue_ldpc_enc_ops;
>>   	/** Dequeue decode function */
>>   	rte_bbdev_dequeue_dec_ops_t dequeue_ldpc_dec_ops;
>> +	/** Enqueue FFT function */
>> +	rte_bbdev_enqueue_fft_ops_t enqueue_fft_ops;
>> +	/** Dequeue FFT function */
>> +	rte_bbdev_dequeue_fft_ops_t dequeue_fft_ops;
>>   	const struct rte_bbdev_ops *dev_ops;  /**< Functions exported by
>> PMD */
>>   	struct rte_bbdev_data *data;  /**< Pointer to device data */
>>   	enum rte_bbdev_state state;  /**< If device is currently used or not
>> */ @@ -570,6 +585,36 @@ struct __rte_cache_aligned rte_bbdev {
>>   	return dev->enqueue_ldpc_dec_ops(q_data, ops, num_ops);  }
>>
>> +/**
>> + * Enqueue a burst of fft operations to a queue of the device.
>> + * This functions only enqueues as many operations as currently
>> +possible and
>> + * does not block until @p num_ops entries in the queue are available.
>> + * This function does not provide any error notification to avoid the
>> + * corresponding overhead.
>> + *
>> + * @param dev_id
>> + *   The identifier of the device.
>> + * @param queue_id
>> + *   The index of the queue.
>> + * @param ops
>> + *   Pointer array containing operations to be enqueued Must have at least
>> + *   @p num_ops entries
>> + * @param num_ops
>> + *   The maximum number of operations to enqueue.
>> + *
>> + * @return
>> + *   The number of operations actually enqueued (this is the number of
>> processed
>> + *   entries in the @p ops array).
>> + */
>> +__rte_experimental
>> +static inline uint16_t
>> +rte_bbdev_enqueue_fft_ops(uint16_t dev_id, uint16_t queue_id,
>> +		struct rte_bbdev_fft_op **ops, uint16_t num_ops) {
>> +	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
>> +	struct rte_bbdev_queue_data *q_data = &dev->data-
>>> queues[queue_id];
>> +	return dev->enqueue_fft_ops(q_data, ops, num_ops); }
>>
>>   /**
>>    * Dequeue a burst of processed encode operations from a queue of the
>> device.
>> @@ -695,6 +740,37 @@ struct __rte_cache_aligned rte_bbdev {
>>   	return dev->dequeue_ldpc_dec_ops(q_data, ops, num_ops);  }
>>
>> +/**
>> + * Dequeue a burst of fft operations from a queue of the device.
>> + * This functions returns only the current contents of the queue, and
>> +does not
>> + * block until @ num_ops is available.
>> + * This function does not provide any error notification to avoid the
>> + * corresponding overhead.
>> + *
>> + * @param dev_id
>> + *   The identifier of the device.
>> + * @param queue_id
>> + *   The index of the queue.
>> + * @param ops
>> + *   Pointer array where operations will be dequeued to. Must have at least
>> + *   @p num_ops entries
>> + * @param num_ops
>> + *   The maximum number of operations to dequeue.
>> + *
>> + * @return
>> + *   The number of operations actually dequeued (this is the number of
>> entries
>> + *   copied into the @p ops array).
>> + */
>> +__rte_experimental
>> +static inline uint16_t
>> +rte_bbdev_dequeue_fft_ops(uint16_t dev_id, uint16_t queue_id,
>> +		struct rte_bbdev_fft_op **ops, uint16_t num_ops) {
>> +	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
>> +	struct rte_bbdev_queue_data *q_data = &dev->data-
>>> queues[queue_id];
>> +	return dev->dequeue_fft_ops(q_data, ops, num_ops); }
>> +
>>   /** Definitions of device event types */  enum rte_bbdev_event_type {
>>   	RTE_BBDEV_EVENT_UNKNOWN,  /**< unknown event type */ diff --
>> git a/lib/bbdev/rte_bbdev_op.h b/lib/bbdev/rte_bbdev_op.h index
>> 6d56133..57e35f4 100644
>> --- a/lib/bbdev/rte_bbdev_op.h
>> +++ b/lib/bbdev/rte_bbdev_op.h
>> @@ -47,6 +47,8 @@
>>   #define RTE_BBDEV_TURBO_MAX_CODE_BLOCKS (64)
>>   /* LDPC:  Maximum number of Code Blocks in Transport Block.*/  #define
>> RTE_BBDEV_LDPC_MAX_CODE_BLOCKS (256)
>> +/* 12 CS maximum */
>> +#define RTE_BBDEV_MAX_CS_2 (6)
>>
>>   /** Flags for turbo decoder operation and capability structure */  enum
>> rte_bbdev_op_td_flag_bitmasks { @@ -211,6 +213,26 @@ enum
>> rte_bbdev_op_ldpcenc_flag_bitmasks {
>>   	RTE_BBDEV_LDPC_ENC_CONCATENATION = (1ULL << 7)  };
>>
>> +/** Flags for DFT operation and capability structure */ enum
>> +rte_bbdev_op_fft_flag_bitmasks {
>> +	/** Flexible windowing capability */
>> +	RTE_BBDEV_FFT_WINDOWING = (1ULL << 0),
>> +	/** Flexible adjustment of Cyclic Shift time offset */
>> +	RTE_BBDEV_FFT_CS_ADJUSTMENT = (1ULL << 1),
>> +	/** Set for bypass the DFT and get directly into iDFT input */
>> +	RTE_BBDEV_FFT_DFT_BYPASS = (1ULL << 2),
>> +	/** Set for bypass the IDFT and get directly the DFT output */
>> +	RTE_BBDEV_FFT_IDFT_BYPASS = (1ULL << 3),
>> +	/** Set for bypass time domain windowing */
>> +	RTE_BBDEV_FFT_WINDOWING_BYPASS = (1ULL << 4),
>> +	/** Set for optional power measurement on DFT output */
>> +	RTE_BBDEV_FFT_POWER_MEAS = (1ULL << 5),
>> +	/** Set if the the input data used FP16 format */
>> +	RTE_BBDEV_FFT_FP16_INPUT = (1ULL << 6),
>> +	/**  Set if the the output data uses FP16 format  */
>> +	RTE_BBDEV_FFT_FP16_OUTPUT = (1ULL << 7) };
>> +
>>   /** Flags for the Code Block/Transport block mode  */  enum
>> rte_bbdev_op_cb_mode {
>>   	/** One operation is one or fraction of one transport block  */ @@ -
>> 689,6 +711,55 @@ struct rte_bbdev_op_ldpc_enc {
>>   	};
>>   };
>>
>> +/** Operation structure for FFT processing.
>> + *
>> + * The operation processes the data for multiple antennas in a single
>> +call
>> + * (.i.e for all the REs belonging to a given SRS sequence for
>> +instance)
>> + *
>> + * The output mbuf data structure is expected to be allocated by the
>> + * application with enough room for the output data.
>> + */
>> +struct rte_bbdev_op_fft {
>> +	/** Input data starting from first antenna */
>> +	struct rte_bbdev_op_data base_input;
>> +	/** Output data starting from first antenna and first cyclic shift */
>> +	struct rte_bbdev_op_data base_output;
>> +	/** Optional power measurement output data */
>> +	struct rte_bbdev_op_data power_meas_output;
>> +	/** Flags from rte_bbdev_op_fft_flag_bitmasks */
>> +	uint32_t op_flags;
>> +	/** Input sequence size in 32-bits points */
>> +	uint16_t input_sequence_size;
>> +	/** Padding at the start of the sequence */
>> +	uint16_t input_leading_padding;
>> +	/** Output sequence size in 32-bits points */
>> +	uint16_t output_sequence_size;
>> +	/** Depadding at the start of the DFT output */
>> +	uint16_t output_leading_depadding;
>> +	/** Window index being used for each cyclic shift output */
>> +	uint8_t window_index[RTE_BBDEV_MAX_CS_2];
>> +	/** Bitmap of the cyclic shift output requested */
>> +	uint16_t cs_bitmap;
>> +	/** Number of antennas as a log2 – 8 to 128 */
>> +	uint8_t num_antennas_log2;
>> +	/** iDFT size as a log2 - 32 to 2048 */
>> +	uint8_t idft_log2;
>> +	/** DFT size as a log2 - 8 to 2048 */
>> +	uint8_t dft_log2;
>> +	/** Adjustment of position of the cyclic shifts - -31 to 31 */
>> +	int8_t cs_time_adjustment;
>> +	/** iDFT shift down */
>> +	int8_t idft_shift;
>> +	/** DFT shift down */
>> +	int8_t dft_shift;
>> +	/** NCS reciprocal factor  */
>> +	uint16_t ncs_reciprocal;
>> +	/** power measurement out shift down */
>> +	uint16_t power_shift;
>> +	/** Adjust the FP6 exponent for INT<->FP16 conversion */
>> +	uint16_t fp16_exp_adjust;
>> +};
>> +
>>   /** List of the capabilities for the Turbo Decoder */  struct
>> rte_bbdev_op_cap_turbo_dec {
>>   	/** Flags from rte_bbdev_op_td_flag_bitmasks */ @@ -741,6
>> +812,16 @@ struct rte_bbdev_op_cap_ldpc_enc {
>>   	uint16_t num_buffers_dst;
>>   };
>>
>> +/** List of the capabilities for the FFT */ struct rte_bbdev_op_cap_fft
>> +{
>> +	/** Flags from rte_bbdev_op_ldpcenc_flag_bitmasks */
>> +	uint32_t capability_flags;
>> +	/** Num input code block buffers */
>> +	uint16_t num_buffers_src;
>> +	/** Num output code block buffers */
>> +	uint16_t num_buffers_dst;
>> +};
>> +
>>   /** Different operation types supported by the device */  enum
>> rte_bbdev_op_type {
>>   	RTE_BBDEV_OP_NONE,  /**< Dummy operation that does nothing */
>> @@ -748,6 +829,7 @@ enum rte_bbdev_op_type {
>>   	RTE_BBDEV_OP_TURBO_ENC,  /**< Turbo encode */
>>   	RTE_BBDEV_OP_LDPC_DEC,  /**< LDPC decode */
>>   	RTE_BBDEV_OP_LDPC_ENC,  /**< LDPC encode */
>> +	RTE_BBDEV_OP_FFT,  /**< FFT */
>>   	RTE_BBDEV_OP_TYPE_COUNT,  /**< Count of different op types */
>> };
>>
>> @@ -791,6 +873,18 @@ struct rte_bbdev_dec_op {
>>   	};
>>   };
>>
>> +/** Structure specifying a single fft operation */ struct
>> +rte_bbdev_fft_op {
>> +	/** Status of operation that was performed */
>> +	int status;
>> +	/** Mempool which op instance is in */
>> +	struct rte_mempool *mempool;
>> +	/** Opaque pointer for user data */
>> +	void *opaque_data;
>> +	/** Contains turbo decoder specific parameters */
>> +	struct rte_bbdev_op_fft fft;
>> +};
>> +
>>   /** Operation capabilities supported by a device */  struct
>> rte_bbdev_op_cap {
>>   	enum rte_bbdev_op_type type;  /**< Type of operation */ @@ -
>> 799,6 +893,7 @@ struct rte_bbdev_op_cap {
>>   		struct rte_bbdev_op_cap_turbo_enc turbo_enc;
>>   		struct rte_bbdev_op_cap_ldpc_dec ldpc_dec;
>>   		struct rte_bbdev_op_cap_ldpc_enc ldpc_enc;
>> +		struct rte_bbdev_op_cap_fft fft;
>>   	} cap;  /**< Operation-type specific capabilities */  };
>>
>> @@ -918,6 +1013,42 @@ struct rte_mempool *  }
>>
>>   /**
>> + * Bulk allocate fft operations from a mempool with parameter defaults
>> reset.
>> + *
>> + * @param mempool
>> + *   Operation mempool, created by rte_bbdev_op_pool_create().
>> + * @param ops
>> + *   Output array to place allocated operations
>> + * @param num_ops
>> + *   Number of operations to allocate
>> + *
>> + * @returns
>> + *   - 0 on success
>> + *   - EINVAL if invalid mempool is provided
>> + */
>> +__rte_experimental
>> +static inline int
>> +rte_bbdev_fft_op_alloc_bulk(struct rte_mempool *mempool,
>> +		struct rte_bbdev_fft_op **ops, uint16_t num_ops) {
>> +	struct rte_bbdev_op_pool_private *priv;
>> +	int ret;
>> +
>> +	/* Check type */
>> +	priv = (struct rte_bbdev_op_pool_private *)
>> +			rte_mempool_get_priv(mempool);
>> +	if (unlikely(priv->type != RTE_BBDEV_OP_FFT))
>> +		return -EINVAL;
>> +
>> +	/* Get elements */
>> +	ret = rte_mempool_get_bulk(mempool, (void **)ops, num_ops);
>> +	if (unlikely(ret < 0))
>> +		return ret;
>> +
>> +	return 0;
>> +}
>> +
>> +/**
>>    * Free decode operation structures that were allocated by
>>    * rte_bbdev_dec_op_alloc_bulk().
>>    * All structures must belong to the same mempool.
>> @@ -951,6 +1082,24 @@ struct rte_mempool *
>>   		rte_mempool_put_bulk(ops[0]->mempool, (void **)ops,
>> num_ops);  }
>>
>> +/**
>> + * Free encode operation structures that were allocated by
>> + * rte_bbdev_fft_op_alloc_bulk().
>> + * All structures must belong to the same mempool.
>> + *
>> + * @param ops
>> + *   Operation structures
>> + * @param num_ops
>> + *   Number of structures
>> + */
>> +__rte_experimental
>> +static inline void
>> +rte_bbdev_fft_op_free_bulk(struct rte_bbdev_fft_op **ops, unsigned int
>> +num_ops) {
>> +	if (num_ops > 0)
>> +		rte_mempool_put_bulk(ops[0]->mempool, (void **)ops,
>> num_ops); }
>> +
>>   #ifdef __cplusplus
>>   }
>>   #endif
>> diff --git a/lib/bbdev/version.map b/lib/bbdev/version.map index
>> cce3f3c..16a16dc 100644
>> --- a/lib/bbdev/version.map
>> +++ b/lib/bbdev/version.map
>> @@ -39,3 +39,13 @@ DPDK_22 {
>>
>>   	local: *;
>>   };
>> +
>> +EXPERIMENTAL {
>> +	global:
>> +
>> +	# added in 22.06
>> +	rte_bbdev_enqueue_fft_ops;
>> +	rte_bbdev_dequeue_fft_ops;
>> +	rte_bbdev_fft_op_alloc_bulk;
>> +	rte_bbdev_fft_op_free_bulk;
>> +};
>> --
>> 1.8.3.1
  
David Marchand June 7, 2022, 11:29 a.m. UTC | #5
Hi guys,

On Thu, May 26, 2022 at 8:05 AM Hemant Agrawal
<hemant.agrawal@oss.nxp.com> wrote:
> On 5/26/2022 3:37 AM, Chautru, Nicolas wrote:
> > Hi Hernant,
> > Gentle reminder in case you can find the time. Could please have a look this patch for bbdev api extension. This is targeting 22.11.
> > New operation type for FFT processing (On top of 4 existing operations types). There is no impact to the la12xx PMD but would be good to get review/feedback.
> > This can be used for vanilla FFT and/or chained FFT/iFFT + point-wise multiplication making it applicable for SRS processing.
> > Thanks,
> > Nic
>
> Acked-by:  Hemant Agrawal <hemant.agrawal@nxp.com>
>
> HI Nicolas,
>
>      Yes, I had a look into it.  I see no issue in it. This is fairly
> independent feature addition to BBDEV.
>

Hemant,

For this v22.11 change, we will need some acks on the ABI breakage
announcement in v22.07:
https://patchwork.dpdk.org/project/dpdk/patch/1647542252-35727-2-git-send-email-nicolas.chautru@intel.com/

Could you review it?
Thanks.
  

Patch

diff --git a/doc/guides/prog_guide/bbdev.rst b/doc/guides/prog_guide/bbdev.rst
index 70fa01a..2791286 100644
--- a/doc/guides/prog_guide/bbdev.rst
+++ b/doc/guides/prog_guide/bbdev.rst
@@ -1118,6 +1118,136 @@  Figure :numref:`figure_turbo_tb_decode` above
 showing the Turbo decoding of CBs using BBDEV interface in TB-mode
 is also valid for LDPC decode.
 
+BBDEV FFT Operation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This operation allows running a combination of DFT and/or IDFT and/or time-domain windowing.
+These can be used in a modular fashion (using bypass modes) or as a processing pipeline
+which can be used for FFT-based baseband signal processing.
+In more detail, it allows:
+- to process the data first through an IDFT of adjustable size and padding;
+- to perform the windowing as a programmable cyclic shift offset of the data followed by a
+pointwise multiplication by a time domain window;
+- to process the related data through a DFT of adjustable size and depadding for each such cyclic
+shift output.
+
+A flexible number of Rx antennas are being processed in parallel with the same configuration.
+The API allows more generally for flexibility in what the PMD may support (capability flags) and
+flexibility to adjust some of the parameters of the processing.
+
+The operation/capability flags that can be set for each FFT operation are given below.
+
+  **NOTE:** The actual operation flags that may be used with a specific
+  BBDEV PMD are dependent on the driver capabilities as reported via
+  ``rte_bbdev_info_get()``, and may be a subset of those below.
+
++--------------------------------------------------------------------+
+|Description of FFT capability flags                                 |
++====================================================================+
+|RTE_BBDEV_FFT_WINDOWING                                             |
+| Set to enable/support windowing in time domain                     |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_CS_ADJUSTMENT                                         |
+| Set to enable/support  the cyclic shift time offset adjustment     |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_DFT_BYPASS                                            |
+| Set to bypass the DFT and use directly the IDFT as an option       |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_IDFT_BYPASS                                           |
+| Set to bypass the IDFT and use directly the DFT as an option       |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_WINDOWING_BYPASS                                      |
+| Set to bypass the time domain windowing  as an option              |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_POWER_MEAS                                            |
+| Set to provide an optional power measurement of the DFT output     |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_FP16_INPUT                                            |
+| Set if the input data shall use FP16 format instead of INT16       |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_FP16_OUTPUT                                           |
+| Set if the output data shall use FP16 format instead of INT16      |
++--------------------------------------------------------------------+
+
+The structure passed for each FFT operation is given below,
+with the operation flags forming a bitmask in the ``op_flags`` field.
+
+.. code-block:: c
+
+    struct rte_bbdev_op_fft {
+        struct rte_bbdev_op_data base_input;
+        struct rte_bbdev_op_data base_output;
+        struct rte_bbdev_op_data power_meas_output;
+        uint32_t op_flags;
+        uint16_t input_sequence_size;
+        uint16_t input_leading_padding;
+        uint16_t output_sequence_size;
+        uint16_t output_leading_depadding;
+        uint8_t window_index[RTE_BBDEV_MAX_CS_2];
+        uint16_t cs_bitmap;
+        uint8_t num_antennas_log2;
+        uint8_t idft_log2;
+        uint8_t dft_log2;
+        int8_t cs_time_adjustment;
+        int8_t idft_shift;
+        int8_t dft_shift;
+        uint16_t ncs_reciprocal;
+        uint16_t power_shift;
+        uint16_t fp16_exp_adjust;
+    };
+
+The FFT parameters are set out in the table below.
+
++----------------------+--------------------------------------------------------------+
+|Parameter             |Description                                                   |
++======================+==============================================================+
+|base_input            |input data                                                    |
++----------------------+--------------------------------------------------------------+
+|base_output           |output data                                                   |
++----------------------+--------------------------------------------------------------+
+|power_meas_output     |optional output data with power measurement on DFT output     |
++----------------------+--------------------------------------------------------------+
+|op_flags              |bitmask of all active operation capabilities                  |
++----------------------+--------------------------------------------------------------+
+|input_sequence_size   |size of the input sequence in 32-bits points per antenna      |
++----------------------+--------------------------------------------------------------+
+|input_leading_padding |number of points padded at the start of input data            |
++----------------------+--------------------------------------------------------------+
+|output_sequence_size  |size of the output sequence per antenna and cyclic shift      |
++----------------------+--------------------------------------------------------------+
+|output_depadding      |number of points depadded at the start of output data         |
++----------------------+--------------------------------------------------------------+
+|window_index          |optional windowing profile index used for each cyclic shift   |
++----------------------+--------------------------------------------------------------+
+|cs_bitmap             |bitmap of the cyclic shift output requested (LSB for index 0) |
++----------------------+--------------------------------------------------------------+
+|num_antennas_log2     |number of antennas as a log2 (10 maps to 1024...)             |
++----------------------+--------------------------------------------------------------+
+|idft_log2             |iDFT size as a log2                                           |
++----------------------+--------------------------------------------------------------+
+|dft_log2              |DFT size as a log2                                            |
++----------------------+--------------------------------------------------------------+
+|cs_time_adjustment    |adjustment of time position of all the cyclic shift output    |
++----------------------+--------------------------------------------------------------+
+|idft_shift            |shift down of signal level post iDFT                          |
++----------------------+--------------------------------------------------------------+
+|dft_shift             |shift down of signal level post DFT                           |
++----------------------+--------------------------------------------------------------+
+|ncs_reciprocal        |inverse of max number of CS normalized to 15b (ie. 231 for 12)|
++----------------------+--------------------------------------------------------------+
+|power_shift           |shift down of level of power measurement when enabled         |
++----------------------+--------------------------------------------------------------+
+|fp16_exp_adjust       |value added to FP16 exponent at conversion from INT16         |
++----------------------+--------------------------------------------------------------+
+
+The mbuf input ``base_input`` is mandatory for all BBDEV PMDs and is the
+incoming data for the processing. Its size may not fit into an actual mbuf, but the
+structure is used to pass the IOVA address.
+The mbuf output ``base_output`` is mandatory and is the output of the FFT processing chain.
+Each point is a complex number of 32 bits: either as 2 INT16 or as 2 FP16 values when that option
+is supported.
+The data layout is based on contiguous concatenation of output data first by cyclic shift then
+by antenna.
 
 Sample code
 -----------
diff --git a/lib/bbdev/rte_bbdev.c b/lib/bbdev/rte_bbdev.c
index aaee7b7..a72ecba 100644
--- a/lib/bbdev/rte_bbdev.c
+++ b/lib/bbdev/rte_bbdev.c
@@ -850,6 +850,9 @@  struct rte_bbdev *
 	case RTE_BBDEV_OP_LDPC_ENC:
 		result = sizeof(struct rte_bbdev_enc_op);
 		break;
+	case RTE_BBDEV_OP_FFT:
+		result = sizeof(struct rte_bbdev_fft_op);
+		break;
 	default:
 		break;
 	}
@@ -873,6 +876,10 @@  struct rte_bbdev *
 		struct rte_bbdev_enc_op *op = element;
 		memset(op, 0, mempool->elt_size);
 		op->mempool = mempool;
+	} else if (type == RTE_BBDEV_OP_FFT) {
+		struct rte_bbdev_fft_op *op = element;
+		memset(op, 0, mempool->elt_size);
+		op->mempool = mempool;
 	}
 }
 
@@ -1123,6 +1130,7 @@  struct rte_mempool *
 		"RTE_BBDEV_OP_TURBO_ENC",
 		"RTE_BBDEV_OP_LDPC_DEC",
 		"RTE_BBDEV_OP_LDPC_ENC",
+		"RTE_BBDEV_OP_FFT",
 	};
 
 	if (op_type < RTE_BBDEV_OP_TYPE_COUNT)
diff --git a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h
index b88c881..e9ca673 100644
--- a/lib/bbdev/rte_bbdev.h
+++ b/lib/bbdev/rte_bbdev.h
@@ -380,6 +380,12 @@  typedef uint16_t (*rte_bbdev_enqueue_dec_ops_t)(
 		struct rte_bbdev_dec_op **ops,
 		uint16_t num);
 
+/** @internal Enqueue fft operations for processing on queue of a device. */
+typedef uint16_t (*rte_bbdev_enqueue_fft_ops_t)(
+		struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_fft_op **ops,
+		uint16_t num);
+
 /** @internal Dequeue encode operations from a queue of a device. */
 typedef uint16_t (*rte_bbdev_dequeue_enc_ops_t)(
 		struct rte_bbdev_queue_data *q_data,
@@ -390,6 +396,11 @@  typedef uint16_t (*rte_bbdev_dequeue_dec_ops_t)(
 		struct rte_bbdev_queue_data *q_data,
 		struct rte_bbdev_dec_op **ops, uint16_t num);
 
+/** @internal Dequeue fft operations from a queue of a device. */
+typedef uint16_t (*rte_bbdev_dequeue_fft_ops_t)(
+		struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_fft_op **ops, uint16_t num);
+
 #define RTE_BBDEV_NAME_MAX_LEN  64  /**< Max length of device name */
 
 /**
@@ -438,6 +449,10 @@  struct __rte_cache_aligned rte_bbdev {
 	rte_bbdev_dequeue_enc_ops_t dequeue_ldpc_enc_ops;
 	/** Dequeue decode function */
 	rte_bbdev_dequeue_dec_ops_t dequeue_ldpc_dec_ops;
+	/** Enqueue FFT function */
+	rte_bbdev_enqueue_fft_ops_t enqueue_fft_ops;
+	/** Dequeue FFT function */
+	rte_bbdev_dequeue_fft_ops_t dequeue_fft_ops;
 	const struct rte_bbdev_ops *dev_ops;  /**< Functions exported by PMD */
 	struct rte_bbdev_data *data;  /**< Pointer to device data */
 	enum rte_bbdev_state state;  /**< If device is currently used or not */
@@ -570,6 +585,36 @@  struct __rte_cache_aligned rte_bbdev {
 	return dev->enqueue_ldpc_dec_ops(q_data, ops, num_ops);
 }
 
+/**
+ * Enqueue a burst of fft operations to a queue of the device.
+ * This function only enqueues as many operations as currently possible and
+ * does not block until @p num_ops entries in the queue are available.
+ * This function does not provide any error notification to avoid the
+ * corresponding overhead.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param queue_id
+ *   The index of the queue.
+ * @param ops
+ *   Pointer array containing operations to be enqueued. Must have at least
+ *   @p num_ops entries.
+ * @param num_ops
+ *   The maximum number of operations to enqueue.
+ *
+ * @return
+ *   The number of operations actually enqueued (this is the number of processed
+ *   entries in the @p ops array).
+ */
+__rte_experimental
+static inline uint16_t
+rte_bbdev_enqueue_fft_ops(uint16_t dev_id, uint16_t queue_id,
+		struct rte_bbdev_fft_op **ops, uint16_t num_ops)
+{
+	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
+	struct rte_bbdev_queue_data *q_data = &dev->data->queues[queue_id];
+	return dev->enqueue_fft_ops(q_data, ops, num_ops);
+}
 
 /**
  * Dequeue a burst of processed encode operations from a queue of the device.
@@ -695,6 +740,37 @@  struct __rte_cache_aligned rte_bbdev {
 	return dev->dequeue_ldpc_dec_ops(q_data, ops, num_ops);
 }
 
+/**
+ * Dequeue a burst of fft operations from a queue of the device.
+ * This function returns only the current contents of the queue, and does not
+ * block until @p num_ops is available.
+ * This function does not provide any error notification to avoid the
+ * corresponding overhead.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param queue_id
+ *   The index of the queue.
+ * @param ops
+ *   Pointer array where operations will be dequeued to. Must have at least
+ *   @p num_ops entries
+ * @param num_ops
+ *   The maximum number of operations to dequeue.
+ *
+ * @return
+ *   The number of operations actually dequeued (this is the number of entries
+ *   copied into the @p ops array).
+ */
+__rte_experimental
+static inline uint16_t
+rte_bbdev_dequeue_fft_ops(uint16_t dev_id, uint16_t queue_id,
+		struct rte_bbdev_fft_op **ops, uint16_t num_ops)
+{
+	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
+	struct rte_bbdev_queue_data *q_data = &dev->data->queues[queue_id];
+	return dev->dequeue_fft_ops(q_data, ops, num_ops);
+}
+
 /** Definitions of device event types */
 enum rte_bbdev_event_type {
 	RTE_BBDEV_EVENT_UNKNOWN,  /**< unknown event type */
diff --git a/lib/bbdev/rte_bbdev_op.h b/lib/bbdev/rte_bbdev_op.h
index 6d56133..57e35f4 100644
--- a/lib/bbdev/rte_bbdev_op.h
+++ b/lib/bbdev/rte_bbdev_op.h
@@ -47,6 +47,8 @@ 
 #define RTE_BBDEV_TURBO_MAX_CODE_BLOCKS (64)
 /* LDPC:  Maximum number of Code Blocks in Transport Block.*/
 #define RTE_BBDEV_LDPC_MAX_CODE_BLOCKS (256)
+/* 12 CS maximum */
+#define RTE_BBDEV_MAX_CS_2 (6)
 
 /** Flags for turbo decoder operation and capability structure */
 enum rte_bbdev_op_td_flag_bitmasks {
@@ -211,6 +213,26 @@  enum rte_bbdev_op_ldpcenc_flag_bitmasks {
 	RTE_BBDEV_LDPC_ENC_CONCATENATION = (1ULL << 7)
 };
 
+/** Flags for FFT operation and capability structure */
+enum rte_bbdev_op_fft_flag_bitmasks {
+	/** Flexible windowing capability */
+	RTE_BBDEV_FFT_WINDOWING = (1ULL << 0),
+	/** Flexible adjustment of Cyclic Shift time offset */
+	RTE_BBDEV_FFT_CS_ADJUSTMENT = (1ULL << 1),
+	/** Set to bypass the DFT and get directly into iDFT input */
+	RTE_BBDEV_FFT_DFT_BYPASS = (1ULL << 2),
+	/** Set to bypass the iDFT and get directly the DFT output */
+	RTE_BBDEV_FFT_IDFT_BYPASS = (1ULL << 3),
+	/** Set to bypass time domain windowing */
+	RTE_BBDEV_FFT_WINDOWING_BYPASS = (1ULL << 4),
+	/** Set for optional power measurement on DFT output */
+	RTE_BBDEV_FFT_POWER_MEAS = (1ULL << 5),
+	/** Set if the input data uses FP16 format */
+	RTE_BBDEV_FFT_FP16_INPUT = (1ULL << 6),
+	/** Set if the output data uses FP16 format */
+	RTE_BBDEV_FFT_FP16_OUTPUT = (1ULL << 7)
+};
+
 /** Flags for the Code Block/Transport block mode  */
 enum rte_bbdev_op_cb_mode {
 	/** One operation is one or fraction of one transport block  */
@@ -689,6 +711,55 @@  struct rte_bbdev_op_ldpc_enc {
 	};
 };
 
+/** Operation structure for FFT processing.
+ *
+ * The operation processes the data for multiple antennas in a single call
+ * (i.e. for all the REs belonging to a given SRS sequence for instance)
+ *
+ * The output mbuf data structure is expected to be allocated by the
+ * application with enough room for the output data.
+ */
+struct rte_bbdev_op_fft {
+	/** Input data starting from first antenna */
+	struct rte_bbdev_op_data base_input;
+	/** Output data starting from first antenna and first cyclic shift */
+	struct rte_bbdev_op_data base_output;
+	/** Optional power measurement output data */
+	struct rte_bbdev_op_data power_meas_output;
+	/** Flags from rte_bbdev_op_fft_flag_bitmasks */
+	uint32_t op_flags;
+	/** Input sequence size in 32-bits points */
+	uint16_t input_sequence_size;
+	/** Padding at the start of the sequence */
+	uint16_t input_leading_padding;
+	/** Output sequence size in 32-bits points */
+	uint16_t output_sequence_size;
+	/** Depadding at the start of the DFT output */
+	uint16_t output_leading_depadding;
+	/** Window index being used for each cyclic shift output */
+	uint8_t window_index[RTE_BBDEV_MAX_CS_2];
+	/** Bitmap of the cyclic shift output requested */
+	uint16_t cs_bitmap;
+	/** Number of antennas as a log2 - 8 to 128 */
+	uint8_t num_antennas_log2;
+	/** iDFT size as a log2 - 32 to 2048 */
+	uint8_t idft_log2;
+	/** DFT size as a log2 - 8 to 2048 */
+	uint8_t dft_log2;
+	/** Adjustment of position of the cyclic shifts - -31 to 31 */
+	int8_t cs_time_adjustment;
+	/** iDFT shift down */
+	int8_t idft_shift;
+	/** DFT shift down */
+	int8_t dft_shift;
+	/** NCS reciprocal factor  */
+	uint16_t ncs_reciprocal;
+	/** power measurement out shift down */
+	uint16_t power_shift;
+	/** Adjust the FP16 exponent for INT<->FP16 conversion */
+	uint16_t fp16_exp_adjust;
+};
+
 /** List of the capabilities for the Turbo Decoder */
 struct rte_bbdev_op_cap_turbo_dec {
 	/** Flags from rte_bbdev_op_td_flag_bitmasks */
@@ -741,6 +812,16 @@  struct rte_bbdev_op_cap_ldpc_enc {
 	uint16_t num_buffers_dst;
 };
 
+/** List of the capabilities for the FFT */
+struct rte_bbdev_op_cap_fft {
+	/** Flags from rte_bbdev_op_fft_flag_bitmasks */
+	uint32_t capability_flags;
+	/** Num input code block buffers */
+	uint16_t num_buffers_src;
+	/** Num output code block buffers */
+	uint16_t num_buffers_dst;
+};
+
 /** Different operation types supported by the device */
 enum rte_bbdev_op_type {
 	RTE_BBDEV_OP_NONE,  /**< Dummy operation that does nothing */
@@ -748,6 +829,7 @@  enum rte_bbdev_op_type {
 	RTE_BBDEV_OP_TURBO_ENC,  /**< Turbo encode */
 	RTE_BBDEV_OP_LDPC_DEC,  /**< LDPC decode */
 	RTE_BBDEV_OP_LDPC_ENC,  /**< LDPC encode */
+	RTE_BBDEV_OP_FFT,  /**< FFT */
 	RTE_BBDEV_OP_TYPE_COUNT,  /**< Count of different op types */
 };
 
@@ -791,6 +873,18 @@  struct rte_bbdev_dec_op {
 	};
 };
 
+/** Structure specifying a single fft operation */
+struct rte_bbdev_fft_op {
+	/** Status of operation that was performed */
+	int status;
+	/** Mempool which op instance is in */
+	struct rte_mempool *mempool;
+	/** Opaque pointer for user data */
+	void *opaque_data;
+	/** Contains FFT specific parameters */
+	struct rte_bbdev_op_fft fft;
+};
+
 /** Operation capabilities supported by a device */
 struct rte_bbdev_op_cap {
 	enum rte_bbdev_op_type type;  /**< Type of operation */
@@ -799,6 +893,7 @@  struct rte_bbdev_op_cap {
 		struct rte_bbdev_op_cap_turbo_enc turbo_enc;
 		struct rte_bbdev_op_cap_ldpc_dec ldpc_dec;
 		struct rte_bbdev_op_cap_ldpc_enc ldpc_enc;
+		struct rte_bbdev_op_cap_fft fft;
 	} cap;  /**< Operation-type specific capabilities */
 };
 
@@ -918,6 +1013,42 @@  struct rte_mempool *
 }
 
 /**
+ * Bulk allocate fft operations from a mempool with parameter defaults reset.
+ *
+ * @param mempool
+ *   Operation mempool, created by rte_bbdev_op_pool_create().
+ * @param ops
+ *   Output array to place allocated operations
+ * @param num_ops
+ *   Number of operations to allocate
+ *
+ * @returns
+ *   - 0 on success
+ *   - -EINVAL if invalid mempool is provided
+ */
+__rte_experimental
+static inline int
+rte_bbdev_fft_op_alloc_bulk(struct rte_mempool *mempool,
+		struct rte_bbdev_fft_op **ops, uint16_t num_ops)
+{
+	struct rte_bbdev_op_pool_private *priv;
+	int ret;
+
+	/* Check type */
+	priv = (struct rte_bbdev_op_pool_private *)
+			rte_mempool_get_priv(mempool);
+	if (unlikely(priv->type != RTE_BBDEV_OP_FFT))
+		return -EINVAL;
+
+	/* Get elements */
+	ret = rte_mempool_get_bulk(mempool, (void **)ops, num_ops);
+	if (unlikely(ret < 0))
+		return ret;
+
+	return 0;
+}
+
+/**
  * Free decode operation structures that were allocated by
  * rte_bbdev_dec_op_alloc_bulk().
  * All structures must belong to the same mempool.
@@ -951,6 +1082,24 @@  struct rte_mempool *
 		rte_mempool_put_bulk(ops[0]->mempool, (void **)ops, num_ops);
 }
 
+/**
+ * Free FFT operation structures that were allocated by
+ * rte_bbdev_fft_op_alloc_bulk().
+ * All structures must belong to the same mempool.
+ *
+ * @param ops
+ *   Operation structures
+ * @param num_ops
+ *   Number of structures
+ */
+__rte_experimental
+static inline void
+rte_bbdev_fft_op_free_bulk(struct rte_bbdev_fft_op **ops, unsigned int num_ops)
+{
+	if (num_ops > 0)
+		rte_mempool_put_bulk(ops[0]->mempool, (void **)ops, num_ops);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/bbdev/version.map b/lib/bbdev/version.map
index cce3f3c..16a16dc 100644
--- a/lib/bbdev/version.map
+++ b/lib/bbdev/version.map
@@ -39,3 +39,13 @@  DPDK_22 {
 
 	local: *;
 };
+
+EXPERIMENTAL {
+	global:
+
+	# added in 22.06
+	rte_bbdev_enqueue_fft_ops;
+	rte_bbdev_dequeue_fft_ops;
+	rte_bbdev_fft_op_alloc_bulk;
+	rte_bbdev_fft_op_free_bulk;
+};