[v9,10/10] baseband/acc100: add configure function

Message ID 1601339385-117424-11-git-send-email-nicolas.chautru@intel.com (mailing list archive)
State Superseded, archived
Delegated to: akhil goyal
Headers
Series bbdev PMD ACC100 |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS

Commit Message

Chautru, Nicolas Sept. 29, 2020, 12:29 a.m. UTC
  Add configure function to configure the PF from within
the bbdev-test itself, without an external application
configuring the device.

Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
---
 app/test-bbdev/test_bbdev_perf.c                   |  72 +++
 doc/guides/rel_notes/release_20_11.rst             |   5 +
 drivers/baseband/acc100/meson.build                |   2 +
 drivers/baseband/acc100/rte_acc100_cfg.h           |  17 +
 drivers/baseband/acc100/rte_acc100_pmd.c           | 505 +++++++++++++++++++++
 .../acc100/rte_pmd_bbdev_acc100_version.map        |   7 +
 6 files changed, 608 insertions(+)
  

Comments

Tom Rix Sept. 30, 2020, 7:58 p.m. UTC | #1
On 9/28/20 5:29 PM, Nicolas Chautru wrote:
> Add configure function to configure the PF from within
> the bbdev-test itself without external application
> configuration the device.
>
> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
> ---
>  app/test-bbdev/test_bbdev_perf.c                   |  72 +++
>  doc/guides/rel_notes/release_20_11.rst             |   5 +
>  drivers/baseband/acc100/meson.build                |   2 +
>  drivers/baseband/acc100/rte_acc100_cfg.h           |  17 +
>  drivers/baseband/acc100/rte_acc100_pmd.c           | 505 +++++++++++++++++++++
>  .../acc100/rte_pmd_bbdev_acc100_version.map        |   7 +
>  6 files changed, 608 insertions(+)
>
> diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
> index 45c0d62..32f23ff 100644
> --- a/app/test-bbdev/test_bbdev_perf.c
> +++ b/app/test-bbdev/test_bbdev_perf.c
> @@ -52,6 +52,18 @@
>  #define FLR_5G_TIMEOUT 610
>  #endif
>  
> +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
> +#include <rte_acc100_cfg.h>
> +#define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
> +#define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
> +#define ACC100_QMGR_NUM_AQS 16
> +#define ACC100_QMGR_NUM_QGS 2
> +#define ACC100_QMGR_AQ_DEPTH 5
> +#define ACC100_QMGR_INVALID_IDX -1
> +#define ACC100_QMGR_RR 1
> +#define ACC100_QOS_GBR 0
> +#endif
> +
>  #define OPS_CACHE_SIZE 256U
>  #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
>  
> @@ -653,6 +665,66 @@ typedef int (test_case_function)(struct active_device *ad,
>  				info->dev_name);
>  	}
>  #endif
> +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
seems like this function would break if one of the other bbdev's were #defined.
> +	if ((get_init_device() == true) &&
> +		(!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
> +		struct acc100_conf conf;
> +		unsigned int i;
> +
> +		printf("Configure ACC100 FEC Driver %s with default values\n",
> +				info->drv.driver_name);
> +
> +		/* clear default configuration before initialization */
> +		memset(&conf, 0, sizeof(struct acc100_conf));
> +
> +		/* Always set in PF mode for built-in configuration */
> +		conf.pf_mode_en = true;
> +		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
> +			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
> +			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
> +			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
> +			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
> +			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
> +			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
> +			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
> +			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
> +			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
> +			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
> +			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
> +			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
> +		}
> +
> +		conf.input_pos_llr_1_bit = true;
> +		conf.output_pos_llr_1_bit = true;
> +		conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */
> +
> +		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
> +		conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
> +		conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
> +		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> +		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
> +		conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
> +		conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
> +		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> +		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
> +		conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
> +		conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
> +		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> +		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
> +		conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
> +		conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
> +		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> +
> +		/* setup PF with configuration information */
> +		ret = acc100_configure(info->dev_name, &conf);
> +		TEST_ASSERT_SUCCESS(ret,
> +				"Failed to configure ACC100 PF for bbdev %s",
> +				info->dev_name);
> +		/* Let's refresh this now this is configured */
> +	}
> +	rte_bbdev_info_get(dev_id, info);
The other bbdev's do not call rte_bbdev_info_get, can this be removed ?
> +#endif
> +
>  	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
>  	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
>  
> diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst
> index 73ac08f..c8d0586 100644
> --- a/doc/guides/rel_notes/release_20_11.rst
> +++ b/doc/guides/rel_notes/release_20_11.rst
> @@ -55,6 +55,11 @@ New Features
>       Also, make sure to start the actual text at the margin.
>       =======================================================
>  
> +* **Added Intel ACC100 bbdev PMD.**
> +
> +  Added a new ``acc100`` bbdev driver for the Intel\ |reg| ACC100 accelerator
> +  also known as Mount Bryce.  See the
> +  :doc:`../bbdevs/acc100` BBDEV guide for more details on this new driver.
>  
>  Removed Items
>  -------------
> diff --git a/drivers/baseband/acc100/meson.build b/drivers/baseband/acc100/meson.build
> index 8afafc2..7ac44dc 100644
> --- a/drivers/baseband/acc100/meson.build
> +++ b/drivers/baseband/acc100/meson.build
> @@ -4,3 +4,5 @@
>  deps += ['bbdev', 'bus_vdev', 'ring', 'pci', 'bus_pci']
>  
>  sources = files('rte_acc100_pmd.c')
> +
> +install_headers('rte_acc100_cfg.h')
> diff --git a/drivers/baseband/acc100/rte_acc100_cfg.h b/drivers/baseband/acc100/rte_acc100_cfg.h
> index 73bbe36..7f523bc 100644
> --- a/drivers/baseband/acc100/rte_acc100_cfg.h
> +++ b/drivers/baseband/acc100/rte_acc100_cfg.h
> @@ -89,6 +89,23 @@ struct acc100_conf {
>  	struct rte_arbitration_t arb_dl_5g[RTE_ACC100_NUM_VFS];
>  };
>  
> +/**
> + * Configure a ACC100 device
> + *
> + * @param dev_name
> + *   The name of the device. This is the short form of PCI BDF, e.g. 00:01.0.
> + *   It can also be retrieved for a bbdev device from the dev_name field in the
> + *   rte_bbdev_info structure returned by rte_bbdev_info_get().
> + * @param conf
> + *   Configuration to apply to ACC100 HW.
> + *
> + * @return
> + *   Zero on success, negative value on failure.
> + */
> +__rte_experimental
> +int
> +acc100_configure(const char *dev_name, struct acc100_conf *conf);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c b/drivers/baseband/acc100/rte_acc100_pmd.c
> index 3589814..b50dd32 100644
> --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> @@ -85,6 +85,26 @@
>  
>  enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC};
>  
> +/* Return the accelerator enum for a Queue Group Index */
> +static inline int
> +accFromQgid(int qg_idx, const struct acc100_conf *acc100_conf)
> +{
> +	int accQg[ACC100_NUM_QGRPS];
> +	int NumQGroupsPerFn[NUM_ACC];
> +	int acc, qgIdx, qgIndex = 0;
> +	for (qgIdx = 0; qgIdx < ACC100_NUM_QGRPS; qgIdx++)
> +		accQg[qgIdx] = 0;
> +	NumQGroupsPerFn[UL_4G] = acc100_conf->q_ul_4g.num_qgroups;
> +	NumQGroupsPerFn[UL_5G] = acc100_conf->q_ul_5g.num_qgroups;
> +	NumQGroupsPerFn[DL_4G] = acc100_conf->q_dl_4g.num_qgroups;
> +	NumQGroupsPerFn[DL_5G] = acc100_conf->q_dl_5g.num_qgroups;
> +	for (acc = UL_4G;  acc < NUM_ACC; acc++)
> +		for (qgIdx = 0; qgIdx < NumQGroupsPerFn[acc]; qgIdx++)
> +			accQg[qgIndex++] = acc;

This looks inefficient; is there a way this could be calculated without filling arrays to access 1 value?

> +	acc = accQg[qg_idx];
> +	return acc;
> +}
> +
>  /* Return the queue topology for a Queue Group Index */
>  static inline void
>  qtopFromAcc(struct rte_q_topology_t **qtop, int acc_enum,
> @@ -113,6 +133,30 @@
>  	*qtop = p_qtop;
>  }
>  
> +/* Return the AQ depth for a Queue Group Index */
> +static inline int
> +aqDepth(int qg_idx, struct acc100_conf *acc100_conf)
> +{
> +	struct rte_q_topology_t *q_top = NULL;
> +	int acc_enum = accFromQgid(qg_idx, acc100_conf);
> +	qtopFromAcc(&q_top, acc_enum, acc100_conf);
> +	if (unlikely(q_top == NULL))
> +		return 0;

This error is not handled well by the callers.

aqNum is similar.

> +	return q_top->aq_depth_log2;
> +}
> +
> +/* Return the AQ depth for a Queue Group Index */
> +static inline int
> +aqNum(int qg_idx, struct acc100_conf *acc100_conf)
> +{
> +	struct rte_q_topology_t *q_top = NULL;
> +	int acc_enum = accFromQgid(qg_idx, acc100_conf);
> +	qtopFromAcc(&q_top, acc_enum, acc100_conf);
> +	if (unlikely(q_top == NULL))
> +		return 0;
> +	return q_top->num_aqs_per_groups;
> +}
> +
>  static void
>  initQTop(struct acc100_conf *acc100_conf)
>  {
> @@ -4177,3 +4221,464 @@ static int acc100_pci_remove(struct rte_pci_device *pci_dev)
>  RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME, pci_id_acc100_pf_map);
>  RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME, acc100_pci_vf_driver);
>  RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME, pci_id_acc100_vf_map);
> +
> +/*
> + * Implementation to fix the power on status of some 5GUL engines
> + * This requires DMA permission if ported outside DPDK
This sounds like a workaround, can more detail be added here ?
> + */
> +static void
> +poweron_cleanup(struct rte_bbdev *bbdev, struct acc100_device *d,
> +		struct acc100_conf *conf)
> +{
> +	int i, template_idx, qg_idx;
> +	uint32_t address, status, payload;
> +	printf("Need to clear power-on 5GUL status in internal memory\n");
> +	/* Reset LDPC Cores */
> +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
> +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> +				ACC100_ENGINE_OFFSET * i, ACC100_RESET_HI);
> +	usleep(LONG_WAIT);
> +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
> +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> +				ACC100_ENGINE_OFFSET * i, ACC100_RESET_LO);
> +	usleep(LONG_WAIT);
> +	/* Prepare dummy workload */
> +	alloc_2x64mb_sw_rings_mem(bbdev, d, 0);
> +	/* Set base addresses */
> +	uint32_t phys_high = (uint32_t)(d->sw_rings_phys >> 32);
> +	uint32_t phys_low  = (uint32_t)(d->sw_rings_phys &
> +			~(ACC100_SIZE_64MBYTE-1));
> +	acc100_reg_write(d, HWPfDmaFec5GulDescBaseHiRegVf, phys_high);
> +	acc100_reg_write(d, HWPfDmaFec5GulDescBaseLoRegVf, phys_low);
> +
> +	/* Descriptor for a dummy 5GUL code block processing*/
> +	union acc100_dma_desc *desc = NULL;
> +	desc = d->sw_rings;
> +	desc->req.data_ptrs[0].address = d->sw_rings_phys +
> +			ACC100_DESC_FCW_OFFSET;
> +	desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN;
> +	desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
> +	desc->req.data_ptrs[0].last = 0;
> +	desc->req.data_ptrs[0].dma_ext = 0;
> +	desc->req.data_ptrs[1].address = d->sw_rings_phys + 512;
> +	desc->req.data_ptrs[1].blkid = ACC100_DMA_BLKID_IN;
> +	desc->req.data_ptrs[1].last = 1;
> +	desc->req.data_ptrs[1].dma_ext = 0;
> +	desc->req.data_ptrs[1].blen = 44;
> +	desc->req.data_ptrs[2].address = d->sw_rings_phys + 1024;
> +	desc->req.data_ptrs[2].blkid = ACC100_DMA_BLKID_OUT_ENC;
> +	desc->req.data_ptrs[2].last = 1;
> +	desc->req.data_ptrs[2].dma_ext = 0;
> +	desc->req.data_ptrs[2].blen = 5;
> +	/* Dummy FCW */
> +	desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
> +	desc->req.fcw_ld.qm = 1;
> +	desc->req.fcw_ld.nfiller = 30;
> +	desc->req.fcw_ld.BG = 2 - 1;
> +	desc->req.fcw_ld.Zc = 7;
> +	desc->req.fcw_ld.ncb = 350;
> +	desc->req.fcw_ld.rm_e = 4;
> +	desc->req.fcw_ld.itmax = 10;
> +	desc->req.fcw_ld.gain_i = 1;
> +	desc->req.fcw_ld.gain_h = 1;
> +
> +	int engines_to_restart[SIG_UL_5G_LAST + 1] = {0};
> +	int num_failed_engine = 0;
> +	/* Detect engines in undefined state */
> +	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> +			template_idx++) {
> +		/* Check engine power-on status */
> +		address = HwPfFecUl5gIbDebugReg +
> +				ACC100_ENGINE_OFFSET * template_idx;
> +		status = (acc100_reg_read(d, address) >> 4) & 0xF;
> +		if (status == 0) {
> +			engines_to_restart[num_failed_engine] = template_idx;
> +			num_failed_engine++;
> +		}
> +	}
> +
> +	int numQqsAcc = conf->q_ul_5g.num_qgroups;
> +	int numQgs = conf->q_ul_5g.num_qgroups;
> +	payload = 0;
> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
> +		payload |= (1 << qg_idx);
> +	/* Force each engine which is in unspecified state */
> +	for (i = 0; i < num_failed_engine; i++) {
> +		int failed_engine = engines_to_restart[i];
> +		printf("Force engine %d\n", failed_engine);
> +		for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> +				template_idx++) {
> +			address = HWPfQmgrGrpTmplateReg4Indx
> +					+ BYTES_IN_WORD * template_idx;
> +			if (template_idx == failed_engine)
> +				acc100_reg_write(d, address, payload);
> +			else
> +				acc100_reg_write(d, address, 0);
> +		}
> +		/* Reset descriptor header */
> +		desc->req.word0 = ACC100_DMA_DESC_TYPE;
> +		desc->req.word1 = 0;
> +		desc->req.word2 = 0;
> +		desc->req.word3 = 0;
> +		desc->req.numCBs = 1;
> +		desc->req.m2dlen = 2;
> +		desc->req.d2mlen = 1;
> +		/* Enqueue the code block for processing */
> +		union acc100_enqueue_reg_fmt enq_req;
> +		enq_req.val = 0;
> +		enq_req.addr_offset = ACC100_DESC_OFFSET;
> +		enq_req.num_elem = 1;
> +		enq_req.req_elem_addr = 0;
> +		rte_wmb();
> +		acc100_reg_write(d, HWPfQmgrIngressAq + 0x100, enq_req.val);
> +		usleep(LONG_WAIT * 100);
> +		if (desc->req.word0 != 2)
> +			printf("DMA Response %#"PRIx32"\n", desc->req.word0);
> +	}
> +
> +	/* Reset LDPC Cores */
> +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
> +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> +				ACC100_ENGINE_OFFSET * i, ACC100_RESET_HI);
> +	usleep(LONG_WAIT);
> +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
> +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> +				ACC100_ENGINE_OFFSET * i, ACC100_RESET_LO);
> +	usleep(LONG_WAIT);
> +	acc100_reg_write(d, HWPfHi5GHardResetReg, ACC100_RESET_HARD);
> +	usleep(LONG_WAIT);
> +	int numEngines = 0;
> +	/* Check engine power-on status again */
> +	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> +			template_idx++) {
> +		address = HwPfFecUl5gIbDebugReg +
> +				ACC100_ENGINE_OFFSET * template_idx;
> +		status = (acc100_reg_read(d, address) >> 4) & 0xF;
> +		address = HWPfQmgrGrpTmplateReg4Indx
> +				+ BYTES_IN_WORD * template_idx;
> +		if (status == 1) {
> +			acc100_reg_write(d, address, payload);
> +			numEngines++;
> +		} else
> +			acc100_reg_write(d, address, 0);
> +	}
> +	printf("Number of 5GUL engines %d\n", numEngines);
> +
> +	if (d->sw_rings_base != NULL)
> +		rte_free(d->sw_rings_base);
> +	usleep(LONG_WAIT);
> +}
> +
> +/* Initial configuration of a ACC100 device prior to running configure() */
> +int
> +acc100_configure(const char *dev_name, struct acc100_conf *conf)
> +{
> +	rte_bbdev_log(INFO, "acc100_configure");
> +	uint32_t payload, address, status;

maybe value or data would be a better variable name than payload.

would mean changing acc100_reg_write

> +	int qg_idx, template_idx, vf_idx, acc, i;
> +	struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name);
> +
> +	/* Compile time checks */
> +	RTE_BUILD_BUG_ON(sizeof(struct acc100_dma_req_desc) != 256);
> +	RTE_BUILD_BUG_ON(sizeof(union acc100_dma_desc) != 256);
> +	RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_td) != 24);
> +	RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_te) != 32);
> +
> +	if (bbdev == NULL) {
> +		rte_bbdev_log(ERR,
> +		"Invalid dev_name (%s), or device is not yet initialised",
> +		dev_name);
> +		return -ENODEV;
> +	}
> +	struct acc100_device *d = bbdev->data->dev_private;
> +
> +	/* Store configuration */
> +	rte_memcpy(&d->acc100_conf, conf, sizeof(d->acc100_conf));
> +
> +	/* PCIe Bridge configuration */
> +	acc100_reg_write(d, HwPfPcieGpexBridgeControl, ACC100_CFG_PCI_BRIDGE);
> +	for (i = 1; i < 17; i++)

17 is a magic number, use a #define

this is a general issue.

> +		acc100_reg_write(d,
> +				HwPfPcieGpexAxiAddrMappingWindowPexBaseHigh
> +				+ i * 16, 0);
> +
> +	/* PCIe Link Trainiing and Status State Machine */
> +	acc100_reg_write(d, HwPfPcieGpexLtssmStateCntrl, 0xDFC00000);
> +
> +	/* Prevent blocking AXI read on BRESP for AXI Write */
> +	address = HwPfPcieGpexAxiPioControl;
> +	payload = ACC100_CFG_PCI_AXI;
> +	acc100_reg_write(d, address, payload);
> +
> +	/* 5GDL PLL phase shift */
> +	acc100_reg_write(d, HWPfChaDl5gPllPhshft0, 0x1);
> +
> +	/* Explicitly releasing AXI as this may be stopped after PF FLR/BME */
> +	address = HWPfDmaAxiControl;
> +	payload = 1;
> +	acc100_reg_write(d, address, payload);
> +
> +	/* DDR Configuration */
> +	address = HWPfDdrBcTim6;
> +	payload = acc100_reg_read(d, address);
> +	payload &= 0xFFFFFFFB; /* Bit 2 */
> +#ifdef ACC100_DDR_ECC_ENABLE
> +	payload |= 0x4;
> +#endif
> +	acc100_reg_write(d, address, payload);
> +	address = HWPfDdrPhyDqsCountNum;
> +#ifdef ACC100_DDR_ECC_ENABLE
> +	payload = 9;
> +#else
> +	payload = 8;
> +#endif
> +	acc100_reg_write(d, address, payload);
> +
> +	/* Set default descriptor signature */
> +	address = HWPfDmaDescriptorSignatuture;
> +	payload = 0;
> +	acc100_reg_write(d, address, payload);
> +
> +	/* Enable the Error Detection in DMA */
> +	payload = ACC100_CFG_DMA_ERROR;
> +	address = HWPfDmaErrorDetectionEn;
> +	acc100_reg_write(d, address, payload);
> +
> +	/* AXI Cache configuration */
> +	payload = ACC100_CFG_AXI_CACHE;
> +	address = HWPfDmaAxcacheReg;
> +	acc100_reg_write(d, address, payload);
> +
> +	/* Default DMA Configuration (Qmgr Enabled) */
> +	address = HWPfDmaConfig0Reg;
> +	payload = 0;
> +	acc100_reg_write(d, address, payload);
> +	address = HWPfDmaQmanen;
> +	payload = 0;
> +	acc100_reg_write(d, address, payload);
> +
> +	/* Default RLIM/ALEN configuration */
> +	address = HWPfDmaConfig1Reg;
> +	payload = (1 << 31) + (23 << 8) + (1 << 6) + 7;
> +	acc100_reg_write(d, address, payload);
> +
> +	/* Configure DMA Qmanager addresses */
> +	address = HWPfDmaQmgrAddrReg;
> +	payload = HWPfQmgrEgressQueuesTemplate;
> +	acc100_reg_write(d, address, payload);
> +
> +	/* ===== Qmgr Configuration ===== */
> +	/* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2 for UL */
> +	int totalQgs = conf->q_ul_4g.num_qgroups +
> +			conf->q_ul_5g.num_qgroups +
> +			conf->q_dl_4g.num_qgroups +
> +			conf->q_dl_5g.num_qgroups;
> +	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> +		address = HWPfQmgrDepthLog2Grp +
> +		BYTES_IN_WORD * qg_idx;
> +		payload = aqDepth(qg_idx, conf);
> +		acc100_reg_write(d, address, payload);
> +		address = HWPfQmgrTholdGrp +
> +		BYTES_IN_WORD * qg_idx;
> +		payload = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1));
> +		acc100_reg_write(d, address, payload);
> +	}
> +
> +	/* Template Priority in incremental order */
> +	for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
> +			template_idx++) {
> +		address = HWPfQmgrGrpTmplateReg0Indx +
> +		BYTES_IN_WORD * (template_idx % 8);
> +		payload = TMPL_PRI_0;
> +		acc100_reg_write(d, address, payload);
> +		address = HWPfQmgrGrpTmplateReg1Indx +
> +		BYTES_IN_WORD * (template_idx % 8);
> +		payload = TMPL_PRI_1;
> +		acc100_reg_write(d, address, payload);
> +		address = HWPfQmgrGrpTmplateReg2indx +
> +		BYTES_IN_WORD * (template_idx % 8);
> +		payload = TMPL_PRI_2;
> +		acc100_reg_write(d, address, payload);
> +		address = HWPfQmgrGrpTmplateReg3Indx +
> +		BYTES_IN_WORD * (template_idx % 8);
> +		payload = TMPL_PRI_3;
> +		acc100_reg_write(d, address, payload);
> +	}
> +
> +	address = HWPfQmgrGrpPriority;
> +	payload = ACC100_CFG_QMGR_HI_P;
> +	acc100_reg_write(d, address, payload);
> +
> +	/* Template Configuration */
> +	for (template_idx = 0; template_idx < ACC100_NUM_TMPL; template_idx++) {
> +		payload = 0;
> +		address = HWPfQmgrGrpTmplateReg4Indx
> +				+ BYTES_IN_WORD * template_idx;
> +		acc100_reg_write(d, address, payload);
> +	}
> +	/* 4GUL */
> +	int numQgs = conf->q_ul_4g.num_qgroups;
> +	int numQqsAcc = 0;
> +	payload = 0;
> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
> +		payload |= (1 << qg_idx);
> +	for (template_idx = SIG_UL_4G; template_idx <= SIG_UL_4G_LAST;
> +			template_idx++) {
> +		address = HWPfQmgrGrpTmplateReg4Indx
> +				+ BYTES_IN_WORD*template_idx;
> +		acc100_reg_write(d, address, payload);
> +	}
> +	/* 5GUL */
> +	numQqsAcc += numQgs;
> +	numQgs	= conf->q_ul_5g.num_qgroups;
> +	payload = 0;
> +	int numEngines = 0;
> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
> +		payload |= (1 << qg_idx);
> +	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> +			template_idx++) {
> +		/* Check engine power-on status */
> +		address = HwPfFecUl5gIbDebugReg +
> +				ACC100_ENGINE_OFFSET * template_idx;
> +		status = (acc100_reg_read(d, address) >> 4) & 0xF;
> +		address = HWPfQmgrGrpTmplateReg4Indx
> +				+ BYTES_IN_WORD * template_idx;
> +		if (status == 1) {
> +			acc100_reg_write(d, address, payload);
> +			numEngines++;
> +		} else
> +			acc100_reg_write(d, address, 0);
> +		#if RTE_ACC100_SINGLE_FEC == 1
#if should be at start of line
> +		payload = 0;
> +		#endif
> +	}
> +	printf("Number of 5GUL engines %d\n", numEngines);
> +	/* 4GDL */
> +	numQqsAcc += numQgs;
> +	numQgs	= conf->q_dl_4g.num_qgroups;
> +	payload = 0;
> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
> +		payload |= (1 << qg_idx);
> +	for (template_idx = SIG_DL_4G; template_idx <= SIG_DL_4G_LAST;
> +			template_idx++) {
> +		address = HWPfQmgrGrpTmplateReg4Indx
> +				+ BYTES_IN_WORD*template_idx;
> +		acc100_reg_write(d, address, payload);
> +		#if RTE_ACC100_SINGLE_FEC == 1
> +			payload = 0;
> +		#endif
> +	}
> +	/* 5GDL */
> +	numQqsAcc += numQgs;
> +	numQgs	= conf->q_dl_5g.num_qgroups;
> +	payload = 0;
> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
> +		payload |= (1 << qg_idx);
> +	for (template_idx = SIG_DL_5G; template_idx <= SIG_DL_5G_LAST;
> +			template_idx++) {
> +		address = HWPfQmgrGrpTmplateReg4Indx
> +				+ BYTES_IN_WORD*template_idx;
> +		acc100_reg_write(d, address, payload);
> +		#if RTE_ACC100_SINGLE_FEC == 1
> +		payload = 0;
> +		#endif
> +	}
> +
> +	/* Queue Group Function mapping */
> +	int qman_func_id[5] = {0, 2, 1, 3, 4};
> +	address = HWPfQmgrGrpFunction0;
> +	payload = 0;
> +	for (qg_idx = 0; qg_idx < 8; qg_idx++) {
> +		acc = accFromQgid(qg_idx, conf);
> +		payload |= qman_func_id[acc]<<(qg_idx * 4);
> +	}
> +	acc100_reg_write(d, address, payload);
> +
> +	/* Configuration of the Arbitration QGroup depth to 1 */
> +	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> +		address = HWPfQmgrArbQDepthGrp +
> +		BYTES_IN_WORD * qg_idx;
> +		payload = 0;
> +		acc100_reg_write(d, address, payload);
> +	}
> +
> +	/* Enabling AQueues through the Queue hierarchy*/
> +	for (vf_idx = 0; vf_idx < ACC100_NUM_VFS; vf_idx++) {
> +		for (qg_idx = 0; qg_idx < ACC100_NUM_QGRPS; qg_idx++) {
> +			payload = 0;
> +			if (vf_idx < conf->num_vf_bundles &&
> +					qg_idx < totalQgs)
> +				payload = (1 << aqNum(qg_idx, conf)) - 1;
> +			address = HWPfQmgrAqEnableVf
> +					+ vf_idx * BYTES_IN_WORD;
> +			payload += (qg_idx << 16);
> +			acc100_reg_write(d, address, payload);
> +		}
> +	}
> +
> +	/* This pointer to ARAM (256kB) is shifted by 2 (4B per register) */
> +	uint32_t aram_address = 0;
> +	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> +		for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
> +			address = HWPfQmgrVfBaseAddr + vf_idx
> +					* BYTES_IN_WORD + qg_idx
> +					* BYTES_IN_WORD * 64;
> +			payload = aram_address;
> +			acc100_reg_write(d, address, payload);
> +			/* Offset ARAM Address for next memory bank
> +			 * - increment of 4B
> +			 */
> +			aram_address += aqNum(qg_idx, conf) *
> +					(1 << aqDepth(qg_idx, conf));
> +		}
> +	}
> +
> +	if (aram_address > WORDS_IN_ARAM_SIZE) {
> +		rte_bbdev_log(ERR, "ARAM Configuration not fitting %d %d\n",
> +				aram_address, WORDS_IN_ARAM_SIZE);
> +		return -EINVAL;
> +	}
> +
> +	/* ==== HI Configuration ==== */
> +
> +	/* Prevent Block on Transmit Error */
> +	address = HWPfHiBlockTransmitOnErrorEn;
> +	payload = 0;
> +	acc100_reg_write(d, address, payload);
> +	/* Prevents to drop MSI */
> +	address = HWPfHiMsiDropEnableReg;
> +	payload = 0;
> +	acc100_reg_write(d, address, payload);
> +	/* Set the PF Mode register */
> +	address = HWPfHiPfMode;
> +	payload = (conf->pf_mode_en) ? 2 : 0;
> +	acc100_reg_write(d, address, payload);
> +	/* Enable Error Detection in HW */
> +	address = HWPfDmaErrorDetectionEn;
> +	payload = 0x3D7;
> +	acc100_reg_write(d, address, payload);
> +
> +	/* QoS overflow init */
> +	payload = 1;
> +	address = HWPfQosmonAEvalOverflow0;
> +	acc100_reg_write(d, address, payload);
> +	address = HWPfQosmonBEvalOverflow0;
> +	acc100_reg_write(d, address, payload);
> +
> +	/* HARQ DDR Configuration */
> +	unsigned int ddrSizeInMb = 512; /* Fixed to 512 MB per VF for now */
> +	for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
> +		address = HWPfDmaVfDdrBaseRw + vf_idx
> +				* 0x10;
> +		payload = ((vf_idx * (ddrSizeInMb / 64)) << 16) +
> +				(ddrSizeInMb - 1);
> +		acc100_reg_write(d, address, payload);
> +	}
> +	usleep(LONG_WAIT);
Is sleep needed here ? the reg_write has one.
> +

Since this seems like a workaround, add a comment here.

Tom

> +	if (numEngines < (SIG_UL_5G_LAST + 1))
> +		poweron_cleanup(bbdev, d, conf);
> +
> +	rte_bbdev_log_debug("PF Tip configuration complete for %s", dev_name);
> +	return 0;
> +}
> diff --git a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> index 4a76d1d..91c234d 100644
> --- a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> +++ b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> @@ -1,3 +1,10 @@
>  DPDK_21 {
>  	local: *;
>  };
> +
> +EXPERIMENTAL {
> +	global:
> +
> +	acc100_configure;
> +
> +};
  
Chautru, Nicolas Sept. 30, 2020, 10:54 p.m. UTC | #2
Hi Tom, 

> From: Tom Rix <trix@redhat.com>
> On 9/28/20 5:29 PM, Nicolas Chautru wrote:
> > Add configure function to configure the PF from within the 
> > bbdev-test itself without external application configuration the device.
> >
> > Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> > Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
> > ---
> >  app/test-bbdev/test_bbdev_perf.c                   |  72 +++
> >  doc/guides/rel_notes/release_20_11.rst             |   5 +
> >  drivers/baseband/acc100/meson.build                |   2 +
> >  drivers/baseband/acc100/rte_acc100_cfg.h           |  17 +
> >  drivers/baseband/acc100/rte_acc100_pmd.c           | 505
> +++++++++++++++++++++
> >  .../acc100/rte_pmd_bbdev_acc100_version.map        |   7 +
> >  6 files changed, 608 insertions(+)
> >
> > diff --git a/app/test-bbdev/test_bbdev_perf.c
> > b/app/test-bbdev/test_bbdev_perf.c
> > index 45c0d62..32f23ff 100644
> > --- a/app/test-bbdev/test_bbdev_perf.c
> > +++ b/app/test-bbdev/test_bbdev_perf.c
> > @@ -52,6 +52,18 @@
> >  #define FLR_5G_TIMEOUT 610
> >  #endif
> >
> > +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
> > +#include <rte_acc100_cfg.h>
> > +#define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
> > +#define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
> > +#define ACC100_QMGR_NUM_AQS 16
> > +#define ACC100_QMGR_NUM_QGS 2
> > +#define ACC100_QMGR_AQ_DEPTH 5
> > +#define ACC100_QMGR_INVALID_IDX -1
> > +#define ACC100_QMGR_RR 1
> > +#define ACC100_QOS_GBR 0
> > +#endif
> > +
> >  #define OPS_CACHE_SIZE 256U
> >  #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
> >
> > @@ -653,6 +665,66 @@ typedef int (test_case_function)(struct
> active_device *ad,
> >  				info->dev_name);
> >  	}
> >  #endif
> > +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
> seems like this function would break if one of the other bbdev's were 
> #defined.

No these are independent. By default they are all defined. 


> > +	if ((get_init_device() == true) &&
> > +		(!strcmp(info->drv.driver_name,
> ACC100PF_DRIVER_NAME))) {
> > +		struct acc100_conf conf;
> > +		unsigned int i;
> > +
> > +		printf("Configure ACC100 FEC Driver %s with default
> values\n",
> > +				info->drv.driver_name);
> > +
> > +		/* clear default configuration before initialization */
> > +		memset(&conf, 0, sizeof(struct acc100_conf));
> > +
> > +		/* Always set in PF mode for built-in configuration */
> > +		conf.pf_mode_en = true;
> > +		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
> > +			conf.arb_dl_4g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > +			conf.arb_dl_4g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > +			conf.arb_dl_4g[i].round_robin_weight =
> ACC100_QMGR_RR;
> > +			conf.arb_ul_4g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > +			conf.arb_ul_4g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > +			conf.arb_ul_4g[i].round_robin_weight =
> ACC100_QMGR_RR;
> > +			conf.arb_dl_5g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > +			conf.arb_dl_5g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > +			conf.arb_dl_5g[i].round_robin_weight =
> ACC100_QMGR_RR;
> > +			conf.arb_ul_5g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > +			conf.arb_ul_5g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > +			conf.arb_ul_5g[i].round_robin_weight =
> ACC100_QMGR_RR;
> > +		}
> > +
> > +		conf.input_pos_llr_1_bit = true;
> > +		conf.output_pos_llr_1_bit = true;
> > +		conf.num_vf_bundles = 1; /**< Number of VF bundles to
> setup */
> > +
> > +		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
> > +		conf.q_ul_4g.first_qgroup_index =
> ACC100_QMGR_INVALID_IDX;
> > +		conf.q_ul_4g.num_aqs_per_groups =
> ACC100_QMGR_NUM_AQS;
> > +		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> > +		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
> > +		conf.q_dl_4g.first_qgroup_index =
> ACC100_QMGR_INVALID_IDX;
> > +		conf.q_dl_4g.num_aqs_per_groups =
> ACC100_QMGR_NUM_AQS;
> > +		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> > +		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
> > +		conf.q_ul_5g.first_qgroup_index =
> ACC100_QMGR_INVALID_IDX;
> > +		conf.q_ul_5g.num_aqs_per_groups =
> ACC100_QMGR_NUM_AQS;
> > +		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> > +		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
> > +		conf.q_dl_5g.first_qgroup_index =
> ACC100_QMGR_INVALID_IDX;
> > +		conf.q_dl_5g.num_aqs_per_groups =
> ACC100_QMGR_NUM_AQS;
> > +		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> > +
> > +		/* setup PF with configuration information */
> > +		ret = acc100_configure(info->dev_name, &conf);
> > +		TEST_ASSERT_SUCCESS(ret,
> > +				"Failed to configure ACC100 PF for bbdev
> %s",
> > +				info->dev_name);
> > +		/* Let's refresh this now this is configured */
> > +	}
> > +	rte_bbdev_info_get(dev_id, info);
> The other bbdev's do not call rte_bbdev_info_get, can this be removed ?

Actually it should be added outside for all versions post-configuration. Thanks

> > +#endif
> > +
> >  	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
> >  	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
> >
> > diff --git a/doc/guides/rel_notes/release_20_11.rst
> > b/doc/guides/rel_notes/release_20_11.rst
> > index 73ac08f..c8d0586 100644
> > --- a/doc/guides/rel_notes/release_20_11.rst
> > +++ b/doc/guides/rel_notes/release_20_11.rst
> > @@ -55,6 +55,11 @@ New Features
> >       Also, make sure to start the actual text at the margin.
> >       =======================================================
> >
> > +* **Added Intel ACC100 bbdev PMD.**
> > +
> > +  Added a new ``acc100`` bbdev driver for the Intel\ |reg| ACC100 
> > + accelerator  also known as Mount Bryce.  See the 
> > + :doc:`../bbdevs/acc100` BBDEV guide for more details on this new driver.
> >
> >  Removed Items
> >  -------------
> > diff --git a/drivers/baseband/acc100/meson.build
> > b/drivers/baseband/acc100/meson.build
> > index 8afafc2..7ac44dc 100644
> > --- a/drivers/baseband/acc100/meson.build
> > +++ b/drivers/baseband/acc100/meson.build
> > @@ -4,3 +4,5 @@
> >  deps += ['bbdev', 'bus_vdev', 'ring', 'pci', 'bus_pci']
> >
> >  sources = files('rte_acc100_pmd.c')
> > +
> > +install_headers('rte_acc100_cfg.h')
> > diff --git a/drivers/baseband/acc100/rte_acc100_cfg.h
> > b/drivers/baseband/acc100/rte_acc100_cfg.h
> > index 73bbe36..7f523bc 100644
> > --- a/drivers/baseband/acc100/rte_acc100_cfg.h
> > +++ b/drivers/baseband/acc100/rte_acc100_cfg.h
> > @@ -89,6 +89,23 @@ struct acc100_conf {
> >  	struct rte_arbitration_t arb_dl_5g[RTE_ACC100_NUM_VFS];  };
> >
> > +/**
> > + * Configure a ACC100 device
> > + *
> > + * @param dev_name
> > + *   The name of the device. This is the short form of PCI BDF, e.g. 00:01.0.
> > + *   It can also be retrieved for a bbdev device from the dev_name field in
> the
> > + *   rte_bbdev_info structure returned by rte_bbdev_info_get().
> > + * @param conf
> > + *   Configuration to apply to ACC100 HW.
> > + *
> > + * @return
> > + *   Zero on success, negative value on failure.
> > + */
> > +__rte_experimental
> > +int
> > +acc100_configure(const char *dev_name, struct acc100_conf *conf);
> > +
> >  #ifdef __cplusplus
> >  }
> >  #endif
> > diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
> > b/drivers/baseband/acc100/rte_acc100_pmd.c
> > index 3589814..b50dd32 100644
> > --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> > +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> > @@ -85,6 +85,26 @@
> >
> >  enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC};
> >
> > +/* Return the accelerator enum for a Queue Group Index */ static 
> > +inline int accFromQgid(int qg_idx, const struct acc100_conf
> > +*acc100_conf) {
> > +	int accQg[ACC100_NUM_QGRPS];
> > +	int NumQGroupsPerFn[NUM_ACC];
> > +	int acc, qgIdx, qgIndex = 0;
> > +	for (qgIdx = 0; qgIdx < ACC100_NUM_QGRPS; qgIdx++)
> > +		accQg[qgIdx] = 0;
> > +	NumQGroupsPerFn[UL_4G] = acc100_conf->q_ul_4g.num_qgroups;
> > +	NumQGroupsPerFn[UL_5G] = acc100_conf->q_ul_5g.num_qgroups;
> > +	NumQGroupsPerFn[DL_4G] = acc100_conf->q_dl_4g.num_qgroups;
> > +	NumQGroupsPerFn[DL_5G] = acc100_conf->q_dl_5g.num_qgroups;
> > +	for (acc = UL_4G;  acc < NUM_ACC; acc++)
> > +		for (qgIdx = 0; qgIdx < NumQGroupsPerFn[acc]; qgIdx++)
> > +			accQg[qgIndex++] = acc;
> 
> This looks inefficient, is there a way this could be calculated 
> without filling arrays to
> 
> access 1 value ?

That is not time critical, and the same common code is run each time. 

> 
> > +	acc = accQg[qg_idx];
> > +	return acc;
> > +}
> > +
> >  /* Return the queue topology for a Queue Group Index */  static 
> > inline void  qtopFromAcc(struct rte_q_topology_t **qtop, int 
> > acc_enum, @@ -113,6 +133,30 @@
> >  	*qtop = p_qtop;
> >  }
> >
> > +/* Return the AQ depth for a Queue Group Index */ static inline int 
> > +aqDepth(int qg_idx, struct acc100_conf *acc100_conf) {
> > +	struct rte_q_topology_t *q_top = NULL;
> > +	int acc_enum = accFromQgid(qg_idx, acc100_conf);
> > +	qtopFromAcc(&q_top, acc_enum, acc100_conf);
> > +	if (unlikely(q_top == NULL))
> > +		return 0;
> 
> This error is not handled well by the callers.
> 
> aqNum is similar.

This fails on a consistent basis, by having no queue available and handling this as the default case.

> 
> > +	return q_top->aq_depth_log2;
> > +}
> > +
> > +/* Return the AQ depth for a Queue Group Index */ static inline int 
> > +aqNum(int qg_idx, struct acc100_conf *acc100_conf) {
> > +	struct rte_q_topology_t *q_top = NULL;
> > +	int acc_enum = accFromQgid(qg_idx, acc100_conf);
> > +	qtopFromAcc(&q_top, acc_enum, acc100_conf);
> > +	if (unlikely(q_top == NULL))
> > +		return 0;
> > +	return q_top->num_aqs_per_groups;
> > +}
> > +
> >  static void
> >  initQTop(struct acc100_conf *acc100_conf)  { @@ -4177,3 +4221,464 
> > @@ static int acc100_pci_remove(struct rte_pci_device *pci_dev) 
> > RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME,
> > pci_id_acc100_pf_map);
> RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME,
> > acc100_pci_vf_driver);
> > RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME,
> > pci_id_acc100_vf_map);
> > +
> > +/*
> > + * Implementation to fix the power on status of some 5GUL engines
> > + * This requires DMA permission if ported outside DPDK
> This sounds like a workaround, can more detail be added here ?

There are comments through the code I believe:
  - /* Detect engines in undefined state */
  - /* Force each engine which is in unspecified state */
  - /* Reset LDPC Cores */
  - /* Check engine power-on status again */
Do you believe this is not explicit enough? The power-on status may be in an undefined state, hence these engines are activated with a dummy payload to make sure they are in a predictable state once configuration is done.

> > + */
> > +static void
> > +poweron_cleanup(struct rte_bbdev *bbdev, struct acc100_device *d,
> > +		struct acc100_conf *conf)
> > +{
> > +	int i, template_idx, qg_idx;
> > +	uint32_t address, status, payload;
> > +	printf("Need to clear power-on 5GUL status in internal memory\n");
> > +	/* Reset LDPC Cores */
> > +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
> > +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> > +				ACC100_ENGINE_OFFSET * i,
> ACC100_RESET_HI);
> > +	usleep(LONG_WAIT);
> > +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
> > +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> > +				ACC100_ENGINE_OFFSET * i,
> ACC100_RESET_LO);
> > +	usleep(LONG_WAIT);
> > +	/* Prepare dummy workload */
> > +	alloc_2x64mb_sw_rings_mem(bbdev, d, 0);
> > +	/* Set base addresses */
> > +	uint32_t phys_high = (uint32_t)(d->sw_rings_phys >> 32);
> > +	uint32_t phys_low  = (uint32_t)(d->sw_rings_phys &
> > +			~(ACC100_SIZE_64MBYTE-1));
> > +	acc100_reg_write(d, HWPfDmaFec5GulDescBaseHiRegVf,
> phys_high);
> > +	acc100_reg_write(d, HWPfDmaFec5GulDescBaseLoRegVf, phys_low);
> > +
> > +	/* Descriptor for a dummy 5GUL code block processing*/
> > +	union acc100_dma_desc *desc = NULL;
> > +	desc = d->sw_rings;
> > +	desc->req.data_ptrs[0].address = d->sw_rings_phys +
> > +			ACC100_DESC_FCW_OFFSET;
> > +	desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN;
> > +	desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
> > +	desc->req.data_ptrs[0].last = 0;
> > +	desc->req.data_ptrs[0].dma_ext = 0;
> > +	desc->req.data_ptrs[1].address = d->sw_rings_phys + 512;
> > +	desc->req.data_ptrs[1].blkid = ACC100_DMA_BLKID_IN;
> > +	desc->req.data_ptrs[1].last = 1;
> > +	desc->req.data_ptrs[1].dma_ext = 0;
> > +	desc->req.data_ptrs[1].blen = 44;
> > +	desc->req.data_ptrs[2].address = d->sw_rings_phys + 1024;
> > +	desc->req.data_ptrs[2].blkid = ACC100_DMA_BLKID_OUT_ENC;
> > +	desc->req.data_ptrs[2].last = 1;
> > +	desc->req.data_ptrs[2].dma_ext = 0;
> > +	desc->req.data_ptrs[2].blen = 5;
> > +	/* Dummy FCW */
> > +	desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
> > +	desc->req.fcw_ld.qm = 1;
> > +	desc->req.fcw_ld.nfiller = 30;
> > +	desc->req.fcw_ld.BG = 2 - 1;
> > +	desc->req.fcw_ld.Zc = 7;
> > +	desc->req.fcw_ld.ncb = 350;
> > +	desc->req.fcw_ld.rm_e = 4;
> > +	desc->req.fcw_ld.itmax = 10;
> > +	desc->req.fcw_ld.gain_i = 1;
> > +	desc->req.fcw_ld.gain_h = 1;
> > +
> > +	int engines_to_restart[SIG_UL_5G_LAST + 1] = {0};
> > +	int num_failed_engine = 0;
> > +	/* Detect engines in undefined state */
> > +	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> > +			template_idx++) {
> > +		/* Check engine power-on status */
> > +		address = HwPfFecUl5gIbDebugReg +
> > +				ACC100_ENGINE_OFFSET * template_idx;
> > +		status = (acc100_reg_read(d, address) >> 4) & 0xF;
> > +		if (status == 0) {
> > +			engines_to_restart[num_failed_engine] =
> template_idx;
> > +			num_failed_engine++;
> > +		}
> > +	}
> > +
> > +	int numQqsAcc = conf->q_ul_5g.num_qgroups;
> > +	int numQgs = conf->q_ul_5g.num_qgroups;
> > +	payload = 0;
> > +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> qg_idx++)
> > +		payload |= (1 << qg_idx);
> > +	/* Force each engine which is in unspecified state */
> > +	for (i = 0; i < num_failed_engine; i++) {
> > +		int failed_engine = engines_to_restart[i];
> > +		printf("Force engine %d\n", failed_engine);
> > +		for (template_idx = SIG_UL_5G; template_idx <=
> SIG_UL_5G_LAST;
> > +				template_idx++) {
> > +			address = HWPfQmgrGrpTmplateReg4Indx
> > +					+ BYTES_IN_WORD * template_idx;
> > +			if (template_idx == failed_engine)
> > +				acc100_reg_write(d, address, payload);
> > +			else
> > +				acc100_reg_write(d, address, 0);
> > +		}
> > +		/* Reset descriptor header */
> > +		desc->req.word0 = ACC100_DMA_DESC_TYPE;
> > +		desc->req.word1 = 0;
> > +		desc->req.word2 = 0;
> > +		desc->req.word3 = 0;
> > +		desc->req.numCBs = 1;
> > +		desc->req.m2dlen = 2;
> > +		desc->req.d2mlen = 1;
> > +		/* Enqueue the code block for processing */
> > +		union acc100_enqueue_reg_fmt enq_req;
> > +		enq_req.val = 0;
> > +		enq_req.addr_offset = ACC100_DESC_OFFSET;
> > +		enq_req.num_elem = 1;
> > +		enq_req.req_elem_addr = 0;
> > +		rte_wmb();
> > +		acc100_reg_write(d, HWPfQmgrIngressAq + 0x100,
> enq_req.val);
> > +		usleep(LONG_WAIT * 100);
> > +		if (desc->req.word0 != 2)
> > +			printf("DMA Response %#"PRIx32"\n", desc-
> >req.word0);
> > +	}
> > +
> > +	/* Reset LDPC Cores */
> > +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
> > +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> > +				ACC100_ENGINE_OFFSET * i,
> ACC100_RESET_HI);
> > +	usleep(LONG_WAIT);
> > +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
> > +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> > +				ACC100_ENGINE_OFFSET * i,
> ACC100_RESET_LO);
> > +	usleep(LONG_WAIT);
> > +	acc100_reg_write(d, HWPfHi5GHardResetReg,
> ACC100_RESET_HARD);
> > +	usleep(LONG_WAIT);
> > +	int numEngines = 0;
> > +	/* Check engine power-on status again */
> > +	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> > +			template_idx++) {
> > +		address = HwPfFecUl5gIbDebugReg +
> > +				ACC100_ENGINE_OFFSET * template_idx;
> > +		status = (acc100_reg_read(d, address) >> 4) & 0xF;
> > +		address = HWPfQmgrGrpTmplateReg4Indx
> > +				+ BYTES_IN_WORD * template_idx;
> > +		if (status == 1) {
> > +			acc100_reg_write(d, address, payload);
> > +			numEngines++;
> > +		} else
> > +			acc100_reg_write(d, address, 0);
> > +	}
> > +	printf("Number of 5GUL engines %d\n", numEngines);
> > +
> > +	if (d->sw_rings_base != NULL)
> > +		rte_free(d->sw_rings_base);
> > +	usleep(LONG_WAIT);
> > +}
> > +
> > +/* Initial configuration of a ACC100 device prior to running
> > +configure() */ int acc100_configure(const char *dev_name, struct 
> > +acc100_conf *conf) {
> > +	rte_bbdev_log(INFO, "acc100_configure");
> > +	uint32_t payload, address, status;
> 
> maybe value or data would be a better variable name than payload.
> 
> would mean changing acc100_reg_write

Transparent to me, but I can change it given that DPDK uses the term "value".


> 
> > +	int qg_idx, template_idx, vf_idx, acc, i;
> > +	struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name);
> > +
> > +	/* Compile time checks */
> > +	RTE_BUILD_BUG_ON(sizeof(struct acc100_dma_req_desc) != 256);
> > +	RTE_BUILD_BUG_ON(sizeof(union acc100_dma_desc) != 256);
> > +	RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_td) != 24);
> > +	RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_te) != 32);
> > +
> > +	if (bbdev == NULL) {
> > +		rte_bbdev_log(ERR,
> > +		"Invalid dev_name (%s), or device is not yet initialised",
> > +		dev_name);
> > +		return -ENODEV;
> > +	}
> > +	struct acc100_device *d = bbdev->data->dev_private;
> > +
> > +	/* Store configuration */
> > +	rte_memcpy(&d->acc100_conf, conf, sizeof(d->acc100_conf));
> > +
> > +	/* PCIe Bridge configuration */
> > +	acc100_reg_write(d, HwPfPcieGpexBridgeControl,
> ACC100_CFG_PCI_BRIDGE);
> > +	for (i = 1; i < 17; i++)
> 
> 17 is a magic number, use a #define
> 
> this is a general issue.

These are only used once but still agreed.

> 
> > +		acc100_reg_write(d,
> > +
> 	HwPfPcieGpexAxiAddrMappingWindowPexBaseHigh
> > +				+ i * 16, 0);
> > +
> > +	/* PCIe Link Trainiing and Status State Machine */
> > +	acc100_reg_write(d, HwPfPcieGpexLtssmStateCntrl, 0xDFC00000);
> > +
> > +	/* Prevent blocking AXI read on BRESP for AXI Write */
> > +	address = HwPfPcieGpexAxiPioControl;
> > +	payload = ACC100_CFG_PCI_AXI;
> > +	acc100_reg_write(d, address, payload);
> > +
> > +	/* 5GDL PLL phase shift */
> > +	acc100_reg_write(d, HWPfChaDl5gPllPhshft0, 0x1);
> > +
> > +	/* Explicitly releasing AXI as this may be stopped after PF FLR/BME */
> > +	address = HWPfDmaAxiControl;
> > +	payload = 1;
> > +	acc100_reg_write(d, address, payload);
> > +
> > +	/* DDR Configuration */
> > +	address = HWPfDdrBcTim6;
> > +	payload = acc100_reg_read(d, address);
> > +	payload &= 0xFFFFFFFB; /* Bit 2 */ #ifdef ACC100_DDR_ECC_ENABLE
> > +	payload |= 0x4;
> > +#endif
> > +	acc100_reg_write(d, address, payload);
> > +	address = HWPfDdrPhyDqsCountNum;
> > +#ifdef ACC100_DDR_ECC_ENABLE
> > +	payload = 9;
> > +#else
> > +	payload = 8;
> > +#endif
> > +	acc100_reg_write(d, address, payload);
> > +
> > +	/* Set default descriptor signature */
> > +	address = HWPfDmaDescriptorSignatuture;
> > +	payload = 0;
> > +	acc100_reg_write(d, address, payload);
> > +
> > +	/* Enable the Error Detection in DMA */
> > +	payload = ACC100_CFG_DMA_ERROR;
> > +	address = HWPfDmaErrorDetectionEn;
> > +	acc100_reg_write(d, address, payload);
> > +
> > +	/* AXI Cache configuration */
> > +	payload = ACC100_CFG_AXI_CACHE;
> > +	address = HWPfDmaAxcacheReg;
> > +	acc100_reg_write(d, address, payload);
> > +
> > +	/* Default DMA Configuration (Qmgr Enabled) */
> > +	address = HWPfDmaConfig0Reg;
> > +	payload = 0;
> > +	acc100_reg_write(d, address, payload);
> > +	address = HWPfDmaQmanen;
> > +	payload = 0;
> > +	acc100_reg_write(d, address, payload);
> > +
> > +	/* Default RLIM/ALEN configuration */
> > +	address = HWPfDmaConfig1Reg;
> > +	payload = (1 << 31) + (23 << 8) + (1 << 6) + 7;
> > +	acc100_reg_write(d, address, payload);
> > +
> > +	/* Configure DMA Qmanager addresses */
> > +	address = HWPfDmaQmgrAddrReg;
> > +	payload = HWPfQmgrEgressQueuesTemplate;
> > +	acc100_reg_write(d, address, payload);
> > +
> > +	/* ===== Qmgr Configuration ===== */
> > +	/* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2
> for UL */
> > +	int totalQgs = conf->q_ul_4g.num_qgroups +
> > +			conf->q_ul_5g.num_qgroups +
> > +			conf->q_dl_4g.num_qgroups +
> > +			conf->q_dl_5g.num_qgroups;
> > +	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> > +		address = HWPfQmgrDepthLog2Grp +
> > +		BYTES_IN_WORD * qg_idx;
> > +		payload = aqDepth(qg_idx, conf);
> > +		acc100_reg_write(d, address, payload);
> > +		address = HWPfQmgrTholdGrp +
> > +		BYTES_IN_WORD * qg_idx;
> > +		payload = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1));
> > +		acc100_reg_write(d, address, payload);
> > +	}
> > +
> > +	/* Template Priority in incremental order */
> > +	for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
> > +			template_idx++) {
> > +		address = HWPfQmgrGrpTmplateReg0Indx +
> > +		BYTES_IN_WORD * (template_idx % 8);
> > +		payload = TMPL_PRI_0;
> > +		acc100_reg_write(d, address, payload);
> > +		address = HWPfQmgrGrpTmplateReg1Indx +
> > +		BYTES_IN_WORD * (template_idx % 8);
> > +		payload = TMPL_PRI_1;
> > +		acc100_reg_write(d, address, payload);
> > +		address = HWPfQmgrGrpTmplateReg2indx +
> > +		BYTES_IN_WORD * (template_idx % 8);
> > +		payload = TMPL_PRI_2;
> > +		acc100_reg_write(d, address, payload);
> > +		address = HWPfQmgrGrpTmplateReg3Indx +
> > +		BYTES_IN_WORD * (template_idx % 8);
> > +		payload = TMPL_PRI_3;
> > +		acc100_reg_write(d, address, payload);
> > +	}
> > +
> > +	address = HWPfQmgrGrpPriority;
> > +	payload = ACC100_CFG_QMGR_HI_P;
> > +	acc100_reg_write(d, address, payload);
> > +
> > +	/* Template Configuration */
> > +	for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
> template_idx++) {
> > +		payload = 0;
> > +		address = HWPfQmgrGrpTmplateReg4Indx
> > +				+ BYTES_IN_WORD * template_idx;
> > +		acc100_reg_write(d, address, payload);
> > +	}
> > +	/* 4GUL */
> > +	int numQgs = conf->q_ul_4g.num_qgroups;
> > +	int numQqsAcc = 0;
> > +	payload = 0;
> > +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> qg_idx++)
> > +		payload |= (1 << qg_idx);
> > +	for (template_idx = SIG_UL_4G; template_idx <= SIG_UL_4G_LAST;
> > +			template_idx++) {
> > +		address = HWPfQmgrGrpTmplateReg4Indx
> > +				+ BYTES_IN_WORD*template_idx;
> > +		acc100_reg_write(d, address, payload);
> > +	}
> > +	/* 5GUL */
> > +	numQqsAcc += numQgs;
> > +	numQgs	= conf->q_ul_5g.num_qgroups;
> > +	payload = 0;
> > +	int numEngines = 0;
> > +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> qg_idx++)
> > +		payload |= (1 << qg_idx);
> > +	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> > +			template_idx++) {
> > +		/* Check engine power-on status */
> > +		address = HwPfFecUl5gIbDebugReg +
> > +				ACC100_ENGINE_OFFSET * template_idx;
> > +		status = (acc100_reg_read(d, address) >> 4) & 0xF;
> > +		address = HWPfQmgrGrpTmplateReg4Indx
> > +				+ BYTES_IN_WORD * template_idx;
> > +		if (status == 1) {
> > +			acc100_reg_write(d, address, payload);
> > +			numEngines++;
> > +		} else
> > +			acc100_reg_write(d, address, 0);
> > +		#if RTE_ACC100_SINGLE_FEC == 1
> #if should be at start of line

ok

> > +		payload = 0;
> > +		#endif
> > +	}
> > +	printf("Number of 5GUL engines %d\n", numEngines);
> > +	/* 4GDL */
> > +	numQqsAcc += numQgs;
> > +	numQgs	= conf->q_dl_4g.num_qgroups;
> > +	payload = 0;
> > +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> qg_idx++)
> > +		payload |= (1 << qg_idx);
> > +	for (template_idx = SIG_DL_4G; template_idx <= SIG_DL_4G_LAST;
> > +			template_idx++) {
> > +		address = HWPfQmgrGrpTmplateReg4Indx
> > +				+ BYTES_IN_WORD*template_idx;
> > +		acc100_reg_write(d, address, payload);
> > +		#if RTE_ACC100_SINGLE_FEC == 1
> > +			payload = 0;
> > +		#endif
> > +	}
> > +	/* 5GDL */
> > +	numQqsAcc += numQgs;
> > +	numQgs	= conf->q_dl_5g.num_qgroups;
> > +	payload = 0;
> > +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> qg_idx++)
> > +		payload |= (1 << qg_idx);
> > +	for (template_idx = SIG_DL_5G; template_idx <= SIG_DL_5G_LAST;
> > +			template_idx++) {
> > +		address = HWPfQmgrGrpTmplateReg4Indx
> > +				+ BYTES_IN_WORD*template_idx;
> > +		acc100_reg_write(d, address, payload);
> > +		#if RTE_ACC100_SINGLE_FEC == 1
> > +		payload = 0;
> > +		#endif
> > +	}
> > +
> > +	/* Queue Group Function mapping */
> > +	int qman_func_id[5] = {0, 2, 1, 3, 4};
> > +	address = HWPfQmgrGrpFunction0;
> > +	payload = 0;
> > +	for (qg_idx = 0; qg_idx < 8; qg_idx++) {
> > +		acc = accFromQgid(qg_idx, conf);
> > +		payload |= qman_func_id[acc]<<(qg_idx * 4);
> > +	}
> > +	acc100_reg_write(d, address, payload);
> > +
> > +	/* Configuration of the Arbitration QGroup depth to 1 */
> > +	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> > +		address = HWPfQmgrArbQDepthGrp +
> > +		BYTES_IN_WORD * qg_idx;
> > +		payload = 0;
> > +		acc100_reg_write(d, address, payload);
> > +	}
> > +
> > +	/* Enabling AQueues through the Queue hierarchy*/
> > +	for (vf_idx = 0; vf_idx < ACC100_NUM_VFS; vf_idx++) {
> > +		for (qg_idx = 0; qg_idx < ACC100_NUM_QGRPS; qg_idx++) {
> > +			payload = 0;
> > +			if (vf_idx < conf->num_vf_bundles &&
> > +					qg_idx < totalQgs)
> > +				payload = (1 << aqNum(qg_idx, conf)) - 1;
> > +			address = HWPfQmgrAqEnableVf
> > +					+ vf_idx * BYTES_IN_WORD;
> > +			payload += (qg_idx << 16);
> > +			acc100_reg_write(d, address, payload);
> > +		}
> > +	}
> > +
> > +	/* This pointer to ARAM (256kB) is shifted by 2 (4B per register) */
> > +	uint32_t aram_address = 0;
> > +	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> > +		for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
> > +			address = HWPfQmgrVfBaseAddr + vf_idx
> > +					* BYTES_IN_WORD + qg_idx
> > +					* BYTES_IN_WORD * 64;
> > +			payload = aram_address;
> > +			acc100_reg_write(d, address, payload);
> > +			/* Offset ARAM Address for next memory bank
> > +			 * - increment of 4B
> > +			 */
> > +			aram_address += aqNum(qg_idx, conf) *
> > +					(1 << aqDepth(qg_idx, conf));
> > +		}
> > +	}
> > +
> > +	if (aram_address > WORDS_IN_ARAM_SIZE) {
> > +		rte_bbdev_log(ERR, "ARAM Configuration not fitting %d
> %d\n",
> > +				aram_address, WORDS_IN_ARAM_SIZE);
> > +		return -EINVAL;
> > +	}
> > +
> > +	/* ==== HI Configuration ==== */
> > +
> > +	/* Prevent Block on Transmit Error */
> > +	address = HWPfHiBlockTransmitOnErrorEn;
> > +	payload = 0;
> > +	acc100_reg_write(d, address, payload);
> > +	/* Prevents to drop MSI */
> > +	address = HWPfHiMsiDropEnableReg;
> > +	payload = 0;
> > +	acc100_reg_write(d, address, payload);
> > +	/* Set the PF Mode register */
> > +	address = HWPfHiPfMode;
> > +	payload = (conf->pf_mode_en) ? 2 : 0;
> > +	acc100_reg_write(d, address, payload);
> > +	/* Enable Error Detection in HW */
> > +	address = HWPfDmaErrorDetectionEn;
> > +	payload = 0x3D7;
> > +	acc100_reg_write(d, address, payload);
> > +
> > +	/* QoS overflow init */
> > +	payload = 1;
> > +	address = HWPfQosmonAEvalOverflow0;
> > +	acc100_reg_write(d, address, payload);
> > +	address = HWPfQosmonBEvalOverflow0;
> > +	acc100_reg_write(d, address, payload);
> > +
> > +	/* HARQ DDR Configuration */
> > +	unsigned int ddrSizeInMb = 512; /* Fixed to 512 MB per VF for now
> */
> > +	for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
> > +		address = HWPfDmaVfDdrBaseRw + vf_idx
> > +				* 0x10;
> > +		payload = ((vf_idx * (ddrSizeInMb / 64)) << 16) +
> > +				(ddrSizeInMb - 1);
> > +		acc100_reg_write(d, address, payload);
> > +	}
> > +	usleep(LONG_WAIT);
> Is sleep needed here ? the reg_write has one.

This one is needed on top

> > +
> 
> Since this seems like a workaround, add a comment here.

fair enough, ok, thanks

> 
> Tom
> 
> > +	if (numEngines < (SIG_UL_5G_LAST + 1))
> > +		poweron_cleanup(bbdev, d, conf);
> > +
> > +	rte_bbdev_log_debug("PF Tip configuration complete for %s",
> dev_name);
> > +	return 0;
> > +}
> > diff --git 
> > a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> > b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> > index 4a76d1d..91c234d 100644
> > --- a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> > +++ b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> > @@ -1,3 +1,10 @@
> >  DPDK_21 {
> >  	local: *;
> >  };
> > +
> > +EXPERIMENTAL {
> > +	global:
> > +
> > +	acc100_configure;
> > +
> > +};
  
Tom Rix Oct. 1, 2020, 4:18 p.m. UTC | #3
On 9/30/20 3:54 PM, Chautru, Nicolas wrote:
> Hi Tom, 
>
>> From: Tom Rix <trix@redhat.com>
>> On 9/28/20 5:29 PM, Nicolas Chautru wrote:
>>> Add configure function to configure the PF from within the 
>>> bbdev-test itself without external application configuration the device.
>>>
>>> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
>>> Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
>>> ---
>>>  app/test-bbdev/test_bbdev_perf.c                   |  72 +++
>>>  doc/guides/rel_notes/release_20_11.rst             |   5 +
>>>  drivers/baseband/acc100/meson.build                |   2 +
>>>  drivers/baseband/acc100/rte_acc100_cfg.h           |  17 +
>>>  drivers/baseband/acc100/rte_acc100_pmd.c           | 505
>> +++++++++++++++++++++
>>>  .../acc100/rte_pmd_bbdev_acc100_version.map        |   7 +
>>>  6 files changed, 608 insertions(+)
>>>
>>> diff --git a/app/test-bbdev/test_bbdev_perf.c
>>> b/app/test-bbdev/test_bbdev_perf.c
>>> index 45c0d62..32f23ff 100644
>>> --- a/app/test-bbdev/test_bbdev_perf.c
>>> +++ b/app/test-bbdev/test_bbdev_perf.c
>>> @@ -52,6 +52,18 @@
>>>  #define FLR_5G_TIMEOUT 610
>>>  #endif
>>>
>>> +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
>>> +#include <rte_acc100_cfg.h>
>>> +#define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
>>> +#define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
>>> +#define ACC100_QMGR_NUM_AQS 16
>>> +#define ACC100_QMGR_NUM_QGS 2
>>> +#define ACC100_QMGR_AQ_DEPTH 5
>>> +#define ACC100_QMGR_INVALID_IDX -1
>>> +#define ACC100_QMGR_RR 1
>>> +#define ACC100_QOS_GBR 0
>>> +#endif
>>> +
>>>  #define OPS_CACHE_SIZE 256U
>>>  #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
>>>
>>> @@ -653,6 +665,66 @@ typedef int (test_case_function)(struct
>> active_device *ad,
>>>  				info->dev_name);
>>>  	}
>>>  #endif
>>> +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
>> seems like this function would break if one of the other bbdev's were 
>> #defined.
> No these are independent. By default they are all defined. 
ok
>
>
>>> +	if ((get_init_device() == true) &&
>>> +		(!strcmp(info->drv.driver_name,
>> ACC100PF_DRIVER_NAME))) {
>>> +		struct acc100_conf conf;
>>> +		unsigned int i;
>>> +
>>> +		printf("Configure ACC100 FEC Driver %s with default
>> values\n",
>>> +				info->drv.driver_name);
>>> +
>>> +		/* clear default configuration before initialization */
>>> +		memset(&conf, 0, sizeof(struct acc100_conf));
>>> +
>>> +		/* Always set in PF mode for built-in configuration */
>>> +		conf.pf_mode_en = true;
>>> +		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
>>> +			conf.arb_dl_4g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> +			conf.arb_dl_4g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> +			conf.arb_dl_4g[i].round_robin_weight =
>> ACC100_QMGR_RR;
>>> +			conf.arb_ul_4g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> +			conf.arb_ul_4g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> +			conf.arb_ul_4g[i].round_robin_weight =
>> ACC100_QMGR_RR;
>>> +			conf.arb_dl_5g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> +			conf.arb_dl_5g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> +			conf.arb_dl_5g[i].round_robin_weight =
>> ACC100_QMGR_RR;
>>> +			conf.arb_ul_5g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> +			conf.arb_ul_5g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> +			conf.arb_ul_5g[i].round_robin_weight =
>> ACC100_QMGR_RR;
>>> +		}
>>> +
>>> +		conf.input_pos_llr_1_bit = true;
>>> +		conf.output_pos_llr_1_bit = true;
>>> +		conf.num_vf_bundles = 1; /**< Number of VF bundles to
>> setup */
>>> +
>>> +		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
>>> +		conf.q_ul_4g.first_qgroup_index =
>> ACC100_QMGR_INVALID_IDX;
>>> +		conf.q_ul_4g.num_aqs_per_groups =
>> ACC100_QMGR_NUM_AQS;
>>> +		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
>>> +		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
>>> +		conf.q_dl_4g.first_qgroup_index =
>> ACC100_QMGR_INVALID_IDX;
>>> +		conf.q_dl_4g.num_aqs_per_groups =
>> ACC100_QMGR_NUM_AQS;
>>> +		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
>>> +		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
>>> +		conf.q_ul_5g.first_qgroup_index =
>> ACC100_QMGR_INVALID_IDX;
>>> +		conf.q_ul_5g.num_aqs_per_groups =
>> ACC100_QMGR_NUM_AQS;
>>> +		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
>>> +		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
>>> +		conf.q_dl_5g.first_qgroup_index =
>> ACC100_QMGR_INVALID_IDX;
>>> +		conf.q_dl_5g.num_aqs_per_groups =
>> ACC100_QMGR_NUM_AQS;
>>> +		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
>>> +
>>> +		/* setup PF with configuration information */
>>> +		ret = acc100_configure(info->dev_name, &conf);
>>> +		TEST_ASSERT_SUCCESS(ret,
>>> +				"Failed to configure ACC100 PF for bbdev
>> %s",
>>> +				info->dev_name);
>>> +		/* Let's refresh this now this is configured */
>>> +	}
>>> +	rte_bbdev_info_get(dev_id, info);
>> The other bbdev's do not call rte_bbdev_info_get, can this be removed ?
> Actually it should be added outside for all versions post-configuration. Thanks
>
>>> +#endif
>>> +
>>>  	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
>>>  	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
>>>
>>> diff --git a/doc/guides/rel_notes/release_20_11.rst
>>> b/doc/guides/rel_notes/release_20_11.rst
>>> index 73ac08f..c8d0586 100644
>>> --- a/doc/guides/rel_notes/release_20_11.rst
>>> +++ b/doc/guides/rel_notes/release_20_11.rst
>>> @@ -55,6 +55,11 @@ New Features
>>>       Also, make sure to start the actual text at the margin.
>>>       =======================================================
>>>
>>> +* **Added Intel ACC100 bbdev PMD.**
>>> +
>>> +  Added a new ``acc100`` bbdev driver for the Intel\ |reg| ACC100 
>>> + accelerator  also known as Mount Bryce.  See the 
>>> + :doc:`../bbdevs/acc100` BBDEV guide for more details on this new driver.
>>>
>>>  Removed Items
>>>  -------------
>>> diff --git a/drivers/baseband/acc100/meson.build
>>> b/drivers/baseband/acc100/meson.build
>>> index 8afafc2..7ac44dc 100644
>>> --- a/drivers/baseband/acc100/meson.build
>>> +++ b/drivers/baseband/acc100/meson.build
>>> @@ -4,3 +4,5 @@
>>>  deps += ['bbdev', 'bus_vdev', 'ring', 'pci', 'bus_pci']
>>>
>>>  sources = files('rte_acc100_pmd.c')
>>> +
>>> +install_headers('rte_acc100_cfg.h')
>>> diff --git a/drivers/baseband/acc100/rte_acc100_cfg.h
>>> b/drivers/baseband/acc100/rte_acc100_cfg.h
>>> index 73bbe36..7f523bc 100644
>>> --- a/drivers/baseband/acc100/rte_acc100_cfg.h
>>> +++ b/drivers/baseband/acc100/rte_acc100_cfg.h
>>> @@ -89,6 +89,23 @@ struct acc100_conf {
>>>  	struct rte_arbitration_t arb_dl_5g[RTE_ACC100_NUM_VFS];  };
>>>
>>> +/**
>>> + * Configure a ACC100 device
>>> + *
>>> + * @param dev_name
>>> + *   The name of the device. This is the short form of PCI BDF, e.g. 00:01.0.
>>> + *   It can also be retrieved for a bbdev device from the dev_name field in
>> the
>>> + *   rte_bbdev_info structure returned by rte_bbdev_info_get().
>>> + * @param conf
>>> + *   Configuration to apply to ACC100 HW.
>>> + *
>>> + * @return
>>> + *   Zero on success, negative value on failure.
>>> + */
>>> +__rte_experimental
>>> +int
>>> +acc100_configure(const char *dev_name, struct acc100_conf *conf);
>>> +
>>>  #ifdef __cplusplus
>>>  }
>>>  #endif
>>> diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
>>> b/drivers/baseband/acc100/rte_acc100_pmd.c
>>> index 3589814..b50dd32 100644
>>> --- a/drivers/baseband/acc100/rte_acc100_pmd.c
>>> +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
>>> @@ -85,6 +85,26 @@
>>>
>>>  enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC};
>>>
>>> +/* Return the accelerator enum for a Queue Group Index */ static 
>>> +inline int accFromQgid(int qg_idx, const struct acc100_conf
>>> +*acc100_conf) {
>>> +	int accQg[ACC100_NUM_QGRPS];
>>> +	int NumQGroupsPerFn[NUM_ACC];
>>> +	int acc, qgIdx, qgIndex = 0;
>>> +	for (qgIdx = 0; qgIdx < ACC100_NUM_QGRPS; qgIdx++)
>>> +		accQg[qgIdx] = 0;
>>> +	NumQGroupsPerFn[UL_4G] = acc100_conf->q_ul_4g.num_qgroups;
>>> +	NumQGroupsPerFn[UL_5G] = acc100_conf->q_ul_5g.num_qgroups;
>>> +	NumQGroupsPerFn[DL_4G] = acc100_conf->q_dl_4g.num_qgroups;
>>> +	NumQGroupsPerFn[DL_5G] = acc100_conf->q_dl_5g.num_qgroups;
>>> +	for (acc = UL_4G;  acc < NUM_ACC; acc++)
>>> +		for (qgIdx = 0; qgIdx < NumQGroupsPerFn[acc]; qgIdx++)
>>> +			accQg[qgIndex++] = acc;
>> This looks inefficient, is there a way this could be calculated 
>> without filling arrays to
>>
>> access 1 value ?
> That is not time critical, and the same common code is run each time. 
ok
>
>>> +	acc = accQg[qg_idx];
>>> +	return acc;
>>> +}
>>> +
>>>  /* Return the queue topology for a Queue Group Index */  static 
>>> inline void  qtopFromAcc(struct rte_q_topology_t **qtop, int 
>>> acc_enum, @@ -113,6 +133,30 @@
>>>  	*qtop = p_qtop;
>>>  }
>>>
>>> +/* Return the AQ depth for a Queue Group Index */ static inline int 
>>> +aqDepth(int qg_idx, struct acc100_conf *acc100_conf) {
>>> +	struct rte_q_topology_t *q_top = NULL;
>>> +	int acc_enum = accFromQgid(qg_idx, acc100_conf);
>>> +	qtopFromAcc(&q_top, acc_enum, acc100_conf);
>>> +	if (unlikely(q_top == NULL))
>>> +		return 0;
>> This error is not handled well by the callers.
>>
>> aqNum is similar.
> This fails on a consistent basis, by having no queue available and handling this as the default case.
ok
>
>>> +	return q_top->aq_depth_log2;
>>> +}
>>> +
>>> +/* Return the AQ depth for a Queue Group Index */ static inline int 
>>> +aqNum(int qg_idx, struct acc100_conf *acc100_conf) {
>>> +	struct rte_q_topology_t *q_top = NULL;
>>> +	int acc_enum = accFromQgid(qg_idx, acc100_conf);
>>> +	qtopFromAcc(&q_top, acc_enum, acc100_conf);
>>> +	if (unlikely(q_top == NULL))
>>> +		return 0;
>>> +	return q_top->num_aqs_per_groups;
>>> +}
>>> +
>>>  static void
>>>  initQTop(struct acc100_conf *acc100_conf)  { @@ -4177,3 +4221,464 
>>> @@ static int acc100_pci_remove(struct rte_pci_device *pci_dev) 
>>> RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME,
>>> pci_id_acc100_pf_map);
>> RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME,
>>> acc100_pci_vf_driver);
>>> RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME,
>>> pci_id_acc100_vf_map);
>>> +
>>> +/*
>>> + * Implementation to fix the power on status of some 5GUL engines
>>> + * This requires DMA permission if ported outside DPDK
>> This sounds like a workaround, can more detail be added here ?
> There are comments through the code I believe:
>   - /* Detect engines in undefined state */
>   - /* Force each engine which is in unspecified state */
>   - /* Reset LDPC Cores */
>   - /* Check engine power-on status again */ Do you believe this is not explicit enough? Power-on status may be in an undefined state, hence these engines are activated with a dummy payload to make sure they are in a predictable state once configuration is done.

Yes, not explicit enough. They do not say it is a workaround so someone else would not know that

this is needed or likely needs adjusting in the future.  Maybe change

/* Check engine power-on status again */ to

/*

 * Power-on status may be in an undefined state.

 * Activate this engine with a dummy payload to make sure the state is defined.

 */ 

Tom

>>> + */
>>> +static void
>>> +poweron_cleanup(struct rte_bbdev *bbdev, struct acc100_device *d,
>>> +		struct acc100_conf *conf)
>>> +{
>>> +	int i, template_idx, qg_idx;
>>> +	uint32_t address, status, payload;
>>> +	printf("Need to clear power-on 5GUL status in internal memory\n");
>>> +	/* Reset LDPC Cores */
>>> +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
>>> +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
>>> +				ACC100_ENGINE_OFFSET * i,
>> ACC100_RESET_HI);
>>> +	usleep(LONG_WAIT);
>>> +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
>>> +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
>>> +				ACC100_ENGINE_OFFSET * i,
>> ACC100_RESET_LO);
>>> +	usleep(LONG_WAIT);
>>> +	/* Prepare dummy workload */
>>> +	alloc_2x64mb_sw_rings_mem(bbdev, d, 0);
>>> +	/* Set base addresses */
>>> +	uint32_t phys_high = (uint32_t)(d->sw_rings_phys >> 32);
>>> +	uint32_t phys_low  = (uint32_t)(d->sw_rings_phys &
>>> +			~(ACC100_SIZE_64MBYTE-1));
>>> +	acc100_reg_write(d, HWPfDmaFec5GulDescBaseHiRegVf,
>> phys_high);
>>> +	acc100_reg_write(d, HWPfDmaFec5GulDescBaseLoRegVf, phys_low);
>>> +
>>> +	/* Descriptor for a dummy 5GUL code block processing*/
>>> +	union acc100_dma_desc *desc = NULL;
>>> +	desc = d->sw_rings;
>>> +	desc->req.data_ptrs[0].address = d->sw_rings_phys +
>>> +			ACC100_DESC_FCW_OFFSET;
>>> +	desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN;
>>> +	desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
>>> +	desc->req.data_ptrs[0].last = 0;
>>> +	desc->req.data_ptrs[0].dma_ext = 0;
>>> +	desc->req.data_ptrs[1].address = d->sw_rings_phys + 512;
>>> +	desc->req.data_ptrs[1].blkid = ACC100_DMA_BLKID_IN;
>>> +	desc->req.data_ptrs[1].last = 1;
>>> +	desc->req.data_ptrs[1].dma_ext = 0;
>>> +	desc->req.data_ptrs[1].blen = 44;
>>> +	desc->req.data_ptrs[2].address = d->sw_rings_phys + 1024;
>>> +	desc->req.data_ptrs[2].blkid = ACC100_DMA_BLKID_OUT_ENC;
>>> +	desc->req.data_ptrs[2].last = 1;
>>> +	desc->req.data_ptrs[2].dma_ext = 0;
>>> +	desc->req.data_ptrs[2].blen = 5;
>>> +	/* Dummy FCW */
>>> +	desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
>>> +	desc->req.fcw_ld.qm = 1;
>>> +	desc->req.fcw_ld.nfiller = 30;
>>> +	desc->req.fcw_ld.BG = 2 - 1;
>>> +	desc->req.fcw_ld.Zc = 7;
>>> +	desc->req.fcw_ld.ncb = 350;
>>> +	desc->req.fcw_ld.rm_e = 4;
>>> +	desc->req.fcw_ld.itmax = 10;
>>> +	desc->req.fcw_ld.gain_i = 1;
>>> +	desc->req.fcw_ld.gain_h = 1;
>>> +
>>> +	int engines_to_restart[SIG_UL_5G_LAST + 1] = {0};
>>> +	int num_failed_engine = 0;
>>> +	/* Detect engines in undefined state */
>>> +	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
>>> +			template_idx++) {
>>> +		/* Check engine power-on status */
>>> +		address = HwPfFecUl5gIbDebugReg +
>>> +				ACC100_ENGINE_OFFSET * template_idx;
>>> +		status = (acc100_reg_read(d, address) >> 4) & 0xF;
>>> +		if (status == 0) {
>>> +			engines_to_restart[num_failed_engine] =
>> template_idx;
>>> +			num_failed_engine++;
>>> +		}
>>> +	}
>>> +
>>> +	int numQqsAcc = conf->q_ul_5g.num_qgroups;
>>> +	int numQgs = conf->q_ul_5g.num_qgroups;
>>> +	payload = 0;
>>> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
>> qg_idx++)
>>> +		payload |= (1 << qg_idx);
>>> +	/* Force each engine which is in unspecified state */
>>> +	for (i = 0; i < num_failed_engine; i++) {
>>> +		int failed_engine = engines_to_restart[i];
>>> +		printf("Force engine %d\n", failed_engine);
>>> +		for (template_idx = SIG_UL_5G; template_idx <=
>> SIG_UL_5G_LAST;
>>> +				template_idx++) {
>>> +			address = HWPfQmgrGrpTmplateReg4Indx
>>> +					+ BYTES_IN_WORD * template_idx;
>>> +			if (template_idx == failed_engine)
>>> +				acc100_reg_write(d, address, payload);
>>> +			else
>>> +				acc100_reg_write(d, address, 0);
>>> +		}
>>> +		/* Reset descriptor header */
>>> +		desc->req.word0 = ACC100_DMA_DESC_TYPE;
>>> +		desc->req.word1 = 0;
>>> +		desc->req.word2 = 0;
>>> +		desc->req.word3 = 0;
>>> +		desc->req.numCBs = 1;
>>> +		desc->req.m2dlen = 2;
>>> +		desc->req.d2mlen = 1;
>>> +		/* Enqueue the code block for processing */
>>> +		union acc100_enqueue_reg_fmt enq_req;
>>> +		enq_req.val = 0;
>>> +		enq_req.addr_offset = ACC100_DESC_OFFSET;
>>> +		enq_req.num_elem = 1;
>>> +		enq_req.req_elem_addr = 0;
>>> +		rte_wmb();
>>> +		acc100_reg_write(d, HWPfQmgrIngressAq + 0x100,
>> enq_req.val);
>>> +		usleep(LONG_WAIT * 100);
>>> +		if (desc->req.word0 != 2)
>>> +			printf("DMA Response %#"PRIx32"\n", desc-
>>> req.word0);
>>> +	}
>>> +
>>> +	/* Reset LDPC Cores */
>>> +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
>>> +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
>>> +				ACC100_ENGINE_OFFSET * i,
>> ACC100_RESET_HI);
>>> +	usleep(LONG_WAIT);
>>> +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
>>> +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
>>> +				ACC100_ENGINE_OFFSET * i,
>> ACC100_RESET_LO);
>>> +	usleep(LONG_WAIT);
>>> +	acc100_reg_write(d, HWPfHi5GHardResetReg,
>> ACC100_RESET_HARD);
>>> +	usleep(LONG_WAIT);
>>> +	int numEngines = 0;
>>> +	/* Check engine power-on status again */
>>> +	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
>>> +			template_idx++) {
>>> +		address = HwPfFecUl5gIbDebugReg +
>>> +				ACC100_ENGINE_OFFSET * template_idx;
>>> +		status = (acc100_reg_read(d, address) >> 4) & 0xF;
>>> +		address = HWPfQmgrGrpTmplateReg4Indx
>>> +				+ BYTES_IN_WORD * template_idx;
>>> +		if (status == 1) {
>>> +			acc100_reg_write(d, address, payload);
>>> +			numEngines++;
>>> +		} else
>>> +			acc100_reg_write(d, address, 0);
>>> +	}
>>> +	printf("Number of 5GUL engines %d\n", numEngines);
>>> +
>>> +	if (d->sw_rings_base != NULL)
>>> +		rte_free(d->sw_rings_base);
>>> +	usleep(LONG_WAIT);
>>> +}
>>> +
>>> +/* Initial configuration of a ACC100 device prior to running
>>> +configure() */ int acc100_configure(const char *dev_name, struct 
>>> +acc100_conf *conf) {
>>> +	rte_bbdev_log(INFO, "acc100_configure");
>>> +	uint32_t payload, address, status;
>> maybe value or data would be a better variable name than payload.
>>
>> would mean changing acc100_reg_write
> transparent to me, but can change given DPDK uses term value. 
>
>
>>> +	int qg_idx, template_idx, vf_idx, acc, i;
>>> +	struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name);
>>> +
>>> +	/* Compile time checks */
>>> +	RTE_BUILD_BUG_ON(sizeof(struct acc100_dma_req_desc) != 256);
>>> +	RTE_BUILD_BUG_ON(sizeof(union acc100_dma_desc) != 256);
>>> +	RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_td) != 24);
>>> +	RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_te) != 32);
>>> +
>>> +	if (bbdev == NULL) {
>>> +		rte_bbdev_log(ERR,
>>> +		"Invalid dev_name (%s), or device is not yet initialised",
>>> +		dev_name);
>>> +		return -ENODEV;
>>> +	}
>>> +	struct acc100_device *d = bbdev->data->dev_private;
>>> +
>>> +	/* Store configuration */
>>> +	rte_memcpy(&d->acc100_conf, conf, sizeof(d->acc100_conf));
>>> +
>>> +	/* PCIe Bridge configuration */
>>> +	acc100_reg_write(d, HwPfPcieGpexBridgeControl,
>> ACC100_CFG_PCI_BRIDGE);
>>> +	for (i = 1; i < 17; i++)
>> 17 is a magic number, use a #define
>>
>> this is a general issue.
> These are only used once but still agreed.
>
>>> +		acc100_reg_write(d,
>>> +
>> 	HwPfPcieGpexAxiAddrMappingWindowPexBaseHigh
>>> +				+ i * 16, 0);
>>> +
>>> +	/* PCIe Link Trainiing and Status State Machine */
>>> +	acc100_reg_write(d, HwPfPcieGpexLtssmStateCntrl, 0xDFC00000);
>>> +
>>> +	/* Prevent blocking AXI read on BRESP for AXI Write */
>>> +	address = HwPfPcieGpexAxiPioControl;
>>> +	payload = ACC100_CFG_PCI_AXI;
>>> +	acc100_reg_write(d, address, payload);
>>> +
>>> +	/* 5GDL PLL phase shift */
>>> +	acc100_reg_write(d, HWPfChaDl5gPllPhshft0, 0x1);
>>> +
>>> +	/* Explicitly releasing AXI as this may be stopped after PF FLR/BME */
>>> +	address = HWPfDmaAxiControl;
>>> +	payload = 1;
>>> +	acc100_reg_write(d, address, payload);
>>> +
>>> +	/* DDR Configuration */
>>> +	address = HWPfDdrBcTim6;
>>> +	payload = acc100_reg_read(d, address);
>>> +	payload &= 0xFFFFFFFB; /* Bit 2 */ #ifdef ACC100_DDR_ECC_ENABLE
>>> +	payload |= 0x4;
>>> +#endif
>>> +	acc100_reg_write(d, address, payload);
>>> +	address = HWPfDdrPhyDqsCountNum;
>>> +#ifdef ACC100_DDR_ECC_ENABLE
>>> +	payload = 9;
>>> +#else
>>> +	payload = 8;
>>> +#endif
>>> +	acc100_reg_write(d, address, payload);
>>> +
>>> +	/* Set default descriptor signature */
>>> +	address = HWPfDmaDescriptorSignatuture;
>>> +	payload = 0;
>>> +	acc100_reg_write(d, address, payload);
>>> +
>>> +	/* Enable the Error Detection in DMA */
>>> +	payload = ACC100_CFG_DMA_ERROR;
>>> +	address = HWPfDmaErrorDetectionEn;
>>> +	acc100_reg_write(d, address, payload);
>>> +
>>> +	/* AXI Cache configuration */
>>> +	payload = ACC100_CFG_AXI_CACHE;
>>> +	address = HWPfDmaAxcacheReg;
>>> +	acc100_reg_write(d, address, payload);
>>> +
>>> +	/* Default DMA Configuration (Qmgr Enabled) */
>>> +	address = HWPfDmaConfig0Reg;
>>> +	payload = 0;
>>> +	acc100_reg_write(d, address, payload);
>>> +	address = HWPfDmaQmanen;
>>> +	payload = 0;
>>> +	acc100_reg_write(d, address, payload);
>>> +
>>> +	/* Default RLIM/ALEN configuration */
>>> +	address = HWPfDmaConfig1Reg;
>>> +	payload = (1 << 31) + (23 << 8) + (1 << 6) + 7;
>>> +	acc100_reg_write(d, address, payload);
>>> +
>>> +	/* Configure DMA Qmanager addresses */
>>> +	address = HWPfDmaQmgrAddrReg;
>>> +	payload = HWPfQmgrEgressQueuesTemplate;
>>> +	acc100_reg_write(d, address, payload);
>>> +
>>> +	/* ===== Qmgr Configuration ===== */
>>> +	/* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2
>> for UL */
>>> +	int totalQgs = conf->q_ul_4g.num_qgroups +
>>> +			conf->q_ul_5g.num_qgroups +
>>> +			conf->q_dl_4g.num_qgroups +
>>> +			conf->q_dl_5g.num_qgroups;
>>> +	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
>>> +		address = HWPfQmgrDepthLog2Grp +
>>> +		BYTES_IN_WORD * qg_idx;
>>> +		payload = aqDepth(qg_idx, conf);
>>> +		acc100_reg_write(d, address, payload);
>>> +		address = HWPfQmgrTholdGrp +
>>> +		BYTES_IN_WORD * qg_idx;
>>> +		payload = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1));
>>> +		acc100_reg_write(d, address, payload);
>>> +	}
>>> +
>>> +	/* Template Priority in incremental order */
>>> +	for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
>>> +			template_idx++) {
>>> +		address = HWPfQmgrGrpTmplateReg0Indx +
>>> +		BYTES_IN_WORD * (template_idx % 8);
>>> +		payload = TMPL_PRI_0;
>>> +		acc100_reg_write(d, address, payload);
>>> +		address = HWPfQmgrGrpTmplateReg1Indx +
>>> +		BYTES_IN_WORD * (template_idx % 8);
>>> +		payload = TMPL_PRI_1;
>>> +		acc100_reg_write(d, address, payload);
>>> +		address = HWPfQmgrGrpTmplateReg2indx +
>>> +		BYTES_IN_WORD * (template_idx % 8);
>>> +		payload = TMPL_PRI_2;
>>> +		acc100_reg_write(d, address, payload);
>>> +		address = HWPfQmgrGrpTmplateReg3Indx +
>>> +		BYTES_IN_WORD * (template_idx % 8);
>>> +		payload = TMPL_PRI_3;
>>> +		acc100_reg_write(d, address, payload);
>>> +	}
>>> +
>>> +	address = HWPfQmgrGrpPriority;
>>> +	payload = ACC100_CFG_QMGR_HI_P;
>>> +	acc100_reg_write(d, address, payload);
>>> +
>>> +	/* Template Configuration */
>>> +	for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
>> template_idx++) {
>>> +		payload = 0;
>>> +		address = HWPfQmgrGrpTmplateReg4Indx
>>> +				+ BYTES_IN_WORD * template_idx;
>>> +		acc100_reg_write(d, address, payload);
>>> +	}
>>> +	/* 4GUL */
>>> +	int numQgs = conf->q_ul_4g.num_qgroups;
>>> +	int numQqsAcc = 0;
>>> +	payload = 0;
>>> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
>> qg_idx++)
>>> +		payload |= (1 << qg_idx);
>>> +	for (template_idx = SIG_UL_4G; template_idx <= SIG_UL_4G_LAST;
>>> +			template_idx++) {
>>> +		address = HWPfQmgrGrpTmplateReg4Indx
>>> +				+ BYTES_IN_WORD*template_idx;
>>> +		acc100_reg_write(d, address, payload);
>>> +	}
>>> +	/* 5GUL */
>>> +	numQqsAcc += numQgs;
>>> +	numQgs	= conf->q_ul_5g.num_qgroups;
>>> +	payload = 0;
>>> +	int numEngines = 0;
>>> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
>> qg_idx++)
>>> +		payload |= (1 << qg_idx);
>>> +	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
>>> +			template_idx++) {
>>> +		/* Check engine power-on status */
>>> +		address = HwPfFecUl5gIbDebugReg +
>>> +				ACC100_ENGINE_OFFSET * template_idx;
>>> +		status = (acc100_reg_read(d, address) >> 4) & 0xF;
>>> +		address = HWPfQmgrGrpTmplateReg4Indx
>>> +				+ BYTES_IN_WORD * template_idx;
>>> +		if (status == 1) {
>>> +			acc100_reg_write(d, address, payload);
>>> +			numEngines++;
>>> +		} else
>>> +			acc100_reg_write(d, address, 0);
>>> +		#if RTE_ACC100_SINGLE_FEC == 1
>> #if should be at start of line
> ok
>
>>> +		payload = 0;
>>> +		#endif
>>> +	}
>>> +	printf("Number of 5GUL engines %d\n", numEngines);
>>> +	/* 4GDL */
>>> +	numQqsAcc += numQgs;
>>> +	numQgs	= conf->q_dl_4g.num_qgroups;
>>> +	payload = 0;
>>> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
>> qg_idx++)
>>> +		payload |= (1 << qg_idx);
>>> +	for (template_idx = SIG_DL_4G; template_idx <= SIG_DL_4G_LAST;
>>> +			template_idx++) {
>>> +		address = HWPfQmgrGrpTmplateReg4Indx
>>> +				+ BYTES_IN_WORD*template_idx;
>>> +		acc100_reg_write(d, address, payload);
>>> +		#if RTE_ACC100_SINGLE_FEC == 1
>>> +			payload = 0;
>>> +		#endif
>>> +	}
>>> +	/* 5GDL */
>>> +	numQqsAcc += numQgs;
>>> +	numQgs	= conf->q_dl_5g.num_qgroups;
>>> +	payload = 0;
>>> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
>> qg_idx++)
>>> +		payload |= (1 << qg_idx);
>>> +	for (template_idx = SIG_DL_5G; template_idx <= SIG_DL_5G_LAST;
>>> +			template_idx++) {
>>> +		address = HWPfQmgrGrpTmplateReg4Indx
>>> +				+ BYTES_IN_WORD*template_idx;
>>> +		acc100_reg_write(d, address, payload);
>>> +		#if RTE_ACC100_SINGLE_FEC == 1
>>> +		payload = 0;
>>> +		#endif
>>> +	}
>>> +
>>> +	/* Queue Group Function mapping */
>>> +	int qman_func_id[5] = {0, 2, 1, 3, 4};
>>> +	address = HWPfQmgrGrpFunction0;
>>> +	payload = 0;
>>> +	for (qg_idx = 0; qg_idx < 8; qg_idx++) {
>>> +		acc = accFromQgid(qg_idx, conf);
>>> +		payload |= qman_func_id[acc]<<(qg_idx * 4);
>>> +	}
>>> +	acc100_reg_write(d, address, payload);
>>> +
>>> +	/* Configuration of the Arbitration QGroup depth to 1 */
>>> +	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
>>> +		address = HWPfQmgrArbQDepthGrp +
>>> +		BYTES_IN_WORD * qg_idx;
>>> +		payload = 0;
>>> +		acc100_reg_write(d, address, payload);
>>> +	}
>>> +
>>> +	/* Enabling AQueues through the Queue hierarchy*/
>>> +	for (vf_idx = 0; vf_idx < ACC100_NUM_VFS; vf_idx++) {
>>> +		for (qg_idx = 0; qg_idx < ACC100_NUM_QGRPS; qg_idx++) {
>>> +			payload = 0;
>>> +			if (vf_idx < conf->num_vf_bundles &&
>>> +					qg_idx < totalQgs)
>>> +				payload = (1 << aqNum(qg_idx, conf)) - 1;
>>> +			address = HWPfQmgrAqEnableVf
>>> +					+ vf_idx * BYTES_IN_WORD;
>>> +			payload += (qg_idx << 16);
>>> +			acc100_reg_write(d, address, payload);
>>> +		}
>>> +	}
>>> +
>>> +	/* This pointer to ARAM (256kB) is shifted by 2 (4B per register) */
>>> +	uint32_t aram_address = 0;
>>> +	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
>>> +		for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
>>> +			address = HWPfQmgrVfBaseAddr + vf_idx
>>> +					* BYTES_IN_WORD + qg_idx
>>> +					* BYTES_IN_WORD * 64;
>>> +			payload = aram_address;
>>> +			acc100_reg_write(d, address, payload);
>>> +			/* Offset ARAM Address for next memory bank
>>> +			 * - increment of 4B
>>> +			 */
>>> +			aram_address += aqNum(qg_idx, conf) *
>>> +					(1 << aqDepth(qg_idx, conf));
>>> +		}
>>> +	}
>>> +
>>> +	if (aram_address > WORDS_IN_ARAM_SIZE) {
>>> +		rte_bbdev_log(ERR, "ARAM Configuration not fitting %d
>> %d\n",
>>> +				aram_address, WORDS_IN_ARAM_SIZE);
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	/* ==== HI Configuration ==== */
>>> +
>>> +	/* Prevent Block on Transmit Error */
>>> +	address = HWPfHiBlockTransmitOnErrorEn;
>>> +	payload = 0;
>>> +	acc100_reg_write(d, address, payload);
>>> +	/* Prevents to drop MSI */
>>> +	address = HWPfHiMsiDropEnableReg;
>>> +	payload = 0;
>>> +	acc100_reg_write(d, address, payload);
>>> +	/* Set the PF Mode register */
>>> +	address = HWPfHiPfMode;
>>> +	payload = (conf->pf_mode_en) ? 2 : 0;
>>> +	acc100_reg_write(d, address, payload);
>>> +	/* Enable Error Detection in HW */
>>> +	address = HWPfDmaErrorDetectionEn;
>>> +	payload = 0x3D7;
>>> +	acc100_reg_write(d, address, payload);
>>> +
>>> +	/* QoS overflow init */
>>> +	payload = 1;
>>> +	address = HWPfQosmonAEvalOverflow0;
>>> +	acc100_reg_write(d, address, payload);
>>> +	address = HWPfQosmonBEvalOverflow0;
>>> +	acc100_reg_write(d, address, payload);
>>> +
>>> +	/* HARQ DDR Configuration */
>>> +	unsigned int ddrSizeInMb = 512; /* Fixed to 512 MB per VF for now
>> */
>>> +	for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
>>> +		address = HWPfDmaVfDdrBaseRw + vf_idx
>>> +				* 0x10;
>>> +		payload = ((vf_idx * (ddrSizeInMb / 64)) << 16) +
>>> +				(ddrSizeInMb - 1);
>>> +		acc100_reg_write(d, address, payload);
>>> +	}
>>> +	usleep(LONG_WAIT);
>> Is sleep needed here ? the reg_write has one.
> This one is needed on top
>
>>> +
>> Since this seems like a workaround, add a comment here.
> fair enough, ok, thanks
>
>> Tom
>>
>>> +	if (numEngines < (SIG_UL_5G_LAST + 1))
>>> +		poweron_cleanup(bbdev, d, conf);
>>> +
>>> +	rte_bbdev_log_debug("PF Tip configuration complete for %s",
>> dev_name);
>>> +	return 0;
>>> +}
>>> diff --git 
>>> a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
>>> b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
>>> index 4a76d1d..91c234d 100644
>>> --- a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
>>> +++ b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
>>> @@ -1,3 +1,10 @@
>>>  DPDK_21 {
>>>  	local: *;
>>>  };
>>> +
>>> +EXPERIMENTAL {
>>> +	global:
>>> +
>>> +	acc100_configure;
>>> +
>>> +};
  
Chautru, Nicolas Oct. 1, 2020, 9:11 p.m. UTC | #4
Hi Tom, 

> From: Tom Rix <trix@redhat.com>
> On 9/30/20 3:54 PM, Chautru, Nicolas wrote:
> > Hi Tom,
> >
> >> From: Tom Rix <trix@redhat.com>
> >> On 9/28/20 5:29 PM, Nicolas Chautru wrote:
> > >>> Add configure function to configure the PF from within the
> > >>> bbdev-test itself without an external application configuring the device.
> >>>
> >>> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> >>> Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
> >>> ---
> >>>  app/test-bbdev/test_bbdev_perf.c                   |  72 +++
> >>>  doc/guides/rel_notes/release_20_11.rst             |   5 +
> >>>  drivers/baseband/acc100/meson.build                |   2 +
> >>>  drivers/baseband/acc100/rte_acc100_cfg.h           |  17 +
> >>>  drivers/baseband/acc100/rte_acc100_pmd.c           | 505
> >> +++++++++++++++++++++
> >>>  .../acc100/rte_pmd_bbdev_acc100_version.map        |   7 +
> >>>  6 files changed, 608 insertions(+)
> >>>
> >>> diff --git a/app/test-bbdev/test_bbdev_perf.c
> >>> b/app/test-bbdev/test_bbdev_perf.c
> >>> index 45c0d62..32f23ff 100644
> >>> --- a/app/test-bbdev/test_bbdev_perf.c
> >>> +++ b/app/test-bbdev/test_bbdev_perf.c
> >>> @@ -52,6 +52,18 @@
> >>>  #define FLR_5G_TIMEOUT 610
> >>>  #endif
> >>>
> >>> +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
> >>> +#include <rte_acc100_cfg.h>
> >>> +#define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
> >>> +#define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
> >>> +#define ACC100_QMGR_NUM_AQS 16
> >>> +#define ACC100_QMGR_NUM_QGS 2
> >>> +#define ACC100_QMGR_AQ_DEPTH 5
> >>> +#define ACC100_QMGR_INVALID_IDX -1
> >>> +#define ACC100_QMGR_RR 1
> >>> +#define ACC100_QOS_GBR 0
> >>> +#endif
> >>> +
> >>>  #define OPS_CACHE_SIZE 256U
> >>>  #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
> >>>
> >>> @@ -653,6 +665,66 @@ typedef int (test_case_function)(struct
> >> active_device *ad,
> >>>  				info->dev_name);
> >>>  	}
> >>>  #endif
> >>> +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
> >> seems like this function would break if one of the other bbdev's were
> >> #defined.
> > No these are independent. By default they are all defined.
> ok
> >
> >
> >>> +	if ((get_init_device() == true) &&
> >>> +		(!strcmp(info->drv.driver_name,
> >> ACC100PF_DRIVER_NAME))) {
> >>> +		struct acc100_conf conf;
> >>> +		unsigned int i;
> >>> +
> >>> +		printf("Configure ACC100 FEC Driver %s with default
> >> values\n",
> >>> +				info->drv.driver_name);
> >>> +
> >>> +		/* clear default configuration before initialization */
> >>> +		memset(&conf, 0, sizeof(struct acc100_conf));
> >>> +
> >>> +		/* Always set in PF mode for built-in configuration */
> >>> +		conf.pf_mode_en = true;
> >>> +		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
> >>> +			conf.arb_dl_4g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> +			conf.arb_dl_4g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> +			conf.arb_dl_4g[i].round_robin_weight =
> >> ACC100_QMGR_RR;
> >>> +			conf.arb_ul_4g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> +			conf.arb_ul_4g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> +			conf.arb_ul_4g[i].round_robin_weight =
> >> ACC100_QMGR_RR;
> >>> +			conf.arb_dl_5g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> +			conf.arb_dl_5g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> +			conf.arb_dl_5g[i].round_robin_weight =
> >> ACC100_QMGR_RR;
> >>> +			conf.arb_ul_5g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> +			conf.arb_ul_5g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> +			conf.arb_ul_5g[i].round_robin_weight =
> >> ACC100_QMGR_RR;
> >>> +		}
> >>> +
> >>> +		conf.input_pos_llr_1_bit = true;
> >>> +		conf.output_pos_llr_1_bit = true;
> >>> +		conf.num_vf_bundles = 1; /**< Number of VF bundles to
> >> setup */
> >>> +
> >>> +		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
> >>> +		conf.q_ul_4g.first_qgroup_index =
> >> ACC100_QMGR_INVALID_IDX;
> >>> +		conf.q_ul_4g.num_aqs_per_groups =
> >> ACC100_QMGR_NUM_AQS;
> >>> +		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> >>> +		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
> >>> +		conf.q_dl_4g.first_qgroup_index =
> >> ACC100_QMGR_INVALID_IDX;
> >>> +		conf.q_dl_4g.num_aqs_per_groups =
> >> ACC100_QMGR_NUM_AQS;
> >>> +		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> >>> +		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
> >>> +		conf.q_ul_5g.first_qgroup_index =
> >> ACC100_QMGR_INVALID_IDX;
> >>> +		conf.q_ul_5g.num_aqs_per_groups =
> >> ACC100_QMGR_NUM_AQS;
> >>> +		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> >>> +		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
> >>> +		conf.q_dl_5g.first_qgroup_index =
> >> ACC100_QMGR_INVALID_IDX;
> >>> +		conf.q_dl_5g.num_aqs_per_groups =
> >> ACC100_QMGR_NUM_AQS;
> >>> +		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> >>> +
> >>> +		/* setup PF with configuration information */
> >>> +		ret = acc100_configure(info->dev_name, &conf);
> >>> +		TEST_ASSERT_SUCCESS(ret,
> >>> +				"Failed to configure ACC100 PF for bbdev
> >> %s",
> >>> +				info->dev_name);
> >>> +		/* Let's refresh this now this is configured */
> >>> +	}
> >>> +	rte_bbdev_info_get(dev_id, info);
> >> The other bbdev's do not call rte_bbdev_info_get, can this be removed ?
> > Actually it should be added outside for all versions
> > post-configuration. Thanks
> >
> >>> +#endif
> >>> +
> >>>  	nb_queues = RTE_MIN(rte_lcore_count(), info-
> drv.max_num_queues);
> >>>  	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
> >>>
> >>> diff --git a/doc/guides/rel_notes/release_20_11.rst
> >>> b/doc/guides/rel_notes/release_20_11.rst
> >>> index 73ac08f..c8d0586 100644
> >>> --- a/doc/guides/rel_notes/release_20_11.rst
> >>> +++ b/doc/guides/rel_notes/release_20_11.rst
> >>> @@ -55,6 +55,11 @@ New Features
> >>>       Also, make sure to start the actual text at the margin.
> >>>       =======================================================
> >>>
> >>> +* **Added Intel ACC100 bbdev PMD.**
> >>> +
> >>> +  Added a new ``acc100`` bbdev driver for the Intel\ |reg| ACC100
> >>> + accelerator  also known as Mount Bryce.  See the
> >>> + :doc:`../bbdevs/acc100` BBDEV guide for more details on this new
> driver.
> >>>
> >>>  Removed Items
> >>>  -------------
> >>> diff --git a/drivers/baseband/acc100/meson.build
> >>> b/drivers/baseband/acc100/meson.build
> >>> index 8afafc2..7ac44dc 100644
> >>> --- a/drivers/baseband/acc100/meson.build
> >>> +++ b/drivers/baseband/acc100/meson.build
> >>> @@ -4,3 +4,5 @@
> >>>  deps += ['bbdev', 'bus_vdev', 'ring', 'pci', 'bus_pci']
> >>>
> >>>  sources = files('rte_acc100_pmd.c')
> >>> +
> >>> +install_headers('rte_acc100_cfg.h')
> >>> diff --git a/drivers/baseband/acc100/rte_acc100_cfg.h
> >>> b/drivers/baseband/acc100/rte_acc100_cfg.h
> >>> index 73bbe36..7f523bc 100644
> >>> --- a/drivers/baseband/acc100/rte_acc100_cfg.h
> >>> +++ b/drivers/baseband/acc100/rte_acc100_cfg.h
> >>> @@ -89,6 +89,23 @@ struct acc100_conf {
> >>>  	struct rte_arbitration_t arb_dl_5g[RTE_ACC100_NUM_VFS];  };
> >>>
> >>> +/**
> >>> + * Configure a ACC100 device
> >>> + *
> >>> + * @param dev_name
> >>> + *   The name of the device. This is the short form of PCI BDF, e.g.
> 00:01.0.
> >>> + *   It can also be retrieved for a bbdev device from the dev_name field
> in
> >> the
> >>> + *   rte_bbdev_info structure returned by rte_bbdev_info_get().
> >>> + * @param conf
> >>> + *   Configuration to apply to ACC100 HW.
> >>> + *
> >>> + * @return
> >>> + *   Zero on success, negative value on failure.
> >>> + */
> >>> +__rte_experimental
> >>> +int
> >>> +acc100_configure(const char *dev_name, struct acc100_conf *conf);
> >>> +
> >>>  #ifdef __cplusplus
> >>>  }
> >>>  #endif
> >>> diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> b/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> index 3589814..b50dd32 100644
> >>> --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> @@ -85,6 +85,26 @@
> >>>
> >>>  enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC};
> >>>
> >>> +/* Return the accelerator enum for a Queue Group Index */ static
> >>> +inline int accFromQgid(int qg_idx, const struct acc100_conf
> >>> +*acc100_conf) {
> >>> +	int accQg[ACC100_NUM_QGRPS];
> >>> +	int NumQGroupsPerFn[NUM_ACC];
> >>> +	int acc, qgIdx, qgIndex = 0;
> >>> +	for (qgIdx = 0; qgIdx < ACC100_NUM_QGRPS; qgIdx++)
> >>> +		accQg[qgIdx] = 0;
> >>> +	NumQGroupsPerFn[UL_4G] = acc100_conf->q_ul_4g.num_qgroups;
> >>> +	NumQGroupsPerFn[UL_5G] = acc100_conf->q_ul_5g.num_qgroups;
> >>> +	NumQGroupsPerFn[DL_4G] = acc100_conf->q_dl_4g.num_qgroups;
> >>> +	NumQGroupsPerFn[DL_5G] = acc100_conf->q_dl_5g.num_qgroups;
> >>> +	for (acc = UL_4G;  acc < NUM_ACC; acc++)
> >>> +		for (qgIdx = 0; qgIdx < NumQGroupsPerFn[acc]; qgIdx++)
> >>> +			accQg[qgIndex++] = acc;
> >> This looks inefficient, is there a way this could be calculated
> >> without filling arrays to
> >>
> >> access 1 value ?
> > That is not time critical, and the same common code is run each time.
> ok
> >
> >>> +	acc = accQg[qg_idx];
> >>> +	return acc;
> >>> +}
> >>> +
> >>>  /* Return the queue topology for a Queue Group Index */  static
> >>> inline void  qtopFromAcc(struct rte_q_topology_t **qtop, int
> >>> acc_enum, @@ -113,6 +133,30 @@
> >>>  	*qtop = p_qtop;
> >>>  }
> >>>
> >>> +/* Return the AQ depth for a Queue Group Index */ static inline int
> >>> +aqDepth(int qg_idx, struct acc100_conf *acc100_conf) {
> >>> +	struct rte_q_topology_t *q_top = NULL;
> >>> +	int acc_enum = accFromQgid(qg_idx, acc100_conf);
> >>> +	qtopFromAcc(&q_top, acc_enum, acc100_conf);
> >>> +	if (unlikely(q_top == NULL))
> >>> +		return 0;
> > >> This error is not handled well by the callers.
> >>
> >> aqNum is similar.
> > > This fails on a consistent basis, by having no queue available and handling
> this as the default case.
> ok
> >
> >>> +	return q_top->aq_depth_log2;
> >>> +}
> >>> +
> >>> +/* Return the AQ depth for a Queue Group Index */ static inline int
> >>> +aqNum(int qg_idx, struct acc100_conf *acc100_conf) {
> >>> +	struct rte_q_topology_t *q_top = NULL;
> >>> +	int acc_enum = accFromQgid(qg_idx, acc100_conf);
> >>> +	qtopFromAcc(&q_top, acc_enum, acc100_conf);
> >>> +	if (unlikely(q_top == NULL))
> >>> +		return 0;
> >>> +	return q_top->num_aqs_per_groups;
> >>> +}
> >>> +
> >>>  static void
> >>>  initQTop(struct acc100_conf *acc100_conf)  { @@ -4177,3 +4221,464
> >>> @@ static int acc100_pci_remove(struct rte_pci_device *pci_dev)
> >>> RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME,
> >>> pci_id_acc100_pf_map);
> >> RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME,
> >>> acc100_pci_vf_driver);
> >>> RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME,
> >>> pci_id_acc100_vf_map);
> >>> +
> >>> +/*
> >>> + * Implementation to fix the power on status of some 5GUL engines
> >>> + * This requires DMA permission if ported outside DPDK
> >> This sounds like a workaround, can more detail be added here ?
> > There are comments through the code I believe:
> >   - /* Detect engines in undefined state */
> >   - /* Force each engine which is in unspecified state */
> >   - /* Reset LDPC Cores */
> >   - /* Check engine power-on status again */ Do you believe this is not
> explicit enough? Power-on status may be in an undefined state, hence these
> engines are activated with a dummy payload to make sure they are in a
> predictable state once configuration is done.
> 
> Yes, not explicit enough. They do not say it is a workaround so someone else
> would not know that
> 
> this is needed or likely needs adjusting in the future.  Maybe change
> 
> /* Check engine power-on status again */ to
> 
> /*
> 
>  * Power-on status may be in an undefined state.
> 
>  * Activate this engine with a dummy payload to make sure the state is
> defined.
> 
>  */
> 

OK I will add a bit more in comments. Thanks


> Tom
> 
> >>> + */
> >>> +static void
> >>> +poweron_cleanup(struct rte_bbdev *bbdev, struct acc100_device *d,
> >>> +		struct acc100_conf *conf)
> >>> +{
> >>> +	int i, template_idx, qg_idx;
> >>> +	uint32_t address, status, payload;
> >>> +	printf("Need to clear power-on 5GUL status in internal memory\n");
> >>> +	/* Reset LDPC Cores */
> >>> +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
> >>> +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> >>> +				ACC100_ENGINE_OFFSET * i,
> >> ACC100_RESET_HI);
> >>> +	usleep(LONG_WAIT);
> >>> +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
> >>> +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> >>> +				ACC100_ENGINE_OFFSET * i,
> >> ACC100_RESET_LO);
> >>> +	usleep(LONG_WAIT);
> >>> +	/* Prepare dummy workload */
> >>> +	alloc_2x64mb_sw_rings_mem(bbdev, d, 0);
> >>> +	/* Set base addresses */
> >>> +	uint32_t phys_high = (uint32_t)(d->sw_rings_phys >> 32);
> >>> +	uint32_t phys_low  = (uint32_t)(d->sw_rings_phys &
> >>> +			~(ACC100_SIZE_64MBYTE-1));
> >>> +	acc100_reg_write(d, HWPfDmaFec5GulDescBaseHiRegVf,
> >> phys_high);
> >>> +	acc100_reg_write(d, HWPfDmaFec5GulDescBaseLoRegVf, phys_low);
> >>> +
> >>> +	/* Descriptor for a dummy 5GUL code block processing*/
> >>> +	union acc100_dma_desc *desc = NULL;
> >>> +	desc = d->sw_rings;
> >>> +	desc->req.data_ptrs[0].address = d->sw_rings_phys +
> >>> +			ACC100_DESC_FCW_OFFSET;
> >>> +	desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN;
> >>> +	desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
> >>> +	desc->req.data_ptrs[0].last = 0;
> >>> +	desc->req.data_ptrs[0].dma_ext = 0;
> >>> +	desc->req.data_ptrs[1].address = d->sw_rings_phys + 512;
> >>> +	desc->req.data_ptrs[1].blkid = ACC100_DMA_BLKID_IN;
> >>> +	desc->req.data_ptrs[1].last = 1;
> >>> +	desc->req.data_ptrs[1].dma_ext = 0;
> >>> +	desc->req.data_ptrs[1].blen = 44;
> >>> +	desc->req.data_ptrs[2].address = d->sw_rings_phys + 1024;
> >>> +	desc->req.data_ptrs[2].blkid = ACC100_DMA_BLKID_OUT_ENC;
> >>> +	desc->req.data_ptrs[2].last = 1;
> >>> +	desc->req.data_ptrs[2].dma_ext = 0;
> >>> +	desc->req.data_ptrs[2].blen = 5;
> >>> +	/* Dummy FCW */
> >>> +	desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
> >>> +	desc->req.fcw_ld.qm = 1;
> >>> +	desc->req.fcw_ld.nfiller = 30;
> >>> +	desc->req.fcw_ld.BG = 2 - 1;
> >>> +	desc->req.fcw_ld.Zc = 7;
> >>> +	desc->req.fcw_ld.ncb = 350;
> >>> +	desc->req.fcw_ld.rm_e = 4;
> >>> +	desc->req.fcw_ld.itmax = 10;
> >>> +	desc->req.fcw_ld.gain_i = 1;
> >>> +	desc->req.fcw_ld.gain_h = 1;
> >>> +
> >>> +	int engines_to_restart[SIG_UL_5G_LAST + 1] = {0};
> >>> +	int num_failed_engine = 0;
> >>> +	/* Detect engines in undefined state */
> >>> +	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> >>> +			template_idx++) {
> >>> +		/* Check engine power-on status */
> >>> +		address = HwPfFecUl5gIbDebugReg +
> >>> +				ACC100_ENGINE_OFFSET * template_idx;
> >>> +		status = (acc100_reg_read(d, address) >> 4) & 0xF;
> >>> +		if (status == 0) {
> >>> +			engines_to_restart[num_failed_engine] =
> >> template_idx;
> >>> +			num_failed_engine++;
> >>> +		}
> >>> +	}
> >>> +
> >>> +	int numQqsAcc = conf->q_ul_5g.num_qgroups;
> >>> +	int numQgs = conf->q_ul_5g.num_qgroups;
> >>> +	payload = 0;
> >>> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> >> qg_idx++)
> >>> +		payload |= (1 << qg_idx);
> >>> +	/* Force each engine which is in unspecified state */
> >>> +	for (i = 0; i < num_failed_engine; i++) {
> >>> +		int failed_engine = engines_to_restart[i];
> >>> +		printf("Force engine %d\n", failed_engine);
> >>> +		for (template_idx = SIG_UL_5G; template_idx <=
> >> SIG_UL_5G_LAST;
> >>> +				template_idx++) {
> >>> +			address = HWPfQmgrGrpTmplateReg4Indx
> >>> +					+ BYTES_IN_WORD * template_idx;
> >>> +			if (template_idx == failed_engine)
> >>> +				acc100_reg_write(d, address, payload);
> >>> +			else
> >>> +				acc100_reg_write(d, address, 0);
> >>> +		}
> >>> +		/* Reset descriptor header */
> >>> +		desc->req.word0 = ACC100_DMA_DESC_TYPE;
> >>> +		desc->req.word1 = 0;
> >>> +		desc->req.word2 = 0;
> >>> +		desc->req.word3 = 0;
> >>> +		desc->req.numCBs = 1;
> >>> +		desc->req.m2dlen = 2;
> >>> +		desc->req.d2mlen = 1;
> >>> +		/* Enqueue the code block for processing */
> >>> +		union acc100_enqueue_reg_fmt enq_req;
> >>> +		enq_req.val = 0;
> >>> +		enq_req.addr_offset = ACC100_DESC_OFFSET;
> >>> +		enq_req.num_elem = 1;
> >>> +		enq_req.req_elem_addr = 0;
> >>> +		rte_wmb();
> >>> +		acc100_reg_write(d, HWPfQmgrIngressAq + 0x100,
> >> enq_req.val);
> >>> +		usleep(LONG_WAIT * 100);
> >>> +		if (desc->req.word0 != 2)
> >>> +			printf("DMA Response %#"PRIx32"\n", desc-
> >>> req.word0);
> >>> +	}
> >>> +
> >>> +	/* Reset LDPC Cores */
> >>> +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
> >>> +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> >>> +				ACC100_ENGINE_OFFSET * i,
> >> ACC100_RESET_HI);
> >>> +	usleep(LONG_WAIT);
> >>> +	for (i = 0; i < ACC100_ENGINES_MAX; i++)
> >>> +		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> >>> +				ACC100_ENGINE_OFFSET * i,
> >> ACC100_RESET_LO);
> >>> +	usleep(LONG_WAIT);
> >>> +	acc100_reg_write(d, HWPfHi5GHardResetReg,
> >> ACC100_RESET_HARD);
> >>> +	usleep(LONG_WAIT);
> >>> +	int numEngines = 0;
> >>> +	/* Check engine power-on status again */
> >>> +	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> >>> +			template_idx++) {
> >>> +		address = HwPfFecUl5gIbDebugReg +
> >>> +				ACC100_ENGINE_OFFSET * template_idx;
> >>> +		status = (acc100_reg_read(d, address) >> 4) & 0xF;
> >>> +		address = HWPfQmgrGrpTmplateReg4Indx
> >>> +				+ BYTES_IN_WORD * template_idx;
> >>> +		if (status == 1) {
> >>> +			acc100_reg_write(d, address, payload);
> >>> +			numEngines++;
> >>> +		} else
> >>> +			acc100_reg_write(d, address, 0);
> >>> +	}
> >>> +	printf("Number of 5GUL engines %d\n", numEngines);
> >>> +
> >>> +	if (d->sw_rings_base != NULL)
> >>> +		rte_free(d->sw_rings_base);
> >>> +	usleep(LONG_WAIT);
> >>> +}
> >>> +
> >>> +/* Initial configuration of a ACC100 device prior to running
> >>> +configure() */ int acc100_configure(const char *dev_name, struct
> >>> +acc100_conf *conf) {
> >>> +	rte_bbdev_log(INFO, "acc100_configure");
> >>> +	uint32_t payload, address, status;
> >> maybe value or data would be a better variable name than payload.
> >>
> >> would mean changing acc100_reg_write
> > transparent to me, but can change given DPDK uses term value.
> >
> >
> >>> +	int qg_idx, template_idx, vf_idx, acc, i;
> >>> +	struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name);
> >>> +
> >>> +	/* Compile time checks */
> >>> +	RTE_BUILD_BUG_ON(sizeof(struct acc100_dma_req_desc) != 256);
> >>> +	RTE_BUILD_BUG_ON(sizeof(union acc100_dma_desc) != 256);
> >>> +	RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_td) != 24);
> >>> +	RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_te) != 32);
> >>> +
> >>> +	if (bbdev == NULL) {
> >>> +		rte_bbdev_log(ERR,
> >>> +		"Invalid dev_name (%s), or device is not yet initialised",
> >>> +		dev_name);
> >>> +		return -ENODEV;
> >>> +	}
> >>> +	struct acc100_device *d = bbdev->data->dev_private;
> >>> +
> >>> +	/* Store configuration */
> >>> +	rte_memcpy(&d->acc100_conf, conf, sizeof(d->acc100_conf));
> >>> +
> >>> +	/* PCIe Bridge configuration */
> >>> +	acc100_reg_write(d, HwPfPcieGpexBridgeControl,
> >> ACC100_CFG_PCI_BRIDGE);
> >>> +	for (i = 1; i < 17; i++)
> >> 17 is a magic number, use a #define
> >>
> >> this is a general issue.
> > These are only used once but still agreed.
> >
> >>> +		acc100_reg_write(d,
> >>> +
> >> 	HwPfPcieGpexAxiAddrMappingWindowPexBaseHigh
> >>> +				+ i * 16, 0);
> >>> +
> >>> +	/* PCIe Link Training and Status State Machine */
> >>> +	acc100_reg_write(d, HwPfPcieGpexLtssmStateCntrl, 0xDFC00000);
> >>> +
> >>> +	/* Prevent blocking AXI read on BRESP for AXI Write */
> >>> +	address = HwPfPcieGpexAxiPioControl;
> >>> +	payload = ACC100_CFG_PCI_AXI;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +
> >>> +	/* 5GDL PLL phase shift */
> >>> +	acc100_reg_write(d, HWPfChaDl5gPllPhshft0, 0x1);
> >>> +
> >>> +	/* Explicitly releasing AXI as this may be stopped after PF FLR/BME */
> >>> +	address = HWPfDmaAxiControl;
> >>> +	payload = 1;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +
> >>> +	/* DDR Configuration */
> >>> +	address = HWPfDdrBcTim6;
> >>> +	payload = acc100_reg_read(d, address);
> >>> +	payload &= 0xFFFFFFFB; /* Bit 2 */ #ifdef ACC100_DDR_ECC_ENABLE
> >>> +	payload |= 0x4;
> >>> +#endif
> >>> +	acc100_reg_write(d, address, payload);
> >>> +	address = HWPfDdrPhyDqsCountNum;
> >>> +#ifdef ACC100_DDR_ECC_ENABLE
> >>> +	payload = 9;
> >>> +#else
> >>> +	payload = 8;
> >>> +#endif
> >>> +	acc100_reg_write(d, address, payload);
> >>> +
> >>> +	/* Set default descriptor signature */
> >>> +	address = HWPfDmaDescriptorSignatuture;
> >>> +	payload = 0;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +
> >>> +	/* Enable the Error Detection in DMA */
> >>> +	payload = ACC100_CFG_DMA_ERROR;
> >>> +	address = HWPfDmaErrorDetectionEn;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +
> >>> +	/* AXI Cache configuration */
> >>> +	payload = ACC100_CFG_AXI_CACHE;
> >>> +	address = HWPfDmaAxcacheReg;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +
> >>> +	/* Default DMA Configuration (Qmgr Enabled) */
> >>> +	address = HWPfDmaConfig0Reg;
> >>> +	payload = 0;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +	address = HWPfDmaQmanen;
> >>> +	payload = 0;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +
> >>> +	/* Default RLIM/ALEN configuration */
> >>> +	address = HWPfDmaConfig1Reg;
> >>> +	payload = (1 << 31) + (23 << 8) + (1 << 6) + 7;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +
> >>> +	/* Configure DMA Qmanager addresses */
> >>> +	address = HWPfDmaQmgrAddrReg;
> >>> +	payload = HWPfQmgrEgressQueuesTemplate;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +
> >>> +	/* ===== Qmgr Configuration ===== */
> >>> +	/* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2
> >> for UL */
> >>> +	int totalQgs = conf->q_ul_4g.num_qgroups +
> >>> +			conf->q_ul_5g.num_qgroups +
> >>> +			conf->q_dl_4g.num_qgroups +
> >>> +			conf->q_dl_5g.num_qgroups;
> >>> +	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> >>> +		address = HWPfQmgrDepthLog2Grp +
> >>> +		BYTES_IN_WORD * qg_idx;
> >>> +		payload = aqDepth(qg_idx, conf);
> >>> +		acc100_reg_write(d, address, payload);
> >>> +		address = HWPfQmgrTholdGrp +
> >>> +		BYTES_IN_WORD * qg_idx;
> >>> +		payload = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1));
> >>> +		acc100_reg_write(d, address, payload);
> >>> +	}
> >>> +
> >>> +	/* Template Priority in incremental order */
> >>> +	for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
> >>> +			template_idx++) {
> >>> +		address = HWPfQmgrGrpTmplateReg0Indx +
> >>> +		BYTES_IN_WORD * (template_idx % 8);
> >>> +		payload = TMPL_PRI_0;
> >>> +		acc100_reg_write(d, address, payload);
> >>> +		address = HWPfQmgrGrpTmplateReg1Indx +
> >>> +		BYTES_IN_WORD * (template_idx % 8);
> >>> +		payload = TMPL_PRI_1;
> >>> +		acc100_reg_write(d, address, payload);
> >>> +		address = HWPfQmgrGrpTmplateReg2indx +
> >>> +		BYTES_IN_WORD * (template_idx % 8);
> >>> +		payload = TMPL_PRI_2;
> >>> +		acc100_reg_write(d, address, payload);
> >>> +		address = HWPfQmgrGrpTmplateReg3Indx +
> >>> +		BYTES_IN_WORD * (template_idx % 8);
> >>> +		payload = TMPL_PRI_3;
> >>> +		acc100_reg_write(d, address, payload);
> >>> +	}
> >>> +
> >>> +	address = HWPfQmgrGrpPriority;
> >>> +	payload = ACC100_CFG_QMGR_HI_P;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +
> >>> +	/* Template Configuration */
> >>> +	for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
> >> template_idx++) {
> >>> +		payload = 0;
> >>> +		address = HWPfQmgrGrpTmplateReg4Indx
> >>> +				+ BYTES_IN_WORD * template_idx;
> >>> +		acc100_reg_write(d, address, payload);
> >>> +	}
> >>> +	/* 4GUL */
> >>> +	int numQgs = conf->q_ul_4g.num_qgroups;
> >>> +	int numQqsAcc = 0;
> >>> +	payload = 0;
> >>> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> >> qg_idx++)
> >>> +		payload |= (1 << qg_idx);
> >>> +	for (template_idx = SIG_UL_4G; template_idx <= SIG_UL_4G_LAST;
> >>> +			template_idx++) {
> >>> +		address = HWPfQmgrGrpTmplateReg4Indx
> >>> +				+ BYTES_IN_WORD*template_idx;
> >>> +		acc100_reg_write(d, address, payload);
> >>> +	}
> >>> +	/* 5GUL */
> >>> +	numQqsAcc += numQgs;
> >>> +	numQgs	= conf->q_ul_5g.num_qgroups;
> >>> +	payload = 0;
> >>> +	int numEngines = 0;
> >>> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> >> qg_idx++)
> >>> +		payload |= (1 << qg_idx);
> >>> +	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> >>> +			template_idx++) {
> >>> +		/* Check engine power-on status */
> >>> +		address = HwPfFecUl5gIbDebugReg +
> >>> +				ACC100_ENGINE_OFFSET * template_idx;
> >>> +		status = (acc100_reg_read(d, address) >> 4) & 0xF;
> >>> +		address = HWPfQmgrGrpTmplateReg4Indx
> >>> +				+ BYTES_IN_WORD * template_idx;
> >>> +		if (status == 1) {
> >>> +			acc100_reg_write(d, address, payload);
> >>> +			numEngines++;
> >>> +		} else
> >>> +			acc100_reg_write(d, address, 0);
> >>> +		#if RTE_ACC100_SINGLE_FEC == 1
> >> #if should be at start of line
> > ok
> >
> >>> +		payload = 0;
> >>> +		#endif
> >>> +	}
> >>> +	printf("Number of 5GUL engines %d\n", numEngines);
> >>> +	/* 4GDL */
> >>> +	numQqsAcc += numQgs;
> >>> +	numQgs	= conf->q_dl_4g.num_qgroups;
> >>> +	payload = 0;
> >>> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> >> qg_idx++)
> >>> +		payload |= (1 << qg_idx);
> >>> +	for (template_idx = SIG_DL_4G; template_idx <= SIG_DL_4G_LAST;
> >>> +			template_idx++) {
> >>> +		address = HWPfQmgrGrpTmplateReg4Indx
> >>> +				+ BYTES_IN_WORD*template_idx;
> >>> +		acc100_reg_write(d, address, payload);
> >>> +		#if RTE_ACC100_SINGLE_FEC == 1
> >>> +			payload = 0;
> >>> +		#endif
> >>> +	}
> >>> +	/* 5GDL */
> >>> +	numQqsAcc += numQgs;
> >>> +	numQgs	= conf->q_dl_5g.num_qgroups;
> >>> +	payload = 0;
> >>> +	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> >> qg_idx++)
> >>> +		payload |= (1 << qg_idx);
> >>> +	for (template_idx = SIG_DL_5G; template_idx <= SIG_DL_5G_LAST;
> >>> +			template_idx++) {
> >>> +		address = HWPfQmgrGrpTmplateReg4Indx
> >>> +				+ BYTES_IN_WORD*template_idx;
> >>> +		acc100_reg_write(d, address, payload);
> >>> +		#if RTE_ACC100_SINGLE_FEC == 1
> >>> +		payload = 0;
> >>> +		#endif
> >>> +	}
> >>> +
> >>> +	/* Queue Group Function mapping */
> >>> +	int qman_func_id[5] = {0, 2, 1, 3, 4};
> >>> +	address = HWPfQmgrGrpFunction0;
> >>> +	payload = 0;
> >>> +	for (qg_idx = 0; qg_idx < 8; qg_idx++) {
> >>> +		acc = accFromQgid(qg_idx, conf);
> >>> +		payload |= qman_func_id[acc]<<(qg_idx * 4);
> >>> +	}
> >>> +	acc100_reg_write(d, address, payload);
> >>> +
> >>> +	/* Configuration of the Arbitration QGroup depth to 1 */
> >>> +	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> >>> +		address = HWPfQmgrArbQDepthGrp +
> >>> +		BYTES_IN_WORD * qg_idx;
> >>> +		payload = 0;
> >>> +		acc100_reg_write(d, address, payload);
> >>> +	}
> >>> +
> >>> +	/* Enabling AQueues through the Queue hierarchy*/
> >>> +	for (vf_idx = 0; vf_idx < ACC100_NUM_VFS; vf_idx++) {
> >>> +		for (qg_idx = 0; qg_idx < ACC100_NUM_QGRPS; qg_idx++) {
> >>> +			payload = 0;
> >>> +			if (vf_idx < conf->num_vf_bundles &&
> >>> +					qg_idx < totalQgs)
> >>> +				payload = (1 << aqNum(qg_idx, conf)) - 1;
> >>> +			address = HWPfQmgrAqEnableVf
> >>> +					+ vf_idx * BYTES_IN_WORD;
> >>> +			payload += (qg_idx << 16);
> >>> +			acc100_reg_write(d, address, payload);
> >>> +		}
> >>> +	}
> >>> +
> >>> +	/* This pointer to ARAM (256kB) is shifted by 2 (4B per register) */
> >>> +	uint32_t aram_address = 0;
> >>> +	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> >>> +		for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
> >>> +			address = HWPfQmgrVfBaseAddr + vf_idx
> >>> +					* BYTES_IN_WORD + qg_idx
> >>> +					* BYTES_IN_WORD * 64;
> >>> +			payload = aram_address;
> >>> +			acc100_reg_write(d, address, payload);
> >>> +			/* Offset ARAM Address for next memory bank
> >>> +			 * - increment of 4B
> >>> +			 */
> >>> +			aram_address += aqNum(qg_idx, conf) *
> >>> +					(1 << aqDepth(qg_idx, conf));
> >>> +		}
> >>> +	}
> >>> +
> >>> +	if (aram_address > WORDS_IN_ARAM_SIZE) {
> >>> +		rte_bbdev_log(ERR, "ARAM Configuration not fitting %d
> >> %d\n",
> >>> +				aram_address, WORDS_IN_ARAM_SIZE);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +
> >>> +	/* ==== HI Configuration ==== */
> >>> +
> >>> +	/* Prevent Block on Transmit Error */
> >>> +	address = HWPfHiBlockTransmitOnErrorEn;
> >>> +	payload = 0;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +	/* Prevents to drop MSI */
> >>> +	address = HWPfHiMsiDropEnableReg;
> >>> +	payload = 0;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +	/* Set the PF Mode register */
> >>> +	address = HWPfHiPfMode;
> >>> +	payload = (conf->pf_mode_en) ? 2 : 0;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +	/* Enable Error Detection in HW */
> >>> +	address = HWPfDmaErrorDetectionEn;
> >>> +	payload = 0x3D7;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +
> >>> +	/* QoS overflow init */
> >>> +	payload = 1;
> >>> +	address = HWPfQosmonAEvalOverflow0;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +	address = HWPfQosmonBEvalOverflow0;
> >>> +	acc100_reg_write(d, address, payload);
> >>> +
> >>> +	/* HARQ DDR Configuration */
> >>> +	unsigned int ddrSizeInMb = 512; /* Fixed to 512 MB per VF for now
> >> */
> >>> +	for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
> >>> +		address = HWPfDmaVfDdrBaseRw + vf_idx
> >>> +				* 0x10;
> >>> +		payload = ((vf_idx * (ddrSizeInMb / 64)) << 16) +
> >>> +				(ddrSizeInMb - 1);
> >>> +		acc100_reg_write(d, address, payload);
> >>> +	}
> >>> +	usleep(LONG_WAIT);
> >> Is sleep needed here ? the reg_write has one.
> > This one is needed on top
> >
> >>> +
> >> Since this seems like a workaround, add a comment here.
> > fair enough, ok, thanks
> >
> >> Tom
> >>
> >>> +	if (numEngines < (SIG_UL_5G_LAST + 1))
> >>> +		poweron_cleanup(bbdev, d, conf);
> >>> +
> >>> +	rte_bbdev_log_debug("PF Tip configuration complete for %s",
> >> dev_name);
> >>> +	return 0;
> >>> +}
> >>> diff --git
> >>> a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> >>> b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> >>> index 4a76d1d..91c234d 100644
> >>> --- a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> >>> +++ b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> >>> @@ -1,3 +1,10 @@
> >>>  DPDK_21 {
> >>>  	local: *;
> >>>  };
> >>> +
> >>> +EXPERIMENTAL {
> >>> +	global:
> >>> +
> >>> +	acc100_configure;
> >>> +
> >>> +};
  

Patch

diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index 45c0d62..32f23ff 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -52,6 +52,18 @@ 
 #define FLR_5G_TIMEOUT 610
 #endif
 
+#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
+#include <rte_acc100_cfg.h>
+#define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
+#define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
+#define ACC100_QMGR_NUM_AQS 16
+#define ACC100_QMGR_NUM_QGS 2
+#define ACC100_QMGR_AQ_DEPTH 5
+#define ACC100_QMGR_INVALID_IDX -1
+#define ACC100_QMGR_RR 1
+#define ACC100_QOS_GBR 0
+#endif
+
 #define OPS_CACHE_SIZE 256U
 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
 
@@ -653,6 +665,66 @@  typedef int (test_case_function)(struct active_device *ad,
 				info->dev_name);
 	}
 #endif
+#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
+	if ((get_init_device() == true) &&
+		(!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
+		struct acc100_conf conf;
+		unsigned int i;
+
+		printf("Configure ACC100 FEC Driver %s with default values\n",
+				info->drv.driver_name);
+
+		/* clear default configuration before initialization */
+		memset(&conf, 0, sizeof(struct acc100_conf));
+
+		/* Always set in PF mode for built-in configuration */
+		conf.pf_mode_en = true;
+		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
+			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
+			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
+			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
+			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
+			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
+			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
+			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
+			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
+			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
+			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
+			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
+			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
+		}
+
+		conf.input_pos_llr_1_bit = true;
+		conf.output_pos_llr_1_bit = true;
+		conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */
+
+		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
+		conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
+		conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
+		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
+		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
+		conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
+		conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
+		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
+		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
+		conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
+		conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
+		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
+		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
+		conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
+		conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
+		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
+
+		/* setup PF with configuration information */
+		ret = acc100_configure(info->dev_name, &conf);
+		TEST_ASSERT_SUCCESS(ret,
+				"Failed to configure ACC100 PF for bbdev %s",
+				info->dev_name);
+		/* Refresh the device info now that the device is configured */
+	}
+	rte_bbdev_info_get(dev_id, info);
+#endif
+
 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
 
diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst
index 73ac08f..c8d0586 100644
--- a/doc/guides/rel_notes/release_20_11.rst
+++ b/doc/guides/rel_notes/release_20_11.rst
@@ -55,6 +55,11 @@  New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+* **Added Intel ACC100 bbdev PMD.**
+
+  Added a new ``acc100`` bbdev driver for the Intel\ |reg| ACC100 accelerator,
+  also known as Mount Bryce.  See the
+  :doc:`../bbdevs/acc100` BBDEV guide for more details on this new driver.
 
 Removed Items
 -------------
diff --git a/drivers/baseband/acc100/meson.build b/drivers/baseband/acc100/meson.build
index 8afafc2..7ac44dc 100644
--- a/drivers/baseband/acc100/meson.build
+++ b/drivers/baseband/acc100/meson.build
@@ -4,3 +4,5 @@ 
 deps += ['bbdev', 'bus_vdev', 'ring', 'pci', 'bus_pci']
 
 sources = files('rte_acc100_pmd.c')
+
+install_headers('rte_acc100_cfg.h')
diff --git a/drivers/baseband/acc100/rte_acc100_cfg.h b/drivers/baseband/acc100/rte_acc100_cfg.h
index 73bbe36..7f523bc 100644
--- a/drivers/baseband/acc100/rte_acc100_cfg.h
+++ b/drivers/baseband/acc100/rte_acc100_cfg.h
@@ -89,6 +89,23 @@  struct acc100_conf {
 	struct rte_arbitration_t arb_dl_5g[RTE_ACC100_NUM_VFS];
 };
 
+/**
+ * Configure an ACC100 device
+ *
+ * @param dev_name
+ *   The name of the device. This is the short form of PCI BDF, e.g. 00:01.0.
+ *   It can also be retrieved for a bbdev device from the dev_name field in the
+ *   rte_bbdev_info structure returned by rte_bbdev_info_get().
+ * @param conf
+ *   Configuration to apply to ACC100 HW.
+ *
+ * @return
+ *   Zero on success, negative value on failure.
+ */
+__rte_experimental
+int
+acc100_configure(const char *dev_name, struct acc100_conf *conf);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c b/drivers/baseband/acc100/rte_acc100_pmd.c
index 3589814..b50dd32 100644
--- a/drivers/baseband/acc100/rte_acc100_pmd.c
+++ b/drivers/baseband/acc100/rte_acc100_pmd.c
@@ -85,6 +85,26 @@ 
 
 enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC};
 
+/* Return the accelerator enum for a Queue Group Index */
+static inline int
+accFromQgid(int qg_idx, const struct acc100_conf *acc100_conf)
+{
+	int accQg[ACC100_NUM_QGRPS];
+	int NumQGroupsPerFn[NUM_ACC];
+	int acc, qgIdx, qgIndex = 0;
+	for (qgIdx = 0; qgIdx < ACC100_NUM_QGRPS; qgIdx++)
+		accQg[qgIdx] = 0;
+	NumQGroupsPerFn[UL_4G] = acc100_conf->q_ul_4g.num_qgroups;
+	NumQGroupsPerFn[UL_5G] = acc100_conf->q_ul_5g.num_qgroups;
+	NumQGroupsPerFn[DL_4G] = acc100_conf->q_dl_4g.num_qgroups;
+	NumQGroupsPerFn[DL_5G] = acc100_conf->q_dl_5g.num_qgroups;
+	for (acc = UL_4G;  acc < NUM_ACC; acc++)
+		for (qgIdx = 0; qgIdx < NumQGroupsPerFn[acc]; qgIdx++)
+			accQg[qgIndex++] = acc;
+	acc = accQg[qg_idx];
+	return acc;
+}
+
 /* Return the queue topology for a Queue Group Index */
 static inline void
 qtopFromAcc(struct rte_q_topology_t **qtop, int acc_enum,
@@ -113,6 +133,30 @@ 
 	*qtop = p_qtop;
 }
 
+/* Return the AQ depth for a Queue Group Index */
+static inline int
+aqDepth(int qg_idx, struct acc100_conf *acc100_conf)
+{
+	struct rte_q_topology_t *q_top = NULL;
+	int acc_enum = accFromQgid(qg_idx, acc100_conf);
+	qtopFromAcc(&q_top, acc_enum, acc100_conf);
+	if (unlikely(q_top == NULL))
+		return 0;
+	return q_top->aq_depth_log2;
+}
+
+/* Return the number of AQs per group for a Queue Group Index */
+static inline int
+aqNum(int qg_idx, struct acc100_conf *acc100_conf)
+{
+	struct rte_q_topology_t *q_top = NULL;
+	int acc_enum = accFromQgid(qg_idx, acc100_conf);
+	qtopFromAcc(&q_top, acc_enum, acc100_conf);
+	if (unlikely(q_top == NULL))
+		return 0;
+	return q_top->num_aqs_per_groups;
+}
+
 static void
 initQTop(struct acc100_conf *acc100_conf)
 {
@@ -4177,3 +4221,464 @@  static int acc100_pci_remove(struct rte_pci_device *pci_dev)
 RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME, pci_id_acc100_pf_map);
 RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME, acc100_pci_vf_driver);
 RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME, pci_id_acc100_vf_map);
+
+/*
+ * Implementation to fix the power on status of some 5GUL engines
+ * This requires DMA permission if ported outside DPDK
+ *
+ * Sequence: reset the LDPC cores, record the 5GUL engines whose status
+ * field (bits 7:4 of HwPfFecUl5gIbDebugReg) reads 0, route one dummy 5GUL
+ * code block to each of them in turn, reset the cores again and finally
+ * enable in the Qmgr templates only the engines whose status field reads 1.
+ *
+ * bbdev: device handed to the sw rings allocator
+ * d: private data used for register access and the temporary sw rings
+ * conf: configuration providing the UL queue group counts
+ */
+static void
+poweron_cleanup(struct rte_bbdev *bbdev, struct acc100_device *d,
+		struct acc100_conf *conf)
+{
+	int i, template_idx, qg_idx;
+	uint32_t address, status, payload;
+	printf("Need to clear power-on 5GUL status in internal memory\n");
+	/* Reset LDPC Cores */
+	for (i = 0; i < ACC100_ENGINES_MAX; i++)
+		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
+				ACC100_ENGINE_OFFSET * i, ACC100_RESET_HI);
+	usleep(LONG_WAIT);
+	for (i = 0; i < ACC100_ENGINES_MAX; i++)
+		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
+				ACC100_ENGINE_OFFSET * i, ACC100_RESET_LO);
+	usleep(LONG_WAIT);
+	/* Prepare dummy workload */
+	alloc_2x64mb_sw_rings_mem(bbdev, d, 0);
+	/*
+	 * The descriptor below is written through d->sw_rings, so bail out
+	 * rather than dereference a NULL pointer.
+	 * NOTE(review): assumes the allocator leaves d->sw_rings NULL on
+	 * failure - confirm against alloc_2x64mb_sw_rings_mem().
+	 */
+	if (d->sw_rings == NULL) {
+		rte_bbdev_log(ERR,
+			"Failed to allocate sw rings for 5GUL power-on cleanup");
+		return;
+	}
+	/* Set base addresses */
+	uint32_t phys_high = (uint32_t)(d->sw_rings_phys >> 32);
+	uint32_t phys_low  = (uint32_t)(d->sw_rings_phys &
+			~(ACC100_SIZE_64MBYTE-1));
+	acc100_reg_write(d, HWPfDmaFec5GulDescBaseHiRegVf, phys_high);
+	acc100_reg_write(d, HWPfDmaFec5GulDescBaseLoRegVf, phys_low);
+
+	/* Descriptor for a dummy 5GUL code block processing*/
+	union acc100_dma_desc *desc = NULL;
+	desc = d->sw_rings;
+	desc->req.data_ptrs[0].address = d->sw_rings_phys +
+			ACC100_DESC_FCW_OFFSET;
+	desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN;
+	desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
+	desc->req.data_ptrs[0].last = 0;
+	desc->req.data_ptrs[0].dma_ext = 0;
+	desc->req.data_ptrs[1].address = d->sw_rings_phys + 512;
+	desc->req.data_ptrs[1].blkid = ACC100_DMA_BLKID_IN;
+	desc->req.data_ptrs[1].last = 1;
+	desc->req.data_ptrs[1].dma_ext = 0;
+	desc->req.data_ptrs[1].blen = 44;
+	desc->req.data_ptrs[2].address = d->sw_rings_phys + 1024;
+	desc->req.data_ptrs[2].blkid = ACC100_DMA_BLKID_OUT_ENC;
+	desc->req.data_ptrs[2].last = 1;
+	desc->req.data_ptrs[2].dma_ext = 0;
+	desc->req.data_ptrs[2].blen = 5;
+	/* Dummy FCW */
+	desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
+	desc->req.fcw_ld.qm = 1;
+	desc->req.fcw_ld.nfiller = 30;
+	desc->req.fcw_ld.BG = 2 - 1;
+	desc->req.fcw_ld.Zc = 7;
+	desc->req.fcw_ld.ncb = 350;
+	desc->req.fcw_ld.rm_e = 4;
+	desc->req.fcw_ld.itmax = 10;
+	desc->req.fcw_ld.gain_i = 1;
+	desc->req.fcw_ld.gain_h = 1;
+
+	int engines_to_restart[SIG_UL_5G_LAST + 1] = {0};
+	int num_failed_engine = 0;
+	/* Detect engines in undefined state */
+	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
+			template_idx++) {
+		/* Check engine power-on status */
+		address = HwPfFecUl5gIbDebugReg +
+				ACC100_ENGINE_OFFSET * template_idx;
+		status = (acc100_reg_read(d, address) >> 4) & 0xF;
+		if (status == 0) {
+			engines_to_restart[num_failed_engine] = template_idx;
+			num_failed_engine++;
+		}
+	}
+
+	/*
+	 * The 5GUL queue groups are numbered right after the 4GUL ones (same
+	 * layout as programmed by acc100_configure()), hence the first 5GUL
+	 * group index is the number of 4GUL groups.
+	 */
+	int numQqsAcc = conf->q_ul_4g.num_qgroups;
+	int numQgs = conf->q_ul_5g.num_qgroups;
+	/* Bitmask with one bit set per 5GUL queue group */
+	payload = 0;
+	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
+		payload |= (1 << qg_idx);
+	/* Force each engine which is in unspecified state */
+	for (i = 0; i < num_failed_engine; i++) {
+		int failed_engine = engines_to_restart[i];
+		printf("Force engine %d\n", failed_engine);
+		/* Map the 5GUL queue groups to the failed engine only */
+		for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
+				template_idx++) {
+			address = HWPfQmgrGrpTmplateReg4Indx
+					+ BYTES_IN_WORD * template_idx;
+			if (template_idx == failed_engine)
+				acc100_reg_write(d, address, payload);
+			else
+				acc100_reg_write(d, address, 0);
+		}
+		/* Reset descriptor header */
+		desc->req.word0 = ACC100_DMA_DESC_TYPE;
+		desc->req.word1 = 0;
+		desc->req.word2 = 0;
+		desc->req.word3 = 0;
+		desc->req.numCBs = 1;
+		desc->req.m2dlen = 2;
+		desc->req.d2mlen = 1;
+		/* Enqueue the code block for processing */
+		union acc100_enqueue_reg_fmt enq_req;
+		enq_req.val = 0;
+		enq_req.addr_offset = ACC100_DESC_OFFSET;
+		enq_req.num_elem = 1;
+		enq_req.req_elem_addr = 0;
+		/* Descriptor must be visible before the enqueue register write */
+		rte_wmb();
+		acc100_reg_write(d, HWPfQmgrIngressAq + 0x100, enq_req.val);
+		usleep(LONG_WAIT * 100);
+		if (desc->req.word0 != 2)
+			printf("DMA Response %#"PRIx32"\n", desc->req.word0);
+	}
+
+	/* Reset LDPC Cores */
+	for (i = 0; i < ACC100_ENGINES_MAX; i++)
+		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
+				ACC100_ENGINE_OFFSET * i, ACC100_RESET_HI);
+	usleep(LONG_WAIT);
+	for (i = 0; i < ACC100_ENGINES_MAX; i++)
+		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
+				ACC100_ENGINE_OFFSET * i, ACC100_RESET_LO);
+	usleep(LONG_WAIT);
+	acc100_reg_write(d, HWPfHi5GHardResetReg, ACC100_RESET_HARD);
+	usleep(LONG_WAIT);
+	int numEngines = 0;
+	/* Check engine power-on status again */
+	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
+			template_idx++) {
+		address = HwPfFecUl5gIbDebugReg +
+				ACC100_ENGINE_OFFSET * template_idx;
+		status = (acc100_reg_read(d, address) >> 4) & 0xF;
+		address = HWPfQmgrGrpTmplateReg4Indx
+				+ BYTES_IN_WORD * template_idx;
+		if (status == 1) {
+			acc100_reg_write(d, address, payload);
+			numEngines++;
+		} else
+			acc100_reg_write(d, address, 0);
+	}
+	printf("Number of 5GUL engines %d\n", numEngines);
+
+	/*
+	 * Release the temporary rings (rte_free() accepts NULL) and clear the
+	 * pointers so that no stale reference to the freed memory is left in
+	 * the device private data.
+	 */
+	rte_free(d->sw_rings_base);
+	d->sw_rings_base = NULL;
+	d->sw_rings = NULL;
+	usleep(LONG_WAIT);
+}
+
+/*
+ * Initial configuration of a ACC100 device prior to running configure()
+ *
+ * Stores the configuration in the device private data, then programs the
+ * PCIe bridge, DDR, DMA, Qmgr (queue group depth, templates, function
+ * mapping, AQ enables, ARAM base addresses), HI, QoS monitor and per-VF
+ * HARQ DDR registers. When fewer than SIG_UL_5G_LAST + 1 5GUL engines
+ * report a power-on status of 1, poweron_cleanup() is run at the end.
+ *
+ * dev_name: name of the bbdev device to configure
+ * conf: configuration to apply; must not be NULL
+ *
+ * Returns 0 on success, -ENODEV when no device matches dev_name, -EINVAL
+ * when conf is NULL or the requested queues do not fit in ARAM.
+ */
+int
+acc100_configure(const char *dev_name, struct acc100_conf *conf)
+{
+	rte_bbdev_log(INFO, "acc100_configure");
+	uint32_t payload, address, status;
+	int qg_idx, template_idx, vf_idx, acc, i;
+	struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name);
+
+	/* Compile time checks */
+	RTE_BUILD_BUG_ON(sizeof(struct acc100_dma_req_desc) != 256);
+	RTE_BUILD_BUG_ON(sizeof(union acc100_dma_desc) != 256);
+	RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_td) != 24);
+	RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_te) != 32);
+
+	if (bbdev == NULL) {
+		rte_bbdev_log(ERR,
+		"Invalid dev_name (%s), or device is not yet initialised",
+		dev_name);
+		return -ENODEV;
+	}
+	/* conf is copied and dereferenced throughout, reject NULL up front */
+	if (conf == NULL) {
+		rte_bbdev_log(ERR, "Invalid configuration pointer for %s",
+				dev_name);
+		return -EINVAL;
+	}
+	struct acc100_device *d = bbdev->data->dev_private;
+
+	/* Store configuration */
+	rte_memcpy(&d->acc100_conf, conf, sizeof(d->acc100_conf));
+
+	/* PCIe Bridge configuration */
+	acc100_reg_write(d, HwPfPcieGpexBridgeControl, ACC100_CFG_PCI_BRIDGE);
+	for (i = 1; i < 17; i++)
+		acc100_reg_write(d,
+				HwPfPcieGpexAxiAddrMappingWindowPexBaseHigh
+				+ i * 16, 0);
+
+	/* PCIe Link Training and Status State Machine */
+	acc100_reg_write(d, HwPfPcieGpexLtssmStateCntrl, 0xDFC00000);
+
+	/* Prevent blocking AXI read on BRESP for AXI Write */
+	address = HwPfPcieGpexAxiPioControl;
+	payload = ACC100_CFG_PCI_AXI;
+	acc100_reg_write(d, address, payload);
+
+	/* 5GDL PLL phase shift */
+	acc100_reg_write(d, HWPfChaDl5gPllPhshft0, 0x1);
+
+	/* Explicitly releasing AXI as this may be stopped after PF FLR/BME */
+	address = HWPfDmaAxiControl;
+	payload = 1;
+	acc100_reg_write(d, address, payload);
+
+	/* DDR Configuration */
+	address = HWPfDdrBcTim6;
+	payload = acc100_reg_read(d, address);
+	payload &= 0xFFFFFFFB; /* Bit 2 */
+#ifdef ACC100_DDR_ECC_ENABLE
+	payload |= 0x4;
+#endif
+	acc100_reg_write(d, address, payload);
+	address = HWPfDdrPhyDqsCountNum;
+#ifdef ACC100_DDR_ECC_ENABLE
+	payload = 9;
+#else
+	payload = 8;
+#endif
+	acc100_reg_write(d, address, payload);
+
+	/* Set default descriptor signature */
+	address = HWPfDmaDescriptorSignatuture;
+	payload = 0;
+	acc100_reg_write(d, address, payload);
+
+	/* Enable the Error Detection in DMA */
+	payload = ACC100_CFG_DMA_ERROR;
+	address = HWPfDmaErrorDetectionEn;
+	acc100_reg_write(d, address, payload);
+
+	/* AXI Cache configuration */
+	payload = ACC100_CFG_AXI_CACHE;
+	address = HWPfDmaAxcacheReg;
+	acc100_reg_write(d, address, payload);
+
+	/* Default DMA Configuration (Qmgr Enabled) */
+	address = HWPfDmaConfig0Reg;
+	payload = 0;
+	acc100_reg_write(d, address, payload);
+	address = HWPfDmaQmanen;
+	payload = 0;
+	acc100_reg_write(d, address, payload);
+
+	/* Default RLIM/ALEN configuration */
+	address = HWPfDmaConfig1Reg;
+	/* Unsigned constant: shifting a signed 1 into bit 31 is undefined */
+	payload = (1U << 31) + (23 << 8) + (1 << 6) + 7;
+	acc100_reg_write(d, address, payload);
+
+	/* Configure DMA Qmanager addresses */
+	address = HWPfDmaQmgrAddrReg;
+	payload = HWPfQmgrEgressQueuesTemplate;
+	acc100_reg_write(d, address, payload);
+
+	/* ===== Qmgr Configuration ===== */
+	/* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2 for UL */
+	int totalQgs = conf->q_ul_4g.num_qgroups +
+			conf->q_ul_5g.num_qgroups +
+			conf->q_dl_4g.num_qgroups +
+			conf->q_dl_5g.num_qgroups;
+	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
+		address = HWPfQmgrDepthLog2Grp +
+		BYTES_IN_WORD * qg_idx;
+		payload = aqDepth(qg_idx, conf);
+		acc100_reg_write(d, address, payload);
+		address = HWPfQmgrTholdGrp +
+		BYTES_IN_WORD * qg_idx;
+		/*
+		 * NOTE(review): a depth of 0 makes the shift count negative;
+		 * presumably the configuration always has aq_depth_log2 >= 1.
+		 */
+		payload = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1));
+		acc100_reg_write(d, address, payload);
+	}
+
+	/* Template Priority in incremental order */
+	/* NOTE(review): register index wraps with "% 8" - confirm intent */
+	for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
+			template_idx++) {
+		address = HWPfQmgrGrpTmplateReg0Indx +
+		BYTES_IN_WORD * (template_idx % 8);
+		payload = TMPL_PRI_0;
+		acc100_reg_write(d, address, payload);
+		address = HWPfQmgrGrpTmplateReg1Indx +
+		BYTES_IN_WORD * (template_idx % 8);
+		payload = TMPL_PRI_1;
+		acc100_reg_write(d, address, payload);
+		address = HWPfQmgrGrpTmplateReg2indx +
+		BYTES_IN_WORD * (template_idx % 8);
+		payload = TMPL_PRI_2;
+		acc100_reg_write(d, address, payload);
+		address = HWPfQmgrGrpTmplateReg3Indx +
+		BYTES_IN_WORD * (template_idx % 8);
+		payload = TMPL_PRI_3;
+		acc100_reg_write(d, address, payload);
+	}
+
+	address = HWPfQmgrGrpPriority;
+	payload = ACC100_CFG_QMGR_HI_P;
+	acc100_reg_write(d, address, payload);
+
+	/* Template Configuration */
+	/* Clear every template first, then set one queue group mask per acc */
+	for (template_idx = 0; template_idx < ACC100_NUM_TMPL; template_idx++) {
+		payload = 0;
+		address = HWPfQmgrGrpTmplateReg4Indx
+				+ BYTES_IN_WORD * template_idx;
+		acc100_reg_write(d, address, payload);
+	}
+	/* 4GUL */
+	int numQgs = conf->q_ul_4g.num_qgroups;
+	/* Index of the first queue group of the accelerator being set up */
+	int numQqsAcc = 0;
+	payload = 0;
+	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
+		payload |= (1 << qg_idx);
+	for (template_idx = SIG_UL_4G; template_idx <= SIG_UL_4G_LAST;
+			template_idx++) {
+		address = HWPfQmgrGrpTmplateReg4Indx
+				+ BYTES_IN_WORD*template_idx;
+		acc100_reg_write(d, address, payload);
+	}
+	/* 5GUL */
+	numQqsAcc += numQgs;
+	numQgs	= conf->q_ul_5g.num_qgroups;
+	payload = 0;
+	int numEngines = 0;
+	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
+		payload |= (1 << qg_idx);
+	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
+			template_idx++) {
+		/* Check engine power-on status */
+		address = HwPfFecUl5gIbDebugReg +
+				ACC100_ENGINE_OFFSET * template_idx;
+		status = (acc100_reg_read(d, address) >> 4) & 0xF;
+		address = HWPfQmgrGrpTmplateReg4Indx
+				+ BYTES_IN_WORD * template_idx;
+		/* Only engines reporting status 1 get the queue groups */
+		if (status == 1) {
+			acc100_reg_write(d, address, payload);
+			numEngines++;
+		} else
+			acc100_reg_write(d, address, 0);
+		#if RTE_ACC100_SINGLE_FEC == 1
+		payload = 0;
+		#endif
+	}
+	printf("Number of 5GUL engines %d\n", numEngines);
+	/* 4GDL */
+	numQqsAcc += numQgs;
+	numQgs	= conf->q_dl_4g.num_qgroups;
+	payload = 0;
+	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
+		payload |= (1 << qg_idx);
+	for (template_idx = SIG_DL_4G; template_idx <= SIG_DL_4G_LAST;
+			template_idx++) {
+		address = HWPfQmgrGrpTmplateReg4Indx
+				+ BYTES_IN_WORD*template_idx;
+		acc100_reg_write(d, address, payload);
+		#if RTE_ACC100_SINGLE_FEC == 1
+		payload = 0;
+		#endif
+	}
+	/* 5GDL */
+	numQqsAcc += numQgs;
+	numQgs	= conf->q_dl_5g.num_qgroups;
+	payload = 0;
+	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
+		payload |= (1 << qg_idx);
+	for (template_idx = SIG_DL_5G; template_idx <= SIG_DL_5G_LAST;
+			template_idx++) {
+		address = HWPfQmgrGrpTmplateReg4Indx
+				+ BYTES_IN_WORD*template_idx;
+		acc100_reg_write(d, address, payload);
+		#if RTE_ACC100_SINGLE_FEC == 1
+		payload = 0;
+		#endif
+	}
+
+	/* Queue Group Function mapping */
+	/* One 4-bit function id per queue group, for the first 8 groups */
+	int qman_func_id[5] = {0, 2, 1, 3, 4};
+	address = HWPfQmgrGrpFunction0;
+	payload = 0;
+	for (qg_idx = 0; qg_idx < 8; qg_idx++) {
+		acc = accFromQgid(qg_idx, conf);
+		payload |= qman_func_id[acc]<<(qg_idx * 4);
+	}
+	acc100_reg_write(d, address, payload);
+
+	/* Configuration of the Arbitration QGroup depth to 1 */
+	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
+		address = HWPfQmgrArbQDepthGrp +
+		BYTES_IN_WORD * qg_idx;
+		payload = 0;
+		acc100_reg_write(d, address, payload);
+	}
+
+	/* Enabling AQueues through the Queue hierarchy*/
+	for (vf_idx = 0; vf_idx < ACC100_NUM_VFS; vf_idx++) {
+		for (qg_idx = 0; qg_idx < ACC100_NUM_QGRPS; qg_idx++) {
+			/* Low bits: AQ enable mask, from bit 16: group index */
+			payload = 0;
+			if (vf_idx < conf->num_vf_bundles &&
+					qg_idx < totalQgs)
+				payload = (1 << aqNum(qg_idx, conf)) - 1;
+			address = HWPfQmgrAqEnableVf
+					+ vf_idx * BYTES_IN_WORD;
+			payload += (qg_idx << 16);
+			acc100_reg_write(d, address, payload);
+		}
+	}
+
+	/* This pointer to ARAM (256kB) is shifted by 2 (4B per register) */
+	uint32_t aram_address = 0;
+	for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
+		for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
+			address = HWPfQmgrVfBaseAddr + vf_idx
+					* BYTES_IN_WORD + qg_idx
+					* BYTES_IN_WORD * 64;
+			payload = aram_address;
+			acc100_reg_write(d, address, payload);
+			/* Offset ARAM Address for next memory bank
+			 * - increment of 4B
+			 */
+			aram_address += aqNum(qg_idx, conf) *
+					(1 << aqDepth(qg_idx, conf));
+		}
+	}
+
+	if (aram_address > WORDS_IN_ARAM_SIZE) {
+		rte_bbdev_log(ERR, "ARAM Configuration not fitting %d %d\n",
+				aram_address, WORDS_IN_ARAM_SIZE);
+		return -EINVAL;
+	}
+
+	/* ==== HI Configuration ==== */
+
+	/* Prevent Block on Transmit Error */
+	address = HWPfHiBlockTransmitOnErrorEn;
+	payload = 0;
+	acc100_reg_write(d, address, payload);
+	/* Prevents to drop MSI */
+	address = HWPfHiMsiDropEnableReg;
+	payload = 0;
+	acc100_reg_write(d, address, payload);
+	/* Set the PF Mode register */
+	address = HWPfHiPfMode;
+	payload = (conf->pf_mode_en) ? 2 : 0;
+	acc100_reg_write(d, address, payload);
+	/* Enable Error Detection in HW */
+	address = HWPfDmaErrorDetectionEn;
+	payload = 0x3D7;
+	acc100_reg_write(d, address, payload);
+
+	/* QoS overflow init */
+	payload = 1;
+	address = HWPfQosmonAEvalOverflow0;
+	acc100_reg_write(d, address, payload);
+	address = HWPfQosmonBEvalOverflow0;
+	acc100_reg_write(d, address, payload);
+
+	/* HARQ DDR Configuration */
+	unsigned int ddrSizeInMb = 512; /* Fixed to 512 MB per VF for now */
+	for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
+		address = HWPfDmaVfDdrBaseRw + vf_idx
+				* 0x10;
+		payload = ((vf_idx * (ddrSizeInMb / 64)) << 16) +
+				(ddrSizeInMb - 1);
+		acc100_reg_write(d, address, payload);
+	}
+	usleep(LONG_WAIT);
+
+	/* Some 5GUL engines did not report status 1: run the workaround */
+	if (numEngines < (SIG_UL_5G_LAST + 1))
+		poweron_cleanup(bbdev, d, conf);
+
+	rte_bbdev_log_debug("PF Tip configuration complete for %s", dev_name);
+	return 0;
+}
diff --git a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
index 4a76d1d..91c234d 100644
--- a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
+++ b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
@@ -1,3 +1,10 @@ 
 DPDK_21 {
 	local: *;
 };
+
+EXPERIMENTAL {
+	global:
+
+	acc100_configure;
+
+};