[v9,10/10] baseband/acc100: add configure function
Checks
Commit Message
Add a configure function so that the PF can be configured from within
bbdev-test itself, without requiring an external application
to configure the device.
Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
---
app/test-bbdev/test_bbdev_perf.c | 72 +++
doc/guides/rel_notes/release_20_11.rst | 5 +
drivers/baseband/acc100/meson.build | 2 +
drivers/baseband/acc100/rte_acc100_cfg.h | 17 +
drivers/baseband/acc100/rte_acc100_pmd.c | 505 +++++++++++++++++++++
.../acc100/rte_pmd_bbdev_acc100_version.map | 7 +
6 files changed, 608 insertions(+)
Comments
On 9/28/20 5:29 PM, Nicolas Chautru wrote:
> Add configure function to configure the PF from within
> the bbdev-test itself without external application
> configuration the device.
>
> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
> ---
> app/test-bbdev/test_bbdev_perf.c | 72 +++
> doc/guides/rel_notes/release_20_11.rst | 5 +
> drivers/baseband/acc100/meson.build | 2 +
> drivers/baseband/acc100/rte_acc100_cfg.h | 17 +
> drivers/baseband/acc100/rte_acc100_pmd.c | 505 +++++++++++++++++++++
> .../acc100/rte_pmd_bbdev_acc100_version.map | 7 +
> 6 files changed, 608 insertions(+)
>
> diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
> index 45c0d62..32f23ff 100644
> --- a/app/test-bbdev/test_bbdev_perf.c
> +++ b/app/test-bbdev/test_bbdev_perf.c
> @@ -52,6 +52,18 @@
> #define FLR_5G_TIMEOUT 610
> #endif
>
> +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
> +#include <rte_acc100_cfg.h>
> +#define ACC100PF_DRIVER_NAME ("intel_acc100_pf")
> +#define ACC100VF_DRIVER_NAME ("intel_acc100_vf")
> +#define ACC100_QMGR_NUM_AQS 16
> +#define ACC100_QMGR_NUM_QGS 2
> +#define ACC100_QMGR_AQ_DEPTH 5
> +#define ACC100_QMGR_INVALID_IDX -1
> +#define ACC100_QMGR_RR 1
> +#define ACC100_QOS_GBR 0
> +#endif
> +
> #define OPS_CACHE_SIZE 256U
> #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
>
> @@ -653,6 +665,66 @@ typedef int (test_case_function)(struct active_device *ad,
> info->dev_name);
> }
> #endif
> +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
Seems like this function would break if one of the other bbdevs were #defined.
> + if ((get_init_device() == true) &&
> + (!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
> + struct acc100_conf conf;
> + unsigned int i;
> +
> + printf("Configure ACC100 FEC Driver %s with default values\n",
> + info->drv.driver_name);
> +
> + /* clear default configuration before initialization */
> + memset(&conf, 0, sizeof(struct acc100_conf));
> +
> + /* Always set in PF mode for built-in configuration */
> + conf.pf_mode_en = true;
> + for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
> + conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
> + conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
> + conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
> + conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
> + conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
> + conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
> + conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
> + conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
> + conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
> + conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
> + conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
> + conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
> + }
> +
> + conf.input_pos_llr_1_bit = true;
> + conf.output_pos_llr_1_bit = true;
> + conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */
> +
> + conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
> + conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
> + conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
> + conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> + conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
> + conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
> + conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
> + conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> + conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
> + conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
> + conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
> + conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> + conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
> + conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
> + conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
> + conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> +
> + /* setup PF with configuration information */
> + ret = acc100_configure(info->dev_name, &conf);
> + TEST_ASSERT_SUCCESS(ret,
> + "Failed to configure ACC100 PF for bbdev %s",
> + info->dev_name);
> + /* Let's refresh this now this is configured */
> + }
> + rte_bbdev_info_get(dev_id, info);
The other bbdevs do not call rte_bbdev_info_get; can this be removed?
> +#endif
> +
> nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
> nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
>
> diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst
> index 73ac08f..c8d0586 100644
> --- a/doc/guides/rel_notes/release_20_11.rst
> +++ b/doc/guides/rel_notes/release_20_11.rst
> @@ -55,6 +55,11 @@ New Features
> Also, make sure to start the actual text at the margin.
> =======================================================
>
> +* **Added Intel ACC100 bbdev PMD.**
> +
> + Added a new ``acc100`` bbdev driver for the Intel\ |reg| ACC100 accelerator
> + also known as Mount Bryce. See the
> + :doc:`../bbdevs/acc100` BBDEV guide for more details on this new driver.
>
> Removed Items
> -------------
> diff --git a/drivers/baseband/acc100/meson.build b/drivers/baseband/acc100/meson.build
> index 8afafc2..7ac44dc 100644
> --- a/drivers/baseband/acc100/meson.build
> +++ b/drivers/baseband/acc100/meson.build
> @@ -4,3 +4,5 @@
> deps += ['bbdev', 'bus_vdev', 'ring', 'pci', 'bus_pci']
>
> sources = files('rte_acc100_pmd.c')
> +
> +install_headers('rte_acc100_cfg.h')
> diff --git a/drivers/baseband/acc100/rte_acc100_cfg.h b/drivers/baseband/acc100/rte_acc100_cfg.h
> index 73bbe36..7f523bc 100644
> --- a/drivers/baseband/acc100/rte_acc100_cfg.h
> +++ b/drivers/baseband/acc100/rte_acc100_cfg.h
> @@ -89,6 +89,23 @@ struct acc100_conf {
> struct rte_arbitration_t arb_dl_5g[RTE_ACC100_NUM_VFS];
> };
>
> +/**
> + * Configure a ACC100 device
> + *
> + * @param dev_name
> + * The name of the device. This is the short form of PCI BDF, e.g. 00:01.0.
> + * It can also be retrieved for a bbdev device from the dev_name field in the
> + * rte_bbdev_info structure returned by rte_bbdev_info_get().
> + * @param conf
> + * Configuration to apply to ACC100 HW.
> + *
> + * @return
> + * Zero on success, negative value on failure.
> + */
> +__rte_experimental
> +int
> +acc100_configure(const char *dev_name, struct acc100_conf *conf);
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c b/drivers/baseband/acc100/rte_acc100_pmd.c
> index 3589814..b50dd32 100644
> --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> @@ -85,6 +85,26 @@
>
> enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC};
>
> +/* Return the accelerator enum for a Queue Group Index */
> +static inline int
> +accFromQgid(int qg_idx, const struct acc100_conf *acc100_conf)
> +{
> + int accQg[ACC100_NUM_QGRPS];
> + int NumQGroupsPerFn[NUM_ACC];
> + int acc, qgIdx, qgIndex = 0;
> + for (qgIdx = 0; qgIdx < ACC100_NUM_QGRPS; qgIdx++)
> + accQg[qgIdx] = 0;
> + NumQGroupsPerFn[UL_4G] = acc100_conf->q_ul_4g.num_qgroups;
> + NumQGroupsPerFn[UL_5G] = acc100_conf->q_ul_5g.num_qgroups;
> + NumQGroupsPerFn[DL_4G] = acc100_conf->q_dl_4g.num_qgroups;
> + NumQGroupsPerFn[DL_5G] = acc100_conf->q_dl_5g.num_qgroups;
> + for (acc = UL_4G; acc < NUM_ACC; acc++)
> + for (qgIdx = 0; qgIdx < NumQGroupsPerFn[acc]; qgIdx++)
> + accQg[qgIndex++] = acc;
This looks inefficient; is there a way this could be calculated without filling arrays to
access 1 value?
> + acc = accQg[qg_idx];
> + return acc;
> +}
> +
> /* Return the queue topology for a Queue Group Index */
> static inline void
> qtopFromAcc(struct rte_q_topology_t **qtop, int acc_enum,
> @@ -113,6 +133,30 @@
> *qtop = p_qtop;
> }
>
> +/* Return the AQ depth for a Queue Group Index */
> +static inline int
> +aqDepth(int qg_idx, struct acc100_conf *acc100_conf)
> +{
> + struct rte_q_topology_t *q_top = NULL;
> + int acc_enum = accFromQgid(qg_idx, acc100_conf);
> + qtopFromAcc(&q_top, acc_enum, acc100_conf);
> + if (unlikely(q_top == NULL))
> + return 0;
This error is not handled well by the callers.
aqNum is similar.
> + return q_top->aq_depth_log2;
> +}
> +
> +/* Return the AQ depth for a Queue Group Index */
> +static inline int
> +aqNum(int qg_idx, struct acc100_conf *acc100_conf)
> +{
> + struct rte_q_topology_t *q_top = NULL;
> + int acc_enum = accFromQgid(qg_idx, acc100_conf);
> + qtopFromAcc(&q_top, acc_enum, acc100_conf);
> + if (unlikely(q_top == NULL))
> + return 0;
> + return q_top->num_aqs_per_groups;
> +}
> +
> static void
> initQTop(struct acc100_conf *acc100_conf)
> {
> @@ -4177,3 +4221,464 @@ static int acc100_pci_remove(struct rte_pci_device *pci_dev)
> RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME, pci_id_acc100_pf_map);
> RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME, acc100_pci_vf_driver);
> RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME, pci_id_acc100_vf_map);
> +
> +/*
> + * Implementation to fix the power on status of some 5GUL engines
> + * This requires DMA permission if ported outside DPDK
This sounds like a workaround; can more detail be added here?
> + */
> +static void
> +poweron_cleanup(struct rte_bbdev *bbdev, struct acc100_device *d,
> + struct acc100_conf *conf)
> +{
> + int i, template_idx, qg_idx;
> + uint32_t address, status, payload;
> + printf("Need to clear power-on 5GUL status in internal memory\n");
> + /* Reset LDPC Cores */
> + for (i = 0; i < ACC100_ENGINES_MAX; i++)
> + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> + ACC100_ENGINE_OFFSET * i, ACC100_RESET_HI);
> + usleep(LONG_WAIT);
> + for (i = 0; i < ACC100_ENGINES_MAX; i++)
> + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> + ACC100_ENGINE_OFFSET * i, ACC100_RESET_LO);
> + usleep(LONG_WAIT);
> + /* Prepare dummy workload */
> + alloc_2x64mb_sw_rings_mem(bbdev, d, 0);
> + /* Set base addresses */
> + uint32_t phys_high = (uint32_t)(d->sw_rings_phys >> 32);
> + uint32_t phys_low = (uint32_t)(d->sw_rings_phys &
> + ~(ACC100_SIZE_64MBYTE-1));
> + acc100_reg_write(d, HWPfDmaFec5GulDescBaseHiRegVf, phys_high);
> + acc100_reg_write(d, HWPfDmaFec5GulDescBaseLoRegVf, phys_low);
> +
> + /* Descriptor for a dummy 5GUL code block processing*/
> + union acc100_dma_desc *desc = NULL;
> + desc = d->sw_rings;
> + desc->req.data_ptrs[0].address = d->sw_rings_phys +
> + ACC100_DESC_FCW_OFFSET;
> + desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN;
> + desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
> + desc->req.data_ptrs[0].last = 0;
> + desc->req.data_ptrs[0].dma_ext = 0;
> + desc->req.data_ptrs[1].address = d->sw_rings_phys + 512;
> + desc->req.data_ptrs[1].blkid = ACC100_DMA_BLKID_IN;
> + desc->req.data_ptrs[1].last = 1;
> + desc->req.data_ptrs[1].dma_ext = 0;
> + desc->req.data_ptrs[1].blen = 44;
> + desc->req.data_ptrs[2].address = d->sw_rings_phys + 1024;
> + desc->req.data_ptrs[2].blkid = ACC100_DMA_BLKID_OUT_ENC;
> + desc->req.data_ptrs[2].last = 1;
> + desc->req.data_ptrs[2].dma_ext = 0;
> + desc->req.data_ptrs[2].blen = 5;
> + /* Dummy FCW */
> + desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
> + desc->req.fcw_ld.qm = 1;
> + desc->req.fcw_ld.nfiller = 30;
> + desc->req.fcw_ld.BG = 2 - 1;
> + desc->req.fcw_ld.Zc = 7;
> + desc->req.fcw_ld.ncb = 350;
> + desc->req.fcw_ld.rm_e = 4;
> + desc->req.fcw_ld.itmax = 10;
> + desc->req.fcw_ld.gain_i = 1;
> + desc->req.fcw_ld.gain_h = 1;
> +
> + int engines_to_restart[SIG_UL_5G_LAST + 1] = {0};
> + int num_failed_engine = 0;
> + /* Detect engines in undefined state */
> + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> + template_idx++) {
> + /* Check engine power-on status */
> + address = HwPfFecUl5gIbDebugReg +
> + ACC100_ENGINE_OFFSET * template_idx;
> + status = (acc100_reg_read(d, address) >> 4) & 0xF;
> + if (status == 0) {
> + engines_to_restart[num_failed_engine] = template_idx;
> + num_failed_engine++;
> + }
> + }
> +
> + int numQqsAcc = conf->q_ul_5g.num_qgroups;
> + int numQgs = conf->q_ul_5g.num_qgroups;
> + payload = 0;
> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
> + payload |= (1 << qg_idx);
> + /* Force each engine which is in unspecified state */
> + for (i = 0; i < num_failed_engine; i++) {
> + int failed_engine = engines_to_restart[i];
> + printf("Force engine %d\n", failed_engine);
> + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> + template_idx++) {
> + address = HWPfQmgrGrpTmplateReg4Indx
> + + BYTES_IN_WORD * template_idx;
> + if (template_idx == failed_engine)
> + acc100_reg_write(d, address, payload);
> + else
> + acc100_reg_write(d, address, 0);
> + }
> + /* Reset descriptor header */
> + desc->req.word0 = ACC100_DMA_DESC_TYPE;
> + desc->req.word1 = 0;
> + desc->req.word2 = 0;
> + desc->req.word3 = 0;
> + desc->req.numCBs = 1;
> + desc->req.m2dlen = 2;
> + desc->req.d2mlen = 1;
> + /* Enqueue the code block for processing */
> + union acc100_enqueue_reg_fmt enq_req;
> + enq_req.val = 0;
> + enq_req.addr_offset = ACC100_DESC_OFFSET;
> + enq_req.num_elem = 1;
> + enq_req.req_elem_addr = 0;
> + rte_wmb();
> + acc100_reg_write(d, HWPfQmgrIngressAq + 0x100, enq_req.val);
> + usleep(LONG_WAIT * 100);
> + if (desc->req.word0 != 2)
> + printf("DMA Response %#"PRIx32"\n", desc->req.word0);
> + }
> +
> + /* Reset LDPC Cores */
> + for (i = 0; i < ACC100_ENGINES_MAX; i++)
> + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> + ACC100_ENGINE_OFFSET * i, ACC100_RESET_HI);
> + usleep(LONG_WAIT);
> + for (i = 0; i < ACC100_ENGINES_MAX; i++)
> + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> + ACC100_ENGINE_OFFSET * i, ACC100_RESET_LO);
> + usleep(LONG_WAIT);
> + acc100_reg_write(d, HWPfHi5GHardResetReg, ACC100_RESET_HARD);
> + usleep(LONG_WAIT);
> + int numEngines = 0;
> + /* Check engine power-on status again */
> + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> + template_idx++) {
> + address = HwPfFecUl5gIbDebugReg +
> + ACC100_ENGINE_OFFSET * template_idx;
> + status = (acc100_reg_read(d, address) >> 4) & 0xF;
> + address = HWPfQmgrGrpTmplateReg4Indx
> + + BYTES_IN_WORD * template_idx;
> + if (status == 1) {
> + acc100_reg_write(d, address, payload);
> + numEngines++;
> + } else
> + acc100_reg_write(d, address, 0);
> + }
> + printf("Number of 5GUL engines %d\n", numEngines);
> +
> + if (d->sw_rings_base != NULL)
> + rte_free(d->sw_rings_base);
> + usleep(LONG_WAIT);
> +}
> +
> +/* Initial configuration of a ACC100 device prior to running configure() */
> +int
> +acc100_configure(const char *dev_name, struct acc100_conf *conf)
> +{
> + rte_bbdev_log(INFO, "acc100_configure");
> + uint32_t payload, address, status;
Maybe value or data would be a better variable name than payload.
That would mean changing acc100_reg_write.
> + int qg_idx, template_idx, vf_idx, acc, i;
> + struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name);
> +
> + /* Compile time checks */
> + RTE_BUILD_BUG_ON(sizeof(struct acc100_dma_req_desc) != 256);
> + RTE_BUILD_BUG_ON(sizeof(union acc100_dma_desc) != 256);
> + RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_td) != 24);
> + RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_te) != 32);
> +
> + if (bbdev == NULL) {
> + rte_bbdev_log(ERR,
> + "Invalid dev_name (%s), or device is not yet initialised",
> + dev_name);
> + return -ENODEV;
> + }
> + struct acc100_device *d = bbdev->data->dev_private;
> +
> + /* Store configuration */
> + rte_memcpy(&d->acc100_conf, conf, sizeof(d->acc100_conf));
> +
> + /* PCIe Bridge configuration */
> + acc100_reg_write(d, HwPfPcieGpexBridgeControl, ACC100_CFG_PCI_BRIDGE);
> + for (i = 1; i < 17; i++)
17 is a magic number; use a #define.
This is a general issue.
> + acc100_reg_write(d,
> + HwPfPcieGpexAxiAddrMappingWindowPexBaseHigh
> + + i * 16, 0);
> +
> + /* PCIe Link Trainiing and Status State Machine */
> + acc100_reg_write(d, HwPfPcieGpexLtssmStateCntrl, 0xDFC00000);
> +
> + /* Prevent blocking AXI read on BRESP for AXI Write */
> + address = HwPfPcieGpexAxiPioControl;
> + payload = ACC100_CFG_PCI_AXI;
> + acc100_reg_write(d, address, payload);
> +
> + /* 5GDL PLL phase shift */
> + acc100_reg_write(d, HWPfChaDl5gPllPhshft0, 0x1);
> +
> + /* Explicitly releasing AXI as this may be stopped after PF FLR/BME */
> + address = HWPfDmaAxiControl;
> + payload = 1;
> + acc100_reg_write(d, address, payload);
> +
> + /* DDR Configuration */
> + address = HWPfDdrBcTim6;
> + payload = acc100_reg_read(d, address);
> + payload &= 0xFFFFFFFB; /* Bit 2 */
> +#ifdef ACC100_DDR_ECC_ENABLE
> + payload |= 0x4;
> +#endif
> + acc100_reg_write(d, address, payload);
> + address = HWPfDdrPhyDqsCountNum;
> +#ifdef ACC100_DDR_ECC_ENABLE
> + payload = 9;
> +#else
> + payload = 8;
> +#endif
> + acc100_reg_write(d, address, payload);
> +
> + /* Set default descriptor signature */
> + address = HWPfDmaDescriptorSignatuture;
> + payload = 0;
> + acc100_reg_write(d, address, payload);
> +
> + /* Enable the Error Detection in DMA */
> + payload = ACC100_CFG_DMA_ERROR;
> + address = HWPfDmaErrorDetectionEn;
> + acc100_reg_write(d, address, payload);
> +
> + /* AXI Cache configuration */
> + payload = ACC100_CFG_AXI_CACHE;
> + address = HWPfDmaAxcacheReg;
> + acc100_reg_write(d, address, payload);
> +
> + /* Default DMA Configuration (Qmgr Enabled) */
> + address = HWPfDmaConfig0Reg;
> + payload = 0;
> + acc100_reg_write(d, address, payload);
> + address = HWPfDmaQmanen;
> + payload = 0;
> + acc100_reg_write(d, address, payload);
> +
> + /* Default RLIM/ALEN configuration */
> + address = HWPfDmaConfig1Reg;
> + payload = (1 << 31) + (23 << 8) + (1 << 6) + 7;
> + acc100_reg_write(d, address, payload);
> +
> + /* Configure DMA Qmanager addresses */
> + address = HWPfDmaQmgrAddrReg;
> + payload = HWPfQmgrEgressQueuesTemplate;
> + acc100_reg_write(d, address, payload);
> +
> + /* ===== Qmgr Configuration ===== */
> + /* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2 for UL */
> + int totalQgs = conf->q_ul_4g.num_qgroups +
> + conf->q_ul_5g.num_qgroups +
> + conf->q_dl_4g.num_qgroups +
> + conf->q_dl_5g.num_qgroups;
> + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> + address = HWPfQmgrDepthLog2Grp +
> + BYTES_IN_WORD * qg_idx;
> + payload = aqDepth(qg_idx, conf);
> + acc100_reg_write(d, address, payload);
> + address = HWPfQmgrTholdGrp +
> + BYTES_IN_WORD * qg_idx;
> + payload = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1));
> + acc100_reg_write(d, address, payload);
> + }
> +
> + /* Template Priority in incremental order */
> + for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
> + template_idx++) {
> + address = HWPfQmgrGrpTmplateReg0Indx +
> + BYTES_IN_WORD * (template_idx % 8);
> + payload = TMPL_PRI_0;
> + acc100_reg_write(d, address, payload);
> + address = HWPfQmgrGrpTmplateReg1Indx +
> + BYTES_IN_WORD * (template_idx % 8);
> + payload = TMPL_PRI_1;
> + acc100_reg_write(d, address, payload);
> + address = HWPfQmgrGrpTmplateReg2indx +
> + BYTES_IN_WORD * (template_idx % 8);
> + payload = TMPL_PRI_2;
> + acc100_reg_write(d, address, payload);
> + address = HWPfQmgrGrpTmplateReg3Indx +
> + BYTES_IN_WORD * (template_idx % 8);
> + payload = TMPL_PRI_3;
> + acc100_reg_write(d, address, payload);
> + }
> +
> + address = HWPfQmgrGrpPriority;
> + payload = ACC100_CFG_QMGR_HI_P;
> + acc100_reg_write(d, address, payload);
> +
> + /* Template Configuration */
> + for (template_idx = 0; template_idx < ACC100_NUM_TMPL; template_idx++) {
> + payload = 0;
> + address = HWPfQmgrGrpTmplateReg4Indx
> + + BYTES_IN_WORD * template_idx;
> + acc100_reg_write(d, address, payload);
> + }
> + /* 4GUL */
> + int numQgs = conf->q_ul_4g.num_qgroups;
> + int numQqsAcc = 0;
> + payload = 0;
> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
> + payload |= (1 << qg_idx);
> + for (template_idx = SIG_UL_4G; template_idx <= SIG_UL_4G_LAST;
> + template_idx++) {
> + address = HWPfQmgrGrpTmplateReg4Indx
> + + BYTES_IN_WORD*template_idx;
> + acc100_reg_write(d, address, payload);
> + }
> + /* 5GUL */
> + numQqsAcc += numQgs;
> + numQgs = conf->q_ul_5g.num_qgroups;
> + payload = 0;
> + int numEngines = 0;
> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
> + payload |= (1 << qg_idx);
> + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> + template_idx++) {
> + /* Check engine power-on status */
> + address = HwPfFecUl5gIbDebugReg +
> + ACC100_ENGINE_OFFSET * template_idx;
> + status = (acc100_reg_read(d, address) >> 4) & 0xF;
> + address = HWPfQmgrGrpTmplateReg4Indx
> + + BYTES_IN_WORD * template_idx;
> + if (status == 1) {
> + acc100_reg_write(d, address, payload);
> + numEngines++;
> + } else
> + acc100_reg_write(d, address, 0);
> + #if RTE_ACC100_SINGLE_FEC == 1
#if should be at the start of the line.
> + payload = 0;
> + #endif
> + }
> + printf("Number of 5GUL engines %d\n", numEngines);
> + /* 4GDL */
> + numQqsAcc += numQgs;
> + numQgs = conf->q_dl_4g.num_qgroups;
> + payload = 0;
> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
> + payload |= (1 << qg_idx);
> + for (template_idx = SIG_DL_4G; template_idx <= SIG_DL_4G_LAST;
> + template_idx++) {
> + address = HWPfQmgrGrpTmplateReg4Indx
> + + BYTES_IN_WORD*template_idx;
> + acc100_reg_write(d, address, payload);
> + #if RTE_ACC100_SINGLE_FEC == 1
> + payload = 0;
> + #endif
> + }
> + /* 5GDL */
> + numQqsAcc += numQgs;
> + numQgs = conf->q_dl_5g.num_qgroups;
> + payload = 0;
> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
> + payload |= (1 << qg_idx);
> + for (template_idx = SIG_DL_5G; template_idx <= SIG_DL_5G_LAST;
> + template_idx++) {
> + address = HWPfQmgrGrpTmplateReg4Indx
> + + BYTES_IN_WORD*template_idx;
> + acc100_reg_write(d, address, payload);
> + #if RTE_ACC100_SINGLE_FEC == 1
> + payload = 0;
> + #endif
> + }
> +
> + /* Queue Group Function mapping */
> + int qman_func_id[5] = {0, 2, 1, 3, 4};
> + address = HWPfQmgrGrpFunction0;
> + payload = 0;
> + for (qg_idx = 0; qg_idx < 8; qg_idx++) {
> + acc = accFromQgid(qg_idx, conf);
> + payload |= qman_func_id[acc]<<(qg_idx * 4);
> + }
> + acc100_reg_write(d, address, payload);
> +
> + /* Configuration of the Arbitration QGroup depth to 1 */
> + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> + address = HWPfQmgrArbQDepthGrp +
> + BYTES_IN_WORD * qg_idx;
> + payload = 0;
> + acc100_reg_write(d, address, payload);
> + }
> +
> + /* Enabling AQueues through the Queue hierarchy*/
> + for (vf_idx = 0; vf_idx < ACC100_NUM_VFS; vf_idx++) {
> + for (qg_idx = 0; qg_idx < ACC100_NUM_QGRPS; qg_idx++) {
> + payload = 0;
> + if (vf_idx < conf->num_vf_bundles &&
> + qg_idx < totalQgs)
> + payload = (1 << aqNum(qg_idx, conf)) - 1;
> + address = HWPfQmgrAqEnableVf
> + + vf_idx * BYTES_IN_WORD;
> + payload += (qg_idx << 16);
> + acc100_reg_write(d, address, payload);
> + }
> + }
> +
> + /* This pointer to ARAM (256kB) is shifted by 2 (4B per register) */
> + uint32_t aram_address = 0;
> + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> + for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
> + address = HWPfQmgrVfBaseAddr + vf_idx
> + * BYTES_IN_WORD + qg_idx
> + * BYTES_IN_WORD * 64;
> + payload = aram_address;
> + acc100_reg_write(d, address, payload);
> + /* Offset ARAM Address for next memory bank
> + * - increment of 4B
> + */
> + aram_address += aqNum(qg_idx, conf) *
> + (1 << aqDepth(qg_idx, conf));
> + }
> + }
> +
> + if (aram_address > WORDS_IN_ARAM_SIZE) {
> + rte_bbdev_log(ERR, "ARAM Configuration not fitting %d %d\n",
> + aram_address, WORDS_IN_ARAM_SIZE);
> + return -EINVAL;
> + }
> +
> + /* ==== HI Configuration ==== */
> +
> + /* Prevent Block on Transmit Error */
> + address = HWPfHiBlockTransmitOnErrorEn;
> + payload = 0;
> + acc100_reg_write(d, address, payload);
> + /* Prevents to drop MSI */
> + address = HWPfHiMsiDropEnableReg;
> + payload = 0;
> + acc100_reg_write(d, address, payload);
> + /* Set the PF Mode register */
> + address = HWPfHiPfMode;
> + payload = (conf->pf_mode_en) ? 2 : 0;
> + acc100_reg_write(d, address, payload);
> + /* Enable Error Detection in HW */
> + address = HWPfDmaErrorDetectionEn;
> + payload = 0x3D7;
> + acc100_reg_write(d, address, payload);
> +
> + /* QoS overflow init */
> + payload = 1;
> + address = HWPfQosmonAEvalOverflow0;
> + acc100_reg_write(d, address, payload);
> + address = HWPfQosmonBEvalOverflow0;
> + acc100_reg_write(d, address, payload);
> +
> + /* HARQ DDR Configuration */
> + unsigned int ddrSizeInMb = 512; /* Fixed to 512 MB per VF for now */
> + for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
> + address = HWPfDmaVfDdrBaseRw + vf_idx
> + * 0x10;
> + payload = ((vf_idx * (ddrSizeInMb / 64)) << 16) +
> + (ddrSizeInMb - 1);
> + acc100_reg_write(d, address, payload);
> + }
> + usleep(LONG_WAIT);
Is the sleep needed here? The reg_write has one.
> +
Since this seems like a workaround, add a comment here.
Tom
> + if (numEngines < (SIG_UL_5G_LAST + 1))
> + poweron_cleanup(bbdev, d, conf);
> +
> + rte_bbdev_log_debug("PF Tip configuration complete for %s", dev_name);
> + return 0;
> +}
> diff --git a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> index 4a76d1d..91c234d 100644
> --- a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> +++ b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> @@ -1,3 +1,10 @@
> DPDK_21 {
> local: *;
> };
> +
> +EXPERIMENTAL {
> + global:
> +
> + acc100_configure;
> +
> +};
Hi Tom,
> From: Tom Rix <trix@redhat.com>
> On 9/28/20 5:29 PM, Nicolas Chautru wrote:
> > Add configure function to configure the PF from within the
> > bbdev-test itself without external application configuration the device.
> >
> > Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> > Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
> > ---
> > app/test-bbdev/test_bbdev_perf.c | 72 +++
> > doc/guides/rel_notes/release_20_11.rst | 5 +
> > drivers/baseband/acc100/meson.build | 2 +
> > drivers/baseband/acc100/rte_acc100_cfg.h | 17 +
> > drivers/baseband/acc100/rte_acc100_pmd.c | 505
> +++++++++++++++++++++
> > .../acc100/rte_pmd_bbdev_acc100_version.map | 7 +
> > 6 files changed, 608 insertions(+)
> >
> > diff --git a/app/test-bbdev/test_bbdev_perf.c
> > b/app/test-bbdev/test_bbdev_perf.c
> > index 45c0d62..32f23ff 100644
> > --- a/app/test-bbdev/test_bbdev_perf.c
> > +++ b/app/test-bbdev/test_bbdev_perf.c
> > @@ -52,6 +52,18 @@
> > #define FLR_5G_TIMEOUT 610
> > #endif
> >
> > +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
> > +#include <rte_acc100_cfg.h>
> > +#define ACC100PF_DRIVER_NAME ("intel_acc100_pf")
> > +#define ACC100VF_DRIVER_NAME ("intel_acc100_vf")
> > +#define ACC100_QMGR_NUM_AQS 16
> > +#define ACC100_QMGR_NUM_QGS 2
> > +#define ACC100_QMGR_AQ_DEPTH 5
> > +#define ACC100_QMGR_INVALID_IDX -1
> > +#define ACC100_QMGR_RR 1
> > +#define ACC100_QOS_GBR 0
> > +#endif
> > +
> > #define OPS_CACHE_SIZE 256U
> > #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
> >
> > @@ -653,6 +665,66 @@ typedef int (test_case_function)(struct
> active_device *ad,
> > info->dev_name);
> > }
> > #endif
> > +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
> seems like this function would break if one of the other bbdev's were
> #defined.
No, these are independent. By default they are all defined.
> > + if ((get_init_device() == true) &&
> > + (!strcmp(info->drv.driver_name,
> ACC100PF_DRIVER_NAME))) {
> > + struct acc100_conf conf;
> > + unsigned int i;
> > +
> > + printf("Configure ACC100 FEC Driver %s with default
> values\n",
> > + info->drv.driver_name);
> > +
> > + /* clear default configuration before initialization */
> > + memset(&conf, 0, sizeof(struct acc100_conf));
> > +
> > + /* Always set in PF mode for built-in configuration */
> > + conf.pf_mode_en = true;
> > + for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
> > + conf.arb_dl_4g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > + conf.arb_dl_4g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > + conf.arb_dl_4g[i].round_robin_weight =
> ACC100_QMGR_RR;
> > + conf.arb_ul_4g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > + conf.arb_ul_4g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > + conf.arb_ul_4g[i].round_robin_weight =
> ACC100_QMGR_RR;
> > + conf.arb_dl_5g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > + conf.arb_dl_5g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > + conf.arb_dl_5g[i].round_robin_weight =
> ACC100_QMGR_RR;
> > + conf.arb_ul_5g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > + conf.arb_ul_5g[i].gbr_threshold1 =
> ACC100_QOS_GBR;
> > + conf.arb_ul_5g[i].round_robin_weight =
> ACC100_QMGR_RR;
> > + }
> > +
> > + conf.input_pos_llr_1_bit = true;
> > + conf.output_pos_llr_1_bit = true;
> > + conf.num_vf_bundles = 1; /**< Number of VF bundles to
> setup */
> > +
> > + conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
> > + conf.q_ul_4g.first_qgroup_index =
> ACC100_QMGR_INVALID_IDX;
> > + conf.q_ul_4g.num_aqs_per_groups =
> ACC100_QMGR_NUM_AQS;
> > + conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> > + conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
> > + conf.q_dl_4g.first_qgroup_index =
> ACC100_QMGR_INVALID_IDX;
> > + conf.q_dl_4g.num_aqs_per_groups =
> ACC100_QMGR_NUM_AQS;
> > + conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> > + conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
> > + conf.q_ul_5g.first_qgroup_index =
> ACC100_QMGR_INVALID_IDX;
> > + conf.q_ul_5g.num_aqs_per_groups =
> ACC100_QMGR_NUM_AQS;
> > + conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> > + conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
> > + conf.q_dl_5g.first_qgroup_index =
> ACC100_QMGR_INVALID_IDX;
> > + conf.q_dl_5g.num_aqs_per_groups =
> ACC100_QMGR_NUM_AQS;
> > + conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> > +
> > + /* setup PF with configuration information */
> > + ret = acc100_configure(info->dev_name, &conf);
> > + TEST_ASSERT_SUCCESS(ret,
> > + "Failed to configure ACC100 PF for bbdev
> %s",
> > + info->dev_name);
> > + /* Let's refresh this now this is configured */
> > + }
> > + rte_bbdev_info_get(dev_id, info);
> The other bbdev's do not call rte_bbdev_info_get, can this be removed ?
Actually, it should be added outside for all versions post-configuration. Thanks
> > +#endif
> > +
> > nb_queues = RTE_MIN(rte_lcore_count(), info- drv.max_num_queues);
> > nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
> >
> > diff --git a/doc/guides/rel_notes/release_20_11.rst
> > b/doc/guides/rel_notes/release_20_11.rst
> > index 73ac08f..c8d0586 100644
> > --- a/doc/guides/rel_notes/release_20_11.rst
> > +++ b/doc/guides/rel_notes/release_20_11.rst
> > @@ -55,6 +55,11 @@ New Features
> > Also, make sure to start the actual text at the margin.
> > =======================================================
> >
> > +* **Added Intel ACC100 bbdev PMD.**
> > +
> > + Added a new ``acc100`` bbdev driver for the Intel\ |reg| ACC100
> > + accelerator also known as Mount Bryce. See the
> > + :doc:`../bbdevs/acc100` BBDEV guide for more details on this new driver.
> >
> > Removed Items
> > -------------
> > diff --git a/drivers/baseband/acc100/meson.build
> > b/drivers/baseband/acc100/meson.build
> > index 8afafc2..7ac44dc 100644
> > --- a/drivers/baseband/acc100/meson.build
> > +++ b/drivers/baseband/acc100/meson.build
> > @@ -4,3 +4,5 @@
> > deps += ['bbdev', 'bus_vdev', 'ring', 'pci', 'bus_pci']
> >
> > sources = files('rte_acc100_pmd.c')
> > +
> > +install_headers('rte_acc100_cfg.h')
> > diff --git a/drivers/baseband/acc100/rte_acc100_cfg.h
> > b/drivers/baseband/acc100/rte_acc100_cfg.h
> > index 73bbe36..7f523bc 100644
> > --- a/drivers/baseband/acc100/rte_acc100_cfg.h
> > +++ b/drivers/baseband/acc100/rte_acc100_cfg.h
> > @@ -89,6 +89,23 @@ struct acc100_conf {
> > struct rte_arbitration_t arb_dl_5g[RTE_ACC100_NUM_VFS]; };
> >
> > +/**
> > + * Configure a ACC100 device
> > + *
> > + * @param dev_name
> > + * The name of the device. This is the short form of PCI BDF, e.g. 00:01.0.
> > + * It can also be retrieved for a bbdev device from the dev_name field in
> the
> > + * rte_bbdev_info structure returned by rte_bbdev_info_get().
> > + * @param conf
> > + * Configuration to apply to ACC100 HW.
> > + *
> > + * @return
> > + * Zero on success, negative value on failure.
> > + */
> > +__rte_experimental
> > +int
> > +acc100_configure(const char *dev_name, struct acc100_conf *conf);
> > +
> > #ifdef __cplusplus
> > }
> > #endif
> > diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
> > b/drivers/baseband/acc100/rte_acc100_pmd.c
> > index 3589814..b50dd32 100644
> > --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> > +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> > @@ -85,6 +85,26 @@
> >
> > enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC};
> >
> > +/* Return the accelerator enum for a Queue Group Index */ static
> > +inline int accFromQgid(int qg_idx, const struct acc100_conf
> > +*acc100_conf) {
> > + int accQg[ACC100_NUM_QGRPS];
> > + int NumQGroupsPerFn[NUM_ACC];
> > + int acc, qgIdx, qgIndex = 0;
> > + for (qgIdx = 0; qgIdx < ACC100_NUM_QGRPS; qgIdx++)
> > + accQg[qgIdx] = 0;
> > + NumQGroupsPerFn[UL_4G] = acc100_conf->q_ul_4g.num_qgroups;
> > + NumQGroupsPerFn[UL_5G] = acc100_conf->q_ul_5g.num_qgroups;
> > + NumQGroupsPerFn[DL_4G] = acc100_conf->q_dl_4g.num_qgroups;
> > + NumQGroupsPerFn[DL_5G] = acc100_conf->q_dl_5g.num_qgroups;
> > + for (acc = UL_4G; acc < NUM_ACC; acc++)
> > + for (qgIdx = 0; qgIdx < NumQGroupsPerFn[acc]; qgIdx++)
> > + accQg[qgIndex++] = acc;
>
> This looks inefficient, is there a way this could be calculated
> without filling arrays to
>
> access 1 value ?
That is not time critical, and the same common code is run each time.
>
> > + acc = accQg[qg_idx];
> > + return acc;
> > +}
> > +
> > /* Return the queue topology for a Queue Group Index */ static
> > inline void qtopFromAcc(struct rte_q_topology_t **qtop, int
> > acc_enum, @@ -113,6 +133,30 @@
> > *qtop = p_qtop;
> > }
> >
> > +/* Return the AQ depth for a Queue Group Index */ static inline int
> > +aqDepth(int qg_idx, struct acc100_conf *acc100_conf) {
> > + struct rte_q_topology_t *q_top = NULL;
> > + int acc_enum = accFromQgid(qg_idx, acc100_conf);
> > + qtopFromAcc(&q_top, acc_enum, acc100_conf);
> > + if (unlikely(q_top == NULL))
> > + return 0;
>
> > This error is not handled well by the callers.
>
> aqNum is similar.
This fails on a consistent basis, by having no queue available and handling this as the default case.
>
> > + return q_top->aq_depth_log2;
> > +}
> > +
> > +/* Return the AQ depth for a Queue Group Index */ static inline int
> > +aqNum(int qg_idx, struct acc100_conf *acc100_conf) {
> > + struct rte_q_topology_t *q_top = NULL;
> > + int acc_enum = accFromQgid(qg_idx, acc100_conf);
> > + qtopFromAcc(&q_top, acc_enum, acc100_conf);
> > + if (unlikely(q_top == NULL))
> > + return 0;
> > + return q_top->num_aqs_per_groups;
> > +}
> > +
> > static void
> > initQTop(struct acc100_conf *acc100_conf) { @@ -4177,3 +4221,464
> > @@ static int acc100_pci_remove(struct rte_pci_device *pci_dev)
> > RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME,
> > pci_id_acc100_pf_map);
> RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME,
> > acc100_pci_vf_driver);
> > RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME,
> > pci_id_acc100_vf_map);
> > +
> > +/*
> > + * Implementation to fix the power on status of some 5GUL engines
> > + * This requires DMA permission if ported outside DPDK
> This sounds like a workaround, can more detail be added here ?
There are comments through the code I believe:
- /* Detect engines in undefined state */
- /* Force each engine which is in unspecified state */
- /* Reset LDPC Cores */
- /* Check engine power-on status again */
Do you believe this is not explicit enough? Power-on status may be in an undefined state, hence these engines are activated with a dummy payload to make sure they are in a predictable state once configuration is done.
> > + */
> > +static void
> > +poweron_cleanup(struct rte_bbdev *bbdev, struct acc100_device *d,
> > + struct acc100_conf *conf)
> > +{
> > + int i, template_idx, qg_idx;
> > + uint32_t address, status, payload;
> > + printf("Need to clear power-on 5GUL status in internal memory\n");
> > + /* Reset LDPC Cores */
> > + for (i = 0; i < ACC100_ENGINES_MAX; i++)
> > + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> > + ACC100_ENGINE_OFFSET * i,
> ACC100_RESET_HI);
> > + usleep(LONG_WAIT);
> > + for (i = 0; i < ACC100_ENGINES_MAX; i++)
> > + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> > + ACC100_ENGINE_OFFSET * i,
> ACC100_RESET_LO);
> > + usleep(LONG_WAIT);
> > + /* Prepare dummy workload */
> > + alloc_2x64mb_sw_rings_mem(bbdev, d, 0);
> > + /* Set base addresses */
> > + uint32_t phys_high = (uint32_t)(d->sw_rings_phys >> 32);
> > + uint32_t phys_low = (uint32_t)(d->sw_rings_phys &
> > + ~(ACC100_SIZE_64MBYTE-1));
> > + acc100_reg_write(d, HWPfDmaFec5GulDescBaseHiRegVf,
> phys_high);
> > + acc100_reg_write(d, HWPfDmaFec5GulDescBaseLoRegVf, phys_low);
> > +
> > + /* Descriptor for a dummy 5GUL code block processing*/
> > + union acc100_dma_desc *desc = NULL;
> > + desc = d->sw_rings;
> > + desc->req.data_ptrs[0].address = d->sw_rings_phys +
> > + ACC100_DESC_FCW_OFFSET;
> > + desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN;
> > + desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
> > + desc->req.data_ptrs[0].last = 0;
> > + desc->req.data_ptrs[0].dma_ext = 0;
> > + desc->req.data_ptrs[1].address = d->sw_rings_phys + 512;
> > + desc->req.data_ptrs[1].blkid = ACC100_DMA_BLKID_IN;
> > + desc->req.data_ptrs[1].last = 1;
> > + desc->req.data_ptrs[1].dma_ext = 0;
> > + desc->req.data_ptrs[1].blen = 44;
> > + desc->req.data_ptrs[2].address = d->sw_rings_phys + 1024;
> > + desc->req.data_ptrs[2].blkid = ACC100_DMA_BLKID_OUT_ENC;
> > + desc->req.data_ptrs[2].last = 1;
> > + desc->req.data_ptrs[2].dma_ext = 0;
> > + desc->req.data_ptrs[2].blen = 5;
> > + /* Dummy FCW */
> > + desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
> > + desc->req.fcw_ld.qm = 1;
> > + desc->req.fcw_ld.nfiller = 30;
> > + desc->req.fcw_ld.BG = 2 - 1;
> > + desc->req.fcw_ld.Zc = 7;
> > + desc->req.fcw_ld.ncb = 350;
> > + desc->req.fcw_ld.rm_e = 4;
> > + desc->req.fcw_ld.itmax = 10;
> > + desc->req.fcw_ld.gain_i = 1;
> > + desc->req.fcw_ld.gain_h = 1;
> > +
> > + int engines_to_restart[SIG_UL_5G_LAST + 1] = {0};
> > + int num_failed_engine = 0;
> > + /* Detect engines in undefined state */
> > + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> > + template_idx++) {
> > + /* Check engine power-on status */
> > + address = HwPfFecUl5gIbDebugReg +
> > + ACC100_ENGINE_OFFSET * template_idx;
> > + status = (acc100_reg_read(d, address) >> 4) & 0xF;
> > + if (status == 0) {
> > + engines_to_restart[num_failed_engine] =
> template_idx;
> > + num_failed_engine++;
> > + }
> > + }
> > +
> > + int numQqsAcc = conf->q_ul_5g.num_qgroups;
> > + int numQgs = conf->q_ul_5g.num_qgroups;
> > + payload = 0;
> > + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> qg_idx++)
> > + payload |= (1 << qg_idx);
> > + /* Force each engine which is in unspecified state */
> > + for (i = 0; i < num_failed_engine; i++) {
> > + int failed_engine = engines_to_restart[i];
> > + printf("Force engine %d\n", failed_engine);
> > + for (template_idx = SIG_UL_5G; template_idx <=
> SIG_UL_5G_LAST;
> > + template_idx++) {
> > + address = HWPfQmgrGrpTmplateReg4Indx
> > + + BYTES_IN_WORD * template_idx;
> > + if (template_idx == failed_engine)
> > + acc100_reg_write(d, address, payload);
> > + else
> > + acc100_reg_write(d, address, 0);
> > + }
> > + /* Reset descriptor header */
> > + desc->req.word0 = ACC100_DMA_DESC_TYPE;
> > + desc->req.word1 = 0;
> > + desc->req.word2 = 0;
> > + desc->req.word3 = 0;
> > + desc->req.numCBs = 1;
> > + desc->req.m2dlen = 2;
> > + desc->req.d2mlen = 1;
> > + /* Enqueue the code block for processing */
> > + union acc100_enqueue_reg_fmt enq_req;
> > + enq_req.val = 0;
> > + enq_req.addr_offset = ACC100_DESC_OFFSET;
> > + enq_req.num_elem = 1;
> > + enq_req.req_elem_addr = 0;
> > + rte_wmb();
> > + acc100_reg_write(d, HWPfQmgrIngressAq + 0x100,
> enq_req.val);
> > + usleep(LONG_WAIT * 100);
> > + if (desc->req.word0 != 2)
> > + printf("DMA Response %#"PRIx32"\n", desc-
> >req.word0);
> > + }
> > +
> > + /* Reset LDPC Cores */
> > + for (i = 0; i < ACC100_ENGINES_MAX; i++)
> > + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> > + ACC100_ENGINE_OFFSET * i,
> ACC100_RESET_HI);
> > + usleep(LONG_WAIT);
> > + for (i = 0; i < ACC100_ENGINES_MAX; i++)
> > + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> > + ACC100_ENGINE_OFFSET * i,
> ACC100_RESET_LO);
> > + usleep(LONG_WAIT);
> > + acc100_reg_write(d, HWPfHi5GHardResetReg,
> ACC100_RESET_HARD);
> > + usleep(LONG_WAIT);
> > + int numEngines = 0;
> > + /* Check engine power-on status again */
> > + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> > + template_idx++) {
> > + address = HwPfFecUl5gIbDebugReg +
> > + ACC100_ENGINE_OFFSET * template_idx;
> > + status = (acc100_reg_read(d, address) >> 4) & 0xF;
> > + address = HWPfQmgrGrpTmplateReg4Indx
> > + + BYTES_IN_WORD * template_idx;
> > + if (status == 1) {
> > + acc100_reg_write(d, address, payload);
> > + numEngines++;
> > + } else
> > + acc100_reg_write(d, address, 0);
> > + }
> > + printf("Number of 5GUL engines %d\n", numEngines);
> > +
> > + if (d->sw_rings_base != NULL)
> > + rte_free(d->sw_rings_base);
> > + usleep(LONG_WAIT);
> > +}
> > +
> > +/* Initial configuration of a ACC100 device prior to running
> > +configure() */ int acc100_configure(const char *dev_name, struct
> > +acc100_conf *conf) {
> > + rte_bbdev_log(INFO, "acc100_configure");
> > + uint32_t payload, address, status;
>
> maybe value or data would be a better variable name than payload.
>
> would mean changing acc100_reg_write
Transparent to me, but I can change it given DPDK uses the term value.
>
> > + int qg_idx, template_idx, vf_idx, acc, i;
> > + struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name);
> > +
> > + /* Compile time checks */
> > + RTE_BUILD_BUG_ON(sizeof(struct acc100_dma_req_desc) != 256);
> > + RTE_BUILD_BUG_ON(sizeof(union acc100_dma_desc) != 256);
> > + RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_td) != 24);
> > + RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_te) != 32);
> > +
> > + if (bbdev == NULL) {
> > + rte_bbdev_log(ERR,
> > + "Invalid dev_name (%s), or device is not yet initialised",
> > + dev_name);
> > + return -ENODEV;
> > + }
> > + struct acc100_device *d = bbdev->data->dev_private;
> > +
> > + /* Store configuration */
> > + rte_memcpy(&d->acc100_conf, conf, sizeof(d->acc100_conf));
> > +
> > + /* PCIe Bridge configuration */
> > + acc100_reg_write(d, HwPfPcieGpexBridgeControl,
> ACC100_CFG_PCI_BRIDGE);
> > + for (i = 1; i < 17; i++)
>
> 17 is a magic number, use a #define
>
> this is a general issue.
These are only used once but still agreed.
>
> > + acc100_reg_write(d,
> > +
> HwPfPcieGpexAxiAddrMappingWindowPexBaseHigh
> > + + i * 16, 0);
> > +
> > + /* PCIe Link Trainiing and Status State Machine */
> > + acc100_reg_write(d, HwPfPcieGpexLtssmStateCntrl, 0xDFC00000);
> > +
> > + /* Prevent blocking AXI read on BRESP for AXI Write */
> > + address = HwPfPcieGpexAxiPioControl;
> > + payload = ACC100_CFG_PCI_AXI;
> > + acc100_reg_write(d, address, payload);
> > +
> > + /* 5GDL PLL phase shift */
> > + acc100_reg_write(d, HWPfChaDl5gPllPhshft0, 0x1);
> > +
> > + /* Explicitly releasing AXI as this may be stopped after PF FLR/BME */
> > + address = HWPfDmaAxiControl;
> > + payload = 1;
> > + acc100_reg_write(d, address, payload);
> > +
> > + /* DDR Configuration */
> > + address = HWPfDdrBcTim6;
> > + payload = acc100_reg_read(d, address);
> > + payload &= 0xFFFFFFFB; /* Bit 2 */ #ifdef ACC100_DDR_ECC_ENABLE
> > + payload |= 0x4;
> > +#endif
> > + acc100_reg_write(d, address, payload);
> > + address = HWPfDdrPhyDqsCountNum;
> > +#ifdef ACC100_DDR_ECC_ENABLE
> > + payload = 9;
> > +#else
> > + payload = 8;
> > +#endif
> > + acc100_reg_write(d, address, payload);
> > +
> > + /* Set default descriptor signature */
> > + address = HWPfDmaDescriptorSignatuture;
> > + payload = 0;
> > + acc100_reg_write(d, address, payload);
> > +
> > + /* Enable the Error Detection in DMA */
> > + payload = ACC100_CFG_DMA_ERROR;
> > + address = HWPfDmaErrorDetectionEn;
> > + acc100_reg_write(d, address, payload);
> > +
> > + /* AXI Cache configuration */
> > + payload = ACC100_CFG_AXI_CACHE;
> > + address = HWPfDmaAxcacheReg;
> > + acc100_reg_write(d, address, payload);
> > +
> > + /* Default DMA Configuration (Qmgr Enabled) */
> > + address = HWPfDmaConfig0Reg;
> > + payload = 0;
> > + acc100_reg_write(d, address, payload);
> > + address = HWPfDmaQmanen;
> > + payload = 0;
> > + acc100_reg_write(d, address, payload);
> > +
> > + /* Default RLIM/ALEN configuration */
> > + address = HWPfDmaConfig1Reg;
> > + payload = (1 << 31) + (23 << 8) + (1 << 6) + 7;
> > + acc100_reg_write(d, address, payload);
> > +
> > + /* Configure DMA Qmanager addresses */
> > + address = HWPfDmaQmgrAddrReg;
> > + payload = HWPfQmgrEgressQueuesTemplate;
> > + acc100_reg_write(d, address, payload);
> > +
> > + /* ===== Qmgr Configuration ===== */
> > + /* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2
> for UL */
> > + int totalQgs = conf->q_ul_4g.num_qgroups +
> > + conf->q_ul_5g.num_qgroups +
> > + conf->q_dl_4g.num_qgroups +
> > + conf->q_dl_5g.num_qgroups;
> > + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> > + address = HWPfQmgrDepthLog2Grp +
> > + BYTES_IN_WORD * qg_idx;
> > + payload = aqDepth(qg_idx, conf);
> > + acc100_reg_write(d, address, payload);
> > + address = HWPfQmgrTholdGrp +
> > + BYTES_IN_WORD * qg_idx;
> > + payload = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1));
> > + acc100_reg_write(d, address, payload);
> > + }
> > +
> > + /* Template Priority in incremental order */
> > + for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
> > + template_idx++) {
> > + address = HWPfQmgrGrpTmplateReg0Indx +
> > + BYTES_IN_WORD * (template_idx % 8);
> > + payload = TMPL_PRI_0;
> > + acc100_reg_write(d, address, payload);
> > + address = HWPfQmgrGrpTmplateReg1Indx +
> > + BYTES_IN_WORD * (template_idx % 8);
> > + payload = TMPL_PRI_1;
> > + acc100_reg_write(d, address, payload);
> > + address = HWPfQmgrGrpTmplateReg2indx +
> > + BYTES_IN_WORD * (template_idx % 8);
> > + payload = TMPL_PRI_2;
> > + acc100_reg_write(d, address, payload);
> > + address = HWPfQmgrGrpTmplateReg3Indx +
> > + BYTES_IN_WORD * (template_idx % 8);
> > + payload = TMPL_PRI_3;
> > + acc100_reg_write(d, address, payload);
> > + }
> > +
> > + address = HWPfQmgrGrpPriority;
> > + payload = ACC100_CFG_QMGR_HI_P;
> > + acc100_reg_write(d, address, payload);
> > +
> > + /* Template Configuration */
> > + for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
> template_idx++) {
> > + payload = 0;
> > + address = HWPfQmgrGrpTmplateReg4Indx
> > + + BYTES_IN_WORD * template_idx;
> > + acc100_reg_write(d, address, payload);
> > + }
> > + /* 4GUL */
> > + int numQgs = conf->q_ul_4g.num_qgroups;
> > + int numQqsAcc = 0;
> > + payload = 0;
> > + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> qg_idx++)
> > + payload |= (1 << qg_idx);
> > + for (template_idx = SIG_UL_4G; template_idx <= SIG_UL_4G_LAST;
> > + template_idx++) {
> > + address = HWPfQmgrGrpTmplateReg4Indx
> > + + BYTES_IN_WORD*template_idx;
> > + acc100_reg_write(d, address, payload);
> > + }
> > + /* 5GUL */
> > + numQqsAcc += numQgs;
> > + numQgs = conf->q_ul_5g.num_qgroups;
> > + payload = 0;
> > + int numEngines = 0;
> > + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> qg_idx++)
> > + payload |= (1 << qg_idx);
> > + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> > + template_idx++) {
> > + /* Check engine power-on status */
> > + address = HwPfFecUl5gIbDebugReg +
> > + ACC100_ENGINE_OFFSET * template_idx;
> > + status = (acc100_reg_read(d, address) >> 4) & 0xF;
> > + address = HWPfQmgrGrpTmplateReg4Indx
> > + + BYTES_IN_WORD * template_idx;
> > + if (status == 1) {
> > + acc100_reg_write(d, address, payload);
> > + numEngines++;
> > + } else
> > + acc100_reg_write(d, address, 0);
> > + #if RTE_ACC100_SINGLE_FEC == 1
> #if should be at start of line
ok
> > + payload = 0;
> > + #endif
> > + }
> > + printf("Number of 5GUL engines %d\n", numEngines);
> > + /* 4GDL */
> > + numQqsAcc += numQgs;
> > + numQgs = conf->q_dl_4g.num_qgroups;
> > + payload = 0;
> > + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> qg_idx++)
> > + payload |= (1 << qg_idx);
> > + for (template_idx = SIG_DL_4G; template_idx <= SIG_DL_4G_LAST;
> > + template_idx++) {
> > + address = HWPfQmgrGrpTmplateReg4Indx
> > + + BYTES_IN_WORD*template_idx;
> > + acc100_reg_write(d, address, payload);
> > + #if RTE_ACC100_SINGLE_FEC == 1
> > + payload = 0;
> > + #endif
> > + }
> > + /* 5GDL */
> > + numQqsAcc += numQgs;
> > + numQgs = conf->q_dl_5g.num_qgroups;
> > + payload = 0;
> > + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> qg_idx++)
> > + payload |= (1 << qg_idx);
> > + for (template_idx = SIG_DL_5G; template_idx <= SIG_DL_5G_LAST;
> > + template_idx++) {
> > + address = HWPfQmgrGrpTmplateReg4Indx
> > + + BYTES_IN_WORD*template_idx;
> > + acc100_reg_write(d, address, payload);
> > + #if RTE_ACC100_SINGLE_FEC == 1
> > + payload = 0;
> > + #endif
> > + }
> > +
> > + /* Queue Group Function mapping */
> > + int qman_func_id[5] = {0, 2, 1, 3, 4};
> > + address = HWPfQmgrGrpFunction0;
> > + payload = 0;
> > + for (qg_idx = 0; qg_idx < 8; qg_idx++) {
> > + acc = accFromQgid(qg_idx, conf);
> > + payload |= qman_func_id[acc]<<(qg_idx * 4);
> > + }
> > + acc100_reg_write(d, address, payload);
> > +
> > + /* Configuration of the Arbitration QGroup depth to 1 */
> > + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> > + address = HWPfQmgrArbQDepthGrp +
> > + BYTES_IN_WORD * qg_idx;
> > + payload = 0;
> > + acc100_reg_write(d, address, payload);
> > + }
> > +
> > + /* Enabling AQueues through the Queue hierarchy*/
> > + for (vf_idx = 0; vf_idx < ACC100_NUM_VFS; vf_idx++) {
> > + for (qg_idx = 0; qg_idx < ACC100_NUM_QGRPS; qg_idx++) {
> > + payload = 0;
> > + if (vf_idx < conf->num_vf_bundles &&
> > + qg_idx < totalQgs)
> > + payload = (1 << aqNum(qg_idx, conf)) - 1;
> > + address = HWPfQmgrAqEnableVf
> > + + vf_idx * BYTES_IN_WORD;
> > + payload += (qg_idx << 16);
> > + acc100_reg_write(d, address, payload);
> > + }
> > + }
> > +
> > + /* This pointer to ARAM (256kB) is shifted by 2 (4B per register) */
> > + uint32_t aram_address = 0;
> > + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> > + for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
> > + address = HWPfQmgrVfBaseAddr + vf_idx
> > + * BYTES_IN_WORD + qg_idx
> > + * BYTES_IN_WORD * 64;
> > + payload = aram_address;
> > + acc100_reg_write(d, address, payload);
> > + /* Offset ARAM Address for next memory bank
> > + * - increment of 4B
> > + */
> > + aram_address += aqNum(qg_idx, conf) *
> > + (1 << aqDepth(qg_idx, conf));
> > + }
> > + }
> > +
> > + if (aram_address > WORDS_IN_ARAM_SIZE) {
> > + rte_bbdev_log(ERR, "ARAM Configuration not fitting %d
> %d\n",
> > + aram_address, WORDS_IN_ARAM_SIZE);
> > + return -EINVAL;
> > + }
> > +
> > + /* ==== HI Configuration ==== */
> > +
> > + /* Prevent Block on Transmit Error */
> > + address = HWPfHiBlockTransmitOnErrorEn;
> > + payload = 0;
> > + acc100_reg_write(d, address, payload);
> > + /* Prevents to drop MSI */
> > + address = HWPfHiMsiDropEnableReg;
> > + payload = 0;
> > + acc100_reg_write(d, address, payload);
> > + /* Set the PF Mode register */
> > + address = HWPfHiPfMode;
> > + payload = (conf->pf_mode_en) ? 2 : 0;
> > + acc100_reg_write(d, address, payload);
> > + /* Enable Error Detection in HW */
> > + address = HWPfDmaErrorDetectionEn;
> > + payload = 0x3D7;
> > + acc100_reg_write(d, address, payload);
> > +
> > + /* QoS overflow init */
> > + payload = 1;
> > + address = HWPfQosmonAEvalOverflow0;
> > + acc100_reg_write(d, address, payload);
> > + address = HWPfQosmonBEvalOverflow0;
> > + acc100_reg_write(d, address, payload);
> > +
> > + /* HARQ DDR Configuration */
> > + unsigned int ddrSizeInMb = 512; /* Fixed to 512 MB per VF for now
> */
> > + for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
> > + address = HWPfDmaVfDdrBaseRw + vf_idx
> > + * 0x10;
> > + payload = ((vf_idx * (ddrSizeInMb / 64)) << 16) +
> > + (ddrSizeInMb - 1);
> > + acc100_reg_write(d, address, payload);
> > + }
> > + usleep(LONG_WAIT);
> Is sleep needed here ? the reg_write has one.
This one is needed on top
> > +
>
> Since this seems like a workaround, add a comment here.
fair enough, ok, thanks
>
> Tom
>
> > + if (numEngines < (SIG_UL_5G_LAST + 1))
> > + poweron_cleanup(bbdev, d, conf);
> > +
> > + rte_bbdev_log_debug("PF Tip configuration complete for %s",
> dev_name);
> > + return 0;
> > +}
> > diff --git
> > a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> > b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> > index 4a76d1d..91c234d 100644
> > --- a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> > +++ b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> > @@ -1,3 +1,10 @@
> > DPDK_21 {
> > local: *;
> > };
> > +
> > +EXPERIMENTAL {
> > + global:
> > +
> > + acc100_configure;
> > +
> > +};
On 9/30/20 3:54 PM, Chautru, Nicolas wrote:
> Hi Tom,
>
>> From: Tom Rix <trix@redhat.com>
>> On 9/28/20 5:29 PM, Nicolas Chautru wrote:
>>> Add configure function to configure the PF from within the
>>> bbdev-test itself without external application configuration the device.
>>>
>>> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
>>> Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
>>> ---
>>> app/test-bbdev/test_bbdev_perf.c | 72 +++
>>> doc/guides/rel_notes/release_20_11.rst | 5 +
>>> drivers/baseband/acc100/meson.build | 2 +
>>> drivers/baseband/acc100/rte_acc100_cfg.h | 17 +
>>> drivers/baseband/acc100/rte_acc100_pmd.c | 505
>> +++++++++++++++++++++
>>> .../acc100/rte_pmd_bbdev_acc100_version.map | 7 +
>>> 6 files changed, 608 insertions(+)
>>>
>>> diff --git a/app/test-bbdev/test_bbdev_perf.c
>>> b/app/test-bbdev/test_bbdev_perf.c
>>> index 45c0d62..32f23ff 100644
>>> --- a/app/test-bbdev/test_bbdev_perf.c
>>> +++ b/app/test-bbdev/test_bbdev_perf.c
>>> @@ -52,6 +52,18 @@
>>> #define FLR_5G_TIMEOUT 610
>>> #endif
>>>
>>> +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
>>> +#include <rte_acc100_cfg.h>
>>> +#define ACC100PF_DRIVER_NAME ("intel_acc100_pf")
>>> +#define ACC100VF_DRIVER_NAME ("intel_acc100_vf")
>>> +#define ACC100_QMGR_NUM_AQS 16
>>> +#define ACC100_QMGR_NUM_QGS 2
>>> +#define ACC100_QMGR_AQ_DEPTH 5
>>> +#define ACC100_QMGR_INVALID_IDX -1
>>> +#define ACC100_QMGR_RR 1
>>> +#define ACC100_QOS_GBR 0
>>> +#endif
>>> +
>>> #define OPS_CACHE_SIZE 256U
>>> #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
>>>
>>> @@ -653,6 +665,66 @@ typedef int (test_case_function)(struct
>> active_device *ad,
>>> info->dev_name);
>>> }
>>> #endif
>>> +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
>> seems like this function would break if one of the other bbdev's were
>> #defined.
> No these are independent. By default they are all defined.
ok
>
>
>>> + if ((get_init_device() == true) &&
>>> + (!strcmp(info->drv.driver_name,
>> ACC100PF_DRIVER_NAME))) {
>>> + struct acc100_conf conf;
>>> + unsigned int i;
>>> +
>>> + printf("Configure ACC100 FEC Driver %s with default
>> values\n",
>>> + info->drv.driver_name);
>>> +
>>> + /* clear default configuration before initialization */
>>> + memset(&conf, 0, sizeof(struct acc100_conf));
>>> +
>>> + /* Always set in PF mode for built-in configuration */
>>> + conf.pf_mode_en = true;
>>> + for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
>>> + conf.arb_dl_4g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> + conf.arb_dl_4g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> + conf.arb_dl_4g[i].round_robin_weight =
>> ACC100_QMGR_RR;
>>> + conf.arb_ul_4g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> + conf.arb_ul_4g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> + conf.arb_ul_4g[i].round_robin_weight =
>> ACC100_QMGR_RR;
>>> + conf.arb_dl_5g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> + conf.arb_dl_5g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> + conf.arb_dl_5g[i].round_robin_weight =
>> ACC100_QMGR_RR;
>>> + conf.arb_ul_5g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> + conf.arb_ul_5g[i].gbr_threshold1 =
>> ACC100_QOS_GBR;
>>> + conf.arb_ul_5g[i].round_robin_weight =
>> ACC100_QMGR_RR;
>>> + }
>>> +
>>> + conf.input_pos_llr_1_bit = true;
>>> + conf.output_pos_llr_1_bit = true;
>>> + conf.num_vf_bundles = 1; /**< Number of VF bundles to
>> setup */
>>> +
>>> + conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
>>> + conf.q_ul_4g.first_qgroup_index =
>> ACC100_QMGR_INVALID_IDX;
>>> + conf.q_ul_4g.num_aqs_per_groups =
>> ACC100_QMGR_NUM_AQS;
>>> + conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
>>> + conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
>>> + conf.q_dl_4g.first_qgroup_index =
>> ACC100_QMGR_INVALID_IDX;
>>> + conf.q_dl_4g.num_aqs_per_groups =
>> ACC100_QMGR_NUM_AQS;
>>> + conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
>>> + conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
>>> + conf.q_ul_5g.first_qgroup_index =
>> ACC100_QMGR_INVALID_IDX;
>>> + conf.q_ul_5g.num_aqs_per_groups =
>> ACC100_QMGR_NUM_AQS;
>>> + conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
>>> + conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
>>> + conf.q_dl_5g.first_qgroup_index =
>> ACC100_QMGR_INVALID_IDX;
>>> + conf.q_dl_5g.num_aqs_per_groups =
>> ACC100_QMGR_NUM_AQS;
>>> + conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
>>> +
>>> + /* setup PF with configuration information */
>>> + ret = acc100_configure(info->dev_name, &conf);
>>> + TEST_ASSERT_SUCCESS(ret,
>>> + "Failed to configure ACC100 PF for bbdev
>> %s",
>>> + info->dev_name);
>>> + /* Let's refresh this now this is configured */
>>> + }
>>> + rte_bbdev_info_get(dev_id, info);
>> The other bbdev's do not call rte_bbdev_info_get, can this be removed ?
> Actually it should be added outside for all versions post-configuration. Thanks
>
>>> +#endif
>>> +
>>> nb_queues = RTE_MIN(rte_lcore_count(), info- drv.max_num_queues);
>>> nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
>>>
>>> diff --git a/doc/guides/rel_notes/release_20_11.rst
>>> b/doc/guides/rel_notes/release_20_11.rst
>>> index 73ac08f..c8d0586 100644
>>> --- a/doc/guides/rel_notes/release_20_11.rst
>>> +++ b/doc/guides/rel_notes/release_20_11.rst
>>> @@ -55,6 +55,11 @@ New Features
>>> Also, make sure to start the actual text at the margin.
>>> =======================================================
>>>
>>> +* **Added Intel ACC100 bbdev PMD.**
>>> +
>>> + Added a new ``acc100`` bbdev driver for the Intel\ |reg| ACC100
>>> + accelerator also known as Mount Bryce. See the
>>> + :doc:`../bbdevs/acc100` BBDEV guide for more details on this new driver.
>>>
>>> Removed Items
>>> -------------
>>> diff --git a/drivers/baseband/acc100/meson.build
>>> b/drivers/baseband/acc100/meson.build
>>> index 8afafc2..7ac44dc 100644
>>> --- a/drivers/baseband/acc100/meson.build
>>> +++ b/drivers/baseband/acc100/meson.build
>>> @@ -4,3 +4,5 @@
>>> deps += ['bbdev', 'bus_vdev', 'ring', 'pci', 'bus_pci']
>>>
>>> sources = files('rte_acc100_pmd.c')
>>> +
>>> +install_headers('rte_acc100_cfg.h')
>>> diff --git a/drivers/baseband/acc100/rte_acc100_cfg.h
>>> b/drivers/baseband/acc100/rte_acc100_cfg.h
>>> index 73bbe36..7f523bc 100644
>>> --- a/drivers/baseband/acc100/rte_acc100_cfg.h
>>> +++ b/drivers/baseband/acc100/rte_acc100_cfg.h
>>> @@ -89,6 +89,23 @@ struct acc100_conf {
>>> struct rte_arbitration_t arb_dl_5g[RTE_ACC100_NUM_VFS]; };
>>>
>>> +/**
>>> + * Configure a ACC100 device
>>> + *
>>> + * @param dev_name
>>> + * The name of the device. This is the short form of PCI BDF, e.g. 00:01.0.
>>> + * It can also be retrieved for a bbdev device from the dev_name field in
>> the
>>> + * rte_bbdev_info structure returned by rte_bbdev_info_get().
>>> + * @param conf
>>> + * Configuration to apply to ACC100 HW.
>>> + *
>>> + * @return
>>> + * Zero on success, negative value on failure.
>>> + */
>>> +__rte_experimental
>>> +int
>>> +acc100_configure(const char *dev_name, struct acc100_conf *conf);
>>> +
>>> #ifdef __cplusplus
>>> }
>>> #endif
>>> diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
>>> b/drivers/baseband/acc100/rte_acc100_pmd.c
>>> index 3589814..b50dd32 100644
>>> --- a/drivers/baseband/acc100/rte_acc100_pmd.c
>>> +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
>>> @@ -85,6 +85,26 @@
>>>
>>> enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC};
>>>
>>> +/* Return the accelerator enum for a Queue Group Index */ static
>>> +inline int accFromQgid(int qg_idx, const struct acc100_conf
>>> +*acc100_conf) {
>>> + int accQg[ACC100_NUM_QGRPS];
>>> + int NumQGroupsPerFn[NUM_ACC];
>>> + int acc, qgIdx, qgIndex = 0;
>>> + for (qgIdx = 0; qgIdx < ACC100_NUM_QGRPS; qgIdx++)
>>> + accQg[qgIdx] = 0;
>>> + NumQGroupsPerFn[UL_4G] = acc100_conf->q_ul_4g.num_qgroups;
>>> + NumQGroupsPerFn[UL_5G] = acc100_conf->q_ul_5g.num_qgroups;
>>> + NumQGroupsPerFn[DL_4G] = acc100_conf->q_dl_4g.num_qgroups;
>>> + NumQGroupsPerFn[DL_5G] = acc100_conf->q_dl_5g.num_qgroups;
>>> + for (acc = UL_4G; acc < NUM_ACC; acc++)
>>> + for (qgIdx = 0; qgIdx < NumQGroupsPerFn[acc]; qgIdx++)
>>> + accQg[qgIndex++] = acc;
>> This looks inefficient, is there a way this could be calculated
>> without filling arrays to
>>
>> access 1 value ?
> That is not time critical, and the same common code is run each time.
ok
>
>>> + acc = accQg[qg_idx];
>>> + return acc;
>>> +}
>>> +
>>> /* Return the queue topology for a Queue Group Index */ static
>>> inline void qtopFromAcc(struct rte_q_topology_t **qtop, int
>>> acc_enum, @@ -113,6 +133,30 @@
>>> *qtop = p_qtop;
>>> }
>>>
>>> +/* Return the AQ depth for a Queue Group Index */ static inline int
>>> +aqDepth(int qg_idx, struct acc100_conf *acc100_conf) {
>>> + struct rte_q_topology_t *q_top = NULL;
>>> + int acc_enum = accFromQgid(qg_idx, acc100_conf);
>>> + qtopFromAcc(&q_top, acc_enum, acc100_conf);
>>> + if (unlikely(q_top == NULL))
>>> + return 0;
>> This error is not handled well by the callers.
>>
>> aqNum is similar.
> This fails on a consistent basis, by having no queue available and handling this as the default case.
ok
>
>>> + return q_top->aq_depth_log2;
>>> +}
>>> +
>>> +/* Return the AQ depth for a Queue Group Index */ static inline int
>>> +aqNum(int qg_idx, struct acc100_conf *acc100_conf) {
>>> + struct rte_q_topology_t *q_top = NULL;
>>> + int acc_enum = accFromQgid(qg_idx, acc100_conf);
>>> + qtopFromAcc(&q_top, acc_enum, acc100_conf);
>>> + if (unlikely(q_top == NULL))
>>> + return 0;
>>> + return q_top->num_aqs_per_groups;
>>> +}
>>> +
>>> static void
>>> initQTop(struct acc100_conf *acc100_conf) { @@ -4177,3 +4221,464
>>> @@ static int acc100_pci_remove(struct rte_pci_device *pci_dev)
>>> RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME,
>>> pci_id_acc100_pf_map);
>> RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME,
>>> acc100_pci_vf_driver);
>>> RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME,
>>> pci_id_acc100_vf_map);
>>> +
>>> +/*
>>> + * Implementation to fix the power on status of some 5GUL engines
>>> + * This requires DMA permission if ported outside DPDK
>> This sounds like a workaround, can more detail be added here ?
> There are comments through the code I believe:
> - /* Detect engines in undefined state */
> - /* Force each engine which is in unspecified state */
> - /* Reset LDPC Cores */
> - /* Check engine power-on status again */
> Do you believe this is not explicit enough? Power-on status may be in an undefined state, hence these engines are activated with a dummy payload to make sure they are in a predictable state once configuration is done.
Yes, not explicit enough. They do not say it is a workaround, so someone else would not know that
this is needed or likely needs adjusting in the future. Maybe change
/* Check engine power-on status again */ to
/*
* Power-on status may be in an undefined state.
* Activate this engine with a dummy payload to make sure the state is defined.
*/
Tom
>>> + */
>>> +static void
>>> +poweron_cleanup(struct rte_bbdev *bbdev, struct acc100_device *d,
>>> + struct acc100_conf *conf)
>>> +{
>>> + int i, template_idx, qg_idx;
>>> + uint32_t address, status, payload;
>>> + printf("Need to clear power-on 5GUL status in internal memory\n");
>>> + /* Reset LDPC Cores */
>>> + for (i = 0; i < ACC100_ENGINES_MAX; i++)
>>> + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
>>> + ACC100_ENGINE_OFFSET * i,
>> ACC100_RESET_HI);
>>> + usleep(LONG_WAIT);
>>> + for (i = 0; i < ACC100_ENGINES_MAX; i++)
>>> + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
>>> + ACC100_ENGINE_OFFSET * i,
>> ACC100_RESET_LO);
>>> + usleep(LONG_WAIT);
>>> + /* Prepare dummy workload */
>>> + alloc_2x64mb_sw_rings_mem(bbdev, d, 0);
>>> + /* Set base addresses */
>>> + uint32_t phys_high = (uint32_t)(d->sw_rings_phys >> 32);
>>> + uint32_t phys_low = (uint32_t)(d->sw_rings_phys &
>>> + ~(ACC100_SIZE_64MBYTE-1));
>>> + acc100_reg_write(d, HWPfDmaFec5GulDescBaseHiRegVf,
>> phys_high);
>>> + acc100_reg_write(d, HWPfDmaFec5GulDescBaseLoRegVf, phys_low);
>>> +
>>> + /* Descriptor for a dummy 5GUL code block processing*/
>>> + union acc100_dma_desc *desc = NULL;
>>> + desc = d->sw_rings;
>>> + desc->req.data_ptrs[0].address = d->sw_rings_phys +
>>> + ACC100_DESC_FCW_OFFSET;
>>> + desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN;
>>> + desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
>>> + desc->req.data_ptrs[0].last = 0;
>>> + desc->req.data_ptrs[0].dma_ext = 0;
>>> + desc->req.data_ptrs[1].address = d->sw_rings_phys + 512;
>>> + desc->req.data_ptrs[1].blkid = ACC100_DMA_BLKID_IN;
>>> + desc->req.data_ptrs[1].last = 1;
>>> + desc->req.data_ptrs[1].dma_ext = 0;
>>> + desc->req.data_ptrs[1].blen = 44;
>>> + desc->req.data_ptrs[2].address = d->sw_rings_phys + 1024;
>>> + desc->req.data_ptrs[2].blkid = ACC100_DMA_BLKID_OUT_ENC;
>>> + desc->req.data_ptrs[2].last = 1;
>>> + desc->req.data_ptrs[2].dma_ext = 0;
>>> + desc->req.data_ptrs[2].blen = 5;
>>> + /* Dummy FCW */
>>> + desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
>>> + desc->req.fcw_ld.qm = 1;
>>> + desc->req.fcw_ld.nfiller = 30;
>>> + desc->req.fcw_ld.BG = 2 - 1;
>>> + desc->req.fcw_ld.Zc = 7;
>>> + desc->req.fcw_ld.ncb = 350;
>>> + desc->req.fcw_ld.rm_e = 4;
>>> + desc->req.fcw_ld.itmax = 10;
>>> + desc->req.fcw_ld.gain_i = 1;
>>> + desc->req.fcw_ld.gain_h = 1;
>>> +
>>> + int engines_to_restart[SIG_UL_5G_LAST + 1] = {0};
>>> + int num_failed_engine = 0;
>>> + /* Detect engines in undefined state */
>>> + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
>>> + template_idx++) {
>>> + /* Check engine power-on status */
>>> + address = HwPfFecUl5gIbDebugReg +
>>> + ACC100_ENGINE_OFFSET * template_idx;
>>> + status = (acc100_reg_read(d, address) >> 4) & 0xF;
>>> + if (status == 0) {
>>> + engines_to_restart[num_failed_engine] =
>> template_idx;
>>> + num_failed_engine++;
>>> + }
>>> + }
>>> +
>>> + int numQqsAcc = conf->q_ul_5g.num_qgroups;
>>> + int numQgs = conf->q_ul_5g.num_qgroups;
>>> + payload = 0;
>>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
>> qg_idx++)
>>> + payload |= (1 << qg_idx);
>>> + /* Force each engine which is in unspecified state */
>>> + for (i = 0; i < num_failed_engine; i++) {
>>> + int failed_engine = engines_to_restart[i];
>>> + printf("Force engine %d\n", failed_engine);
>>> + for (template_idx = SIG_UL_5G; template_idx <=
>> SIG_UL_5G_LAST;
>>> + template_idx++) {
>>> + address = HWPfQmgrGrpTmplateReg4Indx
>>> + + BYTES_IN_WORD * template_idx;
>>> + if (template_idx == failed_engine)
>>> + acc100_reg_write(d, address, payload);
>>> + else
>>> + acc100_reg_write(d, address, 0);
>>> + }
>>> + /* Reset descriptor header */
>>> + desc->req.word0 = ACC100_DMA_DESC_TYPE;
>>> + desc->req.word1 = 0;
>>> + desc->req.word2 = 0;
>>> + desc->req.word3 = 0;
>>> + desc->req.numCBs = 1;
>>> + desc->req.m2dlen = 2;
>>> + desc->req.d2mlen = 1;
>>> + /* Enqueue the code block for processing */
>>> + union acc100_enqueue_reg_fmt enq_req;
>>> + enq_req.val = 0;
>>> + enq_req.addr_offset = ACC100_DESC_OFFSET;
>>> + enq_req.num_elem = 1;
>>> + enq_req.req_elem_addr = 0;
>>> + rte_wmb();
>>> + acc100_reg_write(d, HWPfQmgrIngressAq + 0x100,
>> enq_req.val);
>>> + usleep(LONG_WAIT * 100);
>>> + if (desc->req.word0 != 2)
>>> + printf("DMA Response %#"PRIx32"\n", desc-
>>> req.word0);
>>> + }
>>> +
>>> + /* Reset LDPC Cores */
>>> + for (i = 0; i < ACC100_ENGINES_MAX; i++)
>>> + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
>>> + ACC100_ENGINE_OFFSET * i,
>> ACC100_RESET_HI);
>>> + usleep(LONG_WAIT);
>>> + for (i = 0; i < ACC100_ENGINES_MAX; i++)
>>> + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
>>> + ACC100_ENGINE_OFFSET * i,
>> ACC100_RESET_LO);
>>> + usleep(LONG_WAIT);
>>> + acc100_reg_write(d, HWPfHi5GHardResetReg,
>> ACC100_RESET_HARD);
>>> + usleep(LONG_WAIT);
>>> + int numEngines = 0;
>>> + /* Check engine power-on status again */
>>> + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
>>> + template_idx++) {
>>> + address = HwPfFecUl5gIbDebugReg +
>>> + ACC100_ENGINE_OFFSET * template_idx;
>>> + status = (acc100_reg_read(d, address) >> 4) & 0xF;
>>> + address = HWPfQmgrGrpTmplateReg4Indx
>>> + + BYTES_IN_WORD * template_idx;
>>> + if (status == 1) {
>>> + acc100_reg_write(d, address, payload);
>>> + numEngines++;
>>> + } else
>>> + acc100_reg_write(d, address, 0);
>>> + }
>>> + printf("Number of 5GUL engines %d\n", numEngines);
>>> +
>>> + if (d->sw_rings_base != NULL)
>>> + rte_free(d->sw_rings_base);
>>> + usleep(LONG_WAIT);
>>> +}
>>> +
>>> +/* Initial configuration of a ACC100 device prior to running
>>> +configure() */ int acc100_configure(const char *dev_name, struct
>>> +acc100_conf *conf) {
>>> + rte_bbdev_log(INFO, "acc100_configure");
>>> + uint32_t payload, address, status;
>> maybe value or data would be a better variable name than payload.
>>
>> would mean changing acc100_reg_write
> transparent to me, but can change given DPDK uses term value.
>
>
>>> + int qg_idx, template_idx, vf_idx, acc, i;
>>> + struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name);
>>> +
>>> + /* Compile time checks */
>>> + RTE_BUILD_BUG_ON(sizeof(struct acc100_dma_req_desc) != 256);
>>> + RTE_BUILD_BUG_ON(sizeof(union acc100_dma_desc) != 256);
>>> + RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_td) != 24);
>>> + RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_te) != 32);
>>> +
>>> + if (bbdev == NULL) {
>>> + rte_bbdev_log(ERR,
>>> + "Invalid dev_name (%s), or device is not yet initialised",
>>> + dev_name);
>>> + return -ENODEV;
>>> + }
>>> + struct acc100_device *d = bbdev->data->dev_private;
>>> +
>>> + /* Store configuration */
>>> + rte_memcpy(&d->acc100_conf, conf, sizeof(d->acc100_conf));
>>> +
>>> + /* PCIe Bridge configuration */
>>> + acc100_reg_write(d, HwPfPcieGpexBridgeControl,
>> ACC100_CFG_PCI_BRIDGE);
>>> + for (i = 1; i < 17; i++)
>> 17 is a magic number, use a #define
>>
>> this is a general issue.
> These are only used once but still agreed.
>
>>> + acc100_reg_write(d,
>>> +
>> HwPfPcieGpexAxiAddrMappingWindowPexBaseHigh
>>> + + i * 16, 0);
>>> +
>>> + /* PCIe Link Trainiing and Status State Machine */
>>> + acc100_reg_write(d, HwPfPcieGpexLtssmStateCntrl, 0xDFC00000);
>>> +
>>> + /* Prevent blocking AXI read on BRESP for AXI Write */
>>> + address = HwPfPcieGpexAxiPioControl;
>>> + payload = ACC100_CFG_PCI_AXI;
>>> + acc100_reg_write(d, address, payload);
>>> +
>>> + /* 5GDL PLL phase shift */
>>> + acc100_reg_write(d, HWPfChaDl5gPllPhshft0, 0x1);
>>> +
>>> + /* Explicitly releasing AXI as this may be stopped after PF FLR/BME */
>>> + address = HWPfDmaAxiControl;
>>> + payload = 1;
>>> + acc100_reg_write(d, address, payload);
>>> +
>>> + /* DDR Configuration */
>>> + address = HWPfDdrBcTim6;
>>> + payload = acc100_reg_read(d, address);
>>> + payload &= 0xFFFFFFFB; /* Bit 2 */ #ifdef ACC100_DDR_ECC_ENABLE
>>> + payload |= 0x4;
>>> +#endif
>>> + acc100_reg_write(d, address, payload);
>>> + address = HWPfDdrPhyDqsCountNum;
>>> +#ifdef ACC100_DDR_ECC_ENABLE
>>> + payload = 9;
>>> +#else
>>> + payload = 8;
>>> +#endif
>>> + acc100_reg_write(d, address, payload);
>>> +
>>> + /* Set default descriptor signature */
>>> + address = HWPfDmaDescriptorSignatuture;
>>> + payload = 0;
>>> + acc100_reg_write(d, address, payload);
>>> +
>>> + /* Enable the Error Detection in DMA */
>>> + payload = ACC100_CFG_DMA_ERROR;
>>> + address = HWPfDmaErrorDetectionEn;
>>> + acc100_reg_write(d, address, payload);
>>> +
>>> + /* AXI Cache configuration */
>>> + payload = ACC100_CFG_AXI_CACHE;
>>> + address = HWPfDmaAxcacheReg;
>>> + acc100_reg_write(d, address, payload);
>>> +
>>> + /* Default DMA Configuration (Qmgr Enabled) */
>>> + address = HWPfDmaConfig0Reg;
>>> + payload = 0;
>>> + acc100_reg_write(d, address, payload);
>>> + address = HWPfDmaQmanen;
>>> + payload = 0;
>>> + acc100_reg_write(d, address, payload);
>>> +
>>> + /* Default RLIM/ALEN configuration */
>>> + address = HWPfDmaConfig1Reg;
>>> + payload = (1 << 31) + (23 << 8) + (1 << 6) + 7;
>>> + acc100_reg_write(d, address, payload);
>>> +
>>> + /* Configure DMA Qmanager addresses */
>>> + address = HWPfDmaQmgrAddrReg;
>>> + payload = HWPfQmgrEgressQueuesTemplate;
>>> + acc100_reg_write(d, address, payload);
>>> +
>>> + /* ===== Qmgr Configuration ===== */
>>> + /* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2
>> for UL */
>>> + int totalQgs = conf->q_ul_4g.num_qgroups +
>>> + conf->q_ul_5g.num_qgroups +
>>> + conf->q_dl_4g.num_qgroups +
>>> + conf->q_dl_5g.num_qgroups;
>>> + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
>>> + address = HWPfQmgrDepthLog2Grp +
>>> + BYTES_IN_WORD * qg_idx;
>>> + payload = aqDepth(qg_idx, conf);
>>> + acc100_reg_write(d, address, payload);
>>> + address = HWPfQmgrTholdGrp +
>>> + BYTES_IN_WORD * qg_idx;
>>> + payload = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1));
>>> + acc100_reg_write(d, address, payload);
>>> + }
>>> +
>>> + /* Template Priority in incremental order */
>>> + for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
>>> + template_idx++) {
>>> + address = HWPfQmgrGrpTmplateReg0Indx +
>>> + BYTES_IN_WORD * (template_idx % 8);
>>> + payload = TMPL_PRI_0;
>>> + acc100_reg_write(d, address, payload);
>>> + address = HWPfQmgrGrpTmplateReg1Indx +
>>> + BYTES_IN_WORD * (template_idx % 8);
>>> + payload = TMPL_PRI_1;
>>> + acc100_reg_write(d, address, payload);
>>> + address = HWPfQmgrGrpTmplateReg2indx +
>>> + BYTES_IN_WORD * (template_idx % 8);
>>> + payload = TMPL_PRI_2;
>>> + acc100_reg_write(d, address, payload);
>>> + address = HWPfQmgrGrpTmplateReg3Indx +
>>> + BYTES_IN_WORD * (template_idx % 8);
>>> + payload = TMPL_PRI_3;
>>> + acc100_reg_write(d, address, payload);
>>> + }
>>> +
>>> + address = HWPfQmgrGrpPriority;
>>> + payload = ACC100_CFG_QMGR_HI_P;
>>> + acc100_reg_write(d, address, payload);
>>> +
>>> + /* Template Configuration */
>>> + for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
>> template_idx++) {
>>> + payload = 0;
>>> + address = HWPfQmgrGrpTmplateReg4Indx
>>> + + BYTES_IN_WORD * template_idx;
>>> + acc100_reg_write(d, address, payload);
>>> + }
>>> + /* 4GUL */
>>> + int numQgs = conf->q_ul_4g.num_qgroups;
>>> + int numQqsAcc = 0;
>>> + payload = 0;
>>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
>> qg_idx++)
>>> + payload |= (1 << qg_idx);
>>> + for (template_idx = SIG_UL_4G; template_idx <= SIG_UL_4G_LAST;
>>> + template_idx++) {
>>> + address = HWPfQmgrGrpTmplateReg4Indx
>>> + + BYTES_IN_WORD*template_idx;
>>> + acc100_reg_write(d, address, payload);
>>> + }
>>> + /* 5GUL */
>>> + numQqsAcc += numQgs;
>>> + numQgs = conf->q_ul_5g.num_qgroups;
>>> + payload = 0;
>>> + int numEngines = 0;
>>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
>> qg_idx++)
>>> + payload |= (1 << qg_idx);
>>> + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
>>> + template_idx++) {
>>> + /* Check engine power-on status */
>>> + address = HwPfFecUl5gIbDebugReg +
>>> + ACC100_ENGINE_OFFSET * template_idx;
>>> + status = (acc100_reg_read(d, address) >> 4) & 0xF;
>>> + address = HWPfQmgrGrpTmplateReg4Indx
>>> + + BYTES_IN_WORD * template_idx;
>>> + if (status == 1) {
>>> + acc100_reg_write(d, address, payload);
>>> + numEngines++;
>>> + } else
>>> + acc100_reg_write(d, address, 0);
>>> + #if RTE_ACC100_SINGLE_FEC == 1
>> #if should be at start of line
> ok
>
>>> + payload = 0;
>>> + #endif
>>> + }
>>> + printf("Number of 5GUL engines %d\n", numEngines);
>>> + /* 4GDL */
>>> + numQqsAcc += numQgs;
>>> + numQgs = conf->q_dl_4g.num_qgroups;
>>> + payload = 0;
>>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
>> qg_idx++)
>>> + payload |= (1 << qg_idx);
>>> + for (template_idx = SIG_DL_4G; template_idx <= SIG_DL_4G_LAST;
>>> + template_idx++) {
>>> + address = HWPfQmgrGrpTmplateReg4Indx
>>> + + BYTES_IN_WORD*template_idx;
>>> + acc100_reg_write(d, address, payload);
>>> + #if RTE_ACC100_SINGLE_FEC == 1
>>> + payload = 0;
>>> + #endif
>>> + }
>>> + /* 5GDL */
>>> + numQqsAcc += numQgs;
>>> + numQgs = conf->q_dl_5g.num_qgroups;
>>> + payload = 0;
>>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
>> qg_idx++)
>>> + payload |= (1 << qg_idx);
>>> + for (template_idx = SIG_DL_5G; template_idx <= SIG_DL_5G_LAST;
>>> + template_idx++) {
>>> + address = HWPfQmgrGrpTmplateReg4Indx
>>> + + BYTES_IN_WORD*template_idx;
>>> + acc100_reg_write(d, address, payload);
>>> + #if RTE_ACC100_SINGLE_FEC == 1
>>> + payload = 0;
>>> + #endif
>>> + }
>>> +
>>> + /* Queue Group Function mapping */
>>> + int qman_func_id[5] = {0, 2, 1, 3, 4};
>>> + address = HWPfQmgrGrpFunction0;
>>> + payload = 0;
>>> + for (qg_idx = 0; qg_idx < 8; qg_idx++) {
>>> + acc = accFromQgid(qg_idx, conf);
>>> + payload |= qman_func_id[acc]<<(qg_idx * 4);
>>> + }
>>> + acc100_reg_write(d, address, payload);
>>> +
>>> + /* Configuration of the Arbitration QGroup depth to 1 */
>>> + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
>>> + address = HWPfQmgrArbQDepthGrp +
>>> + BYTES_IN_WORD * qg_idx;
>>> + payload = 0;
>>> + acc100_reg_write(d, address, payload);
>>> + }
>>> +
>>> + /* Enabling AQueues through the Queue hierarchy*/
>>> + for (vf_idx = 0; vf_idx < ACC100_NUM_VFS; vf_idx++) {
>>> + for (qg_idx = 0; qg_idx < ACC100_NUM_QGRPS; qg_idx++) {
>>> + payload = 0;
>>> + if (vf_idx < conf->num_vf_bundles &&
>>> + qg_idx < totalQgs)
>>> + payload = (1 << aqNum(qg_idx, conf)) - 1;
>>> + address = HWPfQmgrAqEnableVf
>>> + + vf_idx * BYTES_IN_WORD;
>>> + payload += (qg_idx << 16);
>>> + acc100_reg_write(d, address, payload);
>>> + }
>>> + }
>>> +
>>> + /* This pointer to ARAM (256kB) is shifted by 2 (4B per register) */
>>> + uint32_t aram_address = 0;
>>> + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
>>> + for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
>>> + address = HWPfQmgrVfBaseAddr + vf_idx
>>> + * BYTES_IN_WORD + qg_idx
>>> + * BYTES_IN_WORD * 64;
>>> + payload = aram_address;
>>> + acc100_reg_write(d, address, payload);
>>> + /* Offset ARAM Address for next memory bank
>>> + * - increment of 4B
>>> + */
>>> + aram_address += aqNum(qg_idx, conf) *
>>> + (1 << aqDepth(qg_idx, conf));
>>> + }
>>> + }
>>> +
>>> + if (aram_address > WORDS_IN_ARAM_SIZE) {
>>> + rte_bbdev_log(ERR, "ARAM Configuration not fitting %d
>> %d\n",
>>> + aram_address, WORDS_IN_ARAM_SIZE);
>>> + return -EINVAL;
>>> + }
>>> +
>>> + /* ==== HI Configuration ==== */
>>> +
>>> + /* Prevent Block on Transmit Error */
>>> + address = HWPfHiBlockTransmitOnErrorEn;
>>> + payload = 0;
>>> + acc100_reg_write(d, address, payload);
>>> + /* Prevents to drop MSI */
>>> + address = HWPfHiMsiDropEnableReg;
>>> + payload = 0;
>>> + acc100_reg_write(d, address, payload);
>>> + /* Set the PF Mode register */
>>> + address = HWPfHiPfMode;
>>> + payload = (conf->pf_mode_en) ? 2 : 0;
>>> + acc100_reg_write(d, address, payload);
>>> + /* Enable Error Detection in HW */
>>> + address = HWPfDmaErrorDetectionEn;
>>> + payload = 0x3D7;
>>> + acc100_reg_write(d, address, payload);
>>> +
>>> + /* QoS overflow init */
>>> + payload = 1;
>>> + address = HWPfQosmonAEvalOverflow0;
>>> + acc100_reg_write(d, address, payload);
>>> + address = HWPfQosmonBEvalOverflow0;
>>> + acc100_reg_write(d, address, payload);
>>> +
>>> + /* HARQ DDR Configuration */
>>> + unsigned int ddrSizeInMb = 512; /* Fixed to 512 MB per VF for now
>> */
>>> + for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
>>> + address = HWPfDmaVfDdrBaseRw + vf_idx
>>> + * 0x10;
>>> + payload = ((vf_idx * (ddrSizeInMb / 64)) << 16) +
>>> + (ddrSizeInMb - 1);
>>> + acc100_reg_write(d, address, payload);
>>> + }
>>> + usleep(LONG_WAIT);
>> Is sleep needed here ? the reg_write has one.
> This one is needed on top
>
>>> +
>> Since this seems like a workaround, add a comment here.
> fair enough, ok, thanks
>
>> Tom
>>
>>> + if (numEngines < (SIG_UL_5G_LAST + 1))
>>> + poweron_cleanup(bbdev, d, conf);
>>> +
>>> + rte_bbdev_log_debug("PF Tip configuration complete for %s",
>> dev_name);
>>> + return 0;
>>> +}
>>> diff --git
>>> a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
>>> b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
>>> index 4a76d1d..91c234d 100644
>>> --- a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
>>> +++ b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
>>> @@ -1,3 +1,10 @@
>>> DPDK_21 {
>>> local: *;
>>> };
>>> +
>>> +EXPERIMENTAL {
>>> + global:
>>> +
>>> + acc100_configure;
>>> +
>>> +};
Hi Tom,
> From: Tom Rix <trix@redhat.com>
> On 9/30/20 3:54 PM, Chautru, Nicolas wrote:
> > Hi Tom,
> >
> >> From: Tom Rix <trix@redhat.com>
> >> On 9/28/20 5:29 PM, Nicolas Chautru wrote:
> >>> Add configure function to configure the PF from within the
> >>> bbdev-test itself without external application configuration the device.
> >>>
> >>> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> >>> Acked-by: Liu Tianjiao <Tianjiao.liu@intel.com>
> >>> ---
> >>> app/test-bbdev/test_bbdev_perf.c | 72 +++
> >>> doc/guides/rel_notes/release_20_11.rst | 5 +
> >>> drivers/baseband/acc100/meson.build | 2 +
> >>> drivers/baseband/acc100/rte_acc100_cfg.h | 17 +
> >>> drivers/baseband/acc100/rte_acc100_pmd.c | 505
> >> +++++++++++++++++++++
> >>> .../acc100/rte_pmd_bbdev_acc100_version.map | 7 +
> >>> 6 files changed, 608 insertions(+)
> >>>
> >>> diff --git a/app/test-bbdev/test_bbdev_perf.c
> >>> b/app/test-bbdev/test_bbdev_perf.c
> >>> index 45c0d62..32f23ff 100644
> >>> --- a/app/test-bbdev/test_bbdev_perf.c
> >>> +++ b/app/test-bbdev/test_bbdev_perf.c
> >>> @@ -52,6 +52,18 @@
> >>> #define FLR_5G_TIMEOUT 610
> >>> #endif
> >>>
> >>> +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
> >>> +#include <rte_acc100_cfg.h>
> >>> +#define ACC100PF_DRIVER_NAME ("intel_acc100_pf")
> >>> +#define ACC100VF_DRIVER_NAME ("intel_acc100_vf")
> >>> +#define ACC100_QMGR_NUM_AQS 16
> >>> +#define ACC100_QMGR_NUM_QGS 2
> >>> +#define ACC100_QMGR_AQ_DEPTH 5
> >>> +#define ACC100_QMGR_INVALID_IDX -1
> >>> +#define ACC100_QMGR_RR 1
> >>> +#define ACC100_QOS_GBR 0
> >>> +#endif
> >>> +
> >>> #define OPS_CACHE_SIZE 256U
> >>> #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
> >>>
> >>> @@ -653,6 +665,66 @@ typedef int (test_case_function)(struct
> >> active_device *ad,
> >>> info->dev_name);
> >>> }
> >>> #endif
> >>> +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
> >> seems like this function would break if one of the other bbdev's were
> >> #defined.
> > No these are independent. By default they are all defined.
> ok
> >
> >
> >>> + if ((get_init_device() == true) &&
> >>> + (!strcmp(info->drv.driver_name,
> >> ACC100PF_DRIVER_NAME))) {
> >>> + struct acc100_conf conf;
> >>> + unsigned int i;
> >>> +
> >>> + printf("Configure ACC100 FEC Driver %s with default
> >> values\n",
> >>> + info->drv.driver_name);
> >>> +
> >>> + /* clear default configuration before initialization */
> >>> + memset(&conf, 0, sizeof(struct acc100_conf));
> >>> +
> >>> + /* Always set in PF mode for built-in configuration */
> >>> + conf.pf_mode_en = true;
> >>> + for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
> >>> + conf.arb_dl_4g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> + conf.arb_dl_4g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> + conf.arb_dl_4g[i].round_robin_weight =
> >> ACC100_QMGR_RR;
> >>> + conf.arb_ul_4g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> + conf.arb_ul_4g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> + conf.arb_ul_4g[i].round_robin_weight =
> >> ACC100_QMGR_RR;
> >>> + conf.arb_dl_5g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> + conf.arb_dl_5g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> + conf.arb_dl_5g[i].round_robin_weight =
> >> ACC100_QMGR_RR;
> >>> + conf.arb_ul_5g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> + conf.arb_ul_5g[i].gbr_threshold1 =
> >> ACC100_QOS_GBR;
> >>> + conf.arb_ul_5g[i].round_robin_weight =
> >> ACC100_QMGR_RR;
> >>> + }
> >>> +
> >>> + conf.input_pos_llr_1_bit = true;
> >>> + conf.output_pos_llr_1_bit = true;
> >>> + conf.num_vf_bundles = 1; /**< Number of VF bundles to
> >> setup */
> >>> +
> >>> + conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
> >>> + conf.q_ul_4g.first_qgroup_index =
> >> ACC100_QMGR_INVALID_IDX;
> >>> + conf.q_ul_4g.num_aqs_per_groups =
> >> ACC100_QMGR_NUM_AQS;
> >>> + conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> >>> + conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
> >>> + conf.q_dl_4g.first_qgroup_index =
> >> ACC100_QMGR_INVALID_IDX;
> >>> + conf.q_dl_4g.num_aqs_per_groups =
> >> ACC100_QMGR_NUM_AQS;
> >>> + conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> >>> + conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
> >>> + conf.q_ul_5g.first_qgroup_index =
> >> ACC100_QMGR_INVALID_IDX;
> >>> + conf.q_ul_5g.num_aqs_per_groups =
> >> ACC100_QMGR_NUM_AQS;
> >>> + conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> >>> + conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
> >>> + conf.q_dl_5g.first_qgroup_index =
> >> ACC100_QMGR_INVALID_IDX;
> >>> + conf.q_dl_5g.num_aqs_per_groups =
> >> ACC100_QMGR_NUM_AQS;
> >>> + conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
> >>> +
> >>> + /* setup PF with configuration information */
> >>> + ret = acc100_configure(info->dev_name, &conf);
> >>> + TEST_ASSERT_SUCCESS(ret,
> >>> + "Failed to configure ACC100 PF for bbdev
> >> %s",
> >>> + info->dev_name);
> >>> + /* Let's refresh this now this is configured */
> >>> + }
> >>> + rte_bbdev_info_get(dev_id, info);
> >> The other bbdev's do not call rte_bbdev_info_get, can this be removed ?
> > Actually it should be added outside for all versions
> > post-configuration. Thanks
> >
> >>> +#endif
> >>> +
> >>> nb_queues = RTE_MIN(rte_lcore_count(), info-
> drv.max_num_queues);
> >>> nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
> >>>
> >>> diff --git a/doc/guides/rel_notes/release_20_11.rst
> >>> b/doc/guides/rel_notes/release_20_11.rst
> >>> index 73ac08f..c8d0586 100644
> >>> --- a/doc/guides/rel_notes/release_20_11.rst
> >>> +++ b/doc/guides/rel_notes/release_20_11.rst
> >>> @@ -55,6 +55,11 @@ New Features
> >>> Also, make sure to start the actual text at the margin.
> >>> =======================================================
> >>>
> >>> +* **Added Intel ACC100 bbdev PMD.**
> >>> +
> >>> + Added a new ``acc100`` bbdev driver for the Intel\ |reg| ACC100
> >>> + accelerator also known as Mount Bryce. See the
> >>> + :doc:`../bbdevs/acc100` BBDEV guide for more details on this new
> driver.
> >>>
> >>> Removed Items
> >>> -------------
> >>> diff --git a/drivers/baseband/acc100/meson.build
> >>> b/drivers/baseband/acc100/meson.build
> >>> index 8afafc2..7ac44dc 100644
> >>> --- a/drivers/baseband/acc100/meson.build
> >>> +++ b/drivers/baseband/acc100/meson.build
> >>> @@ -4,3 +4,5 @@
> >>> deps += ['bbdev', 'bus_vdev', 'ring', 'pci', 'bus_pci']
> >>>
> >>> sources = files('rte_acc100_pmd.c')
> >>> +
> >>> +install_headers('rte_acc100_cfg.h')
> >>> diff --git a/drivers/baseband/acc100/rte_acc100_cfg.h
> >>> b/drivers/baseband/acc100/rte_acc100_cfg.h
> >>> index 73bbe36..7f523bc 100644
> >>> --- a/drivers/baseband/acc100/rte_acc100_cfg.h
> >>> +++ b/drivers/baseband/acc100/rte_acc100_cfg.h
> >>> @@ -89,6 +89,23 @@ struct acc100_conf {
> >>> struct rte_arbitration_t arb_dl_5g[RTE_ACC100_NUM_VFS]; };
> >>>
> >>> +/**
> >>> + * Configure a ACC100 device
> >>> + *
> >>> + * @param dev_name
> >>> + * The name of the device. This is the short form of PCI BDF, e.g.
> 00:01.0.
> >>> + * It can also be retrieved for a bbdev device from the dev_name field
> in
> >> the
> >>> + * rte_bbdev_info structure returned by rte_bbdev_info_get().
> >>> + * @param conf
> >>> + * Configuration to apply to ACC100 HW.
> >>> + *
> >>> + * @return
> >>> + * Zero on success, negative value on failure.
> >>> + */
> >>> +__rte_experimental
> >>> +int
> >>> +acc100_configure(const char *dev_name, struct acc100_conf *conf);
> >>> +
> >>> #ifdef __cplusplus
> >>> }
> >>> #endif
> >>> diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> b/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> index 3589814..b50dd32 100644
> >>> --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> @@ -85,6 +85,26 @@
> >>>
> >>> enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC};
> >>>
> >>> +/* Return the accelerator enum for a Queue Group Index */ static
> >>> +inline int accFromQgid(int qg_idx, const struct acc100_conf
> >>> +*acc100_conf) {
> >>> + int accQg[ACC100_NUM_QGRPS];
> >>> + int NumQGroupsPerFn[NUM_ACC];
> >>> + int acc, qgIdx, qgIndex = 0;
> >>> + for (qgIdx = 0; qgIdx < ACC100_NUM_QGRPS; qgIdx++)
> >>> + accQg[qgIdx] = 0;
> >>> + NumQGroupsPerFn[UL_4G] = acc100_conf->q_ul_4g.num_qgroups;
> >>> + NumQGroupsPerFn[UL_5G] = acc100_conf->q_ul_5g.num_qgroups;
> >>> + NumQGroupsPerFn[DL_4G] = acc100_conf->q_dl_4g.num_qgroups;
> >>> + NumQGroupsPerFn[DL_5G] = acc100_conf->q_dl_5g.num_qgroups;
> >>> + for (acc = UL_4G; acc < NUM_ACC; acc++)
> >>> + for (qgIdx = 0; qgIdx < NumQGroupsPerFn[acc]; qgIdx++)
> >>> + accQg[qgIndex++] = acc;
> >> This looks inefficient, is there a way this could be calculated
> >> without filling arrays to
> >>
> >> access 1 value ?
> > That is not time critical, and the same common code is run each time.
> ok
> >
> >>> + acc = accQg[qg_idx];
> >>> + return acc;
> >>> +}
> >>> +
> >>> /* Return the queue topology for a Queue Group Index */ static
> >>> inline void qtopFromAcc(struct rte_q_topology_t **qtop, int
> >>> acc_enum, @@ -113,6 +133,30 @@
> >>> *qtop = p_qtop;
> >>> }
> >>>
> >>> +/* Return the AQ depth for a Queue Group Index */ static inline int
> >>> +aqDepth(int qg_idx, struct acc100_conf *acc100_conf) {
> >>> + struct rte_q_topology_t *q_top = NULL;
> >>> + int acc_enum = accFromQgid(qg_idx, acc100_conf);
> >>> + qtopFromAcc(&q_top, acc_enum, acc100_conf);
> >>> + if (unlikely(q_top == NULL))
> >>> + return 0;
> >> This error is not handled well by the callers.
> >>
> >> aqNum is similar.
> > This fails on a consistent basis, by having no queue available and handling
> this as the default case.
> ok
> >
> >>> + return q_top->aq_depth_log2;
> >>> +}
> >>> +
> >>> +/* Return the AQ depth for a Queue Group Index */ static inline int
> >>> +aqNum(int qg_idx, struct acc100_conf *acc100_conf) {
> >>> + struct rte_q_topology_t *q_top = NULL;
> >>> + int acc_enum = accFromQgid(qg_idx, acc100_conf);
> >>> + qtopFromAcc(&q_top, acc_enum, acc100_conf);
> >>> + if (unlikely(q_top == NULL))
> >>> + return 0;
> >>> + return q_top->num_aqs_per_groups;
> >>> +}
> >>> +
> >>> static void
> >>> initQTop(struct acc100_conf *acc100_conf) { @@ -4177,3 +4221,464
> >>> @@ static int acc100_pci_remove(struct rte_pci_device *pci_dev)
> >>> RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME,
> >>> pci_id_acc100_pf_map);
> >> RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME,
> >>> acc100_pci_vf_driver);
> >>> RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME,
> >>> pci_id_acc100_vf_map);
> >>> +
> >>> +/*
> >>> + * Implementation to fix the power on status of some 5GUL engines
> >>> + * This requires DMA permission if ported outside DPDK
> >> This sounds like a workaround, can more detail be added here ?
> > There are comments through the code I believe:
> > - /* Detect engines in undefined state */
> > - /* Force each engine which is in unspecified state */
> > - /* Reset LDPC Cores */
> > - /* Check engine power-on status again */ Do you believe this is not
> explicit enough? Power-on status may be in an undefined state, hence these
> engines are activated with a dummy payload to make sure they are in a
> predictable state once configuration is done.
>
> Yes, not explicit enough. They do not say it is a workaround so someone else
> would not know that
>
> this is needed or likely needs adjusting in the future. Maybe change
>
> /* Check engine power-on status again */ to
>
> /*
>
> * Power-on status may be in an undefined state.
>
> * Activate this engine with a dummy payload to make sure the state is
> defined.
>
> */
>
OK I will add a bit more in comments. Thanks
> Tom
>
> >>> + */
> >>> +static void
> >>> +poweron_cleanup(struct rte_bbdev *bbdev, struct acc100_device *d,
> >>> + struct acc100_conf *conf)
> >>> +{
> >>> + int i, template_idx, qg_idx;
> >>> + uint32_t address, status, payload;
> >>> + printf("Need to clear power-on 5GUL status in internal memory\n");
> >>> + /* Reset LDPC Cores */
> >>> + for (i = 0; i < ACC100_ENGINES_MAX; i++)
> >>> + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> >>> + ACC100_ENGINE_OFFSET * i,
> >> ACC100_RESET_HI);
> >>> + usleep(LONG_WAIT);
> >>> + for (i = 0; i < ACC100_ENGINES_MAX; i++)
> >>> + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> >>> + ACC100_ENGINE_OFFSET * i,
> >> ACC100_RESET_LO);
> >>> + usleep(LONG_WAIT);
> >>> + /* Prepare dummy workload */
> >>> + alloc_2x64mb_sw_rings_mem(bbdev, d, 0);
> >>> + /* Set base addresses */
> >>> + uint32_t phys_high = (uint32_t)(d->sw_rings_phys >> 32);
> >>> + uint32_t phys_low = (uint32_t)(d->sw_rings_phys &
> >>> + ~(ACC100_SIZE_64MBYTE-1));
> >>> + acc100_reg_write(d, HWPfDmaFec5GulDescBaseHiRegVf,
> >> phys_high);
> >>> + acc100_reg_write(d, HWPfDmaFec5GulDescBaseLoRegVf, phys_low);
> >>> +
> >>> + /* Descriptor for a dummy 5GUL code block processing*/
> >>> + union acc100_dma_desc *desc = NULL;
> >>> + desc = d->sw_rings;
> >>> + desc->req.data_ptrs[0].address = d->sw_rings_phys +
> >>> + ACC100_DESC_FCW_OFFSET;
> >>> + desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN;
> >>> + desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
> >>> + desc->req.data_ptrs[0].last = 0;
> >>> + desc->req.data_ptrs[0].dma_ext = 0;
> >>> + desc->req.data_ptrs[1].address = d->sw_rings_phys + 512;
> >>> + desc->req.data_ptrs[1].blkid = ACC100_DMA_BLKID_IN;
> >>> + desc->req.data_ptrs[1].last = 1;
> >>> + desc->req.data_ptrs[1].dma_ext = 0;
> >>> + desc->req.data_ptrs[1].blen = 44;
> >>> + desc->req.data_ptrs[2].address = d->sw_rings_phys + 1024;
> >>> + desc->req.data_ptrs[2].blkid = ACC100_DMA_BLKID_OUT_ENC;
> >>> + desc->req.data_ptrs[2].last = 1;
> >>> + desc->req.data_ptrs[2].dma_ext = 0;
> >>> + desc->req.data_ptrs[2].blen = 5;
> >>> + /* Dummy FCW */
> >>> + desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
> >>> + desc->req.fcw_ld.qm = 1;
> >>> + desc->req.fcw_ld.nfiller = 30;
> >>> + desc->req.fcw_ld.BG = 2 - 1;
> >>> + desc->req.fcw_ld.Zc = 7;
> >>> + desc->req.fcw_ld.ncb = 350;
> >>> + desc->req.fcw_ld.rm_e = 4;
> >>> + desc->req.fcw_ld.itmax = 10;
> >>> + desc->req.fcw_ld.gain_i = 1;
> >>> + desc->req.fcw_ld.gain_h = 1;
> >>> +
> >>> + int engines_to_restart[SIG_UL_5G_LAST + 1] = {0};
> >>> + int num_failed_engine = 0;
> >>> + /* Detect engines in undefined state */
> >>> + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> >>> + template_idx++) {
> >>> + /* Check engine power-on status */
> >>> + address = HwPfFecUl5gIbDebugReg +
> >>> + ACC100_ENGINE_OFFSET * template_idx;
> >>> + status = (acc100_reg_read(d, address) >> 4) & 0xF;
> >>> + if (status == 0) {
> >>> + engines_to_restart[num_failed_engine] =
> >> template_idx;
> >>> + num_failed_engine++;
> >>> + }
> >>> + }
> >>> +
> >>> + int numQqsAcc = conf->q_ul_5g.num_qgroups;
> >>> + int numQgs = conf->q_ul_5g.num_qgroups;
> >>> + payload = 0;
> >>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> >> qg_idx++)
> >>> + payload |= (1 << qg_idx);
> >>> + /* Force each engine which is in unspecified state */
> >>> + for (i = 0; i < num_failed_engine; i++) {
> >>> + int failed_engine = engines_to_restart[i];
> >>> + printf("Force engine %d\n", failed_engine);
> >>> + for (template_idx = SIG_UL_5G; template_idx <=
> >> SIG_UL_5G_LAST;
> >>> + template_idx++) {
> >>> + address = HWPfQmgrGrpTmplateReg4Indx
> >>> + + BYTES_IN_WORD * template_idx;
> >>> + if (template_idx == failed_engine)
> >>> + acc100_reg_write(d, address, payload);
> >>> + else
> >>> + acc100_reg_write(d, address, 0);
> >>> + }
> >>> + /* Reset descriptor header */
> >>> + desc->req.word0 = ACC100_DMA_DESC_TYPE;
> >>> + desc->req.word1 = 0;
> >>> + desc->req.word2 = 0;
> >>> + desc->req.word3 = 0;
> >>> + desc->req.numCBs = 1;
> >>> + desc->req.m2dlen = 2;
> >>> + desc->req.d2mlen = 1;
> >>> + /* Enqueue the code block for processing */
> >>> + union acc100_enqueue_reg_fmt enq_req;
> >>> + enq_req.val = 0;
> >>> + enq_req.addr_offset = ACC100_DESC_OFFSET;
> >>> + enq_req.num_elem = 1;
> >>> + enq_req.req_elem_addr = 0;
> >>> + rte_wmb();
> >>> + acc100_reg_write(d, HWPfQmgrIngressAq + 0x100,
> >> enq_req.val);
> >>> + usleep(LONG_WAIT * 100);
> >>> + if (desc->req.word0 != 2)
> >>> + printf("DMA Response %#"PRIx32"\n", desc->req.word0);
> >>> + }
> >>> +
> >>> + /* Reset LDPC Cores */
> >>> + for (i = 0; i < ACC100_ENGINES_MAX; i++)
> >>> + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> >>> + ACC100_ENGINE_OFFSET * i,
> >> ACC100_RESET_HI);
> >>> + usleep(LONG_WAIT);
> >>> + for (i = 0; i < ACC100_ENGINES_MAX; i++)
> >>> + acc100_reg_write(d, HWPfFecUl5gCntrlReg +
> >>> + ACC100_ENGINE_OFFSET * i,
> >> ACC100_RESET_LO);
> >>> + usleep(LONG_WAIT);
> >>> + acc100_reg_write(d, HWPfHi5GHardResetReg,
> >> ACC100_RESET_HARD);
> >>> + usleep(LONG_WAIT);
> >>> + int numEngines = 0;
> >>> + /* Check engine power-on status again */
> >>> + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> >>> + template_idx++) {
> >>> + address = HwPfFecUl5gIbDebugReg +
> >>> + ACC100_ENGINE_OFFSET * template_idx;
> >>> + status = (acc100_reg_read(d, address) >> 4) & 0xF;
> >>> + address = HWPfQmgrGrpTmplateReg4Indx
> >>> + + BYTES_IN_WORD * template_idx;
> >>> + if (status == 1) {
> >>> + acc100_reg_write(d, address, payload);
> >>> + numEngines++;
> >>> + } else
> >>> + acc100_reg_write(d, address, 0);
> >>> + }
> >>> + printf("Number of 5GUL engines %d\n", numEngines);
> >>> +
> >>> + if (d->sw_rings_base != NULL)
> >>> + rte_free(d->sw_rings_base);
> >>> + usleep(LONG_WAIT);
> >>> +}
> >>> +
> >>> +/* Initial configuration of a ACC100 device prior to running configure() */
> >>> +int
> >>> +acc100_configure(const char *dev_name, struct acc100_conf *conf)
> >>> +{
> >>> + rte_bbdev_log(INFO, "acc100_configure");
> >>> + uint32_t payload, address, status;
> >> maybe value or data would be a better variable name than payload.
> >>
> >> would mean changing acc100_reg_write
> > transparent to me, but can change given DPDK uses term value.
> >
> >
> >>> + int qg_idx, template_idx, vf_idx, acc, i;
> >>> + struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name);
> >>> +
> >>> + /* Compile time checks */
> >>> + RTE_BUILD_BUG_ON(sizeof(struct acc100_dma_req_desc) != 256);
> >>> + RTE_BUILD_BUG_ON(sizeof(union acc100_dma_desc) != 256);
> >>> + RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_td) != 24);
> >>> + RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_te) != 32);
> >>> +
> >>> + if (bbdev == NULL) {
> >>> + rte_bbdev_log(ERR,
> >>> + "Invalid dev_name (%s), or device is not yet initialised",
> >>> + dev_name);
> >>> + return -ENODEV;
> >>> + }
> >>> + struct acc100_device *d = bbdev->data->dev_private;
> >>> +
> >>> + /* Store configuration */
> >>> + rte_memcpy(&d->acc100_conf, conf, sizeof(d->acc100_conf));
> >>> +
> >>> + /* PCIe Bridge configuration */
> >>> + acc100_reg_write(d, HwPfPcieGpexBridgeControl,
> >> ACC100_CFG_PCI_BRIDGE);
> >>> + for (i = 1; i < 17; i++)
> >> 17 is a magic number, use a #define
> >>
> >> this is a general issue.
> > These are only used once but still agreed.
> >
> >>> + acc100_reg_write(d,
> >>> +
> >> HwPfPcieGpexAxiAddrMappingWindowPexBaseHigh
> >>> + + i * 16, 0);
> >>> +
> >>> + /* PCIe Link Trainiing and Status State Machine */
> >>> + acc100_reg_write(d, HwPfPcieGpexLtssmStateCntrl, 0xDFC00000);
> >>> +
> >>> + /* Prevent blocking AXI read on BRESP for AXI Write */
> >>> + address = HwPfPcieGpexAxiPioControl;
> >>> + payload = ACC100_CFG_PCI_AXI;
> >>> + acc100_reg_write(d, address, payload);
> >>> +
> >>> + /* 5GDL PLL phase shift */
> >>> + acc100_reg_write(d, HWPfChaDl5gPllPhshft0, 0x1);
> >>> +
> >>> + /* Explicitly releasing AXI as this may be stopped after PF FLR/BME */
> >>> + address = HWPfDmaAxiControl;
> >>> + payload = 1;
> >>> + acc100_reg_write(d, address, payload);
> >>> +
> >>> + /* DDR Configuration */
> >>> + address = HWPfDdrBcTim6;
> >>> + payload = acc100_reg_read(d, address);
> >>> + payload &= 0xFFFFFFFB; /* Bit 2 */
> >>> +#ifdef ACC100_DDR_ECC_ENABLE
> >>> + payload |= 0x4;
> >>> +#endif
> >>> + acc100_reg_write(d, address, payload);
> >>> + address = HWPfDdrPhyDqsCountNum;
> >>> +#ifdef ACC100_DDR_ECC_ENABLE
> >>> + payload = 9;
> >>> +#else
> >>> + payload = 8;
> >>> +#endif
> >>> + acc100_reg_write(d, address, payload);
> >>> +
> >>> + /* Set default descriptor signature */
> >>> + address = HWPfDmaDescriptorSignatuture;
> >>> + payload = 0;
> >>> + acc100_reg_write(d, address, payload);
> >>> +
> >>> + /* Enable the Error Detection in DMA */
> >>> + payload = ACC100_CFG_DMA_ERROR;
> >>> + address = HWPfDmaErrorDetectionEn;
> >>> + acc100_reg_write(d, address, payload);
> >>> +
> >>> + /* AXI Cache configuration */
> >>> + payload = ACC100_CFG_AXI_CACHE;
> >>> + address = HWPfDmaAxcacheReg;
> >>> + acc100_reg_write(d, address, payload);
> >>> +
> >>> + /* Default DMA Configuration (Qmgr Enabled) */
> >>> + address = HWPfDmaConfig0Reg;
> >>> + payload = 0;
> >>> + acc100_reg_write(d, address, payload);
> >>> + address = HWPfDmaQmanen;
> >>> + payload = 0;
> >>> + acc100_reg_write(d, address, payload);
> >>> +
> >>> + /* Default RLIM/ALEN configuration */
> >>> + address = HWPfDmaConfig1Reg;
> >>> + payload = (1 << 31) + (23 << 8) + (1 << 6) + 7;
> >>> + acc100_reg_write(d, address, payload);
> >>> +
> >>> + /* Configure DMA Qmanager addresses */
> >>> + address = HWPfDmaQmgrAddrReg;
> >>> + payload = HWPfQmgrEgressQueuesTemplate;
> >>> + acc100_reg_write(d, address, payload);
> >>> +
> >>> + /* ===== Qmgr Configuration ===== */
> >>> + /* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2
> >> for UL */
> >>> + int totalQgs = conf->q_ul_4g.num_qgroups +
> >>> + conf->q_ul_5g.num_qgroups +
> >>> + conf->q_dl_4g.num_qgroups +
> >>> + conf->q_dl_5g.num_qgroups;
> >>> + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> >>> + address = HWPfQmgrDepthLog2Grp +
> >>> + BYTES_IN_WORD * qg_idx;
> >>> + payload = aqDepth(qg_idx, conf);
> >>> + acc100_reg_write(d, address, payload);
> >>> + address = HWPfQmgrTholdGrp +
> >>> + BYTES_IN_WORD * qg_idx;
> >>> + payload = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1));
> >>> + acc100_reg_write(d, address, payload);
> >>> + }
> >>> +
> >>> + /* Template Priority in incremental order */
> >>> + for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
> >>> + template_idx++) {
> >>> + address = HWPfQmgrGrpTmplateReg0Indx +
> >>> + BYTES_IN_WORD * (template_idx % 8);
> >>> + payload = TMPL_PRI_0;
> >>> + acc100_reg_write(d, address, payload);
> >>> + address = HWPfQmgrGrpTmplateReg1Indx +
> >>> + BYTES_IN_WORD * (template_idx % 8);
> >>> + payload = TMPL_PRI_1;
> >>> + acc100_reg_write(d, address, payload);
> >>> + address = HWPfQmgrGrpTmplateReg2indx +
> >>> + BYTES_IN_WORD * (template_idx % 8);
> >>> + payload = TMPL_PRI_2;
> >>> + acc100_reg_write(d, address, payload);
> >>> + address = HWPfQmgrGrpTmplateReg3Indx +
> >>> + BYTES_IN_WORD * (template_idx % 8);
> >>> + payload = TMPL_PRI_3;
> >>> + acc100_reg_write(d, address, payload);
> >>> + }
> >>> +
> >>> + address = HWPfQmgrGrpPriority;
> >>> + payload = ACC100_CFG_QMGR_HI_P;
> >>> + acc100_reg_write(d, address, payload);
> >>> +
> >>> + /* Template Configuration */
> >>> + for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
> >> template_idx++) {
> >>> + payload = 0;
> >>> + address = HWPfQmgrGrpTmplateReg4Indx
> >>> + + BYTES_IN_WORD * template_idx;
> >>> + acc100_reg_write(d, address, payload);
> >>> + }
> >>> + /* 4GUL */
> >>> + int numQgs = conf->q_ul_4g.num_qgroups;
> >>> + int numQqsAcc = 0;
> >>> + payload = 0;
> >>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> >> qg_idx++)
> >>> + payload |= (1 << qg_idx);
> >>> + for (template_idx = SIG_UL_4G; template_idx <= SIG_UL_4G_LAST;
> >>> + template_idx++) {
> >>> + address = HWPfQmgrGrpTmplateReg4Indx
> >>> + + BYTES_IN_WORD*template_idx;
> >>> + acc100_reg_write(d, address, payload);
> >>> + }
> >>> + /* 5GUL */
> >>> + numQqsAcc += numQgs;
> >>> + numQgs = conf->q_ul_5g.num_qgroups;
> >>> + payload = 0;
> >>> + int numEngines = 0;
> >>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> >> qg_idx++)
> >>> + payload |= (1 << qg_idx);
> >>> + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
> >>> + template_idx++) {
> >>> + /* Check engine power-on status */
> >>> + address = HwPfFecUl5gIbDebugReg +
> >>> + ACC100_ENGINE_OFFSET * template_idx;
> >>> + status = (acc100_reg_read(d, address) >> 4) & 0xF;
> >>> + address = HWPfQmgrGrpTmplateReg4Indx
> >>> + + BYTES_IN_WORD * template_idx;
> >>> + if (status == 1) {
> >>> + acc100_reg_write(d, address, payload);
> >>> + numEngines++;
> >>> + } else
> >>> + acc100_reg_write(d, address, 0);
> >>> + #if RTE_ACC100_SINGLE_FEC == 1
> >> #if should be at start of line
> > ok
> >
> >>> + payload = 0;
> >>> + #endif
> >>> + }
> >>> + printf("Number of 5GUL engines %d\n", numEngines);
> >>> + /* 4GDL */
> >>> + numQqsAcc += numQgs;
> >>> + numQgs = conf->q_dl_4g.num_qgroups;
> >>> + payload = 0;
> >>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> >> qg_idx++)
> >>> + payload |= (1 << qg_idx);
> >>> + for (template_idx = SIG_DL_4G; template_idx <= SIG_DL_4G_LAST;
> >>> + template_idx++) {
> >>> + address = HWPfQmgrGrpTmplateReg4Indx
> >>> + + BYTES_IN_WORD*template_idx;
> >>> + acc100_reg_write(d, address, payload);
> >>> + #if RTE_ACC100_SINGLE_FEC == 1
> >>> + payload = 0;
> >>> + #endif
> >>> + }
> >>> + /* 5GDL */
> >>> + numQqsAcc += numQgs;
> >>> + numQgs = conf->q_dl_5g.num_qgroups;
> >>> + payload = 0;
> >>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc);
> >> qg_idx++)
> >>> + payload |= (1 << qg_idx);
> >>> + for (template_idx = SIG_DL_5G; template_idx <= SIG_DL_5G_LAST;
> >>> + template_idx++) {
> >>> + address = HWPfQmgrGrpTmplateReg4Indx
> >>> + + BYTES_IN_WORD*template_idx;
> >>> + acc100_reg_write(d, address, payload);
> >>> + #if RTE_ACC100_SINGLE_FEC == 1
> >>> + payload = 0;
> >>> + #endif
> >>> + }
> >>> +
> >>> + /* Queue Group Function mapping */
> >>> + int qman_func_id[5] = {0, 2, 1, 3, 4};
> >>> + address = HWPfQmgrGrpFunction0;
> >>> + payload = 0;
> >>> + for (qg_idx = 0; qg_idx < 8; qg_idx++) {
> >>> + acc = accFromQgid(qg_idx, conf);
> >>> + payload |= qman_func_id[acc]<<(qg_idx * 4);
> >>> + }
> >>> + acc100_reg_write(d, address, payload);
> >>> +
> >>> + /* Configuration of the Arbitration QGroup depth to 1 */
> >>> + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> >>> + address = HWPfQmgrArbQDepthGrp +
> >>> + BYTES_IN_WORD * qg_idx;
> >>> + payload = 0;
> >>> + acc100_reg_write(d, address, payload);
> >>> + }
> >>> +
> >>> + /* Enabling AQueues through the Queue hierarchy*/
> >>> + for (vf_idx = 0; vf_idx < ACC100_NUM_VFS; vf_idx++) {
> >>> + for (qg_idx = 0; qg_idx < ACC100_NUM_QGRPS; qg_idx++) {
> >>> + payload = 0;
> >>> + if (vf_idx < conf->num_vf_bundles &&
> >>> + qg_idx < totalQgs)
> >>> + payload = (1 << aqNum(qg_idx, conf)) - 1;
> >>> + address = HWPfQmgrAqEnableVf
> >>> + + vf_idx * BYTES_IN_WORD;
> >>> + payload += (qg_idx << 16);
> >>> + acc100_reg_write(d, address, payload);
> >>> + }
> >>> + }
> >>> +
> >>> + /* This pointer to ARAM (256kB) is shifted by 2 (4B per register) */
> >>> + uint32_t aram_address = 0;
> >>> + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
> >>> + for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
> >>> + address = HWPfQmgrVfBaseAddr + vf_idx
> >>> + * BYTES_IN_WORD + qg_idx
> >>> + * BYTES_IN_WORD * 64;
> >>> + payload = aram_address;
> >>> + acc100_reg_write(d, address, payload);
> >>> + /* Offset ARAM Address for next memory bank
> >>> + * - increment of 4B
> >>> + */
> >>> + aram_address += aqNum(qg_idx, conf) *
> >>> + (1 << aqDepth(qg_idx, conf));
> >>> + }
> >>> + }
> >>> +
> >>> + if (aram_address > WORDS_IN_ARAM_SIZE) {
> >>> + rte_bbdev_log(ERR, "ARAM Configuration not fitting %d
> >> %d\n",
> >>> + aram_address, WORDS_IN_ARAM_SIZE);
> >>> + return -EINVAL;
> >>> + }
> >>> +
> >>> + /* ==== HI Configuration ==== */
> >>> +
> >>> + /* Prevent Block on Transmit Error */
> >>> + address = HWPfHiBlockTransmitOnErrorEn;
> >>> + payload = 0;
> >>> + acc100_reg_write(d, address, payload);
> >>> + /* Prevents to drop MSI */
> >>> + address = HWPfHiMsiDropEnableReg;
> >>> + payload = 0;
> >>> + acc100_reg_write(d, address, payload);
> >>> + /* Set the PF Mode register */
> >>> + address = HWPfHiPfMode;
> >>> + payload = (conf->pf_mode_en) ? 2 : 0;
> >>> + acc100_reg_write(d, address, payload);
> >>> + /* Enable Error Detection in HW */
> >>> + address = HWPfDmaErrorDetectionEn;
> >>> + payload = 0x3D7;
> >>> + acc100_reg_write(d, address, payload);
> >>> +
> >>> + /* QoS overflow init */
> >>> + payload = 1;
> >>> + address = HWPfQosmonAEvalOverflow0;
> >>> + acc100_reg_write(d, address, payload);
> >>> + address = HWPfQosmonBEvalOverflow0;
> >>> + acc100_reg_write(d, address, payload);
> >>> +
> >>> + /* HARQ DDR Configuration */
> >>> + unsigned int ddrSizeInMb = 512; /* Fixed to 512 MB per VF for now
> >> */
> >>> + for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
> >>> + address = HWPfDmaVfDdrBaseRw + vf_idx
> >>> + * 0x10;
> >>> + payload = ((vf_idx * (ddrSizeInMb / 64)) << 16) +
> >>> + (ddrSizeInMb - 1);
> >>> + acc100_reg_write(d, address, payload);
> >>> + }
> >>> + usleep(LONG_WAIT);
> >> Is sleep needed here ? the reg_write has one.
> > This one is needed on top
> >
> >>> +
> >> Since this seems like a workaround, add a comment here.
> > fair enough, ok, thanks
> >
> >> Tom
> >>
> >>> + if (numEngines < (SIG_UL_5G_LAST + 1))
> >>> + poweron_cleanup(bbdev, d, conf);
> >>> +
> >>> + rte_bbdev_log_debug("PF Tip configuration complete for %s",
> >> dev_name);
> >>> + return 0;
> >>> +}
> >>> diff --git
> >>> a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> >>> b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> >>> index 4a76d1d..91c234d 100644
> >>> --- a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> >>> +++ b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map
> >>> @@ -1,3 +1,10 @@
> >>> DPDK_21 {
> >>> local: *;
> >>> };
> >>> +
> >>> +EXPERIMENTAL {
> >>> + global:
> >>> +
> >>> + acc100_configure;
> >>> +
> >>> +};
@@ -52,6 +52,18 @@
#define FLR_5G_TIMEOUT 610
#endif
+#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
+#include <rte_acc100_cfg.h>
+#define ACC100PF_DRIVER_NAME ("intel_acc100_pf")
+#define ACC100VF_DRIVER_NAME ("intel_acc100_vf")
+#define ACC100_QMGR_NUM_AQS 16
+#define ACC100_QMGR_NUM_QGS 2
+#define ACC100_QMGR_AQ_DEPTH 5
+#define ACC100_QMGR_INVALID_IDX -1
+#define ACC100_QMGR_RR 1
+#define ACC100_QOS_GBR 0
+#endif
+
#define OPS_CACHE_SIZE 256U
#define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
@@ -653,6 +665,66 @@ typedef int (test_case_function)(struct active_device *ad,
info->dev_name);
}
#endif
+#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
+ if ((get_init_device() == true) &&
+ (!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
+ struct acc100_conf conf;
+ unsigned int i;
+
+ printf("Configure ACC100 FEC Driver %s with default values\n",
+ info->drv.driver_name);
+
+ /* clear default configuration before initialization */
+ memset(&conf, 0, sizeof(struct acc100_conf));
+
+ /* Always set in PF mode for built-in configuration */
+ conf.pf_mode_en = true;
+		/*
+		 * Apply the same default QoS arbitration to every VF of every
+		 * accelerator. Both GBR thresholds are set explicitly; the
+		 * previous code assigned gbr_threshold1 twice and never
+		 * touched gbr_threshold2.
+		 */
+		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
+			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
+			conf.arb_dl_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
+			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
+			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
+			conf.arb_ul_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
+			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
+			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
+			conf.arb_dl_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
+			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
+			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
+			conf.arb_ul_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
+			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
+		}
+
+ conf.input_pos_llr_1_bit = true;
+ conf.output_pos_llr_1_bit = true;
+ conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */
+
+ conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
+ conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
+ conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
+ conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
+ conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
+ conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
+ conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
+ conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
+ conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
+ conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
+ conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
+ conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
+ conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
+ conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
+ conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
+ conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
+
+ /* setup PF with configuration information */
+ ret = acc100_configure(info->dev_name, &conf);
+ TEST_ASSERT_SUCCESS(ret,
+ "Failed to configure ACC100 PF for bbdev %s",
+ info->dev_name);
+ /* Let's refresh this now this is configured */
+ }
+ rte_bbdev_info_get(dev_id, info);
+#endif
+
nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
@@ -55,6 +55,11 @@ New Features
Also, make sure to start the actual text at the margin.
=======================================================
+* **Added Intel ACC100 bbdev PMD.**
+
+ Added a new ``acc100`` bbdev driver for the Intel\ |reg| ACC100 accelerator
+ also known as Mount Bryce. See the
+ :doc:`../bbdevs/acc100` BBDEV guide for more details on this new driver.
Removed Items
-------------
@@ -4,3 +4,5 @@
deps += ['bbdev', 'bus_vdev', 'ring', 'pci', 'bus_pci']
sources = files('rte_acc100_pmd.c')
+
+install_headers('rte_acc100_cfg.h')
@@ -89,6 +89,23 @@ struct acc100_conf {
struct rte_arbitration_t arb_dl_5g[RTE_ACC100_NUM_VFS];
};
+/**
+ * Configure an ACC100 device
+ *
+ * @param dev_name
+ * The name of the device. This is the short form of PCI BDF, e.g. 00:01.0.
+ * It can also be retrieved for a bbdev device from the dev_name field in the
+ * rte_bbdev_info structure returned by rte_bbdev_info_get().
+ * @param conf
+ * Configuration to apply to ACC100 HW.
+ *
+ * @return
+ * Zero on success, negative value on failure.
+ */
+__rte_experimental
+int
+acc100_configure(const char *dev_name, struct acc100_conf *conf);
+
#ifdef __cplusplus
}
#endif
@@ -85,6 +85,26 @@
enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC};
+/*
+ * Return the accelerator enum (UL_4G, UL_5G, DL_4G or DL_5G) which owns a
+ * given Queue Group Index.
+ *
+ * Queue groups are handed out contiguously in enum order: the first
+ * q_ul_4g.num_qgroups indexes belong to UL_4G, the next
+ * q_ul_5g.num_qgroups to UL_5G, then DL_4G and finally DL_5G.
+ *
+ * An index outside [0, ACC100_NUM_QGRPS), or past the last configured
+ * group, maps to UL_4G (0). This matches the zero default of the lookup
+ * table this walk replaces, without reading or writing outside a
+ * fixed-size array when the configuration declares too many groups.
+ */
+static inline int
+accFromQgid(int qg_idx, const struct acc100_conf *acc100_conf)
+{
+	int NumQGroupsPerFn[NUM_ACC];
+	int acc, qgEnd = 0;
+
+	if (qg_idx < 0 || qg_idx >= ACC100_NUM_QGRPS)
+		return UL_4G;
+
+	NumQGroupsPerFn[UL_4G] = acc100_conf->q_ul_4g.num_qgroups;
+	NumQGroupsPerFn[UL_5G] = acc100_conf->q_ul_5g.num_qgroups;
+	NumQGroupsPerFn[DL_4G] = acc100_conf->q_dl_4g.num_qgroups;
+	NumQGroupsPerFn[DL_5G] = acc100_conf->q_dl_5g.num_qgroups;
+
+	/* qgEnd is one past the last index owned by the current accelerator */
+	for (acc = UL_4G; acc < NUM_ACC; acc++) {
+		qgEnd += NumQGroupsPerFn[acc];
+		if (qg_idx < qgEnd)
+			return acc;
+	}
+	return UL_4G;
+}
+
/* Return the queue topology for a Queue Group Index */
static inline void
qtopFromAcc(struct rte_q_topology_t **qtop, int acc_enum,
@@ -113,6 +133,30 @@
*qtop = p_qtop;
}
+/*
+ * Return the log2 AQ depth configured for the accelerator owning the given
+ * Queue Group Index, or 0 when no queue topology could be resolved for it.
+ */
+static inline int
+aqDepth(int qg_idx, struct acc100_conf *acc100_conf)
+{
+	struct rte_q_topology_t *topology = NULL;
+
+	qtopFromAcc(&topology, accFromQgid(qg_idx, acc100_conf), acc100_conf);
+	if (unlikely(topology == NULL))
+		return 0;
+
+	return topology->aq_depth_log2;
+}
+
+/*
+ * Return the number of AQs per group configured for the accelerator owning
+ * the given Queue Group Index, or 0 when no queue topology could be resolved
+ * for it.
+ */
+static inline int
+aqNum(int qg_idx, struct acc100_conf *acc100_conf)
+{
+	struct rte_q_topology_t *topology = NULL;
+
+	qtopFromAcc(&topology, accFromQgid(qg_idx, acc100_conf), acc100_conf);
+	if (unlikely(topology == NULL))
+		return 0;
+
+	return topology->num_aqs_per_groups;
+}
+
static void
initQTop(struct acc100_conf *acc100_conf)
{
@@ -4177,3 +4221,464 @@ static int acc100_pci_remove(struct rte_pci_device *pci_dev)
RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME, pci_id_acc100_pf_map);
RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME, acc100_pci_vf_driver);
RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME, pci_id_acc100_vf_map);
+
+/*
+ * Implementation to fix the power on status of some 5GUL engines
+ * This requires DMA permission if ported outside DPDK
+ *
+ * Sequence:
+ *  1. Pulse the reset of every LDPC core.
+ *  2. Allocate software ring memory and build one dummy 5GUL descriptor in it.
+ *  3. Collect the 5GUL engines whose status nibble (bits 7:4 of the IB debug
+ *     register) reads 0.
+ *  4. For each such engine, route the 5GUL queue groups to that engine's
+ *     template only and enqueue the dummy descriptor.
+ *  5. Pulse the LDPC core resets again and issue the 5G hard reset.
+ *  6. Re-read the status of every 5GUL engine and enable the template of the
+ *     ones reporting 1.
+ *  7. Release the ring memory.
+ *
+ * @param bbdev  bbdev device, forwarded to the ring allocator.
+ * @param d      ACC100 private device data (register access and sw rings).
+ * @param conf   Device configuration; only the UL queue group counts are read.
+ */
+static void
+poweron_cleanup(struct rte_bbdev *bbdev, struct acc100_device *d,
+		struct acc100_conf *conf)
+{
+	int i, template_idx, qg_idx;
+	uint32_t address, status, payload;
+	printf("Need to clear power-on 5GUL status in internal memory\n");
+	/* Reset LDPC Cores */
+	for (i = 0; i < ACC100_ENGINES_MAX; i++)
+		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
+				ACC100_ENGINE_OFFSET * i, ACC100_RESET_HI);
+	usleep(LONG_WAIT);
+	for (i = 0; i < ACC100_ENGINES_MAX; i++)
+		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
+				ACC100_ENGINE_OFFSET * i, ACC100_RESET_LO);
+	usleep(LONG_WAIT);
+	/* Prepare dummy workload */
+	alloc_2x64mb_sw_rings_mem(bbdev, d, 0);
+	/*
+	 * The descriptor below is written through d->sw_rings, so bail out
+	 * rather than dereference NULL.
+	 * NOTE(review): assumes the allocator leaves d->sw_rings NULL when it
+	 * fails - confirm against alloc_2x64mb_sw_rings_mem().
+	 */
+	if (d->sw_rings == NULL) {
+		rte_bbdev_log(ERR,
+				"Failed to allocate memory for 5GUL power-on cleanup");
+		return;
+	}
+	/* Set base addresses */
+	uint32_t phys_high = (uint32_t)(d->sw_rings_phys >> 32);
+	uint32_t phys_low = (uint32_t)(d->sw_rings_phys &
+			~(ACC100_SIZE_64MBYTE-1));
+	acc100_reg_write(d, HWPfDmaFec5GulDescBaseHiRegVf, phys_high);
+	acc100_reg_write(d, HWPfDmaFec5GulDescBaseLoRegVf, phys_low);
+
+	/* Descriptor for a dummy 5GUL code block processing*/
+	union acc100_dma_desc *desc = NULL;
+	desc = d->sw_rings;
+	desc->req.data_ptrs[0].address = d->sw_rings_phys +
+			ACC100_DESC_FCW_OFFSET;
+	desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN;
+	desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
+	desc->req.data_ptrs[0].last = 0;
+	desc->req.data_ptrs[0].dma_ext = 0;
+	desc->req.data_ptrs[1].address = d->sw_rings_phys + 512;
+	desc->req.data_ptrs[1].blkid = ACC100_DMA_BLKID_IN;
+	desc->req.data_ptrs[1].last = 1;
+	desc->req.data_ptrs[1].dma_ext = 0;
+	desc->req.data_ptrs[1].blen = 44;
+	desc->req.data_ptrs[2].address = d->sw_rings_phys + 1024;
+	desc->req.data_ptrs[2].blkid = ACC100_DMA_BLKID_OUT_ENC;
+	desc->req.data_ptrs[2].last = 1;
+	desc->req.data_ptrs[2].dma_ext = 0;
+	desc->req.data_ptrs[2].blen = 5;
+	/* Dummy FCW */
+	desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
+	desc->req.fcw_ld.qm = 1;
+	desc->req.fcw_ld.nfiller = 30;
+	desc->req.fcw_ld.BG = 2 - 1;
+	desc->req.fcw_ld.Zc = 7;
+	desc->req.fcw_ld.ncb = 350;
+	desc->req.fcw_ld.rm_e = 4;
+	desc->req.fcw_ld.itmax = 10;
+	desc->req.fcw_ld.gain_i = 1;
+	desc->req.fcw_ld.gain_h = 1;
+
+	int engines_to_restart[SIG_UL_5G_LAST + 1] = {0};
+	int num_failed_engine = 0;
+	/* Detect engines in undefined state */
+	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
+			template_idx++) {
+		/* Check engine power-on status */
+		address = HwPfFecUl5gIbDebugReg +
+				ACC100_ENGINE_OFFSET * template_idx;
+		status = (acc100_reg_read(d, address) >> 4) & 0xF;
+		if (status == 0) {
+			engines_to_restart[num_failed_engine] = template_idx;
+			num_failed_engine++;
+		}
+	}
+
+	/*
+	 * Bitmask of the 5GUL queue groups. acc100_configure() numbers the
+	 * 5GUL groups right after the 4GUL ones, so the first 5GUL index is
+	 * the 4GUL group count (not the 5GUL count).
+	 */
+	int numQqsAcc = conf->q_ul_4g.num_qgroups;
+	int numQgs = conf->q_ul_5g.num_qgroups;
+	payload = 0;
+	for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
+		payload |= (1 << qg_idx);
+	/* Force each engine which is in unspecified state */
+	for (i = 0; i < num_failed_engine; i++) {
+		int failed_engine = engines_to_restart[i];
+		printf("Force engine %d\n", failed_engine);
+		/* Only the engine under repair may pick up the dummy job */
+		for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
+				template_idx++) {
+			address = HWPfQmgrGrpTmplateReg4Indx
+					+ BYTES_IN_WORD * template_idx;
+			if (template_idx == failed_engine)
+				acc100_reg_write(d, address, payload);
+			else
+				acc100_reg_write(d, address, 0);
+		}
+		/* Reset descriptor header */
+		desc->req.word0 = ACC100_DMA_DESC_TYPE;
+		desc->req.word1 = 0;
+		desc->req.word2 = 0;
+		desc->req.word3 = 0;
+		desc->req.numCBs = 1;
+		desc->req.m2dlen = 2;
+		desc->req.d2mlen = 1;
+		/* Enqueue the code block for processing */
+		union acc100_enqueue_reg_fmt enq_req;
+		enq_req.val = 0;
+		enq_req.addr_offset = ACC100_DESC_OFFSET;
+		enq_req.num_elem = 1;
+		enq_req.req_elem_addr = 0;
+		/* Descriptor must be visible in memory before the doorbell */
+		rte_wmb();
+		acc100_reg_write(d, HWPfQmgrIngressAq + 0x100, enq_req.val);
+		usleep(LONG_WAIT * 100);
+		if (desc->req.word0 != 2)
+			printf("DMA Response %#"PRIx32"\n", desc->req.word0);
+	}
+
+	/* Reset LDPC Cores */
+	for (i = 0; i < ACC100_ENGINES_MAX; i++)
+		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
+				ACC100_ENGINE_OFFSET * i, ACC100_RESET_HI);
+	usleep(LONG_WAIT);
+	for (i = 0; i < ACC100_ENGINES_MAX; i++)
+		acc100_reg_write(d, HWPfFecUl5gCntrlReg +
+				ACC100_ENGINE_OFFSET * i, ACC100_RESET_LO);
+	usleep(LONG_WAIT);
+	acc100_reg_write(d, HWPfHi5GHardResetReg, ACC100_RESET_HARD);
+	usleep(LONG_WAIT);
+	int numEngines = 0;
+	/* Check engine power-on status again */
+	for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
+			template_idx++) {
+		address = HwPfFecUl5gIbDebugReg +
+				ACC100_ENGINE_OFFSET * template_idx;
+		status = (acc100_reg_read(d, address) >> 4) & 0xF;
+		address = HWPfQmgrGrpTmplateReg4Indx
+				+ BYTES_IN_WORD * template_idx;
+		if (status == 1) {
+			acc100_reg_write(d, address, payload);
+			numEngines++;
+		} else
+			acc100_reg_write(d, address, 0);
+	}
+	printf("Number of 5GUL engines %d\n", numEngines);
+
+	/*
+	 * Release the scratch rings and drop the now stale pointers so that
+	 * later code cannot reuse or free them a second time.
+	 */
+	if (d->sw_rings_base != NULL)
+		rte_free(d->sw_rings_base);
+	d->sw_rings_base = NULL;
+	d->sw_rings = NULL;
+	usleep(LONG_WAIT);
+}
+
+/* Initial configuration of an ACC100 device prior to running configure() */
+int
+acc100_configure(const char *dev_name, struct acc100_conf *conf)
+{
+ rte_bbdev_log(INFO, "acc100_configure");
+ uint32_t payload, address, status;
+ int qg_idx, template_idx, vf_idx, acc, i;
+ struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name);
+
+ /* Compile time checks */
+ RTE_BUILD_BUG_ON(sizeof(struct acc100_dma_req_desc) != 256);
+ RTE_BUILD_BUG_ON(sizeof(union acc100_dma_desc) != 256);
+ RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_td) != 24);
+ RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_te) != 32);
+
+ if (bbdev == NULL) {
+ rte_bbdev_log(ERR,
+ "Invalid dev_name (%s), or device is not yet initialised",
+ dev_name);
+ return -ENODEV;
+ }
+ struct acc100_device *d = bbdev->data->dev_private;
+
+ /* Store configuration */
+ rte_memcpy(&d->acc100_conf, conf, sizeof(d->acc100_conf));
+
+ /* PCIe Bridge configuration */
+ acc100_reg_write(d, HwPfPcieGpexBridgeControl, ACC100_CFG_PCI_BRIDGE);
+ for (i = 1; i < 17; i++)
+ acc100_reg_write(d,
+ HwPfPcieGpexAxiAddrMappingWindowPexBaseHigh
+ + i * 16, 0);
+
+ /* PCIe Link Training and Status State Machine */
+ acc100_reg_write(d, HwPfPcieGpexLtssmStateCntrl, 0xDFC00000);
+
+ /* Prevent blocking AXI read on BRESP for AXI Write */
+ address = HwPfPcieGpexAxiPioControl;
+ payload = ACC100_CFG_PCI_AXI;
+ acc100_reg_write(d, address, payload);
+
+ /* 5GDL PLL phase shift */
+ acc100_reg_write(d, HWPfChaDl5gPllPhshft0, 0x1);
+
+ /* Explicitly releasing AXI as this may be stopped after PF FLR/BME */
+ address = HWPfDmaAxiControl;
+ payload = 1;
+ acc100_reg_write(d, address, payload);
+
+ /* DDR Configuration */
+ address = HWPfDdrBcTim6;
+ payload = acc100_reg_read(d, address);
+ payload &= 0xFFFFFFFB; /* Bit 2 */
+#ifdef ACC100_DDR_ECC_ENABLE
+ payload |= 0x4;
+#endif
+ acc100_reg_write(d, address, payload);
+ address = HWPfDdrPhyDqsCountNum;
+#ifdef ACC100_DDR_ECC_ENABLE
+ payload = 9;
+#else
+ payload = 8;
+#endif
+ acc100_reg_write(d, address, payload);
+
+ /* Set default descriptor signature */
+ address = HWPfDmaDescriptorSignatuture;
+ payload = 0;
+ acc100_reg_write(d, address, payload);
+
+ /* Enable the Error Detection in DMA */
+ payload = ACC100_CFG_DMA_ERROR;
+ address = HWPfDmaErrorDetectionEn;
+ acc100_reg_write(d, address, payload);
+
+ /* AXI Cache configuration */
+ payload = ACC100_CFG_AXI_CACHE;
+ address = HWPfDmaAxcacheReg;
+ acc100_reg_write(d, address, payload);
+
+ /* Default DMA Configuration (Qmgr Enabled) */
+ address = HWPfDmaConfig0Reg;
+ payload = 0;
+ acc100_reg_write(d, address, payload);
+ address = HWPfDmaQmanen;
+ payload = 0;
+ acc100_reg_write(d, address, payload);
+
+ /* Default RLIM/ALEN configuration */
+ address = HWPfDmaConfig1Reg;
+ payload = (1 << 31) + (23 << 8) + (1 << 6) + 7;
+ acc100_reg_write(d, address, payload);
+
+ /* Configure DMA Qmanager addresses */
+ address = HWPfDmaQmgrAddrReg;
+ payload = HWPfQmgrEgressQueuesTemplate;
+ acc100_reg_write(d, address, payload);
+
+ /* ===== Qmgr Configuration ===== */
+ /* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2 for UL */
+ int totalQgs = conf->q_ul_4g.num_qgroups +
+ conf->q_ul_5g.num_qgroups +
+ conf->q_dl_4g.num_qgroups +
+ conf->q_dl_5g.num_qgroups;
+ for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
+ address = HWPfQmgrDepthLog2Grp +
+ BYTES_IN_WORD * qg_idx;
+ payload = aqDepth(qg_idx, conf);
+ acc100_reg_write(d, address, payload);
+ address = HWPfQmgrTholdGrp +
+ BYTES_IN_WORD * qg_idx;
+ payload = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1));
+ acc100_reg_write(d, address, payload);
+ }
+
+ /* Template Priority in incremental order */
+ for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
+ template_idx++) {
+ address = HWPfQmgrGrpTmplateReg0Indx +
+ BYTES_IN_WORD * (template_idx % 8);
+ payload = TMPL_PRI_0;
+ acc100_reg_write(d, address, payload);
+ address = HWPfQmgrGrpTmplateReg1Indx +
+ BYTES_IN_WORD * (template_idx % 8);
+ payload = TMPL_PRI_1;
+ acc100_reg_write(d, address, payload);
+ address = HWPfQmgrGrpTmplateReg2indx +
+ BYTES_IN_WORD * (template_idx % 8);
+ payload = TMPL_PRI_2;
+ acc100_reg_write(d, address, payload);
+ address = HWPfQmgrGrpTmplateReg3Indx +
+ BYTES_IN_WORD * (template_idx % 8);
+ payload = TMPL_PRI_3;
+ acc100_reg_write(d, address, payload);
+ }
+
+ address = HWPfQmgrGrpPriority;
+ payload = ACC100_CFG_QMGR_HI_P;
+ acc100_reg_write(d, address, payload);
+
+ /* Template Configuration */
+ for (template_idx = 0; template_idx < ACC100_NUM_TMPL; template_idx++) {
+ payload = 0;
+ address = HWPfQmgrGrpTmplateReg4Indx
+ + BYTES_IN_WORD * template_idx;
+ acc100_reg_write(d, address, payload);
+ }
+ /* 4GUL */
+ int numQgs = conf->q_ul_4g.num_qgroups;
+ int numQqsAcc = 0;
+ payload = 0;
+ for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
+ payload |= (1 << qg_idx);
+ for (template_idx = SIG_UL_4G; template_idx <= SIG_UL_4G_LAST;
+ template_idx++) {
+ address = HWPfQmgrGrpTmplateReg4Indx
+ + BYTES_IN_WORD*template_idx;
+ acc100_reg_write(d, address, payload);
+ }
+ /* 5GUL */
+ numQqsAcc += numQgs;
+ numQgs = conf->q_ul_5g.num_qgroups;
+ payload = 0;
+ int numEngines = 0;
+ for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
+ payload |= (1 << qg_idx);
+ for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST;
+ template_idx++) {
+ /* Check engine power-on status */
+ address = HwPfFecUl5gIbDebugReg +
+ ACC100_ENGINE_OFFSET * template_idx;
+ status = (acc100_reg_read(d, address) >> 4) & 0xF;
+ address = HWPfQmgrGrpTmplateReg4Indx
+ + BYTES_IN_WORD * template_idx;
+ if (status == 1) {
+ acc100_reg_write(d, address, payload);
+ numEngines++;
+ } else
+ acc100_reg_write(d, address, 0);
+ #if RTE_ACC100_SINGLE_FEC == 1
+ payload = 0;
+ #endif
+ }
+ printf("Number of 5GUL engines %d\n", numEngines);
+ /* 4GDL */
+ numQqsAcc += numQgs;
+ numQgs = conf->q_dl_4g.num_qgroups;
+ payload = 0;
+ for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
+ payload |= (1 << qg_idx);
+ for (template_idx = SIG_DL_4G; template_idx <= SIG_DL_4G_LAST;
+ template_idx++) {
+ address = HWPfQmgrGrpTmplateReg4Indx
+ + BYTES_IN_WORD*template_idx;
+ acc100_reg_write(d, address, payload);
+ #if RTE_ACC100_SINGLE_FEC == 1
+ payload = 0;
+ #endif
+ }
+ /* 5GDL */
+ numQqsAcc += numQgs;
+ numQgs = conf->q_dl_5g.num_qgroups;
+ payload = 0;
+ for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
+ payload |= (1 << qg_idx);
+ for (template_idx = SIG_DL_5G; template_idx <= SIG_DL_5G_LAST;
+ template_idx++) {
+ address = HWPfQmgrGrpTmplateReg4Indx
+ + BYTES_IN_WORD*template_idx;
+ acc100_reg_write(d, address, payload);
+ #if RTE_ACC100_SINGLE_FEC == 1
+ payload = 0;
+ #endif
+ }
+
+ /* Queue Group Function mapping */
+ int qman_func_id[5] = {0, 2, 1, 3, 4};
+ address = HWPfQmgrGrpFunction0;
+ payload = 0;
+ for (qg_idx = 0; qg_idx < 8; qg_idx++) {
+ acc = accFromQgid(qg_idx, conf);
+ payload |= qman_func_id[acc]<<(qg_idx * 4);
+ }
+ acc100_reg_write(d, address, payload);
+
+ /* Configuration of the Arbitration QGroup depth to 1 */
+ for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
+ address = HWPfQmgrArbQDepthGrp +
+ BYTES_IN_WORD * qg_idx;
+ payload = 0;
+ acc100_reg_write(d, address, payload);
+ }
+
+ /* Enabling AQueues through the Queue hierarchy*/
+ for (vf_idx = 0; vf_idx < ACC100_NUM_VFS; vf_idx++) {
+ for (qg_idx = 0; qg_idx < ACC100_NUM_QGRPS; qg_idx++) {
+ payload = 0;
+ if (vf_idx < conf->num_vf_bundles &&
+ qg_idx < totalQgs)
+ payload = (1 << aqNum(qg_idx, conf)) - 1;
+ address = HWPfQmgrAqEnableVf
+ + vf_idx * BYTES_IN_WORD;
+ payload += (qg_idx << 16);
+ acc100_reg_write(d, address, payload);
+ }
+ }
+
+ /* This pointer to ARAM (256kB) is shifted by 2 (4B per register) */
+ uint32_t aram_address = 0;
+ for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
+ for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
+ address = HWPfQmgrVfBaseAddr + vf_idx
+ * BYTES_IN_WORD + qg_idx
+ * BYTES_IN_WORD * 64;
+ payload = aram_address;
+ acc100_reg_write(d, address, payload);
+ /* Offset ARAM Address for next memory bank
+ * - increment of 4B
+ */
+ aram_address += aqNum(qg_idx, conf) *
+ (1 << aqDepth(qg_idx, conf));
+ }
+ }
+
+ if (aram_address > WORDS_IN_ARAM_SIZE) {
+ rte_bbdev_log(ERR, "ARAM Configuration not fitting %d %d\n",
+ aram_address, WORDS_IN_ARAM_SIZE);
+ return -EINVAL;
+ }
+
+ /* ==== HI Configuration ==== */
+
+ /* Prevent Block on Transmit Error */
+ address = HWPfHiBlockTransmitOnErrorEn;
+ payload = 0;
+ acc100_reg_write(d, address, payload);
+ /* Prevents to drop MSI */
+ address = HWPfHiMsiDropEnableReg;
+ payload = 0;
+ acc100_reg_write(d, address, payload);
+ /* Set the PF Mode register */
+ address = HWPfHiPfMode;
+ payload = (conf->pf_mode_en) ? 2 : 0;
+ acc100_reg_write(d, address, payload);
+ /* Enable Error Detection in HW */
+ address = HWPfDmaErrorDetectionEn;
+ payload = 0x3D7;
+ acc100_reg_write(d, address, payload);
+
+ /* QoS overflow init */
+ payload = 1;
+ address = HWPfQosmonAEvalOverflow0;
+ acc100_reg_write(d, address, payload);
+ address = HWPfQosmonBEvalOverflow0;
+ acc100_reg_write(d, address, payload);
+
+ /* HARQ DDR Configuration */
+ unsigned int ddrSizeInMb = 512; /* Fixed to 512 MB per VF for now */
+ for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
+ address = HWPfDmaVfDdrBaseRw + vf_idx
+ * 0x10;
+ payload = ((vf_idx * (ddrSizeInMb / 64)) << 16) +
+ (ddrSizeInMb - 1);
+ acc100_reg_write(d, address, payload);
+ }
+ usleep(LONG_WAIT);
+
+ if (numEngines < (SIG_UL_5G_LAST + 1))
+ poweron_cleanup(bbdev, d, conf);
+
+ rte_bbdev_log_debug("PF Tip configuration complete for %s", dev_name);
+ return 0;
+}
@@ -1,3 +1,10 @@
DPDK_21 {
local: *;
};
+
+EXPERIMENTAL {
+ global:
+
+ acc100_configure;
+
+};