[v3,02/10] vdpa/sfc: add support for device initialization

Message ID 20211029144645.30295-3-vsrivast@xilinx.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Headers
Series vdpa/sfc: introduce Xilinx vDPA driver |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Vijay Srivastava Oct. 29, 2021, 2:46 p.m. UTC
  From: Vijay Kumar Srivastava <vsrivast@xilinx.com>

Add HW initialization and vDPA device registration support.

Signed-off-by: Vijay Kumar Srivastava <vsrivast@xilinx.com>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
v2:
* Used rte_memzone_reserve_aligned for mcdi buffer allocation.
* Freeing mcdi buff when DMA map fails.
* Fixed one typo.

 doc/guides/vdpadevs/sfc.rst       |   6 +
 drivers/vdpa/sfc/meson.build      |   3 +
 drivers/vdpa/sfc/sfc_vdpa.c       |  23 +++
 drivers/vdpa/sfc/sfc_vdpa.h       |  49 +++++-
 drivers/vdpa/sfc/sfc_vdpa_debug.h |  21 +++
 drivers/vdpa/sfc/sfc_vdpa_hw.c    | 327 ++++++++++++++++++++++++++++++++++++++
 drivers/vdpa/sfc/sfc_vdpa_log.h   |   3 +
 drivers/vdpa/sfc/sfc_vdpa_mcdi.c  |  74 +++++++++
 drivers/vdpa/sfc/sfc_vdpa_ops.c   | 129 +++++++++++++++
 drivers/vdpa/sfc/sfc_vdpa_ops.h   |  36 +++++
 10 files changed, 670 insertions(+), 1 deletion(-)
 create mode 100644 drivers/vdpa/sfc/sfc_vdpa_debug.h
 create mode 100644 drivers/vdpa/sfc/sfc_vdpa_hw.c
 create mode 100644 drivers/vdpa/sfc/sfc_vdpa_mcdi.c
 create mode 100644 drivers/vdpa/sfc/sfc_vdpa_ops.c
 create mode 100644 drivers/vdpa/sfc/sfc_vdpa_ops.h
  

Comments

Mattias Rönnblom Oct. 29, 2021, 8:21 p.m. UTC | #1
On 2021-10-29 16:46, Vijay Srivastava wrote:
> From: Vijay Kumar Srivastava <vsrivast@xilinx.com>
> 
> Add HW initialization and vDPA device registration support.
> 
> Signed-off-by: Vijay Kumar Srivastava <vsrivast@xilinx.com>
> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> ---
> v2:
> * Used rte_memzone_reserve_aligned for mcdi buffer allocation.
> * Freeing mcdi buff when DMA map fails.
> * Fixed one typo.
> 
>   doc/guides/vdpadevs/sfc.rst       |   6 +
>   drivers/vdpa/sfc/meson.build      |   3 +
>   drivers/vdpa/sfc/sfc_vdpa.c       |  23 +++
>   drivers/vdpa/sfc/sfc_vdpa.h       |  49 +++++-
>   drivers/vdpa/sfc/sfc_vdpa_debug.h |  21 +++
>   drivers/vdpa/sfc/sfc_vdpa_hw.c    | 327 ++++++++++++++++++++++++++++++++++++++
>   drivers/vdpa/sfc/sfc_vdpa_log.h   |   3 +
>   drivers/vdpa/sfc/sfc_vdpa_mcdi.c  |  74 +++++++++
>   drivers/vdpa/sfc/sfc_vdpa_ops.c   | 129 +++++++++++++++
>   drivers/vdpa/sfc/sfc_vdpa_ops.h   |  36 +++++
>   10 files changed, 670 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/vdpa/sfc/sfc_vdpa_debug.h
>   create mode 100644 drivers/vdpa/sfc/sfc_vdpa_hw.c
>   create mode 100644 drivers/vdpa/sfc/sfc_vdpa_mcdi.c
>   create mode 100644 drivers/vdpa/sfc/sfc_vdpa_ops.c
>   create mode 100644 drivers/vdpa/sfc/sfc_vdpa_ops.h
> 
> diff --git a/doc/guides/vdpadevs/sfc.rst b/doc/guides/vdpadevs/sfc.rst
> index 44e694f..d06c427 100644
> --- a/doc/guides/vdpadevs/sfc.rst
> +++ b/doc/guides/vdpadevs/sfc.rst
> @@ -95,3 +95,9 @@ SFC vDPA PMD provides the following log types available for control:
>     Matches a subset of per-port log types registered during runtime.
>     A full name for a particular type may be obtained by appending a
>     dot and a PCI device identifier (``XXXX:XX:XX.X``) to the prefix.
> +
> +- ``pmd.vdpa.sfc.mcdi`` (default level is **notice**)
> +
> +  Extra logging of the communication with the NIC's management CPU.
> +  The format of the log is consumed by the netlogdecode cross-platform
> +  tool. May be managed per-port, as explained above.
> diff --git a/drivers/vdpa/sfc/meson.build b/drivers/vdpa/sfc/meson.build
> index 4255d65..dc333de 100644
> --- a/drivers/vdpa/sfc/meson.build
> +++ b/drivers/vdpa/sfc/meson.build
> @@ -19,4 +19,7 @@ endforeach
>   deps += ['common_sfc_efx', 'bus_pci']
>   sources = files(
>   	'sfc_vdpa.c',
> +	'sfc_vdpa_hw.c',
> +	'sfc_vdpa_mcdi.c',
> +	'sfc_vdpa_ops.c',
>   )
> diff --git a/drivers/vdpa/sfc/sfc_vdpa.c b/drivers/vdpa/sfc/sfc_vdpa.c
> index d85c52b..b7eca56 100644
> --- a/drivers/vdpa/sfc/sfc_vdpa.c
> +++ b/drivers/vdpa/sfc/sfc_vdpa.c
> @@ -232,6 +232,19 @@ struct sfc_vdpa_adapter *
>   		goto fail_vfio_setup;
>   	}
>   
> +	sfc_vdpa_log_init(sva, "hw init");
> +	if (sfc_vdpa_hw_init(sva) != 0) {
> +		sfc_vdpa_err(sva, "failed to init HW %s", pci_dev->name);
> +		goto fail_hw_init;
> +	}
> +
> +	sfc_vdpa_log_init(sva, "dev init");
> +	sva->ops_data = sfc_vdpa_device_init(sva, SFC_VDPA_AS_VF);
> +	if (sva->ops_data == NULL) {
> +		sfc_vdpa_err(sva, "failed vDPA dev init %s", pci_dev->name);
> +		goto fail_dev_init;
> +	}
> +
>   	pthread_mutex_lock(&sfc_vdpa_adapter_list_lock);
>   	TAILQ_INSERT_TAIL(&sfc_vdpa_adapter_list, sva, next);
>   	pthread_mutex_unlock(&sfc_vdpa_adapter_list_lock);
> @@ -240,6 +253,12 @@ struct sfc_vdpa_adapter *
>   
>   	return 0;
>   
> +fail_dev_init:
> +	sfc_vdpa_hw_fini(sva);
> +
> +fail_hw_init:
> +	sfc_vdpa_vfio_teardown(sva);
> +
>   fail_vfio_setup:
>   fail_set_log_prefix:
>   	rte_free(sva);
> @@ -266,6 +285,10 @@ struct sfc_vdpa_adapter *
>   	TAILQ_REMOVE(&sfc_vdpa_adapter_list, sva, next);
>   	pthread_mutex_unlock(&sfc_vdpa_adapter_list_lock);
>   
> +	sfc_vdpa_device_fini(sva->ops_data);
> +
> +	sfc_vdpa_hw_fini(sva);
> +
>   	sfc_vdpa_vfio_teardown(sva);
>   
>   	rte_free(sva);
> diff --git a/drivers/vdpa/sfc/sfc_vdpa.h b/drivers/vdpa/sfc/sfc_vdpa.h
> index 3b77900..046f25d 100644
> --- a/drivers/vdpa/sfc/sfc_vdpa.h
> +++ b/drivers/vdpa/sfc/sfc_vdpa.h
> @@ -11,14 +11,38 @@
>   
>   #include <rte_bus_pci.h>
>   
> +#include "sfc_efx.h"
> +#include "sfc_efx_mcdi.h"
> +#include "sfc_vdpa_debug.h"
>   #include "sfc_vdpa_log.h"
> +#include "sfc_vdpa_ops.h"
> +
> +#define SFC_VDPA_DEFAULT_MCDI_IOVA		0x200000000000
>   
>   /* Adapter private data */
>   struct sfc_vdpa_adapter {
>   	TAILQ_ENTRY(sfc_vdpa_adapter)	next;
> +	/*
> +	 * PMD setup and configuration is not thread safe. Since it is not
> +	 * performance sensitive, it is better to guarantee thread-safety
> +	 * and add device level lock. vDPA control operations which
> +	 * change its state should acquire the lock.
> +	 */
> +	rte_spinlock_t			lock;
>   	struct rte_pci_device		*pdev;
>   	struct rte_pci_addr		pci_addr;
>   
> +	efx_family_t			family;
> +	efx_nic_t			*nic;
> +	rte_spinlock_t			nic_lock;
> +
> +	efsys_bar_t			mem_bar;
> +
> +	struct sfc_efx_mcdi		mcdi;
> +	size_t				mcdi_buff_size;
> +
> +	uint32_t			max_queue_count;
> +
>   	char				log_prefix[SFC_VDPA_LOG_PREFIX_MAX];
>   	uint32_t			logtype_main;
>   
> @@ -26,6 +50,7 @@ struct sfc_vdpa_adapter {
>   	int				vfio_dev_fd;
>   	int				vfio_container_fd;
>   	int				iommu_group_num;
> +	struct sfc_vdpa_ops_data	*ops_data;
>   };
>   
>   uint32_t
> @@ -36,5 +61,27 @@ struct sfc_vdpa_adapter {
>   struct sfc_vdpa_adapter *
>   sfc_vdpa_get_adapter_by_dev(struct rte_pci_device *pdev);
>   
> -#endif  /* _SFC_VDPA_H */
> +int
> +sfc_vdpa_hw_init(struct sfc_vdpa_adapter *sva);
> +void
> +sfc_vdpa_hw_fini(struct sfc_vdpa_adapter *sva);
>   
> +int
> +sfc_vdpa_mcdi_init(struct sfc_vdpa_adapter *sva);
> +void
> +sfc_vdpa_mcdi_fini(struct sfc_vdpa_adapter *sva);
> +
> +int
> +sfc_vdpa_dma_alloc(struct sfc_vdpa_adapter *sva, const char *name,
> +		   size_t len, efsys_mem_t *esmp);
> +
> +void
> +sfc_vdpa_dma_free(struct sfc_vdpa_adapter *sva, efsys_mem_t *esmp);
> +
> +static inline struct sfc_vdpa_adapter *
> +sfc_vdpa_adapter_by_dev_handle(void *dev_handle)
> +{
> +	return (struct sfc_vdpa_adapter *)dev_handle;
> +}
> +
> +#endif  /* _SFC_VDPA_H */
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_debug.h b/drivers/vdpa/sfc/sfc_vdpa_debug.h
> new file mode 100644
> index 0000000..cfa8cc5
> --- /dev/null
> +++ b/drivers/vdpa/sfc/sfc_vdpa_debug.h
> @@ -0,0 +1,21 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * Copyright(c) 2020-2021 Xilinx, Inc.
> + */
> +
> +#ifndef _SFC_VDPA_DEBUG_H_
> +#define _SFC_VDPA_DEBUG_H_
> +
> +#include <rte_debug.h>
> +
> +#ifdef RTE_LIBRTE_SFC_VDPA_DEBUG
> +/* Avoid dependency from RTE_LOG_DP_LEVEL to be able to enable debug check
> + * in the driver only.
> + */
> +#define SFC_VDPA_ASSERT(exp)			RTE_VERIFY(exp)
> +#else
> +/* If the driver debug is not enabled, follow DPDK debug/non-debug */
> +#define SFC_VDPA_ASSERT(exp)			RTE_ASSERT(exp)
> +#endif
> +
> +#endif /* _SFC_VDPA_DEBUG_H_ */
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_hw.c b/drivers/vdpa/sfc/sfc_vdpa_hw.c
> new file mode 100644
> index 0000000..7c256ff
> --- /dev/null
> +++ b/drivers/vdpa/sfc/sfc_vdpa_hw.c
> @@ -0,0 +1,327 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * Copyright(c) 2020-2021 Xilinx, Inc.
> + */
> +
> +#include <unistd.h>
> +
> +#include <rte_common.h>
> +#include <rte_errno.h>
> +#include <rte_vfio.h>
> +
> +#include "efx.h"
> +#include "sfc_vdpa.h"
> +#include "sfc_vdpa_ops.h"
> +
> +extern uint32_t sfc_logtype_driver;
> +
> +#ifndef PAGE_SIZE
> +#define PAGE_SIZE   (sysconf(_SC_PAGESIZE))
> +#endif
> +
> +int
> +sfc_vdpa_dma_alloc(struct sfc_vdpa_adapter *sva, const char *name,
> +		   size_t len, efsys_mem_t *esmp)
> +{
> +	uint64_t mcdi_iova;
> +	size_t mcdi_buff_size;
> +	const struct rte_memzone *mz = NULL;

Redundant initialization.

> +	int numa_node = sva->pdev->device.numa_node;
> +	int ret;
> +
> +	mcdi_buff_size = RTE_ALIGN_CEIL(len, PAGE_SIZE);
> +
> +	sfc_vdpa_log_init(sva, "name=%s, len=%zu", name, len);
> +
> +	mz = rte_memzone_reserve_aligned(name, mcdi_buff_size,
> +					 numa_node,
> +					 RTE_MEMZONE_IOVA_CONTIG,
> +					 PAGE_SIZE);
> +	if (mz == NULL) {
> +		sfc_vdpa_err(sva, "cannot reserve memory for %s: len=%#x: %s",
> +			     name, (unsigned int)len, rte_strerror(rte_errno));
> +		return -ENOMEM;
> +	}
> +
> +	/* IOVA address for MCDI would be re-calculated if mapping
> +	 * using default IOVA would fail.
> +	 * TODO: Earlier there was no way to get valid IOVA range.
> +	 * Recently a patch has been submitted to get the IOVA range
> +	 * using ioctl. VFIO_IOMMU_GET_INFO. This patch is available
> +	 * in the kernel version >= 5.4. Support to get the default
> +	 * IOVA address for MCDI buffer using available IOVA range
> +	 * would be added later. Meanwhile default IOVA for MCDI buffer
> +	 * is kept at high mem at 2TB. In case of overlap new available
> +	 * addresses would be searched and same would be used.
> +	 */
> +	mcdi_iova = SFC_VDPA_DEFAULT_MCDI_IOVA;
> +
> +	do {
> +		ret = rte_vfio_container_dma_map(sva->vfio_container_fd,
> +						 (uint64_t)mz->addr, mcdi_iova,
> +						 mcdi_buff_size);
> +		if (ret == 0)
> +			break;
> +
> +		mcdi_iova = mcdi_iova >> 1;
> +		if (mcdi_iova < mcdi_buff_size)	{
> +			sfc_vdpa_err(sva,
> +				     "DMA mapping failed for MCDI : %s",
> +				     rte_strerror(rte_errno));
> +			rte_memzone_free(mz);
> +			return ret;
> +		}
> +
> +	} while (ret < 0);

"ret < 0" will never evaluate to false here. Use "for (;;)" instead.

> +
> +	esmp->esm_addr = mcdi_iova;
> +	esmp->esm_base = mz->addr;
> +	sva->mcdi_buff_size = mcdi_buff_size;
> +
> +	sfc_vdpa_info(sva,
> +		      "DMA name=%s len=%zu => virt=%p iova=%" PRIx64,
> +		      name, len, esmp->esm_base, esmp->esm_addr);
> +
> +	return 0;
> +}
> +
> +void
> +sfc_vdpa_dma_free(struct sfc_vdpa_adapter *sva, efsys_mem_t *esmp)
> +{
> +	int ret;
> +
> +	sfc_vdpa_log_init(sva, "name=%s", esmp->esm_mz->name);
> +
> +	ret = rte_vfio_container_dma_unmap(sva->vfio_container_fd,
> +					   (uint64_t)esmp->esm_base,
> +					   esmp->esm_addr, sva->mcdi_buff_size);
> +	if (ret < 0)
> +		sfc_vdpa_err(sva, "DMA unmap failed for MCDI : %s",
> +			     rte_strerror(rte_errno));
> +
> +	sfc_vdpa_info(sva,
> +		      "DMA free name=%s => virt=%p iova=%" PRIx64,
> +		      esmp->esm_mz->name, esmp->esm_base, esmp->esm_addr);
> +
> +	rte_free((void *)(esmp->esm_base));
> +
> +	sva->mcdi_buff_size = 0;
> +	memset(esmp, 0, sizeof(*esmp));
> +}
> +
> +static int
> +sfc_vdpa_mem_bar_init(struct sfc_vdpa_adapter *sva,
> +		      const efx_bar_region_t *mem_ebrp)
> +{
> +	struct rte_pci_device *pci_dev = sva->pdev;
> +	efsys_bar_t *ebp = &sva->mem_bar;
> +	struct rte_mem_resource *res =
> +		&pci_dev->mem_resource[mem_ebrp->ebr_index];
> +
> +	SFC_BAR_LOCK_INIT(ebp, pci_dev->name);
> +	ebp->esb_rid = mem_ebrp->ebr_index;
> +	ebp->esb_dev = pci_dev;
> +	ebp->esb_base = res->addr;
> +
> +	return 0;
> +}
> +
> +static void
> +sfc_vdpa_mem_bar_fini(struct sfc_vdpa_adapter *sva)
> +{
> +	efsys_bar_t *ebp = &sva->mem_bar;
> +
> +	SFC_BAR_LOCK_DESTROY(ebp);
> +	memset(ebp, 0, sizeof(*ebp));
> +}
> +
> +static int
> +sfc_vdpa_nic_probe(struct sfc_vdpa_adapter *sva)
> +{
> +	efx_nic_t *enp = sva->nic;
> +	int rc;
> +
> +	rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
> +	if (rc != 0)
> +		sfc_vdpa_err(sva, "nic probe failed: %s", rte_strerror(rc));
> +
> +	return rc;
> +}
> +
> +static int
> +sfc_vdpa_estimate_resource_limits(struct sfc_vdpa_adapter *sva)
> +{
> +	efx_drv_limits_t limits;
> +	int rc;
> +	uint32_t evq_allocated;
> +	uint32_t rxq_allocated;
> +	uint32_t txq_allocated;
> +	uint32_t max_queue_cnt;
> +
> +	memset(&limits, 0, sizeof(limits));

Avoid using typedefs for structs. This is a struct?

> +
> +	/* Request at least one Rx and Tx queue */
> +	limits.edl_min_rxq_count = 1;
> +	limits.edl_min_txq_count = 1;
> +	/* Management event queue plus event queue for Tx/Rx queue */
> +	limits.edl_min_evq_count =
> +		1 + RTE_MAX(limits.edl_min_rxq_count, limits.edl_min_txq_count);
> +
> +	limits.edl_max_rxq_count = SFC_VDPA_MAX_QUEUE_PAIRS;
> +	limits.edl_max_txq_count = SFC_VDPA_MAX_QUEUE_PAIRS;
> +	limits.edl_max_evq_count = 1 + SFC_VDPA_MAX_QUEUE_PAIRS;
> +
> +	SFC_VDPA_ASSERT(limits.edl_max_evq_count >= limits.edl_min_rxq_count);
> +	SFC_VDPA_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);
> +	SFC_VDPA_ASSERT(limits.edl_max_txq_count >= limits.edl_min_rxq_count);
> +
> +	/* Configure the minimum required resources needed for the
> +	 * driver to operate, and the maximum desired resources that the
> +	 * driver is capable of using.
> +	 */
> +	sfc_vdpa_log_init(sva, "set drv limit");
> +	efx_nic_set_drv_limits(sva->nic, &limits);
> +
> +	sfc_vdpa_log_init(sva, "init nic");
> +	rc = efx_nic_init(sva->nic);
> +	if (rc != 0) {
> +		sfc_vdpa_err(sva, "nic init failed: %s", rte_strerror(rc));
> +		goto fail_nic_init;
> +	}
> +
> +	/* Find resource dimensions assigned by firmware to this function */
> +	rc = efx_nic_get_vi_pool(sva->nic, &evq_allocated, &rxq_allocated,
> +				 &txq_allocated);
> +	if (rc != 0) {
> +		sfc_vdpa_err(sva, "vi pool get failed: %s", rte_strerror(rc));
> +		goto fail_get_vi_pool;
> +	}
> +
> +	/* It still may allocate more than maximum, ensure limit */
> +	evq_allocated = RTE_MIN(evq_allocated, limits.edl_max_evq_count);
> +	rxq_allocated = RTE_MIN(rxq_allocated, limits.edl_max_rxq_count);
> +	txq_allocated = RTE_MIN(txq_allocated, limits.edl_max_txq_count);
> +
> +
> +	max_queue_cnt = RTE_MIN(rxq_allocated, txq_allocated);
> +	/* Subtract management EVQ not used for traffic */
> +	max_queue_cnt = RTE_MIN(evq_allocated - 1, max_queue_cnt);
> +
> +	SFC_VDPA_ASSERT(max_queue_cnt > 0);
> +
> +	sva->max_queue_count = max_queue_cnt;
> +
> +	return 0;
> +
> +fail_get_vi_pool:
> +	efx_nic_fini(sva->nic);
> +fail_nic_init:
> +	sfc_vdpa_log_init(sva, "failed: %s", rte_strerror(rc));
> +	return rc;
> +}
> +
> +int
> +sfc_vdpa_hw_init(struct sfc_vdpa_adapter *sva)
> +{
> +	efx_bar_region_t mem_ebr;
> +	efx_nic_t *enp;
> +	int rc;
> +
> +	sfc_vdpa_log_init(sva, "entry");
> +
> +	sfc_vdpa_log_init(sva, "get family");
> +	rc = sfc_efx_family(sva->pdev, &mem_ebr, &sva->family);
> +	if (rc != 0)
> +		goto fail_family;
> +	sfc_vdpa_log_init(sva,
> +			  "family is %u, membar is %u,"
> +			  "function control window offset is %#" PRIx64,
> +			  sva->family, mem_ebr.ebr_index, mem_ebr.ebr_offset);
> +
> +	sfc_vdpa_log_init(sva, "init mem bar");
> +	rc = sfc_vdpa_mem_bar_init(sva, &mem_ebr);
> +	if (rc != 0)
> +		goto fail_mem_bar_init;
> +
> +	sfc_vdpa_log_init(sva, "create nic");
> +	rte_spinlock_init(&sva->nic_lock);
> +	rc = efx_nic_create(sva->family, (efsys_identifier_t *)sva,
> +			    &sva->mem_bar, mem_ebr.ebr_offset,
> +			    &sva->nic_lock, &enp);
> +	if (rc != 0) {
> +		sfc_vdpa_err(sva, "nic create failed: %s", rte_strerror(rc));
> +		goto fail_nic_create;
> +	}
> +	sva->nic = enp;
> +
> +	sfc_vdpa_log_init(sva, "init mcdi");
> +	rc = sfc_vdpa_mcdi_init(sva);
> +	if (rc != 0) {
> +		sfc_vdpa_err(sva, "mcdi init failed: %s", rte_strerror(rc));
> +		goto fail_mcdi_init;
> +	}
> +
> +	sfc_vdpa_log_init(sva, "probe nic");
> +	rc = sfc_vdpa_nic_probe(sva);
> +	if (rc != 0)
> +		goto fail_nic_probe;
> +
> +	sfc_vdpa_log_init(sva, "reset nic");
> +	rc = efx_nic_reset(enp);
> +	if (rc != 0) {
> +		sfc_vdpa_err(sva, "nic reset failed: %s", rte_strerror(rc));
> +		goto fail_nic_reset;
> +	}
> +
> +	sfc_vdpa_log_init(sva, "estimate resource limits");
> +	rc = sfc_vdpa_estimate_resource_limits(sva);
> +	if (rc != 0)
> +		goto fail_estimate_rsrc_limits;
> +
> +	sfc_vdpa_log_init(sva, "done");
> +
> +	return 0;
> +
> +fail_estimate_rsrc_limits:
> +fail_nic_reset:
> +	efx_nic_unprobe(enp);
> +
> +fail_nic_probe:
> +	sfc_vdpa_mcdi_fini(sva);
> +
> +fail_mcdi_init:
> +	sfc_vdpa_log_init(sva, "destroy nic");
> +	sva->nic = NULL;
> +	efx_nic_destroy(enp);
> +
> +fail_nic_create:
> +	sfc_vdpa_mem_bar_fini(sva);
> +
> +fail_mem_bar_init:
> +fail_family:
> +	sfc_vdpa_log_init(sva, "failed: %s", rte_strerror(rc));
> +	return rc;
> +}
> +
> +void
> +sfc_vdpa_hw_fini(struct sfc_vdpa_adapter *sva)
> +{
> +	efx_nic_t *enp = sva->nic;
> +
> +	sfc_vdpa_log_init(sva, "entry");
> +
> +	sfc_vdpa_log_init(sva, "unprobe nic");
> +	efx_nic_unprobe(enp);
> +
> +	sfc_vdpa_log_init(sva, "mcdi fini");
> +	sfc_vdpa_mcdi_fini(sva);
> +
> +	sfc_vdpa_log_init(sva, "nic fini");
> +	efx_nic_fini(enp);
> +
> +	sfc_vdpa_log_init(sva, "destroy nic");
> +	sva->nic = NULL;
> +	efx_nic_destroy(enp);
> +
> +	sfc_vdpa_mem_bar_fini(sva);
> +}
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_log.h b/drivers/vdpa/sfc/sfc_vdpa_log.h
> index 858e5ee..4e7a84f 100644
> --- a/drivers/vdpa/sfc/sfc_vdpa_log.h
> +++ b/drivers/vdpa/sfc/sfc_vdpa_log.h
> @@ -21,6 +21,9 @@
>   /** Name prefix for the per-device log type used to report basic information */
>   #define SFC_VDPA_LOGTYPE_MAIN_STR	SFC_VDPA_LOGTYPE_PREFIX "main"
>   
> +/** Device MCDI log type name prefix */
> +#define SFC_VDPA_LOGTYPE_MCDI_STR	SFC_VDPA_LOGTYPE_PREFIX "mcdi"
> +
>   #define SFC_VDPA_LOG_PREFIX_MAX	32
>   
>   /* Log PMD message, automatically add prefix and \n */
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_mcdi.c b/drivers/vdpa/sfc/sfc_vdpa_mcdi.c
> new file mode 100644
> index 0000000..961d2d3
> --- /dev/null
> +++ b/drivers/vdpa/sfc/sfc_vdpa_mcdi.c
> @@ -0,0 +1,74 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * Copyright(c) 2020-2021 Xilinx, Inc.
> + */
> +
> +#include "sfc_efx_mcdi.h"
> +
> +#include "sfc_vdpa.h"
> +#include "sfc_vdpa_debug.h"
> +#include "sfc_vdpa_log.h"
> +
> +static sfc_efx_mcdi_dma_alloc_cb sfc_vdpa_mcdi_dma_alloc;
> +static int
> +sfc_vdpa_mcdi_dma_alloc(void *cookie, const char *name, size_t len,
> +			efsys_mem_t *esmp)
> +{
> +	struct sfc_vdpa_adapter *sva = cookie;
> +
> +	return sfc_vdpa_dma_alloc(sva, name, len, esmp);
> +}
> +
> +static sfc_efx_mcdi_dma_free_cb sfc_vdpa_mcdi_dma_free;
> +static void
> +sfc_vdpa_mcdi_dma_free(void *cookie, efsys_mem_t *esmp)
> +{
> +	struct sfc_vdpa_adapter *sva = cookie;
> +
> +	sfc_vdpa_dma_free(sva, esmp);
> +}
> +
> +static sfc_efx_mcdi_sched_restart_cb sfc_vdpa_mcdi_sched_restart;
> +static void
> +sfc_vdpa_mcdi_sched_restart(void *cookie)
> +{
> +	RTE_SET_USED(cookie);
> +}
> +
> +static sfc_efx_mcdi_mgmt_evq_poll_cb sfc_vdpa_mcdi_mgmt_evq_poll;
> +static void
> +sfc_vdpa_mcdi_mgmt_evq_poll(void *cookie)
> +{
> +	RTE_SET_USED(cookie);
> +}
> +
> +static const struct sfc_efx_mcdi_ops sfc_vdpa_mcdi_ops = {
> +	.dma_alloc	= sfc_vdpa_mcdi_dma_alloc,
> +	.dma_free	= sfc_vdpa_mcdi_dma_free,
> +	.sched_restart  = sfc_vdpa_mcdi_sched_restart,
> +	.mgmt_evq_poll  = sfc_vdpa_mcdi_mgmt_evq_poll,
> +
> +};
> +
> +int
> +sfc_vdpa_mcdi_init(struct sfc_vdpa_adapter *sva)
> +{
> +	uint32_t logtype;
> +
> +	sfc_vdpa_log_init(sva, "entry");
> +
> +	logtype = sfc_vdpa_register_logtype(&(sva->pdev->addr),
> +					    SFC_VDPA_LOGTYPE_MCDI_STR,
> +					    RTE_LOG_NOTICE);
> +
> +	return sfc_efx_mcdi_init(&sva->mcdi, logtype,
> +				 sva->log_prefix, sva->nic,
> +				 &sfc_vdpa_mcdi_ops, sva);
> +}
> +
> +void
> +sfc_vdpa_mcdi_fini(struct sfc_vdpa_adapter *sva)
> +{
> +	sfc_vdpa_log_init(sva, "entry");
> +	sfc_efx_mcdi_fini(&sva->mcdi);
> +}
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.c b/drivers/vdpa/sfc/sfc_vdpa_ops.c
> new file mode 100644
> index 0000000..71696be
> --- /dev/null
> +++ b/drivers/vdpa/sfc/sfc_vdpa_ops.c
> @@ -0,0 +1,129 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * Copyright(c) 2020-2021 Xilinx, Inc.
> + */
> +
> +#include <rte_malloc.h>
> +#include <rte_vdpa.h>
> +#include <rte_vdpa_dev.h>
> +#include <rte_vhost.h>
> +
> +#include "sfc_vdpa_ops.h"
> +#include "sfc_vdpa.h"
> +
> +/* Dummy functions for mandatory vDPA ops to pass vDPA device registration.
> + * In subsequent patches these ops would be implemented.
> + */
> +static int
> +sfc_vdpa_get_queue_num(struct rte_vdpa_device *vdpa_dev, uint32_t *queue_num)
> +{
> +	RTE_SET_USED(vdpa_dev);
> +	RTE_SET_USED(queue_num);
> +
> +	return -1;
> +}
> +
> +static int
> +sfc_vdpa_get_features(struct rte_vdpa_device *vdpa_dev, uint64_t *features)
> +{
> +	RTE_SET_USED(vdpa_dev);
> +	RTE_SET_USED(features);
> +
> +	return -1;
> +}
> +
> +static int
> +sfc_vdpa_get_protocol_features(struct rte_vdpa_device *vdpa_dev,
> +			       uint64_t *features)
> +{
> +	RTE_SET_USED(vdpa_dev);
> +	RTE_SET_USED(features);
> +
> +	return -1;
> +}
> +
> +static int
> +sfc_vdpa_dev_config(int vid)
> +{
> +	RTE_SET_USED(vid);
> +
> +	return -1;
> +}
> +
> +static int
> +sfc_vdpa_dev_close(int vid)
> +{
> +	RTE_SET_USED(vid);
> +
> +	return -1;
> +}
> +
> +static int
> +sfc_vdpa_set_vring_state(int vid, int vring, int state)
> +{
> +	RTE_SET_USED(vid);
> +	RTE_SET_USED(vring);
> +	RTE_SET_USED(state);
> +
> +	return -1;
> +}
> +
> +static int
> +sfc_vdpa_set_features(int vid)
> +{
> +	RTE_SET_USED(vid);
> +
> +	return -1;
> +}
> +
> +static struct rte_vdpa_dev_ops sfc_vdpa_ops = {
> +	.get_queue_num = sfc_vdpa_get_queue_num,
> +	.get_features = sfc_vdpa_get_features,
> +	.get_protocol_features = sfc_vdpa_get_protocol_features,
> +	.dev_conf = sfc_vdpa_dev_config,
> +	.dev_close = sfc_vdpa_dev_close,
> +	.set_vring_state = sfc_vdpa_set_vring_state,
> +	.set_features = sfc_vdpa_set_features,
> +};
> +
> +struct sfc_vdpa_ops_data *
> +sfc_vdpa_device_init(void *dev_handle, enum sfc_vdpa_context context)
> +{
> +	struct sfc_vdpa_ops_data *ops_data;
> +	struct rte_pci_device *pci_dev;
> +
> +	/* Create vDPA ops context */
> +	ops_data = rte_zmalloc("vdpa", sizeof(struct sfc_vdpa_ops_data), 0);
> +	if (ops_data == NULL)
> +		return NULL;
> +
> +	ops_data->vdpa_context = context;
> +	ops_data->dev_handle = dev_handle;
> +
> +	pci_dev = sfc_vdpa_adapter_by_dev_handle(dev_handle)->pdev;
> +
> +	/* Register vDPA Device */
> +	sfc_vdpa_log_init(dev_handle, "register vDPA device");
> +	ops_data->vdpa_dev =
> +		rte_vdpa_register_device(&pci_dev->device, &sfc_vdpa_ops);
> +	if (ops_data->vdpa_dev == NULL) {
> +		sfc_vdpa_err(dev_handle, "vDPA device registration failed");
> +		goto fail_register_device;
> +	}
> +
> +	ops_data->state = SFC_VDPA_STATE_INITIALIZED;
> +
> +	return ops_data;
> +
> +fail_register_device:
> +	rte_free(ops_data);
> +	return NULL;
> +}
> +
> +void
> +sfc_vdpa_device_fini(struct sfc_vdpa_ops_data *ops_data)
> +{
> +	rte_vdpa_unregister_device(ops_data->vdpa_dev);
> +
> +	rte_free(ops_data);
> +}
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.h b/drivers/vdpa/sfc/sfc_vdpa_ops.h
> new file mode 100644
> index 0000000..817b302
> --- /dev/null
> +++ b/drivers/vdpa/sfc/sfc_vdpa_ops.h
> @@ -0,0 +1,36 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * Copyright(c) 2020-2021 Xilinx, Inc.
> + */
> +
> +#ifndef _SFC_VDPA_OPS_H
> +#define _SFC_VDPA_OPS_H
> +
> +#include <rte_vdpa.h>
> +
> +#define SFC_VDPA_MAX_QUEUE_PAIRS		1
> +
> +enum sfc_vdpa_context {
> +	SFC_VDPA_AS_PF = 0,

0 is the default.

> +	SFC_VDPA_AS_VF
> +};
> +
> +enum sfc_vdpa_state {
> +	SFC_VDPA_STATE_UNINITIALIZED = 0,

Same here.

> +	SFC_VDPA_STATE_INITIALIZED,
> +	SFC_VDPA_STATE_NSTATES
> +};
> +
> +struct sfc_vdpa_ops_data {
> +	void				*dev_handle;
> +	struct rte_vdpa_device		*vdpa_dev;
> +	enum sfc_vdpa_context		vdpa_context;
> +	enum sfc_vdpa_state		state;
> +};
> +
> +struct sfc_vdpa_ops_data *
> +sfc_vdpa_device_init(void *adapter, enum sfc_vdpa_context context);
> +void
> +sfc_vdpa_device_fini(struct sfc_vdpa_ops_data *ops_data);
> +
> +#endif /* _SFC_VDPA_OPS_H */
>
  
Andrew Rybchenko Nov. 1, 2021, 8:09 a.m. UTC | #2
Hi Mattias,

Many thanks for review. Please, see below.

Andrew.

On 10/29/21 11:21 PM, Mattias Rönnblom wrote:
> On 2021-10-29 16:46, Vijay Srivastava wrote:
>> From: Vijay Kumar Srivastava <vsrivast@xilinx.com>
>>
>> Add HW initialization and vDPA device registration support.
>>
>> Signed-off-by: Vijay Kumar Srivastava <vsrivast@xilinx.com>
>> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
>> ---
>> v2:
>> * Used rte_memzone_reserve_aligned for mcdi buffer allocation.
>> * Freeing mcdi buff when DMA map fails.
>> * Fixed one typo.
>>
>>   doc/guides/vdpadevs/sfc.rst       |   6 +
>>   drivers/vdpa/sfc/meson.build      |   3 +
>>   drivers/vdpa/sfc/sfc_vdpa.c       |  23 +++
>>   drivers/vdpa/sfc/sfc_vdpa.h       |  49 +++++-
>>   drivers/vdpa/sfc/sfc_vdpa_debug.h |  21 +++
>>   drivers/vdpa/sfc/sfc_vdpa_hw.c    | 327 
>> ++++++++++++++++++++++++++++++++++++++
>>   drivers/vdpa/sfc/sfc_vdpa_log.h   |   3 +
>>   drivers/vdpa/sfc/sfc_vdpa_mcdi.c  |  74 +++++++++
>>   drivers/vdpa/sfc/sfc_vdpa_ops.c   | 129 +++++++++++++++
>>   drivers/vdpa/sfc/sfc_vdpa_ops.h   |  36 +++++
>>   10 files changed, 670 insertions(+), 1 deletion(-)
>>   create mode 100644 drivers/vdpa/sfc/sfc_vdpa_debug.h
>>   create mode 100644 drivers/vdpa/sfc/sfc_vdpa_hw.c
>>   create mode 100644 drivers/vdpa/sfc/sfc_vdpa_mcdi.c
>>   create mode 100644 drivers/vdpa/sfc/sfc_vdpa_ops.c
>>   create mode 100644 drivers/vdpa/sfc/sfc_vdpa_ops.h
>>
>> diff --git a/doc/guides/vdpadevs/sfc.rst b/doc/guides/vdpadevs/sfc.rst
>> index 44e694f..d06c427 100644
>> --- a/doc/guides/vdpadevs/sfc.rst
>> +++ b/doc/guides/vdpadevs/sfc.rst
>> @@ -95,3 +95,9 @@ SFC vDPA PMD provides the following log types 
>> available for control:
>>     Matches a subset of per-port log types registered during runtime.
>>     A full name for a particular type may be obtained by appending a
>>     dot and a PCI device identifier (``XXXX:XX:XX.X``) to the prefix.
>> +
>> +- ``pmd.vdpa.sfc.mcdi`` (default level is **notice**)
>> +
>> +  Extra logging of the communication with the NIC's management CPU.
>> +  The format of the log is consumed by the netlogdecode cross-platform
>> +  tool. May be managed per-port, as explained above.
>> diff --git a/drivers/vdpa/sfc/meson.build b/drivers/vdpa/sfc/meson.build
>> index 4255d65..dc333de 100644
>> --- a/drivers/vdpa/sfc/meson.build
>> +++ b/drivers/vdpa/sfc/meson.build
>> @@ -19,4 +19,7 @@ endforeach
>>   deps += ['common_sfc_efx', 'bus_pci']
>>   sources = files(
>>       'sfc_vdpa.c',
>> +    'sfc_vdpa_hw.c',
>> +    'sfc_vdpa_mcdi.c',
>> +    'sfc_vdpa_ops.c',
>>   )
>> diff --git a/drivers/vdpa/sfc/sfc_vdpa.c b/drivers/vdpa/sfc/sfc_vdpa.c
>> index d85c52b..b7eca56 100644
>> --- a/drivers/vdpa/sfc/sfc_vdpa.c
>> +++ b/drivers/vdpa/sfc/sfc_vdpa.c
>> @@ -232,6 +232,19 @@ struct sfc_vdpa_adapter *
>>           goto fail_vfio_setup;
>>       }
>> +    sfc_vdpa_log_init(sva, "hw init");
>> +    if (sfc_vdpa_hw_init(sva) != 0) {
>> +        sfc_vdpa_err(sva, "failed to init HW %s", pci_dev->name);
>> +        goto fail_hw_init;
>> +    }
>> +
>> +    sfc_vdpa_log_init(sva, "dev init");
>> +    sva->ops_data = sfc_vdpa_device_init(sva, SFC_VDPA_AS_VF);
>> +    if (sva->ops_data == NULL) {
>> +        sfc_vdpa_err(sva, "failed vDPA dev init %s", pci_dev->name);
>> +        goto fail_dev_init;
>> +    }
>> +
>>       pthread_mutex_lock(&sfc_vdpa_adapter_list_lock);
>>       TAILQ_INSERT_TAIL(&sfc_vdpa_adapter_list, sva, next);
>>       pthread_mutex_unlock(&sfc_vdpa_adapter_list_lock);
>> @@ -240,6 +253,12 @@ struct sfc_vdpa_adapter *
>>       return 0;
>> +fail_dev_init:
>> +    sfc_vdpa_hw_fini(sva);
>> +
>> +fail_hw_init:
>> +    sfc_vdpa_vfio_teardown(sva);
>> +
>>   fail_vfio_setup:
>>   fail_set_log_prefix:
>>       rte_free(sva);
>> @@ -266,6 +285,10 @@ struct sfc_vdpa_adapter *
>>       TAILQ_REMOVE(&sfc_vdpa_adapter_list, sva, next);
>>       pthread_mutex_unlock(&sfc_vdpa_adapter_list_lock);
>> +    sfc_vdpa_device_fini(sva->ops_data);
>> +
>> +    sfc_vdpa_hw_fini(sva);
>> +
>>       sfc_vdpa_vfio_teardown(sva);
>>       rte_free(sva);
>> diff --git a/drivers/vdpa/sfc/sfc_vdpa.h b/drivers/vdpa/sfc/sfc_vdpa.h
>> index 3b77900..046f25d 100644
>> --- a/drivers/vdpa/sfc/sfc_vdpa.h
>> +++ b/drivers/vdpa/sfc/sfc_vdpa.h
>> @@ -11,14 +11,38 @@
>>   #include <rte_bus_pci.h>
>> +#include "sfc_efx.h"
>> +#include "sfc_efx_mcdi.h"
>> +#include "sfc_vdpa_debug.h"
>>   #include "sfc_vdpa_log.h"
>> +#include "sfc_vdpa_ops.h"
>> +
>> +#define SFC_VDPA_DEFAULT_MCDI_IOVA        0x200000000000
>>   /* Adapter private data */
>>   struct sfc_vdpa_adapter {
>>       TAILQ_ENTRY(sfc_vdpa_adapter)    next;
>> +    /*
>> +     * PMD setup and configuration is not thread safe. Since it is not
>> +     * performance sensitive, it is better to guarantee thread-safety
>> +     * and add device level lock. vDPA control operations which
>> +     * change its state should acquire the lock.
>> +     */
>> +    rte_spinlock_t            lock;
>>       struct rte_pci_device        *pdev;
>>       struct rte_pci_addr        pci_addr;
>> +    efx_family_t            family;
>> +    efx_nic_t            *nic;
>> +    rte_spinlock_t            nic_lock;
>> +
>> +    efsys_bar_t            mem_bar;
>> +
>> +    struct sfc_efx_mcdi        mcdi;
>> +    size_t                mcdi_buff_size;
>> +
>> +    uint32_t            max_queue_count;
>> +
>>       char                log_prefix[SFC_VDPA_LOG_PREFIX_MAX];
>>       uint32_t            logtype_main;
>> @@ -26,6 +50,7 @@ struct sfc_vdpa_adapter {
>>       int                vfio_dev_fd;
>>       int                vfio_container_fd;
>>       int                iommu_group_num;
>> +    struct sfc_vdpa_ops_data    *ops_data;
>>   };
>>   uint32_t
>> @@ -36,5 +61,27 @@ struct sfc_vdpa_adapter {
>>   struct sfc_vdpa_adapter *
>>   sfc_vdpa_get_adapter_by_dev(struct rte_pci_device *pdev);
>> -#endif  /* _SFC_VDPA_H */
>> +int
>> +sfc_vdpa_hw_init(struct sfc_vdpa_adapter *sva);
>> +void
>> +sfc_vdpa_hw_fini(struct sfc_vdpa_adapter *sva);
>> +int
>> +sfc_vdpa_mcdi_init(struct sfc_vdpa_adapter *sva);
>> +void
>> +sfc_vdpa_mcdi_fini(struct sfc_vdpa_adapter *sva);
>> +
>> +int
>> +sfc_vdpa_dma_alloc(struct sfc_vdpa_adapter *sva, const char *name,
>> +           size_t len, efsys_mem_t *esmp);
>> +
>> +void
>> +sfc_vdpa_dma_free(struct sfc_vdpa_adapter *sva, efsys_mem_t *esmp);
>> +
>> +static inline struct sfc_vdpa_adapter *
>> +sfc_vdpa_adapter_by_dev_handle(void *dev_handle)
>> +{
>> +    return (struct sfc_vdpa_adapter *)dev_handle;
>> +}
>> +
>> +#endif  /* _SFC_VDPA_H */
>> diff --git a/drivers/vdpa/sfc/sfc_vdpa_debug.h 
>> b/drivers/vdpa/sfc/sfc_vdpa_debug.h
>> new file mode 100644
>> index 0000000..cfa8cc5
>> --- /dev/null
>> +++ b/drivers/vdpa/sfc/sfc_vdpa_debug.h
>> @@ -0,0 +1,21 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + *
>> + * Copyright(c) 2020-2021 Xilinx, Inc.
>> + */
>> +
>> +#ifndef _SFC_VDPA_DEBUG_H_
>> +#define _SFC_VDPA_DEBUG_H_
>> +
>> +#include <rte_debug.h>
>> +
>> +#ifdef RTE_LIBRTE_SFC_VDPA_DEBUG
>> +/* Avoid dependency from RTE_LOG_DP_LEVEL to be able to enable debug 
>> check
>> + * in the driver only.
>> + */
>> +#define SFC_VDPA_ASSERT(exp)            RTE_VERIFY(exp)
>> +#else
>> +/* If the driver debug is not enabled, follow DPDK debug/non-debug */
>> +#define SFC_VDPA_ASSERT(exp)            RTE_ASSERT(exp)
>> +#endif
>> +
>> +#endif /* _SFC_VDPA_DEBUG_H_ */
>> diff --git a/drivers/vdpa/sfc/sfc_vdpa_hw.c 
>> b/drivers/vdpa/sfc/sfc_vdpa_hw.c
>> new file mode 100644
>> index 0000000..7c256ff
>> --- /dev/null
>> +++ b/drivers/vdpa/sfc/sfc_vdpa_hw.c
>> @@ -0,0 +1,327 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + *
>> + * Copyright(c) 2020-2021 Xilinx, Inc.
>> + */
>> +
>> +#include <unistd.h>
>> +
>> +#include <rte_common.h>
>> +#include <rte_errno.h>
>> +#include <rte_vfio.h>
>> +
>> +#include "efx.h"
>> +#include "sfc_vdpa.h"
>> +#include "sfc_vdpa_ops.h"
>> +
>> +extern uint32_t sfc_logtype_driver;
>> +
>> +#ifndef PAGE_SIZE
>> +#define PAGE_SIZE   (sysconf(_SC_PAGESIZE))
>> +#endif
>> +
>> +int
>> +sfc_vdpa_dma_alloc(struct sfc_vdpa_adapter *sva, const char *name,
>> +           size_t len, efsys_mem_t *esmp)
>> +{
>> +    uint64_t mcdi_iova;
>> +    size_t mcdi_buff_size;
>> +    const struct rte_memzone *mz = NULL;
> 
> Redundant initialization.

Is it a problem? Is it defined in DPDK coding style to avoid it?

> 
>> +    int numa_node = sva->pdev->device.numa_node;
>> +    int ret;
>> +
>> +    mcdi_buff_size = RTE_ALIGN_CEIL(len, PAGE_SIZE);
>> +
>> +    sfc_vdpa_log_init(sva, "name=%s, len=%zu", name, len);
>> +
>> +    mz = rte_memzone_reserve_aligned(name, mcdi_buff_size,
>> +                     numa_node,
>> +                     RTE_MEMZONE_IOVA_CONTIG,
>> +                     PAGE_SIZE);
>> +    if (mz == NULL) {
>> +        sfc_vdpa_err(sva, "cannot reserve memory for %s: len=%#x: %s",
>> +                 name, (unsigned int)len, rte_strerror(rte_errno));
>> +        return -ENOMEM;
>> +    }
>> +
>> +    /* IOVA address for MCDI would be re-calculated if mapping
>> +     * using default IOVA would fail.
>> +     * TODO: Earlier there was no way to get valid IOVA range.
>> +     * Recently a patch has been submitted to get the IOVA range
>> +     * using ioctl. VFIO_IOMMU_GET_INFO. This patch is available
>> +     * in the kernel version >= 5.4. Support to get the default
>> +     * IOVA address for MCDI buffer using available IOVA range
>> +     * would be added later. Meanwhile default IOVA for MCDI buffer
>> +     * is kept at high mem at 2TB. In case of overlap new available
>> +     * addresses would be searched and same would be used.
>> +     */
>> +    mcdi_iova = SFC_VDPA_DEFAULT_MCDI_IOVA;
>> +
>> +    do {
>> +        ret = rte_vfio_container_dma_map(sva->vfio_container_fd,
>> +                         (uint64_t)mz->addr, mcdi_iova,
>> +                         mcdi_buff_size);
>> +        if (ret == 0)
>> +            break;
>> +
>> +        mcdi_iova = mcdi_iova >> 1;
>> +        if (mcdi_iova < mcdi_buff_size)    {
>> +            sfc_vdpa_err(sva,
>> +                     "DMA mapping failed for MCDI : %s",
>> +                     rte_strerror(rte_errno));
>> +            rte_memzone_free(mz);
>> +            return ret;
>> +        }
>> +
>> +    } while (ret < 0);
> 
> "ret < 0" will never evaluate to false here. Use "for (;;)" instead.

Ack.

> 
>> +
>> +    esmp->esm_addr = mcdi_iova;
>> +    esmp->esm_base = mz->addr;
>> +    sva->mcdi_buff_size = mcdi_buff_size;
>> +
>> +    sfc_vdpa_info(sva,
>> +              "DMA name=%s len=%zu => virt=%p iova=%" PRIx64,
>> +              name, len, esmp->esm_base, esmp->esm_addr);
>> +
>> +    return 0;
>> +}
>> +
>> +void
>> +sfc_vdpa_dma_free(struct sfc_vdpa_adapter *sva, efsys_mem_t *esmp)
>> +{
>> +    int ret;
>> +
>> +    sfc_vdpa_log_init(sva, "name=%s", esmp->esm_mz->name);
>> +
>> +    ret = rte_vfio_container_dma_unmap(sva->vfio_container_fd,
>> +                       (uint64_t)esmp->esm_base,
>> +                       esmp->esm_addr, sva->mcdi_buff_size);
>> +    if (ret < 0)
>> +        sfc_vdpa_err(sva, "DMA unmap failed for MCDI : %s",
>> +                 rte_strerror(rte_errno));
>> +
>> +    sfc_vdpa_info(sva,
>> +              "DMA free name=%s => virt=%p iova=%" PRIx64,
>> +              esmp->esm_mz->name, esmp->esm_base, esmp->esm_addr);
>> +
>> +    rte_free((void *)(esmp->esm_base));
>> +
>> +    sva->mcdi_buff_size = 0;
>> +    memset(esmp, 0, sizeof(*esmp));
>> +}
>> +
>> +static int
>> +sfc_vdpa_mem_bar_init(struct sfc_vdpa_adapter *sva,
>> +              const efx_bar_region_t *mem_ebrp)
>> +{
>> +    struct rte_pci_device *pci_dev = sva->pdev;
>> +    efsys_bar_t *ebp = &sva->mem_bar;
>> +    struct rte_mem_resource *res =
>> +        &pci_dev->mem_resource[mem_ebrp->ebr_index];
>> +
>> +    SFC_BAR_LOCK_INIT(ebp, pci_dev->name);
>> +    ebp->esb_rid = mem_ebrp->ebr_index;
>> +    ebp->esb_dev = pci_dev;
>> +    ebp->esb_base = res->addr;
>> +
>> +    return 0;
>> +}
>> +
>> +static void
>> +sfc_vdpa_mem_bar_fini(struct sfc_vdpa_adapter *sva)
>> +{
>> +    efsys_bar_t *ebp = &sva->mem_bar;
>> +
>> +    SFC_BAR_LOCK_DESTROY(ebp);
>> +    memset(ebp, 0, sizeof(*ebp));
>> +}
>> +
>> +static int
>> +sfc_vdpa_nic_probe(struct sfc_vdpa_adapter *sva)
>> +{
>> +    efx_nic_t *enp = sva->nic;
>> +    int rc;
>> +
>> +    rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
>> +    if (rc != 0)
>> +        sfc_vdpa_err(sva, "nic probe failed: %s", rte_strerror(rc));
>> +
>> +    return rc;
>> +}
>> +
>> +static int
>> +sfc_vdpa_estimate_resource_limits(struct sfc_vdpa_adapter *sva)
>> +{
>> +    efx_drv_limits_t limits;
>> +    int rc;
>> +    uint32_t evq_allocated;
>> +    uint32_t rxq_allocated;
>> +    uint32_t txq_allocated;
>> +    uint32_t max_queue_cnt;
>> +
>> +    memset(&limits, 0, sizeof(limits));
> 
> Avoid using typedefs for structs. This is a struct?

Yes, it is a struct. But it is an interface of the base
driver which is out of scope of DPDK coding style.

> 
>> +
>> +    /* Request at least one Rx and Tx queue */
>> +    limits.edl_min_rxq_count = 1;
>> +    limits.edl_min_txq_count = 1;
>> +    /* Management event queue plus event queue for Tx/Rx queue */
>> +    limits.edl_min_evq_count =
>> +        1 + RTE_MAX(limits.edl_min_rxq_count, limits.edl_min_txq_count);
>> +
>> +    limits.edl_max_rxq_count = SFC_VDPA_MAX_QUEUE_PAIRS;
>> +    limits.edl_max_txq_count = SFC_VDPA_MAX_QUEUE_PAIRS;
>> +    limits.edl_max_evq_count = 1 + SFC_VDPA_MAX_QUEUE_PAIRS;
>> +
>> +    SFC_VDPA_ASSERT(limits.edl_max_evq_count >= 
>> limits.edl_min_rxq_count);
>> +    SFC_VDPA_ASSERT(limits.edl_max_rxq_count >= 
>> limits.edl_min_rxq_count);
>> +    SFC_VDPA_ASSERT(limits.edl_max_txq_count >= 
>> limits.edl_min_rxq_count);
>> +
>> +    /* Configure the minimum required resources needed for the
>> +     * driver to operate, and the maximum desired resources that the
>> +     * driver is capable of using.
>> +     */
>> +    sfc_vdpa_log_init(sva, "set drv limit");
>> +    efx_nic_set_drv_limits(sva->nic, &limits);
>> +
>> +    sfc_vdpa_log_init(sva, "init nic");
>> +    rc = efx_nic_init(sva->nic);
>> +    if (rc != 0) {
>> +        sfc_vdpa_err(sva, "nic init failed: %s", rte_strerror(rc));
>> +        goto fail_nic_init;
>> +    }
>> +
>> +    /* Find resource dimensions assigned by firmware to this function */
>> +    rc = efx_nic_get_vi_pool(sva->nic, &evq_allocated, &rxq_allocated,
>> +                 &txq_allocated);
>> +    if (rc != 0) {
>> +        sfc_vdpa_err(sva, "vi pool get failed: %s", rte_strerror(rc));
>> +        goto fail_get_vi_pool;
>> +    }
>> +
>> +    /* It still may allocate more than maximum, ensure limit */
>> +    evq_allocated = RTE_MIN(evq_allocated, limits.edl_max_evq_count);
>> +    rxq_allocated = RTE_MIN(rxq_allocated, limits.edl_max_rxq_count);
>> +    txq_allocated = RTE_MIN(txq_allocated, limits.edl_max_txq_count);
>> +
>> +
>> +    max_queue_cnt = RTE_MIN(rxq_allocated, txq_allocated);
>> +    /* Subtract management EVQ not used for traffic */
>> +    max_queue_cnt = RTE_MIN(evq_allocated - 1, max_queue_cnt);
>> +
>> +    SFC_VDPA_ASSERT(max_queue_cnt > 0);
>> +
>> +    sva->max_queue_count = max_queue_cnt;
>> +
>> +    return 0;
>> +
>> +fail_get_vi_pool:
>> +    efx_nic_fini(sva->nic);
>> +fail_nic_init:
>> +    sfc_vdpa_log_init(sva, "failed: %s", rte_strerror(rc));
>> +    return rc;
>> +}
>> +
>> +int
>> +sfc_vdpa_hw_init(struct sfc_vdpa_adapter *sva)
>> +{
>> +    efx_bar_region_t mem_ebr;
>> +    efx_nic_t *enp;
>> +    int rc;
>> +
>> +    sfc_vdpa_log_init(sva, "entry");
>> +
>> +    sfc_vdpa_log_init(sva, "get family");
>> +    rc = sfc_efx_family(sva->pdev, &mem_ebr, &sva->family);
>> +    if (rc != 0)
>> +        goto fail_family;
>> +    sfc_vdpa_log_init(sva,
>> +              "family is %u, membar is %u,"
>> +              "function control window offset is %#" PRIx64,
>> +              sva->family, mem_ebr.ebr_index, mem_ebr.ebr_offset);
>> +
>> +    sfc_vdpa_log_init(sva, "init mem bar");
>> +    rc = sfc_vdpa_mem_bar_init(sva, &mem_ebr);
>> +    if (rc != 0)
>> +        goto fail_mem_bar_init;
>> +
>> +    sfc_vdpa_log_init(sva, "create nic");
>> +    rte_spinlock_init(&sva->nic_lock);
>> +    rc = efx_nic_create(sva->family, (efsys_identifier_t *)sva,
>> +                &sva->mem_bar, mem_ebr.ebr_offset,
>> +                &sva->nic_lock, &enp);
>> +    if (rc != 0) {
>> +        sfc_vdpa_err(sva, "nic create failed: %s", rte_strerror(rc));
>> +        goto fail_nic_create;
>> +    }
>> +    sva->nic = enp;
>> +
>> +    sfc_vdpa_log_init(sva, "init mcdi");
>> +    rc = sfc_vdpa_mcdi_init(sva);
>> +    if (rc != 0) {
>> +        sfc_vdpa_err(sva, "mcdi init failed: %s", rte_strerror(rc));
>> +        goto fail_mcdi_init;
>> +    }
>> +
>> +    sfc_vdpa_log_init(sva, "probe nic");
>> +    rc = sfc_vdpa_nic_probe(sva);
>> +    if (rc != 0)
>> +        goto fail_nic_probe;
>> +
>> +    sfc_vdpa_log_init(sva, "reset nic");
>> +    rc = efx_nic_reset(enp);
>> +    if (rc != 0) {
>> +        sfc_vdpa_err(sva, "nic reset failed: %s", rte_strerror(rc));
>> +        goto fail_nic_reset;
>> +    }
>> +
>> +    sfc_vdpa_log_init(sva, "estimate resource limits");
>> +    rc = sfc_vdpa_estimate_resource_limits(sva);
>> +    if (rc != 0)
>> +        goto fail_estimate_rsrc_limits;
>> +
>> +    sfc_vdpa_log_init(sva, "done");
>> +
>> +    return 0;
>> +
>> +fail_estimate_rsrc_limits:
>> +fail_nic_reset:
>> +    efx_nic_unprobe(enp);
>> +
>> +fail_nic_probe:
>> +    sfc_vdpa_mcdi_fini(sva);
>> +
>> +fail_mcdi_init:
>> +    sfc_vdpa_log_init(sva, "destroy nic");
>> +    sva->nic = NULL;
>> +    efx_nic_destroy(enp);
>> +
>> +fail_nic_create:
>> +    sfc_vdpa_mem_bar_fini(sva);
>> +
>> +fail_mem_bar_init:
>> +fail_family:
>> +    sfc_vdpa_log_init(sva, "failed: %s", rte_strerror(rc));
>> +    return rc;
>> +}
>> +
>> +void
>> +sfc_vdpa_hw_fini(struct sfc_vdpa_adapter *sva)
>> +{
>> +    efx_nic_t *enp = sva->nic;
>> +
>> +    sfc_vdpa_log_init(sva, "entry");
>> +
>> +    sfc_vdpa_log_init(sva, "unprobe nic");
>> +    efx_nic_unprobe(enp);
>> +
>> +    sfc_vdpa_log_init(sva, "mcdi fini");
>> +    sfc_vdpa_mcdi_fini(sva);
>> +
>> +    sfc_vdpa_log_init(sva, "nic fini");
>> +    efx_nic_fini(enp);
>> +
>> +    sfc_vdpa_log_init(sva, "destroy nic");
>> +    sva->nic = NULL;
>> +    efx_nic_destroy(enp);
>> +
>> +    sfc_vdpa_mem_bar_fini(sva);
>> +}
>> diff --git a/drivers/vdpa/sfc/sfc_vdpa_log.h 
>> b/drivers/vdpa/sfc/sfc_vdpa_log.h
>> index 858e5ee..4e7a84f 100644
>> --- a/drivers/vdpa/sfc/sfc_vdpa_log.h
>> +++ b/drivers/vdpa/sfc/sfc_vdpa_log.h
>> @@ -21,6 +21,9 @@
>>   /** Name prefix for the per-device log type used to report basic 
>> information */
>>   #define SFC_VDPA_LOGTYPE_MAIN_STR    SFC_VDPA_LOGTYPE_PREFIX "main"
>> +/** Device MCDI log type name prefix */
>> +#define SFC_VDPA_LOGTYPE_MCDI_STR    SFC_VDPA_LOGTYPE_PREFIX "mcdi"
>> +
>>   #define SFC_VDPA_LOG_PREFIX_MAX    32
>>   /* Log PMD message, automatically add prefix and \n */
>> diff --git a/drivers/vdpa/sfc/sfc_vdpa_mcdi.c 
>> b/drivers/vdpa/sfc/sfc_vdpa_mcdi.c
>> new file mode 100644
>> index 0000000..961d2d3
>> --- /dev/null
>> +++ b/drivers/vdpa/sfc/sfc_vdpa_mcdi.c
>> @@ -0,0 +1,74 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + *
>> + * Copyright(c) 2020-2021 Xilinx, Inc.
>> + */
>> +
>> +#include "sfc_efx_mcdi.h"
>> +
>> +#include "sfc_vdpa.h"
>> +#include "sfc_vdpa_debug.h"
>> +#include "sfc_vdpa_log.h"
>> +
>> +static sfc_efx_mcdi_dma_alloc_cb sfc_vdpa_mcdi_dma_alloc;
>> +static int
>> +sfc_vdpa_mcdi_dma_alloc(void *cookie, const char *name, size_t len,
>> +            efsys_mem_t *esmp)
>> +{
>> +    struct sfc_vdpa_adapter *sva = cookie;
>> +
>> +    return sfc_vdpa_dma_alloc(sva, name, len, esmp);
>> +}
>> +
>> +static sfc_efx_mcdi_dma_free_cb sfc_vdpa_mcdi_dma_free;
>> +static void
>> +sfc_vdpa_mcdi_dma_free(void *cookie, efsys_mem_t *esmp)
>> +{
>> +    struct sfc_vdpa_adapter *sva = cookie;
>> +
>> +    sfc_vdpa_dma_free(sva, esmp);
>> +}
>> +
>> +static sfc_efx_mcdi_sched_restart_cb sfc_vdpa_mcdi_sched_restart;
>> +static void
>> +sfc_vdpa_mcdi_sched_restart(void *cookie)
>> +{
>> +    RTE_SET_USED(cookie);
>> +}
>> +
>> +static sfc_efx_mcdi_mgmt_evq_poll_cb sfc_vdpa_mcdi_mgmt_evq_poll;
>> +static void
>> +sfc_vdpa_mcdi_mgmt_evq_poll(void *cookie)
>> +{
>> +    RTE_SET_USED(cookie);
>> +}
>> +
>> +static const struct sfc_efx_mcdi_ops sfc_vdpa_mcdi_ops = {
>> +    .dma_alloc    = sfc_vdpa_mcdi_dma_alloc,
>> +    .dma_free    = sfc_vdpa_mcdi_dma_free,
>> +    .sched_restart  = sfc_vdpa_mcdi_sched_restart,
>> +    .mgmt_evq_poll  = sfc_vdpa_mcdi_mgmt_evq_poll,
>> +
>> +};
>> +
>> +int
>> +sfc_vdpa_mcdi_init(struct sfc_vdpa_adapter *sva)
>> +{
>> +    uint32_t logtype;
>> +
>> +    sfc_vdpa_log_init(sva, "entry");
>> +
>> +    logtype = sfc_vdpa_register_logtype(&(sva->pdev->addr),
>> +                        SFC_VDPA_LOGTYPE_MCDI_STR,
>> +                        RTE_LOG_NOTICE);
>> +
>> +    return sfc_efx_mcdi_init(&sva->mcdi, logtype,
>> +                 sva->log_prefix, sva->nic,
>> +                 &sfc_vdpa_mcdi_ops, sva);
>> +}
>> +
>> +void
>> +sfc_vdpa_mcdi_fini(struct sfc_vdpa_adapter *sva)
>> +{
>> +    sfc_vdpa_log_init(sva, "entry");
>> +    sfc_efx_mcdi_fini(&sva->mcdi);
>> +}
>> diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.c 
>> b/drivers/vdpa/sfc/sfc_vdpa_ops.c
>> new file mode 100644
>> index 0000000..71696be
>> --- /dev/null
>> +++ b/drivers/vdpa/sfc/sfc_vdpa_ops.c
>> @@ -0,0 +1,129 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + *
>> + * Copyright(c) 2020-2021 Xilinx, Inc.
>> + */
>> +
>> +#include <rte_malloc.h>
>> +#include <rte_vdpa.h>
>> +#include <rte_vdpa_dev.h>
>> +#include <rte_vhost.h>
>> +
>> +#include "sfc_vdpa_ops.h"
>> +#include "sfc_vdpa.h"
>> +
>> +/* Dummy functions for mandatory vDPA ops to pass vDPA device 
>> registration.
>> + * In subsequent patches these ops would be implemented.
>> + */
>> +static int
>> +sfc_vdpa_get_queue_num(struct rte_vdpa_device *vdpa_dev, uint32_t 
>> *queue_num)
>> +{
>> +    RTE_SET_USED(vdpa_dev);
>> +    RTE_SET_USED(queue_num);
>> +
>> +    return -1;
>> +}
>> +
>> +static int
>> +sfc_vdpa_get_features(struct rte_vdpa_device *vdpa_dev, uint64_t 
>> *features)
>> +{
>> +    RTE_SET_USED(vdpa_dev);
>> +    RTE_SET_USED(features);
>> +
>> +    return -1;
>> +}
>> +
>> +static int
>> +sfc_vdpa_get_protocol_features(struct rte_vdpa_device *vdpa_dev,
>> +                   uint64_t *features)
>> +{
>> +    RTE_SET_USED(vdpa_dev);
>> +    RTE_SET_USED(features);
>> +
>> +    return -1;
>> +}
>> +
>> +static int
>> +sfc_vdpa_dev_config(int vid)
>> +{
>> +    RTE_SET_USED(vid);
>> +
>> +    return -1;
>> +}
>> +
>> +static int
>> +sfc_vdpa_dev_close(int vid)
>> +{
>> +    RTE_SET_USED(vid);
>> +
>> +    return -1;
>> +}
>> +
>> +static int
>> +sfc_vdpa_set_vring_state(int vid, int vring, int state)
>> +{
>> +    RTE_SET_USED(vid);
>> +    RTE_SET_USED(vring);
>> +    RTE_SET_USED(state);
>> +
>> +    return -1;
>> +}
>> +
>> +static int
>> +sfc_vdpa_set_features(int vid)
>> +{
>> +    RTE_SET_USED(vid);
>> +
>> +    return -1;
>> +}
>> +
>> +static struct rte_vdpa_dev_ops sfc_vdpa_ops = {
>> +    .get_queue_num = sfc_vdpa_get_queue_num,
>> +    .get_features = sfc_vdpa_get_features,
>> +    .get_protocol_features = sfc_vdpa_get_protocol_features,
>> +    .dev_conf = sfc_vdpa_dev_config,
>> +    .dev_close = sfc_vdpa_dev_close,
>> +    .set_vring_state = sfc_vdpa_set_vring_state,
>> +    .set_features = sfc_vdpa_set_features,
>> +};
>> +
>> +struct sfc_vdpa_ops_data *
>> +sfc_vdpa_device_init(void *dev_handle, enum sfc_vdpa_context context)
>> +{
>> +    struct sfc_vdpa_ops_data *ops_data;
>> +    struct rte_pci_device *pci_dev;
>> +
>> +    /* Create vDPA ops context */
>> +    ops_data = rte_zmalloc("vdpa", sizeof(struct sfc_vdpa_ops_data), 0);
>> +    if (ops_data == NULL)
>> +        return NULL;
>> +
>> +    ops_data->vdpa_context = context;
>> +    ops_data->dev_handle = dev_handle;
>> +
>> +    pci_dev = sfc_vdpa_adapter_by_dev_handle(dev_handle)->pdev;
>> +
>> +    /* Register vDPA Device */
>> +    sfc_vdpa_log_init(dev_handle, "register vDPA device");
>> +    ops_data->vdpa_dev =
>> +        rte_vdpa_register_device(&pci_dev->device, &sfc_vdpa_ops);
>> +    if (ops_data->vdpa_dev == NULL) {
>> +        sfc_vdpa_err(dev_handle, "vDPA device registration failed");
>> +        goto fail_register_device;
>> +    }
>> +
>> +    ops_data->state = SFC_VDPA_STATE_INITIALIZED;
>> +
>> +    return ops_data;
>> +
>> +fail_register_device:
>> +    rte_free(ops_data);
>> +    return NULL;
>> +}
>> +
>> +void
>> +sfc_vdpa_device_fini(struct sfc_vdpa_ops_data *ops_data)
>> +{
>> +    rte_vdpa_unregister_device(ops_data->vdpa_dev);
>> +
>> +    rte_free(ops_data);
>> +}
>> diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.h 
>> b/drivers/vdpa/sfc/sfc_vdpa_ops.h
>> new file mode 100644
>> index 0000000..817b302
>> --- /dev/null
>> +++ b/drivers/vdpa/sfc/sfc_vdpa_ops.h
>> @@ -0,0 +1,36 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + *
>> + * Copyright(c) 2020-2021 Xilinx, Inc.
>> + */
>> +
>> +#ifndef _SFC_VDPA_OPS_H
>> +#define _SFC_VDPA_OPS_H
>> +
>> +#include <rte_vdpa.h>
>> +
>> +#define SFC_VDPA_MAX_QUEUE_PAIRS        1
>> +
>> +enum sfc_vdpa_context {
>> +    SFC_VDPA_AS_PF = 0,
> 
> 0 is the default.

There are a number of coding standards which recommnd to
initalize the first member to make it easier for less
skilled reader to calculate enum values.

So, I don't think it is a problem to initalize.

> 
>> +    SFC_VDPA_AS_VF
>> +};
>> +
>> +enum sfc_vdpa_state {
>> +    SFC_VDPA_STATE_UNINITIALIZED = 0,
> 
> Same here.
> 
>> +    SFC_VDPA_STATE_INITIALIZED,
>> +    SFC_VDPA_STATE_NSTATES
>> +};
>> +
>> +struct sfc_vdpa_ops_data {
>> +    void                *dev_handle;
>> +    struct rte_vdpa_device        *vdpa_dev;
>> +    enum sfc_vdpa_context        vdpa_context;
>> +    enum sfc_vdpa_state        state;
>> +};
>> +
>> +struct sfc_vdpa_ops_data *
>> +sfc_vdpa_device_init(void *adapter, enum sfc_vdpa_context context);
>> +void
>> +sfc_vdpa_device_fini(struct sfc_vdpa_ops_data *ops_data);
>> +
>> +#endif /* _SFC_VDPA_OPS_H */
>>
  
Chenbo Xia Nov. 1, 2021, 11:48 a.m. UTC | #3
Hi Vijay,

> -----Original Message-----
> From: Vijay Srivastava <vijay.srivastava@xilinx.com>
> Sent: Friday, October 29, 2021 10:47 PM
> To: dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>;
> andrew.rybchenko@oktetlabs.ru; Vijay Kumar Srivastava <vsrivast@xilinx.com>
> Subject: [PATCH v3 02/10] vdpa/sfc: add support for device initialization
> 
> From: Vijay Kumar Srivastava <vsrivast@xilinx.com>
> 
> Add HW initialization and vDPA device registration support.
> 
> Signed-off-by: Vijay Kumar Srivastava <vsrivast@xilinx.com>
> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> ---
> v2:
> * Used rte_memzone_reserve_aligned for mcdi buffer allocation.
> * Freeing mcdi buff when DMA map fails.
> * Fixed one typo.
> 
>  doc/guides/vdpadevs/sfc.rst       |   6 +
>  drivers/vdpa/sfc/meson.build      |   3 +
>  drivers/vdpa/sfc/sfc_vdpa.c       |  23 +++
>  drivers/vdpa/sfc/sfc_vdpa.h       |  49 +++++-
>  drivers/vdpa/sfc/sfc_vdpa_debug.h |  21 +++
>  drivers/vdpa/sfc/sfc_vdpa_hw.c    | 327
> ++++++++++++++++++++++++++++++++++++++
>  drivers/vdpa/sfc/sfc_vdpa_log.h   |   3 +
>  drivers/vdpa/sfc/sfc_vdpa_mcdi.c  |  74 +++++++++
>  drivers/vdpa/sfc/sfc_vdpa_ops.c   | 129 +++++++++++++++
>  drivers/vdpa/sfc/sfc_vdpa_ops.h   |  36 +++++
>  10 files changed, 670 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/vdpa/sfc/sfc_vdpa_debug.h
>  create mode 100644 drivers/vdpa/sfc/sfc_vdpa_hw.c
>  create mode 100644 drivers/vdpa/sfc/sfc_vdpa_mcdi.c
>  create mode 100644 drivers/vdpa/sfc/sfc_vdpa_ops.c
>  create mode 100644 drivers/vdpa/sfc/sfc_vdpa_ops.h
> 
> diff --git a/doc/guides/vdpadevs/sfc.rst b/doc/guides/vdpadevs/sfc.rst
> index 44e694f..d06c427 100644
> --- a/doc/guides/vdpadevs/sfc.rst
> +++ b/doc/guides/vdpadevs/sfc.rst
> @@ -95,3 +95,9 @@ SFC vDPA PMD provides the following log types available for
> control:
>    Matches a subset of per-port log types registered during runtime.
>    A full name for a particular type may be obtained by appending a
>    dot and a PCI device identifier (``XXXX:XX:XX.X``) to the prefix.
> +
> +- ``pmd.vdpa.sfc.mcdi`` (default level is **notice**)
> +
> +  Extra logging of the communication with the NIC's management CPU.
> +  The format of the log is consumed by the netlogdecode cross-platform
> +  tool. May be managed per-port, as explained above.
> diff --git a/drivers/vdpa/sfc/meson.build b/drivers/vdpa/sfc/meson.build
> index 4255d65..dc333de 100644
> --- a/drivers/vdpa/sfc/meson.build
> +++ b/drivers/vdpa/sfc/meson.build
> @@ -19,4 +19,7 @@ endforeach
>  deps += ['common_sfc_efx', 'bus_pci']
>  sources = files(
>  	'sfc_vdpa.c',
> +	'sfc_vdpa_hw.c',
> +	'sfc_vdpa_mcdi.c',
> +	'sfc_vdpa_ops.c',
>  )
> diff --git a/drivers/vdpa/sfc/sfc_vdpa.c b/drivers/vdpa/sfc/sfc_vdpa.c
> index d85c52b..b7eca56 100644
> --- a/drivers/vdpa/sfc/sfc_vdpa.c
> +++ b/drivers/vdpa/sfc/sfc_vdpa.c
> @@ -232,6 +232,19 @@ struct sfc_vdpa_adapter *
>  		goto fail_vfio_setup;
>  	}
> 
> +	sfc_vdpa_log_init(sva, "hw init");
> +	if (sfc_vdpa_hw_init(sva) != 0) {
> +		sfc_vdpa_err(sva, "failed to init HW %s", pci_dev->name);
> +		goto fail_hw_init;
> +	}
> +
> +	sfc_vdpa_log_init(sva, "dev init");
> +	sva->ops_data = sfc_vdpa_device_init(sva, SFC_VDPA_AS_VF);
> +	if (sva->ops_data == NULL) {
> +		sfc_vdpa_err(sva, "failed vDPA dev init %s", pci_dev->name);
> +		goto fail_dev_init;
> +	}
> +
>  	pthread_mutex_lock(&sfc_vdpa_adapter_list_lock);
>  	TAILQ_INSERT_TAIL(&sfc_vdpa_adapter_list, sva, next);
>  	pthread_mutex_unlock(&sfc_vdpa_adapter_list_lock);
> @@ -240,6 +253,12 @@ struct sfc_vdpa_adapter *
> 
>  	return 0;
> 
> +fail_dev_init:
> +	sfc_vdpa_hw_fini(sva);
> +
> +fail_hw_init:
> +	sfc_vdpa_vfio_teardown(sva);
> +
>  fail_vfio_setup:
>  fail_set_log_prefix:
>  	rte_free(sva);
> @@ -266,6 +285,10 @@ struct sfc_vdpa_adapter *
>  	TAILQ_REMOVE(&sfc_vdpa_adapter_list, sva, next);
>  	pthread_mutex_unlock(&sfc_vdpa_adapter_list_lock);
> 
> +	sfc_vdpa_device_fini(sva->ops_data);
> +
> +	sfc_vdpa_hw_fini(sva);
> +
>  	sfc_vdpa_vfio_teardown(sva);
> 
>  	rte_free(sva);
> diff --git a/drivers/vdpa/sfc/sfc_vdpa.h b/drivers/vdpa/sfc/sfc_vdpa.h
> index 3b77900..046f25d 100644
> --- a/drivers/vdpa/sfc/sfc_vdpa.h
> +++ b/drivers/vdpa/sfc/sfc_vdpa.h
> @@ -11,14 +11,38 @@
> 
>  #include <rte_bus_pci.h>
> 
> +#include "sfc_efx.h"
> +#include "sfc_efx_mcdi.h"
> +#include "sfc_vdpa_debug.h"
>  #include "sfc_vdpa_log.h"
> +#include "sfc_vdpa_ops.h"
> +
> +#define SFC_VDPA_DEFAULT_MCDI_IOVA		0x200000000000
> 
>  /* Adapter private data */
>  struct sfc_vdpa_adapter {
>  	TAILQ_ENTRY(sfc_vdpa_adapter)	next;
> +	/*
> +	 * PMD setup and configuration is not thread safe. Since it is not
> +	 * performance sensitive, it is better to guarantee thread-safety
> +	 * and add device level lock. vDPA control operations which
> +	 * change its state should acquire the lock.
> +	 */
> +	rte_spinlock_t			lock;
>  	struct rte_pci_device		*pdev;
>  	struct rte_pci_addr		pci_addr;
> 
> +	efx_family_t			family;
> +	efx_nic_t			*nic;
> +	rte_spinlock_t			nic_lock;
> +
> +	efsys_bar_t			mem_bar;
> +
> +	struct sfc_efx_mcdi		mcdi;
> +	size_t				mcdi_buff_size;
> +
> +	uint32_t			max_queue_count;
> +
>  	char				log_prefix[SFC_VDPA_LOG_PREFIX_MAX];
>  	uint32_t			logtype_main;
> 
> @@ -26,6 +50,7 @@ struct sfc_vdpa_adapter {
>  	int				vfio_dev_fd;
>  	int				vfio_container_fd;
>  	int				iommu_group_num;
> +	struct sfc_vdpa_ops_data	*ops_data;
>  };
> 
>  uint32_t
> @@ -36,5 +61,27 @@ struct sfc_vdpa_adapter {
>  struct sfc_vdpa_adapter *
>  sfc_vdpa_get_adapter_by_dev(struct rte_pci_device *pdev);
> 
> -#endif  /* _SFC_VDPA_H */
> +int
> +sfc_vdpa_hw_init(struct sfc_vdpa_adapter *sva);
> +void
> +sfc_vdpa_hw_fini(struct sfc_vdpa_adapter *sva);
> 
> +int
> +sfc_vdpa_mcdi_init(struct sfc_vdpa_adapter *sva);
> +void
> +sfc_vdpa_mcdi_fini(struct sfc_vdpa_adapter *sva);
> +
> +int
> +sfc_vdpa_dma_alloc(struct sfc_vdpa_adapter *sva, const char *name,
> +		   size_t len, efsys_mem_t *esmp);
> +
> +void
> +sfc_vdpa_dma_free(struct sfc_vdpa_adapter *sva, efsys_mem_t *esmp);
> +
> +static inline struct sfc_vdpa_adapter *
> +sfc_vdpa_adapter_by_dev_handle(void *dev_handle)
> +{
> +	return (struct sfc_vdpa_adapter *)dev_handle;
> +}
> +
> +#endif  /* _SFC_VDPA_H */
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_debug.h
> b/drivers/vdpa/sfc/sfc_vdpa_debug.h
> new file mode 100644
> index 0000000..cfa8cc5
> --- /dev/null
> +++ b/drivers/vdpa/sfc/sfc_vdpa_debug.h
> @@ -0,0 +1,21 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * Copyright(c) 2020-2021 Xilinx, Inc.
> + */
> +
> +#ifndef _SFC_VDPA_DEBUG_H_
> +#define _SFC_VDPA_DEBUG_H_
> +
> +#include <rte_debug.h>
> +
> +#ifdef RTE_LIBRTE_SFC_VDPA_DEBUG
> +/* Avoid dependency from RTE_LOG_DP_LEVEL to be able to enable debug check
> + * in the driver only.
> + */
> +#define SFC_VDPA_ASSERT(exp)			RTE_VERIFY(exp)
> +#else
> +/* If the driver debug is not enabled, follow DPDK debug/non-debug */
> +#define SFC_VDPA_ASSERT(exp)			RTE_ASSERT(exp)
> +#endif
> +
> +#endif /* _SFC_VDPA_DEBUG_H_ */
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_hw.c b/drivers/vdpa/sfc/sfc_vdpa_hw.c
> new file mode 100644
> index 0000000..7c256ff
> --- /dev/null
> +++ b/drivers/vdpa/sfc/sfc_vdpa_hw.c
> @@ -0,0 +1,327 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * Copyright(c) 2020-2021 Xilinx, Inc.
> + */
> +
> +#include <unistd.h>
> +
> +#include <rte_common.h>
> +#include <rte_errno.h>
> +#include <rte_vfio.h>
> +
> +#include "efx.h"
> +#include "sfc_vdpa.h"
> +#include "sfc_vdpa_ops.h"
> +
> +extern uint32_t sfc_logtype_driver;
> +
> +#ifndef PAGE_SIZE
> +#define PAGE_SIZE   (sysconf(_SC_PAGESIZE))
> +#endif
> +
> +int
> +sfc_vdpa_dma_alloc(struct sfc_vdpa_adapter *sva, const char *name,
> +		   size_t len, efsys_mem_t *esmp)
> +{
> +	uint64_t mcdi_iova;
> +	size_t mcdi_buff_size;
> +	const struct rte_memzone *mz = NULL;
> +	int numa_node = sva->pdev->device.numa_node;
> +	int ret;
> +
> +	mcdi_buff_size = RTE_ALIGN_CEIL(len, PAGE_SIZE);
> +
> +	sfc_vdpa_log_init(sva, "name=%s, len=%zu", name, len);
> +
> +	mz = rte_memzone_reserve_aligned(name, mcdi_buff_size,
> +					 numa_node,
> +					 RTE_MEMZONE_IOVA_CONTIG,
> +					 PAGE_SIZE);
> +	if (mz == NULL) {
> +		sfc_vdpa_err(sva, "cannot reserve memory for %s: len=%#x: %s",
> +			     name, (unsigned int)len, rte_strerror(rte_errno));
> +		return -ENOMEM;
> +	}
> +
> +	/* IOVA address for MCDI would be re-calculated if mapping
> +	 * using default IOVA would fail.
> +	 * TODO: Earlier there was no way to get valid IOVA range.
> +	 * Recently a patch has been submitted to get the IOVA range
> +	 * using ioctl. VFIO_IOMMU_GET_INFO. This patch is available
> +	 * in the kernel version >= 5.4. Support to get the default
> +	 * IOVA address for MCDI buffer using available IOVA range
> +	 * would be added later. Meanwhile default IOVA for MCDI buffer
> +	 * is kept at high mem at 2TB. In case of overlap new available
> +	 * addresses would be searched and same would be used.
> +	 */
> +	mcdi_iova = SFC_VDPA_DEFAULT_MCDI_IOVA;
> +
> +	do {
> +		ret = rte_vfio_container_dma_map(sva->vfio_container_fd,
> +						 (uint64_t)mz->addr, mcdi_iova,
> +						 mcdi_buff_size);
> +		if (ret == 0)
> +			break;
> +
> +		mcdi_iova = mcdi_iova >> 1;
> +		if (mcdi_iova < mcdi_buff_size)	{
> +			sfc_vdpa_err(sva,
> +				     "DMA mapping failed for MCDI : %s",
> +				     rte_strerror(rte_errno));
> +			rte_memzone_free(mz);
> +			return ret;
> +		}
> +
> +	} while (ret < 0);

So when QEMU iova and mcdi_iova conflicts, you just let vdpa dev failed to
configure, right?

Why not use re-mapping mcdi dma region as the solution? Any side-effect?
Or you just assume conflict can hardly happen?

> +
> +	esmp->esm_addr = mcdi_iova;
> +	esmp->esm_base = mz->addr;
> +	sva->mcdi_buff_size = mcdi_buff_size;
> +
> +	sfc_vdpa_info(sva,
> +		      "DMA name=%s len=%zu => virt=%p iova=%" PRIx64,

I believe 0x%" PRIx64 or %#" PRIx64 will be more friendly

> +		      name, len, esmp->esm_base, esmp->esm_addr);
> +
> +	return 0;
> +}
> +
> +void
> +sfc_vdpa_dma_free(struct sfc_vdpa_adapter *sva, efsys_mem_t *esmp)
> +{
> +	int ret;
> +
> +	sfc_vdpa_log_init(sva, "name=%s", esmp->esm_mz->name);
> +
> +	ret = rte_vfio_container_dma_unmap(sva->vfio_container_fd,
> +					   (uint64_t)esmp->esm_base,
> +					   esmp->esm_addr, sva->mcdi_buff_size);
> +	if (ret < 0)
> +		sfc_vdpa_err(sva, "DMA unmap failed for MCDI : %s",
> +			     rte_strerror(rte_errno));
> +
> +	sfc_vdpa_info(sva,
> +		      "DMA free name=%s => virt=%p iova=%" PRIx64,
> +		      esmp->esm_mz->name, esmp->esm_base, esmp->esm_addr);

Ditto.

> +
> +	rte_free((void *)(esmp->esm_base));
> +
> +	sva->mcdi_buff_size = 0;
> +	memset(esmp, 0, sizeof(*esmp));
> +}
> +
> +static int
> +sfc_vdpa_mem_bar_init(struct sfc_vdpa_adapter *sva,
> +		      const efx_bar_region_t *mem_ebrp)
> +{
> +	struct rte_pci_device *pci_dev = sva->pdev;
> +	efsys_bar_t *ebp = &sva->mem_bar;
> +	struct rte_mem_resource *res =
> +		&pci_dev->mem_resource[mem_ebrp->ebr_index];
> +
> +	SFC_BAR_LOCK_INIT(ebp, pci_dev->name);
> +	ebp->esb_rid = mem_ebrp->ebr_index;
> +	ebp->esb_dev = pci_dev;
> +	ebp->esb_base = res->addr;
> +
> +	return 0;
> +}
> +
> +static void
> +sfc_vdpa_mem_bar_fini(struct sfc_vdpa_adapter *sva)
> +{
> +	efsys_bar_t *ebp = &sva->mem_bar;
> +
> +	SFC_BAR_LOCK_DESTROY(ebp);
> +	memset(ebp, 0, sizeof(*ebp));
> +}
> +
> +static int
> +sfc_vdpa_nic_probe(struct sfc_vdpa_adapter *sva)
> +{
> +	efx_nic_t *enp = sva->nic;
> +	int rc;
> +
> +	rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
> +	if (rc != 0)
> +		sfc_vdpa_err(sva, "nic probe failed: %s", rte_strerror(rc));
> +
> +	return rc;
> +}
> +
> +static int
> +sfc_vdpa_estimate_resource_limits(struct sfc_vdpa_adapter *sva)
> +{
> +	efx_drv_limits_t limits;
> +	int rc;
> +	uint32_t evq_allocated;
> +	uint32_t rxq_allocated;
> +	uint32_t txq_allocated;
> +	uint32_t max_queue_cnt;
> +
> +	memset(&limits, 0, sizeof(limits));
> +
> +	/* Request at least one Rx and Tx queue */
> +	limits.edl_min_rxq_count = 1;
> +	limits.edl_min_txq_count = 1;
> +	/* Management event queue plus event queue for Tx/Rx queue */
> +	limits.edl_min_evq_count =
> +		1 + RTE_MAX(limits.edl_min_rxq_count, limits.edl_min_txq_count);
> +
> +	limits.edl_max_rxq_count = SFC_VDPA_MAX_QUEUE_PAIRS;
> +	limits.edl_max_txq_count = SFC_VDPA_MAX_QUEUE_PAIRS;
> +	limits.edl_max_evq_count = 1 + SFC_VDPA_MAX_QUEUE_PAIRS;
> +
> +	SFC_VDPA_ASSERT(limits.edl_max_evq_count >= limits.edl_min_rxq_count);
> +	SFC_VDPA_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);
> +	SFC_VDPA_ASSERT(limits.edl_max_txq_count >= limits.edl_min_rxq_count);
> +
> +	/* Configure the minimum required resources needed for the
> +	 * driver to operate, and the maximum desired resources that the
> +	 * driver is capable of using.
> +	 */
> +	sfc_vdpa_log_init(sva, "set drv limit");
> +	efx_nic_set_drv_limits(sva->nic, &limits);
> +
> +	sfc_vdpa_log_init(sva, "init nic");
> +	rc = efx_nic_init(sva->nic);
> +	if (rc != 0) {
> +		sfc_vdpa_err(sva, "nic init failed: %s", rte_strerror(rc));
> +		goto fail_nic_init;
> +	}
> +
> +	/* Find resource dimensions assigned by firmware to this function */
> +	rc = efx_nic_get_vi_pool(sva->nic, &evq_allocated, &rxq_allocated,
> +				 &txq_allocated);
> +	if (rc != 0) {
> +		sfc_vdpa_err(sva, "vi pool get failed: %s", rte_strerror(rc));
> +		goto fail_get_vi_pool;
> +	}
> +
> +	/* It still may allocate more than maximum, ensure limit */
> +	evq_allocated = RTE_MIN(evq_allocated, limits.edl_max_evq_count);
> +	rxq_allocated = RTE_MIN(rxq_allocated, limits.edl_max_rxq_count);
> +	txq_allocated = RTE_MIN(txq_allocated, limits.edl_max_txq_count);
> +
> +
> +	max_queue_cnt = RTE_MIN(rxq_allocated, txq_allocated);
> +	/* Subtract management EVQ not used for traffic */
> +	max_queue_cnt = RTE_MIN(evq_allocated - 1, max_queue_cnt);
> +
> +	SFC_VDPA_ASSERT(max_queue_cnt > 0);
> +
> +	sva->max_queue_count = max_queue_cnt;
> +
> +	return 0;
> +
> +fail_get_vi_pool:
> +	efx_nic_fini(sva->nic);
> +fail_nic_init:
> +	sfc_vdpa_log_init(sva, "failed: %s", rte_strerror(rc));
> +	return rc;
> +}
> +
> +int
> +sfc_vdpa_hw_init(struct sfc_vdpa_adapter *sva)
> +{
> +	efx_bar_region_t mem_ebr;
> +	efx_nic_t *enp;
> +	int rc;
> +
> +	sfc_vdpa_log_init(sva, "entry");
> +
> +	sfc_vdpa_log_init(sva, "get family");
> +	rc = sfc_efx_family(sva->pdev, &mem_ebr, &sva->family);
> +	if (rc != 0)
> +		goto fail_family;
> +	sfc_vdpa_log_init(sva,
> +			  "family is %u, membar is %u,"
> +			  "function control window offset is %#" PRIx64,
> +			  sva->family, mem_ebr.ebr_index, mem_ebr.ebr_offset);

If ebr_idx is int, then %u -> %d. But if it's a bar index, its type should
be unsigned int and you should change the definition in sfc common code.

> +
> +	sfc_vdpa_log_init(sva, "init mem bar");
> +	rc = sfc_vdpa_mem_bar_init(sva, &mem_ebr);
> +	if (rc != 0)
> +		goto fail_mem_bar_init;
> +
> +	sfc_vdpa_log_init(sva, "create nic");
> +	rte_spinlock_init(&sva->nic_lock);
> +	rc = efx_nic_create(sva->family, (efsys_identifier_t *)sva,
> +			    &sva->mem_bar, mem_ebr.ebr_offset,
> +			    &sva->nic_lock, &enp);
> +	if (rc != 0) {
> +		sfc_vdpa_err(sva, "nic create failed: %s", rte_strerror(rc));
> +		goto fail_nic_create;
> +	}
> +	sva->nic = enp;
> +
> +	sfc_vdpa_log_init(sva, "init mcdi");
> +	rc = sfc_vdpa_mcdi_init(sva);
> +	if (rc != 0) {
> +		sfc_vdpa_err(sva, "mcdi init failed: %s", rte_strerror(rc));
> +		goto fail_mcdi_init;
> +	}
> +
> +	sfc_vdpa_log_init(sva, "probe nic");
> +	rc = sfc_vdpa_nic_probe(sva);
> +	if (rc != 0)
> +		goto fail_nic_probe;
> +
> +	sfc_vdpa_log_init(sva, "reset nic");
> +	rc = efx_nic_reset(enp);
> +	if (rc != 0) {
> +		sfc_vdpa_err(sva, "nic reset failed: %s", rte_strerror(rc));
> +		goto fail_nic_reset;
> +	}
> +
> +	sfc_vdpa_log_init(sva, "estimate resource limits");
> +	rc = sfc_vdpa_estimate_resource_limits(sva);
> +	if (rc != 0)
> +		goto fail_estimate_rsrc_limits;
> +
> +	sfc_vdpa_log_init(sva, "done");
> +
> +	return 0;
> +
> +fail_estimate_rsrc_limits:
> +fail_nic_reset:
> +	efx_nic_unprobe(enp);
> +
> +fail_nic_probe:
> +	sfc_vdpa_mcdi_fini(sva);
> +
> +fail_mcdi_init:
> +	sfc_vdpa_log_init(sva, "destroy nic");
> +	sva->nic = NULL;
> +	efx_nic_destroy(enp);
> +
> +fail_nic_create:
> +	sfc_vdpa_mem_bar_fini(sva);
> +
> +fail_mem_bar_init:
> +fail_family:
> +	sfc_vdpa_log_init(sva, "failed: %s", rte_strerror(rc));
> +	return rc;
> +}
> +
> +void
> +sfc_vdpa_hw_fini(struct sfc_vdpa_adapter *sva)
> +{
> +	efx_nic_t *enp = sva->nic;
> +
> +	sfc_vdpa_log_init(sva, "entry");
> +
> +	sfc_vdpa_log_init(sva, "unprobe nic");
> +	efx_nic_unprobe(enp);
> +
> +	sfc_vdpa_log_init(sva, "mcdi fini");
> +	sfc_vdpa_mcdi_fini(sva);
> +
> +	sfc_vdpa_log_init(sva, "nic fini");
> +	efx_nic_fini(enp);
> +
> +	sfc_vdpa_log_init(sva, "destroy nic");
> +	sva->nic = NULL;
> +	efx_nic_destroy(enp);
> +
> +	sfc_vdpa_mem_bar_fini(sva);
> +}
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_log.h b/drivers/vdpa/sfc/sfc_vdpa_log.h
> index 858e5ee..4e7a84f 100644
> --- a/drivers/vdpa/sfc/sfc_vdpa_log.h
> +++ b/drivers/vdpa/sfc/sfc_vdpa_log.h
> @@ -21,6 +21,9 @@
>  /** Name prefix for the per-device log type used to report basic information
> */
>  #define SFC_VDPA_LOGTYPE_MAIN_STR	SFC_VDPA_LOGTYPE_PREFIX "main"
> 
> +/** Device MCDI log type name prefix */
> +#define SFC_VDPA_LOGTYPE_MCDI_STR	SFC_VDPA_LOGTYPE_PREFIX "mcdi"
> +
>  #define SFC_VDPA_LOG_PREFIX_MAX	32
> 
>  /* Log PMD message, automatically add prefix and \n */
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_mcdi.c
> b/drivers/vdpa/sfc/sfc_vdpa_mcdi.c
> new file mode 100644
> index 0000000..961d2d3
> --- /dev/null
> +++ b/drivers/vdpa/sfc/sfc_vdpa_mcdi.c
> @@ -0,0 +1,74 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * Copyright(c) 2020-2021 Xilinx, Inc.
> + */
> +
> +#include "sfc_efx_mcdi.h"
> +
> +#include "sfc_vdpa.h"
> +#include "sfc_vdpa_debug.h"
> +#include "sfc_vdpa_log.h"
> +
> +static sfc_efx_mcdi_dma_alloc_cb sfc_vdpa_mcdi_dma_alloc;
> +static int
> +sfc_vdpa_mcdi_dma_alloc(void *cookie, const char *name, size_t len,
> +			efsys_mem_t *esmp)
> +{
> +	struct sfc_vdpa_adapter *sva = cookie;
> +
> +	return sfc_vdpa_dma_alloc(sva, name, len, esmp);
> +}
> +
> +static sfc_efx_mcdi_dma_free_cb sfc_vdpa_mcdi_dma_free;
> +static void
> +sfc_vdpa_mcdi_dma_free(void *cookie, efsys_mem_t *esmp)
> +{
> +	struct sfc_vdpa_adapter *sva = cookie;
> +
> +	sfc_vdpa_dma_free(sva, esmp);
> +}
> +
> +static sfc_efx_mcdi_sched_restart_cb sfc_vdpa_mcdi_sched_restart;
> +static void
> +sfc_vdpa_mcdi_sched_restart(void *cookie)
> +{
> +	RTE_SET_USED(cookie);
> +}
> +
> +static sfc_efx_mcdi_mgmt_evq_poll_cb sfc_vdpa_mcdi_mgmt_evq_poll;
> +static void
> +sfc_vdpa_mcdi_mgmt_evq_poll(void *cookie)
> +{
> +	RTE_SET_USED(cookie);
> +}
> +
> +static const struct sfc_efx_mcdi_ops sfc_vdpa_mcdi_ops = {
> +	.dma_alloc	= sfc_vdpa_mcdi_dma_alloc,
> +	.dma_free	= sfc_vdpa_mcdi_dma_free,
> +	.sched_restart  = sfc_vdpa_mcdi_sched_restart,
> +	.mgmt_evq_poll  = sfc_vdpa_mcdi_mgmt_evq_poll,
> +
> +};
> +
> +int
> +sfc_vdpa_mcdi_init(struct sfc_vdpa_adapter *sva)
> +{
> +	uint32_t logtype;
> +
> +	sfc_vdpa_log_init(sva, "entry");
> +
> +	logtype = sfc_vdpa_register_logtype(&(sva->pdev->addr),
> +					    SFC_VDPA_LOGTYPE_MCDI_STR,
> +					    RTE_LOG_NOTICE);
> +
> +	return sfc_efx_mcdi_init(&sva->mcdi, logtype,
> +				 sva->log_prefix, sva->nic,
> +				 &sfc_vdpa_mcdi_ops, sva);
> +}
> +
> +void
> +sfc_vdpa_mcdi_fini(struct sfc_vdpa_adapter *sva)
> +{
> +	sfc_vdpa_log_init(sva, "entry");
> +	sfc_efx_mcdi_fini(&sva->mcdi);
> +}
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.c b/drivers/vdpa/sfc/sfc_vdpa_ops.c
> new file mode 100644
> index 0000000..71696be
> --- /dev/null
> +++ b/drivers/vdpa/sfc/sfc_vdpa_ops.c
> @@ -0,0 +1,129 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * Copyright(c) 2020-2021 Xilinx, Inc.
> + */
> +
> +#include <rte_malloc.h>
> +#include <rte_vdpa.h>
> +#include <rte_vdpa_dev.h>
> +#include <rte_vhost.h>
> +
> +#include "sfc_vdpa_ops.h"
> +#include "sfc_vdpa.h"
> +
> +/* Dummy functions for mandatory vDPA ops to pass vDPA device registration.
> + * In subsequent patches these ops would be implemented.
> + */
> +static int
> +sfc_vdpa_get_queue_num(struct rte_vdpa_device *vdpa_dev, uint32_t *queue_num)
> +{
> +	RTE_SET_USED(vdpa_dev);
> +	RTE_SET_USED(queue_num);
> +
> +	return -1;
> +}
> +
> +static int
> +sfc_vdpa_get_features(struct rte_vdpa_device *vdpa_dev, uint64_t *features)
> +{
> +	RTE_SET_USED(vdpa_dev);
> +	RTE_SET_USED(features);
> +
> +	return -1;
> +}
> +
> +static int
> +sfc_vdpa_get_protocol_features(struct rte_vdpa_device *vdpa_dev,
> +			       uint64_t *features)
> +{
> +	RTE_SET_USED(vdpa_dev);
> +	RTE_SET_USED(features);
> +
> +	return -1;
> +}
> +
> +static int
> +sfc_vdpa_dev_config(int vid)
> +{
> +	RTE_SET_USED(vid);
> +
> +	return -1;
> +}
> +
> +static int
> +sfc_vdpa_dev_close(int vid)
> +{
> +	RTE_SET_USED(vid);
> +
> +	return -1;
> +}
> +
> +static int
> +sfc_vdpa_set_vring_state(int vid, int vring, int state)
> +{
> +	RTE_SET_USED(vid);
> +	RTE_SET_USED(vring);
> +	RTE_SET_USED(state);
> +
> +	return -1;
> +}
> +
> +static int
> +sfc_vdpa_set_features(int vid)
> +{
> +	RTE_SET_USED(vid);
> +
> +	return -1;
> +}
> +
> +static struct rte_vdpa_dev_ops sfc_vdpa_ops = {
> +	.get_queue_num = sfc_vdpa_get_queue_num,
> +	.get_features = sfc_vdpa_get_features,
> +	.get_protocol_features = sfc_vdpa_get_protocol_features,
> +	.dev_conf = sfc_vdpa_dev_config,
> +	.dev_close = sfc_vdpa_dev_close,
> +	.set_vring_state = sfc_vdpa_set_vring_state,
> +	.set_features = sfc_vdpa_set_features,
> +};
> +
> +struct sfc_vdpa_ops_data *
> +sfc_vdpa_device_init(void *dev_handle, enum sfc_vdpa_context context)
> +{
> +	struct sfc_vdpa_ops_data *ops_data;
> +	struct rte_pci_device *pci_dev;
> +
> +	/* Create vDPA ops context */
> +	ops_data = rte_zmalloc("vdpa", sizeof(struct sfc_vdpa_ops_data), 0);
> +	if (ops_data == NULL)
> +		return NULL;
> +
> +	ops_data->vdpa_context = context;
> +	ops_data->dev_handle = dev_handle;
> +
> +	pci_dev = sfc_vdpa_adapter_by_dev_handle(dev_handle)->pdev;
> +
> +	/* Register vDPA Device */
> +	sfc_vdpa_log_init(dev_handle, "register vDPA device");
> +	ops_data->vdpa_dev =
> +		rte_vdpa_register_device(&pci_dev->device, &sfc_vdpa_ops);
> +	if (ops_data->vdpa_dev == NULL) {
> +		sfc_vdpa_err(dev_handle, "vDPA device registration failed");
> +		goto fail_register_device;
> +	}
> +
> +	ops_data->state = SFC_VDPA_STATE_INITIALIZED;
> +
> +	return ops_data;
> +
> +fail_register_device:
> +	rte_free(ops_data);
> +	return NULL;
> +}
> +
> +void
> +sfc_vdpa_device_fini(struct sfc_vdpa_ops_data *ops_data)
> +{
> +	rte_vdpa_unregister_device(ops_data->vdpa_dev);
> +
> +	rte_free(ops_data);
> +}
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.h b/drivers/vdpa/sfc/sfc_vdpa_ops.h
> new file mode 100644
> index 0000000..817b302
> --- /dev/null
> +++ b/drivers/vdpa/sfc/sfc_vdpa_ops.h
> @@ -0,0 +1,36 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * Copyright(c) 2020-2021 Xilinx, Inc.
> + */
> +
> +#ifndef _SFC_VDPA_OPS_H
> +#define _SFC_VDPA_OPS_H
> +
> +#include <rte_vdpa.h>
> +
> +#define SFC_VDPA_MAX_QUEUE_PAIRS		1
> +
> +enum sfc_vdpa_context {
> +	SFC_VDPA_AS_PF = 0,

If you don't use SFC_VDPA_AS_PF for this version of driver, why
not introduce it when you need it?

> +	SFC_VDPA_AS_VF
> +};
> +
> +enum sfc_vdpa_state {
> +	SFC_VDPA_STATE_UNINITIALIZED = 0,
> +	SFC_VDPA_STATE_INITIALIZED,
> +	SFC_VDPA_STATE_NSTATES

Similar comment: if you don't need info like this 'number of states',
you can just remove it.

Thanks,
Chenbo

> +};
> +
> +struct sfc_vdpa_ops_data {
> +	void				*dev_handle;
> +	struct rte_vdpa_device		*vdpa_dev;
> +	enum sfc_vdpa_context		vdpa_context;
> +	enum sfc_vdpa_state		state;
> +};
> +
> +struct sfc_vdpa_ops_data *
> +sfc_vdpa_device_init(void *adapter, enum sfc_vdpa_context context);
> +void
> +sfc_vdpa_device_fini(struct sfc_vdpa_ops_data *ops_data);
> +
> +#endif /* _SFC_VDPA_OPS_H */
> --
> 1.8.3.1
  
Vijay Kumar Srivastava Nov. 2, 2021, 4:38 a.m. UTC | #4
Hi Chenbo, 

>-----Original Message-----
>From: Xia, Chenbo <chenbo.xia@intel.com>
>Sent: Monday, November 1, 2021 5:19 PM
>To: Vijay Kumar Srivastava <vsrivast@xilinx.com>; dev@dpdk.org
>Cc: maxime.coquelin@redhat.com; andrew.rybchenko@oktetlabs.ru; Vijay
>Kumar Srivastava <vsrivast@xilinx.com>
>Subject: RE: [PATCH v3 02/10] vdpa/sfc: add support for device initialization
>
>Hi Vijay,
>
>> -----Original Message-----
>> From: Vijay Srivastava <vijay.srivastava@xilinx.com>
>> Sent: Friday, October 29, 2021 10:47 PM
>> To: dev@dpdk.org
>> Cc: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>;
>> andrew.rybchenko@oktetlabs.ru; Vijay Kumar Srivastava
>> <vsrivast@xilinx.com>
>> Subject: [PATCH v3 02/10] vdpa/sfc: add support for device
>> initialization
>>
>> From: Vijay Kumar Srivastava <vsrivast@xilinx.com>
>>
>> Add HW initialization and vDPA device registration support.
>>
>> Signed-off-by: Vijay Kumar Srivastava <vsrivast@xilinx.com>
>> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
>> ---
[SNIP]
>> +
>> +	do {
>> +		ret = rte_vfio_container_dma_map(sva->vfio_container_fd,
>> +						 (uint64_t)mz->addr,
>mcdi_iova,
>> +						 mcdi_buff_size);
>> +		if (ret == 0)
>> +			break;
>> +
>> +		mcdi_iova = mcdi_iova >> 1;
>> +		if (mcdi_iova < mcdi_buff_size)	{
>> +			sfc_vdpa_err(sva,
>> +				     "DMA mapping failed for MCDI : %s",
>> +				     rte_strerror(rte_errno));
>> +			rte_memzone_free(mz);
>> +			return ret;
>> +		}
>> +
>> +	} while (ret < 0);
>
>So when QEMU iova and mcdi_iova conflicts, you just let vdpa dev failed to
>configure, right?
>
>Why not use re-mapping mcdi dma region as the solution? Any side-effect?
>Or you just assume conflict can hardly happen?

MCDI configuration is being done at the very early point of initialization.
Conflict would be detected later when rte_vhost_get_mem_table() would be invoked in .dev_conf callback and
then MCDI re-mapping can be done in case of conflict, for this a patch is in progress which would be submitted separately.  

Regards,
Vijay

[SNIP]
  
Chenbo Xia Nov. 2, 2021, 5:16 a.m. UTC | #5
> -----Original Message-----
> From: Vijay Kumar Srivastava <vsrivast@xilinx.com>
> Sent: Tuesday, November 2, 2021 12:38 PM
> To: Xia, Chenbo <chenbo.xia@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; andrew.rybchenko@oktetlabs.ru; Praveen Kumar
> Jain <praveenj@xilinx.com>
> Subject: RE: [PATCH v3 02/10] vdpa/sfc: add support for device initialization
> 
> Hi Chenbo,
> 
> >-----Original Message-----
> >From: Xia, Chenbo <chenbo.xia@intel.com>
> >Sent: Monday, November 1, 2021 5:19 PM
> >To: Vijay Kumar Srivastava <vsrivast@xilinx.com>; dev@dpdk.org
> >Cc: maxime.coquelin@redhat.com; andrew.rybchenko@oktetlabs.ru; Vijay
> >Kumar Srivastava <vsrivast@xilinx.com>
> >Subject: RE: [PATCH v3 02/10] vdpa/sfc: add support for device initialization
> >
> >Hi Vijay,
> >
> >> -----Original Message-----
> >> From: Vijay Srivastava <vijay.srivastava@xilinx.com>
> >> Sent: Friday, October 29, 2021 10:47 PM
> >> To: dev@dpdk.org
> >> Cc: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>;
> >> andrew.rybchenko@oktetlabs.ru; Vijay Kumar Srivastava
> >> <vsrivast@xilinx.com>
> >> Subject: [PATCH v3 02/10] vdpa/sfc: add support for device
> >> initialization
> >>
> >> From: Vijay Kumar Srivastava <vsrivast@xilinx.com>
> >>
> >> Add HW initialization and vDPA device registration support.
> >>
> >> Signed-off-by: Vijay Kumar Srivastava <vsrivast@xilinx.com>
> >> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> >> ---
> [SNIP]
> >> +
> >> +	do {
> >> +		ret = rte_vfio_container_dma_map(sva->vfio_container_fd,
> >> +						 (uint64_t)mz->addr,
> >mcdi_iova,
> >> +						 mcdi_buff_size);
> >> +		if (ret == 0)
> >> +			break;
> >> +
> >> +		mcdi_iova = mcdi_iova >> 1;
> >> +		if (mcdi_iova < mcdi_buff_size)	{
> >> +			sfc_vdpa_err(sva,
> >> +				     "DMA mapping failed for MCDI : %s",
> >> +				     rte_strerror(rte_errno));
> >> +			rte_memzone_free(mz);
> >> +			return ret;
> >> +		}
> >> +
> >> +	} while (ret < 0);
> >
> >So when QEMU iova and mcdi_iova conflicts, you just let vdpa dev failed to
> >configure, right?
> >
> >Why not use re-mapping mcdi dma region as the solution? Any side-effect?
> >Or you just assume conflict can hardly happen?
> 
> MCDI configuration is being done at the very early point of initialization.
> Conflict would be detected later when rte_vhost_get_mem_table() would be
> invoked in .dev_conf callback and
> then MCDI re-mapping can be done in case of conflict,

Agree. It should be done in dev_conf callback.

> for this a patch is in
> progress which would be submitted separately.

OK for me, as the initial version, you can just let dev_conf fail if conflict
happens.

/Chenbo

> 
> Regards,
> Vijay
> 
> [SNIP]
  
Vijay Kumar Srivastava Nov. 2, 2021, 7:42 a.m. UTC | #6
Hi Chenbo, 

>-----Original Message-----
>From: Xia, Chenbo <chenbo.xia@intel.com>
>Sent: Monday, November 1, 2021 5:19 PM
>To: Vijay Kumar Srivastava <vsrivast@xilinx.com>; dev@dpdk.org
>Cc: maxime.coquelin@redhat.com; andrew.rybchenko@oktetlabs.ru; Vijay
>Kumar Srivastava <vsrivast@xilinx.com>
>Subject: RE: [PATCH v3 02/10] vdpa/sfc: add support for device initialization
>
>Hi Vijay,
>
>> -----Original Message-----
>> From: Vijay Srivastava <vijay.srivastava@xilinx.com>
>> Sent: Friday, October 29, 2021 10:47 PM
>> To: dev@dpdk.org
>> Cc: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>;
>> andrew.rybchenko@oktetlabs.ru; Vijay Kumar Srivastava
>> <vsrivast@xilinx.com>
>> Subject: [PATCH v3 02/10] vdpa/sfc: add support for device
>> initialization
>>
>> From: Vijay Kumar Srivastava <vsrivast@xilinx.com>
>>
>> Add HW initialization and vDPA device registration support.
>>
>> Signed-off-by: Vijay Kumar Srivastava <vsrivast@xilinx.com>
>> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
>> ---
[SNIP]
>> +sfc_vdpa_hw_init(struct sfc_vdpa_adapter *sva) {
>> +	efx_bar_region_t mem_ebr;
>> +	efx_nic_t *enp;
>> +	int rc;
>> +
>> +	sfc_vdpa_log_init(sva, "entry");
>> +
>> +	sfc_vdpa_log_init(sva, "get family");
>> +	rc = sfc_efx_family(sva->pdev, &mem_ebr, &sva->family);
>> +	if (rc != 0)
>> +		goto fail_family;
>> +	sfc_vdpa_log_init(sva,
>> +			  "family is %u, membar is %u,"
>> +			  "function control window offset is %#" PRIx64,
>> +			  sva->family, mem_ebr.ebr_index,
>mem_ebr.ebr_offset);
>
>If ebr_idx is int, then %u -> %d. But if it's a bar index, its type should be
>unsigned int and you should change the definition in sfc common code.
Yes. It’s BAR index. 
Thanks for the catch. I agree that usage of 'unsigned int' looks better
for BAR index, however it should result in more changes in similar
places. Is it OK if we use %d here right now to be consistent with
current type and address the review note in a follow up patches
for common/sfc_efx, net/sfc and vdpa/sfc?
  
Chenbo Xia Nov. 2, 2021, 7:50 a.m. UTC | #7
> -----Original Message-----
> From: Vijay Kumar Srivastava <vsrivast@xilinx.com>
> Sent: Tuesday, November 2, 2021 3:43 PM
> To: Xia, Chenbo <chenbo.xia@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; andrew.rybchenko@oktetlabs.ru; Praveen Kumar
> Jain <praveenj@xilinx.com>; Harpreet Singh Anand <hanand@xilinx.com>
> Subject: RE: [PATCH v3 02/10] vdpa/sfc: add support for device initialization
> 
> Hi Chenbo,
> 
> >-----Original Message-----
> >From: Xia, Chenbo <chenbo.xia@intel.com>
> >Sent: Monday, November 1, 2021 5:19 PM
> >To: Vijay Kumar Srivastava <vsrivast@xilinx.com>; dev@dpdk.org
> >Cc: maxime.coquelin@redhat.com; andrew.rybchenko@oktetlabs.ru; Vijay
> >Kumar Srivastava <vsrivast@xilinx.com>
> >Subject: RE: [PATCH v3 02/10] vdpa/sfc: add support for device initialization
> >
> >Hi Vijay,
> >
> >> -----Original Message-----
> >> From: Vijay Srivastava <vijay.srivastava@xilinx.com>
> >> Sent: Friday, October 29, 2021 10:47 PM
> >> To: dev@dpdk.org
> >> Cc: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>;
> >> andrew.rybchenko@oktetlabs.ru; Vijay Kumar Srivastava
> >> <vsrivast@xilinx.com>
> >> Subject: [PATCH v3 02/10] vdpa/sfc: add support for device
> >> initialization
> >>
> >> From: Vijay Kumar Srivastava <vsrivast@xilinx.com>
> >>
> >> Add HW initialization and vDPA device registration support.
> >>
> >> Signed-off-by: Vijay Kumar Srivastava <vsrivast@xilinx.com>
> >> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> >> ---
> [SNIP]
> >> +sfc_vdpa_hw_init(struct sfc_vdpa_adapter *sva) {
> >> +	efx_bar_region_t mem_ebr;
> >> +	efx_nic_t *enp;
> >> +	int rc;
> >> +
> >> +	sfc_vdpa_log_init(sva, "entry");
> >> +
> >> +	sfc_vdpa_log_init(sva, "get family");
> >> +	rc = sfc_efx_family(sva->pdev, &mem_ebr, &sva->family);
> >> +	if (rc != 0)
> >> +		goto fail_family;
> >> +	sfc_vdpa_log_init(sva,
> >> +			  "family is %u, membar is %u,"
> >> +			  "function control window offset is %#" PRIx64,
> >> +			  sva->family, mem_ebr.ebr_index,
> >mem_ebr.ebr_offset);
> >
> >If ebr_idx is int, then %u -> %d. But if it's a bar index, its type should be
> >unsigned int and you should change the definition in sfc common code.
> Yes. It’s BAR index.
> Thanks for the catch. I agree that usage of 'unsigned int' looks better
> for BAR index, however it should result in more changes in similar
> places. Is it OK if we use %d here right now to be consistent with
> current type and address the review note in a follow up patches
> for common/sfc_efx, net/sfc and vdpa/sfc?

Sure. Ok for me.

/Chenbo
  
Vijay Kumar Srivastava Nov. 2, 2021, 9:50 a.m. UTC | #8
Hi Chenbo,

>-----Original Message-----
>From: Xia, Chenbo <chenbo.xia@intel.com>
>Sent: Tuesday, November 2, 2021 10:47 AM
>To: Vijay Kumar Srivastava <vsrivast@xilinx.com>; dev@dpdk.org
>Cc: maxime.coquelin@redhat.com; andrew.rybchenko@oktetlabs.ru; Praveen
>Kumar Jain <praveenj@xilinx.com>
>Subject: RE: [PATCH v3 02/10] vdpa/sfc: add support for device initialization
>
>> -----Original Message-----
>> From: Vijay Kumar Srivastava <vsrivast@xilinx.com>
>> Sent: Tuesday, November 2, 2021 12:38 PM
>> To: Xia, Chenbo <chenbo.xia@intel.com>; dev@dpdk.org
>> Cc: maxime.coquelin@redhat.com; andrew.rybchenko@oktetlabs.ru; Praveen
>> Kumar Jain <praveenj@xilinx.com>
>> Subject: RE: [PATCH v3 02/10] vdpa/sfc: add support for device
>> initialization
>>
>> Hi Chenbo,
>>
>> >-----Original Message-----
>> >From: Xia, Chenbo <chenbo.xia@intel.com>
>> >Sent: Monday, November 1, 2021 5:19 PM
>> >To: Vijay Kumar Srivastava <vsrivast@xilinx.com>; dev@dpdk.org
>> >Cc: maxime.coquelin@redhat.com; andrew.rybchenko@oktetlabs.ru; Vijay
>> >Kumar Srivastava <vsrivast@xilinx.com>
>> >Subject: RE: [PATCH v3 02/10] vdpa/sfc: add support for device
>> >initialization
>> >
>> >Hi Vijay,
>> >
>> >> -----Original Message-----
>> >> From: Vijay Srivastava <vijay.srivastava@xilinx.com>
>> >> Sent: Friday, October 29, 2021 10:47 PM
>> >> To: dev@dpdk.org
>> >> Cc: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>;
>> >> andrew.rybchenko@oktetlabs.ru; Vijay Kumar Srivastava
>> >> <vsrivast@xilinx.com>
>> >> Subject: [PATCH v3 02/10] vdpa/sfc: add support for device
>> >> initialization
>> >>
>> >> From: Vijay Kumar Srivastava <vsrivast@xilinx.com>
>> >>
>> >> Add HW initialization and vDPA device registration support.
>> >>
>> >> Signed-off-by: Vijay Kumar Srivastava <vsrivast@xilinx.com>
>> >> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
>> >> ---
>> [SNIP]
>> >> +
>> >> +	do {
>> >> +		ret = rte_vfio_container_dma_map(sva->vfio_container_fd,
>> >> +						 (uint64_t)mz->addr,
>> >mcdi_iova,
>> >> +						 mcdi_buff_size);
>> >> +		if (ret == 0)
>> >> +			break;
>> >> +
>> >> +		mcdi_iova = mcdi_iova >> 1;
>> >> +		if (mcdi_iova < mcdi_buff_size)	{
>> >> +			sfc_vdpa_err(sva,
>> >> +				     "DMA mapping failed for MCDI : %s",
>> >> +				     rte_strerror(rte_errno));
>> >> +			rte_memzone_free(mz);
>> >> +			return ret;
>> >> +		}
>> >> +
>> >> +	} while (ret < 0);
>> >
>> >So when QEMU iova and mcdi_iova conflicts, you just let vdpa dev
>> >failed to configure, right?
>> >
>> >Why not use re-mapping mcdi dma region as the solution? Any side-effect?
>> >Or you just assume conflict can hardly happen?
>>
>> MCDI configuration is being done at the very early point of initialization.
>> Conflict would be detected later when rte_vhost_get_mem_table() would
>> be invoked in .dev_conf callback and then MCDI re-mapping can be done
>> in case of conflict,
>
>Agree. It should be done in dev_conf callback.
>
>> for this a patch is in
>> progress which would be submitted separately.
>
>OK for me, as the initial version, you can just let dev_conf fail if conflict
>happens.
Yes. In case of conflict dev_conf would fail. 

Regards,
Vijay

>> [SNIP]
  

Patch

diff --git a/doc/guides/vdpadevs/sfc.rst b/doc/guides/vdpadevs/sfc.rst
index 44e694f..d06c427 100644
--- a/doc/guides/vdpadevs/sfc.rst
+++ b/doc/guides/vdpadevs/sfc.rst
@@ -95,3 +95,9 @@  SFC vDPA PMD provides the following log types available for control:
   Matches a subset of per-port log types registered during runtime.
   A full name for a particular type may be obtained by appending a
   dot and a PCI device identifier (``XXXX:XX:XX.X``) to the prefix.
+
+- ``pmd.vdpa.sfc.mcdi`` (default level is **notice**)
+
+  Extra logging of the communication with the NIC's management CPU.
+  The format of the log is consumed by the netlogdecode cross-platform
+  tool. May be managed per-port, as explained above.
diff --git a/drivers/vdpa/sfc/meson.build b/drivers/vdpa/sfc/meson.build
index 4255d65..dc333de 100644
--- a/drivers/vdpa/sfc/meson.build
+++ b/drivers/vdpa/sfc/meson.build
@@ -19,4 +19,7 @@  endforeach
 deps += ['common_sfc_efx', 'bus_pci']
 sources = files(
 	'sfc_vdpa.c',
+	'sfc_vdpa_hw.c',
+	'sfc_vdpa_mcdi.c',
+	'sfc_vdpa_ops.c',
 )
diff --git a/drivers/vdpa/sfc/sfc_vdpa.c b/drivers/vdpa/sfc/sfc_vdpa.c
index d85c52b..b7eca56 100644
--- a/drivers/vdpa/sfc/sfc_vdpa.c
+++ b/drivers/vdpa/sfc/sfc_vdpa.c
@@ -232,6 +232,19 @@  struct sfc_vdpa_adapter *
 		goto fail_vfio_setup;
 	}
 
+	sfc_vdpa_log_init(sva, "hw init");
+	if (sfc_vdpa_hw_init(sva) != 0) {
+		sfc_vdpa_err(sva, "failed to init HW %s", pci_dev->name);
+		goto fail_hw_init;
+	}
+
+	sfc_vdpa_log_init(sva, "dev init");
+	sva->ops_data = sfc_vdpa_device_init(sva, SFC_VDPA_AS_VF);
+	if (sva->ops_data == NULL) {
+		sfc_vdpa_err(sva, "failed vDPA dev init %s", pci_dev->name);
+		goto fail_dev_init;
+	}
+
 	pthread_mutex_lock(&sfc_vdpa_adapter_list_lock);
 	TAILQ_INSERT_TAIL(&sfc_vdpa_adapter_list, sva, next);
 	pthread_mutex_unlock(&sfc_vdpa_adapter_list_lock);
@@ -240,6 +253,12 @@  struct sfc_vdpa_adapter *
 
 	return 0;
 
+fail_dev_init:
+	sfc_vdpa_hw_fini(sva);
+
+fail_hw_init:
+	sfc_vdpa_vfio_teardown(sva);
+
 fail_vfio_setup:
 fail_set_log_prefix:
 	rte_free(sva);
@@ -266,6 +285,10 @@  struct sfc_vdpa_adapter *
 	TAILQ_REMOVE(&sfc_vdpa_adapter_list, sva, next);
 	pthread_mutex_unlock(&sfc_vdpa_adapter_list_lock);
 
+	sfc_vdpa_device_fini(sva->ops_data);
+
+	sfc_vdpa_hw_fini(sva);
+
 	sfc_vdpa_vfio_teardown(sva);
 
 	rte_free(sva);
diff --git a/drivers/vdpa/sfc/sfc_vdpa.h b/drivers/vdpa/sfc/sfc_vdpa.h
index 3b77900..046f25d 100644
--- a/drivers/vdpa/sfc/sfc_vdpa.h
+++ b/drivers/vdpa/sfc/sfc_vdpa.h
@@ -11,14 +11,38 @@ 
 
 #include <rte_bus_pci.h>
 
+#include "sfc_efx.h"
+#include "sfc_efx_mcdi.h"
+#include "sfc_vdpa_debug.h"
 #include "sfc_vdpa_log.h"
+#include "sfc_vdpa_ops.h"
+
+#define SFC_VDPA_DEFAULT_MCDI_IOVA		0x200000000000
 
 /* Adapter private data */
 struct sfc_vdpa_adapter {
 	TAILQ_ENTRY(sfc_vdpa_adapter)	next;
+	/*
+	 * PMD setup and configuration is not thread safe. Since it is not
+	 * performance sensitive, it is better to guarantee thread-safety
+	 * and add device level lock. vDPA control operations which
+	 * change its state should acquire the lock.
+	 */
+	rte_spinlock_t			lock;
 	struct rte_pci_device		*pdev;
 	struct rte_pci_addr		pci_addr;
 
+	efx_family_t			family;
+	efx_nic_t			*nic;
+	rte_spinlock_t			nic_lock;
+
+	efsys_bar_t			mem_bar;
+
+	struct sfc_efx_mcdi		mcdi;
+	size_t				mcdi_buff_size;
+
+	uint32_t			max_queue_count;
+
 	char				log_prefix[SFC_VDPA_LOG_PREFIX_MAX];
 	uint32_t			logtype_main;
 
@@ -26,6 +50,7 @@  struct sfc_vdpa_adapter {
 	int				vfio_dev_fd;
 	int				vfio_container_fd;
 	int				iommu_group_num;
+	struct sfc_vdpa_ops_data	*ops_data;
 };
 
 uint32_t
@@ -36,5 +61,27 @@  struct sfc_vdpa_adapter {
 struct sfc_vdpa_adapter *
 sfc_vdpa_get_adapter_by_dev(struct rte_pci_device *pdev);
 
-#endif  /* _SFC_VDPA_H */
+int
+sfc_vdpa_hw_init(struct sfc_vdpa_adapter *sva);
+void
+sfc_vdpa_hw_fini(struct sfc_vdpa_adapter *sva);
 
+int
+sfc_vdpa_mcdi_init(struct sfc_vdpa_adapter *sva);
+void
+sfc_vdpa_mcdi_fini(struct sfc_vdpa_adapter *sva);
+
+int
+sfc_vdpa_dma_alloc(struct sfc_vdpa_adapter *sva, const char *name,
+		   size_t len, efsys_mem_t *esmp);
+
+void
+sfc_vdpa_dma_free(struct sfc_vdpa_adapter *sva, efsys_mem_t *esmp);
+
+static inline struct sfc_vdpa_adapter *
+sfc_vdpa_adapter_by_dev_handle(void *dev_handle)
+{
+	return (struct sfc_vdpa_adapter *)dev_handle;
+}
+
+#endif  /* _SFC_VDPA_H */
diff --git a/drivers/vdpa/sfc/sfc_vdpa_debug.h b/drivers/vdpa/sfc/sfc_vdpa_debug.h
new file mode 100644
index 0000000..cfa8cc5
--- /dev/null
+++ b/drivers/vdpa/sfc/sfc_vdpa_debug.h
@@ -0,0 +1,21 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright(c) 2020-2021 Xilinx, Inc.
+ */
+
+#ifndef _SFC_VDPA_DEBUG_H_
+#define _SFC_VDPA_DEBUG_H_
+
+#include <rte_debug.h>
+
+#ifdef RTE_LIBRTE_SFC_VDPA_DEBUG
+/* Avoid dependency from RTE_LOG_DP_LEVEL to be able to enable debug check
+ * in the driver only.
+ */
+#define SFC_VDPA_ASSERT(exp)			RTE_VERIFY(exp)
+#else
+/* If the driver debug is not enabled, follow DPDK debug/non-debug */
+#define SFC_VDPA_ASSERT(exp)			RTE_ASSERT(exp)
+#endif
+
+#endif /* _SFC_VDPA_DEBUG_H_ */
diff --git a/drivers/vdpa/sfc/sfc_vdpa_hw.c b/drivers/vdpa/sfc/sfc_vdpa_hw.c
new file mode 100644
index 0000000..7c256ff
--- /dev/null
+++ b/drivers/vdpa/sfc/sfc_vdpa_hw.c
@@ -0,0 +1,327 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright(c) 2020-2021 Xilinx, Inc.
+ */
+
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_vfio.h>
+
+#include "efx.h"
+#include "sfc_vdpa.h"
+#include "sfc_vdpa_ops.h"
+
+extern uint32_t sfc_logtype_driver;
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE   (sysconf(_SC_PAGESIZE))
+#endif
+
+int
+sfc_vdpa_dma_alloc(struct sfc_vdpa_adapter *sva, const char *name,
+		   size_t len, efsys_mem_t *esmp)
+{
+	uint64_t mcdi_iova;
+	size_t mcdi_buff_size;
+	const struct rte_memzone *mz = NULL;
+	int numa_node = sva->pdev->device.numa_node;
+	int ret;
+
+	mcdi_buff_size = RTE_ALIGN_CEIL(len, PAGE_SIZE);
+
+	sfc_vdpa_log_init(sva, "name=%s, len=%zu", name, len);
+
+	mz = rte_memzone_reserve_aligned(name, mcdi_buff_size,
+					 numa_node,
+					 RTE_MEMZONE_IOVA_CONTIG,
+					 PAGE_SIZE);
+	if (mz == NULL) {
+		sfc_vdpa_err(sva, "cannot reserve memory for %s: len=%#x: %s",
+			     name, (unsigned int)len, rte_strerror(rte_errno));
+		return -ENOMEM;
+	}
+
+	/* IOVA address for MCDI would be re-calculated if mapping
+	 * using default IOVA would fail.
+	 * TODO: Earlier there was no way to get valid IOVA range.
+	 * Recently a patch has been submitted to get the IOVA range
+	 * using ioctl. VFIO_IOMMU_GET_INFO. This patch is available
+	 * in the kernel version >= 5.4. Support to get the default
+	 * IOVA address for MCDI buffer using available IOVA range
+	 * would be added later. Meanwhile default IOVA for MCDI buffer
+	 * is kept at high mem at 2TB. In case of overlap new available
+	 * addresses would be searched and same would be used.
+	 */
+	mcdi_iova = SFC_VDPA_DEFAULT_MCDI_IOVA;
+
+	do {
+		ret = rte_vfio_container_dma_map(sva->vfio_container_fd,
+						 (uint64_t)mz->addr, mcdi_iova,
+						 mcdi_buff_size);
+		if (ret == 0)
+			break;
+
+		mcdi_iova = mcdi_iova >> 1;
+		if (mcdi_iova < mcdi_buff_size)	{
+			sfc_vdpa_err(sva,
+				     "DMA mapping failed for MCDI : %s",
+				     rte_strerror(rte_errno));
+			rte_memzone_free(mz);
+			return ret;
+		}
+
+	} while (ret < 0);
+
+	esmp->esm_addr = mcdi_iova;
+	esmp->esm_base = mz->addr;
+	sva->mcdi_buff_size = mcdi_buff_size;
+
+	sfc_vdpa_info(sva,
+		      "DMA name=%s len=%zu => virt=%p iova=%" PRIx64,
+		      name, len, esmp->esm_base, esmp->esm_addr);
+
+	return 0;
+}
+
+void
+sfc_vdpa_dma_free(struct sfc_vdpa_adapter *sva, efsys_mem_t *esmp)
+{
+	int ret;
+
+	sfc_vdpa_log_init(sva, "name=%s", esmp->esm_mz->name);
+
+	ret = rte_vfio_container_dma_unmap(sva->vfio_container_fd,
+					   (uint64_t)esmp->esm_base,
+					   esmp->esm_addr, sva->mcdi_buff_size);
+	if (ret < 0)
+		sfc_vdpa_err(sva, "DMA unmap failed for MCDI : %s",
+			     rte_strerror(rte_errno));
+
+	sfc_vdpa_info(sva,
+		      "DMA free name=%s => virt=%p iova=%" PRIx64,
+		      esmp->esm_mz->name, esmp->esm_base, esmp->esm_addr);
+
+	rte_free((void *)(esmp->esm_base));
+
+	sva->mcdi_buff_size = 0;
+	memset(esmp, 0, sizeof(*esmp));
+}
+
+static int
+sfc_vdpa_mem_bar_init(struct sfc_vdpa_adapter *sva,
+		      const efx_bar_region_t *mem_ebrp)
+{
+	struct rte_pci_device *pci_dev = sva->pdev;
+	efsys_bar_t *ebp = &sva->mem_bar;
+	struct rte_mem_resource *res =
+		&pci_dev->mem_resource[mem_ebrp->ebr_index];
+
+	SFC_BAR_LOCK_INIT(ebp, pci_dev->name);
+	ebp->esb_rid = mem_ebrp->ebr_index;
+	ebp->esb_dev = pci_dev;
+	ebp->esb_base = res->addr;
+
+	return 0;
+}
+
+static void
+sfc_vdpa_mem_bar_fini(struct sfc_vdpa_adapter *sva)
+{
+	efsys_bar_t *ebp = &sva->mem_bar;
+
+	SFC_BAR_LOCK_DESTROY(ebp);
+	memset(ebp, 0, sizeof(*ebp));
+}
+
+static int
+sfc_vdpa_nic_probe(struct sfc_vdpa_adapter *sva)
+{
+	efx_nic_t *enp = sva->nic;
+	int rc;
+
+	rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
+	if (rc != 0)
+		sfc_vdpa_err(sva, "nic probe failed: %s", rte_strerror(rc));
+
+	return rc;
+}
+
+static int
+sfc_vdpa_estimate_resource_limits(struct sfc_vdpa_adapter *sva)
+{
+	efx_drv_limits_t limits;
+	int rc;
+	uint32_t evq_allocated;
+	uint32_t rxq_allocated;
+	uint32_t txq_allocated;
+	uint32_t max_queue_cnt;
+
+	memset(&limits, 0, sizeof(limits));
+
+	/* Request at least one Rx and Tx queue */
+	limits.edl_min_rxq_count = 1;
+	limits.edl_min_txq_count = 1;
+	/* Management event queue plus event queue for Tx/Rx queue */
+	limits.edl_min_evq_count =
+		1 + RTE_MAX(limits.edl_min_rxq_count, limits.edl_min_txq_count);
+
+	limits.edl_max_rxq_count = SFC_VDPA_MAX_QUEUE_PAIRS;
+	limits.edl_max_txq_count = SFC_VDPA_MAX_QUEUE_PAIRS;
+	limits.edl_max_evq_count = 1 + SFC_VDPA_MAX_QUEUE_PAIRS;
+
+	SFC_VDPA_ASSERT(limits.edl_max_evq_count >= limits.edl_min_rxq_count);
+	SFC_VDPA_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);
+	SFC_VDPA_ASSERT(limits.edl_max_txq_count >= limits.edl_min_rxq_count);
+
+	/* Configure the minimum required resources needed for the
+	 * driver to operate, and the maximum desired resources that the
+	 * driver is capable of using.
+	 */
+	sfc_vdpa_log_init(sva, "set drv limit");
+	efx_nic_set_drv_limits(sva->nic, &limits);
+
+	sfc_vdpa_log_init(sva, "init nic");
+	rc = efx_nic_init(sva->nic);
+	if (rc != 0) {
+		sfc_vdpa_err(sva, "nic init failed: %s", rte_strerror(rc));
+		goto fail_nic_init;
+	}
+
+	/* Find resource dimensions assigned by firmware to this function */
+	rc = efx_nic_get_vi_pool(sva->nic, &evq_allocated, &rxq_allocated,
+				 &txq_allocated);
+	if (rc != 0) {
+		sfc_vdpa_err(sva, "vi pool get failed: %s", rte_strerror(rc));
+		goto fail_get_vi_pool;
+	}
+
+	/* It still may allocate more than maximum, ensure limit */
+	evq_allocated = RTE_MIN(evq_allocated, limits.edl_max_evq_count);
+	rxq_allocated = RTE_MIN(rxq_allocated, limits.edl_max_rxq_count);
+	txq_allocated = RTE_MIN(txq_allocated, limits.edl_max_txq_count);
+
+
+	max_queue_cnt = RTE_MIN(rxq_allocated, txq_allocated);
+	/* Subtract management EVQ not used for traffic */
+	max_queue_cnt = RTE_MIN(evq_allocated - 1, max_queue_cnt);
+
+	SFC_VDPA_ASSERT(max_queue_cnt > 0);
+
+	sva->max_queue_count = max_queue_cnt;
+
+	return 0;
+
+fail_get_vi_pool:
+	efx_nic_fini(sva->nic);
+fail_nic_init:
+	sfc_vdpa_log_init(sva, "failed: %s", rte_strerror(rc));
+	return rc;
+}
+
+int
+sfc_vdpa_hw_init(struct sfc_vdpa_adapter *sva)
+{
+	efx_bar_region_t mem_ebr;
+	efx_nic_t *enp;
+	int rc;
+
+	sfc_vdpa_log_init(sva, "entry");
+
+	sfc_vdpa_log_init(sva, "get family");
+	rc = sfc_efx_family(sva->pdev, &mem_ebr, &sva->family);
+	if (rc != 0)
+		goto fail_family;
+	sfc_vdpa_log_init(sva,
+			  "family is %u, membar is %u,"
+			  "function control window offset is %#" PRIx64,
+			  sva->family, mem_ebr.ebr_index, mem_ebr.ebr_offset);
+
+	sfc_vdpa_log_init(sva, "init mem bar");
+	rc = sfc_vdpa_mem_bar_init(sva, &mem_ebr);
+	if (rc != 0)
+		goto fail_mem_bar_init;
+
+	sfc_vdpa_log_init(sva, "create nic");
+	rte_spinlock_init(&sva->nic_lock);
+	rc = efx_nic_create(sva->family, (efsys_identifier_t *)sva,
+			    &sva->mem_bar, mem_ebr.ebr_offset,
+			    &sva->nic_lock, &enp);
+	if (rc != 0) {
+		sfc_vdpa_err(sva, "nic create failed: %s", rte_strerror(rc));
+		goto fail_nic_create;
+	}
+	sva->nic = enp;
+
+	sfc_vdpa_log_init(sva, "init mcdi");
+	rc = sfc_vdpa_mcdi_init(sva);
+	if (rc != 0) {
+		sfc_vdpa_err(sva, "mcdi init failed: %s", rte_strerror(rc));
+		goto fail_mcdi_init;
+	}
+
+	sfc_vdpa_log_init(sva, "probe nic");
+	rc = sfc_vdpa_nic_probe(sva);
+	if (rc != 0)
+		goto fail_nic_probe;
+
+	sfc_vdpa_log_init(sva, "reset nic");
+	rc = efx_nic_reset(enp);
+	if (rc != 0) {
+		sfc_vdpa_err(sva, "nic reset failed: %s", rte_strerror(rc));
+		goto fail_nic_reset;
+	}
+
+	sfc_vdpa_log_init(sva, "estimate resource limits");
+	rc = sfc_vdpa_estimate_resource_limits(sva);
+	if (rc != 0)
+		goto fail_estimate_rsrc_limits;
+
+	sfc_vdpa_log_init(sva, "done");
+
+	return 0;
+
+fail_estimate_rsrc_limits:
+fail_nic_reset:
+	efx_nic_unprobe(enp);
+
+fail_nic_probe:
+	sfc_vdpa_mcdi_fini(sva);
+
+fail_mcdi_init:
+	sfc_vdpa_log_init(sva, "destroy nic");
+	sva->nic = NULL;
+	efx_nic_destroy(enp);
+
+fail_nic_create:
+	sfc_vdpa_mem_bar_fini(sva);
+
+fail_mem_bar_init:
+fail_family:
+	sfc_vdpa_log_init(sva, "failed: %s", rte_strerror(rc));
+	return rc;
+}
+
+void
+sfc_vdpa_hw_fini(struct sfc_vdpa_adapter *sva)
+{
+	efx_nic_t *enp = sva->nic;
+
+	sfc_vdpa_log_init(sva, "entry");
+
+	sfc_vdpa_log_init(sva, "unprobe nic");
+	efx_nic_unprobe(enp);
+
+	sfc_vdpa_log_init(sva, "mcdi fini");
+	sfc_vdpa_mcdi_fini(sva);
+
+	sfc_vdpa_log_init(sva, "nic fini");
+	efx_nic_fini(enp);
+
+	sfc_vdpa_log_init(sva, "destroy nic");
+	sva->nic = NULL;
+	efx_nic_destroy(enp);
+
+	sfc_vdpa_mem_bar_fini(sva);
+}
diff --git a/drivers/vdpa/sfc/sfc_vdpa_log.h b/drivers/vdpa/sfc/sfc_vdpa_log.h
index 858e5ee..4e7a84f 100644
--- a/drivers/vdpa/sfc/sfc_vdpa_log.h
+++ b/drivers/vdpa/sfc/sfc_vdpa_log.h
@@ -21,6 +21,9 @@ 
 /** Name prefix for the per-device log type used to report basic information */
 #define SFC_VDPA_LOGTYPE_MAIN_STR	SFC_VDPA_LOGTYPE_PREFIX "main"
 
+/** Device MCDI log type name prefix */
+#define SFC_VDPA_LOGTYPE_MCDI_STR	SFC_VDPA_LOGTYPE_PREFIX "mcdi"
+
 #define SFC_VDPA_LOG_PREFIX_MAX	32
 
 /* Log PMD message, automatically add prefix and \n */
diff --git a/drivers/vdpa/sfc/sfc_vdpa_mcdi.c b/drivers/vdpa/sfc/sfc_vdpa_mcdi.c
new file mode 100644
index 0000000..961d2d3
--- /dev/null
+++ b/drivers/vdpa/sfc/sfc_vdpa_mcdi.c
@@ -0,0 +1,74 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright(c) 2020-2021 Xilinx, Inc.
+ */
+
+#include "sfc_efx_mcdi.h"
+
+#include "sfc_vdpa.h"
+#include "sfc_vdpa_debug.h"
+#include "sfc_vdpa_log.h"
+
+static sfc_efx_mcdi_dma_alloc_cb sfc_vdpa_mcdi_dma_alloc;
+static int
+sfc_vdpa_mcdi_dma_alloc(void *cookie, const char *name, size_t len,
+			efsys_mem_t *esmp)
+{
+	struct sfc_vdpa_adapter *sva = cookie;
+
+	return sfc_vdpa_dma_alloc(sva, name, len, esmp);
+}
+
+static sfc_efx_mcdi_dma_free_cb sfc_vdpa_mcdi_dma_free;
+static void
+sfc_vdpa_mcdi_dma_free(void *cookie, efsys_mem_t *esmp)
+{
+	struct sfc_vdpa_adapter *sva = cookie;
+
+	sfc_vdpa_dma_free(sva, esmp);
+}
+
+static sfc_efx_mcdi_sched_restart_cb sfc_vdpa_mcdi_sched_restart;
+static void
+sfc_vdpa_mcdi_sched_restart(void *cookie)
+{
+	RTE_SET_USED(cookie);
+}
+
+static sfc_efx_mcdi_mgmt_evq_poll_cb sfc_vdpa_mcdi_mgmt_evq_poll;
+static void
+sfc_vdpa_mcdi_mgmt_evq_poll(void *cookie)
+{
+	RTE_SET_USED(cookie);
+}
+
+static const struct sfc_efx_mcdi_ops sfc_vdpa_mcdi_ops = {
+	.dma_alloc	= sfc_vdpa_mcdi_dma_alloc,
+	.dma_free	= sfc_vdpa_mcdi_dma_free,
+	.sched_restart  = sfc_vdpa_mcdi_sched_restart,
+	.mgmt_evq_poll  = sfc_vdpa_mcdi_mgmt_evq_poll,
+
+};
+
+int
+sfc_vdpa_mcdi_init(struct sfc_vdpa_adapter *sva)
+{
+	uint32_t logtype;
+
+	sfc_vdpa_log_init(sva, "entry");
+
+	logtype = sfc_vdpa_register_logtype(&(sva->pdev->addr),
+					    SFC_VDPA_LOGTYPE_MCDI_STR,
+					    RTE_LOG_NOTICE);
+
+	return sfc_efx_mcdi_init(&sva->mcdi, logtype,
+				 sva->log_prefix, sva->nic,
+				 &sfc_vdpa_mcdi_ops, sva);
+}
+
+void
+sfc_vdpa_mcdi_fini(struct sfc_vdpa_adapter *sva)
+{
+	sfc_vdpa_log_init(sva, "entry");
+	sfc_efx_mcdi_fini(&sva->mcdi);
+}
diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.c b/drivers/vdpa/sfc/sfc_vdpa_ops.c
new file mode 100644
index 0000000..71696be
--- /dev/null
+++ b/drivers/vdpa/sfc/sfc_vdpa_ops.c
@@ -0,0 +1,129 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright(c) 2020-2021 Xilinx, Inc.
+ */
+
+#include <rte_malloc.h>
+#include <rte_vdpa.h>
+#include <rte_vdpa_dev.h>
+#include <rte_vhost.h>
+
+#include "sfc_vdpa_ops.h"
+#include "sfc_vdpa.h"
+
+/* Dummy functions for mandatory vDPA ops to pass vDPA device registration.
+ * In subsequent patches these ops would be implemented.
+ */
+static int
+sfc_vdpa_get_queue_num(struct rte_vdpa_device *vdpa_dev, uint32_t *queue_num)
+{
+	RTE_SET_USED(vdpa_dev);
+	RTE_SET_USED(queue_num);
+
+	return -1;
+}
+
+static int
+sfc_vdpa_get_features(struct rte_vdpa_device *vdpa_dev, uint64_t *features)
+{
+	RTE_SET_USED(vdpa_dev);
+	RTE_SET_USED(features);
+
+	return -1;
+}
+
+static int
+sfc_vdpa_get_protocol_features(struct rte_vdpa_device *vdpa_dev,
+			       uint64_t *features)
+{
+	RTE_SET_USED(vdpa_dev);
+	RTE_SET_USED(features);
+
+	return -1;
+}
+
+static int
+sfc_vdpa_dev_config(int vid)
+{
+	RTE_SET_USED(vid);
+
+	return -1;
+}
+
+static int
+sfc_vdpa_dev_close(int vid)
+{
+	RTE_SET_USED(vid);
+
+	return -1;
+}
+
+static int
+sfc_vdpa_set_vring_state(int vid, int vring, int state)
+{
+	RTE_SET_USED(vid);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(state);
+
+	return -1;
+}
+
+static int
+sfc_vdpa_set_features(int vid)
+{
+	RTE_SET_USED(vid);
+
+	return -1;
+}
+
+static struct rte_vdpa_dev_ops sfc_vdpa_ops = {
+	.get_queue_num = sfc_vdpa_get_queue_num,
+	.get_features = sfc_vdpa_get_features,
+	.get_protocol_features = sfc_vdpa_get_protocol_features,
+	.dev_conf = sfc_vdpa_dev_config,
+	.dev_close = sfc_vdpa_dev_close,
+	.set_vring_state = sfc_vdpa_set_vring_state,
+	.set_features = sfc_vdpa_set_features,
+};
+
+struct sfc_vdpa_ops_data *
+sfc_vdpa_device_init(void *dev_handle, enum sfc_vdpa_context context)
+{
+	struct sfc_vdpa_ops_data *ops_data;
+	struct rte_pci_device *pci_dev;
+
+	/* Create vDPA ops context */
+	ops_data = rte_zmalloc("vdpa", sizeof(struct sfc_vdpa_ops_data), 0);
+	if (ops_data == NULL)
+		return NULL;
+
+	ops_data->vdpa_context = context;
+	ops_data->dev_handle = dev_handle;
+
+	pci_dev = sfc_vdpa_adapter_by_dev_handle(dev_handle)->pdev;
+
+	/* Register vDPA Device */
+	sfc_vdpa_log_init(dev_handle, "register vDPA device");
+	ops_data->vdpa_dev =
+		rte_vdpa_register_device(&pci_dev->device, &sfc_vdpa_ops);
+	if (ops_data->vdpa_dev == NULL) {
+		sfc_vdpa_err(dev_handle, "vDPA device registration failed");
+		goto fail_register_device;
+	}
+
+	ops_data->state = SFC_VDPA_STATE_INITIALIZED;
+
+	return ops_data;
+
+fail_register_device:
+	rte_free(ops_data);
+	return NULL;
+}
+
+void
+sfc_vdpa_device_fini(struct sfc_vdpa_ops_data *ops_data)
+{
+	rte_vdpa_unregister_device(ops_data->vdpa_dev);
+
+	rte_free(ops_data);
+}
diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.h b/drivers/vdpa/sfc/sfc_vdpa_ops.h
new file mode 100644
index 0000000..817b302
--- /dev/null
+++ b/drivers/vdpa/sfc/sfc_vdpa_ops.h
@@ -0,0 +1,36 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright(c) 2020-2021 Xilinx, Inc.
+ */
+
+#ifndef _SFC_VDPA_OPS_H
+#define _SFC_VDPA_OPS_H
+
+#include <rte_vdpa.h>
+
+#define SFC_VDPA_MAX_QUEUE_PAIRS		1
+
+enum sfc_vdpa_context {
+	SFC_VDPA_AS_PF = 0,
+	SFC_VDPA_AS_VF
+};
+
+enum sfc_vdpa_state {
+	SFC_VDPA_STATE_UNINITIALIZED = 0,
+	SFC_VDPA_STATE_INITIALIZED,
+	SFC_VDPA_STATE_NSTATES
+};
+
+struct sfc_vdpa_ops_data {
+	void				*dev_handle;
+	struct rte_vdpa_device		*vdpa_dev;
+	enum sfc_vdpa_context		vdpa_context;
+	enum sfc_vdpa_state		state;
+};
+
+struct sfc_vdpa_ops_data *
+sfc_vdpa_device_init(void *adapter, enum sfc_vdpa_context context);
+void
+sfc_vdpa_device_fini(struct sfc_vdpa_ops_data *ops_data);
+
+#endif /* _SFC_VDPA_OPS_H */