[v2,02/20] net/ice: support device initialization

Message ID 1543820821-108122-3-git-send-email-wenzhuo.lu@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Qi Zhang
Headers
Series A new net PMD - ice |

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch warning coding style issues

Commit Message

Wenzhuo Lu Dec. 3, 2018, 7:06 a.m. UTC
  Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
Signed-off-by: Jingjing Wu <jingjing.wu@intel.com>
---
 config/common_base                      |   9 +
 drivers/net/Makefile                    |   1 +
 drivers/net/ice/Makefile                |  75 ++++
 drivers/net/ice/ice_ethdev.c            | 643 ++++++++++++++++++++++++++++++++
 drivers/net/ice/ice_ethdev.h            | 348 +++++++++++++++++
 drivers/net/ice/ice_logs.h              |  45 +++
 drivers/net/ice/ice_rxtx.h              | 117 ++++++
 drivers/net/ice/rte_pmd_ice_version.map |   4 +
 mk/rte.app.mk                           |   1 +
 9 files changed, 1243 insertions(+)
 create mode 100644 drivers/net/ice/Makefile
 create mode 100644 drivers/net/ice/ice_ethdev.c
 create mode 100644 drivers/net/ice/ice_ethdev.h
 create mode 100644 drivers/net/ice/ice_logs.h
 create mode 100644 drivers/net/ice/ice_rxtx.h
 create mode 100644 drivers/net/ice/rte_pmd_ice_version.map
  

Comments

Varghese, Vipin Dec. 3, 2018, 9:07 a.m. UTC | #1
Hi,

snipped

> +# Compile burst-oriented ICE PMD driver # CONFIG_RTE_LIBRTE_ICE_PMD=y

Based on ' https://patches.dpdk.org/patch/48488/' it is suggested to configure. But here it is already set to 'y'. Is this correct? If yes, can you update ' https://patches.dpdk.org/patch/48488/'?

snipped
  
Varghese, Vipin Dec. 4, 2018, 4:40 a.m. UTC | #2
Snipped

> +	/* Set the info.ingress_table and info.egress_table
> +	 * for UP translate table. Now just set it to 1:1 map by default
> +	 * -- 0b 111 110 101 100 011 010 001 000 == 0xFAC688
> +	 */
> +	info->ingress_table  = rte_cpu_to_le_32(0x00FAC688);
> +	info->egress_table   = rte_cpu_to_le_32(0x00FAC688);
> +	info->outer_up_table = rte_cpu_to_le_32(0x00FAC688);

Can we use MACRO instead of exact values for ingress, egress and outer_up table.

> +	return 0;
> +}
> +

snipped

> +static int
> +ice_dev_init(struct rte_eth_dev *dev)
> +{
> +	struct rte_pci_device *pci_dev;
> +	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> +	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> +	int ret;
> +
> +	dev->dev_ops = &ice_eth_dev_ops;
> +
> +	pci_dev = RTE_DEV_TO_PCI(dev->device);
> +
> +	rte_eth_copy_pci_info(dev, pci_dev);
> +	pf->adapter = ICE_DEV_PRIVATE_TO_ADAPTER(dev->data-
> >dev_private);
> +	pf->adapter->eth_dev = dev;
> +	pf->dev_data = dev->data;
> +	hw->back = pf->adapter;
> +	hw->hw_addr = (uint8_t *)pci_dev->mem_resource[0].addr;
> +	hw->vendor_id = pci_dev->id.vendor_id;
> +	hw->device_id = pci_dev->id.device_id;
> +	hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;
> +	hw->subsystem_device_id = pci_dev->id.subsystem_device_id;
> +	hw->bus.device = pci_dev->addr.devid;
> +	hw->bus.func = pci_dev->addr.function;
> +
> +	ice_init_controlq_parameter(hw);
> +
> +	ret = ice_init_hw(hw);
> +	if (ret) {
> +		PMD_INIT_LOG(ERR, "Failed to initialize HW");
> +		return -EINVAL;
> +	}

Definition for ice_init_hw in patch 01/20 does not check for primary-secondary. Are we allowing secondary to invoke ice_init_hw if it is initialized by primary?

> +
> +	PMD_INIT_LOG(INFO, "FW %d.%d.%05d API %d.%d",
> +		     hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build,
> +		     hw->api_maj_ver, hw->api_min_ver);
> +

Snipped

> +
> +static int
> +ice_dev_uninit(struct rte_eth_dev *dev) {
> +	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> +	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> +
> +	ICE_PROC_SECONDARY_CHECK_RET_0;

Should not we check if primary is alive and NIC is used or initialized by primary then ' ICE_PROC_SECONDARY_CHECK_RET_0'?

> +
> +	ice_dev_close(dev);
> +
> +	dev->dev_ops = NULL;
> +	dev->rx_pkt_burst = NULL;
> +	dev->tx_pkt_burst = NULL;
> +
> +	rte_free(dev->data->mac_addrs);
> +	dev->data->mac_addrs = NULL;
> +
> +	ice_release_vsi(pf->main_vsi);
> +	ice_sched_cleanup_all(hw);
> +	rte_free(hw->port_info);
> +	ice_shutdown_all_ctrlq(hw);
> +
> +	return 0;
> +}
> +

snipped

> +static void
> +ice_dev_close(struct rte_eth_dev *dev)
> +{
> +	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> +	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> +
> +	ICE_PROC_SECONDARY_CHECK_NO_ERR;
> +

I am just wondering in a multi process (primary-secondary) if primary is killed or exited, then if we try to stop the secondary due to this check the vsi, pool and shutdown is not called. Should not we check if primary is still alive, if yes then 
ICE_PROC_SECONDARY_CHECK_NO_ERR?

> +	ice_res_pool_destroy(&pf->msix_pool);
> +	ice_release_vsi(pf->main_vsi);
> +
> +	ice_shutdown_all_ctrlq(hw);
> +}

snipped
  
Wenzhuo Lu Dec. 6, 2018, 5:01 a.m. UTC | #3
Hi Vipin,

> -----Original Message-----
> From: Varghese, Vipin
> Sent: Tuesday, December 4, 2018 12:41 PM
> To: Lu, Wenzhuo <wenzhuo.lu@intel.com>; dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Yang, Qiming
> <qiming.yang@intel.com>; Li, Xiaoyun <xiaoyun.li@intel.com>; Wu, Jingjing
> <jingjing.wu@intel.com>
> Subject: RE: [dpdk-dev] [PATCH v2 02/20] net/ice: support device
> initialization
> 
> 
> Snipped
> 
> > +	/* Set the info.ingress_table and info.egress_table
> > +	 * for UP translate table. Now just set it to 1:1 map by default
> > +	 * -- 0b 111 110 101 100 011 010 001 000 == 0xFAC688
> > +	 */
> > +	info->ingress_table  = rte_cpu_to_le_32(0x00FAC688);
> > +	info->egress_table   = rte_cpu_to_le_32(0x00FAC688);
> > +	info->outer_up_table = rte_cpu_to_le_32(0x00FAC688);
> 
> Can we use MACRO instead of exact values for ingress, egress and outer_up
> table.
Good suggestion. Will update it in the next version.

> 
> > +	return 0;
> > +}
> > +
> 
> snipped
> 
> > +static int
> > +ice_dev_init(struct rte_eth_dev *dev) {
> > +	struct rte_pci_device *pci_dev;
> > +	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data-
> > >dev_private);
> > +	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data-
> >dev_private);
> > +	int ret;
> > +
> > +	dev->dev_ops = &ice_eth_dev_ops;
> > +
> > +	pci_dev = RTE_DEV_TO_PCI(dev->device);
> > +
> > +	rte_eth_copy_pci_info(dev, pci_dev);
> > +	pf->adapter = ICE_DEV_PRIVATE_TO_ADAPTER(dev->data-
> > >dev_private);
> > +	pf->adapter->eth_dev = dev;
> > +	pf->dev_data = dev->data;
> > +	hw->back = pf->adapter;
> > +	hw->hw_addr = (uint8_t *)pci_dev->mem_resource[0].addr;
> > +	hw->vendor_id = pci_dev->id.vendor_id;
> > +	hw->device_id = pci_dev->id.device_id;
> > +	hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;
> > +	hw->subsystem_device_id = pci_dev->id.subsystem_device_id;
> > +	hw->bus.device = pci_dev->addr.devid;
> > +	hw->bus.func = pci_dev->addr.function;
> > +
> > +	ice_init_controlq_parameter(hw);
> > +
> > +	ret = ice_init_hw(hw);
> > +	if (ret) {
> > +		PMD_INIT_LOG(ERR, "Failed to initialize HW");
> > +		return -EINVAL;
> > +	}
> 
> Definition for ice_init_hw in patch 01/20 does not check for primary-
> secondary. Are we allowing secondary to invoke ice_init_hw if it is initialized
> by primary?
It's a patch split issue. We add the check in later patch. Will put it in this patch in the new version.

> 
> > +
> > +	PMD_INIT_LOG(INFO, "FW %d.%d.%05d API %d.%d",
> > +		     hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build,
> > +		     hw->api_maj_ver, hw->api_min_ver);
> > +
> 
> Snipped
> 
> > +
> > +static int
> > +ice_dev_uninit(struct rte_eth_dev *dev) {
> > +	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data-
> > >dev_private);
> > +	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data-
> >dev_private);
> > +
> > +	ICE_PROC_SECONDARY_CHECK_RET_0;
> 
> Should not we check if primary is alive and NIC is used or initialized by
> primary then ' ICE_PROC_SECONDARY_CHECK_RET_0'?
I think it's not a critical issue if the process is terminated abnormally without uninit.
Comparing with that, I have more concern about this scenario, if the primary process exit and uninit the resource, the secondary process is left alone. And also to me it looks not a good solution to change every PMD for this feature. I don't see many PMD support it. Maybe we'd better not support it now and wait for a better whole picture.
The same below.

> 
> > +
> > +	ice_dev_close(dev);
> > +
> > +	dev->dev_ops = NULL;
> > +	dev->rx_pkt_burst = NULL;
> > +	dev->tx_pkt_burst = NULL;
> > +
> > +	rte_free(dev->data->mac_addrs);
> > +	dev->data->mac_addrs = NULL;
> > +
> > +	ice_release_vsi(pf->main_vsi);
> > +	ice_sched_cleanup_all(hw);
> > +	rte_free(hw->port_info);
> > +	ice_shutdown_all_ctrlq(hw);
> > +
> > +	return 0;
> > +}
> > +
> 
> snipped
> 
> > +static void
> > +ice_dev_close(struct rte_eth_dev *dev) {
> > +	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data-
> >dev_private);
> > +	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data-
> > >dev_private);
> > +
> > +	ICE_PROC_SECONDARY_CHECK_NO_ERR;
> > +
> 
> I am just wondering in a multi process (primary-secondary) if primary is
> killed or exited, then if we try to stop the secondary due to this check the vsi,
> pool and shutdown is not called. Should not we check if primary is still alive,
> if yes then ICE_PROC_SECONDARY_CHECK_NO_ERR?
> 
> > +	ice_res_pool_destroy(&pf->msix_pool);
> > +	ice_release_vsi(pf->main_vsi);
> > +
> > +	ice_shutdown_all_ctrlq(hw);
> > +}
> 
> snipped
  
Varghese, Vipin Dec. 6, 2018, 5:33 a.m. UTC | #4
snipped
> > > +	ice_init_controlq_parameter(hw);
> > > +
> > > +	ret = ice_init_hw(hw);
> > > +	if (ret) {
> > > +		PMD_INIT_LOG(ERR, "Failed to initialize HW");
> > > +		return -EINVAL;
> > > +	}
> >
> > Definition for ice_init_hw in patch 01/20 does not check for primary-
> > secondary. Are we allowing secondary to invoke ice_init_hw if it is
> > initialized by primary?
> It's a patch split issue. We add the check in later patch. Will put it in this patch in
> the new version.
Suggestion: if a comment is kept in the current patch, it will be easier to understand that this is taken care of in a future patch.

For example, if patch 2/20 has a comment stating that support is added in patch 5/20, and then patch 5/20 removes the ToDo, it is easier to read and understand the flow.

> 
> >
> > > +
> > > +	PMD_INIT_LOG(INFO, "FW %d.%d.%05d API %d.%d",
> > > +		     hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build,
> > > +		     hw->api_maj_ver, hw->api_min_ver);
> > > +
> >
> > Snipped
> >
> > > +
> > > +static int
> > > +ice_dev_uninit(struct rte_eth_dev *dev) {
> > > +	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data-
> > > >dev_private);
> > > +	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data-
> > >dev_private);
> > > +
> > > +	ICE_PROC_SECONDARY_CHECK_RET_0;
> >
> > Should not we check if primary is alive and NIC is used or initialized
> > by primary then ' ICE_PROC_SECONDARY_CHECK_RET_0'?
> I think it's not a critical issue if the process is terminate abnormally without uninit.
> Comparing with that, I have more concern about this scenario, if the primary
> process exit and uninit the resource, the secondary process is left alone.
The primary is the application which reserves the huge page memory (malloc, zmalloc, memzone), so when the secondary is killed or stopped the whole huge page area is released. I am a bit confused about what the suggested check is affecting?

 And also
> to me it looks not a good solution to change every PMD for this feature. 
I am not aware of why other PMDs are done in a specific way. In my humble opinion, if there is a right way, let it be used rather than doing it another way.

I don't
> see many PMD support it. Maybe we'd better not support it now and wait for a
> better whole picture.
I wait for others to comment to this approach. 

snipped
  
Wenzhuo Lu Dec. 6, 2018, 6:13 a.m. UTC | #5
Hi Vipin,

> -----Original Message-----
> From: Varghese, Vipin
> Sent: Thursday, December 6, 2018 1:33 PM
> To: Lu, Wenzhuo <wenzhuo.lu@intel.com>; dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; Li, Xiaoyun
> <xiaoyun.li@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>
> Subject: RE: [dpdk-dev] [PATCH v2 02/20] net/ice: support device
> initialization
> 
> snipped
> > > > +	ice_init_controlq_parameter(hw);
> > > > +
> > > > +	ret = ice_init_hw(hw);
> > > > +	if (ret) {
> > > > +		PMD_INIT_LOG(ERR, "Failed to initialize HW");
> > > > +		return -EINVAL;
> > > > +	}
> > >
> > > Definition for ice_init_hw in patch 01/20 does not check for
> > > primary- secondary. Are we allowing secondary to invoke ice_init_hw
> > > if it is initialized by primary?
> > It's a patch split issue. We add the check in later patch. Will put it
> > in this patch in the new version.
> Suggestion in current patch if comment is kept it will be easier to understand
> that it is taken care in future patch.
> 
> Example patch 2/20 has comment stating adding support in patch 5/20.
> Then in patch 5/20 it removes the ToDo it is easier to read and understand
> the flow
I mean I made a mistake that put the check code in a later patch. Actually this code should be put in this patch. I plan to correct it.
But currently I think we're running out of time. I prefer not supporting multi process in this release.

> 
> >
> > >
> > > > +
> > > > +	PMD_INIT_LOG(INFO, "FW %d.%d.%05d API %d.%d",
> > > > +		     hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build,
> > > > +		     hw->api_maj_ver, hw->api_min_ver);
> > > > +
> > >
> > > Snipped
> > >
> > > > +
> > > > +static int
> > > > +ice_dev_uninit(struct rte_eth_dev *dev) {
> > > > +	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data-
> > > > >dev_private);
> > > > +	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data-
> > > >dev_private);
> > > > +
> > > > +	ICE_PROC_SECONDARY_CHECK_RET_0;
> > >
> > > Should not we check if primary is alive and NIC is used or
> > > initialized by primary then ' ICE_PROC_SECONDARY_CHECK_RET_0'?
> > I think it's not a critical issue if the process is terminate abnormally without
> uninit.
> > Comparing with that, I have more concern about this scenario, if the
> > primary process exit and uninit the resource, the secondary process is left
> alone.
> Since primary is application which reserves the huge page memory (malloc,
> zmalloc, memzone). So when secondary is killed or stop whole huge pages
> are released. I am bit confused what is check suggested affecting?
> 
>  And also
> > to me it looks not a good solution to change every PMD for this feature.
> I am not aware about why other PMD are done in specific way. In my
> humble opinion, if there is a right way let it be used rather than doing other
> way.
> 
> I don't
> > see many PMD support it. Maybe we'd better not support it now and wait
> > for a better whole picture.
> I wait for others to comment to this approach.
> 
> snipped
  
Varghese, Vipin Dec. 6, 2018, 6:31 a.m. UTC | #6
snipped
> > > > > +	ice_init_controlq_parameter(hw);
> > > > > +
> > > > > +	ret = ice_init_hw(hw);
> > > > > +	if (ret) {
> > > > > +		PMD_INIT_LOG(ERR, "Failed to initialize HW");
> > > > > +		return -EINVAL;
> > > > > +	}
> > > >
> > > > Definition for ice_init_hw in patch 01/20 does not check for
> > > > primary- secondary. Are we allowing secondary to invoke
> > > > ice_init_hw if it is initialized by primary?
> > > It's a patch split issue. We add the check in later patch. Will put
> > > it in this patch in the new version.
> > Suggestion in current patch if comment is kept it will be easier to
> > understand that it is taken care in future patch.
> >
> > Example patch 2/20 has comment stating adding support in patch 5/20.
> > Then in patch 5/20 it removes the ToDo it is easier to read and
> > understand the flow
> I mean I made a mistake that put the check code in a later patch. Actually this
> code should be put in this patch. I plan to correct it.
> But currently I think we're running out of time. I prefer not supporting multi
> process in this release.
Thanks for clarifying the same. It will be helpful to add 'to do or future items' in the cover letter, code comments and release documents, which helps reviewers, early adopters and later maintainers.

> 
> >
> > >
> > > >
> > > > > +
> > > > > +	PMD_INIT_LOG(INFO, "FW %d.%d.%05d API %d.%d",
> > > > > +		     hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build,
> > > > > +		     hw->api_maj_ver, hw->api_min_ver);
> > > > > +
> > > >
> > > > Snipped
> > > >
> > > > > +
> > > > > +static int
> > > > > +ice_dev_uninit(struct rte_eth_dev *dev) {
> > > > > +	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data-
> > > > > >dev_private);
> > > > > +	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data-
> > > > >dev_private);
> > > > > +
> > > > > +	ICE_PROC_SECONDARY_CHECK_RET_0;
> > > >
> > > > Should not we check if primary is alive and NIC is used or
> > > > initialized by primary then ' ICE_PROC_SECONDARY_CHECK_RET_0'?
> > > I think it's not a critical issue if the process is terminate
> > > abnormally without
> > uninit.
> > > Comparing with that, I have more concern about this scenario, if the
> > > primary process exit and uninit the resource, the secondary process
> > > is left
> > alone.
> > Since primary is application which reserves the huge page memory
> > (malloc, zmalloc, memzone). So when secondary is killed or stop whole
> > huge pages are released. I am bit confused what is check suggested affecting?
> >
> >  And also
> > > to me it looks not a good solution to change every PMD for this feature.
> > I am not aware about why other PMD are done in specific way. In my
> > humble opinion, if there is a right way let it be used rather than
> > doing other way.
> >
> > I don't
> > > see many PMD support it. Maybe we'd better not support it now and
> > > wait for a better whole picture.
> > I wait for others to comment to this approach.
> >
> > snipped
  
Wenzhuo Lu Dec. 6, 2018, 7:04 a.m. UTC | #7
Hi Vipin,

> -----Original Message-----
> From: Varghese, Vipin
> Sent: Thursday, December 6, 2018 2:31 PM
> To: Lu, Wenzhuo <wenzhuo.lu@intel.com>; dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; Li, Xiaoyun
> <xiaoyun.li@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>
> Subject: RE: [dpdk-dev] [PATCH v2 02/20] net/ice: support device
> initialization
> 
> snipped
> > > > > > +	ice_init_controlq_parameter(hw);
> > > > > > +
> > > > > > +	ret = ice_init_hw(hw);
> > > > > > +	if (ret) {
> > > > > > +		PMD_INIT_LOG(ERR, "Failed to initialize HW");
> > > > > > +		return -EINVAL;
> > > > > > +	}
> > > > >
> > > > > Definition for ice_init_hw in patch 01/20 does not check for
> > > > > primary- secondary. Are we allowing secondary to invoke
> > > > > ice_init_hw if it is initialized by primary?
> > > > It's a patch split issue. We add the check in later patch. Will
> > > > put it in this patch in the new version.
> > > Suggestion in current patch if comment is kept it will be easier to
> > > understand that it is taken care in future patch.
> > >
> > > Example patch 2/20 has comment stating adding support in patch 5/20.
> > > Then in patch 5/20 it removes the ToDo it is easier to read and
> > > understand the flow
> > I mean I made a mistake that put the check code in a later patch.
> > Actually this code should be put in this patch. I plan to correct it.
> > But currently I think we're running out of time. I prefer not
> > supporting multi process in this release.
> Thanks for clarifying the same. It will helpful to add 'to do or future items' in
> cover letter, code comment and release documents which helps reviewers,
> early adopters and later maintainers.
I'd like to suggest focusing on what we have. Sorry, for many reasons it's not appropriate to talk too much about what we'll do in the future. Internally we have a plan, but it keeps changing, and some things are still under investigation...

> 
> >
> > >
> > > >
> > > > >
> > > > > > +
> > > > > > +	PMD_INIT_LOG(INFO, "FW %d.%d.%05d API %d.%d",
> > > > > > +		     hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build,
> > > > > > +		     hw->api_maj_ver, hw->api_min_ver);
> > > > > > +
> > > > >
> > > > > Snipped
> > > > >
> > > > > > +
> > > > > > +static int
> > > > > > +ice_dev_uninit(struct rte_eth_dev *dev) {
> > > > > > +	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data-
> > > > > > >dev_private);
> > > > > > +	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data-
> > > > > >dev_private);
> > > > > > +
> > > > > > +	ICE_PROC_SECONDARY_CHECK_RET_0;
> > > > >
> > > > > Should not we check if primary is alive and NIC is used or
> > > > > initialized by primary then ' ICE_PROC_SECONDARY_CHECK_RET_0'?
> > > > I think it's not a critical issue if the process is terminate
> > > > abnormally without
> > > uninit.
> > > > Comparing with that, I have more concern about this scenario, if
> > > > the primary process exit and uninit the resource, the secondary
> > > > process is left
> > > alone.
> > > Since primary is application which reserves the huge page memory
> > > (malloc, zmalloc, memzone). So when secondary is killed or stop
> > > whole huge pages are released. I am bit confused what is check
> suggested affecting?
> > >
> > >  And also
> > > > to me it looks not a good solution to change every PMD for this feature.
> > > I am not aware about why other PMD are done in specific way. In my
> > > humble opinion, if there is a right way let it be used rather than
> > > doing other way.
> > >
> > > I don't
> > > > see many PMD support it. Maybe we'd better not support it now and
> > > > wait for a better whole picture.
> > > I wait for others to comment to this approach.
> > >
> > > snipped
  
Varghese, Vipin Dec. 13, 2018, 5:16 a.m. UTC | #8
Adding to mailing list for the last update

> -----Original Message-----
> From: Lu, Wenzhuo
> Sent: Thursday, December 6, 2018 1:19 PM
> To: Zhang, Qi Z <qi.z.zhang@intel.com>; Varghese, Vipin
> <vipin.varghese@intel.com>
> Cc: Yigit, Ferruh <ferruh.yigit@intel.com>; Zhang, Helin
> <helin.zhang@intel.com>; Xu, Qian Q <qian.q.xu@intel.com>
> Subject: RE: [dpdk-dev] [PATCH v2 02/20] net/ice: support device initialization
> 
> Yes, after discussing with colleagues, I think this feature is not well designed and
> implemented. Will remove it from this release.
> 
> 
> Best regards
> Wenzhuo Lu
> 
> 
> > -----Original Message-----
> > From: Zhang, Qi Z
> > Sent: Thursday, December 6, 2018 3:46 PM
> > To: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Varghese, Vipin
> > <vipin.varghese@intel.com>
> > Cc: Yigit, Ferruh <ferruh.yigit@intel.com>; Zhang, Helin
> > <helin.zhang@intel.com>; Xu, Qian Q <qian.q.xu@intel.com>
> > Subject: RE: [dpdk-dev] [PATCH v2 02/20] net/ice: support device
> > initialization
> >
> > Yes, I saw that, so there is no gap for this,  we can simply set this
> > feature to N in our current release.
> >
> > Thanks
> > Qi
> >
> > > -----Original Message-----
> > > From: Lu, Wenzhuo
> > > Sent: Thursday, December 6, 2018 3:43 PM
> > > To: Zhang, Qi Z <qi.z.zhang@intel.com>; Varghese, Vipin
> > > <vipin.varghese@intel.com>
> > > Cc: Yigit, Ferruh <ferruh.yigit@intel.com>; Zhang, Helin
> > > <helin.zhang@intel.com>; Xu, Qian Q <qian.q.xu@intel.com>
> > > Subject: RE: [dpdk-dev] [PATCH v2 02/20] net/ice: support device
> > > initialization
> > >
> > > Hi Qi,
> > > I think we have "secondary support", it's named as " Multiprocess
> > > aware". We can omit that.
> > >
> > > Best regards
> > > Wenzhuo Lu
> > >
> > >
> > > > -----Original Message-----
> > > > From: Zhang, Qi Z
> > > > Sent: Thursday, December 6, 2018 3:31 PM
> > > > To: Varghese, Vipin <vipin.varghese@intel.com>
> > > > Cc: Yigit, Ferruh <ferruh.yigit@intel.com>; Lu, Wenzhuo
> > > > <wenzhuo.lu@intel.com>; Zhang, Helin <helin.zhang@intel.com>; Xu,
> > > > Qian Q <qian.q.xu@intel.com>
> > > > Subject: RE: [dpdk-dev] [PATCH v2 02/20] net/ice: support device
> > > > initialization
> > > >
> > > > Hi Vipin:
> > > >
> > > > 	I saw you observed the missing feature description of CPK
> > > > document, from my view it's not a CPK specific issue, but a
> > > > generic issue as missing item in exist nic feature list.
> > > > 	I'm thinking if you could summarize all the gap from DTS view and
> > > > raise a Jira case to DPDK team, so we can add this to future
> > > > development plan.
> > > >
> > > > 	So far what I captured is
> > > > 	1. tx loopback
> > > > 	2. secondary support
> > > >
> > > > 	What do you think about it?
> > > >
> > > > Thanks
> > > > Qi
> > > >
> > > > > -----Original Message-----
> > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Lu, Wenzhuo
> > > > > Sent: Thursday, December 6, 2018 3:04 PM
> > > > > To: Varghese, Vipin <vipin.varghese@intel.com>; dev@dpdk.org
> > > > > Cc: Yang, Qiming <qiming.yang@intel.com>; Li, Xiaoyun
> > > > > <xiaoyun.li@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>
> > > > > Subject: Re: [dpdk-dev] [PATCH v2 02/20] net/ice: support device
> > > > > initialization
> > > > >
> > > > > Hi Vipin,
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Varghese, Vipin
> > > > > > Sent: Thursday, December 6, 2018 2:31 PM
> > > > > > To: Lu, Wenzhuo <wenzhuo.lu@intel.com>; dev@dpdk.org
> > > > > > Cc: Yang, Qiming <qiming.yang@intel.com>; Li, Xiaoyun
> > > > > > <xiaoyun.li@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>
> > > > > > Subject: RE: [dpdk-dev] [PATCH v2 02/20] net/ice: support
> > > > > > device initialization
> > > > > >
> > > > > > snipped
> > > > > > > > > > > +	ice_init_controlq_parameter(hw);
> > > > > > > > > > > +
> > > > > > > > > > > +	ret = ice_init_hw(hw);
> > > > > > > > > > > +	if (ret) {
> > > > > > > > > > > +		PMD_INIT_LOG(ERR, "Failed to initialize
> > HW");
> > > > > > > > > > > +		return -EINVAL;
> > > > > > > > > > > +	}
> > > > > > > > > >
> > > > > > > > > > Definition for ice_init_hw in patch 01/20 does not
> > > > > > > > > > check for
> > > > > > > > > > primary- secondary. Are we allowing secondary to
> > > > > > > > > > invoke ice_init_hw if it is initialized by primary?
> > > > > > > > > It's a patch split issue. We add the check in later patch.
> > > > > > > > > Will put it in this patch in the new version.
> > > > > > > > Suggestion in current patch if comment is kept it will be
> > > > > > > > easier to understand that it is taken care in future patch.
> > > > > > > >
> > > > > > > > Example patch 2/20 has comment stating adding support in
> > > > > > > > patch
> > > > 5/20.
> > > > > > > > Then in patch 5/20 it removes the ToDo it is easier to
> > > > > > > > read and understand the flow
> > > > > > > I mean I made a mistake that put the check code in a later patch.
> > > > > > > Actually this code should be put in this patch. I plan to correct it.
> > > > > > > But currently I think we're running out of time. I prefer
> > > > > > > not supporting multi process in this release.
> > > > > > Thanks for clarifying the same. It will helpful to add 'to do
> > > > > > or future items' in cover letter, code comment and release
> > > > > > documents which helps reviewers, early adopters and later
> > maintainers.
> > > > > I'd like to suggest focusing on what we have. Sorry, for many
> > > > > reasons it's not appropriate to talk too much about we'll do in
> > > > > the
> > future.
> > > > > Like Internally we have a plan, but it keeps changing. Like
> > > > > something is still
> > > > under investigation...
> > > > >
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > +
> > > > > > > > > > > +	PMD_INIT_LOG(INFO, "FW %d.%d.%05d API %d.%d",
> > > > > > > > > > > +		     hw->fw_maj_ver, hw->fw_min_ver,
> > > hw->fw_build,
> > > > > > > > > > > +		     hw->api_maj_ver, hw->api_min_ver);
> > > > > > > > > > > +
> > > > > > > > > >
> > > > > > > > > > Snipped
> > > > > > > > > >
> > > > > > > > > > > +
> > > > > > > > > > > +static int
> > > > > > > > > > > +ice_dev_uninit(struct rte_eth_dev *dev) {
> > > > > > > > > > > +	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev-
> > >data-
> > > > > > > > > > > >dev_private);
> > > > > > > > > > > +	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev-
> > >data-
> > > > > > > > > > >dev_private);
> > > > > > > > > > > +
> > > > > > > > > > > +	ICE_PROC_SECONDARY_CHECK_RET_0;
> > > > > > > > > >
> > > > > > > > > > Should not we check if primary is alive and NIC is
> > > > > > > > > > used or initialized by primary then '
> > > > ICE_PROC_SECONDARY_CHECK_RET_0'?
> > > > > > > > > I think it's not a critical issue if the process is
> > > > > > > > > terminate abnormally without
> > > > > > > > uninit.
> > > > > > > > > Comparing with that, I have more concern about this
> > > > > > > > > scenario, if the primary process exit and uninit the
> > > > > > > > > resource, the secondary process is left
> > > > > > > > alone.
> > > > > > > > Since primary is application which reserves the huge page
> > > > > > > > memory (malloc, zmalloc, memzone). So when secondary is
> > > > > > > > killed or stop whole huge pages are released. I am bit
> > > > > > > > confused what is check
> > > > > > suggested affecting?
> > > > > > > >
> > > > > > > >  And also
> > > > > > > > > to me it looks not a good solution to change every PMD
> > > > > > > > > for this
> > > > feature.
> > > > > > > > I am not aware about why other PMD are done in specific way.
> > > > > > > > In my humble opinion, if there is a right way let it be
> > > > > > > > used rather than doing other way.
> > > > > > > >
> > > > > > > > I don't
> > > > > > > > > see many PMD support it. Maybe we'd better not support
> > > > > > > > > it now and wait for a better whole picture.
> > > > > > > > I wait for others to comment to this approach.
> > > > > > > >
> > > > > > > > snipped
  

Patch

diff --git a/config/common_base b/config/common_base
index d12ae98..a342760 100644
--- a/config/common_base
+++ b/config/common_base
@@ -297,6 +297,15 @@  CONFIG_RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE=y
 CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y
 
 #
+# Compile burst-oriented ICE PMD driver
+#
+CONFIG_RTE_LIBRTE_ICE_PMD=y
+CONFIG_RTE_LIBRTE_ICE_DEBUG_RX=n
+CONFIG_RTE_LIBRTE_ICE_DEBUG_TX=n
+CONFIG_RTE_LIBRTE_ICE_DEBUG_TX_FREE=n
+CONFIG_RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC=y
+CONFIG_RTE_LIBRTE_ICE_16BYTE_RX_DESC=n
+
 # Compile burst-oriented AVF PMD driver
 #
 CONFIG_RTE_LIBRTE_AVF_PMD=y
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index c0386fe..670d7f7 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -30,6 +30,7 @@  DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe
 DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
 DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
+DIRS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice
 DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
 DIRS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += liquidio
 DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
diff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile
new file mode 100644
index 0000000..5af66d9
--- /dev/null
+++ b/drivers/net/ice/Makefile
@@ -0,0 +1,75 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_ice.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+LDLIBS += -lrte_eal -lrte_ethdev -lrte_kvargs -lrte_bus_pci
+
+EXPORT_MAP := rte_pmd_ice_version.map
+
+LIBABIVER := 1
+
+#
+# Add extra flags for base driver files (also known as shared code)
+# to disable warnings
+#
+ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y)
+CFLAGS_BASE_DRIVER = -wd593 -wd188
+else ifeq ($(CONFIG_RTE_TOOLCHAIN_CLANG),y)
+CFLAGS_BASE_DRIVER += -Wno-sign-compare
+CFLAGS_BASE_DRIVER += -Wno-unused-value
+CFLAGS_BASE_DRIVER += -Wno-unused-parameter
+CFLAGS_BASE_DRIVER += -Wno-strict-aliasing
+CFLAGS_BASE_DRIVER += -Wno-format
+CFLAGS_BASE_DRIVER += -Wno-missing-field-initializers
+CFLAGS_BASE_DRIVER += -Wno-pointer-to-int-cast
+CFLAGS_BASE_DRIVER += -Wno-format-nonliteral
+CFLAGS_BASE_DRIVER += -Wno-unused-variable
+else
+CFLAGS_BASE_DRIVER  = -Wno-sign-compare
+CFLAGS_BASE_DRIVER += -Wno-unused-value
+CFLAGS_BASE_DRIVER += -Wno-unused-parameter
+CFLAGS_BASE_DRIVER += -Wno-strict-aliasing
+CFLAGS_BASE_DRIVER += -Wno-format
+CFLAGS_BASE_DRIVER += -Wno-missing-field-initializers
+CFLAGS_BASE_DRIVER += -Wno-pointer-to-int-cast
+CFLAGS_BASE_DRIVER += -Wno-format-nonliteral
+CFLAGS_BASE_DRIVER += -Wno-format-security
+CFLAGS_BASE_DRIVER += -Wno-unused-variable
+
+ifeq ($(shell test $(GCC_VERSION) -ge 44 && echo 1), 1)
+CFLAGS_BASE_DRIVER += -Wno-unused-but-set-variable
+endif
+
+endif
+OBJS_BASE_DRIVER=$(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c)))
+$(foreach obj, $(OBJS_BASE_DRIVER), $(eval CFLAGS_$(obj)+=$(CFLAGS_BASE_DRIVER)))
+
+VPATH += $(SRCDIR)/base
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_controlq.c
+SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_common.c
+SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_sched.c
+SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_switch.c
+SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_nvm.c
+
+SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_ethdev.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += lib/librte_eal lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += lib/librte_net
+DEPDIRS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
new file mode 100644
index 0000000..6ea9bf0
--- /dev/null
+++ b/drivers/net/ice/ice_ethdev.c
@@ -0,0 +1,643 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_ethdev_pci.h>
+
+#include "base/ice_sched.h"
+#include "ice_ethdev.h"
+#include "ice_rxtx.h"
+
+#define ICE_MAX_QP_NUM "max_queue_pair_num"
+#define ICE_DFLT_OUTER_TAG_TYPE ICE_AQ_VSI_OUTER_TAG_VLAN_9100
+
+int ice_logtype_init;
+int ice_logtype_driver;
+
+static void ice_dev_close(struct rte_eth_dev *dev);
+
+static const struct rte_pci_id pci_id_ice_map[] = {
+	{ RTE_PCI_DEVICE(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_BACKPLANE) },
+	{ RTE_PCI_DEVICE(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_QSFP) },
+	{ RTE_PCI_DEVICE(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_SFP) },
+	{ .vendor_id = 0, /* sentinel */ },
+};
+
+static const struct eth_dev_ops ice_eth_dev_ops = {
+	.dev_configure                = NULL,
+};
+
+/* Preset ring lengths and buffer sizes for the admin and mailbox
+ * control queues before ice_init_hw() allocates them.
+ */
+static void
+ice_init_controlq_parameter(struct ice_hw *hw)
+{
+	/* fields for adminq */
+	hw->adminq.num_rq_entries = ICE_ADMINQ_LEN;
+	hw->adminq.num_sq_entries = ICE_ADMINQ_LEN;
+	hw->adminq.rq_buf_size = ICE_ADMINQ_BUF_SZ;
+	hw->adminq.sq_buf_size = ICE_ADMINQ_BUF_SZ;
+
+	/* fields for mailboxq, DPDK used as PF host */
+	hw->mailboxq.num_rq_entries = ICE_MAILBOXQ_LEN;
+	hw->mailboxq.num_sq_entries = ICE_MAILBOXQ_LEN;
+	hw->mailboxq.rq_buf_size = ICE_MAILBOXQ_BUF_SZ;
+	hw->mailboxq.sq_buf_size = ICE_MAILBOXQ_BUF_SZ;
+}
+
+/**
+ * Devarg handler for ICE_MAX_QP_NUM: parse a decimal queue-pair count.
+ * Returns the parsed value (> 0) on success, -1 on invalid input.
+ */
+static int
+ice_check_qp_num(const char *key, const char *qp_value,
+		 __rte_unused void *opaque)
+{
+	char *end = NULL;
+	int num = 0;
+
+	while (isblank(*qp_value))
+		qp_value++;
+
+	/* strtoul() only sets errno on failure; clear any stale value
+	 * left behind by earlier library calls so the check below does
+	 * not reject a perfectly valid number.
+	 */
+	errno = 0;
+	num = strtoul(qp_value, &end, 10);
+
+	if (!num || (*end == '-') || errno) {
+		PMD_DRV_LOG(WARNING, "invalid value:\"%s\" for key:\"%s\", "
+			    "value must be > 0",
+			    qp_value, key);
+		return -1;
+	}
+
+	return num;
+}
+
+/**
+ * Read the optional "max_queue_pair_num" device argument.
+ * Returns the configured queue-pair count (> 0), or 0 when the
+ * argument is absent or invalid (best-effort, as before).
+ */
+static int
+ice_config_max_queue_pair_num(struct rte_devargs *devargs)
+{
+	struct rte_kvargs *kvlist;
+	const char *queue_num_key = ICE_MAX_QP_NUM;
+	uint32_t i;
+	int ret = 0;
+
+	if (!devargs)
+		return 0;
+
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (!kvlist)
+		return 0;
+
+	if (!rte_kvargs_count(kvlist, queue_num_key)) {
+		rte_kvargs_free(kvlist);
+		return 0;
+	}
+
+	/* rte_kvargs_process() only reports 0/-1 and discards the
+	 * handler's positive return value, so the original code always
+	 * returned 0 here and the devarg never took effect. Walk the
+	 * pairs directly; the last valid occurrence of the key wins.
+	 */
+	for (i = 0; i < kvlist->count; i++) {
+		const struct rte_kvargs_pair *pair = &kvlist->pairs[i];
+
+		if (strcmp(pair->key, queue_num_key))
+			continue;
+		ret = ice_check_qp_num(pair->key, pair->value, NULL);
+		if (ret < 0) {
+			rte_kvargs_free(kvlist);
+			return 0;
+		}
+	}
+	rte_kvargs_free(kvlist);
+
+	return ret;
+}
+
+/**
+ * Set up a resource pool covering indexes [base, base + num): the whole
+ * range starts on the free list as a single contiguous entry.
+ * Returns 0 on success, -EINVAL for bad arguments, -ENOMEM when the
+ * initial list entry cannot be allocated.
+ */
+static int
+ice_res_pool_init(struct ice_res_pool_info *pool, uint32_t base,
+		  uint32_t num)
+{
+	struct pool_entry *entry;
+
+	if (!pool || !num)
+		return -EINVAL;
+
+	entry = rte_zmalloc("ice", sizeof(*entry), 0);
+	if (!entry) {
+		PMD_INIT_LOG(ERR,
+			     "Failed to allocate memory for resource pool");
+		return -ENOMEM;
+	}
+
+	/* queue heap initialize */
+	pool->num_free = num;
+	pool->num_alloc = 0;
+	pool->base = base;
+	LIST_INIT(&pool->alloc_list);
+	LIST_INIT(&pool->free_list);
+
+	/* Initialize element: entry bases are relative to pool->base */
+	entry->base = 0;
+	entry->len = num;
+
+	LIST_INSERT_HEAD(&pool->free_list, entry, next);
+	return 0;
+}
+
+/**
+ * Allocate @num contiguous entries from the pool with a best-fit scan
+ * of the free list.
+ * Returns the absolute base index of the allocated range
+ * (entry base + pool->base), or a negative errno on failure.
+ */
+static int
+ice_res_pool_alloc(struct ice_res_pool_info *pool,
+		   uint16_t num)
+{
+	struct pool_entry *entry, *valid_entry;
+
+	if (!pool || !num) {
+		PMD_INIT_LOG(ERR, "Invalid parameter");
+		return -EINVAL;
+	}
+
+	if (pool->num_free < num) {
+		PMD_INIT_LOG(ERR, "No resource. ask:%u, available:%u",
+			     num, pool->num_free);
+		return -ENOMEM;
+	}
+
+	valid_entry = NULL;
+	/* Look up the free list for the best fit: an exact-size match
+	 * wins immediately, otherwise the smallest entry that still fits.
+	 */
+	LIST_FOREACH(entry, &pool->free_list, next) {
+		if (entry->len >= num) {
+			/* Find best one */
+			if (entry->len == num) {
+				valid_entry = entry;
+				break;
+			}
+			if (!valid_entry ||
+			    valid_entry->len > entry->len)
+				valid_entry = entry;
+		}
+	}
+
+	/* No free entry can satisfy the request */
+	if (!valid_entry) {
+		PMD_INIT_LOG(ERR, "No valid entry found");
+		return -ENOMEM;
+	}
+	/**
+	 * The entry has exactly the requested length:
+	 * move it wholesale off the free list.
+	 */
+	if (valid_entry->len == num) {
+		LIST_REMOVE(valid_entry, next);
+	} else {
+		/**
+		 * The entry is larger than requested: carve a new entry
+		 * of length @num off its front and shrink the remaining
+		 * free entry's base/length accordingly.
+		 */
+		entry = rte_zmalloc("res_pool", sizeof(*entry), 0);
+		if (!entry) {
+			PMD_INIT_LOG(ERR,
+				     "Failed to allocate memory for "
+				     "resource pool");
+			return -ENOMEM;
+		}
+		entry->base = valid_entry->base;
+		entry->len = num;
+		valid_entry->base += num;
+		valid_entry->len -= num;
+		valid_entry = entry;
+	}
+
+	/* Insert it into alloc list, not sorted */
+	LIST_INSERT_HEAD(&pool->alloc_list, valid_entry, next);
+
+	pool->num_free -= valid_entry->len;
+	pool->num_alloc += valid_entry->len;
+
+	return valid_entry->base + pool->base;
+}
+
+/* Release every entry on both the alloc and free lists and reset all
+ * pool counters. A NULL pool is a no-op. The next pointer is captured
+ * before each free so the list can be walked while being torn down.
+ */
+static void
+ice_res_pool_destroy(struct ice_res_pool_info *pool)
+{
+	struct pool_entry *entry, *next_entry;
+
+	if (!pool)
+		return;
+
+	for (entry = LIST_FIRST(&pool->alloc_list);
+	     entry && (next_entry = LIST_NEXT(entry, next), 1);
+	     entry = next_entry) {
+		LIST_REMOVE(entry, next);
+		rte_free(entry);
+	}
+
+	for (entry = LIST_FIRST(&pool->free_list);
+	     entry && (next_entry = LIST_NEXT(entry, next), 1);
+	     entry = next_entry) {
+		LIST_REMOVE(entry, next);
+		rte_free(entry);
+	}
+
+	pool->num_free = 0;
+	pool->num_alloc = 0;
+	pool->base = 0;
+	LIST_INIT(&pool->alloc_list);
+	LIST_INIT(&pool->free_list);
+}
+
+/* Fill the default RSS fields of a VSI context: LUT sourced from the
+ * VSI, Toeplitz hash scheme, and the TC override bit set.
+ */
+static void
+ice_vsi_config_default_rss(struct ice_aqc_vsi_props *info)
+{
+	/* Set VSI LUT selection */
+	info->q_opt_rss = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI &
+			  ICE_AQ_VSI_Q_OPT_RSS_LUT_M;
+	/* Set Hash scheme */
+	info->q_opt_rss |= ICE_AQ_VSI_Q_OPT_RSS_TPLZ &
+			   ICE_AQ_VSI_Q_OPT_RSS_HASH_M;
+	/* enable TC */
+	info->q_opt_tc = ICE_AQ_VSI_Q_OPT_TC_OVR_M;
+}
+
+/* Map TC0 (the only supported TC) onto the VSI's queues and fill the
+ * corresponding fields of the VSI context, including 1:1 UP translate
+ * tables.
+ * NOTE(review): the bsf adjustment keeps only the lowest set bit of
+ * nb_qps (e.g. 12 -> 4), not the largest power of two below it (8);
+ * and rte_bsf32(0) is undefined, so callers must guarantee
+ * vsi->nb_qps >= 1. Confirm both are intended.
+ */
+static enum ice_status
+ice_vsi_config_tc_queue_mapping(struct ice_vsi *vsi,
+				struct ice_aqc_vsi_props *info,
+				uint8_t enabled_tcmap)
+{
+	uint16_t bsf, qp_idx;
+
+	/* default tc 0 now. Multi-TC supporting need to be done later.
+	 * Configure TC and queue mapping parameters, for enabled TC,
+	 * allocate qpnum_per_tc queues to this traffic.
+	 */
+	if (enabled_tcmap != 0x01) {
+		PMD_INIT_LOG(ERR, "only TC0 is supported");
+		return -ENOTSUP;
+	}
+
+	vsi->nb_qps = RTE_MIN(vsi->nb_qps, ICE_MAX_Q_PER_TC);
+	bsf = rte_bsf32(vsi->nb_qps);
+	/* Adjust the queue number to actual queues that can be applied */
+	vsi->nb_qps = 0x1 << bsf;
+
+	qp_idx = 0;
+	/* Set tc and queue mapping with VSI */
+	info->tc_mapping[0] = rte_cpu_to_le_16((qp_idx <<
+						ICE_AQ_VSI_TC_Q_OFFSET_S) |
+					       (bsf << ICE_AQ_VSI_TC_Q_NUM_S));
+
+	/* Associate queue number with VSI */
+	info->mapping_flags |= rte_cpu_to_le_16(ICE_AQ_VSI_Q_MAP_CONTIG);
+	info->q_mapping[0] = rte_cpu_to_le_16(vsi->base_queue);
+	info->q_mapping[1] = rte_cpu_to_le_16(vsi->nb_qps);
+	info->valid_sections |=
+		rte_cpu_to_le_16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
+	/* Set the info.ingress_table and info.egress_table
+	 * for UP translate table. Now just set it to 1:1 map by default
+	 * -- 0b 111 110 101 100 011 010 001 000 == 0xFAC688
+	 */
+	info->ingress_table  = rte_cpu_to_le_32(0x00FAC688);
+	info->egress_table   = rte_cpu_to_le_32(0x00FAC688);
+	info->outer_up_table = rte_cpu_to_le_32(0x00FAC688);
+	return 0;
+}
+
+/* Validate the port's factory MAC (lan_addr), mirror it into perm_addr
+ * and publish it as the ethdev's single default MAC address.
+ * Returns 0 on success, -EINVAL for an invalid address, -ENOMEM when
+ * the ethdev MAC array cannot be allocated.
+ */
+static int
+ice_init_mac_address(struct rte_eth_dev *dev)
+{
+	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	if (!is_unicast_ether_addr
+		((struct ether_addr *)hw->port_info[0].mac.lan_addr)) {
+		PMD_INIT_LOG(ERR, "Invalid MAC address");
+		return -EINVAL;
+	}
+
+	ether_addr_copy((struct ether_addr *)hw->port_info[0].mac.lan_addr,
+			(struct ether_addr *)hw->port_info[0].mac.perm_addr);
+
+	dev->data->mac_addrs = rte_zmalloc("ice", sizeof(struct ether_addr), 0);
+	if (!dev->data->mac_addrs) {
+		PMD_INIT_LOG(ERR,
+			     "Failed to allocate memory to store mac address");
+		return -ENOMEM;
+	}
+	/* store it to dev data */
+	ether_addr_copy((struct ether_addr *)hw->port_info[0].mac.perm_addr,
+			&dev->data->mac_addrs[0]);
+	return 0;
+}
+
+/* Initialize SW parameters of the PF: the maximum number of LAN queue
+ * pairs, taken from the devargs when present, otherwise from the HW
+ * capabilities. Always returns 0.
+ */
+static int
+ice_pf_sw_init(struct rte_eth_dev *dev)
+{
+	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+	struct ice_hw *hw = ICE_PF_TO_HW(pf);
+	int qp_num;
+
+	/* Parse the devargs once; the original called
+	 * ice_config_max_queue_pair_num() twice, walking the same
+	 * key-value list a second time for no benefit.
+	 */
+	qp_num = ice_config_max_queue_pair_num(dev->device->devargs);
+	if (qp_num > 0)
+		pf->lan_nb_qp_max = (uint16_t)qp_num;
+	else
+		pf->lan_nb_qp_max =
+			(uint16_t)RTE_MIN(hw->func_caps.common_cap.num_txq,
+					  hw->func_caps.common_cap.num_rxq);
+
+	pf->lan_nb_qps = pf->lan_nb_qp_max;
+
+	return 0;
+}
+
+/* Create a VSI of the given type (only ICE_VSI_PF is supported today):
+ * build the VSI context (default RSS, VLAN mode, TC0 queue mapping),
+ * reserve MSIX vectors from the PF pool, issue the add-VSI admin
+ * command and configure the TX scheduler for TC0.
+ * Returns the new VSI, or NULL on failure.
+ */
+static struct ice_vsi *
+ice_setup_vsi(struct ice_pf *pf, enum ice_vsi_type type)
+{
+	struct ice_hw *hw = ICE_PF_TO_HW(pf);
+	struct ice_vsi *vsi = NULL;
+	struct ice_vsi_ctx vsi_ctx;
+	int ret;
+	uint16_t max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	uint8_t tc_bitmap = 0x1;
+
+	/* hw->num_lports = 1 in NIC mode */
+	vsi = rte_zmalloc("ice_vsi", sizeof(struct ice_vsi), 0);
+	if (!vsi)
+		return NULL;
+
+	/* vsi indexes are never recycled; see struct ice_pf */
+	vsi->idx = pf->next_vsi_idx;
+	pf->next_vsi_idx++;
+	vsi->type = type;
+	vsi->adapter = ICE_PF_TO_ADAPTER(pf);
+	vsi->max_macaddrs = ICE_NUM_MACADDR_MAX;
+	vsi->vlan_anti_spoof_on = 0;
+	vsi->vlan_filter_on = 1;
+	TAILQ_INIT(&vsi->mac_list);
+	TAILQ_INIT(&vsi->vlan_list);
+
+	memset(&vsi_ctx, 0, sizeof(vsi_ctx));
+	/* base_queue is used in queue mapping of VSI add/update command.
+	 * Suppose vsi->base_queue is 0 now, don't consider SRIOV, VMDQ
+	 * cases in the first stage. Only Main VSI.
+	 */
+	vsi->base_queue = 0;
+	switch (type) {
+	case ICE_VSI_PF:
+		vsi->nb_qps = pf->lan_nb_qps;
+		ice_vsi_config_default_rss(&vsi_ctx.info);
+		vsi_ctx.alloc_from_pool = true;
+		vsi_ctx.flags = ICE_AQ_VSI_TYPE_PF;
+		/* switch_id is queried by get_switch_config aq, which is done
+		 * by ice_init_hw
+		 */
+		vsi_ctx.info.sw_id = hw->port_info->sw_id;
+		vsi_ctx.info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
+		/* Allow all untagged or tagged packets */
+		vsi_ctx.info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL;
+		vsi_ctx.info.vlan_flags |= ICE_AQ_VSI_VLAN_EMOD_NOTHING;
+		vsi_ctx.info.q_opt_rss = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF |
+					 ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
+		/* Enable VLAN/UP trip */
+		ret = ice_vsi_config_tc_queue_mapping(vsi,
+						      &vsi_ctx.info,
+						      ICE_DEFAULT_TCMAP);
+		if (ret) {
+			PMD_INIT_LOG(ERR,
+				     "tc queue mapping with vsi failed, "
+				     "err = %d",
+				     ret);
+			goto fail_mem;
+		}
+
+		break;
+	default:
+		/* for other types of VSI */
+		PMD_INIT_LOG(ERR, "other types of VSI not supported");
+		goto fail_mem;
+	}
+
+	/* VF has MSIX interrupt in VF range, don't allocate here */
+	if (type == ICE_VSI_PF) {
+		ret = ice_res_pool_alloc(&pf->msix_pool,
+					 RTE_MIN(vsi->nb_qps,
+						 RTE_MAX_RXTX_INTR_VEC_ID));
+		if (ret < 0) {
+			PMD_INIT_LOG(ERR, "VSI MAIN %d get heap failed %d",
+				     vsi->vsi_id, ret);
+		}
+		/* NOTE(review): on failure ret is negative and is still
+		 * assigned to the uint16_t msix_intr below -- confirm
+		 * whether VSI setup should abort here instead.
+		 */
+		vsi->msix_intr = ret;
+		vsi->nb_msix = RTE_MIN(vsi->nb_qps, RTE_MAX_RXTX_INTR_VEC_ID);
+	} else {
+		vsi->msix_intr = 0;
+		vsi->nb_msix = 0;
+	}
+	ret = ice_add_vsi(hw, vsi->idx, &vsi_ctx, NULL);
+	if (ret != ICE_SUCCESS) {
+		PMD_INIT_LOG(ERR, "add vsi failed, err = %d", ret);
+		goto fail_mem;
+	}
+	/* store vsi information in SW structure */
+	vsi->vsi_id = vsi_ctx.vsi_num;
+	vsi->info = vsi_ctx.info;
+	pf->vsis_allocated = vsi_ctx.vsis_allocd;
+	pf->vsis_unallocated = vsi_ctx.vsis_unallocated;
+
+	/* At the beginning, only TC0. */
+	/* What we need here is the maximum number of the TX queues.
+	 * Currently vsi->nb_qps means it.
+	 * Correct it if any change.
+	 */
+	max_txqs[0] = vsi->nb_qps;
+	ret = ice_cfg_vsi_lan(hw->port_info, vsi->idx,
+			      tc_bitmap, max_txqs);
+	if (ret != ICE_SUCCESS)
+		PMD_INIT_LOG(ERR, "Failed to config vsi sched");
+
+	return vsi;
+fail_mem:
+	rte_free(vsi);
+	pf->next_vsi_idx--;
+	return NULL;
+}
+
+/* Reset all PF statistics bookkeeping and create the main (PF) VSI.
+ * Returns 0 on success, -EINVAL when the VSI cannot be created.
+ */
+static int
+ice_pf_setup(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi;
+
+	/* Clear all stats counters */
+	pf->offset_loaded = FALSE;
+	memset(&pf->stats, 0, sizeof(struct ice_hw_port_stats));
+	memset(&pf->stats_offset, 0, sizeof(struct ice_hw_port_stats));
+	memset(&pf->internal_stats, 0, sizeof(struct ice_eth_stats));
+	memset(&pf->internal_stats_offset, 0, sizeof(struct ice_eth_stats));
+
+	vsi = ice_setup_vsi(pf, ICE_VSI_PF);
+	if (!vsi) {
+		PMD_INIT_LOG(ERR, "Failed to add vsi for PF");
+		return -EINVAL;
+	}
+
+	pf->main_vsi = vsi;
+
+	return 0;
+}
+
+/* ethdev init hook: wire up dev_ops, copy PCI ids and BAR 0 address
+ * into the shared HW struct, then initialize the shared code
+ * (ice_init_hw), the port MAC address, the MSIX resource pool and the
+ * main PF VSI. Failure paths unwind in reverse order of init.
+ */
+static int
+ice_dev_init(struct rte_eth_dev *dev)
+{
+	struct rte_pci_device *pci_dev;
+	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+	int ret;
+
+	dev->dev_ops = &ice_eth_dev_ops;
+
+	pci_dev = RTE_DEV_TO_PCI(dev->device);
+
+	rte_eth_copy_pci_info(dev, pci_dev);
+	pf->adapter = ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	pf->adapter->eth_dev = dev;
+	pf->dev_data = dev->data;
+	hw->back = pf->adapter;
+	hw->hw_addr = (uint8_t *)pci_dev->mem_resource[0].addr;
+	hw->vendor_id = pci_dev->id.vendor_id;
+	hw->device_id = pci_dev->id.device_id;
+	hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;
+	hw->subsystem_device_id = pci_dev->id.subsystem_device_id;
+	hw->bus.device = pci_dev->addr.devid;
+	hw->bus.func = pci_dev->addr.function;
+
+	ice_init_controlq_parameter(hw);
+
+	ret = ice_init_hw(hw);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Failed to initialize HW");
+		return -EINVAL;
+	}
+
+	PMD_INIT_LOG(INFO, "FW %d.%d.%05d API %d.%d",
+		     hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build,
+		     hw->api_maj_ver, hw->api_min_ver);
+
+	/* return value ignored: ice_pf_sw_init() can only return 0 today */
+	ice_pf_sw_init(dev);
+	ret = ice_init_mac_address(dev);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Failed to initialize mac address");
+		goto err_init_mac;
+	}
+
+	/* vector 0 is reserved for the misc interrupt */
+	ret = ice_res_pool_init(&pf->msix_pool, 1,
+				hw->func_caps.common_cap.num_msix_vectors - 1);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Failed to init MSIX pool");
+		goto err_msix_pool_init;
+	}
+
+	ret = ice_pf_setup(pf);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Failed to setup PF");
+		goto err_pf_setup;
+	}
+
+	return 0;
+
+err_pf_setup:
+	ice_res_pool_destroy(&pf->msix_pool);
+err_msix_pool_init:
+	rte_free(dev->data->mac_addrs);
+err_init_mac:
+	ice_sched_cleanup_all(hw);
+	rte_free(hw->port_info);
+	ice_shutdown_all_ctrlq(hw);
+
+	return ret;
+}
+
+/* Remove the VSI from HW via the free-VSI admin command and free its
+ * SW structure; the structure is freed even when the admin command
+ * fails. Returns 0 on success or when vsi is NULL, -1 on AQ failure.
+ */
+static int
+ice_release_vsi(struct ice_vsi *vsi)
+{
+	struct ice_hw *hw;
+	struct ice_vsi_ctx vsi_ctx;
+	enum ice_status ret;
+
+	if (!vsi)
+		return 0;
+
+	hw = ICE_VSI_TO_HW(vsi);
+
+	memset(&vsi_ctx, 0, sizeof(vsi_ctx));
+
+	vsi_ctx.vsi_num = vsi->vsi_id;
+	vsi_ctx.info = vsi->info;
+	ret = ice_free_vsi(hw, vsi->idx, &vsi_ctx, false, NULL);
+	if (ret != ICE_SUCCESS) {
+		PMD_INIT_LOG(ERR, "Failed to free vsi by aq, %u", vsi->vsi_id);
+		rte_free(vsi);
+		return -1;
+	}
+
+	rte_free(vsi);
+	return 0;
+}
+
+/* ethdev uninit hook: close the device, then release the remaining
+ * SW/HW resources (ops pointers, MAC array, scheduler, port info,
+ * control queues). Secondary processes bail out early.
+ */
+static int
+ice_dev_uninit(struct rte_eth_dev *dev)
+{
+	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	ICE_PROC_SECONDARY_CHECK_RET_0;
+
+	/* ice_dev_close() already destroys the MSIX pool and releases
+	 * pf->main_vsi; releasing the VSI again here double-freed it.
+	 */
+	ice_dev_close(dev);
+
+	dev->dev_ops = NULL;
+	dev->rx_pkt_burst = NULL;
+	dev->tx_pkt_burst = NULL;
+
+	rte_free(dev->data->mac_addrs);
+	dev->data->mac_addrs = NULL;
+
+	ice_sched_cleanup_all(hw);
+	rte_free(hw->port_info);
+	/* harmless if already shut down: the control queues are guarded */
+	ice_shutdown_all_ctrlq(hw);
+
+	return 0;
+}
+
+/* PCI probe: allocate an ethdev with ice_adapter private data and run
+ * ice_dev_init() on it.
+ */
+static int
+ice_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+	      struct rte_pci_device *pci_dev)
+{
+	return rte_eth_dev_pci_generic_probe(pci_dev,
+					     sizeof(struct ice_adapter),
+					     ice_dev_init);
+}
+
+/* PCI remove: tear the ethdev down through ice_dev_uninit(). */
+static int
+ice_pci_remove(struct rte_pci_device *pci_dev)
+{
+	return rte_eth_dev_pci_generic_remove(pci_dev, ice_dev_uninit);
+}
+
+/* PCI driver glue: supported device id table, flags requesting BAR
+ * mapping and link-status-change interrupts, and probe/remove hooks.
+ */
+static struct rte_pci_driver rte_ice_pmd = {
+	.id_table = pci_id_ice_map,
+	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+	.probe = ice_pci_probe,
+	.remove = ice_pci_remove,
+};
+
+/**
+ * Driver initialization routine.
+ * Invoked once at EAL init time.
+ * Register itself as the [Poll Mode] Driver of PCI devices.
+ * Note: all three registrations must use the same driver name
+ * ("net_ice"); the original registered the PCI table under "ice",
+ * which breaks the pmdinfo association between driver and id table.
+ */
+RTE_PMD_REGISTER_PCI(net_ice, rte_ice_pmd);
+RTE_PMD_REGISTER_PCI_TABLE(net_ice, pci_id_ice_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_ice, "* igb_uio | uio_pci_generic | vfio-pci");
+
+/* Register the dynamic log types at constructor time. Uses the RTE_INIT
+ * body form; the old "RTE_INIT(f); static void f(void)" split form is
+ * deprecated. Log names follow the "pmd.net.<driver>" convention.
+ */
+RTE_INIT(ice_init_log)
+{
+	ice_logtype_init = rte_log_register("pmd.net.ice.init");
+	if (ice_logtype_init >= 0)
+		rte_log_set_level(ice_logtype_init, RTE_LOG_NOTICE);
+	ice_logtype_driver = rte_log_register("pmd.net.ice.driver");
+	if (ice_logtype_driver >= 0)
+		rte_log_set_level(ice_logtype_driver, RTE_LOG_NOTICE);
+}
+
+/* dev_close op: release the MSIX pool, the main VSI and the control
+ * queues. Secondary processes bail out early.
+ */
+static void
+ice_dev_close(struct rte_eth_dev *dev)
+{
+	struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+	struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	ICE_PROC_SECONDARY_CHECK_NO_ERR;
+
+	ice_res_pool_destroy(&pf->msix_pool);
+	ice_release_vsi(pf->main_vsi);
+	/* Clear the stale pointer so a later uninit path cannot release
+	 * (and free) the same VSI a second time.
+	 */
+	pf->main_vsi = NULL;
+
+	ice_shutdown_all_ctrlq(hw);
+}
diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
new file mode 100644
index 0000000..bc2d6f2
--- /dev/null
+++ b/drivers/net/ice/ice_ethdev.h
@@ -0,0 +1,348 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _ICE_ETHDEV_H_
+#define _ICE_ETHDEV_H_
+
+#include <rte_kvargs.h>
+
+#include "base/ice_common.h"
+#include "base/ice_adminq_cmd.h"
+
+#define ICE_VLAN_TAG_SIZE        4
+
+#define ICE_ADMINQ_LEN               32
+#define ICE_SBIOQ_LEN                32
+#define ICE_MAILBOXQ_LEN             32
+#define ICE_ADMINQ_BUF_SZ            4096
+#define ICE_SBIOQ_BUF_SZ             4096
+#define ICE_MAILBOXQ_BUF_SZ          4096
+/* Number of queues per TC should be one of 1, 2, 4, 8, 16, 32, 64 */
+#define ICE_MAX_Q_PER_TC         64
+#define ICE_NUM_DESC_DEFAULT     512
+#define ICE_BUF_SIZE_MIN         1024
+#define ICE_FRAME_SIZE_MAX       9728
+#define ICE_QUEUE_BASE_ADDR_UNIT 128
+/* number of VSIs and queue default setting */
+#define ICE_MAX_QP_NUM_PER_VF    16
+#define ICE_DEFAULT_QP_NUM_FDIR  1
+#define ICE_UINT32_BIT_SIZE      (CHAR_BIT * sizeof(uint32_t))
+#define ICE_VFTA_SIZE            (4096 / ICE_UINT32_BIT_SIZE)
+/* Maximum number of MAC addresses */
+#define ICE_NUM_MACADDR_MAX       64
+/* Maximum number of VFs */
+#define ICE_MAX_VF               128
+#define ICE_MAX_INTR_QUEUE_NUM   256
+
+#define ICE_MISC_VEC_ID          RTE_INTR_VEC_ZERO_OFFSET
+#define ICE_RX_VEC_ID            RTE_INTR_VEC_RXTX_OFFSET
+
+#define ICE_MAX_PKT_TYPE  1024
+
+/**
+ * vlan_id is a 12 bit number.
+ * The VFTA array is actually a 4096 bit array, 128 of 32bit elements.
+ * 2^5 = 32. The val of lower 5 bits specifies the bit in the 32bit element.
+ * The higher 7 bit val specifies VFTA array index.
+ */
+#define ICE_VFTA_BIT(vlan_id)    (1 << ((vlan_id) & 0x1F))
+#define ICE_VFTA_IDX(vlan_id)    ((vlan_id) >> 5)
+
+/* Default TC traffic in case DCB is not enabled */
+#define ICE_DEFAULT_TCMAP        0x1
+#define ICE_FDIR_QUEUE_ID        0
+
+/* Always assign pool 0 to main VSI, VMDQ will start from 1 */
+#define ICE_VMDQ_POOL_BASE       1
+
+#define ICE_DEFAULT_RX_FREE_THRESH  32
+#define ICE_DEFAULT_RX_PTHRESH      8
+#define ICE_DEFAULT_RX_HTHRESH      8
+#define ICE_DEFAULT_RX_WTHRESH      0
+
+#define ICE_DEFAULT_TX_FREE_THRESH  32
+#define ICE_DEFAULT_TX_PTHRESH      32
+#define ICE_DEFAULT_TX_HTHRESH      0
+#define ICE_DEFAULT_TX_WTHRESH      0
+#define ICE_DEFAULT_TX_RSBIT_THRESH 32
+
+/* Bit shift and mask */
+#define ICE_4_BIT_WIDTH  (CHAR_BIT / 2)
+#define ICE_4_BIT_MASK   RTE_LEN2MASK(ICE_4_BIT_WIDTH, uint8_t)
+#define ICE_8_BIT_WIDTH  CHAR_BIT
+#define ICE_8_BIT_MASK   UINT8_MAX
+#define ICE_16_BIT_WIDTH (CHAR_BIT * 2)
+#define ICE_16_BIT_MASK  UINT16_MAX
+#define ICE_32_BIT_WIDTH (CHAR_BIT * 4)
+#define ICE_32_BIT_MASK  UINT32_MAX
+#define ICE_40_BIT_WIDTH (CHAR_BIT * 5)
+#define ICE_40_BIT_MASK  RTE_LEN2MASK(ICE_40_BIT_WIDTH, uint64_t)
+#define ICE_48_BIT_WIDTH (CHAR_BIT * 6)
+#define ICE_48_BIT_MASK  RTE_LEN2MASK(ICE_48_BIT_WIDTH, uint64_t)
+
+#define ICE_FLAG_RSS                   BIT_ULL(0)
+#define ICE_FLAG_DCB                   BIT_ULL(1)
+#define ICE_FLAG_VMDQ                  BIT_ULL(2)
+#define ICE_FLAG_SRIOV                 BIT_ULL(3)
+#define ICE_FLAG_HEADER_SPLIT_DISABLED BIT_ULL(4)
+#define ICE_FLAG_HEADER_SPLIT_ENABLED  BIT_ULL(5)
+#define ICE_FLAG_FDIR                  BIT_ULL(6)
+#define ICE_FLAG_VXLAN                 BIT_ULL(7)
+#define ICE_FLAG_RSS_AQ_CAPABLE        BIT_ULL(8)
+#define ICE_FLAG_VF_MAC_BY_PF          BIT_ULL(9)
+#define ICE_FLAG_ALL  (ICE_FLAG_RSS | \
+		       ICE_FLAG_DCB | \
+		       ICE_FLAG_VMDQ | \
+		       ICE_FLAG_SRIOV | \
+		       ICE_FLAG_HEADER_SPLIT_DISABLED | \
+		       ICE_FLAG_HEADER_SPLIT_ENABLED | \
+		       ICE_FLAG_FDIR | \
+		       ICE_FLAG_VXLAN | \
+		       ICE_FLAG_RSS_AQ_CAPABLE | \
+		       ICE_FLAG_VF_MAC_BY_PF)
+
+#define ICE_RSS_OFFLOAD_ALL ( \
+	ETH_RSS_FRAG_IPV4 | \
+	ETH_RSS_NONFRAG_IPV4_TCP | \
+	ETH_RSS_NONFRAG_IPV4_UDP | \
+	ETH_RSS_NONFRAG_IPV4_SCTP | \
+	ETH_RSS_NONFRAG_IPV4_OTHER | \
+	ETH_RSS_FRAG_IPV6 | \
+	ETH_RSS_NONFRAG_IPV6_TCP | \
+	ETH_RSS_NONFRAG_IPV6_UDP | \
+	ETH_RSS_NONFRAG_IPV6_SCTP | \
+	ETH_RSS_NONFRAG_IPV6_OTHER | \
+	ETH_RSS_L2_PAYLOAD)
+
+struct ice_adapter;
+
+/**
+ * MAC filter structure
+ */
+struct ice_mac_filter_info {
+	struct ether_addr mac_addr;
+};
+
+TAILQ_HEAD(ice_mac_filter_list, ice_mac_filter);
+
+/* MAC filter list structure */
+struct ice_mac_filter {
+	TAILQ_ENTRY(ice_mac_filter) next;
+	struct ice_mac_filter_info mac_info;
+};
+
+/**
+ * VLAN filter structure
+ */
+struct ice_vlan_filter_info {
+	uint16_t vlan_id;
+};
+
+TAILQ_HEAD(ice_vlan_filter_list, ice_vlan_filter);
+
+/* VLAN filter list structure */
+struct ice_vlan_filter {
+	TAILQ_ENTRY(ice_vlan_filter) next;
+	struct ice_vlan_filter_info vlan_info;
+};
+
+struct pool_entry {
+	LIST_ENTRY(pool_entry) next;
+	uint16_t base;
+	uint16_t len;
+};
+
+LIST_HEAD(res_list, pool_entry);
+
+struct ice_res_pool_info {
+	uint32_t base;              /* Resource start index */
+	uint32_t num_alloc;         /* Allocated resource number */
+	uint32_t num_free;          /* Total available resource number */
+	struct res_list alloc_list; /* Allocated resource list */
+	struct res_list free_list;  /* Available resource list */
+};
+
+TAILQ_HEAD(ice_vsi_list_head, ice_vsi_list);
+
+struct ice_vsi;
+
+/* VSI list structure */
+struct ice_vsi_list {
+	TAILQ_ENTRY(ice_vsi_list) list;
+	struct ice_vsi *vsi;
+};
+
+struct ice_rx_queue;
+struct ice_tx_queue;
+
+/**
+ * Structure that defines a VSI, associated with a adapter.
+ */
+struct ice_vsi {
+	struct ice_adapter *adapter; /* Backreference to associated adapter */
+	struct ice_aqc_vsi_props info; /* VSI properties */
+	/**
+	 * When drivers loaded, only a default main VSI exists. In case new VSI
+	 * needs to add, HW needs to know the layout that VSIs are organized.
+	 * Besides that, a VSI is an element that can't switch packets by
+	 * itself; a new component, a VEB, must be added to perform switching.
+	 * So, a new VSI needs to specify its uplink VSI (Parent VSI) before
+	 * it is created. The
+	 * uplink VSI will check whether it had a VEB to switch packets. If no,
+	 * it will try to create one. Then, uplink VSI will move the new VSI
+	 * into its' sib_vsi_list to manage all the downlink VSI.
+	 *  sib_vsi_list: the VSI list that shared the same uplink VSI.
+	 *  parent_vsi  : the uplink VSI. It's NULL for main VSI.
+	 *  veb         : the VEB associates with the VSI.
+	 */
+	struct ice_vsi_list sib_vsi_list; /* sibling vsi list */
+	struct ice_vsi *parent_vsi;
+	enum ice_vsi_type type; /* VSI types */
+	uint16_t vlan_num;       /* Total VLAN number */
+	uint16_t mac_num;        /* Total mac number */
+	struct ice_mac_filter_list mac_list; /* macvlan filter list */
+	struct ice_vlan_filter_list vlan_list; /* vlan filter list */
+	uint16_t nb_qps;         /* Number of queue pairs VSI can occupy */
+	uint16_t nb_used_qps;    /* Number of queue pairs VSI uses */
+	uint16_t max_macaddrs;   /* Maximum number of MAC addresses */
+	uint16_t base_queue;     /* The first queue index of this VSI */
+	uint16_t vsi_id;         /* Hardware Id */
+	uint16_t idx;            /* vsi_handle: SW index in hw->vsi_ctx */
+	/* VF number to which the VSI connects, valid when VSI is VF type */
+	uint8_t vf_num;
+	uint16_t msix_intr; /* The MSIX interrupt binds to VSI */
+	uint16_t nb_msix;   /* The max number of msix vector */
+	uint8_t enabled_tc; /* The traffic class enabled */
+	uint8_t vlan_anti_spoof_on; /* The VLAN anti-spoofing enabled */
+	uint8_t vlan_filter_on; /* The VLAN filter enabled */
+	/* information about rss configuration */
+	u32 rss_key_size;
+	u32 rss_lut_size;
+	uint8_t *rss_lut;
+	uint8_t *rss_key;
+	struct ice_eth_stats eth_stats_offset;
+	struct ice_eth_stats eth_stats;
+	bool offset_loaded;
+};
+
+struct ice_pf {
+	struct ice_adapter *adapter; /* The adapter this PF associate to */
+	struct ice_vsi *main_vsi; /* pointer to main VSI structure */
+	/* Used for next free software vsi idx.
+	 * To save the effort, we don't recycle the index.
+	 * Suppose the indexes are more than enough.
+	 */
+	uint16_t next_vsi_idx;
+	uint16_t vsis_allocated;
+	uint16_t vsis_unallocated;
+	struct ice_res_pool_info qp_pool;    /*Queue pair pool */
+	struct ice_res_pool_info msix_pool;  /* MSIX interrupt pool */
+	struct rte_eth_dev_data *dev_data; /* Pointer to the device data */
+	struct ether_addr dev_addr; /* PF device mac address */
+	uint64_t flags; /* PF feature flags */
+	uint16_t hash_lut_size; /* The size of hash lookup table */
+	uint16_t lan_nb_qp_max;
+	uint16_t lan_nb_qps; /* The number of queue pairs of LAN */
+	struct ice_hw_port_stats stats_offset;
+	struct ice_hw_port_stats stats;
+	/* internal packet statistics, it should be excluded from the total */
+	struct ice_eth_stats internal_stats_offset;
+	struct ice_eth_stats internal_stats;
+	bool offset_loaded;
+	bool adapter_stopped;
+};
+
+/**
+ * Structure to store private data for each PF/VF instance.
+ */
+struct ice_adapter {
+	/* Common for both PF and VF */
+	struct ice_hw hw;
+	struct rte_eth_dev *eth_dev;
+	struct ice_pf pf;
+	bool rx_bulk_alloc_allowed;
+	bool tx_simple_allowed;
+	/* ptype mapping table */
+	uint32_t ptype_tbl[ICE_MAX_PKT_TYPE] __rte_cache_min_aligned;
+};
+
+struct ice_vsi_vlan_pvid_info {
+	uint16_t on;		/* Enable or disable pvid */
+	union {
+		uint16_t pvid;	/* Valid in case 'on' is set to set pvid */
+		struct {
+			/* Valid in case 'on' is cleared. 'tagged' will reject
+			 * tagged packets, while 'untagged' will reject
+			 * untagged packets.
+			 */
+			uint8_t tagged;
+			uint8_t untagged;
+		} reject;
+	} config;
+};
+
+#define ICE_DEV_TO_PCI(eth_dev) \
+	RTE_DEV_TO_PCI((eth_dev)->device)
+
+/* ICE_DEV_PRIVATE_TO */
+#define ICE_DEV_PRIVATE_TO_PF(adapter) \
+	(&((struct ice_adapter *)adapter)->pf)
+#define ICE_DEV_PRIVATE_TO_HW(adapter) \
+	(&((struct ice_adapter *)adapter)->hw)
+#define ICE_DEV_PRIVATE_TO_ADAPTER(adapter) \
+	((struct ice_adapter *)adapter)
+
+/* ICE_VSI_TO */
+#define ICE_VSI_TO_HW(vsi) \
+	(&(((struct ice_vsi *)vsi)->adapter->hw))
+#define ICE_VSI_TO_PF(vsi) \
+	(&(((struct ice_vsi *)vsi)->adapter->pf))
+#define ICE_VSI_TO_ETH_DEV(vsi) \
+	(((struct ice_vsi *)vsi)->adapter->eth_dev)
+
+/* ICE_PF_TO */
+#define ICE_PF_TO_HW(pf) \
+	(&(((struct ice_pf *)pf)->adapter->hw))
+#define ICE_PF_TO_ADAPTER(pf) \
+	((struct ice_adapter *)(pf)->adapter)
+#define ICE_PF_TO_ETH_DEV(pf) \
+	(((struct ice_pf *)pf)->adapter->eth_dev)
+
+/* Guards for control-plane functions that are not supported in
+ * secondary processes. The three variants differ only in what they
+ * return (-E_RTE_SECONDARY, 0, or void).
+ * NOTE(review): these expand to a hidden 'return' inside the caller,
+ * which is easy to miss at the call site -- consider open-coding the
+ * check instead.
+ */
+#define ICE_PROC_SECONDARY_CHECK					\
+	do {								\
+		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {	\
+			PMD_DRV_LOG(ERR,				\
+				    "Control plane functions not "	\
+				    "supported by secondary process.");	\
+			return -E_RTE_SECONDARY;			\
+		}							\
+	} while (0)
+
+#define ICE_PROC_SECONDARY_CHECK_RET_0					\
+	do {								\
+		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {	\
+			PMD_DRV_LOG(ERR,				\
+				    "Control plane functions not "	\
+				    "supported by secondary process.");	\
+			return 0;					\
+		}							\
+	} while (0)
+
+#define ICE_PROC_SECONDARY_CHECK_NO_ERR					\
+	do {								\
+		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {	\
+			PMD_DRV_LOG(ERR,				\
+				    "Control plane functions not "	\
+				    "supported by secondary process.");	\
+			return;						\
+		}							\
+	} while (0)
+
+/* Largest power of two <= n (0 for n == 0).
+ * NOTE(review): assumes n > 0 after the guard -- for a negative n,
+ * __builtin_clz of the converted value can yield a shift by the sign
+ * bit, which is undefined. Confirm callers only pass n >= 0.
+ */
+static inline int
+ice_align_floor(int n)
+{
+	if (n == 0)
+		return 0;
+	return 1 << (sizeof(n) * CHAR_BIT - 1 - __builtin_clz(n));
+}
+#endif /* _ICE_ETHDEV_H_ */
diff --git a/drivers/net/ice/ice_logs.h b/drivers/net/ice/ice_logs.h
new file mode 100644
index 0000000..de2d573
--- /dev/null
+++ b/drivers/net/ice/ice_logs.h
@@ -0,0 +1,45 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _ICE_LOGS_H_
+#define _ICE_LOGS_H_
+
+extern int ice_logtype_init;
+extern int ice_logtype_driver;
+
+#define PMD_INIT_LOG(level, fmt, args...) \
+	rte_log(RTE_LOG_ ## level, ice_logtype_init, "%s(): " fmt "\n", \
+		__func__, ##args)
+
+#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
+
+#ifdef RTE_LIBRTE_ICE_DEBUG_RX
+#define PMD_RX_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+#else
+#define PMD_RX_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+#ifdef RTE_LIBRTE_ICE_DEBUG_TX
+#define PMD_TX_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+#else
+#define PMD_TX_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+#ifdef RTE_LIBRTE_ICE_DEBUG_TX_FREE
+#define PMD_TX_FREE_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+#else
+#define PMD_TX_FREE_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+#define PMD_DRV_LOG_RAW(level, fmt, args...) \
+	rte_log(RTE_LOG_ ## level, ice_logtype_driver, "%s(): " fmt, \
+		__func__, ## args)
+
+#define PMD_DRV_LOG(level, fmt, args...) \
+	PMD_DRV_LOG_RAW(level, fmt "\n", ## args)
+
+#endif /* _ICE_LOGS_H_ */
diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
new file mode 100644
index 0000000..c37dc23
--- /dev/null
+++ b/drivers/net/ice/ice_rxtx.h
@@ -0,0 +1,117 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _ICE_RXTX_H_
+#define _ICE_RXTX_H_
+
+#include "ice_ethdev.h"
+
+#define ICE_ALIGN_RING_DESC  32   /* ring sizes must be a multiple of this */
+#define ICE_MIN_RING_DESC    64   /* minimum descriptors per ring */
+#define ICE_MAX_RING_DESC    4096 /* maximum descriptors per ring */
+#define ICE_DMA_MEM_ALIGN    4096 /* alignment of DMA memory zones */
+#define ICE_RING_BASE_ALIGN  128  /* alignment of the ring base address */
+
+#define ICE_RX_MAX_BURST 32 /* upper bound for one burst RX call */
+#define ICE_TX_MAX_BURST 32 /* upper bound for one burst TX call */
+
+#define ICE_CHK_Q_ENA_COUNT        100 /* presumably poll attempts for queue enable — confirm */
+#define ICE_CHK_Q_ENA_INTERVAL_US  100 /* presumably microseconds between polls — confirm */
+
+#ifdef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+#define ice_rx_desc ice_16byte_rx_desc /* compact 16-byte RX descriptor */
+#else
+#define ice_rx_desc ice_32byte_rx_desc /* default 32-byte RX descriptor */
+#endif
+
+#define ICE_SUPPORT_CHAIN_NUM 5 /* NOTE(review): looks like max mbuf segments per packet — confirm */
+
+struct ice_rx_entry {
+	struct rte_mbuf *mbuf; /* mbuf backing one RX descriptor */
+};
+
+struct ice_rx_queue {
+	struct rte_mempool *mp; /* mbuf pool to populate RX ring */
+	volatile union ice_rx_desc *rx_ring;/* RX ring virtual address */
+	uint64_t rx_ring_phys_addr; /* RX ring DMA address */
+	struct ice_rx_entry *sw_ring; /* address of RX soft ring */
+	uint16_t nb_rx_desc; /* number of RX descriptors */
+	uint16_t rx_free_thresh; /* max free RX desc to hold */
+	uint16_t rx_tail; /* current value of tail */
+	uint16_t nb_rx_hold; /* number of held free RX desc */
+	struct rte_mbuf *pkt_first_seg; /**< first segment of current packet */
+	struct rte_mbuf *pkt_last_seg; /**< last segment of current packet */
+#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC
+	uint16_t rx_nb_avail; /**< number of staged packets ready */
+	uint16_t rx_next_avail; /**< index of next staged packets */
+	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
+	struct rte_mbuf fake_mbuf; /**< dummy mbuf */
+	struct rte_mbuf *rx_stage[ICE_RX_MAX_BURST * 2];
+#endif
+	uint16_t port_id; /* device port ID; ethdev port ids are 16-bit since DPDK 17.11 */
+	uint8_t crc_len; /* 0 if CRC stripped, 4 otherwise */
+	uint16_t queue_id; /* RX queue index */
+	uint16_t reg_idx; /* RX queue register index */
+	uint8_t drop_en; /* if not 0, set register bit */
+	volatile uint8_t *qrx_tail; /* register address of tail */
+	struct ice_vsi *vsi; /* the VSI this queue belongs to */
+	uint16_t rx_buf_len; /* The packet buffer size */
+	uint16_t rx_hdr_len; /* The header buffer size */
+	uint16_t max_pkt_len; /* Maximum packet length */
+	bool q_set; /* indicate if rx queue has been configured */
+	bool rx_deferred_start; /* don't start this queue in dev start */
+};
+
+struct ice_tx_entry {
+	struct rte_mbuf *mbuf; /* mbuf backing one TX descriptor */
+	uint16_t next_id; /* index of the next entry in the SW ring */
+	uint16_t last_id; /* presumably index of the last desc of a multi-seg packet — confirm */
+};
+
+struct ice_tx_queue {
+	uint16_t nb_tx_desc; /* number of TX descriptors */
+	uint64_t tx_ring_phys_addr; /* TX ring DMA address */
+	volatile struct ice_tx_desc *tx_ring; /* TX ring virtual address */
+	struct ice_tx_entry *sw_ring; /* virtual address of SW ring */
+	uint16_t tx_tail; /* current value of tail register */
+	volatile uint8_t *qtx_tail; /* register address of tail */
+	uint16_t nb_tx_used; /* number of TX desc used since RS bit set */
+	/* index to last TX descriptor to have been cleaned */
+	uint16_t last_desc_cleaned;
+	/* Total number of TX descriptors ready to be allocated. */
+	uint16_t nb_tx_free;
+	/* Start freeing TX buffers if there are less free descriptors than
+	 * this value.
+	 */
+	uint16_t tx_free_thresh;
+	/* Number of TX descriptors to use before RS bit is set. */
+	uint16_t tx_rs_thresh;
+	uint8_t pthresh; /**< Prefetch threshold register. */
+	uint8_t hthresh; /**< Host threshold register. */
+	uint8_t wthresh; /**< Write-back threshold reg. */
+	uint16_t port_id; /* Device port identifier; ethdev port ids are 16-bit since DPDK 17.11 */
+	uint16_t queue_id; /* TX queue index. */
+	uint32_t q_teid; /* TX schedule node id. */
+	uint16_t reg_idx;
+	uint64_t offloads;
+	struct ice_vsi *vsi; /* the VSI this queue belongs to */
+	uint16_t tx_next_dd;
+	uint16_t tx_next_rs;
+	bool tx_deferred_start; /* don't start this queue in dev start */
+	bool q_set; /* indicate if tx queue has been configured */
+};
+
+/* Offload features */
+union ice_tx_offload {
+	uint64_t data; /* all offload fields packed into one 64-bit word */
+	struct {
+		uint64_t l2_len:7; /* L2 (MAC) Header Length. */
+		uint64_t l3_len:9; /* L3 (IP) Header Length. */
+		uint64_t l4_len:8; /* L4 Header Length. */
+		uint64_t tso_segsz:16; /* TCP TSO segment size */
+		uint64_t outer_l2_len:8; /* outer L2 Header Length */
+		uint64_t outer_l3_len:16; /* outer L3 Header Length */
+	}; /* bit-field widths sum to exactly 64 bits */
+};
+#endif /* _ICE_RXTX_H_ */
diff --git a/drivers/net/ice/rte_pmd_ice_version.map b/drivers/net/ice/rte_pmd_ice_version.map
new file mode 100644
index 0000000..7b23b60
--- /dev/null
+++ b/drivers/net/ice/rte_pmd_ice_version.map
@@ -0,0 +1,4 @@ 
+DPDK_19.02 {
+
+	local: *; /* no symbols exported yet; all kept local */
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 5699d97..02e8b6f 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -163,6 +163,7 @@  _LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD)       += -lrte_pmd_enic
 _LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD)      += -lrte_pmd_fm10k
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE)   += -lrte_pmd_failsafe
 _LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD)       += -lrte_pmd_i40e
+_LDLIBS-$(CONFIG_RTE_LIBRTE_ICE_PMD)        += -lrte_pmd_ice
 _LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD)      += -lrte_pmd_ixgbe
 ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni