
[dpdk-dev,v7,09/10] igb: enable rx queue interrupts for PF

Message ID 1430804386-28949-10-git-send-email-cunming.liang@intel.com (mailing list archive)
State Superseded, archived

Commit Message

Cunming Liang May 5, 2015, 5:39 a.m. UTC
The patch does the following for the igb PF (see the usage sketch after the list):
- Setup NIC to generate MSI-X interrupts
- Set the IVAR register to map interrupt causes to vectors
- Implement interrupt enable/disable functions
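
As a usage sketch (not part of this patch), this is roughly how an application
would request the per-queue Rx interrupts this patch services, assuming the
intr_conf.rxq flag introduced earlier in this series and checked by
eth_igb_start():

	/* Hypothetical application fragment: request both link-status and
	 * per-queue Rx interrupts before starting the port. */
	static const struct rte_eth_conf port_conf = {
		.intr_conf = {
			.lsc = 1,	/* link status change interrupt */
			.rxq = 1,	/* per Rx queue interrupt */
		},
	};

	ret = rte_eth_dev_configure(port_id, nb_rx_queues, nb_tx_queues,
				    &port_conf);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "cannot configure port %u\n", port_id);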

Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
v7 changes
 - add condition check when intr vector is not enabled

v6 changes
 - fill queue-vector mapping table

v5 changes
 - Rebase the patchset onto the HEAD

v3 changes
 - Remove unnecessary variables in e1000_mac_info
 - Remove spinlok from PMD

v2 changes
 - Consolidate review comments related to coding style

 lib/librte_pmd_e1000/e1000_ethdev.h |   3 +
 lib/librte_pmd_e1000/igb_ethdev.c   | 256 ++++++++++++++++++++++++++++++++----
 2 files changed, 234 insertions(+), 25 deletions(-)

Comments

Stephen Hemminger May 5, 2015, 11:16 p.m. UTC | #1
On Tue,  5 May 2015 13:39:45 +0800
Cunming Liang <cunming.liang@intel.com> wrote:

> The patch does the following for the igb PF:
> - Setup NIC to generate MSI-X interrupts
> - Set the IVAR register to map interrupt causes to vectors
> - Implement interrupt enable/disable functions
> 
> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>

What about E1000?

This is only usable if it works on all devices.
Cunming Liang May 11, 2015, 5:05 a.m. UTC | #2
On 5/6/2015 7:16 AM, Stephen Hemminger wrote:
> On Tue,  5 May 2015 13:39:45 +0800
> Cunming Liang <cunming.liang@intel.com> wrote:
>
>> The patch does the following for the igb PF:
>> - Setup NIC to generate MSI-X interrupts
>> - Set the IVAR register to map interrupt causes to vectors
>> - Implement interrupt enable/disable functions
>>
>> Signed-off-by: Danny Zhou <danny.zhou@intel.com>
>> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> What about E1000?
>
> This is only usable if it works on all devices.
[LCM] Agree with you, will send a separate patch for e1000 after the patch
series closes.
Stephen Hemminger May 28, 2015, 9:25 p.m. UTC | #3
On Tue,  5 May 2015 13:39:45 +0800
Cunming Liang <cunming.liang@intel.com> wrote:

> +		pci_dev->intr_handle.intr_vec =
> +			rte_zmalloc("intr_vec",
> +				    dev_info.max_rx_queues * sizeof(int), 0);
> +	

This and other drivers should be using rte_zmalloc_socket to ensure
that the intr_vec table is allocated on the same NUMA node as the hardware.
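
A minimal sketch of that suggestion (an illustration, not what the posted patch
does), assuming pci_dev->numa_node carries the device's socket id:

	/* Hypothetical variant: place the intr_vec table on the NIC's NUMA node. */
	pci_dev->intr_handle.intr_vec =
		rte_zmalloc_socket("intr_vec",
				   dev_info.max_rx_queues * sizeof(int),
				   0, pci_dev->numa_node);
	if (pci_dev->intr_handle.intr_vec == NULL) {
		PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues intr_vec",
			     dev_info.max_rx_queues);
		return -ENOMEM;
	}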

Patch

diff --git a/lib/librte_pmd_e1000/e1000_ethdev.h b/lib/librte_pmd_e1000/e1000_ethdev.h
index c451faa..13c4cad 100644
--- a/lib/librte_pmd_e1000/e1000_ethdev.h
+++ b/lib/librte_pmd_e1000/e1000_ethdev.h
@@ -108,6 +108,9 @@ 
 	ETH_RSS_IPV6_TCP_EX | \
 	ETH_RSS_IPV6_UDP_EX)
 
+/* maximum number of other interrupts besides Rx & Tx interrupts */
+#define E1000_MAX_OTHER_INTR		1
+
 /* structure for interrupt relative data */
 struct e1000_interrupt {
 	uint32_t flags;
diff --git a/lib/librte_pmd_e1000/igb_ethdev.c b/lib/librte_pmd_e1000/igb_ethdev.c
index 4415155..d7ec696 100644
--- a/lib/librte_pmd_e1000/igb_ethdev.c
+++ b/lib/librte_pmd_e1000/igb_ethdev.c
@@ -96,6 +96,7 @@  static int  eth_igb_flow_ctrl_get(struct rte_eth_dev *dev,
 static int  eth_igb_flow_ctrl_set(struct rte_eth_dev *dev,
 				struct rte_eth_fc_conf *fc_conf);
 static int eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev);
+static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev);
 static int eth_igb_interrupt_get_status(struct rte_eth_dev *dev);
 static int eth_igb_interrupt_action(struct rte_eth_dev *dev);
 static void eth_igb_interrupt_handler(struct rte_intr_handle *handle,
@@ -194,6 +195,16 @@  static int eth_igb_filter_ctrl(struct rte_eth_dev *dev,
 		     enum rte_filter_op filter_op,
 		     void *arg);
 
+static int eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev,
+					uint16_t queue_id);
+static int eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev,
+					uint16_t queue_id);
+static void eth_igb_assign_msix_vector(struct e1000_hw *hw, int8_t direction,
+				uint8_t queue, uint8_t msix_vector);
+static void eth_igb_configure_msix_intr(struct rte_eth_dev *dev);
+static void eth_igb_write_ivar(struct e1000_hw *hw, uint8_t msix_vector,
+				uint8_t index, uint8_t offset);
+
 /*
  * Define VF Stats MACRO for Non "cleared on read" register
  */
@@ -253,6 +264,8 @@  static const struct eth_dev_ops eth_igb_ops = {
 	.vlan_tpid_set        = eth_igb_vlan_tpid_set,
 	.vlan_offload_set     = eth_igb_vlan_offload_set,
 	.rx_queue_setup       = eth_igb_rx_queue_setup,
+	.rx_queue_intr_enable = eth_igb_rx_queue_intr_enable,
+	.rx_queue_intr_disable = eth_igb_rx_queue_intr_disable,
 	.rx_queue_release     = eth_igb_rx_queue_release,
 	.rx_queue_count       = eth_igb_rx_queue_count,
 	.rx_descriptor_done   = eth_igb_rx_descriptor_done,
@@ -463,6 +476,7 @@  eth_igb_dev_init(struct rte_eth_dev *eth_dev)
 	struct e1000_filter_info *filter_info =
 		E1000_DEV_PRIVATE_TO_FILTER_INFO(eth_dev->data->dev_private);
 	uint32_t ctrl_ext;
+	struct rte_eth_dev_info dev_info;
 
 	pci_dev = eth_dev->pci_dev;
 	eth_dev->dev_ops = &eth_igb_ops;
@@ -584,6 +598,23 @@  eth_igb_dev_init(struct rte_eth_dev *eth_dev)
 		     eth_dev->data->port_id, pci_dev->id.vendor_id,
 		     pci_dev->id.device_id);
 
+	/* set the maximum number of interrupt vectors requested via VFIO */
+	memset(&dev_info, 0, sizeof(dev_info));
+	eth_igb_infos_get(eth_dev, &dev_info);
+
+	if (pci_dev->intr_handle.vec_en) {
+		pci_dev->intr_handle.max_intr = dev_info.max_rx_queues +
+			E1000_MAX_OTHER_INTR;
+		pci_dev->intr_handle.intr_vec =
+			rte_zmalloc("intr_vec",
+				    dev_info.max_rx_queues * sizeof(int), 0);
+		if (pci_dev->intr_handle.intr_vec == NULL) {
+			PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
+				     " intr_vec\n", dev_info.max_rx_queues);
+			return -ENOMEM;
+		}
+	}
+
 	rte_intr_callback_register(&(pci_dev->intr_handle),
 		eth_igb_interrupt_handler, (void *)eth_dev);
 
@@ -752,7 +783,7 @@  eth_igb_start(struct rte_eth_dev *dev)
 {
 	struct e1000_hw *hw =
 		E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	int ret, i, mask;
+	int ret, mask;
 	uint32_t ctrl_ext;
 
 	PMD_INIT_FUNC_TRACE();
@@ -792,6 +823,9 @@  eth_igb_start(struct rte_eth_dev *dev)
 	/* configure PF module if SRIOV enabled */
 	igb_pf_host_configure(dev);
 
+	/* configure MSI-X for Rx interrupts */
+	eth_igb_configure_msix_intr(dev);
+
 	/* Configure for OS presence */
 	igb_init_manageability(hw);
 
@@ -819,33 +853,9 @@  eth_igb_start(struct rte_eth_dev *dev)
 		igb_vmdq_vlan_hw_filter_enable(dev);
 	}
 
-	/*
-	 * Configure the Interrupt Moderation register (EITR) with the maximum
-	 * possible value (0xFFFF) to minimize "System Partial Write" issued by
-	 * spurious [DMA] memory updates of RX and TX ring descriptors.
-	 *
-	 * With a EITR granularity of 2 microseconds in the 82576, only 7/8
-	 * spurious memory updates per second should be expected.
-	 * ((65535 * 2) / 1000.1000 ~= 0.131 second).
-	 *
-	 * Because interrupts are not used at all, the MSI-X is not activated
-	 * and interrupt moderation is controlled by EITR[0].
-	 *
-	 * Note that having [almost] disabled memory updates of RX and TX ring
-	 * descriptors through the Interrupt Moderation mechanism, memory
-	 * updates of ring descriptors are now moderated by the configurable
-	 * value of Write-Back Threshold registers.
-	 */
 	if ((hw->mac.type == e1000_82576) || (hw->mac.type == e1000_82580) ||
 		(hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i210) ||
 		(hw->mac.type == e1000_i211)) {
-		uint32_t ivar;
-
-		/* Enable all RX & TX queues in the IVAR registers */
-		ivar = (uint32_t) ((E1000_IVAR_VALID << 16) | E1000_IVAR_VALID);
-		for (i = 0; i < 8; i++)
-			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, i, ivar);
-
 		/* Configure EITR with the maximum possible value (0xFFFF) */
 		E1000_WRITE_REG(hw, E1000_EITR(0), 0xFFFF);
 	}
@@ -899,6 +909,10 @@  eth_igb_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.intr_conf.lsc != 0)
 		ret = eth_igb_lsc_interrupt_setup(dev);
 
+	/* check if rxq interrupt is enabled */
+	if (dev->data->dev_conf.intr_conf.rxq != 0)
+		eth_igb_rxq_interrupt_setup(dev);
+
 	/* resume enabled intr since hw reset */
 	igb_intr_enable(dev);
 
@@ -987,6 +1001,7 @@  eth_igb_close(struct rte_eth_dev *dev)
 {
 	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct rte_eth_link link;
+	struct rte_pci_device *pci_dev;
 
 	eth_igb_stop(dev);
 	e1000_phy_hw_reset(hw);
@@ -1004,6 +1019,12 @@  eth_igb_close(struct rte_eth_dev *dev)
 
 	igb_dev_clear_queues(dev);
 
+	pci_dev = dev->pci_dev;
+	if (pci_dev->intr_handle.intr_vec) {
+		rte_free(pci_dev->intr_handle.intr_vec);
+		pci_dev->intr_handle.intr_vec = NULL;
+	}
+
 	memset(&link, 0, sizeof(link));
 	rte_igb_dev_atomic_write_link_status(dev, &link);
 }
@@ -1828,6 +1849,34 @@  eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev)
 }
 
 /*
+ * It clears the interrupt causes and enables the interrupt.
+ * It will be called only once during NIC initialization.
+ *
+ * @param dev
+ *  Pointer to struct rte_eth_dev.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev)
+{
+	uint32_t mask, regval;
+	struct e1000_hw *hw =
+		E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct rte_eth_dev_info dev_info;
+
+	memset(&dev_info, 0, sizeof(dev_info));
+	eth_igb_infos_get(dev, &dev_info);
+
+	mask = 0xFFFFFFFF >> (32 - dev_info.max_rx_queues);
+	regval = E1000_READ_REG(hw, E1000_EIMS);
+	E1000_WRITE_REG(hw, E1000_EIMS, regval | mask);
+
+	return 0;
+}
+
+/*
  * It reads ICR and gets interrupt causes, check it and set a bit flag
  * to update link status.
  *
@@ -3652,5 +3701,162 @@  static struct rte_driver pmd_igbvf_drv = {
 	.init = rte_igbvf_pmd_init,
 };
 
+static int
+eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+	struct e1000_hw *hw =
+		E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	uint32_t mask = 1 << queue_id;
+
+	E1000_WRITE_REG(hw, E1000_EIMC, mask);
+	E1000_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+static int
+eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+	struct e1000_hw *hw =
+		E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	uint32_t mask = 1 << queue_id;
+	uint32_t regval;
+
+	regval = E1000_READ_REG(hw, E1000_EIMS);
+	E1000_WRITE_REG(hw, E1000_EIMS, regval | mask);
+	E1000_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+static void
+eth_igb_write_ivar(struct e1000_hw *hw, uint8_t  msix_vector,
+			uint8_t index, uint8_t offset)
+{
+	uint32_t val = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
+
+	/* clear bits */
+	val &= ~((uint32_t)0xFF << offset);
+
+	/* write vector and valid bit */
+	val |= (msix_vector | E1000_IVAR_VALID) << offset;
+
+	E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, val);
+}
+
+static void
+eth_igb_assign_msix_vector(struct e1000_hw *hw, int8_t direction,
+				 uint8_t queue, uint8_t msix_vector)
+{
+	uint32_t tmp = 0;
+	if (hw->mac.type == e1000_82575) {
+		if (direction == 0)
+			tmp = E1000_EICR_RX_QUEUE0 << queue;
+		else if (direction == 1)
+			tmp = E1000_EICR_TX_QUEUE0 << queue;
+		E1000_WRITE_REG(hw, E1000_MSIXBM(msix_vector), tmp);
+	} else if (hw->mac.type == e1000_82576) {
+		if ((direction == 0) || (direction == 1))
+			eth_igb_write_ivar(hw, msix_vector, queue & 0x7,
+					((queue & 0x8) << 1) + 8 * direction);
+	} else if ((hw->mac.type == e1000_82580) ||
+			(hw->mac.type == e1000_i350) ||
+			(hw->mac.type == e1000_i354) ||
+			(hw->mac.type == e1000_i210) ||
+			(hw->mac.type == e1000_i211)) {
+		if ((direction == 0) || (direction == 1))
+			eth_igb_write_ivar(hw, msix_vector,
+					queue >> 1,
+					((queue & 0x1) << 4) + 8 * direction);
+	}
+}
+
+/*
+ * Sets up the hardware to generate MSI-X interrupts properly
+ * @hw
+ *  board private structure
+ */
+static void
+eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
+{
+	int queue_id;
+	uint32_t tmpval, regval, intr_mask;
+	uint32_t max_rx_queues;
+	struct rte_eth_dev_info dev_info;
+	struct e1000_hw *hw =
+		E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+	int vec = 0;
+
+	/* won't configure msix register if no mapping is done
+	 * between intr vector and event fd */
+	if (!intr_handle->vec_en)
+		return;
+
+	memset(&dev_info, 0, sizeof(dev_info));
+	eth_igb_infos_get(dev, &dev_info);
+	max_rx_queues = dev_info.max_rx_queues;
+
+	/* set interrupt vector for other causes */
+	if (hw->mac.type == e1000_82575) {
+		tmpval = E1000_READ_REG(hw, E1000_CTRL_EXT);
+		/* enable MSI-X PBA support */
+		tmpval |= E1000_CTRL_EXT_PBA_CLR;
+
+		/* Auto-Mask interrupts upon ICR read */
+		tmpval |= E1000_CTRL_EXT_EIAME;
+		tmpval |= E1000_CTRL_EXT_IRCA;
+
+		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmpval);
+
+		/* enable msix_other interrupt */
+		E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), 0, E1000_EIMS_OTHER);
+		regval = E1000_READ_REG(hw, E1000_EIAC);
+		E1000_WRITE_REG(hw, E1000_EIAC, regval | E1000_EIMS_OTHER);
+		regval = E1000_READ_REG(hw, E1000_EIAM);
+		E1000_WRITE_REG(hw, E1000_EIMS, regval | E1000_EIMS_OTHER);
+	} else if ((hw->mac.type == e1000_82576) ||
+			(hw->mac.type == e1000_82580) ||
+			(hw->mac.type == e1000_i350) ||
+			(hw->mac.type == e1000_i354) ||
+			(hw->mac.type == e1000_i210) ||
+			(hw->mac.type == e1000_i211)) {
+		/* turn on MSI-X capability first */
+		E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE |
+					E1000_GPIE_PBA | E1000_GPIE_EIAME |
+					E1000_GPIE_NSICR);
+
+		/* enable msix_other interrupt */
+		intr_mask = 1 << max_rx_queues;
+		regval = E1000_READ_REG(hw, E1000_EIAC);
+		E1000_WRITE_REG(hw, E1000_EIAC, regval | intr_mask);
+		regval = E1000_READ_REG(hw, E1000_EIMS);
+		E1000_WRITE_REG(hw, E1000_EIMS, regval | intr_mask);
+		tmpval = (max_rx_queues | E1000_IVAR_VALID) << 8;
+
+		E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmpval);
+	}
+
+	/*
+	* use EIAM and EIAC to auto-mask and auto-clear when MSI-X interrupt
+	* is asserted; this saves a register write for every interrupt
+	*/
+	intr_mask = 0xFFFFFFFF >> (32 - max_rx_queues);
+	regval = E1000_READ_REG(hw, E1000_EIAC);
+	E1000_WRITE_REG(hw, E1000_EIAC, regval | intr_mask);
+	regval = E1000_READ_REG(hw, E1000_EIAM);
+	E1000_WRITE_REG(hw, E1000_EIAM, regval | intr_mask);
+
+	for (queue_id = 0; queue_id < dev->data->nb_rx_queues; queue_id++) {
+		eth_igb_assign_msix_vector(hw, 0, queue_id, vec);
+		intr_handle->intr_vec[queue_id] = vec;
+		if (vec < RTE_MAX_RXTX_INTR_VEC_ID)
+			vec++;
+	}
+
+	E1000_WRITE_FLUSH(hw);
+}
+
+
 PMD_REGISTER_DRIVER(pmd_igb_drv);
 PMD_REGISTER_DRIVER(pmd_igbvf_drv);