get:
Show a patch.

patch:
Partially update a patch (only the supplied fields change).

put:
Update a patch (full replacement).
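
For reference, a minimal sketch of driving this endpoint from Python with the requests library. The base URL matches this instance; the token value and the target state are illustrative assumptions, and write access via PUT or PATCH requires an API token from a maintainer account on the project.

    import requests

    BASE = "https://patches.dpdk.org/api"
    TOKEN = "0123456789abcdef"  # hypothetical token; real ones come from your Patchwork profile

    # GET: show a patch (read access needs no authentication)
    resp = requests.get(f"{BASE}/patches/5945/")
    resp.raise_for_status()
    patch = resp.json()
    print(patch["name"], patch["state"])

    # PATCH: partially update a patch (maintainer token required)
    resp = requests.patch(
        f"{BASE}/patches/5945/",
        headers={"Authorization": f"Token {TOKEN}"},
        json={"state": "under-review"},
    )
    print(resp.status_code)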

GET /api/patches/5945/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 5945,
    "url": "https://patches.dpdk.org/api/patches/5945/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/06286ff88c26b3901e5c6605c6350681d9be6652.1435607714.git.rahul.lakkireddy@chelsio.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<06286ff88c26b3901e5c6605c6350681d9be6652.1435607714.git.rahul.lakkireddy@chelsio.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/06286ff88c26b3901e5c6605c6350681d9be6652.1435607714.git.rahul.lakkireddy@chelsio.com",
    "date": "2015-06-29T23:28:37",
    "name": "[dpdk-dev,v4,4/9] cxgbe: add TX support for cxgbe PMD.",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "7ee3fc96908079c627ca02104e8e31c939a6d06d",
    "submitter": {
        "id": 241,
        "url": "https://patches.dpdk.org/api/people/241/?format=api",
        "name": "Rahul Lakkireddy",
        "email": "rahul.lakkireddy@chelsio.com"
    },
    "delegate": null,
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/06286ff88c26b3901e5c6605c6350681d9be6652.1435607714.git.rahul.lakkireddy@chelsio.com/mbox/",
    "series": [],
    "comments": "https://patches.dpdk.org/api/patches/5945/comments/",
    "check": "pending",
    "checks": "https://patches.dpdk.org/api/patches/5945/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [IPv6:::1])\n\tby dpdk.org (Postfix) with ESMTP id A7087C4D0;\n\tMon, 29 Jun 2015 19:59:49 +0200 (CEST)",
            "from stargate3.asicdesigners.com (stargate.chelsio.com\n\t[67.207.112.58]) by dpdk.org (Postfix) with ESMTP id E5E92C494\n\tfor <dev@dpdk.org>; Mon, 29 Jun 2015 19:59:33 +0200 (CEST)",
            "from localhost (scalar.blr.asicdesigners.com [10.193.185.94])\n\tby stargate3.asicdesigners.com (8.13.8/8.13.8) with ESMTP id\n\tt5THxQmX003127; Mon, 29 Jun 2015 10:59:26 -0700"
        ],
        "From": "Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>",
        "To": "dev@dpdk.org",
        "Date": "Tue, 30 Jun 2015 04:58:37 +0530",
        "Message-Id": "<06286ff88c26b3901e5c6605c6350681d9be6652.1435607714.git.rahul.lakkireddy@chelsio.com>",
        "X-Mailer": "git-send-email 2.4.1",
        "In-Reply-To": [
            "<cover.1435607714.git.rahul.lakkireddy@chelsio.com>",
            "<cover.1435607714.git.rahul.lakkireddy@chelsio.com>"
        ],
        "References": [
            "<cover.1434628361.git.rahul.lakkireddy@chelsio.com>\n\t<cover.1435607714.git.rahul.lakkireddy@chelsio.com>",
            "<cover.1435607714.git.rahul.lakkireddy@chelsio.com>"
        ],
        "Cc": "Felix Marti <felix@chelsio.com>, Kumar Sanghvi <kumaras@chelsio.com>,\n\tNirranjan Kirubaharan <nirranjan@chelsio.com>",
        "Subject": "[dpdk-dev] [PATCH v4 4/9] cxgbe: add TX support for cxgbe PMD.",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "patches and discussions about DPDK <dev.dpdk.org>",
        "List-Unsubscribe": "<http://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<http://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Adds TX support for the cxgbe poll mode driver.  This patch:\n\n1. Adds tx queue related eth_dev_ops.\n2. Adds tx_pkt_burst for transmitting packets.\n\nSigned-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>\nSigned-off-by: Kumar Sanghvi <kumaras@chelsio.com>\n---\nv4:\n- Remove unused code to fix clang compilation.\n\nv3:\n- No changes.\n\nv2:\n- This patch is a subset of patch 2/5 submitted in v1.\n- Few bug fixes for tx path.\n\n drivers/net/cxgbe/cxgbe_ethdev.c | 133 ++++++\n drivers/net/cxgbe/cxgbe_main.c   |   1 +\n drivers/net/cxgbe/sge.c          | 957 +++++++++++++++++++++++++++++++++++++++\n 3 files changed, 1091 insertions(+)",
    "diff": "diff --git a/drivers/net/cxgbe/cxgbe_ethdev.c b/drivers/net/cxgbe/cxgbe_ethdev.c\nindex 1c69973..b6e17e4 100644\n--- a/drivers/net/cxgbe/cxgbe_ethdev.c\n+++ b/drivers/net/cxgbe/cxgbe_ethdev.c\n@@ -85,6 +85,39 @@\n  */\n #include \"t4_pci_id_tbl.h\"\n \n+static uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,\n+\t\t\t\tuint16_t nb_pkts)\n+{\n+\tstruct sge_eth_txq *txq = (struct sge_eth_txq *)tx_queue;\n+\tuint16_t pkts_sent, pkts_remain;\n+\tuint16_t total_sent = 0;\n+\tint ret = 0;\n+\n+\tCXGBE_DEBUG_TX(adapter, \"%s: txq = %p; tx_pkts = %p; nb_pkts = %d\\n\",\n+\t\t       __func__, txq, tx_pkts, nb_pkts);\n+\n+\tt4_os_lock(&txq->txq_lock);\n+\t/* free up desc from already completed tx */\n+\treclaim_completed_tx(&txq->q);\n+\twhile (total_sent < nb_pkts) {\n+\t\tpkts_remain = nb_pkts - total_sent;\n+\n+\t\tfor (pkts_sent = 0; pkts_sent < pkts_remain; pkts_sent++) {\n+\t\t\tret = t4_eth_xmit(txq, tx_pkts[total_sent + pkts_sent]);\n+\t\t\tif (ret < 0)\n+\t\t\t\tbreak;\n+\t\t}\n+\t\tif (!pkts_sent)\n+\t\t\tbreak;\n+\t\ttotal_sent += pkts_sent;\n+\t\t/* reclaim as much as possible */\n+\t\treclaim_completed_tx(&txq->q);\n+\t}\n+\n+\tt4_os_unlock(&txq->txq_lock);\n+\treturn total_sent;\n+}\n+\n static uint16_t cxgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,\n \t\t\t\tuint16_t nb_pkts)\n {\n@@ -131,8 +164,11 @@ static void cxgbe_dev_info_get(struct rte_eth_dev *eth_dev,\n \tdevice_info->reta_size = pi->rss_size;\n }\n \n+static int cxgbe_dev_tx_queue_start(struct rte_eth_dev *eth_dev,\n+\t\t\t\t    uint16_t tx_queue_id);\n static int cxgbe_dev_rx_queue_start(struct rte_eth_dev *eth_dev,\n \t\t\t\t    uint16_t tx_queue_id);\n+static void cxgbe_dev_tx_queue_release(void *q);\n static void cxgbe_dev_rx_queue_release(void *q);\n \n static int cxgbe_dev_configure(struct rte_eth_dev *eth_dev)\n@@ -157,6 +193,98 @@ static int cxgbe_dev_configure(struct rte_eth_dev *eth_dev)\n \treturn 0;\n }\n \n+static int cxgbe_dev_tx_queue_start(struct rte_eth_dev *eth_dev,\n+\t\t\t\t    uint16_t tx_queue_id)\n+{\n+\tstruct sge_eth_txq *txq = (struct sge_eth_txq *)\n+\t\t\t\t  (eth_dev->data->tx_queues[tx_queue_id]);\n+\n+\tdev_debug(NULL, \"%s: tx_queue_id = %d\\n\", __func__, tx_queue_id);\n+\n+\treturn t4_sge_eth_txq_start(txq);\n+}\n+\n+static int cxgbe_dev_tx_queue_stop(struct rte_eth_dev *eth_dev,\n+\t\t\t\t   uint16_t tx_queue_id)\n+{\n+\tstruct sge_eth_txq *txq = (struct sge_eth_txq *)\n+\t\t\t\t  (eth_dev->data->tx_queues[tx_queue_id]);\n+\n+\tdev_debug(NULL, \"%s: tx_queue_id = %d\\n\", __func__, tx_queue_id);\n+\n+\treturn t4_sge_eth_txq_stop(txq);\n+}\n+\n+static int cxgbe_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,\n+\t\t\t\t    uint16_t queue_idx,\tuint16_t nb_desc,\n+\t\t\t\t    unsigned int socket_id,\n+\t\t\t\t    const struct rte_eth_txconf *tx_conf)\n+{\n+\tstruct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);\n+\tstruct adapter *adapter = pi->adapter;\n+\tstruct sge *s = &adapter->sge;\n+\tstruct sge_eth_txq *txq = &s->ethtxq[pi->first_qset + queue_idx];\n+\tint err = 0;\n+\tunsigned int temp_nb_desc;\n+\n+\tRTE_SET_USED(tx_conf);\n+\n+\tdev_debug(adapter, \"%s: eth_dev->data->nb_tx_queues = %d; queue_idx = %d; nb_desc = %d; socket_id = %d; pi->first_qset = %u\\n\",\n+\t\t  __func__, eth_dev->data->nb_tx_queues, queue_idx, nb_desc,\n+\t\t  socket_id, pi->first_qset);\n+\n+\t/*  Free up the existing queue  */\n+\tif (eth_dev->data->tx_queues[queue_idx]) 
{\n+\t\tcxgbe_dev_tx_queue_release(eth_dev->data->tx_queues[queue_idx]);\n+\t\teth_dev->data->tx_queues[queue_idx] = NULL;\n+\t}\n+\n+\teth_dev->data->tx_queues[queue_idx] = (void *)txq;\n+\n+\t/* Sanity Checking\n+\t *\n+\t * nb_desc should be > 1023 and <= CXGBE_MAX_RING_DESC_SIZE\n+\t */\n+\ttemp_nb_desc = nb_desc;\n+\tif (nb_desc < CXGBE_MIN_RING_DESC_SIZE) {\n+\t\tdev_warn(adapter, \"%s: number of descriptors must be >= %d. Using default [%d]\\n\",\n+\t\t\t __func__, CXGBE_MIN_RING_DESC_SIZE,\n+\t\t\t CXGBE_DEFAULT_TX_DESC_SIZE);\n+\t\ttemp_nb_desc = CXGBE_DEFAULT_TX_DESC_SIZE;\n+\t} else if (nb_desc > CXGBE_MAX_RING_DESC_SIZE) {\n+\t\tdev_err(adapter, \"%s: number of descriptors must be between %d and %d inclusive. Default [%d]\\n\",\n+\t\t\t__func__, CXGBE_MIN_RING_DESC_SIZE,\n+\t\t\tCXGBE_MAX_RING_DESC_SIZE, CXGBE_DEFAULT_TX_DESC_SIZE);\n+\t\treturn -(EINVAL);\n+\t}\n+\n+\ttxq->q.size = temp_nb_desc;\n+\n+\terr = t4_sge_alloc_eth_txq(adapter, txq, eth_dev, queue_idx,\n+\t\t\t\t   s->fw_evtq.cntxt_id, socket_id);\n+\n+\tdev_debug(adapter, \"%s: txq->q.cntxt_id= %d err = %d\\n\",\n+\t\t  __func__, txq->q.cntxt_id, err);\n+\n+\treturn err;\n+}\n+\n+static void cxgbe_dev_tx_queue_release(void *q)\n+{\n+\tstruct sge_eth_txq *txq = (struct sge_eth_txq *)q;\n+\n+\tif (txq) {\n+\t\tstruct port_info *pi = (struct port_info *)\n+\t\t\t\t       (txq->eth_dev->data->dev_private);\n+\t\tstruct adapter *adap = pi->adapter;\n+\n+\t\tdev_debug(adapter, \"%s: pi->port_id = %d; tx_queue_id = %d\\n\",\n+\t\t\t  __func__, pi->port_id, txq->q.cntxt_id);\n+\n+\t\tt4_sge_eth_txq_release(adap, txq);\n+\t}\n+}\n+\n static int cxgbe_dev_rx_queue_start(struct rte_eth_dev *eth_dev,\n \t\t\t\t    uint16_t rx_queue_id)\n {\n@@ -264,6 +392,10 @@ static void cxgbe_dev_rx_queue_release(void *q)\n static struct eth_dev_ops cxgbe_eth_dev_ops = {\n \t.dev_configure\t\t= cxgbe_dev_configure,\n \t.dev_infos_get\t\t= cxgbe_dev_info_get,\n+\t.tx_queue_setup         = cxgbe_dev_tx_queue_setup,\n+\t.tx_queue_start\t\t= cxgbe_dev_tx_queue_start,\n+\t.tx_queue_stop\t\t= cxgbe_dev_tx_queue_stop,\n+\t.tx_queue_release\t= cxgbe_dev_tx_queue_release,\n \t.rx_queue_setup         = cxgbe_dev_rx_queue_setup,\n \t.rx_queue_start\t\t= cxgbe_dev_rx_queue_start,\n \t.rx_queue_stop\t\t= cxgbe_dev_rx_queue_stop,\n@@ -286,6 +418,7 @@ static int eth_cxgbe_dev_init(struct rte_eth_dev *eth_dev)\n \n \teth_dev->dev_ops = &cxgbe_eth_dev_ops;\n \teth_dev->rx_pkt_burst = &cxgbe_recv_pkts;\n+\teth_dev->tx_pkt_burst = &cxgbe_xmit_pkts;\n \n \t/* for secondary processes, we don't initialise any further as primary\n \t * has already done this work.\ndiff --git a/drivers/net/cxgbe/cxgbe_main.c b/drivers/net/cxgbe/cxgbe_main.c\nindex abcef6b..3029b57 100644\n--- a/drivers/net/cxgbe/cxgbe_main.c\n+++ b/drivers/net/cxgbe/cxgbe_main.c\n@@ -1005,6 +1005,7 @@ allocate_mac:\n \t\tpi->eth_dev->data->dev_private = pi;\n \t\tpi->eth_dev->driver = adapter->eth_dev->driver;\n \t\tpi->eth_dev->dev_ops = adapter->eth_dev->dev_ops;\n+\t\tpi->eth_dev->tx_pkt_burst = adapter->eth_dev->tx_pkt_burst;\n \t\tpi->eth_dev->rx_pkt_burst = adapter->eth_dev->rx_pkt_burst;\n \t\tTAILQ_INIT(&pi->eth_dev->link_intr_cbs);\n \ndiff --git a/drivers/net/cxgbe/sge.c b/drivers/net/cxgbe/sge.c\nindex c7abd8d..4f4fdd6 100644\n--- a/drivers/net/cxgbe/sge.c\n+++ b/drivers/net/cxgbe/sge.c\n@@ -68,6 +68,9 @@\n #include \"t4_msg.h\"\n #include \"cxgbe.h\"\n \n+static inline void ship_tx_pkt_coalesce_wr(struct adapter *adap,\n+\t\t\t\t\t   struct sge_eth_txq *txq);\n+\n /*\n  * Max number of 
Rx buffers we replenish at a time.\n  */\n@@ -76,6 +79,12 @@\n #define NOMEM_TMR_IDX (SGE_NTIMERS - 1)\n \n /*\n+ * Max Tx descriptor space we allow for an Ethernet packet to be inlined\n+ * into a WR.\n+ */\n+#define MAX_IMM_TX_PKT_LEN 256\n+\n+/*\n  * Rx buffer sizes for \"usembufs\" Free List buffers (one ingress packet\n  * per mbuf buffer).  We currently only support two sizes for 1500- and\n  * 9000-byte MTUs. We could easily support more but there doesn't seem to be\n@@ -125,6 +134,81 @@ enum {\n };\n \n /**\n+ * txq_avail - return the number of available slots in a Tx queue\n+ * @q: the Tx queue\n+ *\n+ * Returns the number of descriptors in a Tx queue available to write new\n+ * packets.\n+ */\n+static inline unsigned int txq_avail(const struct sge_txq *q)\n+{\n+\treturn q->size - 1 - q->in_use;\n+}\n+\n+static int map_mbuf(struct rte_mbuf *mbuf, dma_addr_t *addr)\n+{\n+\tstruct rte_mbuf *m = mbuf;\n+\n+\tfor (; m; m = m->next, addr++) {\n+\t\t*addr = m->buf_physaddr + rte_pktmbuf_headroom(m);\n+\t\tif (*addr == 0)\n+\t\t\tgoto out_err;\n+\t}\n+\treturn 0;\n+\n+out_err:\n+\treturn -ENOMEM;\n+}\n+\n+/**\n+ * free_tx_desc - reclaims Tx descriptors and their buffers\n+ * @q: the Tx queue to reclaim descriptors from\n+ * @n: the number of descriptors to reclaim\n+ *\n+ * Reclaims Tx descriptors from an SGE Tx queue and frees the associated\n+ * Tx buffers.  Called with the Tx queue lock held.\n+ */\n+static void free_tx_desc(struct sge_txq *q, unsigned int n)\n+{\n+\tstruct tx_sw_desc *d;\n+\tunsigned int cidx = 0;\n+\n+\td = &q->sdesc[cidx];\n+\twhile (n--) {\n+\t\tif (d->mbuf) {                       /* an SGL is present */\n+\t\t\trte_pktmbuf_free(d->mbuf);\n+\t\t\td->mbuf = NULL;\n+\t\t}\n+\t\tif (d->coalesce.idx) {\n+\t\t\tint i;\n+\n+\t\t\tfor (i = 0; i < d->coalesce.idx; i++) {\n+\t\t\t\trte_pktmbuf_free(d->coalesce.mbuf[i]);\n+\t\t\t\td->coalesce.mbuf[i] = NULL;\n+\t\t\t}\n+\t\t\td->coalesce.idx = 0;\n+\t\t}\n+\t\t++d;\n+\t\tif (++cidx == q->size) {\n+\t\t\tcidx = 0;\n+\t\t\td = q->sdesc;\n+\t\t}\n+\t\tRTE_MBUF_PREFETCH_TO_FREE(&q->sdesc->mbuf->pool);\n+\t}\n+}\n+\n+static void reclaim_tx_desc(struct sge_txq *q, unsigned int n)\n+{\n+\tunsigned int cidx = q->cidx;\n+\n+\twhile (n--) {\n+\t\tif (++cidx == q->size)\n+\t\t\tcidx = 0;\n+\t}\n+\tq->cidx = cidx;\n+}\n+\n+/**\n  * fl_cap - return the capacity of a free-buffer list\n  * @fl: the FL\n  *\n@@ -375,6 +459,742 @@ static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)\n \trefill_fl(adap, fl, min(MAX_RX_REFILL, fl_cap(fl) - fl->avail));\n }\n \n+/*\n+ * Return the number of reclaimable descriptors in a Tx queue.\n+ */\n+static inline int reclaimable(const struct sge_txq *q)\n+{\n+\tint hw_cidx = ntohs(q->stat->cidx);\n+\n+\thw_cidx -= q->cidx;\n+\tif (hw_cidx < 0)\n+\t\treturn hw_cidx + q->size;\n+\treturn hw_cidx;\n+}\n+\n+/**\n+ * reclaim_completed_tx - reclaims completed Tx descriptors\n+ * @q: the Tx queue to reclaim completed descriptors from\n+ *\n+ * Reclaims Tx descriptors that the SGE has indicated it has processed.\n+ */\n+void reclaim_completed_tx(struct sge_txq *q)\n+{\n+\tunsigned int avail = reclaimable(q);\n+\n+\tdo {\n+\t\t/* reclaim as much as possible */\n+\t\treclaim_tx_desc(q, avail);\n+\t\tq->in_use -= avail;\n+\t\tavail = reclaimable(q);\n+\t} while (avail);\n+}\n+\n+/**\n+ * sgl_len - calculates the size of an SGL of the given capacity\n+ * @n: the number of SGL entries\n+ *\n+ * Calculates the number of flits needed for a scatter/gather list that\n+ * can hold the given number of 
entries.\n+ */\n+static inline unsigned int sgl_len(unsigned int n)\n+{\n+\t/*\n+\t * A Direct Scatter Gather List uses 32-bit lengths and 64-bit PCI DMA\n+\t * addresses.  The DSGL Work Request starts off with a 32-bit DSGL\n+\t * ULPTX header, then Length0, then Address0, then, for 1 <= i <= N,\n+\t * repeated sequences of { Length[i], Length[i+1], Address[i],\n+\t * Address[i+1] } (this ensures that all addresses are on 64-bit\n+\t * boundaries).  If N is even, then Length[N+1] should be set to 0 and\n+\t * Address[N+1] is omitted.\n+\t *\n+\t * The following calculation incorporates all of the above.  It's\n+\t * somewhat hard to follow but, briefly: the \"+2\" accounts for the\n+\t * first two flits which include the DSGL header, Length0 and\n+\t * Address0; the \"(3*(n-1))/2\" covers the main body of list entries (3\n+\t * flits for every pair of the remaining N) +1 if (n-1) is odd; and\n+\t * finally the \"+((n-1)&1)\" adds the one remaining flit needed if\n+\t * (n-1) is odd ...\n+\t */\n+\tn--;\n+\treturn (3 * n) / 2 + (n & 1) + 2;\n+}\n+\n+/**\n+ * flits_to_desc - returns the num of Tx descriptors for the given flits\n+ * @n: the number of flits\n+ *\n+ * Returns the number of Tx descriptors needed for the supplied number\n+ * of flits.\n+ */\n+static inline unsigned int flits_to_desc(unsigned int n)\n+{\n+\treturn DIV_ROUND_UP(n, 8);\n+}\n+\n+/**\n+ * is_eth_imm - can an Ethernet packet be sent as immediate data?\n+ * @m: the packet\n+ *\n+ * Returns whether an Ethernet packet is small enough to fit as\n+ * immediate data. Return value corresponds to the headroom required.\n+ */\n+static inline int is_eth_imm(const struct rte_mbuf *m)\n+{\n+\tunsigned int hdrlen = (m->ol_flags & PKT_TX_TCP_SEG) ?\n+\t\t\t      sizeof(struct cpl_tx_pkt_lso_core) : 0;\n+\n+\thdrlen += sizeof(struct cpl_tx_pkt);\n+\tif (m->pkt_len <= MAX_IMM_TX_PKT_LEN - hdrlen)\n+\t\treturn hdrlen;\n+\n+\treturn 0;\n+}\n+\n+/**\n+ * calc_tx_flits - calculate the number of flits for a packet Tx WR\n+ * @m: the packet\n+ *\n+ * Returns the number of flits needed for a Tx WR for the given Ethernet\n+ * packet, including the needed WR and CPL headers.\n+ */\n+static inline unsigned int calc_tx_flits(const struct rte_mbuf *m)\n+{\n+\tunsigned int flits;\n+\tint hdrlen;\n+\n+\t/*\n+\t * If the mbuf is small enough, we can pump it out as a work request\n+\t * with only immediate data.  In that case we just have to have the\n+\t * TX Packet header plus the mbuf data in the Work Request.\n+\t */\n+\n+\thdrlen = is_eth_imm(m);\n+\tif (hdrlen)\n+\t\treturn DIV_ROUND_UP(m->pkt_len + hdrlen, sizeof(__be64));\n+\n+\t/*\n+\t * Otherwise, we're going to have to construct a Scatter gather list\n+\t * of the mbuf body and fragments.  We also include the flits necessary\n+\t * for the TX Packet Work Request and CPL.  
We always have a firmware\n+\t * Write Header (incorporated as part of the cpl_tx_pkt_lso and\n+\t * cpl_tx_pkt structures), followed by either a TX Packet Write CPL\n+\t * message or, if we're doing a Large Send Offload, an LSO CPL message\n+\t * with an embeded TX Packet Write CPL message.\n+\t */\n+\tflits = sgl_len(m->nb_segs);\n+\tif (m->tso_segsz)\n+\t\tflits += (sizeof(struct fw_eth_tx_pkt_wr) +\n+\t\t\t  sizeof(struct cpl_tx_pkt_lso_core) +\n+\t\t\t  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);\n+\telse\n+\t\tflits += (sizeof(struct fw_eth_tx_pkt_wr) +\n+\t\t\t  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);\n+\treturn flits;\n+}\n+\n+/**\n+ * write_sgl - populate a scatter/gather list for a packet\n+ * @mbuf: the packet\n+ * @q: the Tx queue we are writing into\n+ * @sgl: starting location for writing the SGL\n+ * @end: points right after the end of the SGL\n+ * @start: start offset into mbuf main-body data to include in the SGL\n+ * @addr: address of mapped region\n+ *\n+ * Generates a scatter/gather list for the buffers that make up a packet.\n+ * The caller must provide adequate space for the SGL that will be written.\n+ * The SGL includes all of the packet's page fragments and the data in its\n+ * main body except for the first @start bytes.  @sgl must be 16-byte\n+ * aligned and within a Tx descriptor with available space.  @end points\n+ * write after the end of the SGL but does not account for any potential\n+ * wrap around, i.e., @end > @sgl.\n+ */\n+static void write_sgl(struct rte_mbuf *mbuf, struct sge_txq *q,\n+\t\t      struct ulptx_sgl *sgl, u64 *end, unsigned int start,\n+\t\t      const dma_addr_t *addr)\n+{\n+\tunsigned int i, len;\n+\tstruct ulptx_sge_pair *to;\n+\tstruct rte_mbuf *m = mbuf;\n+\tunsigned int nfrags = m->nb_segs;\n+\tstruct ulptx_sge_pair buf[nfrags / 2];\n+\n+\tlen = m->data_len - start;\n+\tsgl->len0 = htonl(len);\n+\tsgl->addr0 = rte_cpu_to_be_64(addr[0]);\n+\n+\tsgl->cmd_nsge = htonl(V_ULPTX_CMD(ULP_TX_SC_DSGL) |\n+\t\t\t      V_ULPTX_NSGE(nfrags));\n+\tif (likely(--nfrags == 0))\n+\t\treturn;\n+\t/*\n+\t * Most of the complexity below deals with the possibility we hit the\n+\t * end of the queue in the middle of writing the SGL.  For this case\n+\t * only we create the SGL in a temporary buffer and then copy it.\n+\t */\n+\tto = (u8 *)end > (u8 *)q->stat ? buf : sgl->sge;\n+\n+\tfor (i = 0; nfrags >= 2; nfrags -= 2, to++) {\n+\t\tm = m->next;\n+\t\tto->len[0] = rte_cpu_to_be_32(m->data_len);\n+\t\tto->addr[0] = rte_cpu_to_be_64(addr[++i]);\n+\t\tm = m->next;\n+\t\tto->len[1] = rte_cpu_to_be_32(m->data_len);\n+\t\tto->addr[1] = rte_cpu_to_be_64(addr[++i]);\n+\t}\n+\tif (nfrags) {\n+\t\tm = m->next;\n+\t\tto->len[0] = rte_cpu_to_be_32(m->data_len);\n+\t\tto->len[1] = rte_cpu_to_be_32(0);\n+\t\tto->addr[0] = rte_cpu_to_be_64(addr[i + 1]);\n+\t}\n+\tif (unlikely((u8 *)end > (u8 *)q->stat)) {\n+\t\tunsigned int part0 = RTE_PTR_DIFF((u8 *)q->stat,\n+\t\t\t\t\t\t  (u8 *)sgl->sge);\n+\t\tunsigned int part1;\n+\n+\t\tif (likely(part0))\n+\t\t\tmemcpy(sgl->sge, buf, part0);\n+\t\tpart1 = RTE_PTR_DIFF((u8 *)end, (u8 *)q->stat);\n+\t\trte_memcpy(q->desc, RTE_PTR_ADD((u8 *)buf, part0), part1);\n+\t\tend = RTE_PTR_ADD((void *)q->desc, part1);\n+\t}\n+\tif ((uintptr_t)end & 8)           /* 0-pad to multiple of 16 */\n+\t\t*(u64 *)end = 0;\n+}\n+\n+#define IDXDIFF(head, tail, wrap) \\\n+\t((head) >= (tail) ? 
(head) - (tail) : (wrap) - (tail) + (head))\n+\n+#define Q_IDXDIFF(q, idx) IDXDIFF((q)->pidx, (q)->idx, (q)->size)\n+\n+/**\n+ * ring_tx_db - ring a Tx queue's doorbell\n+ * @adap: the adapter\n+ * @q: the Tx queue\n+ * @n: number of new descriptors to give to HW\n+ *\n+ * Ring the doorbel for a Tx queue.\n+ */\n+static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q)\n+{\n+\tint n = Q_IDXDIFF(q, dbidx);\n+\n+\t/*\n+\t * Make sure that all writes to the TX Descriptors are committed\n+\t * before we tell the hardware about them.\n+\t */\n+\trte_wmb();\n+\n+\t/*\n+\t * If we don't have access to the new User Doorbell (T5+), use the old\n+\t * doorbell mechanism; otherwise use the new BAR2 mechanism.\n+\t */\n+\tif (unlikely(!q->bar2_addr)) {\n+\t\tu32 val = V_PIDX(n);\n+\n+\t\t/*\n+\t\t * For T4 we need to participate in the Doorbell Recovery\n+\t\t * mechanism.\n+\t\t */\n+\t\tif (!q->db_disabled)\n+\t\t\tt4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL),\n+\t\t\t\t     V_QID(q->cntxt_id) | val);\n+\t\telse\n+\t\t\tq->db_pidx_inc += n;\n+\t\tq->db_pidx = q->pidx;\n+\t} else {\n+\t\tu32 val = V_PIDX_T5(n);\n+\n+\t\t/*\n+\t\t * T4 and later chips share the same PIDX field offset within\n+\t\t * the doorbell, but T5 and later shrank the field in order to\n+\t\t * gain a bit for Doorbell Priority.  The field was absurdly\n+\t\t * large in the first place (14 bits) so we just use the T5\n+\t\t * and later limits and warn if a Queue ID is too large.\n+\t\t */\n+\t\tWARN_ON(val & F_DBPRIO);\n+\n+\t\twritel(val | V_QID(q->bar2_qid),\n+\t\t       (void *)((uintptr_t)q->bar2_addr + SGE_UDB_KDOORBELL));\n+\n+\t\t/*\n+\t\t * This Write Memory Barrier will force the write to the User\n+\t\t * Doorbell area to be flushed.  This is needed to prevent\n+\t\t * writes on different CPUs for the same queue from hitting\n+\t\t * the adapter out of order.  
This is required when some Work\n+\t\t * Requests take the Write Combine Gather Buffer path (user\n+\t\t * doorbell area offset [SGE_UDB_WCDOORBELL..+63]) and some\n+\t\t * take the traditional path where we simply increment the\n+\t\t * PIDX (User Doorbell area SGE_UDB_KDOORBELL) and have the\n+\t\t * hardware DMA read the actual Work Request.\n+\t\t */\n+\t\trte_wmb();\n+\t}\n+\tq->dbidx = q->pidx;\n+}\n+\n+/*\n+ * Figure out what HW csum a packet wants and return the appropriate control\n+ * bits.\n+ */\n+static u64 hwcsum(enum chip_type chip, const struct rte_mbuf *m)\n+{\n+\tint csum_type;\n+\n+\tif (m->ol_flags & PKT_TX_IP_CKSUM) {\n+\t\tswitch (m->ol_flags & PKT_TX_L4_MASK) {\n+\t\tcase PKT_TX_TCP_CKSUM:\n+\t\t\tcsum_type = TX_CSUM_TCPIP;\n+\t\t\tbreak;\n+\t\tcase PKT_TX_UDP_CKSUM:\n+\t\t\tcsum_type = TX_CSUM_UDPIP;\n+\t\t\tbreak;\n+\t\tdefault:\n+\t\t\tgoto nocsum;\n+\t\t}\n+\t} else {\n+\t\tgoto nocsum;\n+\t}\n+\n+\tif (likely(csum_type >= TX_CSUM_TCPIP)) {\n+\t\tint hdr_len = V_TXPKT_IPHDR_LEN(m->l3_len);\n+\t\tint eth_hdr_len = m->l2_len;\n+\n+\t\tif (CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5)\n+\t\t\thdr_len |= V_TXPKT_ETHHDR_LEN(eth_hdr_len);\n+\t\telse\n+\t\t\thdr_len |= V_T6_TXPKT_ETHHDR_LEN(eth_hdr_len);\n+\t\treturn V_TXPKT_CSUM_TYPE(csum_type) | hdr_len;\n+\t}\n+nocsum:\n+\t/*\n+\t * unknown protocol, disable HW csum\n+\t * and hope a bad packet is detected\n+\t */\n+\treturn F_TXPKT_L4CSUM_DIS;\n+}\n+\n+static inline void txq_advance(struct sge_txq *q, unsigned int n)\n+{\n+\tq->in_use += n;\n+\tq->pidx += n;\n+\tif (q->pidx >= q->size)\n+\t\tq->pidx -= q->size;\n+}\n+\n+#define MAX_COALESCE_LEN 64000\n+\n+static inline int wraps_around(struct sge_txq *q, int ndesc)\n+{\n+\treturn (q->pidx + ndesc) > q->size ? 1 : 0;\n+}\n+\n+static void tx_timer_cb(void *data)\n+{\n+\tstruct adapter *adap = (struct adapter *)data;\n+\tstruct sge_eth_txq *txq = &adap->sge.ethtxq[0];\n+\tint i;\n+\n+\t/* monitor any pending tx */\n+\tfor (i = 0; i < adap->sge.max_ethqsets; i++, txq++) {\n+\t\tt4_os_lock(&txq->txq_lock);\n+\t\tif (txq->q.coalesce.idx) {\n+\t\t\tif (txq->q.coalesce.idx == txq->q.last_coal_idx &&\n+\t\t\t    txq->q.pidx == txq->q.last_pidx) {\n+\t\t\t\tship_tx_pkt_coalesce_wr(adap, txq);\n+\t\t\t} else {\n+\t\t\t\ttxq->q.last_coal_idx = txq->q.coalesce.idx;\n+\t\t\t\ttxq->q.last_pidx = txq->q.pidx;\n+\t\t\t}\n+\t\t}\n+\t\tt4_os_unlock(&txq->txq_lock);\n+\t}\n+\trte_eal_alarm_set(50, tx_timer_cb, (void *)adap);\n+}\n+\n+/**\n+ * ship_tx_pkt_coalesce_wr - finalizes and ships a coalesce WR\n+ * @ adap: adapter structure\n+ * @txq: tx queue\n+ *\n+ * writes the different fields of the pkts WR and sends it.\n+ */\n+static inline void ship_tx_pkt_coalesce_wr(struct adapter *adap,\n+\t\t\t\t\t   struct sge_eth_txq *txq)\n+{\n+\tu32 wr_mid;\n+\tstruct sge_txq *q = &txq->q;\n+\tstruct fw_eth_tx_pkts_wr *wr;\n+\tunsigned int ndesc;\n+\n+\t/* fill the pkts WR header */\n+\twr = (void *)&q->desc[q->pidx];\n+\twr->op_pkd = htonl(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));\n+\n+\twr_mid = V_FW_WR_LEN16(DIV_ROUND_UP(q->coalesce.flits, 2));\n+\tndesc = flits_to_desc(q->coalesce.flits);\n+\twr->equiq_to_len16 = htonl(wr_mid);\n+\twr->plen = cpu_to_be16(q->coalesce.len);\n+\twr->npkt = q->coalesce.idx;\n+\twr->r3 = 0;\n+\twr->type = q->coalesce.type;\n+\n+\t/* zero out coalesce structure members */\n+\tq->coalesce.idx = 0;\n+\tq->coalesce.flits = 0;\n+\tq->coalesce.len = 0;\n+\n+\ttxq_advance(q, ndesc);\n+\ttxq->stats.coal_wr++;\n+\ttxq->stats.coal_pkts += wr->npkt;\n+\n+\tif (Q_IDXDIFF(q, equeidx) >= 
q->size / 2) {\n+\t\tq->equeidx = q->pidx;\n+\t\twr_mid |= F_FW_WR_EQUEQ;\n+\t\twr->equiq_to_len16 = htonl(wr_mid);\n+\t}\n+\tring_tx_db(adap, q);\n+}\n+\n+/**\n+ * should_tx_packet_coalesce - decides wether to coalesce an mbuf or not\n+ * @txq: tx queue where the mbuf is sent\n+ * @mbuf: mbuf to be sent\n+ * @nflits: return value for number of flits needed\n+ * @adap: adapter structure\n+ *\n+ * This function decides if a packet should be coalesced or not.\n+ */\n+static inline int should_tx_packet_coalesce(struct sge_eth_txq *txq,\n+\t\t\t\t\t    struct rte_mbuf *mbuf,\n+\t\t\t\t\t    unsigned int *nflits,\n+\t\t\t\t\t    struct adapter *adap)\n+{\n+\tstruct sge_txq *q = &txq->q;\n+\tunsigned int flits, ndesc;\n+\tunsigned char type = 0;\n+\tint credits, hw_cidx = ntohs(q->stat->cidx);\n+\tint in_use = q->pidx - hw_cidx + flits_to_desc(q->coalesce.flits);\n+\n+\t/* use coal WR type 1 when no frags are present */\n+\ttype = (mbuf->nb_segs == 1) ? 1 : 0;\n+\n+\tif (in_use < 0)\n+\t\tin_use += q->size;\n+\n+\tif (unlikely(type != q->coalesce.type && q->coalesce.idx))\n+\t\tship_tx_pkt_coalesce_wr(adap, txq);\n+\n+\t/* calculate the number of flits required for coalescing this packet\n+\t * without the 2 flits of the WR header. These are added further down\n+\t * if we are just starting in new PKTS WR. sgl_len doesn't account for\n+\t * the possible 16 bytes alignment ULP TX commands so we do it here.\n+\t */\n+\tflits = (sgl_len(mbuf->nb_segs) + 1) & ~1U;\n+\tif (type == 0)\n+\t\tflits += (sizeof(struct ulp_txpkt) +\n+\t\t\t  sizeof(struct ulptx_idata)) / sizeof(__be64);\n+\tflits += sizeof(struct cpl_tx_pkt_core) / sizeof(__be64);\n+\t*nflits = flits;\n+\n+\t/* If coalescing is on, the mbuf is added to a pkts WR */\n+\tif (q->coalesce.idx) {\n+\t\tndesc = DIV_ROUND_UP(q->coalesce.flits + flits, 8);\n+\t\tcredits = txq_avail(q) - ndesc;\n+\n+\t\t/* If we are wrapping or this is last mbuf then, send the\n+\t\t * already coalesced mbufs and let the non-coalesce pass\n+\t\t * handle the mbuf.\n+\t\t */\n+\t\tif (unlikely(credits < 0 || wraps_around(q, ndesc))) {\n+\t\t\tship_tx_pkt_coalesce_wr(adap, txq);\n+\t\t\treturn 0;\n+\t\t}\n+\n+\t\t/* If the max coalesce len or the max WR len is reached\n+\t\t * ship the WR and keep coalescing on.\n+\t\t */\n+\t\tif (unlikely((q->coalesce.len + mbuf->pkt_len >\n+\t\t\t\t\t\tMAX_COALESCE_LEN) ||\n+\t\t\t     (q->coalesce.flits + flits >\n+\t\t\t      q->coalesce.max))) {\n+\t\t\tship_tx_pkt_coalesce_wr(adap, txq);\n+\t\t\tgoto new;\n+\t\t}\n+\t\treturn 1;\n+\t}\n+\n+new:\n+\t/* start a new pkts WR, the WR header is not filled below */\n+\tflits += sizeof(struct fw_eth_tx_pkts_wr) / sizeof(__be64);\n+\tndesc = flits_to_desc(q->coalesce.flits + flits);\n+\tcredits = txq_avail(q) - ndesc;\n+\n+\tif (unlikely(credits < 0 || wraps_around(q, ndesc)))\n+\t\treturn 0;\n+\tq->coalesce.flits += 2;\n+\tq->coalesce.type = type;\n+\tq->coalesce.ptr = (unsigned char *)&q->desc[q->pidx] +\n+\t\t\t   2 * sizeof(__be64);\n+\treturn 1;\n+}\n+\n+/**\n+ * tx_do_packet_coalesce - add an mbuf to a coalesce WR\n+ * @txq: sge_eth_txq used send the mbuf\n+ * @mbuf: mbuf to be sent\n+ * @flits: flits needed for this mbuf\n+ * @adap: adapter structure\n+ * @pi: port_info structure\n+ * @addr: mapped address of the mbuf\n+ *\n+ * Adds an mbuf to be sent as part of a coalesce WR by filling a\n+ * ulp_tx_pkt command, ulp_tx_sc_imm command, cpl message and\n+ * ulp_tx_sc_dsgl command.\n+ */\n+static inline int tx_do_packet_coalesce(struct sge_eth_txq *txq,\n+\t\t\t\t\tstruct rte_mbuf 
*mbuf,\n+\t\t\t\t\tint flits, struct adapter *adap,\n+\t\t\t\t\tconst struct port_info *pi,\n+\t\t\t\t\tdma_addr_t *addr)\n+{\n+\tu64 cntrl, *end;\n+\tstruct sge_txq *q = &txq->q;\n+\tstruct ulp_txpkt *mc;\n+\tstruct ulptx_idata *sc_imm;\n+\tstruct cpl_tx_pkt_core *cpl;\n+\tstruct tx_sw_desc *sd;\n+\tunsigned int idx = q->coalesce.idx, len = mbuf->pkt_len;\n+\n+\tif (q->coalesce.type == 0) {\n+\t\tmc = (struct ulp_txpkt *)q->coalesce.ptr;\n+\t\tmc->cmd_dest = htonl(V_ULPTX_CMD(4) | V_ULP_TXPKT_DEST(0) |\n+\t\t\t\t     V_ULP_TXPKT_FID(adap->sge.fw_evtq.cntxt_id) |\n+\t\t\t\t     F_ULP_TXPKT_RO);\n+\t\tmc->len = htonl(DIV_ROUND_UP(flits, 2));\n+\t\tsc_imm = (struct ulptx_idata *)(mc + 1);\n+\t\tsc_imm->cmd_more = htonl(V_ULPTX_CMD(ULP_TX_SC_IMM) |\n+\t\t\t\t\t F_ULP_TX_SC_MORE);\n+\t\tsc_imm->len = htonl(sizeof(*cpl));\n+\t\tend = (u64 *)mc + flits;\n+\t\tcpl = (struct cpl_tx_pkt_core *)(sc_imm + 1);\n+\t} else {\n+\t\tend = (u64 *)q->coalesce.ptr + flits;\n+\t\tcpl = (struct cpl_tx_pkt_core *)q->coalesce.ptr;\n+\t}\n+\n+\t/* update coalesce structure for this txq */\n+\tq->coalesce.flits += flits;\n+\tq->coalesce.ptr += flits * sizeof(__be64);\n+\tq->coalesce.len += mbuf->pkt_len;\n+\n+\t/* fill the cpl message, same as in t4_eth_xmit, this should be kept\n+\t * similar to t4_eth_xmit\n+\t */\n+\tif (mbuf->ol_flags & PKT_TX_IP_CKSUM) {\n+\t\tcntrl = hwcsum(adap->params.chip, mbuf) |\n+\t\t\t       F_TXPKT_IPCSUM_DIS;\n+\t\ttxq->stats.tx_cso++;\n+\t} else {\n+\t\tcntrl = F_TXPKT_L4CSUM_DIS | F_TXPKT_IPCSUM_DIS;\n+\t}\n+\n+\tif (mbuf->ol_flags & PKT_TX_VLAN_PKT) {\n+\t\ttxq->stats.vlan_ins++;\n+\t\tcntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(mbuf->vlan_tci);\n+\t}\n+\n+\tcpl->ctrl0 = htonl(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |\n+\t\t\t   V_TXPKT_INTF(pi->tx_chan) |\n+\t\t\t   V_TXPKT_PF(adap->pf));\n+\tcpl->pack = htons(0);\n+\tcpl->len = htons(len);\n+\tcpl->ctrl1 = cpu_to_be64(cntrl);\n+\twrite_sgl(mbuf, q, (struct ulptx_sgl *)(cpl + 1), end, 0,  addr);\n+\ttxq->stats.pkts++;\n+\ttxq->stats.tx_bytes += len;\n+\n+\tsd = &q->sdesc[q->pidx + (idx >> 1)];\n+\tif (!(idx & 1)) {\n+\t\tif (sd->coalesce.idx) {\n+\t\t\tint i;\n+\n+\t\t\tfor (i = 0; i < sd->coalesce.idx; i++) {\n+\t\t\t\trte_pktmbuf_free(sd->coalesce.mbuf[i]);\n+\t\t\t\tsd->coalesce.mbuf[i] = NULL;\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\t/* store pointers to the mbuf and the sgl used in free_tx_desc.\n+\t * each tx desc can hold two pointers corresponding to the value\n+\t * of ETH_COALESCE_PKT_PER_DESC\n+\t */\n+\tsd->coalesce.mbuf[idx & 1] = mbuf;\n+\tsd->coalesce.sgl[idx & 1] = (struct ulptx_sgl *)(cpl + 1);\n+\tsd->coalesce.idx = (idx & 1) + 1;\n+\n+\t/* send the coaelsced work request if max reached */\n+\tif (++q->coalesce.idx == ETH_COALESCE_PKT_NUM)\n+\t\tship_tx_pkt_coalesce_wr(adap, txq);\n+\treturn 0;\n+}\n+\n+/**\n+ * t4_eth_xmit - add a packet to an Ethernet Tx queue\n+ * @txq: the egress queue\n+ * @mbuf: the packet\n+ *\n+ * Add a packet to an SGE Ethernet Tx queue.  
Runs with softirqs disabled.\n+ */\n+int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf)\n+{\n+\tconst struct port_info *pi;\n+\tstruct cpl_tx_pkt_lso_core *lso;\n+\tstruct adapter *adap;\n+\tstruct rte_mbuf *m = mbuf;\n+\tstruct fw_eth_tx_pkt_wr *wr;\n+\tstruct cpl_tx_pkt_core *cpl;\n+\tstruct tx_sw_desc *d;\n+\tdma_addr_t addr[m->nb_segs];\n+\tunsigned int flits, ndesc, cflits;\n+\tint l3hdr_len, l4hdr_len, eth_xtra_len;\n+\tint len, last_desc;\n+\tint credits;\n+\tu32 wr_mid;\n+\tu64 cntrl, *end;\n+\tbool v6;\n+\n+\t/* Reject xmit if queue is stopped */\n+\tif (unlikely(txq->flags & EQ_STOPPED))\n+\t\treturn -(EBUSY);\n+\n+\t/*\n+\t * The chip min packet length is 10 octets but play safe and reject\n+\t * anything shorter than an Ethernet header.\n+\t */\n+\tif (unlikely(m->pkt_len < ETHER_HDR_LEN)) {\n+out_free:\n+\t\trte_pktmbuf_free(m);\n+\t\treturn 0;\n+\t}\n+\n+\trte_prefetch0(&((&txq->q)->sdesc->mbuf->pool));\n+\tpi = (struct port_info *)txq->eth_dev->data->dev_private;\n+\tadap = pi->adapter;\n+\n+\tcntrl = F_TXPKT_L4CSUM_DIS | F_TXPKT_IPCSUM_DIS;\n+\t/* align the end of coalesce WR to a 512 byte boundary */\n+\ttxq->q.coalesce.max = (8 - (txq->q.pidx & 7)) * 8;\n+\n+\tif (!(m->ol_flags & PKT_TX_TCP_SEG)) {\n+\t\tif (should_tx_packet_coalesce(txq, mbuf, &cflits, adap)) {\n+\t\t\tif (unlikely(map_mbuf(mbuf, addr) < 0)) {\n+\t\t\t\tdev_warn(adap, \"%s: mapping err for coalesce\\n\",\n+\t\t\t\t\t __func__);\n+\t\t\t\ttxq->stats.mapping_err++;\n+\t\t\t\tgoto out_free;\n+\t\t\t}\n+\t\t\treturn tx_do_packet_coalesce(txq, mbuf, cflits, adap,\n+\t\t\t\t\t\t     pi, addr);\n+\t\t} else {\n+\t\t\treturn -EBUSY;\n+\t\t}\n+\t}\n+\n+\tif (txq->q.coalesce.idx)\n+\t\tship_tx_pkt_coalesce_wr(adap, txq);\n+\n+\tflits = calc_tx_flits(m);\n+\tndesc = flits_to_desc(flits);\n+\tcredits = txq_avail(&txq->q) - ndesc;\n+\n+\tif (unlikely(credits < 0)) {\n+\t\tdev_debug(adap, \"%s: Tx ring %u full; credits = %d\\n\",\n+\t\t\t  __func__, txq->q.cntxt_id, credits);\n+\t\treturn -EBUSY;\n+\t}\n+\n+\tif (unlikely(map_mbuf(m, addr) < 0)) {\n+\t\ttxq->stats.mapping_err++;\n+\t\tgoto out_free;\n+\t}\n+\n+\twr_mid = V_FW_WR_LEN16(DIV_ROUND_UP(flits, 2));\n+\tif (Q_IDXDIFF(&txq->q, equeidx)  >= 64) {\n+\t\ttxq->q.equeidx = txq->q.pidx;\n+\t\twr_mid |= F_FW_WR_EQUEQ;\n+\t}\n+\n+\twr = (void *)&txq->q.desc[txq->q.pidx];\n+\twr->equiq_to_len16 = htonl(wr_mid);\n+\twr->r3 = rte_cpu_to_be_64(0);\n+\tend = (u64 *)wr + flits;\n+\n+\tlen = 0;\n+\tlen += sizeof(*cpl);\n+\tlso = (void *)(wr + 1);\n+\tv6 = (m->ol_flags & PKT_TX_IPV6) != 0;\n+\tl3hdr_len = m->l3_len;\n+\tl4hdr_len = m->l4_len;\n+\teth_xtra_len = m->l2_len - ETHER_HDR_LEN;\n+\tlen += sizeof(*lso);\n+\twr->op_immdlen = htonl(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |\n+\t\t\t       V_FW_WR_IMMDLEN(len));\n+\tlso->lso_ctrl = htonl(V_LSO_OPCODE(CPL_TX_PKT_LSO) |\n+\t\t\t      F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE |\n+\t\t\t      V_LSO_IPV6(v6) |\n+\t\t\t      V_LSO_ETHHDR_LEN(eth_xtra_len / 4) |\n+\t\t\t      V_LSO_IPHDR_LEN(l3hdr_len / 4) |\n+\t\t\t      V_LSO_TCPHDR_LEN(l4hdr_len / 4));\n+\tlso->ipid_ofst = htons(0);\n+\tlso->mss = htons(m->tso_segsz);\n+\tlso->seqno_offset = htonl(0);\n+\tif (is_t4(adap->params.chip))\n+\t\tlso->len = htonl(m->pkt_len);\n+\telse\n+\t\tlso->len = htonl(V_LSO_T5_XFER_SIZE(m->pkt_len));\n+\tcpl = (void *)(lso + 1);\n+\tcntrl = V_TXPKT_CSUM_TYPE(v6 ? 
TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |\n+\t\t\t\t  V_TXPKT_IPHDR_LEN(l3hdr_len) |\n+\t\t\t\t  V_TXPKT_ETHHDR_LEN(eth_xtra_len);\n+\ttxq->stats.tso++;\n+\ttxq->stats.tx_cso += m->tso_segsz;\n+\n+\tif (m->ol_flags & PKT_TX_VLAN_PKT) {\n+\t\ttxq->stats.vlan_ins++;\n+\t\tcntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->vlan_tci);\n+\t}\n+\n+\tcpl->ctrl0 = htonl(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |\n+\t\t\t   V_TXPKT_INTF(pi->tx_chan) |\n+\t\t\t   V_TXPKT_PF(adap->pf));\n+\tcpl->pack = htons(0);\n+\tcpl->len = htons(m->pkt_len);\n+\tcpl->ctrl1 = cpu_to_be64(cntrl);\n+\n+\ttxq->stats.pkts++;\n+\ttxq->stats.tx_bytes += m->pkt_len;\n+\tlast_desc = txq->q.pidx + ndesc - 1;\n+\tif (last_desc >= (int)txq->q.size)\n+\t\tlast_desc -= txq->q.size;\n+\n+\td = &txq->q.sdesc[last_desc];\n+\tif (d->mbuf) {\n+\t\trte_pktmbuf_free(d->mbuf);\n+\t\td->mbuf = NULL;\n+\t}\n+\twrite_sgl(m, &txq->q, (struct ulptx_sgl *)(cpl + 1), end, 0,\n+\t\t  addr);\n+\ttxq->q.sdesc[last_desc].mbuf = m;\n+\ttxq->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)(cpl + 1);\n+\ttxq_advance(&txq->q, ndesc);\n+\tring_tx_db(adap, &txq->q);\n+\treturn 0;\n+}\n+\n /**\n  * alloc_ring - allocate resources for an SGE descriptor ring\n  * @dev: the PCI device's core device\n@@ -1004,6 +1824,121 @@ err:\n \treturn ret;\n }\n \n+static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)\n+{\n+\tq->cntxt_id = id;\n+\tq->bar2_addr = bar2_address(adap, q->cntxt_id, T4_BAR2_QTYPE_EGRESS,\n+\t\t\t\t    &q->bar2_qid);\n+\tq->cidx = 0;\n+\tq->pidx = 0;\n+\tq->dbidx = 0;\n+\tq->in_use = 0;\n+\tq->equeidx = 0;\n+\tq->coalesce.idx = 0;\n+\tq->coalesce.len = 0;\n+\tq->coalesce.flits = 0;\n+\tq->last_coal_idx = 0;\n+\tq->last_pidx = 0;\n+\tq->stat = (void *)&q->desc[q->size];\n+}\n+\n+int t4_sge_eth_txq_start(struct sge_eth_txq *txq)\n+{\n+\t/*\n+\t *  TODO: For flow-control, queue may be stopped waiting to reclaim\n+\t *  credits.\n+\t *  Ensure queue is in EQ_STOPPED state before starting it.\n+\t */\n+\tif (!(txq->flags & EQ_STOPPED))\n+\t\treturn -(EBUSY);\n+\n+\ttxq->flags &= ~EQ_STOPPED;\n+\n+\treturn 0;\n+}\n+\n+int t4_sge_eth_txq_stop(struct sge_eth_txq *txq)\n+{\n+\ttxq->flags |= EQ_STOPPED;\n+\n+\treturn 0;\n+}\n+\n+int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,\n+\t\t\t struct rte_eth_dev *eth_dev, uint16_t queue_id,\n+\t\t\t unsigned int iqid, int socket_id)\n+{\n+\tint ret, nentries;\n+\tstruct fw_eq_eth_cmd c;\n+\tstruct sge *s = &adap->sge;\n+\tstruct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);\n+\tchar z_name[RTE_MEMZONE_NAMESIZE];\n+\tchar z_name_sw[RTE_MEMZONE_NAMESIZE];\n+\n+\t/* Add status entries */\n+\tnentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);\n+\n+\tsnprintf(z_name, sizeof(z_name), \"%s_%s_%d_%d\",\n+\t\t eth_dev->driver->pci_drv.name, \"tx_ring\",\n+\t\t eth_dev->data->port_id, queue_id);\n+\tsnprintf(z_name_sw, sizeof(z_name_sw), \"%s_sw_ring\", z_name);\n+\n+\ttxq->q.desc = alloc_ring(txq->q.size, sizeof(struct tx_desc),\n+\t\t\t\t sizeof(struct tx_sw_desc), &txq->q.phys_addr,\n+\t\t\t\t &txq->q.sdesc, s->stat_len, queue_id,\n+\t\t\t\t socket_id, z_name, z_name_sw);\n+\tif (!txq->q.desc)\n+\t\treturn -ENOMEM;\n+\n+\tmemset(&c, 0, sizeof(c));\n+\tc.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |\n+\t\t\t    F_FW_CMD_WRITE | F_FW_CMD_EXEC |\n+\t\t\t    V_FW_EQ_ETH_CMD_PFN(adap->pf) |\n+\t\t\t    V_FW_EQ_ETH_CMD_VFN(0));\n+\tc.alloc_to_len16 = htonl(F_FW_EQ_ETH_CMD_ALLOC |\n+\t\t\t\t F_FW_EQ_ETH_CMD_EQSTART | (sizeof(c) / 
16));\n+\tc.autoequiqe_to_viid = htonl(F_FW_EQ_ETH_CMD_AUTOEQUEQE |\n+\t\t\t\t     V_FW_EQ_ETH_CMD_VIID(pi->viid));\n+\tc.fetchszm_to_iqid =\n+\t\thtonl(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) |\n+\t\t      V_FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) |\n+\t\t      F_FW_EQ_ETH_CMD_FETCHRO | V_FW_EQ_ETH_CMD_IQID(iqid));\n+\tc.dcaen_to_eqsize =\n+\t\thtonl(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |\n+\t\t      V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |\n+\t\t      V_FW_EQ_ETH_CMD_EQSIZE(nentries));\n+\tc.eqaddr = rte_cpu_to_be_64(txq->q.phys_addr);\n+\n+\tret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);\n+\tif (ret) {\n+\t\trte_free(txq->q.sdesc);\n+\t\ttxq->q.sdesc = NULL;\n+\t\ttxq->q.desc = NULL;\n+\t\treturn ret;\n+\t}\n+\n+\tinit_txq(adap, &txq->q, G_FW_EQ_ETH_CMD_EQID(ntohl(c.eqid_pkd)));\n+\ttxq->stats.tso = 0;\n+\ttxq->stats.pkts = 0;\n+\ttxq->stats.tx_cso = 0;\n+\ttxq->stats.coal_wr = 0;\n+\ttxq->stats.vlan_ins = 0;\n+\ttxq->stats.tx_bytes = 0;\n+\ttxq->stats.coal_pkts = 0;\n+\ttxq->stats.mapping_err = 0;\n+\ttxq->flags |= EQ_STOPPED;\n+\ttxq->eth_dev = eth_dev;\n+\tt4_os_lock_init(&txq->txq_lock);\n+\treturn 0;\n+}\n+\n+static void free_txq(struct sge_txq *q)\n+{\n+\tq->cntxt_id = 0;\n+\tq->sdesc = NULL;\n+\tq->desc = NULL;\n+}\n+\n static void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,\n \t\t\t struct sge_fl *fl)\n {\n@@ -1032,6 +1967,28 @@ void t4_sge_eth_rxq_release(struct adapter *adap, struct sge_eth_rxq *rxq)\n \t}\n }\n \n+void t4_sge_eth_txq_release(struct adapter *adap, struct sge_eth_txq *txq)\n+{\n+\tif (txq->q.desc) {\n+\t\tt4_sge_eth_txq_stop(txq);\n+\t\treclaim_completed_tx(&txq->q);\n+\t\tt4_eth_eq_free(adap, adap->mbox, adap->pf, 0, txq->q.cntxt_id);\n+\t\tfree_tx_desc(&txq->q, txq->q.size);\n+\t\trte_free(txq->q.sdesc);\n+\t\tfree_txq(&txq->q);\n+\t}\n+}\n+\n+void t4_sge_tx_monitor_start(struct adapter *adap)\n+{\n+\trte_eal_alarm_set(50, tx_timer_cb, (void *)adap);\n+}\n+\n+void t4_sge_tx_monitor_stop(struct adapter *adap)\n+{\n+\trte_eal_alarm_cancel(tx_timer_cb, (void *)adap);\n+}\n+\n /**\n  * t4_sge_init - initialize SGE\n  * @adap: the adapter\n",
    "prefixes": [
        "dpdk-dev",
        "v4",
        "4/9"
    ]
}
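
As a usage note, the fields above can be consumed directly by client code. The snippet below is a sketch (it assumes only the requests library and the fields shown in the response): it reads the headline metadata and fetches the raw patch through the "mbox" URL, which serves the full message in a format suitable for git am.

    import requests

    patch = requests.get("https://patches.dpdk.org/api/patches/5945/").json()

    # Headline metadata from the JSON above
    print(patch["name"])                   # [dpdk-dev,v4,4/9] cxgbe: add TX support for cxgbe PMD.
    print(patch["state"], patch["check"])  # accepted pending
    print(patch["submitter"]["email"])     # rahul.lakkireddy@chelsio.com

    # The mbox URL returns the complete mail (headers, commit message, and diff)
    mbox = requests.get(patch["mbox"]).text
    with open("cxgbe-tx-support.mbox", "w") as f:
        f.write(mbox)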