get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/56450/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 56450,
    "url": "https://patches.dpdk.org/api/patches/56450/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/1563198320-29068-1-git-send-email-viacheslavo@mellanox.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<1563198320-29068-1-git-send-email-viacheslavo@mellanox.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/1563198320-29068-1-git-send-email-viacheslavo@mellanox.com",
    "date": "2019-07-15T13:45:20",
    "name": "net/mlx5: fix ESXi VLAN in virtual machine",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "cfb21542d509a92752672bc5e26da423cc24f12a",
    "submitter": {
        "id": 1102,
        "url": "https://patches.dpdk.org/api/people/1102/?format=api",
        "name": "Slava Ovsiienko",
        "email": "viacheslavo@mellanox.com"
    },
    "delegate": {
        "id": 3268,
        "url": "https://patches.dpdk.org/api/users/3268/?format=api",
        "username": "rasland",
        "first_name": "Raslan",
        "last_name": "Darawsheh",
        "email": "rasland@nvidia.com"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/1563198320-29068-1-git-send-email-viacheslavo@mellanox.com/mbox/",
    "series": [
        {
            "id": 5499,
            "url": "https://patches.dpdk.org/api/series/5499/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=5499",
            "date": "2019-07-15T13:45:20",
            "name": "net/mlx5: fix ESXi VLAN in virtual machine",
            "version": 1,
            "mbox": "https://patches.dpdk.org/series/5499/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/56450/comments/",
    "check": "success",
    "checks": "https://patches.dpdk.org/api/patches/56450/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 962FF2E83;\n\tMon, 15 Jul 2019 15:45:28 +0200 (CEST)",
            "from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129])\n\tby dpdk.org (Postfix) with ESMTP id 2EF582E81\n\tfor <dev@dpdk.org>; Mon, 15 Jul 2019 15:45:27 +0200 (CEST)",
            "from Internal Mail-Server by MTLPINE2 (envelope-from\n\tviacheslavo@mellanox.com)\n\twith ESMTPS (AES256-SHA encrypted); 15 Jul 2019 16:45:24 +0300",
            "from pegasus12.mtr.labs.mlnx. (pegasus12.mtr.labs.mlnx\n\t[10.210.17.40])\n\tby labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x6FDjOwD000643;\n\tMon, 15 Jul 2019 16:45:24 +0300"
        ],
        "From": "Viacheslav Ovsiienko <viacheslavo@mellanox.com>",
        "To": "dev@dpdk.org",
        "Cc": "yskoh@mellanox.com",
        "Date": "Mon, 15 Jul 2019 13:45:20 +0000",
        "Message-Id": "<1563198320-29068-1-git-send-email-viacheslavo@mellanox.com>",
        "X-Mailer": "git-send-email 1.8.3.1",
        "Subject": "[dpdk-dev] [PATCH] net/mlx5: fix ESXi VLAN in virtual machine",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "On ESXi setups when we have SR-IOV and E-Switch enabled there is the\nproblem to receive VLAN traffic on VF interfaces. The NIC driver\nin ESXi hypervisor does not setup E-Switch vport setting correctly\nand VLAN traffic targeted to VF is dropped.\n\nThe patch provides the temporary workaround - if the rule\ncontaining the VLAN pattern is being installed for VF the VLAN\nnetwork interface over VF is created, like the command does:\n\n  ip link add link vf.if name mlx5.wa.1.100 type vlan id 100\n\nThe PMD in DPDK maintains the database of created VLAN interfaces\nfor each existing VF and requested VLAN tags. When all of the RTE\nFlows using the given VLAN tag are removed the created VLAN interface\nwith this VLAN tag is deleted.\n\nThe name of created VLAN interface follows the format:\n\n  evmlx.d1.d2, where d1 is VF interface ifindex, d2 - VLAN ifindex\n\nImplementation limitations:\n\n- mask in rules is ignored, rule must specify VLAN tags exactly,\n  no wildcards (which are implemented by the masks) are allowed\n\n- virtual environment is detected via rte_hypervisor() call,\n  currently it checks the RTE_CPUFLAG_HYPERVISOR flag for x86\n  platform. For other architectures workaround always\n  applied for the Flow over PCI VF\n\nSigned-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>\n---\n drivers/net/mlx5/mlx5.c            |   6 +\n drivers/net/mlx5/mlx5.h            |  30 ++++\n drivers/net/mlx5/mlx5_flow.c       |  22 +++\n drivers/net/mlx5/mlx5_flow.h       |   5 +\n drivers/net/mlx5/mlx5_flow_dv.c    |  33 ++++-\n drivers/net/mlx5/mlx5_flow_verbs.c |  25 +++-\n drivers/net/mlx5/mlx5_nl.c         | 279 +++++++++++++++++++++++++++++++++++++\n 7 files changed, 396 insertions(+), 4 deletions(-)",
    "diff": "diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c\nindex d93f92d..8549167 100644\n--- a/drivers/net/mlx5/mlx5.c\n+++ b/drivers/net/mlx5/mlx5.c\n@@ -690,6 +690,8 @@ struct mlx5_dev_spawn_data {\n \t\tclose(priv->nl_socket_route);\n \tif (priv->nl_socket_rdma >= 0)\n \t\tclose(priv->nl_socket_rdma);\n+\tif (priv->esxi_context)\n+\t\tmlx5_vlan_esxi_exit(priv->esxi_context);\n \tif (priv->sh) {\n \t\t/*\n \t\t * Free the shared context in last turn, because the cleanup\n@@ -1546,6 +1548,8 @@ struct mlx5_dev_spawn_data {\n #endif\n \t/* Store device configuration on private structure. */\n \tpriv->config = config;\n+\t/* Create context for virtual machine VLAN workaround. */\n+\tpriv->esxi_context = mlx5_vlan_esxi_init(eth_dev, spawn->ifindex);\n \tif (config.dv_flow_en) {\n \t\terr = mlx5_alloc_shared_dr(priv);\n \t\tif (err)\n@@ -1572,6 +1576,8 @@ struct mlx5_dev_spawn_data {\n \t\t\tclose(priv->nl_socket_route);\n \t\tif (priv->nl_socket_rdma >= 0)\n \t\t\tclose(priv->nl_socket_rdma);\n+\t\tif (priv->esxi_context)\n+\t\t\tmlx5_vlan_esxi_exit(priv->esxi_context);\n \t\tif (own_domain_id)\n \t\t\tclaim_zero(rte_eth_switch_domain_free(priv->domain_id));\n \t\trte_free(priv);\ndiff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h\nindex 5af3f41..87afa7a 100644\n--- a/drivers/net/mlx5/mlx5.h\n+++ b/drivers/net/mlx5/mlx5.h\n@@ -231,6 +231,27 @@ enum mlx5_verbs_alloc_type {\n \tMLX5_VERBS_ALLOC_TYPE_RX_QUEUE,\n };\n \n+/* VLAN netdev for ESXi VLAN workaround. */\n+struct mlx5_vlan_dev {\n+\tuint32_t refcnt;\n+\tuint32_t ifindex; /**< Own interface index. */\n+};\n+\n+/* Structure for VF ESXi VLAN workaround. */\n+struct mlx5_vf_vlan {\n+\tuint32_t tag:12;\n+\tuint32_t created:1;\n+};\n+\n+/* Array of VLAN devices created on the base of VF */\n+struct mlx5_vlan_esxi_context {\n+\tint nl_socket;\n+\tuint32_t nl_sn;\n+\tuint32_t vf_ifindex;\n+\tstruct rte_eth_dev *dev;\n+\tstruct mlx5_vlan_dev vlan_dev[4096];\n+};\n+\n /**\n  * Verbs allocator needs a context to know in the callback which kind of\n  * resources it is allocating.\n@@ -386,6 +407,7 @@ struct mlx5_priv {\n \tint nl_socket_rdma; /* Netlink socket (NETLINK_RDMA). */\n \tint nl_socket_route; /* Netlink socket (NETLINK_ROUTE). */\n \tuint32_t nl_sn; /* Netlink message sequence number. */\n+\tstruct mlx5_vlan_esxi_context *esxi_context; /* ESXi VLAN context. */\n #ifndef RTE_ARCH_64\n \trte_spinlock_t uar_lock_cq; /* CQs share a common distinct UAR */\n \trte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];\n@@ -582,6 +604,14 @@ int mlx5_nl_mac_addr_remove(struct rte_eth_dev *dev, struct rte_ether_addr *mac,\n int mlx5_nl_switch_info(int nl, unsigned int ifindex,\n \t\t\tstruct mlx5_switch_info *info);\n \n+struct mlx5_vlan_esxi_context *mlx5_vlan_esxi_init(struct rte_eth_dev *dev,\n+\t\t\t\t\t\t   uint32_t ifindex);\n+void mlx5_vlan_esxi_exit(struct mlx5_vlan_esxi_context *ctx);\n+void mlx5_vlan_esxi_release(struct rte_eth_dev *dev,\n+\t\t\t    struct mlx5_vf_vlan *vf_vlan);\n+void mlx5_vlan_esxi_acquire(struct rte_eth_dev *dev,\n+\t\t\t    struct mlx5_vf_vlan *vf_vlan);\n+\n /* mlx5_devx_cmds.c */\n \n int mlx5_devx_cmd_flow_counter_alloc(struct ibv_context *ctx,\ndiff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c\nindex 4ba34db..42743d2 100644\n--- a/drivers/net/mlx5/mlx5_flow.c\n+++ b/drivers/net/mlx5/mlx5_flow.c\n@@ -1200,6 +1200,8 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,\n  *   Item specification.\n  * @param[in] item_flags\n  *   Bit-fields that holds the items detected until now.\n+ * @param[in] dev\n+ *   Ethernet device flow is being created on.\n  * @param[out] error\n  *   Pointer to error structure.\n  *\n@@ -1209,6 +1211,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,\n int\n mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,\n \t\t\t     uint64_t item_flags,\n+\t\t\t     struct rte_eth_dev *dev,\n \t\t\t     struct rte_flow_error *error)\n {\n \tconst struct rte_flow_item_vlan *spec = item->spec;\n@@ -1243,6 +1246,25 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,\n \t\t\t\t\terror);\n \tif (ret)\n \t\treturn ret;\n+\tif (!tunnel && mask->tci != RTE_BE16(0x0fff)) {\n+\t\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\n+\t\tif (priv->esxi_context) {\n+\t\t\t/*\n+\t\t\t * Non-NULL context means we have a virtual machine\n+\t\t\t * and SR-IOV enabled, we have to create VLAN interface\n+\t\t\t * to make hypervisor (ESXi) to setup E-Switch vport\n+\t\t\t * context correctly. We avoid creating the multiple\n+\t\t\t * VLAN interfaces, so we cannot support VLAN tag mask.\n+\t\t\t */\n+\t\t\treturn rte_flow_error_set(error, EINVAL,\n+\t\t\t\t\t\t  RTE_FLOW_ERROR_TYPE_ITEM,\n+\t\t\t\t\t\t  item,\n+\t\t\t\t\t\t  \"VLAN tag mask is not\"\n+\t\t\t\t\t\t  \" supported in virtual\"\n+\t\t\t\t\t\t  \" environment\");\n+\t\t}\n+\t}\n \tif (spec) {\n \t\tvlan_tag = spec->tci;\n \t\tvlan_tag &= mask->tci;\ndiff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h\nindex 72b339e..ac20572 100644\n--- a/drivers/net/mlx5/mlx5_flow.h\n+++ b/drivers/net/mlx5/mlx5_flow.h\n@@ -318,6 +318,8 @@ struct mlx5_flow_dv {\n \t/**< Pointer to the jump action resource. */\n \tstruct mlx5_flow_dv_port_id_action_resource *port_id_action;\n \t/**< Pointer to port ID action resource. */\n+\tstruct mlx5_vf_vlan vf_vlan;\n+\t/**< Structure for VF ESXi VLAN workaround. */\n #ifdef HAVE_IBV_FLOW_DV_SUPPORT\n \tvoid *actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS];\n \t/**< Action list. */\n@@ -343,6 +345,8 @@ struct mlx5_flow_verbs {\n \tstruct ibv_flow *flow; /**< Verbs flow pointer. */\n \tstruct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */\n \tuint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */\n+\tstruct mlx5_vf_vlan vf_vlan;\n+\t/**< Structure for VF ESXi VLAN workaround. */\n };\n \n /** Device flow structure. */\n@@ -507,6 +511,7 @@ int mlx5_flow_validate_item_udp(const struct rte_flow_item *item,\n \t\t\t\tstruct rte_flow_error *error);\n int mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,\n \t\t\t\t uint64_t item_flags,\n+\t\t\t\t struct rte_eth_dev *dev,\n \t\t\t\t struct rte_flow_error *error);\n int mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,\n \t\t\t\t  uint64_t item_flags,\ndiff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c\nindex 3fa624b..63183b5 100644\n--- a/drivers/net/mlx5/mlx5_flow_dv.c\n+++ b/drivers/net/mlx5/mlx5_flow_dv.c\n@@ -2363,7 +2363,7 @@ struct field_modify_info modify_tcp[] = {\n \t\t\tbreak;\n \t\tcase RTE_FLOW_ITEM_TYPE_VLAN:\n \t\t\tret = mlx5_flow_validate_item_vlan(items, item_flags,\n-\t\t\t\t\t\t\t   error);\n+\t\t\t\t\t\t\t   dev, error);\n \t\t\tif (ret < 0)\n \t\t\t\treturn ret;\n \t\t\tlast_item = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :\n@@ -2914,6 +2914,8 @@ struct field_modify_info modify_tcp[] = {\n /**\n  * Add VLAN item to matcher and to the value.\n  *\n+ * @param[in, out] dev_flow\n+ *   Flow descriptor.\n  * @param[in, out] matcher\n  *   Flow matcher.\n  * @param[in, out] key\n@@ -2924,7 +2926,8 @@ struct field_modify_info modify_tcp[] = {\n  *   Item is inner pattern.\n  */\n static void\n-flow_dv_translate_item_vlan(void *matcher, void *key,\n+flow_dv_translate_item_vlan(struct mlx5_flow *dev_flow,\n+\t\t\t    void *matcher, void *key,\n \t\t\t    const struct rte_flow_item *item,\n \t\t\t    int inner)\n {\n@@ -2951,6 +2954,12 @@ struct field_modify_info modify_tcp[] = {\n \t\theaders_m = MLX5_ADDR_OF(fte_match_param, matcher,\n \t\t\t\t\t outer_headers);\n \t\theaders_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);\n+\t\t/*\n+\t\t * This is workaround, masks are not supported,\n+\t\t * and pre-validated.\n+\t\t */\n+\t\tdev_flow->dv.vf_vlan.tag =\n+\t\t\trte_be_to_cpu_16(vlan_v->tci) & 0x0fff;\n \t}\n \ttci_m = rte_be_to_cpu_16(vlan_m->tci);\n \ttci_v = rte_be_to_cpu_16(vlan_m->tci & vlan_v->tci);\n@@ -4443,7 +4452,8 @@ struct field_modify_info modify_tcp[] = {\n \t\t\t\t\t     MLX5_FLOW_LAYER_OUTER_L2;\n \t\t\tbreak;\n \t\tcase RTE_FLOW_ITEM_TYPE_VLAN:\n-\t\t\tflow_dv_translate_item_vlan(match_mask, match_value,\n+\t\t\tflow_dv_translate_item_vlan(dev_flow,\n+\t\t\t\t\t\t    match_mask, match_value,\n \t\t\t\t\t\t    items, tunnel);\n \t\t\tmatcher.priority = MLX5_PRIORITY_MAP_L2;\n \t\t\tlast_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |\n@@ -4658,6 +4668,17 @@ struct field_modify_info modify_tcp[] = {\n \t\t\t\t\t   \"hardware refuses to create flow\");\n \t\t\tgoto error;\n \t\t}\n+\t\tif (priv->esxi_context &&\n+\t\t    dev_flow->dv.vf_vlan.tag &&\n+\t\t    !dev_flow->dv.vf_vlan.created) {\n+\t\t\t/*\n+\t\t\t * The rule contains the VLAN pattern.\n+\t\t\t * For VF we are going to create VLAN\n+\t\t\t * interface to make ESXi set correct\n+\t\t\t * e-Switch vport context.\n+\t\t\t */\n+\t\t\tmlx5_vlan_esxi_acquire(dev, &dev_flow->dv.vf_vlan);\n+\t\t}\n \t}\n \treturn 0;\n error:\n@@ -4671,6 +4692,9 @@ struct field_modify_info modify_tcp[] = {\n \t\t\t\tmlx5_hrxq_release(dev, dv->hrxq);\n \t\t\tdv->hrxq = NULL;\n \t\t}\n+\t\tif (dev_flow->dv.vf_vlan.tag &&\n+\t\t    dev_flow->dv.vf_vlan.created)\n+\t\t\tmlx5_vlan_esxi_release(dev, &dev_flow->dv.vf_vlan);\n \t}\n \trte_errno = err; /* Restore rte_errno. */\n \treturn -rte_errno;\n@@ -4871,6 +4895,9 @@ struct field_modify_info modify_tcp[] = {\n \t\t\t\tmlx5_hrxq_release(dev, dv->hrxq);\n \t\t\tdv->hrxq = NULL;\n \t\t}\n+\t\tif (dev_flow->dv.vf_vlan.tag &&\n+\t\t    dev_flow->dv.vf_vlan.created)\n+\t\t\tmlx5_vlan_esxi_release(dev, &dev_flow->dv.vf_vlan);\n \t}\n }\n \ndiff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c\nindex 2f4c80c..5909488 100644\n--- a/drivers/net/mlx5/mlx5_flow_verbs.c\n+++ b/drivers/net/mlx5/mlx5_flow_verbs.c\n@@ -386,6 +386,9 @@\n \t\tflow_verbs_spec_add(&dev_flow->verbs, &eth, size);\n \telse\n \t\tflow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);\n+\tif (!tunnel)\n+\t\tdev_flow->verbs.vf_vlan.tag =\n+\t\t\trte_be_to_cpu_16(spec->tci) & 0x0fff;\n }\n \n /**\n@@ -1049,7 +1052,7 @@\n \t\t\tbreak;\n \t\tcase RTE_FLOW_ITEM_TYPE_VLAN:\n \t\t\tret = mlx5_flow_validate_item_vlan(items, item_flags,\n-\t\t\t\t\t\t\t   error);\n+\t\t\t\t\t\t\t   dev, error);\n \t\t\tif (ret < 0)\n \t\t\t\treturn ret;\n \t\t\tlast_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |\n@@ -1587,6 +1590,10 @@\n \t\t\t\tmlx5_hrxq_release(dev, verbs->hrxq);\n \t\t\tverbs->hrxq = NULL;\n \t\t}\n+\t\tif (dev_flow->verbs.vf_vlan.tag &&\n+\t\t    dev_flow->verbs.vf_vlan.created) {\n+\t\t\tmlx5_vlan_esxi_release(dev, &dev_flow->verbs.vf_vlan);\n+\t\t}\n \t}\n }\n \n@@ -1634,6 +1641,7 @@\n flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,\n \t\t struct rte_flow_error *error)\n {\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n \tstruct mlx5_flow_verbs *verbs;\n \tstruct mlx5_flow *dev_flow;\n \tint err;\n@@ -1683,6 +1691,17 @@\n \t\t\t\t\t   \"hardware refuses to create flow\");\n \t\t\tgoto error;\n \t\t}\n+\t\tif (priv->esxi_context &&\n+\t\t    dev_flow->verbs.vf_vlan.tag &&\n+\t\t    !dev_flow->verbs.vf_vlan.created) {\n+\t\t\t/*\n+\t\t\t * The rule contains the VLAN pattern.\n+\t\t\t * For VF we are going to create VLAN\n+\t\t\t * interface to make ESXi set correct\n+\t\t\t * e-Switch vport context.\n+\t\t\t */\n+\t\t\tmlx5_vlan_esxi_acquire(dev, &dev_flow->verbs.vf_vlan);\n+\t\t}\n \t}\n \treturn 0;\n error:\n@@ -1696,6 +1715,10 @@\n \t\t\t\tmlx5_hrxq_release(dev, verbs->hrxq);\n \t\t\tverbs->hrxq = NULL;\n \t\t}\n+\t\tif (dev_flow->verbs.vf_vlan.tag &&\n+\t\t    dev_flow->verbs.vf_vlan.created) {\n+\t\t\tmlx5_vlan_esxi_release(dev, &dev_flow->verbs.vf_vlan);\n+\t\t}\n \t}\n \trte_errno = err; /* Restore rte_errno. */\n \treturn -rte_errno;\ndiff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c\nindex 5773fa7..8516442 100644\n--- a/drivers/net/mlx5/mlx5_nl.c\n+++ b/drivers/net/mlx5/mlx5_nl.c\n@@ -12,11 +12,14 @@\n #include <stdbool.h>\n #include <stdint.h>\n #include <stdlib.h>\n+#include <stdalign.h>\n #include <string.h>\n #include <sys/socket.h>\n #include <unistd.h>\n \n #include <rte_errno.h>\n+#include <rte_malloc.h>\n+#include <rte_hypervisor.h>\n \n #include \"mlx5.h\"\n #include \"mlx5_utils.h\"\n@@ -28,6 +31,8 @@\n /* Receive buffer size for the Netlink socket */\n #define MLX5_RECV_BUF_SIZE 32768\n \n+/** Parameters of VLAN devices created by driver. */\n+#define MLX5_ESXI_VLAN_DEVICE_PFX \"evmlx\"\n /*\n  * Define NDA_RTA as defined in iproute2 sources.\n  *\n@@ -987,3 +992,277 @@ struct mlx5_nl_ifindex_data {\n \t}\n \treturn ret;\n }\n+\n+/*\n+ * Delete VLAN network device by ifindex.\n+ *\n+ * @param[in] tcf\n+ *   Context object initialized by mlx5_vlan_esxi_init().\n+ * @param[in] ifindex\n+ *   Interface index of network device to delete.\n+ */\n+static void\n+mlx5_vlan_esxi_delete(struct mlx5_vlan_esxi_context *esxi,\n+\t\t      uint32_t ifindex)\n+{\n+\tint ret;\n+\tstruct {\n+\t\tstruct nlmsghdr nh;\n+\t\tstruct ifinfomsg info;\n+\t} req = {\n+\t\t.nh = {\n+\t\t\t.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),\n+\t\t\t.nlmsg_type = RTM_DELLINK,\n+\t\t\t.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,\n+\t\t},\n+\t\t.info = {\n+\t\t\t.ifi_family = AF_UNSPEC,\n+\t\t\t.ifi_index = ifindex,\n+\t\t},\n+\t};\n+\n+\tif (ifindex) {\n+\t\t++esxi->nl_sn;\n+\t\tif (!esxi->nl_sn)\n+\t\t\t++esxi->nl_sn;\n+\t\tret = mlx5_nl_send(esxi->nl_socket, &req.nh, esxi->nl_sn);\n+\t\tif (ret >= 0)\n+\t\t\tret = mlx5_nl_recv(esxi->nl_socket,\n+\t\t\t\t\t   esxi->nl_sn,\n+\t\t\t\t\t   NULL, NULL);\n+\t\tif (ret < 0)\n+\t\t\tDRV_LOG(WARNING, \"netlink: error deleting\"\n+\t\t\t\t\t \" VLAN ESXi ifindex %u, %d\",\n+\t\t\t\t\t ifindex, ret);\n+\t}\n+}\n+\n+/* Set of subroutines to build Netlink message. */\n+static struct nlattr *\n+nl_msg_tail(struct nlmsghdr *nlh)\n+{\n+\treturn (struct nlattr *)\n+\t\t(((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len));\n+}\n+\n+static void\n+nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)\n+{\n+\tstruct nlattr *nla = nl_msg_tail(nlh);\n+\n+\tnla->nla_type = type;\n+\tnla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr) + alen);\n+\tnlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + nla->nla_len;\n+\n+\tif (alen)\n+\t\tmemcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen);\n+}\n+\n+static struct nlattr *\n+nl_attr_nest_start(struct nlmsghdr *nlh, int type)\n+{\n+\tstruct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh);\n+\n+\tnl_attr_put(nlh, type, NULL, 0);\n+\treturn nest;\n+}\n+\n+static void\n+nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest)\n+{\n+\tnest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest;\n+}\n+\n+/*\n+ * Create network VLAN device with specified VLAN tag.\n+ *\n+ * @param[in] tcf\n+ *   Context object initialized by mlx5_vlan_esxi_init().\n+ * @param[in] ifindex\n+ *   Base network interface index.\n+ * @param[in] tag\n+ *   VLAN tag for VLAN network device to create.\n+ */\n+static uint32_t\n+mlx5_vlan_esxi_create(struct mlx5_vlan_esxi_context *esxi,\n+\t\t      uint32_t ifindex,\n+\t\t      uint16_t tag)\n+{\n+\tstruct nlmsghdr *nlh;\n+\tstruct ifinfomsg *ifm;\n+\tchar name[sizeof(MLX5_ESXI_VLAN_DEVICE_PFX) + 32];\n+\n+\talignas(RTE_CACHE_LINE_SIZE)\n+\tuint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +\n+\t\t    NLMSG_ALIGN(sizeof(struct ifinfomsg)) +\n+\t\t    NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +\n+\t\t    NLMSG_ALIGN(sizeof(uint32_t)) +\n+\t\t    NLMSG_ALIGN(sizeof(name)) +\n+\t\t    NLMSG_ALIGN(sizeof(\"vlan\")) +\n+\t\t    NLMSG_ALIGN(sizeof(uint32_t)) +\n+\t\t    NLMSG_ALIGN(sizeof(uint16_t)) + 16];\n+\tstruct nlattr *na_info;\n+\tstruct nlattr *na_vlan;\n+\tint ret;\n+\n+\tmemset(buf, 0, sizeof(buf));\n+\t++esxi->nl_sn;\n+\tif (!esxi->nl_sn)\n+\t\t++esxi->nl_sn;\n+\tnlh = (struct nlmsghdr *)buf;\n+\tnlh->nlmsg_len = sizeof(struct nlmsghdr);\n+\tnlh->nlmsg_type = RTM_NEWLINK;\n+\tnlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |\n+\t\t\t   NLM_F_EXCL | NLM_F_ACK;\n+\tifm = (struct ifinfomsg *)nl_msg_tail(nlh);\n+\tnlh->nlmsg_len += sizeof(struct ifinfomsg);\n+\tifm->ifi_family = AF_UNSPEC;\n+\tifm->ifi_type = 0;\n+\tifm->ifi_index = 0;\n+\tifm->ifi_flags = IFF_UP;\n+\tifm->ifi_change = 0xffffffff;\n+\tnl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));\n+\tret = snprintf(name, sizeof(name), \"%s.%u.%u\",\n+\t\t       MLX5_ESXI_VLAN_DEVICE_PFX, ifindex, tag);\n+\tnl_attr_put(nlh, IFLA_IFNAME, name, ret + 1);\n+\tna_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);\n+\tnl_attr_put(nlh, IFLA_INFO_KIND, \"vlan\", sizeof(\"vlan\"));\n+\tna_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);\n+\tnl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));\n+\tnl_attr_nest_end(nlh, na_vlan);\n+\tnl_attr_nest_end(nlh, na_info);\n+\tassert(sizeof(buf) >= nlh->nlmsg_len);\n+\tret = mlx5_nl_send(esxi->nl_socket, nlh, esxi->nl_sn);\n+\tif (ret >= 0)\n+\t\tret = mlx5_nl_recv(esxi->nl_socket, esxi->nl_sn, NULL, NULL);\n+\tif (ret < 0) {\n+\t\tDRV_LOG(WARNING,\n+\t\t\t\"netlink: VLAN %s create failure (%d)\",\n+\t\t\tname, ret);\n+\t}\n+\t// Try to get ifindex of created or pre-existing device.\n+\tret = if_nametoindex(name);\n+\tif (!ret) {\n+\t\tDRV_LOG(WARNING,\n+\t\t\t\"VLAN %s failed to get index (%d)\",\n+\t\t\tname, errno);\n+\t\treturn 0;\n+\t}\n+\treturn ret;\n+}\n+\n+/*\n+ * Release VLAN network device, created for ESXi workaround.\n+ *\n+ * @param[in] dev\n+ *   Ethernet device object, Netlink context provider.\n+ * @param[in] vlan\n+ *   Object representing the network device to release.\n+ */\n+void mlx5_vlan_esxi_release(struct rte_eth_dev *dev,\n+\t\t\t    struct mlx5_vf_vlan *vlan)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_vlan_esxi_context *esxi = priv->esxi_context;\n+\tstruct mlx5_vlan_dev *vlan_dev = &esxi->vlan_dev[0];\n+\n+\tassert(vlan->created);\n+\tassert(priv->esxi_context);\n+\tif (!vlan->created || !esxi)\n+\t\treturn;\n+\tvlan->created = 0;\n+\tassert(vlan_dev[vlan->tag].refcnt);\n+\tif (--vlan_dev[vlan->tag].refcnt == 0 &&\n+\t    vlan_dev[vlan->tag].ifindex) {\n+\t\tmlx5_vlan_esxi_delete(esxi, vlan_dev[vlan->tag].ifindex);\n+\t\tvlan_dev[vlan->tag].ifindex = 0;\n+\t}\n+}\n+\n+/**\n+ * Acquire VLAN interface with specified tag for ESXi workaround.\n+ *\n+ * @param[in] dev\n+ *   Ethernet device object, Netlink context provider.\n+ * @param[in] vlan\n+ *   Object representing the network device to acquire.\n+ */\n+void mlx5_vlan_esxi_acquire(struct rte_eth_dev *dev,\n+\t\t\t    struct mlx5_vf_vlan *vlan)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_vlan_esxi_context *esxi = priv->esxi_context;\n+\tstruct mlx5_vlan_dev *vlan_dev = &esxi->vlan_dev[0];\n+\n+\tassert(!vlan->created);\n+\tassert(priv->esxi_context);\n+\tif (vlan->created || !esxi)\n+\t\treturn;\n+\tif (vlan_dev[vlan->tag].refcnt == 0) {\n+\t\tassert(!vlan_dev[vlan->tag].ifindex);\n+\t\tvlan_dev[vlan->tag].ifindex =\n+\t\t\tmlx5_vlan_esxi_create(esxi,\n+\t\t\t\t\t      esxi->vf_ifindex,\n+\t\t\t\t\t      vlan->tag);\n+\t}\n+\tif (vlan_dev[vlan->tag].ifindex) {\n+\t\tvlan_dev[vlan->tag].refcnt++;\n+\t\tvlan->created = 1;\n+\t}\n+}\n+\n+/*\n+ * Create per ethernet device VLAN ESXi workaround context\n+ */\n+struct mlx5_vlan_esxi_context *\n+mlx5_vlan_esxi_init(struct rte_eth_dev *dev,\n+\t\t    uint32_t ifindex)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_dev_config *config = &priv->config;\n+\tstruct mlx5_vlan_esxi_context *esxi;\n+\n+\t/* Do not engage workaround over PF. */\n+\tif (!config->vf)\n+\t\treturn NULL;\n+\t/* Check whether there is virtual environment */\n+\tif (rte_hypervisor_get() == RTE_HYPERVISOR_NONE)\n+\t\treturn NULL;\n+\tesxi = rte_zmalloc(__func__, sizeof(*esxi), sizeof(uint32_t));\n+\tif (!esxi) {\n+\t\tDRV_LOG(WARNING,\n+\t\t\t\"Can not allocate memory\"\n+\t\t\t\" for ESXi VLAN context\");\n+\t\treturn NULL;\n+\t}\n+\tesxi->nl_socket = mlx5_nl_init(NETLINK_ROUTE);\n+\tif (esxi->nl_socket < 0) {\n+\t\tDRV_LOG(WARNING,\n+\t\t\t\"Can not create Netlink socket\"\n+\t\t\t\" for ESXi VLAN context\");\n+\t\trte_free(esxi);\n+\t\treturn NULL;\n+\t}\n+\tesxi->nl_sn = random();\n+\tesxi->vf_ifindex = ifindex;\n+\tesxi->dev = dev;\n+\t/* Cleanup for existing VLAN devices. */\n+\treturn esxi;\n+}\n+\n+/*\n+ * Destroy per ethernet device VLAN ESXi workaround context\n+ */\n+void mlx5_vlan_esxi_exit(struct mlx5_vlan_esxi_context *esxi)\n+{\n+\tunsigned int i;\n+\n+\t/* Delete all remaining VLAN devices. */\n+\tfor (i = 0; i < RTE_DIM(esxi->vlan_dev); i++) {\n+\t\tif (esxi->vlan_dev[i].ifindex)\n+\t\t\tmlx5_vlan_esxi_delete(esxi, esxi->vlan_dev[i].ifindex);\n+\t}\n+\tif (esxi->nl_socket >= 0)\n+\t\tclose(esxi->nl_socket);\n+\trte_free(esxi);\n+}\n",
    "prefixes": []
}