new file mode 100644
@@ -0,0 +1,925 @@
+/* SPDX-License-Identifier: MIT
+ * Google Virtual Ethernet (gve) driver
+ * Version: 1.3.0
+ * Copyright (C) 2015-2022 Google, Inc.
+ * Copyright(C) 2022 Intel Corporation
+ */
+
+#include "gve_adminq.h"
+#include "gve_register.h"
+
+#define GVE_MAX_ADMINQ_RELEASE_CHECK 500
+#define GVE_ADMINQ_SLEEP_LEN 20
+#define GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK 100
+
+#define GVE_DEVICE_OPTION_ERROR_FMT "%s option error:\n" \
+"Expected: length=%d, feature_mask=%x.\n" \
+"Actual: length=%d, feature_mask=%x."
+
+#define GVE_DEVICE_OPTION_TOO_BIG_FMT "Length of %s option larger than expected. Possible older version of guest driver."
+
+static
+struct gve_device_option *gve_get_next_option(struct gve_device_descriptor *descriptor,
+ struct gve_device_option *option)
+{
+ uintptr_t option_end, descriptor_end;
+
+ option_end = (uintptr_t)option + sizeof(*option) + be16_to_cpu(option->option_length);
+ descriptor_end = (uintptr_t)descriptor + be16_to_cpu(descriptor->total_length);
+
+ return option_end > descriptor_end ? NULL : (struct gve_device_option *)option_end;
+}
+
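+/* Examine a single device option: enable the queue format it implies or save
+ * a pointer to it in the matching out-parameter, skipping it if its length or
+ * required feature mask does not match what the driver expects.
+ */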
+static
+void gve_parse_device_option(struct gve_priv *priv,
+ struct gve_device_option *option,
+ struct gve_device_option_gqi_rda **dev_op_gqi_rda,
+ struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
+ struct gve_device_option_dqo_rda **dev_op_dqo_rda,
+ struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
+{
+ u32 req_feat_mask = be32_to_cpu(option->required_features_mask);
+ u16 option_length = be16_to_cpu(option->option_length);
+ u16 option_id = be16_to_cpu(option->option_id);
+
+ /* If the length or feature mask doesn't match, continue without
+ * enabling the feature.
+ */
+ switch (option_id) {
+ case GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING:
+ if (option_length != GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING) {
+ PMD_DRV_LOG(WARNING, GVE_DEVICE_OPTION_ERROR_FMT,
+ "Raw Addressing",
+ GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING,
+ GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ PMD_DRV_LOG(INFO, "Gqi raw addressing device option enabled.");
+ priv->queue_format = GVE_GQI_RDA_FORMAT;
+ break;
+ case GVE_DEV_OPT_ID_GQI_RDA:
+ if (option_length < sizeof(**dev_op_gqi_rda) ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA) {
+ PMD_DRV_LOG(WARNING, GVE_DEVICE_OPTION_ERROR_FMT,
+ "GQI RDA", (int)sizeof(**dev_op_gqi_rda),
+ GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_gqi_rda)) {
+ PMD_DRV_LOG(WARNING,
+ GVE_DEVICE_OPTION_TOO_BIG_FMT, "GQI RDA");
+ }
+ *dev_op_gqi_rda = RTE_PTR_ADD(option, sizeof(*option));
+ break;
+ case GVE_DEV_OPT_ID_GQI_QPL:
+ if (option_length < sizeof(**dev_op_gqi_qpl) ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL) {
+ PMD_DRV_LOG(WARNING, GVE_DEVICE_OPTION_ERROR_FMT,
+ "GQI QPL", (int)sizeof(**dev_op_gqi_qpl),
+ GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_gqi_qpl)) {
+ PMD_DRV_LOG(WARNING,
+ GVE_DEVICE_OPTION_TOO_BIG_FMT, "GQI QPL");
+ }
+ *dev_op_gqi_qpl = RTE_PTR_ADD(option, sizeof(*option));
+ break;
+ case GVE_DEV_OPT_ID_DQO_RDA:
+ if (option_length < sizeof(**dev_op_dqo_rda) ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA) {
+ PMD_DRV_LOG(WARNING, GVE_DEVICE_OPTION_ERROR_FMT,
+ "DQO RDA", (int)sizeof(**dev_op_dqo_rda),
+ GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_dqo_rda)) {
+ PMD_DRV_LOG(WARNING,
+ GVE_DEVICE_OPTION_TOO_BIG_FMT, "DQO RDA");
+ }
+ *dev_op_dqo_rda = RTE_PTR_ADD(option, sizeof(*option));
+ break;
+ case GVE_DEV_OPT_ID_JUMBO_FRAMES:
+ if (option_length < sizeof(**dev_op_jumbo_frames) ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) {
+ PMD_DRV_LOG(WARNING, GVE_DEVICE_OPTION_ERROR_FMT,
+ "Jumbo Frames",
+ (int)sizeof(**dev_op_jumbo_frames),
+ GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_jumbo_frames)) {
+ PMD_DRV_LOG(WARNING,
+ GVE_DEVICE_OPTION_TOO_BIG_FMT,
+ "Jumbo Frames");
+ }
+ *dev_op_jumbo_frames = RTE_PTR_ADD(option, sizeof(*option));
+ break;
+ default:
+ /* If we don't recognize the option, just continue
+ * without doing anything.
+ */
+ PMD_DRV_LOG(DEBUG, "Unrecognized device option 0x%hx not enabled.\n",
+ option_id);
+ }
+}
+
+/* Process all device options for a given describe device call. */
+static int
+gve_process_device_options(struct gve_priv *priv,
+ struct gve_device_descriptor *descriptor,
+ struct gve_device_option_gqi_rda **dev_op_gqi_rda,
+ struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
+ struct gve_device_option_dqo_rda **dev_op_dqo_rda,
+ struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
+{
+ const int num_options = be16_to_cpu(descriptor->num_device_options);
+ struct gve_device_option *dev_opt;
+ int i;
+
+ /* The options struct directly follows the device descriptor. */
+ dev_opt = RTE_PTR_ADD(descriptor, sizeof(*descriptor));
+ for (i = 0; i < num_options; i++) {
+ struct gve_device_option *next_opt;
+
+ next_opt = gve_get_next_option(descriptor, dev_opt);
+ if (!next_opt) {
+ PMD_DRV_LOG(ERR,
+ "options exceed device_descriptor's total length.\n");
+ return -EINVAL;
+ }
+
+ gve_parse_device_option(priv, dev_opt,
+ dev_op_gqi_rda, dev_op_gqi_qpl,
+ dev_op_dqo_rda, dev_op_jumbo_frames);
+ dev_opt = next_opt;
+ }
+
+ return 0;
+}
+
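+/* Allocate one page of DMA memory for the admin queue, reset the AQ counters
+ * and register the queue with the device by writing its PFN into BAR0.
+ */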
+int gve_adminq_alloc(struct gve_priv *priv)
+{
+ priv->adminq = gve_alloc_dma_mem(&priv->adminq_dma_mem, PAGE_SIZE);
+ if (unlikely(!priv->adminq))
+ return -ENOMEM;
+
+ priv->adminq_mask = (PAGE_SIZE / sizeof(union gve_adminq_command)) - 1;
+ priv->adminq_prod_cnt = 0;
+ priv->adminq_cmd_fail = 0;
+ priv->adminq_timeouts = 0;
+ priv->adminq_describe_device_cnt = 0;
+ priv->adminq_cfg_device_resources_cnt = 0;
+ priv->adminq_register_page_list_cnt = 0;
+ priv->adminq_unregister_page_list_cnt = 0;
+ priv->adminq_create_tx_queue_cnt = 0;
+ priv->adminq_create_rx_queue_cnt = 0;
+ priv->adminq_destroy_tx_queue_cnt = 0;
+ priv->adminq_destroy_rx_queue_cnt = 0;
+ priv->adminq_dcfg_device_resources_cnt = 0;
+ priv->adminq_set_driver_parameter_cnt = 0;
+ priv->adminq_report_stats_cnt = 0;
+ priv->adminq_report_link_speed_cnt = 0;
+ priv->adminq_get_ptype_map_cnt = 0;
+
+ /* Setup Admin queue with the device */
+ iowrite32be(priv->adminq_dma_mem.pa / PAGE_SIZE,
+ &priv->reg_bar0->adminq_pfn);
+
+ gve_set_admin_queue_ok(priv);
+ return 0;
+}
+
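+/* Tell the device to release the admin queue and poll until it acknowledges
+ * by clearing the PFN register, warning after GVE_MAX_ADMINQ_RELEASE_CHECK
+ * polls if it never does.
+ */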
+void gve_adminq_release(struct gve_priv *priv)
+{
+ int i = 0;
+
+ /* Tell the device the adminq is leaving */
+ iowrite32be(0x0, &priv->reg_bar0->adminq_pfn);
+ while (ioread32be(&priv->reg_bar0->adminq_pfn)) {
+ /* If this is reached, the device is unrecoverable and is still
+ * holding memory. Continue looping to avoid memory corruption,
+ * but log a warning so it is visible what is going on.
+ */
+ if (i == GVE_MAX_ADMINQ_RELEASE_CHECK)
+ PMD_DRV_LOG(WARNING, "Unrecoverable platform error!");
+ i++;
+ msleep(GVE_ADMINQ_SLEEP_LEN);
+ }
+ gve_clear_device_rings_ok(priv);
+ gve_clear_device_resources_ok(priv);
+ gve_clear_admin_queue_ok(priv);
+}
+
+void gve_adminq_free(struct gve_priv *priv)
+{
+ if (!gve_get_admin_queue_ok(priv))
+ return;
+ gve_adminq_release(priv);
+ gve_free_dma_mem(&priv->adminq_dma_mem);
+ gve_clear_admin_queue_ok(priv);
+}
+
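+/* Ring the AQ doorbell to tell the device about newly queued commands. */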
+static void gve_adminq_kick_cmd(struct gve_priv *priv, u32 prod_cnt)
+{
+ iowrite32be(prod_cnt, &priv->reg_bar0->adminq_doorbell);
+}
+
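+/* Poll the AQ event counter until the device has consumed every command up
+ * to prod_cnt, giving up after GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK polls.
+ */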
+static bool gve_adminq_wait_for_cmd(struct gve_priv *priv, u32 prod_cnt)
+{
+ int i;
+
+ for (i = 0; i < GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK; i++) {
+ if (ioread32be(&priv->reg_bar0->adminq_event_counter)
+ == prod_cnt)
+ return true;
+ msleep(GVE_ADMINQ_SLEEP_LEN);
+ }
+
+ return false;
+}
+
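+/* Map a device AQ status code onto a negative errno-style return value. */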
+static int gve_adminq_parse_err(struct gve_priv *priv, u32 status)
+{
+ if (status != GVE_ADMINQ_COMMAND_PASSED &&
+ status != GVE_ADMINQ_COMMAND_UNSET) {
+ PMD_DRV_LOG(ERR, "AQ command failed with status %d", status);
+ priv->adminq_cmd_fail++;
+ }
+ switch (status) {
+ case GVE_ADMINQ_COMMAND_PASSED:
+ return 0;
+ case GVE_ADMINQ_COMMAND_UNSET:
+ PMD_DRV_LOG(ERR, "parse_aq_err: err and status both unset, this should not be possible.");
+ return -EINVAL;
+ case GVE_ADMINQ_COMMAND_ERROR_ABORTED:
+ case GVE_ADMINQ_COMMAND_ERROR_CANCELLED:
+ case GVE_ADMINQ_COMMAND_ERROR_DATALOSS:
+ case GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION:
+ case GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE:
+ return -EAGAIN;
+ case GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS:
+ case GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR:
+ case GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT:
+ case GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND:
+ case GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE:
+ case GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR:
+ return -EINVAL;
+ case GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED:
+ return -ETIME;
+ case GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED:
+ case GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED:
+ return -EACCES;
+ case GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED:
+ return -ENOMEM;
+ case GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED:
+ return -ENOTSUP;
+ default:
+ PMD_DRV_LOG(ERR, "parse_aq_err: unknown status code %d",
+ status);
+ return -EINVAL;
+ }
+}
+
+/* Flushes all AQ commands currently queued and waits for them to complete.
+ * If there are failures, it will return the first error.
+ */
+static int gve_adminq_kick_and_wait(struct gve_priv *priv)
+{
+ u32 tail, head;
+ u32 i;
+
+ tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
+ head = priv->adminq_prod_cnt;
+
+ gve_adminq_kick_cmd(priv, head);
+ if (!gve_adminq_wait_for_cmd(priv, head)) {
+ PMD_DRV_LOG(ERR, "AQ commands timed out, need to reset AQ");
+ priv->adminq_timeouts++;
+ return -ENOTRECOVERABLE;
+ }
+
+ for (i = tail; i < head; i++) {
+ union gve_adminq_command *cmd;
+ u32 status, err;
+
+ cmd = &priv->adminq[i & priv->adminq_mask];
+ status = be32_to_cpu(READ_ONCE32(cmd->status));
+ err = gve_adminq_parse_err(priv, status);
+ if (err)
+ /* Return the first error if we failed. */
+ return err;
+ }
+
+ return 0;
+}
+
+/* This function is not threadsafe - the caller is responsible for any
+ * necessary locks.
+ */
+static int gve_adminq_issue_cmd(struct gve_priv *priv,
+ union gve_adminq_command *cmd_orig)
+{
+ union gve_adminq_command *cmd;
+ u32 opcode;
+ u32 tail;
+
+ tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
+
+ /* Check if next command will overflow the buffer. */
+ if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) ==
+ (tail & priv->adminq_mask)) {
+ int err;
+
+ /* Flush existing commands to make room. */
+ err = gve_adminq_kick_and_wait(priv);
+ if (err)
+ return err;
+
+ /* Retry. */
+ tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
+ if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) ==
+ (tail & priv->adminq_mask)) {
+ /* This should never happen. We just flushed the
+ * command queue so there should be enough space.
+ */
+ return -ENOMEM;
+ }
+ }
+
+ cmd = &priv->adminq[priv->adminq_prod_cnt & priv->adminq_mask];
+ priv->adminq_prod_cnt++;
+
+ memcpy(cmd, cmd_orig, sizeof(*cmd_orig));
+ opcode = be32_to_cpu(READ_ONCE32(cmd->opcode));
+
+ switch (opcode) {
+ case GVE_ADMINQ_DESCRIBE_DEVICE:
+ priv->adminq_describe_device_cnt++;
+ break;
+ case GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES:
+ priv->adminq_cfg_device_resources_cnt++;
+ break;
+ case GVE_ADMINQ_REGISTER_PAGE_LIST:
+ priv->adminq_register_page_list_cnt++;
+ break;
+ case GVE_ADMINQ_UNREGISTER_PAGE_LIST:
+ priv->adminq_unregister_page_list_cnt++;
+ break;
+ case GVE_ADMINQ_CREATE_TX_QUEUE:
+ priv->adminq_create_tx_queue_cnt++;
+ break;
+ case GVE_ADMINQ_CREATE_RX_QUEUE:
+ priv->adminq_create_rx_queue_cnt++;
+ break;
+ case GVE_ADMINQ_DESTROY_TX_QUEUE:
+ priv->adminq_destroy_tx_queue_cnt++;
+ break;
+ case GVE_ADMINQ_DESTROY_RX_QUEUE:
+ priv->adminq_destroy_rx_queue_cnt++;
+ break;
+ case GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES:
+ priv->adminq_dcfg_device_resources_cnt++;
+ break;
+ case GVE_ADMINQ_SET_DRIVER_PARAMETER:
+ priv->adminq_set_driver_parameter_cnt++;
+ break;
+ case GVE_ADMINQ_REPORT_STATS:
+ priv->adminq_report_stats_cnt++;
+ break;
+ case GVE_ADMINQ_REPORT_LINK_SPEED:
+ priv->adminq_report_link_speed_cnt++;
+ break;
+ case GVE_ADMINQ_GET_PTYPE_MAP:
+ priv->adminq_get_ptype_map_cnt++;
+ break;
+ default:
+ PMD_DRV_LOG(ERR, "unknown AQ command opcode %d", opcode);
+ }
+
+ return 0;
+}
+
+/* This function is not threadsafe - the caller is responsible for any
+ * necessary locks.
+ * The caller is also responsible for making sure there are no commands
+ * waiting to be executed.
+ */
+static int gve_adminq_execute_cmd(struct gve_priv *priv,
+ union gve_adminq_command *cmd_orig)
+{
+ u32 tail, head;
+ int err;
+
+ tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
+ head = priv->adminq_prod_cnt;
+ if (tail != head)
+ /* This is not a valid path */
+ return -EINVAL;
+
+ err = gve_adminq_issue_cmd(priv, cmd_orig);
+ if (err)
+ return err;
+
+ return gve_adminq_kick_and_wait(priv);
+}
+
+/* The device specifies that the management vector can either be the first irq
+ * or the last irq. ntfy_blk_msix_base_idx indicates the first irq assigned to
+ * the ntfy blks. It if is 0 then the management vector is last, if it is 1 then
+ * the management vector is first.
+ *
+ * gve arranges the msix vectors so that the management vector is last.
+ */
+#define GVE_NTFY_BLK_BASE_MSIX_IDX 0
+int gve_adminq_configure_device_resources(struct gve_priv *priv,
+ dma_addr_t counter_array_bus_addr,
+ u32 num_counters,
+ dma_addr_t db_array_bus_addr,
+ u32 num_ntfy_blks)
+{
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES);
+ cmd.configure_device_resources =
+ (struct gve_adminq_configure_device_resources) {
+ .counter_array = cpu_to_be64(counter_array_bus_addr),
+ .num_counters = cpu_to_be32(num_counters),
+ .irq_db_addr = cpu_to_be64(db_array_bus_addr),
+ .num_irq_dbs = cpu_to_be32(num_ntfy_blks),
+ .irq_db_stride = cpu_to_be32(sizeof(*priv->irq_dbs)),
+ .ntfy_blk_msix_base_idx =
+ cpu_to_be32(GVE_NTFY_BLK_BASE_MSIX_IDX),
+ .queue_format = priv->queue_format,
+ };
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_deconfigure_device_resources(struct gve_priv *priv)
+{
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES);
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
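+/* Queue (but do not kick) a CREATE_TX_QUEUE command. GQI formats reference a
+ * queue page list; DQO additionally describes the TX completion ring.
+ */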
+static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
+{
+ struct gve_tx_queue *txq = priv->txqs[queue_index];
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE);
+ cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) {
+ .queue_id = cpu_to_be32(queue_index),
+ .queue_resources_addr =
+ cpu_to_be64(txq->qres_mz->iova),
+ .tx_ring_addr = cpu_to_be64(txq->tx_ring_phys_addr),
+ .ntfy_id = cpu_to_be32(txq->ntfy_id),
+ };
+
+ if (gve_is_gqi(priv)) {
+ u32 qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ?
+ GVE_RAW_ADDRESSING_QPL_ID : txq->qpl->id;
+
+ cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
+ } else {
+ cmd.create_tx_queue.tx_ring_size =
+ cpu_to_be16(txq->nb_tx_desc);
+ cmd.create_tx_queue.tx_comp_ring_addr =
+ cpu_to_be64(txq->complq->tx_ring_phys_addr);
+ cmd.create_tx_queue.tx_comp_ring_size =
+ cpu_to_be16(priv->tx_compq_size);
+ }
+
+ return gve_adminq_issue_cmd(priv, &cmd);
+}
+
+int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues)
+{
+ int err;
+ u32 i;
+
+ for (i = 0; i < num_queues; i++) {
+ err = gve_adminq_create_tx_queue(priv, i);
+ if (err)
+ return err;
+ }
+
+ return gve_adminq_kick_and_wait(priv);
+}
+
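+/* Queue (but do not kick) a CREATE_RX_QUEUE command, filling in the GQI or
+ * DQO specific ring addresses and sizes.
+ */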
+static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
+{
+ struct gve_rx_queue *rxq = priv->rxqs[queue_index];
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
+ cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
+ .queue_id = cpu_to_be32(queue_index),
+ .ntfy_id = cpu_to_be32(rxq->ntfy_id),
+ .queue_resources_addr = cpu_to_be64(rxq->qres_mz->iova),
+ };
+
+ if (gve_is_gqi(priv)) {
+ u32 qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ?
+ GVE_RAW_ADDRESSING_QPL_ID : rxq->qpl->id;
+
+ cmd.create_rx_queue.rx_desc_ring_addr =
+ cpu_to_be64(rxq->mz->iova);
+ cmd.create_rx_queue.rx_data_ring_addr =
+ cpu_to_be64(rxq->data_mz->iova);
+ cmd.create_rx_queue.index = cpu_to_be32(queue_index);
+ cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
+ cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rxq->rx_buf_len);
+ } else {
+ cmd.create_rx_queue.rx_ring_size =
+ cpu_to_be16(priv->rx_desc_cnt);
+ cmd.create_rx_queue.rx_desc_ring_addr =
+ cpu_to_be64(rxq->rx_ring_phys_addr);
+ cmd.create_rx_queue.rx_data_ring_addr =
+ cpu_to_be64(rxq->bufq->rx_ring_phys_addr);
+ cmd.create_rx_queue.packet_buffer_size =
+ cpu_to_be16(rxq->rx_buf_len);
+ cmd.create_rx_queue.rx_buff_ring_size =
+ cpu_to_be16(priv->rx_bufq_size);
+ cmd.create_rx_queue.enable_rsc = !!(priv->enable_rsc);
+ }
+
+ return gve_adminq_issue_cmd(priv, &cmd);
+}
+
+int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues)
+{
+ int err;
+ u32 i;
+
+ for (i = 0; i < num_queues; i++) {
+ err = gve_adminq_create_rx_queue(priv, i);
+ if (err)
+ return err;
+ }
+
+ return gve_adminq_kick_and_wait(priv);
+}
+
+static int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index)
+{
+ union gve_adminq_command cmd;
+ int err;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_TX_QUEUE);
+ cmd.destroy_tx_queue = (struct gve_adminq_destroy_tx_queue) {
+ .queue_id = cpu_to_be32(queue_index),
+ };
+
+ err = gve_adminq_issue_cmd(priv, &cmd);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 num_queues)
+{
+ int err;
+ u32 i;
+
+ for (i = 0; i < num_queues; i++) {
+ err = gve_adminq_destroy_tx_queue(priv, i);
+ if (err)
+ return err;
+ }
+
+ return gve_adminq_kick_and_wait(priv);
+}
+
+static int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index)
+{
+ union gve_adminq_command cmd;
+ int err;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE);
+ cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) {
+ .queue_id = cpu_to_be32(queue_index),
+ };
+
+ err = gve_adminq_issue_cmd(priv, &cmd);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues)
+{
+ int err;
+ u32 i;
+
+ for (i = 0; i < num_queues; i++) {
+ err = gve_adminq_destroy_rx_queue(priv, i);
+ if (err)
+ return err;
+ }
+
+ return gve_adminq_kick_and_wait(priv);
+}
+
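+/* Cache the GQI ring sizes from the device descriptor, rejecting rings that
+ * would not fill at least one page.
+ */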
+static int gve_set_desc_cnt(struct gve_priv *priv,
+ struct gve_device_descriptor *descriptor)
+{
+ priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
+ if (priv->tx_desc_cnt * sizeof(priv->txqs[0]->tx_desc_ring[0])
+ < PAGE_SIZE) {
+ PMD_DRV_LOG(ERR, "Tx desc count %d too low", priv->tx_desc_cnt);
+ return -EINVAL;
+ }
+ priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
+ if (priv->rx_desc_cnt * sizeof(priv->rxqs[0]->rx_desc_ring[0])
+ < PAGE_SIZE) {
+ PMD_DRV_LOG(ERR, "Rx desc count %d too low", priv->rx_desc_cnt);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int
+gve_set_desc_cnt_dqo(struct gve_priv *priv,
+ const struct gve_device_descriptor *descriptor,
+ const struct gve_device_option_dqo_rda *dev_op_dqo_rda)
+{
+ priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
+ priv->tx_compq_size = be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries);
+ priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
+ priv->rx_bufq_size = be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries);
+
+ return 0;
+}
+
+static void gve_enable_supported_features(struct gve_priv *priv,
+ u32 supported_features_mask,
+ const struct gve_device_option_jumbo_frames
+ *dev_op_jumbo_frames)
+{
+ /* Before control reaches this point, the page-size-capped max MTU from
+ * the gve_device_descriptor field has already been stored in
+ * priv->max_mtu. We overwrite it with the true max MTU below.
+ */
+ if (dev_op_jumbo_frames &&
+ (supported_features_mask & GVE_SUP_JUMBO_FRAMES_MASK)) {
+ PMD_DRV_LOG(INFO, "JUMBO FRAMES device option enabled.");
+ priv->max_mtu = be16_to_cpu(dev_op_jumbo_frames->max_mtu);
+ }
+}
+
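+/* Issue DESCRIBE_DEVICE and cache the reported limits in priv: queue format,
+ * ring sizes, max MTU, MAC address, QPL sizes and default queue count.
+ */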
+int gve_adminq_describe_device(struct gve_priv *priv)
+{
+ struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL;
+ struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL;
+ struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
+ struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
+ struct gve_device_descriptor *descriptor;
+ struct gve_dma_mem descriptor_dma_mem;
+ u32 supported_features_mask = 0;
+ union gve_adminq_command cmd;
+ int err = 0;
+ u8 *mac;
+ u16 mtu;
+
+ memset(&cmd, 0, sizeof(cmd));
+ descriptor = gve_alloc_dma_mem(&descriptor_dma_mem, PAGE_SIZE);
+ if (!descriptor)
+ return -ENOMEM;
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESCRIBE_DEVICE);
+ cmd.describe_device.device_descriptor_addr =
+ cpu_to_be64(descriptor_dma_mem.pa);
+ cmd.describe_device.device_descriptor_version =
+ cpu_to_be32(GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION);
+ cmd.describe_device.available_length = cpu_to_be32(PAGE_SIZE);
+
+ err = gve_adminq_execute_cmd(priv, &cmd);
+ if (err)
+ goto free_device_descriptor;
+
+ err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda,
+ &dev_op_gqi_qpl, &dev_op_dqo_rda,
+ &dev_op_jumbo_frames);
+ if (err)
+ goto free_device_descriptor;
+
+ /* If the GQI_RAW_ADDRESSING option is not enabled and the queue format
+ * is not set to GqiRda, choose the queue format in a priority order:
+ * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default.
+ */
+ if (dev_op_dqo_rda) {
+ priv->queue_format = GVE_DQO_RDA_FORMAT;
+ PMD_DRV_LOG(INFO, "Driver is running with DQO RDA queue format.");
+ supported_features_mask =
+ be32_to_cpu(dev_op_dqo_rda->supported_features_mask);
+ } else if (dev_op_gqi_rda) {
+ priv->queue_format = GVE_GQI_RDA_FORMAT;
+ PMD_DRV_LOG(INFO, "Driver is running with GQI RDA queue format.");
+ supported_features_mask =
+ be32_to_cpu(dev_op_gqi_rda->supported_features_mask);
+ } else if (priv->queue_format == GVE_GQI_RDA_FORMAT) {
+ PMD_DRV_LOG(INFO, "Driver is running with GQI RDA queue format.");
+ } else {
+ priv->queue_format = GVE_GQI_QPL_FORMAT;
+ if (dev_op_gqi_qpl)
+ supported_features_mask =
+ be32_to_cpu(dev_op_gqi_qpl->supported_features_mask);
+ PMD_DRV_LOG(INFO, "Driver is running with GQI QPL queue format.");
+ }
+ if (gve_is_gqi(priv)) {
+ err = gve_set_desc_cnt(priv, descriptor);
+ } else {
+ /* DQO supports LRO. */
+ err = gve_set_desc_cnt_dqo(priv, descriptor, dev_op_dqo_rda);
+ }
+ if (err)
+ goto free_device_descriptor;
+
+ priv->max_registered_pages =
+ be64_to_cpu(descriptor->max_registered_pages);
+ mtu = be16_to_cpu(descriptor->mtu);
+ if (mtu < ETH_MIN_MTU) {
+ PMD_DRV_LOG(ERR, "MTU %d below minimum MTU", mtu);
+ err = -EINVAL;
+ goto free_device_descriptor;
+ }
+ priv->max_mtu = mtu;
+ priv->num_event_counters = be16_to_cpu(descriptor->counters);
+ rte_memcpy(priv->dev_addr.addr_bytes, descriptor->mac, ETH_ALEN);
+ mac = descriptor->mac;
+ PMD_DRV_LOG(INFO, "MAC addr: %02x:%02x:%02x:%02x:%02x:%02x",
+ mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+ priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl);
+ priv->rx_data_slot_cnt = be16_to_cpu(descriptor->rx_pages_per_qpl);
+
+ if (gve_is_gqi(priv) && priv->rx_data_slot_cnt < priv->rx_desc_cnt) {
+ PMD_DRV_LOG(ERR, "rx_data_slot_cnt cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d",
+ priv->rx_data_slot_cnt);
+ priv->rx_desc_cnt = priv->rx_data_slot_cnt;
+ }
+ priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
+
+ gve_enable_supported_features(priv, supported_features_mask,
+ dev_op_jumbo_frames);
+
+free_device_descriptor:
+ gve_free_dma_mem(&descriptor_dma_mem);
+ return err;
+}
+
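+/* Register a queue page list with the device: DMA-map an array of big-endian
+ * page addresses and hand it over via a REGISTER_PAGE_LIST command.
+ */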
+int gve_adminq_register_page_list(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl)
+{
+ struct gve_dma_mem page_list_dma_mem;
+ u32 num_entries = qpl->num_entries;
+ u32 size = num_entries * sizeof(qpl->page_buses[0]);
+ union gve_adminq_command cmd;
+ __be64 *page_list;
+ int err;
+ u32 i;
+
+ memset(&cmd, 0, sizeof(cmd));
+ page_list = gve_alloc_dma_mem(&page_list_dma_mem, size);
+ if (!page_list)
+ return -ENOMEM;
+
+ for (i = 0; i < num_entries; i++)
+ page_list[i] = cpu_to_be64(qpl->page_buses[i]);
+
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_REGISTER_PAGE_LIST);
+ cmd.reg_page_list = (struct gve_adminq_register_page_list) {
+ .page_list_id = cpu_to_be32(qpl->id),
+ .num_pages = cpu_to_be32(num_entries),
+ .page_address_list_addr = cpu_to_be64(page_list_dma_mem.pa),
+ };
+
+ err = gve_adminq_execute_cmd(priv, &cmd);
+ gve_free_dma_mem(&page_list_dma_mem);
+ return err;
+}
+
+int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id)
+{
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_UNREGISTER_PAGE_LIST);
+ cmd.unreg_page_list = (struct gve_adminq_unregister_page_list) {
+ .page_list_id = cpu_to_be32(page_list_id),
+ };
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
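+/* Inform the device of the driver MTU via a SET_DRIVER_PARAMETER command. */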
+int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu)
+{
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_SET_DRIVER_PARAMETER);
+ cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) {
+ .parameter_type = cpu_to_be32(GVE_SET_PARAM_MTU),
+ .parameter_value = cpu_to_be64(mtu),
+ };
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
+ dma_addr_t stats_report_addr, u64 interval)
+{
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_STATS);
+ cmd.report_stats = (struct gve_adminq_report_stats) {
+ .stats_report_len = cpu_to_be64(stats_report_len),
+ .stats_report_addr = cpu_to_be64(stats_report_addr),
+ .interval = cpu_to_be64(interval),
+ };
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
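+/* Ask the device to report its link speed into a DMA'd u64 and cache the
+ * result in priv->link_speed.
+ */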
+int gve_adminq_report_link_speed(struct gve_priv *priv)
+{
+ struct gve_dma_mem link_speed_region_dma_mem;
+ union gve_adminq_command gvnic_cmd;
+ u64 *link_speed_region;
+ int err;
+
+ link_speed_region = gve_alloc_dma_mem(&link_speed_region_dma_mem,
+ sizeof(*link_speed_region));
+
+ if (!link_speed_region)
+ return -ENOMEM;
+
+ memset(&gvnic_cmd, 0, sizeof(gvnic_cmd));
+ gvnic_cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_LINK_SPEED);
+ gvnic_cmd.report_link_speed.link_speed_address =
+ cpu_to_be64(link_speed_region_dma_mem.pa);
+
+ err = gve_adminq_execute_cmd(priv, &gvnic_cmd);
+
+ priv->link_speed = be64_to_cpu(*link_speed_region);
+ gve_free_dma_mem(&link_speed_region_dma_mem);
+ return err;
+}
+
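+/* Fetch the device's packet type map (DQO queue formats) and copy the L3/L4
+ * types into the caller's lookup table.
+ */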
+int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
+ struct gve_ptype_lut *ptype_lut)
+{
+ struct gve_dma_mem ptype_map_dma_mem;
+ struct gve_ptype_map *ptype_map;
+ union gve_adminq_command cmd;
+ int err = 0;
+ int i;
+
+ memset(&cmd, 0, sizeof(cmd));
+ ptype_map = gve_alloc_dma_mem(&ptype_map_dma_mem, sizeof(*ptype_map));
+ if (!ptype_map)
+ return -ENOMEM;
+
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_GET_PTYPE_MAP);
+ cmd.get_ptype_map = (struct gve_adminq_get_ptype_map) {
+ .ptype_map_len = cpu_to_be64(sizeof(*ptype_map)),
+ .ptype_map_addr = cpu_to_be64(ptype_map_dma_mem.pa),
+ };
+
+ err = gve_adminq_execute_cmd(priv, &cmd);
+ if (err)
+ goto err;
+
+ /* Populate ptype_lut. */
+ for (i = 0; i < GVE_NUM_PTYPES; i++) {
+ ptype_lut->ptypes[i].l3_type =
+ ptype_map->ptypes[i].l3_type;
+ ptype_lut->ptypes[i].l4_type =
+ ptype_map->ptypes[i].l4_type;
+ }
+err:
+ gve_free_dma_mem(&ptype_map_dma_mem);
+ return err;
+}
new file mode 100644
@@ -0,0 +1,381 @@
+/* SPDX-License-Identifier: MIT
+ * Google Virtual Ethernet (gve) driver
+ * Version: 1.3.0
+ * Copyright (C) 2015-2022 Google, Inc.
+ * Copyright(C) 2022 Intel Corporation
+ */
+
+#ifndef _GVE_ADMINQ_H
+#define _GVE_ADMINQ_H
+
+/* Admin queue opcodes */
+enum gve_adminq_opcodes {
+ GVE_ADMINQ_DESCRIBE_DEVICE = 0x1,
+ GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES = 0x2,
+ GVE_ADMINQ_REGISTER_PAGE_LIST = 0x3,
+ GVE_ADMINQ_UNREGISTER_PAGE_LIST = 0x4,
+ GVE_ADMINQ_CREATE_TX_QUEUE = 0x5,
+ GVE_ADMINQ_CREATE_RX_QUEUE = 0x6,
+ GVE_ADMINQ_DESTROY_TX_QUEUE = 0x7,
+ GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8,
+ GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9,
+ GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB,
+ GVE_ADMINQ_REPORT_STATS = 0xC,
+ GVE_ADMINQ_REPORT_LINK_SPEED = 0xD,
+ GVE_ADMINQ_GET_PTYPE_MAP = 0xE,
+};
+
+/* Admin queue status codes */
+enum gve_adminq_statuses {
+ GVE_ADMINQ_COMMAND_UNSET = 0x0,
+ GVE_ADMINQ_COMMAND_PASSED = 0x1,
+ GVE_ADMINQ_COMMAND_ERROR_ABORTED = 0xFFFFFFF0,
+ GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS = 0xFFFFFFF1,
+ GVE_ADMINQ_COMMAND_ERROR_CANCELLED = 0xFFFFFFF2,
+ GVE_ADMINQ_COMMAND_ERROR_DATALOSS = 0xFFFFFFF3,
+ GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED = 0xFFFFFFF4,
+ GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION = 0xFFFFFFF5,
+ GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR = 0xFFFFFFF6,
+ GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT = 0xFFFFFFF7,
+ GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND = 0xFFFFFFF8,
+ GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE = 0xFFFFFFF9,
+ GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED = 0xFFFFFFFA,
+ GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED = 0xFFFFFFFB,
+ GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED = 0xFFFFFFFC,
+ GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE = 0xFFFFFFFD,
+ GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED = 0xFFFFFFFE,
+ GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR = 0xFFFFFFFF,
+};
+
+#define GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION 1
+
+/* All AdminQ command structs should be naturally packed.
+ * GVE_CHECK_STRUCT/UNION_LEN will check the struct/union length and raise a
+ * compile-time error when the size is not correct.
+ */
+
+struct gve_adminq_describe_device {
+ __be64 device_descriptor_addr;
+ __be32 device_descriptor_version;
+ __be32 available_length;
+};
+
+GVE_CHECK_STRUCT_LEN(16, gve_adminq_describe_device);
+
+struct gve_device_descriptor {
+ __be64 max_registered_pages;
+ __be16 reserved1;
+ __be16 tx_queue_entries;
+ __be16 rx_queue_entries;
+ __be16 default_num_queues;
+ __be16 mtu;
+ __be16 counters;
+ __be16 tx_pages_per_qpl;
+ __be16 rx_pages_per_qpl;
+ u8 mac[ETH_ALEN];
+ __be16 num_device_options;
+ __be16 total_length;
+ u8 reserved2[6];
+};
+
+GVE_CHECK_STRUCT_LEN(40, gve_device_descriptor);
+
+struct gve_device_option {
+ __be16 option_id;
+ __be16 option_length;
+ __be32 required_features_mask;
+};
+
+GVE_CHECK_STRUCT_LEN(8, gve_device_option);
+
+struct gve_device_option_gqi_rda {
+ __be32 supported_features_mask;
+};
+
+GVE_CHECK_STRUCT_LEN(4, gve_device_option_gqi_rda);
+
+struct gve_device_option_gqi_qpl {
+ __be32 supported_features_mask;
+};
+
+GVE_CHECK_STRUCT_LEN(4, gve_device_option_gqi_qpl);
+
+struct gve_device_option_dqo_rda {
+ __be32 supported_features_mask;
+ __be16 tx_comp_ring_entries;
+ __be16 rx_buff_ring_entries;
+};
+
+GVE_CHECK_STRUCT_LEN(8, gve_device_option_dqo_rda);
+
+struct gve_device_option_jumbo_frames {
+ __be32 supported_features_mask;
+ __be16 max_mtu;
+ u8 padding[2];
+};
+
+GVE_CHECK_STRUCT_LEN(8, gve_device_option_jumbo_frames);
+
+/* Terminology:
+ *
+ * RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA
+ * mapped and read/updated by the device.
+ *
+ * QPL - Queue Page Lists - Driver uses bounce buffers which are DMA mapped with
+ * the device for read/write and data is copied from/to SKBs.
+ */
+enum gve_dev_opt_id {
+ GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1,
+ GVE_DEV_OPT_ID_GQI_RDA = 0x2,
+ GVE_DEV_OPT_ID_GQI_QPL = 0x3,
+ GVE_DEV_OPT_ID_DQO_RDA = 0x4,
+ GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
+};
+
+enum gve_dev_opt_req_feat_mask {
+ GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
+};
+
+enum gve_sup_feature_mask {
+ GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2,
+};
+
+#define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0
+
+struct gve_adminq_configure_device_resources {
+ __be64 counter_array;
+ __be64 irq_db_addr;
+ __be32 num_counters;
+ __be32 num_irq_dbs;
+ __be32 irq_db_stride;
+ __be32 ntfy_blk_msix_base_idx;
+ u8 queue_format;
+ u8 padding[7];
+};
+
+GVE_CHECK_STRUCT_LEN(40, gve_adminq_configure_device_resources);
+
+struct gve_adminq_register_page_list {
+ __be32 page_list_id;
+ __be32 num_pages;
+ __be64 page_address_list_addr;
+};
+
+GVE_CHECK_STRUCT_LEN(16, gve_adminq_register_page_list);
+
+struct gve_adminq_unregister_page_list {
+ __be32 page_list_id;
+};
+
+GVE_CHECK_STRUCT_LEN(4, gve_adminq_unregister_page_list);
+
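+/* Queue page list ID used in create-queue commands when raw DMA addressing
+ * is in use and no registered page list backs the queue.
+ */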
+#define GVE_RAW_ADDRESSING_QPL_ID 0xFFFFFFFF
+
+struct gve_adminq_create_tx_queue {
+ __be32 queue_id;
+ __be32 reserved;
+ __be64 queue_resources_addr;
+ __be64 tx_ring_addr;
+ __be32 queue_page_list_id;
+ __be32 ntfy_id;
+ __be64 tx_comp_ring_addr;
+ __be16 tx_ring_size;
+ __be16 tx_comp_ring_size;
+ u8 padding[4];
+};
+
+GVE_CHECK_STRUCT_LEN(48, gve_adminq_create_tx_queue);
+
+struct gve_adminq_create_rx_queue {
+ __be32 queue_id;
+ __be32 index;
+ __be32 reserved;
+ __be32 ntfy_id;
+ __be64 queue_resources_addr;
+ __be64 rx_desc_ring_addr;
+ __be64 rx_data_ring_addr;
+ __be32 queue_page_list_id;
+ __be16 rx_ring_size;
+ __be16 packet_buffer_size;
+ __be16 rx_buff_ring_size;
+ u8 enable_rsc;
+ u8 padding[5];
+};
+
+GVE_CHECK_STRUCT_LEN(56, gve_adminq_create_rx_queue);
+
+/* Queue resources that are shared with the device */
+struct gve_queue_resources {
+ union {
+ struct {
+ __be32 db_index; /* Device -> Guest */
+ __be32 counter_index; /* Device -> Guest */
+ };
+ u8 reserved[64];
+ };
+};
+
+GVE_CHECK_STRUCT_LEN(64, gve_queue_resources);
+
+struct gve_adminq_destroy_tx_queue {
+ __be32 queue_id;
+};
+
+GVE_CHECK_STRUCT_LEN(4, gve_adminq_destroy_tx_queue);
+
+struct gve_adminq_destroy_rx_queue {
+ __be32 queue_id;
+};
+
+GVE_CHECK_STRUCT_LEN(4, gve_adminq_destroy_rx_queue);
+
+/* GVE Set Driver Parameter Types */
+enum gve_set_driver_param_types {
+ GVE_SET_PARAM_MTU = 0x1,
+};
+
+struct gve_adminq_set_driver_parameter {
+ __be32 parameter_type;
+ u8 reserved[4];
+ __be64 parameter_value;
+};
+
+GVE_CHECK_STRUCT_LEN(16, gve_adminq_set_driver_parameter);
+
+struct gve_adminq_report_stats {
+ __be64 stats_report_len;
+ __be64 stats_report_addr;
+ __be64 interval;
+};
+
+GVE_CHECK_STRUCT_LEN(24, gve_adminq_report_stats);
+
+struct gve_adminq_report_link_speed {
+ __be64 link_speed_address;
+};
+
+GVE_CHECK_STRUCT_LEN(8, gve_adminq_report_link_speed);
+
+struct stats {
+ __be32 stat_name;
+ __be32 queue_id;
+ __be64 value;
+};
+
+GVE_CHECK_STRUCT_LEN(16, stats);
+
+struct gve_stats_report {
+ __be64 written_count;
+ struct stats stats[];
+};
+
+GVE_CHECK_STRUCT_LEN(8, gve_stats_report);
+
+enum gve_stat_names {
+ /* stats from gve */
+ TX_WAKE_CNT = 1,
+ TX_STOP_CNT = 2,
+ TX_FRAMES_SENT = 3,
+ TX_BYTES_SENT = 4,
+ TX_LAST_COMPLETION_PROCESSED = 5,
+ RX_NEXT_EXPECTED_SEQUENCE = 6,
+ RX_BUFFERS_POSTED = 7,
+ TX_TIMEOUT_CNT = 8,
+ /* stats from NIC */
+ RX_QUEUE_DROP_CNT = 65,
+ RX_NO_BUFFERS_POSTED = 66,
+ RX_DROPS_PACKET_OVER_MRU = 67,
+ RX_DROPS_INVALID_CHECKSUM = 68,
+};
+
+enum gve_l3_type {
+ /* Must be zero so zero initialized LUT is unknown. */
+ GVE_L3_TYPE_UNKNOWN = 0,
+ GVE_L3_TYPE_OTHER,
+ GVE_L3_TYPE_IPV4,
+ GVE_L3_TYPE_IPV6,
+};
+
+enum gve_l4_type {
+ /* Must be zero so zero initialized LUT is unknown. */
+ GVE_L4_TYPE_UNKNOWN = 0,
+ GVE_L4_TYPE_OTHER,
+ GVE_L4_TYPE_TCP,
+ GVE_L4_TYPE_UDP,
+ GVE_L4_TYPE_ICMP,
+ GVE_L4_TYPE_SCTP,
+};
+
+/* These are control path types for PTYPE which are the same as the data path
+ * types.
+ */
+struct gve_ptype_entry {
+ u8 l3_type;
+ u8 l4_type;
+};
+
+struct gve_ptype_map {
+ struct gve_ptype_entry ptypes[1 << 10]; /* PTYPES are always 10 bits. */
+};
+
+struct gve_adminq_get_ptype_map {
+ __be64 ptype_map_len;
+ __be64 ptype_map_addr;
+};
+
+union gve_adminq_command {
+ struct {
+ __be32 opcode;
+ __be32 status;
+ union {
+ struct gve_adminq_configure_device_resources
+ configure_device_resources;
+ struct gve_adminq_create_tx_queue create_tx_queue;
+ struct gve_adminq_create_rx_queue create_rx_queue;
+ struct gve_adminq_destroy_tx_queue destroy_tx_queue;
+ struct gve_adminq_destroy_rx_queue destroy_rx_queue;
+ struct gve_adminq_describe_device describe_device;
+ struct gve_adminq_register_page_list reg_page_list;
+ struct gve_adminq_unregister_page_list unreg_page_list;
+ struct gve_adminq_set_driver_parameter set_driver_param;
+ struct gve_adminq_report_stats report_stats;
+ struct gve_adminq_report_link_speed report_link_speed;
+ struct gve_adminq_get_ptype_map get_ptype_map;
+ };
+ };
+ u8 reserved[64];
+};
+
+GVE_CHECK_UNION_LEN(64, gve_adminq_command);
+
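+/* Illustrative bring-up/teardown order for the admin queue API below; the
+ * argument names are placeholders and error handling is omitted:
+ *
+ *	err = gve_adminq_alloc(priv);
+ *	err = gve_adminq_describe_device(priv);
+ *	err = gve_adminq_configure_device_resources(priv, counter_pa,
+ *						     num_counters, db_pa,
+ *						     num_ntfy_blks);
+ *	err = gve_adminq_create_tx_queues(priv, num_tx_queues);
+ *	err = gve_adminq_create_rx_queues(priv, num_rx_queues);
+ *	...
+ *	err = gve_adminq_destroy_rx_queues(priv, num_rx_queues);
+ *	err = gve_adminq_destroy_tx_queues(priv, num_tx_queues);
+ *	err = gve_adminq_deconfigure_device_resources(priv);
+ *	gve_adminq_free(priv);
+ */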
+int gve_adminq_alloc(struct gve_priv *priv);
+void gve_adminq_free(struct gve_priv *priv);
+void gve_adminq_release(struct gve_priv *priv);
+int gve_adminq_describe_device(struct gve_priv *priv);
+int gve_adminq_configure_device_resources(struct gve_priv *priv,
+ dma_addr_t counter_array_bus_addr,
+ u32 num_counters,
+ dma_addr_t db_array_bus_addr,
+ u32 num_ntfy_blks);
+int gve_adminq_deconfigure_device_resources(struct gve_priv *priv);
+int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues);
+int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 num_queues);
+int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues);
+int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues);
+int gve_adminq_register_page_list(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl);
+int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id);
+int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu);
+int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
+ dma_addr_t stats_report_addr, u64 interval);
+int gve_adminq_report_link_speed(struct gve_priv *priv);
+
+struct gve_ptype_lut;
+int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
+ struct gve_ptype_lut *ptype_lut);
+
+#endif /* _GVE_ADMINQ_H */
new file mode 100644
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: MIT
+ * Google Virtual Ethernet (gve) driver
+ * Version: 1.3.0
+ * Copyright (C) 2015-2022 Google, Inc.
+ */
+
+/* GVE Transmit Descriptor formats */
+
+#ifndef _GVE_DESC_H_
+#define _GVE_DESC_H_
+
+/* A note on seg_addrs
+ *
+ * Base addresses encoded in seg_addr are not assumed to be physical
+ * addresses. The ring format assumes these come from some linear address
+ * space. This could be physical memory, kernel virtual memory, or user virtual
+ * memory.
+ * If raw DMA addressing is not supported, gVNIC uses lists of registered
+ * pages. Each queue is assumed to be associated with a single such linear
+ * address space to ensure a consistent meaning for seg_addrs posted to its
+ * rings.
+ */
+
+struct gve_tx_pkt_desc {
+ u8 type_flags; /* desc type is lower 4 bits, flags upper */
+ u8 l4_csum_offset; /* relative offset of L4 csum word */
+ u8 l4_hdr_offset; /* Offset of start of L4 headers in packet */
+ u8 desc_cnt; /* Total descriptors for this packet */
+ __be16 len; /* Total length of this packet (in bytes) */
+ __be16 seg_len; /* Length of this descriptor's segment */
+ __be64 seg_addr; /* Base address (see note) of this segment */
+} __packed;
+
+struct gve_tx_mtd_desc {
+ u8 type_flags; /* type is lower 4 bits, subtype upper */
+ u8 path_state; /* state is lower 4 bits, hash type upper */
+ __be16 reserved0;
+ __be32 path_hash;
+ __be64 reserved1;
+} __packed;
+
+struct gve_tx_seg_desc {
+ u8 type_flags; /* type is lower 4 bits, flags upper */
+ u8 l3_offset; /* TSO: 2 byte units to start of IPH */
+ __be16 reserved;
+ __be16 mss; /* TSO MSS */
+ __be16 seg_len;
+ __be64 seg_addr;
+} __packed;
+
+/* GVE Transmit Descriptor Types */
+#define GVE_TXD_STD (0x0 << 4) /* Std with Host Address */
+#define GVE_TXD_TSO (0x1 << 4) /* TSO with Host Address */
+#define GVE_TXD_SEG (0x2 << 4) /* Seg with Host Address */
+#define GVE_TXD_MTD (0x3 << 4) /* Metadata */
+
+/* GVE Transmit Descriptor Flags for Std Pkts */
+#define GVE_TXF_L4CSUM BIT(0) /* Need csum offload */
+#define GVE_TXF_TSTAMP BIT(2) /* Timestamp required */
+
+/* GVE Transmit Descriptor Flags for TSO Segs */
+#define GVE_TXSF_IPV6 BIT(1) /* IPv6 TSO */
+
+/* GVE Transmit Descriptor Options for MTD Segs */
+#define GVE_MTD_SUBTYPE_PATH 0
+
+#define GVE_MTD_PATH_STATE_DEFAULT 0
+#define GVE_MTD_PATH_STATE_TIMEOUT 1
+#define GVE_MTD_PATH_STATE_CONGESTION 2
+#define GVE_MTD_PATH_STATE_RETRANSMIT 3
+
+#define GVE_MTD_PATH_HASH_NONE (0x0 << 4)
+#define GVE_MTD_PATH_HASH_L4 (0x1 << 4)
+
+/* GVE Receive Packet Descriptor */
+/* The start of an ethernet packet comes 2 bytes into the rx buffer.
+ * gVNIC adds this padding so that both the DMA and the L3/L4 protocol header
+ * accesses are aligned.
+ */
+#define GVE_RX_PAD 2
+
+struct gve_rx_desc {
+ u8 padding[48];
+ __be32 rss_hash; /* Receive-side scaling hash (Toeplitz for gVNIC) */
+ __be16 mss;
+ __be16 reserved; /* Reserved to zero */
+ u8 hdr_len; /* Header length (L2-L4) including padding */
+ u8 hdr_off; /* 64-byte-scaled offset into RX_DATA entry */
+ __sum16 csum; /* 1's-complement partial checksum of L3+ bytes */
+ __be16 len; /* Length of the received packet */
+ __be16 flags_seq; /* Flags [15:3] and sequence number [2:0] (1-7) */
+} __packed;
+GVE_CHECK_STRUCT_LEN(64, gve_rx_desc);
+
+/* If the device supports raw dma addressing then the addr in data slot is
+ * the dma address of the buffer.
+ * If the device only supports registered segments then the addr is a byte
+ * offset into the registered segment (an ordered list of pages) where the
+ * buffer is.
+ */
+union gve_rx_data_slot {
+ __be64 qpl_offset;
+ __be64 addr;
+};
+
+/* GVE Receive Packet Descriptor Seq No */
+#define GVE_SEQNO(x) (be16_to_cpu(x) & 0x7)
+
+/* GVE Receive Packet Descriptor Flags */
+#define GVE_RXFLG(x) cpu_to_be16(1 << (3 + (x)))
+#define GVE_RXF_FRAG GVE_RXFLG(3) /* IP Fragment */
+#define GVE_RXF_IPV4 GVE_RXFLG(4) /* IPv4 */
+#define GVE_RXF_IPV6 GVE_RXFLG(5) /* IPv6 */
+#define GVE_RXF_TCP GVE_RXFLG(6) /* TCP Packet */
+#define GVE_RXF_UDP GVE_RXFLG(7) /* UDP Packet */
+#define GVE_RXF_ERR GVE_RXFLG(8) /* Packet Error Detected */
+#define GVE_RXF_PKT_CONT GVE_RXFLG(10) /* Multi Fragment RX packet */
+
+/* GVE IRQ */
+#define GVE_IRQ_ACK BIT(31)
+#define GVE_IRQ_MASK BIT(30)
+#define GVE_IRQ_EVENT BIT(29)
+
+static inline bool gve_needs_rss(__be16 flag)
+{
+ if (flag & GVE_RXF_FRAG)
+ return false;
+ if (flag & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
+ return true;
+ return false;
+}
+
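+/* Valid RX sequence numbers run 1-7, so a zeroed descriptor (seqno 0) never
+ * matches the expected value; the expected sequence wraps from 7 back to 1.
+ */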
+static inline u8 gve_next_seqno(u8 seq)
+{
+ return (seq + 1) == 8 ? 1 : seq + 1;
+}
+#endif /* _GVE_DESC_H_ */
new file mode 100644
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: MIT
+ * Google Virtual Ethernet (gve) driver
+ * Version: 1.3.0
+ * Copyright (C) 2015-2022 Google, Inc.
+ */
+
+/* GVE DQO Descriptor formats */
+
+#ifndef _GVE_DESC_DQO_H_
+#define _GVE_DESC_DQO_H_
+
+#define GVE_TX_MAX_HDR_SIZE_DQO 255
+#define GVE_TX_MIN_TSO_MSS_DQO 88
+
+#ifndef __LITTLE_ENDIAN_BITFIELD
+#error "Only little endian supported"
+#endif
+
+/* Basic TX descriptor (DTYPE 0x0C) */
+struct gve_tx_pkt_desc_dqo {
+ __le64 buf_addr;
+
+ /* Must be GVE_TX_PKT_DESC_DTYPE_DQO (0xc) */
+ u8 dtype: 5;
+
+ /* Denotes the last descriptor of a packet. */
+ u8 end_of_packet: 1;
+ u8 checksum_offload_enable: 1;
+
+ /* If set, will generate a descriptor completion for this descriptor. */
+ u8 report_event: 1;
+ u8 reserved0;
+ __le16 reserved1;
+
+ /* The TX completion associated with this packet will contain this tag.
+ */
+ __le16 compl_tag;
+ u16 buf_size: 14;
+ u16 reserved2: 2;
+} __packed;
+GVE_CHECK_STRUCT_LEN(16, gve_tx_pkt_desc_dqo);
+
+#define GVE_TX_PKT_DESC_DTYPE_DQO 0xc
+#define GVE_TX_MAX_BUF_SIZE_DQO ((16 * 1024) - 1)
+
+/* Maximum number of data descriptors allowed per packet, or per-TSO segment. */
+#define GVE_TX_MAX_DATA_DESCS 10
+
+/* Min gap between tail and head to avoid cacheline overlap */
+#define GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP 4
+
+/* "report_event" on TX packet descriptors may only be reported on the last
+ * descriptor of a TX packet, and they must be spaced apart with at least this
+ * value.
+ */
+#define GVE_TX_MIN_RE_INTERVAL 32
+
+struct gve_tx_context_cmd_dtype {
+ u8 dtype: 5;
+ u8 tso: 1;
+ u8 reserved1: 2;
+
+ u8 reserved2;
+};
+
+GVE_CHECK_STRUCT_LEN(2, gve_tx_context_cmd_dtype);
+
+/* TX Native TSO Context DTYPE (0x05)
+ *
+ * "flex" fields allow the driver to send additional packet context to HW.
+ */
+struct gve_tx_tso_context_desc_dqo {
+ /* The L4 payload bytes that should be segmented. */
+ u32 tso_total_len: 24;
+ u32 flex10: 8;
+
+ /* Max segment size in TSO excluding headers. */
+ u16 mss: 14;
+ u16 reserved: 2;
+
+ u8 header_len; /* Header length to use for TSO offload */
+ u8 flex11;
+ struct gve_tx_context_cmd_dtype cmd_dtype;
+ u8 flex0;
+ u8 flex5;
+ u8 flex6;
+ u8 flex7;
+ u8 flex8;
+ u8 flex9;
+} __packed;
+GVE_CHECK_STRUCT_LEN(16, gve_tx_tso_context_desc_dqo);
+
+#define GVE_TX_TSO_CTX_DESC_DTYPE_DQO 0x5
+
+/* General context descriptor for sending metadata. */
+struct gve_tx_general_context_desc_dqo {
+ u8 flex4;
+ u8 flex5;
+ u8 flex6;
+ u8 flex7;
+ u8 flex8;
+ u8 flex9;
+ u8 flex10;
+ u8 flex11;
+ struct gve_tx_context_cmd_dtype cmd_dtype;
+ u16 reserved;
+ u8 flex0;
+ u8 flex1;
+ u8 flex2;
+ u8 flex3;
+} __packed;
+GVE_CHECK_STRUCT_LEN(16, gve_tx_general_context_desc_dqo);
+
+#define GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO 0x4
+
+/* Logical structure of metadata which is packed into context descriptor flex
+ * fields.
+ */
+struct gve_tx_metadata_dqo {
+ union {
+ struct {
+ u8 version;
+
+ /* If `skb->l4_hash` is set, this value should be
+ * derived from `skb->hash`.
+ *
+ * A zero value means no l4_hash was associated with the
+ * skb.
+ */
+ u16 path_hash: 15;
+
+ /* Should be set to 1 if the flow associated with the
+ * skb had a rehash from the TCP stack.
+ */
+ u16 rehash_event: 1;
+ } __packed;
+ u8 bytes[12];
+ };
+} __packed;
+GVE_CHECK_STRUCT_LEN(12, gve_tx_metadata_dqo);
+
+#define GVE_TX_METADATA_VERSION_DQO 0
+
+/* TX completion descriptor */
+struct gve_tx_compl_desc {
+ /* For types 0-4 this is the TX queue ID associated with this
+ * completion.
+ */
+ u16 id: 11;
+
+ /* See: GVE_COMPL_TYPE_DQO* */
+ u16 type: 3;
+ u16 reserved0: 1;
+
+ /* Flipped by HW to notify the descriptor is populated. */
+ u16 generation: 1;
+ union {
+ /* For descriptor completions, this is the last index fetched
+ * by HW + 1.
+ */
+ __le16 tx_head;
+
+ /* For packet completions, this is the completion tag set on the
+ * TX packet descriptors.
+ */
+ __le16 completion_tag;
+ };
+ __le32 reserved1;
+} __packed;
+GVE_CHECK_STRUCT_LEN(8, gve_tx_compl_desc);
+
+#define GVE_COMPL_TYPE_DQO_PKT 0x2 /* Packet completion */
+#define GVE_COMPL_TYPE_DQO_DESC 0x4 /* Descriptor completion */
+#define GVE_COMPL_TYPE_DQO_MISS 0x1 /* Miss path completion */
+#define GVE_COMPL_TYPE_DQO_REINJECTION 0x3 /* Re-injection completion */
+
+/* Descriptor to post buffers to HW on buffer queue. */
+struct gve_rx_desc_dqo {
+ __le16 buf_id; /* ID returned in Rx completion descriptor */
+ __le16 reserved0;
+ __le32 reserved1;
+ __le64 buf_addr; /* DMA address of the buffer */
+ __le64 header_buf_addr;
+ __le64 reserved2;
+} __packed;
+GVE_CHECK_STRUCT_LEN(32, gve_rx_desc_dqo);
+
+/* Descriptor for HW to notify SW of new packets received on RX queue. */
+struct gve_rx_compl_desc_dqo {
+ /* Must be 1 */
+ u8 rxdid: 4;
+ u8 reserved0: 4;
+
+ /* Packet originated from this system rather than the network. */
+ u8 loopback: 1;
+ /* Set when an IPv6 packet contains a destination options header or a
+ * routing header.
+ */
+ u8 ipv6_ex_add: 1;
+ /* Invalid packet was received. */
+ u8 rx_error: 1;
+ u8 reserved1: 5;
+
+ u16 packet_type: 10;
+ u16 ip_hdr_err: 1;
+ u16 udp_len_err: 1;
+ u16 raw_cs_invalid: 1;
+ u16 reserved2: 3;
+
+ u16 packet_len: 14;
+ /* Flipped by HW to notify the descriptor is populated. */
+ u16 generation: 1;
+ /* Should be zero. */
+ u16 buffer_queue_id: 1;
+
+ u16 header_len: 10;
+ u16 rsc: 1;
+ u16 split_header: 1;
+ u16 reserved3: 4;
+
+ u8 descriptor_done: 1;
+ u8 end_of_packet: 1;
+ u8 header_buffer_overflow: 1;
+ u8 l3_l4_processed: 1;
+ u8 csum_ip_err: 1;
+ u8 csum_l4_err: 1;
+ u8 csum_external_ip_err: 1;
+ u8 csum_external_udp_err: 1;
+
+ u8 status_error1;
+
+ __le16 reserved5;
+ __le16 buf_id; /* Buffer ID which was sent on the buffer queue. */
+
+ union {
+ /* Packet checksum. */
+ __le16 raw_cs;
+ /* Segment length for RSC packets. */
+ __le16 rsc_seg_len;
+ };
+ __le32 hash;
+ __le32 reserved6;
+ __le64 reserved7;
+} __packed;
+
+GVE_CHECK_STRUCT_LEN(32, gve_rx_compl_desc_dqo);
+
+/* Ringing the doorbell too often can hurt performance.
+ *
+ * HW requires this value to be at least 8.
+ */
+#define GVE_RX_BUF_THRESH_DQO 32
+
+#endif /* _GVE_DESC_DQO_H_ */
new file mode 100644
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT
+ * Google Virtual Ethernet (gve) driver
+ * Version: 1.3.0
+ * Copyright (C) 2015-2022 Google, Inc.
+ */
+
+#ifndef _GVE_REGISTER_H_
+#define _GVE_REGISTER_H_
+
+/* Fixed Configuration Registers */
+struct gve_registers {
+ __be32 device_status;
+ __be32 driver_status;
+ __be32 max_tx_queues;
+ __be32 max_rx_queues;
+ __be32 adminq_pfn;
+ __be32 adminq_doorbell;
+ __be32 adminq_event_counter;
+ u8 reserved[3];
+ u8 driver_version;
+};
+
+enum gve_device_status_flags {
+ GVE_DEVICE_STATUS_RESET_MASK = BIT(1),
+ GVE_DEVICE_STATUS_LINK_STATUS_MASK = BIT(2),
+ GVE_DEVICE_STATUS_REPORT_STATS_MASK = BIT(3),
+};
+#endif /* _GVE_REGISTER_H_ */