On Fri, 11 Apr 2025 16:44:39 -0700
Stephen Hemminger <stephen@networkplumber.org> wrote:
> Before this patch if secondary process called start/stop
> it would only impact the secondary process, the ethdev on the
> primary process will still not be started.
>
> With this patch, when start/stop is called from secondary,
> it calls the primary and does the operation there. The design
> is generic, and we can later add queue and other operations
> as needed.
>
> Bugzilla ID: 73
>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
Wanted to expand this to other functions like rte_eth_dev_configure()
but the structure rte_eth_conf has grown quite large
According to pahole
/* size: 2280, cachelines: 36, members: 8 */
/* sum members: 2268, holes: 2, sum holes: 8 */
/* padding: 4 */
/* member types with holes: 3, total: 4, bit holes: 1, total: 1, bit paddings: 1, total: 29 bits */
The biggest offender
there is DCB and VMDQ both of which are features I doubt anyone ever uses,
but unlikely to get that fixed.
Because it is so large, it is impossible to pass rte_eth_conf over the mp
primary/secondary API.
@@ -114,6 +114,14 @@ rte_eth_dev_allocate(const char *name)
goto unlock;
}
+ if (eth_dev_shared_data->allocated_ports == 0 &&
+ rte_mp_action_register(ETHDEV_MP, ethdev_server) &&
+ rte_errno != ENOTSUP) {
+ RTE_ETHDEV_LOG_LINE(ERR,
+ "Could not start %s service", ETHDEV_MP);
+ goto unlock;
+ }
+
eth_dev = eth_dev_get(port_id);
eth_dev->flow_fp_ops = &rte_flow_fp_default_ops;
strlcpy(eth_dev->data->name, name, sizeof(eth_dev->data->name));
@@ -282,7 +290,8 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data));
eth_dev->data = NULL;
- eth_dev_shared_data->allocated_ports--;
+ if (--eth_dev_shared_data->allocated_ports == 0)
+ rte_mp_action_unregister(ETHDEV_MP);
eth_dev_shared_data_release();
}
@@ -477,3 +477,80 @@ eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
dev->data->nb_tx_queues = nb_queues;
return 0;
}
+
+static int
+ethdev_handle_request(const struct ethdev_mp_request *req)
+{
+ switch (req->operation) {
+ case ETH_REQ_START:
+ return rte_eth_dev_start(req->port_id);
+
+ case ETH_REQ_STOP:
+ return rte_eth_dev_stop(req->port_id);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static_assert(sizeof(struct ethdev_mp_request) <= RTE_MP_MAX_PARAM_LEN);
+
+int
+ethdev_server(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+ struct rte_mp_msg mp_resp = {
+ .name = ETHDEV_MP,
+ };
+ struct ethdev_mp_response *resp
+ = (struct ethdev_mp_response *)mp_resp.param;
+ const struct ethdev_mp_request *req;
+
+ mp_resp.len_param = sizeof(*resp);
+
+ req = (const struct ethdev_mp_request *)mp_msg->param;
+
+ resp->res_op = req->operation;
+
+ /* recv client requests */
+ if (mp_msg->len_param != sizeof(*req))
+ resp->err_value = -EINVAL;
+ else
+ resp->err_value = ethdev_handle_request(req);
+
+ return rte_mp_reply(&mp_resp, peer);
+}
+
+int
+ethdev_request(enum ethdev_mp_operation operation, uint16_t port_id,
+ uint16_t queue_id __rte_unused)
+{
+ struct rte_mp_msg mp_req = { };
+ struct rte_mp_reply mp_reply;
+ struct ethdev_mp_request *req;
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ int ret;
+
+ req = (struct ethdev_mp_request *)mp_req.param;
+ strlcpy(mp_req.name, ETHDEV_MP, RTE_MP_MAX_NAME_LEN);
+ mp_req.len_param = sizeof(*req);
+
+ req->operation = operation;
+ req->port_id = port_id;
+
+ if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0) {
+ const struct rte_mp_msg *mp_rep = &mp_reply.msgs[0];
+ const struct ethdev_mp_response *resp
+ = (const struct ethdev_mp_response *)mp_rep->param;
+
+ if (resp->err_value == 0)
+ ret = 0;
+ else
+ rte_errno = -resp->err_value;
+ free(mp_reply.msgs);
+ }
+
+ if (ret < 0)
+ RTE_ETHDEV_LOG_LINE(ERR,
+ "port %up ethdev op %u failed", port_id, operation);
+ return ret;
+}
@@ -79,4 +79,27 @@ void eth_dev_txq_release(struct rte_eth_dev *dev, uint16_t qid);
int eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues);
int eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues);
+/* Used to allow start/stop from secondary */
+#define ETHDEV_MP "mp_ethdev"
+
+enum ethdev_mp_operation {
+ ETH_REQ_START,
+ ETH_REQ_STOP,
+};
+
+struct ethdev_mp_request {
+ uint16_t operation;
+ uint16_t port_id;
+ uint32_t reserved;
+};
+
+struct ethdev_mp_response {
+ uint16_t res_op;
+ uint16_t port_id;
+ int32_t err_value;
+};
+
+int ethdev_server(const struct rte_mp_msg *mp_msg, const void *peer);
+int ethdev_request(enum ethdev_mp_operation op, uint16_t port, uint16_t queue);
+
#endif /* _ETH_PRIVATE_H_ */
@@ -1800,54 +1800,61 @@ rte_eth_dev_start(uint16_t port_id)
if (dev->data->dev_configured == 0) {
RTE_ETHDEV_LOG_LINE(INFO,
- "Device with port_id=%"PRIu16" is not configured.",
- port_id);
+ "Device with port_id=%"PRIu16" is not configured.",
+ port_id);
return -EINVAL;
}
if (dev->data->dev_started != 0) {
RTE_ETHDEV_LOG_LINE(INFO,
- "Device with port_id=%"PRIu16" already started",
- port_id);
+ "Device with port_id=%"PRIu16" already started",
+ port_id);
return 0;
}
- ret = rte_eth_dev_info_get(port_id, &dev_info);
- if (ret != 0)
- return ret;
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ ret = rte_eth_dev_info_get(port_id, &dev_info);
+ if (ret != 0)
+ return ret;
- restore_flags = rte_eth_get_restore_flags(dev, RTE_ETH_START);
+ restore_flags = rte_eth_get_restore_flags(dev, RTE_ETH_START);
- /* Lets restore MAC now if device does not support live change */
- if ((*dev_info.dev_flags & RTE_ETH_DEV_NOLIVE_MAC_ADDR) &&
- (restore_flags & RTE_ETH_RESTORE_MAC_ADDR))
- eth_dev_mac_restore(dev, &dev_info);
+ /* Restore MAC now if device does not support live change */
+ if ((*dev_info.dev_flags & RTE_ETH_DEV_NOLIVE_MAC_ADDR) &&
+ (restore_flags & RTE_ETH_RESTORE_MAC_ADDR))
+ eth_dev_mac_restore(dev, &dev_info);
- diag = dev->dev_ops->dev_start(dev);
- if (diag == 0)
- dev->data->dev_started = 1;
- else
- return eth_err(port_id, diag);
+ diag = dev->dev_ops->dev_start(dev);
+ if (diag == 0)
+ dev->data->dev_started = 1;
+ else
+ return eth_err(port_id, diag);
- ret = eth_dev_config_restore(dev, &dev_info, restore_flags, port_id);
- if (ret != 0) {
- RTE_ETHDEV_LOG_LINE(ERR,
- "Error during restoring configuration for device (port %u): %s",
- port_id, rte_strerror(-ret));
- ret_stop = rte_eth_dev_stop(port_id);
- if (ret_stop != 0) {
+ ret = eth_dev_config_restore(dev, &dev_info, restore_flags, port_id);
+ if (ret != 0) {
RTE_ETHDEV_LOG_LINE(ERR,
- "Failed to stop device (port %u): %s",
- port_id, rte_strerror(-ret_stop));
- }
+ "Error during restoring configuration for device (port %u): %s",
+ port_id, rte_strerror(-ret));
+ ret_stop = rte_eth_dev_stop(port_id);
+ if (ret_stop != 0) {
+ RTE_ETHDEV_LOG_LINE(ERR,
+ "Failed to stop device (port %u): %s",
+ port_id, rte_strerror(-ret_stop));
+ }
- return ret;
- }
+ return ret;
+ }
- if (dev->data->dev_conf.intr_conf.lsc == 0) {
- if (dev->dev_ops->link_update == NULL)
- return -ENOTSUP;
- dev->dev_ops->link_update(dev, 0);
+ if (dev->data->dev_conf.intr_conf.lsc == 0) {
+ if (dev->dev_ops->link_update == NULL)
+ return -ENOTSUP;
+ dev->dev_ops->link_update(dev, 0);
+ }
+ } else {
+ /* in secondary, proxy to primary */
+ ret = ethdev_request(ETH_REQ_START, port_id, UINT16_MAX);
+ if (ret != 0)
+ return ret;
}
/* expose selection of PMD fast-path functions */
@@ -1880,9 +1887,14 @@ rte_eth_dev_stop(uint16_t port_id)
/* point fast-path functions to dummy ones */
eth_dev_fp_ops_reset(rte_eth_fp_ops + port_id);
- ret = dev->dev_ops->dev_stop(dev);
- if (ret == 0)
- dev->data->dev_started = 0;
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ ret = dev->dev_ops->dev_stop(dev);
+ if (ret == 0)
+ dev->data->dev_started = 0;
+ } else {
+ ret = ethdev_request(ETH_REQ_STOP, port_id, UINT16_MAX);
+ }
+
rte_ethdev_trace_stop(port_id, ret);
return ret;