[v5,1/3] kni: rework rte_kni_update_link using ioctl

Message ID 20210826151911.15699-1-iryzhov@nfware.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series [v5,1/3] kni: rework rte_kni_update_link using ioctl |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-testing warning apply patch failure

Commit Message

Igor Ryzhov Aug. 26, 2021, 3:19 p.m. UTC
  The current implementation doesn't allow us to update the KNI carrier if
the interface is not yet UP in the kernel. It means that we can't call it
from the same thread that is processing rte_kni_ops.config_network_if.
Being able to do so would be very convenient, because it would give the
interface the correct carrier status right after we enable it, without
any additional thread to track link status.

Propagating speed/duplex/autoneg to the kernel module also allows us to
implement ethtool_ops.get_link_ksettings callback.

Suggested-by: Dan Gora <dg@adax.com>
Signed-off-by: Igor Ryzhov <iryzhov@nfware.com>
---
 app/test/test_kni.c         | 62 +++++++++++++++++++++++++++----------
 examples/kni/main.c         |  8 +++--
 kernel/linux/kni/kni_dev.h  |  5 +++
 kernel/linux/kni/kni_misc.c | 47 ++++++++++++++++++++++++++++
 lib/kni/rte_kni.c           | 38 ++++++-----------------
 lib/kni/rte_kni.h           | 12 +++----
 lib/kni/rte_kni_common.h    |  9 ++++++
 7 files changed, 126 insertions(+), 55 deletions(-)
  

Comments

Stephen Hemminger Aug. 26, 2021, 5:15 p.m. UTC | #1
On Thu, 26 Aug 2021 18:19:09 +0300
Igor Ryzhov <iryzhov@nfware.com> wrote:

>  
> +static int
> +kni_ioctl_link(struct net *net, uint32_t ioctl_num,
> +		unsigned long ioctl_param)
> +{
> +	struct kni_net *knet = net_generic(net, kni_net_id);
> +	int ret = -EINVAL;
> +	struct kni_dev *dev, *n;
> +	struct rte_kni_link_info link_info;
> +	struct net_device *netdev;
> +
> +	if (_IOC_SIZE(ioctl_num) > sizeof(link_info))
> +		return -EINVAL;
> +
> +	if (copy_from_user(&link_info, (void *)ioctl_param, sizeof(link_info)))
> +		return -EFAULT;
> +
> +	if (strlen(link_info.name) == 0)
> +		return -EINVAL;
> +
> +	down_read(&knet->kni_list_lock);
> +	list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
> +		if (strncmp(dev->name, link_info.name, RTE_KNI_NAMESIZE) != 0)
> +			continue;
> +
> +		netdev = dev->net_dev;
> +
> +		if (link_info.status) {
> +			netif_carrier_on(netdev);
> +
> +			dev->speed = link_info.speed;
> +			dev->duplex = link_info.duplex;
> +			dev->autoneg = link_info.autoneg;
> +		} else {
> +			netif_carrier_off(netdev);
> +		}
> +
> +		ret = 0;
> +		break;
> +	}
> +	up_read(&knet->kni_list_lock);
> +
> +	return ret;
> +}
> +

You need to be using the RTNL mutex in KNI here (and probably elsewhere).
The use of semaphore for list lock should also be replaced by a mutex.

The KNI driver was written long ago and was never reviewed by people
knowledgeable about kernel networking. That is one reason IMHO KNI
should not be used in production systems.
  
Igor Ryzhov Aug. 26, 2021, 5:46 p.m. UTC | #2
Could you please clarify where exactly I need to use the rtnl lock?
From what I understand, netif_carrier_on/off can be called without the lock.

On Thu, Aug 26, 2021 at 8:15 PM Stephen Hemminger <
stephen@networkplumber.org> wrote:

> On Thu, 26 Aug 2021 18:19:09 +0300
> Igor Ryzhov <iryzhov@nfware.com> wrote:
>
> >
> > +static int
> > +kni_ioctl_link(struct net *net, uint32_t ioctl_num,
> > +             unsigned long ioctl_param)
> > +{
> > +     struct kni_net *knet = net_generic(net, kni_net_id);
> > +     int ret = -EINVAL;
> > +     struct kni_dev *dev, *n;
> > +     struct rte_kni_link_info link_info;
> > +     struct net_device *netdev;
> > +
> > +     if (_IOC_SIZE(ioctl_num) > sizeof(link_info))
> > +             return -EINVAL;
> > +
> > +     if (copy_from_user(&link_info, (void *)ioctl_param,
> sizeof(link_info)))
> > +             return -EFAULT;
> > +
> > +     if (strlen(link_info.name) == 0)
> > +             return -EINVAL;
> > +
> > +     down_read(&knet->kni_list_lock);
> > +     list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
> > +             if (strncmp(dev->name, link_info.name, RTE_KNI_NAMESIZE)
> != 0)
> > +                     continue;
> > +
> > +             netdev = dev->net_dev;
> > +
> > +             if (link_info.status) {
> > +                     netif_carrier_on(netdev);
> > +
> > +                     dev->speed = link_info.speed;
> > +                     dev->duplex = link_info.duplex;
> > +                     dev->autoneg = link_info.autoneg;
> > +             } else {
> > +                     netif_carrier_off(netdev);
> > +             }
> > +
> > +             ret = 0;
> > +             break;
> > +     }
> > +     up_read(&knet->kni_list_lock);
> > +
> > +     return ret;
> > +}
> > +
>
> You need to be using the RTNL mutex in KNI here (and probably elsewhere).
> The use of semaphore for list lock should also be replaced by a mutex.
>
> The KNI driver was written long ago and was never reviewed by people
> knowledgeable about kernel networking. That is one reason IMHO KNI
> should not be used in production systems.
>
>
  
Stephen Hemminger Aug. 26, 2021, 6:06 p.m. UTC | #3
On Thu, 26 Aug 2021 20:46:47 +0300
Igor Ryzhov <iryzhov@nfware.com> wrote:

> Could you please clarify where exactly do I need to use rtnl lock?
> From what I understand, netif_carrier_on/off can be called without the lock.

Just a basic concern. The new stuff looks ok.
If you follow what current upstream virtio is doing, it should be safe.

See drivers/net/virtio/virtio_net.c, looks like more is needed?

Do you call ethtool_validate_speed() and ethtool_validate_duplex()?

Also, in virtio the driver does stop/wakeup of tx queues on carrier change.
  
Igor Ryzhov Aug. 30, 2021, 6:05 p.m. UTC | #4
On Thu, Aug 26, 2021 at 9:06 PM Stephen Hemminger <
stephen@networkplumber.org> wrote:

> On Thu, 26 Aug 2021 20:46:47 +0300
> Igor Ryzhov <iryzhov@nfware.com> wrote:
>
> > Could you please clarify where exactly do I need to use rtnl lock?
> > From what I understand, netif_carrier_on/off can be called without the
> lock.
>
> Just a basic concern. The new stuff looks ok.
> If you follow what current upstream virtio is doing, it should be safe.
>
> See drivers/net/virtio/virtio_net.c, looks like more is needed?


Thanks for the suggestion, I looked at it.


> Do you call ethtool_validate_speed() and ethtool_validate_duplex()?
>

The problem with those functions is that they are only available since
kernel version 4.6 and currently we have to also support 4.4. I don't think
we need to introduce more compatibility code, as nothing really bad
happens if the developer provides some invalid values. The worst
thing is that ethtool will show a weird speed or an unknown duplex.


>
> Also, in virtio the driver does stop/wakeup of tx queues on carrier change.
>

This can be added for sure, but this improvement is not actually related to
my patch. We don't currently stop the kernel's tx queue when the link is
down and I don't change that behavior. The code change is trivial, but I'll
need to find some time to test that everything works fine, so let's
consider it a
separate thing, please.
  

Patch

diff --git a/app/test/test_kni.c b/app/test/test_kni.c
index 96733554b6c4..0df028696f36 100644
--- a/app/test/test_kni.c
+++ b/app/test/test_kni.c
@@ -122,9 +122,32 @@  kni_change_mtu(uint16_t port_id, unsigned int new_mtu)
 	return 0;
 }
 
+/* Read carrier flag of interface @name from sysfs; returns -1 on error. */
+static int
+kni_get_carrier(const char *name)
+{
+	FILE *fd;
+	char path[128];
+	int carrier;
+
+	snprintf(path, sizeof(path), "/sys/devices/virtual/net/%s/carrier",
+		name);
+	fd = fopen(path, "r");
+	if (fd == NULL)
+		return -1;
+
+	/* On a parse failure report -1 but still close the stream below. */
+	if (fscanf(fd, "%d", &carrier) != 1)
+		carrier = -1;
+	fclose(fd);
+	return carrier;
+}
+
 static int
 test_kni_link_change(void)
 {
+	struct rte_eth_link link;
+	int carrier;
 	int ret;
 	int pid;
 
@@ -135,42 +158,47 @@  test_kni_link_change(void)
 	}
 
 	if (pid == 0) {
+		link.link_speed = ETH_SPEED_NUM_10G;
+		link.link_duplex = ETH_LINK_FULL_DUPLEX;
+		link.link_autoneg = ETH_LINK_AUTONEG;
+
 		printf("Starting KNI Link status change tests.\n");
 		if (system(IFCONFIG TEST_KNI_PORT" up") == -1) {
 			ret = -1;
 			goto error;
 		}
 
-		ret = rte_kni_update_link(test_kni_ctx, 1);
+		link.link_status = ETH_LINK_UP;
+		ret = rte_kni_update_link(test_kni_ctx, &link);
 		if (ret < 0) {
 			printf("Failed to change link state to Up ret=%d.\n",
 				ret);
 			goto error;
 		}
 		rte_delay_ms(1000);
-		printf("KNI: Set LINKUP, previous state=%d\n", ret);
-
-		ret = rte_kni_update_link(test_kni_ctx, 0);
-		if (ret != 1) {
-			printf(
-		"Failed! Previous link state should be 1, returned %d.\n",
-				ret);
+		carrier = kni_get_carrier(TEST_KNI_PORT);
+		if (carrier != 1) {
+			printf("Carrier did not change to Up in kernel.\n");
+			ret = -1;
 			goto error;
 		}
-		rte_delay_ms(1000);
-		printf("KNI: Set LINKDOWN, previous state=%d\n", ret);
+		printf("KNI: Set LINKUP\n");
 
-		ret = rte_kni_update_link(test_kni_ctx, 1);
-		if (ret != 0) {
-			printf(
-		"Failed! Previous link state should be 0, returned %d.\n",
+		link.link_status = ETH_LINK_DOWN;
+		ret = rte_kni_update_link(test_kni_ctx, &link);
+		if (ret < 0) {
+			printf("Failed to change link state to Down ret=%d.\n",
 				ret);
 			goto error;
 		}
-		printf("KNI: Set LINKUP, previous state=%d\n", ret);
-
-		ret = 0;
 		rte_delay_ms(1000);
+		carrier = kni_get_carrier(TEST_KNI_PORT);
+		if (carrier != 0) {
+			printf("Carrier did not change to Down in kernel.\n");
+			ret = -1;
+			goto error;
+		}
+		printf("KNI: Set LINKDOWN\n");
 
 error:
 		if (system(IFCONFIG TEST_KNI_PORT" down") == -1)
diff --git a/examples/kni/main.c b/examples/kni/main.c
index beabb3c848aa..aea44beac550 100644
--- a/examples/kni/main.c
+++ b/examples/kni/main.c
@@ -85,6 +85,7 @@  struct kni_port_params {
 	unsigned lcore_tx; /* lcore ID for TX */
 	uint32_t nb_lcore_k; /* Number of lcores for KNI multi kernel threads */
 	uint32_t nb_kni; /* Number of KNI devices to be created */
+	uint8_t link_status; /* Current link status of the port */
 	unsigned lcore_k[KNI_MAX_KTHREAD]; /* lcore ID list for kthreads */
 	struct rte_kni *kni[KNI_MAX_KTHREAD]; /* KNI context pointers */
 } __rte_cache_aligned;
@@ -720,7 +721,7 @@  log_link_state(struct rte_kni *kni, int prev, struct rte_eth_link *link)
 
 	rte_eth_link_to_str(link_status_text, sizeof(link_status_text), link);
 	if (prev != link->link_status)
-		RTE_LOG(INFO, APP, "%s NIC %s",
+		RTE_LOG(INFO, APP, "%s NIC %s\n",
 			rte_kni_get_name(kni),
 			link_status_text);
 }
@@ -754,9 +755,10 @@  monitor_all_ports_link_status(void *arg)
 				continue;
 			}
 			for (i = 0; i < p[portid]->nb_kni; i++) {
-				prev = rte_kni_update_link(p[portid]->kni[i],
-						link.link_status);
+				rte_kni_update_link(p[portid]->kni[i], &link);
+				prev = p[portid]->link_status;
 				log_link_state(p[portid]->kni[i], prev, &link);
+				p[portid]->link_status = link.link_status;
 			}
 		}
 	}
diff --git a/kernel/linux/kni/kni_dev.h b/kernel/linux/kni/kni_dev.h
index c15da311ba25..969108cc30f8 100644
--- a/kernel/linux/kni/kni_dev.h
+++ b/kernel/linux/kni/kni_dev.h
@@ -88,6 +88,11 @@  struct kni_dev {
 	void *alloc_va[MBUF_BURST_SZ];
 
 	struct task_struct *usr_tsk;
+
+	/* correct when netif_carrier_ok */
+	uint32_t speed;
+	uint8_t duplex;
+	uint8_t autoneg;
 };
 
 #ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
diff --git a/kernel/linux/kni/kni_misc.c b/kernel/linux/kni/kni_misc.c
index 2b464c438113..30ee69661935 100644
--- a/kernel/linux/kni/kni_misc.c
+++ b/kernel/linux/kni/kni_misc.c
@@ -481,6 +481,50 @@  kni_ioctl_release(struct net *net, uint32_t ioctl_num,
 	return ret;
 }
 
+/* RTE_KNI_IOCTL_LINK: set carrier (and speed/duplex/autoneg) by KNI name. */
+static int
+kni_ioctl_link(struct net *net, uint32_t ioctl_num,
+		unsigned long ioctl_param)
+{
+	struct kni_net *knet = net_generic(net, kni_net_id);
+	int ret = -EINVAL;
+	struct kni_dev *dev;
+	struct rte_kni_link_info link_info;
+	struct net_device *netdev;
+
+	if (_IOC_SIZE(ioctl_num) > sizeof(link_info))
+		return -EINVAL;
+
+	if (copy_from_user(&link_info, (void *)ioctl_param, sizeof(link_info)))
+		return -EFAULT;
+
+	/* The name comes from user space and may lack a NUL: bound the scan. */
+	if (strnlen(link_info.name, sizeof(link_info.name)) == 0)
+		return -EINVAL;
+
+	down_read(&knet->kni_list_lock);
+	list_for_each_entry(dev, &knet->kni_list_head, list) {
+		if (strncmp(dev->name, link_info.name, RTE_KNI_NAMESIZE) != 0)
+			continue;
+
+		netdev = dev->net_dev;
+		if (link_info.status) {
+			netif_carrier_on(netdev);
+			dev->speed = link_info.speed;
+			dev->duplex = link_info.duplex;
+			dev->autoneg = link_info.autoneg;
+		} else {
+			netif_carrier_off(netdev);
+		}
+
+		ret = 0;
+		break;
+	}
+	up_read(&knet->kni_list_lock);
+
+	return ret;
+}
+
 static int
 kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param)
 {
@@ -502,6 +546,9 @@  kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param)
 	case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
 		ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
 		break;
+	case _IOC_NR(RTE_KNI_IOCTL_LINK):
+		ret = kni_ioctl_link(net, ioctl_num, ioctl_param);
+		break;
 	default:
 		pr_debug("IOCTL default\n");
 		break;
diff --git a/lib/kni/rte_kni.c b/lib/kni/rte_kni.c
index eb24b0d0ae4e..0a7b562abf11 100644
--- a/lib/kni/rte_kni.c
+++ b/lib/kni/rte_kni.c
@@ -784,43 +784,25 @@  rte_kni_unregister_handlers(struct rte_kni *kni)
 }
 
 int
-rte_kni_update_link(struct rte_kni *kni, unsigned int linkup)
+rte_kni_update_link(struct rte_kni *kni, struct rte_eth_link *link)
 {
-	char path[64];
-	char old_carrier[2];
-	const char *new_carrier;
-	int old_linkup;
-	int fd, ret;
+	struct rte_kni_link_info link_info;
 
 	if (kni == NULL)
 		return -1;
 
-	snprintf(path, sizeof(path), "/sys/devices/virtual/net/%s/carrier",
-		kni->name);
+	snprintf(link_info.name, RTE_KNI_NAMESIZE, "%s", kni->name);
+	link_info.speed = link->link_speed;
+	link_info.duplex = link->link_duplex;
+	link_info.autoneg = link->link_autoneg;
+	link_info.status = link->link_status;
 
-	fd = open(path, O_RDWR);
-	if (fd == -1) {
-		RTE_LOG(ERR, KNI, "Failed to open file: %s.\n", path);
+	if (ioctl(kni_fd, RTE_KNI_IOCTL_LINK, &link_info) < 0) {
+		RTE_LOG(ERR, KNI, "Fail to update KNI link\n");
 		return -1;
 	}
 
-	ret = read(fd, old_carrier, 2);
-	if (ret < 1) {
-		close(fd);
-		return -1;
-	}
-	old_linkup = (old_carrier[0] == '1');
-
-	new_carrier = linkup ? "1" : "0";
-	ret = write(fd, new_carrier, 1);
-	if (ret < 1) {
-		RTE_LOG(ERR, KNI, "Failed to write file: %s.\n", path);
-		close(fd);
-		return -1;
-	}
-
-	close(fd);
-	return old_linkup;
+	return 0;
 }
 
 void
diff --git a/lib/kni/rte_kni.h b/lib/kni/rte_kni.h
index b0eaf4610416..c891e0e1f9a5 100644
--- a/lib/kni/rte_kni.h
+++ b/lib/kni/rte_kni.h
@@ -21,6 +21,7 @@ 
 #include <rte_memory.h>
 #include <rte_mempool.h>
 #include <rte_ether.h>
+#include <rte_ethdev.h>
 
 #include <rte_kni_common.h>
 
@@ -244,19 +245,16 @@  int rte_kni_unregister_handlers(struct rte_kni *kni);
  *
  * @param kni
  *  pointer to struct rte_kni.
- * @param linkup
- *  New link state:
- *  0 for linkdown.
- *  > 0 for linkup.
+ * @param link
+ *  new link state, speed, duplex, autoneg.
  *
  * @return
+ *  On success: 0
  *  On failure: -1
- *  Previous link state == linkdown: 0
- *  Previous link state == linkup: 1
  */
 __rte_experimental
 int
-rte_kni_update_link(struct rte_kni *kni, unsigned int linkup);
+rte_kni_update_link(struct rte_kni *kni, struct rte_eth_link *link);
 
 /**
  *  Close KNI device.
diff --git a/lib/kni/rte_kni_common.h b/lib/kni/rte_kni_common.h
index b547ea550171..e27cb3330b35 100644
--- a/lib/kni/rte_kni_common.h
+++ b/lib/kni/rte_kni_common.h
@@ -130,10 +130,19 @@  struct rte_kni_device_info {
 	uint8_t iova_mode;
 };
 
+struct rte_kni_link_info {	/* argument of RTE_KNI_IOCTL_LINK */
+	char name[RTE_KNI_NAMESIZE];	/* KNI device name to look up */
+	uint32_t speed;		/* from rte_eth_link.link_speed */
+	uint8_t duplex;		/* from rte_eth_link.link_duplex */
+	uint8_t autoneg;	/* from rte_eth_link.link_autoneg */
+	uint8_t status;		/* non-zero: carrier on, zero: carrier off */
+};
+
 #define KNI_DEVICE "kni"
 
 #define RTE_KNI_IOCTL_TEST    _IOWR(0, 1, int)
 #define RTE_KNI_IOCTL_CREATE  _IOWR(0, 2, struct rte_kni_device_info)
 #define RTE_KNI_IOCTL_RELEASE _IOWR(0, 3, struct rte_kni_device_info)
+#define RTE_KNI_IOCTL_LINK    _IOWR(0, 4, struct rte_kni_link_info)
 
 #endif /* _RTE_KNI_COMMON_H_ */