[v1,1/1] net/hinic: fix coredump when PMD used by fstack

Message ID 1615796077-68790-1-git-send-email-zhouguoyang@huawei.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series [v1,1/1] net/hinic: fix coredump when PMD used by fstack |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/iol-abi-testing success Testing PASS
ci/iol-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-mellanox-Functional success Functional Testing PASS
ci/travis-robot success travis build: passed
ci/github-robot success github build: passed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/intel-Testing success Testing PASS

Commit Message

Guoyang Zhou March 15, 2021, 8:14 a.m. UTC
  The fstack will use secondary process to access the memory of
eth_dev_ops , and it wants to get the info of dev, but hinic
driver does not initialized it when in secondary process.

Fixes: 66f64dd6dc86 ("net/hinic: fix secondary process")
Cc: stable@dpdk.org
Signed-off-by: Guoyang Zhou <zhouguoyang@huawei.com>
---
 drivers/net/hinic/base/hinic_compat.h | 25 ++++++++-----------------
 drivers/net/hinic/hinic_pmd_ethdev.c  |  5 +++++
 2 files changed, 13 insertions(+), 17 deletions(-)
  

Comments

Ferruh Yigit March 22, 2021, 5:17 p.m. UTC | #1
On 3/15/2021 8:14 AM, Guoyang Zhou wrote:
> The fstack will use secondary process to access the memory of
> eth_dev_ops , and it wants to get the info of dev, but hinic
> driver does not initialized it when in secondary process.
> 

I guess the issue is not specific to the f-stack, perhaps can generalize the 
patch title.

> Fixes: 66f64dd6dc86 ("net/hinic: fix secondary process")
> Cc: stable@dpdk.org
> Signed-off-by: Guoyang Zhou <zhouguoyang@huawei.com>
> ---
>   drivers/net/hinic/base/hinic_compat.h | 25 ++++++++-----------------
>   drivers/net/hinic/hinic_pmd_ethdev.c  |  5 +++++
>   2 files changed, 13 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
> index 6dd210e..aea3320 100644
> --- a/drivers/net/hinic/base/hinic_compat.h
> +++ b/drivers/net/hinic/base/hinic_compat.h
> @@ -171,6 +171,7 @@ static inline u32 readl(const volatile void *addr)
>   #else
>   #define CLOCK_TYPE CLOCK_MONOTONIC
>   #endif
> +#define HINIC_MUTEX_TIMEOUT  10
>   
>   static inline unsigned long clock_gettime_ms(void)
>   {
> @@ -225,24 +226,14 @@ static inline int hinic_mutex_destroy(pthread_mutex_t *pthreadmutex)
>   static inline int hinic_mutex_lock(pthread_mutex_t *pthreadmutex)
>   {
>   	int err;
> +	struct timespec tout;
>   
> -	err = pthread_mutex_lock(pthreadmutex);
> -	if (!err) {
> -		return err;
> -	} else if (err == EOWNERDEAD) {
> -		PMD_DRV_LOG(ERR, "Mutex lock failed. (ErrorNo=%d)", errno);
> -#if defined(__GLIBC__)
> -#if __GLIBC_PREREQ(2, 12)
> -		(void)pthread_mutex_consistent(pthreadmutex);
> -#else
> -		(void)pthread_mutex_consistent_np(pthreadmutex);
> -#endif
> -#else
> -		(void)pthread_mutex_consistent(pthreadmutex);
> -#endif
> -	} else {
> -		PMD_DRV_LOG(ERR, "Mutex lock failed. (ErrorNo=%d)", errno);
> -	}
> +	(void)clock_gettime(CLOCK_TYPE, &tout);
> +
> +	tout.tv_sec += HINIC_MUTEX_TIMEOUT;
> +	err = pthread_mutex_timedlock(pthreadmutex, &tout);
> +	if (err)
> +		PMD_DRV_LOG(ERR, "Mutex lock failed. (ErrorNo=%d)", err);
>   

Is above change related to the secondary process fix?

>   	return err;
>   }
> diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
> index 1d6b710..057e7b1 100644
> --- a/drivers/net/hinic/hinic_pmd_ethdev.c
> +++ b/drivers/net/hinic/hinic_pmd_ethdev.c
> @@ -3085,6 +3085,10 @@ static int hinic_dev_close(struct rte_eth_dev *dev)
>   	.filter_ctrl                   = hinic_dev_filter_ctrl,
>   };
>   
> +static const struct eth_dev_ops hinic_dev_sec_ops = {
> +	.dev_infos_get                 = hinic_dev_infos_get,
> +};
> +
>   static int hinic_func_init(struct rte_eth_dev *eth_dev)
>   {
>   	struct rte_pci_device *pci_dev;
> @@ -3099,6 +3103,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
>   
>   	/* EAL is SECONDARY and eth_dev is already created */
>   	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
> +		eth_dev->dev_ops = &hinic_dev_sec_ops;

Why not using existing dev_ops but creating a new one?

>   		PMD_DRV_LOG(INFO, "Initialize %s in secondary process",
>   			    eth_dev->data->name);
>   
>
  
Guoyang Zhou March 29, 2021, 11:05 a.m. UTC | #2
在 2021/3/23 1:17, Ferruh Yigit 写道:
> On 3/15/2021 8:14 AM, Guoyang Zhou wrote:
>> The fstack will use secondary process to access the memory of
>> eth_dev_ops , and it wants to get the info of dev, but hinic
>> driver does not initialized it when in secondary process.
>>
> 
> I guess the issue is not specific to the f-stack, perhaps can generalize the patch title.
> 
>> Fixes: 66f64dd6dc86 ("net/hinic: fix secondary process")
>> Cc: stable@dpdk.org
>> Signed-off-by: Guoyang Zhou <zhouguoyang@huawei.com>
>> ---
>>   drivers/net/hinic/base/hinic_compat.h | 25 ++++++++-----------------
>>   drivers/net/hinic/hinic_pmd_ethdev.c  |  5 +++++
>>   2 files changed, 13 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
>> index 6dd210e..aea3320 100644
>> --- a/drivers/net/hinic/base/hinic_compat.h
>> +++ b/drivers/net/hinic/base/hinic_compat.h
>> @@ -171,6 +171,7 @@ static inline u32 readl(const volatile void *addr)
>>   #else
>>   #define CLOCK_TYPE CLOCK_MONOTONIC
>>   #endif
>> +#define HINIC_MUTEX_TIMEOUT  10
>>     static inline unsigned long clock_gettime_ms(void)
>>   {
>> @@ -225,24 +226,14 @@ static inline int hinic_mutex_destroy(pthread_mutex_t *pthreadmutex)
>>   static inline int hinic_mutex_lock(pthread_mutex_t *pthreadmutex)
>>   {
>>       int err;
>> +    struct timespec tout;
>>   -    err = pthread_mutex_lock(pthreadmutex);
>> -    if (!err) {
>> -        return err;
>> -    } else if (err == EOWNERDEAD) {
>> -        PMD_DRV_LOG(ERR, "Mutex lock failed. (ErrorNo=%d)", errno);
>> -#if defined(__GLIBC__)
>> -#if __GLIBC_PREREQ(2, 12)
>> -        (void)pthread_mutex_consistent(pthreadmutex);
>> -#else
>> -        (void)pthread_mutex_consistent_np(pthreadmutex);
>> -#endif
>> -#else
>> -        (void)pthread_mutex_consistent(pthreadmutex);
>> -#endif
>> -    } else {
>> -        PMD_DRV_LOG(ERR, "Mutex lock failed. (ErrorNo=%d)", errno);
>> -    }
>> +    (void)clock_gettime(CLOCK_TYPE, &tout);
>> +
>> +    tout.tv_sec += HINIC_MUTEX_TIMEOUT;
>> +    err = pthread_mutex_timedlock(pthreadmutex, &tout);
>> +    if (err)
>> +        PMD_DRV_LOG(ERR, "Mutex lock failed. (ErrorNo=%d)", err);
>>   
> 
> Is above change related to the secondary process fix?
> 

Yes, the hinic_mutex_lock function is related to the secondary process fix.
Becasue I found if do not fix it, when a lot of secondary processes calls
secondary ops (dev_infos_get), it will cause deadlock in low probability.

>>       return err;
>>   }
>> diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
>> index 1d6b710..057e7b1 100644
>> --- a/drivers/net/hinic/hinic_pmd_ethdev.c
>> +++ b/drivers/net/hinic/hinic_pmd_ethdev.c
>> @@ -3085,6 +3085,10 @@ static int hinic_dev_close(struct rte_eth_dev *dev)
>>       .filter_ctrl                   = hinic_dev_filter_ctrl,
>>   };
>>   +static const struct eth_dev_ops hinic_dev_sec_ops = {
>> +    .dev_infos_get                 = hinic_dev_infos_get,
>> +};
>> +
>>   static int hinic_func_init(struct rte_eth_dev *eth_dev)
>>   {
>>       struct rte_pci_device *pci_dev;
>> @@ -3099,6 +3103,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev)
>>         /* EAL is SECONDARY and eth_dev is already created */
>>       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
>> +        eth_dev->dev_ops = &hinic_dev_sec_ops;
> 
> Why not using existing dev_ops but creating a new one?
> 
>>           PMD_DRV_LOG(INFO, "Initialize %s in secondary process",
>>                   eth_dev->data->name);
>>  
> 
> .

Because many interfaces do not support calling from the secondary process due to
hardware reason, and from the current test situation, it does not be needed.

Regards,
Guoyang Zhou
  

Patch

diff --git a/drivers/net/hinic/base/hinic_compat.h b/drivers/net/hinic/base/hinic_compat.h
index 6dd210e..aea3320 100644
--- a/drivers/net/hinic/base/hinic_compat.h
+++ b/drivers/net/hinic/base/hinic_compat.h
@@ -171,6 +171,7 @@  static inline u32 readl(const volatile void *addr)
 #else
 #define CLOCK_TYPE CLOCK_MONOTONIC
 #endif
+#define HINIC_MUTEX_TIMEOUT  10
 
 static inline unsigned long clock_gettime_ms(void)
 {
@@ -225,24 +226,14 @@  static inline int hinic_mutex_destroy(pthread_mutex_t *pthreadmutex)
 static inline int hinic_mutex_lock(pthread_mutex_t *pthreadmutex)
 {
 	int err;
+	struct timespec tout;
 
-	err = pthread_mutex_lock(pthreadmutex);
-	if (!err) {
-		return err;
-	} else if (err == EOWNERDEAD) {
-		PMD_DRV_LOG(ERR, "Mutex lock failed. (ErrorNo=%d)", errno);
-#if defined(__GLIBC__)
-#if __GLIBC_PREREQ(2, 12)
-		(void)pthread_mutex_consistent(pthreadmutex);
-#else
-		(void)pthread_mutex_consistent_np(pthreadmutex);
-#endif
-#else
-		(void)pthread_mutex_consistent(pthreadmutex);
-#endif
-	} else {
-		PMD_DRV_LOG(ERR, "Mutex lock failed. (ErrorNo=%d)", errno);
-	}
+	(void)clock_gettime(CLOCK_TYPE, &tout);
+
+	tout.tv_sec += HINIC_MUTEX_TIMEOUT;
+	err = pthread_mutex_timedlock(pthreadmutex, &tout);
+	if (err)
+		PMD_DRV_LOG(ERR, "Mutex lock failed. (ErrorNo=%d)", err);
 
 	return err;
 }
diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c
index 1d6b710..057e7b1 100644
--- a/drivers/net/hinic/hinic_pmd_ethdev.c
+++ b/drivers/net/hinic/hinic_pmd_ethdev.c
@@ -3085,6 +3085,10 @@  static int hinic_dev_close(struct rte_eth_dev *dev)
 	.filter_ctrl                   = hinic_dev_filter_ctrl,
 };
 
+static const struct eth_dev_ops hinic_dev_sec_ops = {
+	.dev_infos_get                 = hinic_dev_infos_get,
+};
+
 static int hinic_func_init(struct rte_eth_dev *eth_dev)
 {
 	struct rte_pci_device *pci_dev;
@@ -3099,6 +3103,7 @@  static int hinic_func_init(struct rte_eth_dev *eth_dev)
 
 	/* EAL is SECONDARY and eth_dev is already created */
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		eth_dev->dev_ops = &hinic_dev_sec_ops;
 		PMD_DRV_LOG(INFO, "Initialize %s in secondary process",
 			    eth_dev->data->name);