eal: fix use wrong time API

Message ID 1619597563-56716-1-git-send-email-humin29@huawei.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series eal: fix use wrong time API |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/github-robot success github build: passed
ci/iol-testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS

Commit Message

humin (Q) April 28, 2021, 8:12 a.m. UTC
  From: Chengwen Feng <fengchengwen@huawei.com>

Currently, the mp uses gettimeofday() API to get the time, and used as
timeout parameter.

But the time which gets from gettimeofday() API isn't monotonically
increasing. The process may fail if the system time is changed.

This fixes it by using clock_gettime() API with monotonic attribution.

Fixes: 783b6e54971d ("eal: add synchronous multi-process communication")
Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
Cc: stable@dpdk.org

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 lib/eal/common/eal_common_proc.c | 45 +++++++++++++++++-----------------------
 1 file changed, 19 insertions(+), 26 deletions(-)
  

Comments

Morten Brørup April 28, 2021, 9:24 a.m. UTC | #1
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Min Hu (Connor)
> Sent: Wednesday, April 28, 2021 10:13 AM
> 
> From: Chengwen Feng <fengchengwen@huawei.com>
> 
> Currently, the mp uses gettimeofday() API to get the time, and used as
> timeout parameter.
> 
> But the time which gets from gettimeofday() API isn't monotonically
> increasing. The process may fail if the system time is changed.
> 
> This fixes it by using clock_gettime() API with monotonic attribution.
> 
> Fixes: 783b6e54971d ("eal: add synchronous multi-process
> communication")
> Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> ---
>  lib/eal/common/eal_common_proc.c | 45 +++++++++++++++++---------------
> --------
>  1 file changed, 19 insertions(+), 26 deletions(-)
> 
> diff --git a/lib/eal/common/eal_common_proc.c
> b/lib/eal/common/eal_common_proc.c
> index 6d1af3c..7f08826 100644
> --- a/lib/eal/common/eal_common_proc.c
> +++ b/lib/eal/common/eal_common_proc.c
> @@ -40,6 +40,12 @@ static char mp_dir_path[PATH_MAX]; /* The directory
> path for all mp sockets */
>  static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
>  static char peer_name[PATH_MAX];
> 
> +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
> +#else
> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC
> +#endif

Just out of curiosity: Why do you prefer CLOCK_MONOTONIC_RAW over CLOCK_MONOTONIC?

> +
>  struct action_entry {
>  	TAILQ_ENTRY(action_entry) next;
>  	char action_name[RTE_MP_MAX_NAME_LEN];
> @@ -490,14 +496,8 @@ async_reply_handle_thread_unsafe(void *arg)
>  	struct pending_request *req = (struct pending_request *)arg;
>  	enum async_action action;
>  	struct timespec ts_now;
> -	struct timeval now;
> 
> -	if (gettimeofday(&now, NULL) < 0) {
> -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
> -		goto no_trigger;
> -	}
> -	ts_now.tv_nsec = now.tv_usec * 1000;
> -	ts_now.tv_sec = now.tv_sec;
> +	clock_gettime(CLOCK_TYPE_ID, &ts_now);
> 
>  	action = process_async_request(req, &ts_now);
> 
> @@ -896,6 +896,7 @@ mp_request_sync(const char *dst, struct rte_mp_msg
> *req,
>  	       struct rte_mp_reply *reply, const struct timespec *ts)
>  {
>  	int ret;
> +	pthread_condattr_t attr;
>  	struct rte_mp_msg msg, *tmp;
>  	struct pending_request pending_req, *exist;
> 
> @@ -904,7 +905,9 @@ mp_request_sync(const char *dst, struct rte_mp_msg
> *req,
>  	strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
>  	pending_req.request = req;
>  	pending_req.reply = &msg;
> -	pthread_cond_init(&pending_req.sync.cond, NULL);
> +	pthread_condattr_init(&attr);
> +	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);

Shouldn't CLOCK_MONOTONIC be CLOCK_TYPE_ID here too?

> +	pthread_cond_init(&pending_req.sync.cond, &attr);
> 
>  	exist = find_pending_request(dst, req->name);
>  	if (exist) {
> @@ -967,8 +970,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct
> rte_mp_reply *reply,
>  	int dir_fd, ret = -1;
>  	DIR *mp_dir;
>  	struct dirent *ent;
> -	struct timeval now;
> -	struct timespec end;
> +	struct timespec now, end;
>  	const struct internal_config *internal_conf =
>  		eal_get_internal_configuration();
> 
> @@ -987,15 +989,10 @@ rte_mp_request_sync(struct rte_mp_msg *req,
> struct rte_mp_reply *reply,
>  		return -1;
>  	}
> 
> -	if (gettimeofday(&now, NULL) < 0) {
> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
> -		rte_errno = errno;
> -		goto end;
> -	}
> -
> -	end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
> +	clock_gettime(CLOCK_TYPE_ID, &now);
> +	end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
>  	end.tv_sec = now.tv_sec + ts->tv_sec +
> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
> 
>  	/* for secondary process, send request to the primary process
> only */
>  	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> @@ -1069,7 +1066,7 @@ rte_mp_request_async(struct rte_mp_msg *req,
> const struct timespec *ts,
>  	int dir_fd, ret = 0;
>  	DIR *mp_dir;
>  	struct dirent *ent;
> -	struct timeval now;
> +	struct timespec now;
>  	struct timespec *end;
>  	bool dummy_used = false;
>  	const struct internal_config *internal_conf =
> @@ -1086,11 +1083,6 @@ rte_mp_request_async(struct rte_mp_msg *req,
> const struct timespec *ts,
>  		return -1;
>  	}
> 
> -	if (gettimeofday(&now, NULL) < 0) {
> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
> -		rte_errno = errno;
> -		return -1;
> -	}
>  	copy = calloc(1, sizeof(*copy));
>  	dummy = calloc(1, sizeof(*dummy));
>  	param = calloc(1, sizeof(*param));
> @@ -1108,9 +1100,10 @@ rte_mp_request_async(struct rte_mp_msg *req,
> const struct timespec *ts,
>  	end = &param->end;
>  	reply = &param->user_reply;
> 
> -	end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
> +	clock_gettime(CLOCK_TYPE_ID, &now);
> +	end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
>  	end->tv_sec = now.tv_sec + ts->tv_sec +
> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
>  	reply->nb_sent = 0;
>  	reply->nb_received = 0;
>  	reply->msgs = NULL;
> --
> 2.7.4
>
  
Chengwen Feng April 28, 2021, 10:36 a.m. UTC | #2
On 2021/4/28 17:24, Morten Brørup wrote:
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Min Hu (Connor)
>> Sent: Wednesday, April 28, 2021 10:13 AM
>>
>> From: Chengwen Feng <fengchengwen@huawei.com>
>>
>> Currently, the mp uses gettimeofday() API to get the time, and used as
>> timeout parameter.
>>
>> But the time which gets from gettimeofday() API isn't monotonically
>> increasing. The process may fail if the system time is changed.
>>
>> This fixes it by using clock_gettime() API with monotonic attribution.
>>
>> Fixes: 783b6e54971d ("eal: add synchronous multi-process
>> communication")
>> Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
>> Cc: stable@dpdk.org
>>
>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
>> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
>> ---
>>  lib/eal/common/eal_common_proc.c | 45 +++++++++++++++++---------------
>> --------
>>  1 file changed, 19 insertions(+), 26 deletions(-)
>>
>> diff --git a/lib/eal/common/eal_common_proc.c
>> b/lib/eal/common/eal_common_proc.c
>> index 6d1af3c..7f08826 100644
>> --- a/lib/eal/common/eal_common_proc.c
>> +++ b/lib/eal/common/eal_common_proc.c
>> @@ -40,6 +40,12 @@ static char mp_dir_path[PATH_MAX]; /* The directory
>> path for all mp sockets */
>>  static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
>>  static char peer_name[PATH_MAX];
>>
>> +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
>> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
>> +#else
>> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC
>> +#endif
> 
> Just out of curiosity: Why do you prefer CLOCK_MONOTONIC_RAW over CLOCK_MONOTONIC?
> 

there may slightly difference, the CLOCK_MONOTONIC_RAW is totally local oscillator
(pls see below link), just preferred in engineering practice.
https://stackoverflow.com/questions/14270300/what-is-the-difference-between-clock-monotonic-clock-monotonic-raw

>> +
>>  struct action_entry {
>>  	TAILQ_ENTRY(action_entry) next;
>>  	char action_name[RTE_MP_MAX_NAME_LEN];
>> @@ -490,14 +496,8 @@ async_reply_handle_thread_unsafe(void *arg)
>>  	struct pending_request *req = (struct pending_request *)arg;
>>  	enum async_action action;
>>  	struct timespec ts_now;
>> -	struct timeval now;
>>
>> -	if (gettimeofday(&now, NULL) < 0) {
>> -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
>> -		goto no_trigger;
>> -	}
>> -	ts_now.tv_nsec = now.tv_usec * 1000;
>> -	ts_now.tv_sec = now.tv_sec;
>> +	clock_gettime(CLOCK_TYPE_ID, &ts_now);
>>
>>  	action = process_async_request(req, &ts_now);
>>
>> @@ -896,6 +896,7 @@ mp_request_sync(const char *dst, struct rte_mp_msg
>> *req,
>>  	       struct rte_mp_reply *reply, const struct timespec *ts)
>>  {
>>  	int ret;
>> +	pthread_condattr_t attr;
>>  	struct rte_mp_msg msg, *tmp;
>>  	struct pending_request pending_req, *exist;
>>
>> @@ -904,7 +905,9 @@ mp_request_sync(const char *dst, struct rte_mp_msg
>> *req,
>>  	strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
>>  	pending_req.request = req;
>>  	pending_req.reply = &msg;
>> -	pthread_cond_init(&pending_req.sync.cond, NULL);
>> +	pthread_condattr_init(&attr);
>> +	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
> 
> Shouldn't CLOCK_MONOTONIC be CLOCK_TYPE_ID here too?

After reading the source code, it only support CLOCK_MONOTONIC and CLOCK_REALTIME
(pls see below link), so cant't use CLOCK_TYPE_ID here.
https://code.woboq.org/userspace/glibc/nptl/pthread_condattr_setclock.c.html#pthread_condattr_setclock

will fix in v2 by make CLOCK_TYPE_ID equal CLOCK_MONOTONIC.

thanks

> 
>> +	pthread_cond_init(&pending_req.sync.cond, &attr);
>>
>>  	exist = find_pending_request(dst, req->name);
>>  	if (exist) {
>> @@ -967,8 +970,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct
>> rte_mp_reply *reply,
>>  	int dir_fd, ret = -1;
>>  	DIR *mp_dir;
>>  	struct dirent *ent;
>> -	struct timeval now;
>> -	struct timespec end;
>> +	struct timespec now, end;
>>  	const struct internal_config *internal_conf =
>>  		eal_get_internal_configuration();
>>
>> @@ -987,15 +989,10 @@ rte_mp_request_sync(struct rte_mp_msg *req,
>> struct rte_mp_reply *reply,
>>  		return -1;
>>  	}
>>
>> -	if (gettimeofday(&now, NULL) < 0) {
>> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
>> -		rte_errno = errno;
>> -		goto end;
>> -	}
>> -
>> -	end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
>> +	clock_gettime(CLOCK_TYPE_ID, &now);
>> +	end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
>>  	end.tv_sec = now.tv_sec + ts->tv_sec +
>> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
>> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
>>
>>  	/* for secondary process, send request to the primary process
>> only */
>>  	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
>> @@ -1069,7 +1066,7 @@ rte_mp_request_async(struct rte_mp_msg *req,
>> const struct timespec *ts,
>>  	int dir_fd, ret = 0;
>>  	DIR *mp_dir;
>>  	struct dirent *ent;
>> -	struct timeval now;
>> +	struct timespec now;
>>  	struct timespec *end;
>>  	bool dummy_used = false;
>>  	const struct internal_config *internal_conf =
>> @@ -1086,11 +1083,6 @@ rte_mp_request_async(struct rte_mp_msg *req,
>> const struct timespec *ts,
>>  		return -1;
>>  	}
>>
>> -	if (gettimeofday(&now, NULL) < 0) {
>> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
>> -		rte_errno = errno;
>> -		return -1;
>> -	}
>>  	copy = calloc(1, sizeof(*copy));
>>  	dummy = calloc(1, sizeof(*dummy));
>>  	param = calloc(1, sizeof(*param));
>> @@ -1108,9 +1100,10 @@ rte_mp_request_async(struct rte_mp_msg *req,
>> const struct timespec *ts,
>>  	end = &param->end;
>>  	reply = &param->user_reply;
>>
>> -	end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
>> +	clock_gettime(CLOCK_TYPE_ID, &now);
>> +	end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
>>  	end->tv_sec = now.tv_sec + ts->tv_sec +
>> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
>> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
>>  	reply->nb_sent = 0;
>>  	reply->nb_received = 0;
>>  	reply->msgs = NULL;
>> --
>> 2.7.4
>>
>
  
Morten Brørup April 28, 2021, 10:59 a.m. UTC | #3
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of fengchengwen
> Sent: Wednesday, April 28, 2021 12:36 PM
> 
> On 2021/4/28 17:24, Morten Brørup wrote:
> >> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Min Hu (Connor)
> >> Sent: Wednesday, April 28, 2021 10:13 AM
> >>
> >> From: Chengwen Feng <fengchengwen@huawei.com>
> >>
> >> Currently, the mp uses gettimeofday() API to get the time, and used
> as
> >> timeout parameter.
> >>
> >> But the time which gets from gettimeofday() API isn't monotonically
> >> increasing. The process may fail if the system time is changed.
> >>
> >> This fixes it by using clock_gettime() API with monotonic
> attribution.
> >>
> >> Fixes: 783b6e54971d ("eal: add synchronous multi-process
> >> communication")
> >> Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
> >> Cc: stable@dpdk.org
> >>
> >> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> >> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> >> ---
> >>  lib/eal/common/eal_common_proc.c | 45 +++++++++++++++++------------
> ---
> >> --------
> >>  1 file changed, 19 insertions(+), 26 deletions(-)
> >>
> >> diff --git a/lib/eal/common/eal_common_proc.c
> >> b/lib/eal/common/eal_common_proc.c
> >> index 6d1af3c..7f08826 100644
> >> --- a/lib/eal/common/eal_common_proc.c
> >> +++ b/lib/eal/common/eal_common_proc.c
> >> @@ -40,6 +40,12 @@ static char mp_dir_path[PATH_MAX]; /* The
> directory
> >> path for all mp sockets */
> >>  static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
> >>  static char peer_name[PATH_MAX];
> >>
> >> +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
> >> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
> >> +#else
> >> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC
> >> +#endif
> >
> > Just out of curiosity: Why do you prefer CLOCK_MONOTONIC_RAW over
> CLOCK_MONOTONIC?
> >
> 
> there may slightly difference, the CLOCK_MONOTONIC_RAW is totally local
> oscillator
> (pls see below link), just preferred in engineering practice.
> https://stackoverflow.com/questions/14270300/what-is-the-difference-
> between-clock-monotonic-clock-monotonic-raw

Interesting link! Following the treads there, it looks like CLOCK_MONOTONIC had a bug in some old kernel versions, where in certain circumstances it could jump slightly backwards.

That bug seems to have been fixed, so CLOCK_MONOTONIC should be safe to use. Source: https://bugzilla.redhat.com/show_bug.cgi?id=448449

> 
> >> +
> >>  struct action_entry {
> >>  	TAILQ_ENTRY(action_entry) next;
> >>  	char action_name[RTE_MP_MAX_NAME_LEN];
> >> @@ -490,14 +496,8 @@ async_reply_handle_thread_unsafe(void *arg)
> >>  	struct pending_request *req = (struct pending_request *)arg;
> >>  	enum async_action action;
> >>  	struct timespec ts_now;
> >> -	struct timeval now;
> >>
> >> -	if (gettimeofday(&now, NULL) < 0) {
> >> -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
> >> -		goto no_trigger;
> >> -	}
> >> -	ts_now.tv_nsec = now.tv_usec * 1000;
> >> -	ts_now.tv_sec = now.tv_sec;
> >> +	clock_gettime(CLOCK_TYPE_ID, &ts_now);
> >>
> >>  	action = process_async_request(req, &ts_now);
> >>
> >> @@ -896,6 +896,7 @@ mp_request_sync(const char *dst, struct
> rte_mp_msg
> >> *req,
> >>  	       struct rte_mp_reply *reply, const struct timespec *ts)
> >>  {
> >>  	int ret;
> >> +	pthread_condattr_t attr;
> >>  	struct rte_mp_msg msg, *tmp;
> >>  	struct pending_request pending_req, *exist;
> >>
> >> @@ -904,7 +905,9 @@ mp_request_sync(const char *dst, struct
> rte_mp_msg
> >> *req,
> >>  	strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
> >>  	pending_req.request = req;
> >>  	pending_req.reply = &msg;
> >> -	pthread_cond_init(&pending_req.sync.cond, NULL);
> >> +	pthread_condattr_init(&attr);
> >> +	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
> >
> > Shouldn't CLOCK_MONOTONIC be CLOCK_TYPE_ID here too?
> 
> After reading the source code, it only support CLOCK_MONOTONIC and
> CLOCK_REALTIME
> (pls see below link), so cant't use CLOCK_TYPE_ID here.
> https://code.woboq.org/userspace/glibc/nptl/pthread_condattr_setclock.c
> .html#pthread_condattr_setclock
> 
> will fix in v2 by make CLOCK_TYPE_ID equal CLOCK_MONOTONIC.

OK, then just get rid of the CLOCK_TYPE_ID definition and use CLOCK_MONOTONIC instead.

> 
> thanks
> 
> >
> >> +	pthread_cond_init(&pending_req.sync.cond, &attr);
> >>
> >>  	exist = find_pending_request(dst, req->name);
> >>  	if (exist) {
> >> @@ -967,8 +970,7 @@ rte_mp_request_sync(struct rte_mp_msg *req,
> struct
> >> rte_mp_reply *reply,
> >>  	int dir_fd, ret = -1;
> >>  	DIR *mp_dir;
> >>  	struct dirent *ent;
> >> -	struct timeval now;
> >> -	struct timespec end;
> >> +	struct timespec now, end;
> >>  	const struct internal_config *internal_conf =
> >>  		eal_get_internal_configuration();
> >>
> >> @@ -987,15 +989,10 @@ rte_mp_request_sync(struct rte_mp_msg *req,
> >> struct rte_mp_reply *reply,
> >>  		return -1;
> >>  	}
> >>
> >> -	if (gettimeofday(&now, NULL) < 0) {
> >> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
> >> -		rte_errno = errno;
> >> -		goto end;
> >> -	}
> >> -
> >> -	end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
> >> +	clock_gettime(CLOCK_TYPE_ID, &now);
> >> +	end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
> >>  	end.tv_sec = now.tv_sec + ts->tv_sec +
> >> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
> >> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
> >>
> >>  	/* for secondary process, send request to the primary process
> >> only */
> >>  	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> >> @@ -1069,7 +1066,7 @@ rte_mp_request_async(struct rte_mp_msg *req,
> >> const struct timespec *ts,
> >>  	int dir_fd, ret = 0;
> >>  	DIR *mp_dir;
> >>  	struct dirent *ent;
> >> -	struct timeval now;
> >> +	struct timespec now;
> >>  	struct timespec *end;
> >>  	bool dummy_used = false;
> >>  	const struct internal_config *internal_conf =
> >> @@ -1086,11 +1083,6 @@ rte_mp_request_async(struct rte_mp_msg *req,
> >> const struct timespec *ts,
> >>  		return -1;
> >>  	}
> >>
> >> -	if (gettimeofday(&now, NULL) < 0) {
> >> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
> >> -		rte_errno = errno;
> >> -		return -1;
> >> -	}
> >>  	copy = calloc(1, sizeof(*copy));
> >>  	dummy = calloc(1, sizeof(*dummy));
> >>  	param = calloc(1, sizeof(*param));
> >> @@ -1108,9 +1100,10 @@ rte_mp_request_async(struct rte_mp_msg *req,
> >> const struct timespec *ts,
> >>  	end = &param->end;
> >>  	reply = &param->user_reply;
> >>
> >> -	end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
> >> +	clock_gettime(CLOCK_TYPE_ID, &now);
> >> +	end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
> >>  	end->tv_sec = now.tv_sec + ts->tv_sec +
> >> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
> >> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
> >>  	reply->nb_sent = 0;
> >>  	reply->nb_received = 0;
> >>  	reply->msgs = NULL;
> >> --
> >> 2.7.4
> >>
> >
>
  
humin (Q) April 29, 2021, 2:12 a.m. UTC | #4
Hi, Morten,
     fixed in v2, thanks.

在 2021/4/28 18:59, Morten Brørup 写道:
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of fengchengwen
>> Sent: Wednesday, April 28, 2021 12:36 PM
>>
>> On 2021/4/28 17:24, Morten Brørup wrote:
>>>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Min Hu (Connor)
>>>> Sent: Wednesday, April 28, 2021 10:13 AM
>>>>
>>>> From: Chengwen Feng <fengchengwen@huawei.com>
>>>>
>>>> Currently, the mp uses gettimeofday() API to get the time, and used
>> as
>>>> timeout parameter.
>>>>
>>>> But the time which gets from gettimeofday() API isn't monotonically
>>>> increasing. The process may fail if the system time is changed.
>>>>
>>>> This fixes it by using clock_gettime() API with monotonic
>> attribution.
>>>>
>>>> Fixes: 783b6e54971d ("eal: add synchronous multi-process
>>>> communication")
>>>> Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
>>>> Cc: stable@dpdk.org
>>>>
>>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
>>>> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
>>>> ---
>>>>   lib/eal/common/eal_common_proc.c | 45 +++++++++++++++++------------
>> ---
>>>> --------
>>>>   1 file changed, 19 insertions(+), 26 deletions(-)
>>>>
>>>> diff --git a/lib/eal/common/eal_common_proc.c
>>>> b/lib/eal/common/eal_common_proc.c
>>>> index 6d1af3c..7f08826 100644
>>>> --- a/lib/eal/common/eal_common_proc.c
>>>> +++ b/lib/eal/common/eal_common_proc.c
>>>> @@ -40,6 +40,12 @@ static char mp_dir_path[PATH_MAX]; /* The
>> directory
>>>> path for all mp sockets */
>>>>   static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
>>>>   static char peer_name[PATH_MAX];
>>>>
>>>> +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
>>>> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
>>>> +#else
>>>> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC
>>>> +#endif
>>>
>>> Just out of curiosity: Why do you prefer CLOCK_MONOTONIC_RAW over
>> CLOCK_MONOTONIC?
>>>
>>
>> there may slightly difference, the CLOCK_MONOTONIC_RAW is totally local
>> oscillator
>> (pls see below link), just preferred in engineering practice.
>> https://stackoverflow.com/questions/14270300/what-is-the-difference-
>> between-clock-monotonic-clock-monotonic-raw
> 
> Interesting link! Following the treads there, it looks like CLOCK_MONOTONIC had a bug in some old kernel versions, where in certain circumstances it could jump slightly backwards.
> 
> That bug seems to have been fixed, so CLOCK_MONOTONIC should be safe to use. Source: https://bugzilla.redhat.com/show_bug.cgi?id=448449
> 
>>
>>>> +
>>>>   struct action_entry {
>>>>   	TAILQ_ENTRY(action_entry) next;
>>>>   	char action_name[RTE_MP_MAX_NAME_LEN];
>>>> @@ -490,14 +496,8 @@ async_reply_handle_thread_unsafe(void *arg)
>>>>   	struct pending_request *req = (struct pending_request *)arg;
>>>>   	enum async_action action;
>>>>   	struct timespec ts_now;
>>>> -	struct timeval now;
>>>>
>>>> -	if (gettimeofday(&now, NULL) < 0) {
>>>> -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
>>>> -		goto no_trigger;
>>>> -	}
>>>> -	ts_now.tv_nsec = now.tv_usec * 1000;
>>>> -	ts_now.tv_sec = now.tv_sec;
>>>> +	clock_gettime(CLOCK_TYPE_ID, &ts_now);
>>>>
>>>>   	action = process_async_request(req, &ts_now);
>>>>
>>>> @@ -896,6 +896,7 @@ mp_request_sync(const char *dst, struct
>> rte_mp_msg
>>>> *req,
>>>>   	       struct rte_mp_reply *reply, const struct timespec *ts)
>>>>   {
>>>>   	int ret;
>>>> +	pthread_condattr_t attr;
>>>>   	struct rte_mp_msg msg, *tmp;
>>>>   	struct pending_request pending_req, *exist;
>>>>
>>>> @@ -904,7 +905,9 @@ mp_request_sync(const char *dst, struct
>> rte_mp_msg
>>>> *req,
>>>>   	strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
>>>>   	pending_req.request = req;
>>>>   	pending_req.reply = &msg;
>>>> -	pthread_cond_init(&pending_req.sync.cond, NULL);
>>>> +	pthread_condattr_init(&attr);
>>>> +	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
>>>
>>> Shouldn't CLOCK_MONOTONIC be CLOCK_TYPE_ID here too?
>>
>> After reading the source code, it only support CLOCK_MONOTONIC and
>> CLOCK_REALTIME
>> (pls see below link), so cant't use CLOCK_TYPE_ID here.
>> https://code.woboq.org/userspace/glibc/nptl/pthread_condattr_setclock.c
>> .html#pthread_condattr_setclock
>>
>> will fix in v2 by make CLOCK_TYPE_ID equal CLOCK_MONOTONIC.
> 
> OK, then just get rid of the CLOCK_TYPE_ID definition and use CLOCK_MONOTONIC instead.
> 
>>
>> thanks
>>
>>>
>>>> +	pthread_cond_init(&pending_req.sync.cond, &attr);
>>>>
>>>>   	exist = find_pending_request(dst, req->name);
>>>>   	if (exist) {
>>>> @@ -967,8 +970,7 @@ rte_mp_request_sync(struct rte_mp_msg *req,
>> struct
>>>> rte_mp_reply *reply,
>>>>   	int dir_fd, ret = -1;
>>>>   	DIR *mp_dir;
>>>>   	struct dirent *ent;
>>>> -	struct timeval now;
>>>> -	struct timespec end;
>>>> +	struct timespec now, end;
>>>>   	const struct internal_config *internal_conf =
>>>>   		eal_get_internal_configuration();
>>>>
>>>> @@ -987,15 +989,10 @@ rte_mp_request_sync(struct rte_mp_msg *req,
>>>> struct rte_mp_reply *reply,
>>>>   		return -1;
>>>>   	}
>>>>
>>>> -	if (gettimeofday(&now, NULL) < 0) {
>>>> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
>>>> -		rte_errno = errno;
>>>> -		goto end;
>>>> -	}
>>>> -
>>>> -	end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
>>>> +	clock_gettime(CLOCK_TYPE_ID, &now);
>>>> +	end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
>>>>   	end.tv_sec = now.tv_sec + ts->tv_sec +
>>>> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
>>>> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
>>>>
>>>>   	/* for secondary process, send request to the primary process
>>>> only */
>>>>   	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
>>>> @@ -1069,7 +1066,7 @@ rte_mp_request_async(struct rte_mp_msg *req,
>>>> const struct timespec *ts,
>>>>   	int dir_fd, ret = 0;
>>>>   	DIR *mp_dir;
>>>>   	struct dirent *ent;
>>>> -	struct timeval now;
>>>> +	struct timespec now;
>>>>   	struct timespec *end;
>>>>   	bool dummy_used = false;
>>>>   	const struct internal_config *internal_conf =
>>>> @@ -1086,11 +1083,6 @@ rte_mp_request_async(struct rte_mp_msg *req,
>>>> const struct timespec *ts,
>>>>   		return -1;
>>>>   	}
>>>>
>>>> -	if (gettimeofday(&now, NULL) < 0) {
>>>> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
>>>> -		rte_errno = errno;
>>>> -		return -1;
>>>> -	}
>>>>   	copy = calloc(1, sizeof(*copy));
>>>>   	dummy = calloc(1, sizeof(*dummy));
>>>>   	param = calloc(1, sizeof(*param));
>>>> @@ -1108,9 +1100,10 @@ rte_mp_request_async(struct rte_mp_msg *req,
>>>> const struct timespec *ts,
>>>>   	end = &param->end;
>>>>   	reply = &param->user_reply;
>>>>
>>>> -	end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
>>>> +	clock_gettime(CLOCK_TYPE_ID, &now);
>>>> +	end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
>>>>   	end->tv_sec = now.tv_sec + ts->tv_sec +
>>>> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
>>>> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
>>>>   	reply->nb_sent = 0;
>>>>   	reply->nb_received = 0;
>>>>   	reply->msgs = NULL;
>>>> --
>>>> 2.7.4
>>>>
>>>
>>
>
  

Patch

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 6d1af3c..7f08826 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -40,6 +40,12 @@  static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */
 static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
 static char peer_name[PATH_MAX];
 
+#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
+#else
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC
+#endif
+
 struct action_entry {
 	TAILQ_ENTRY(action_entry) next;
 	char action_name[RTE_MP_MAX_NAME_LEN];
@@ -490,14 +496,8 @@  async_reply_handle_thread_unsafe(void *arg)
 	struct pending_request *req = (struct pending_request *)arg;
 	enum async_action action;
 	struct timespec ts_now;
-	struct timeval now;
 
-	if (gettimeofday(&now, NULL) < 0) {
-		RTE_LOG(ERR, EAL, "Cannot get current time\n");
-		goto no_trigger;
-	}
-	ts_now.tv_nsec = now.tv_usec * 1000;
-	ts_now.tv_sec = now.tv_sec;
+	clock_gettime(CLOCK_TYPE_ID, &ts_now);
 
 	action = process_async_request(req, &ts_now);
 
@@ -896,6 +896,7 @@  mp_request_sync(const char *dst, struct rte_mp_msg *req,
 	       struct rte_mp_reply *reply, const struct timespec *ts)
 {
 	int ret;
+	pthread_condattr_t attr;
 	struct rte_mp_msg msg, *tmp;
 	struct pending_request pending_req, *exist;
 
@@ -904,7 +905,9 @@  mp_request_sync(const char *dst, struct rte_mp_msg *req,
 	strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
 	pending_req.request = req;
 	pending_req.reply = &msg;
-	pthread_cond_init(&pending_req.sync.cond, NULL);
+	pthread_condattr_init(&attr);
+	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
+	pthread_cond_init(&pending_req.sync.cond, &attr);
 
 	exist = find_pending_request(dst, req->name);
 	if (exist) {
@@ -967,8 +970,7 @@  rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 	int dir_fd, ret = -1;
 	DIR *mp_dir;
 	struct dirent *ent;
-	struct timeval now;
-	struct timespec end;
+	struct timespec now, end;
 	const struct internal_config *internal_conf =
 		eal_get_internal_configuration();
 
@@ -987,15 +989,10 @@  rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		return -1;
 	}
 
-	if (gettimeofday(&now, NULL) < 0) {
-		RTE_LOG(ERR, EAL, "Failed to get current time\n");
-		rte_errno = errno;
-		goto end;
-	}
-
-	end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+	clock_gettime(CLOCK_TYPE_ID, &now);
+	end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
 	end.tv_sec = now.tv_sec + ts->tv_sec +
-			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+			(now.tv_nsec + ts->tv_nsec) / 1000000000;
 
 	/* for secondary process, send request to the primary process only */
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
@@ -1069,7 +1066,7 @@  rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
-	struct timeval now;
+	struct timespec now;
 	struct timespec *end;
 	bool dummy_used = false;
 	const struct internal_config *internal_conf =
@@ -1086,11 +1083,6 @@  rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 		return -1;
 	}
 
-	if (gettimeofday(&now, NULL) < 0) {
-		RTE_LOG(ERR, EAL, "Failed to get current time\n");
-		rte_errno = errno;
-		return -1;
-	}
 	copy = calloc(1, sizeof(*copy));
 	dummy = calloc(1, sizeof(*dummy));
 	param = calloc(1, sizeof(*param));
@@ -1108,9 +1100,10 @@  rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 	end = &param->end;
 	reply = &param->user_reply;
 
-	end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+	clock_gettime(CLOCK_TYPE_ID, &now);
+	end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
 	end->tv_sec = now.tv_sec + ts->tv_sec +
-			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+			(now.tv_nsec + ts->tv_nsec) / 1000000000;
 	reply->nb_sent = 0;
 	reply->nb_received = 0;
 	reply->msgs = NULL;