[v12,1/7] buildtools/chkincs: relax C linkage requirement

Message ID 20240920104754.739033-2-mattias.ronnblom@ericsson.com (mailing list archive)
State Changes Requested, archived
Delegated to: David Marchand
Headers
Series Improve EAL bit operations API |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Mattias Rönnblom Sept. 20, 2024, 10:47 a.m. UTC
Relax chkincs requirement of all DPDK header files having to contain
'extern "C"'.

Instructing a C++ toolchain to use C linkage is only necessarily if the
header file declares symbols (i.e., functions or global variables).

With this change, chkincs tries to find if any functions or references
to global variables are declared in the header file, and if not, no C
linkage is required.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>

--

PATCH v11:
 * Detect functions from the Windows POSIX wrappers, to avoid false
   positives for redundant 'extern "C"'.
---
 buildtools/chkincs/chkextern.py | 88 +++++++++++++++++++++++++++++++++
 buildtools/chkincs/meson.build  | 14 +++---
 2 files changed, 95 insertions(+), 7 deletions(-)
 create mode 100755 buildtools/chkincs/chkextern.py
  

Comments

Robin Jarry Oct. 3, 2024, 4:47 p.m. UTC | #1
Mattias Rönnblom, Sep 20, 2024 at 12:47:
> Relax chkincs requirement of all DPDK header files having to contain
> 'extern "C"'.
>
> Instructing a C++ toolchain to use C linkage is only necessarily if the
> header file declares symbols (i.e., functions or global variables).
>
> With this change, chkincs tries to find if any functions or references
> to global variables are declared in the header file, and if not, no C
> linkage is required.
>
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>
> --
>
> PATCH v11:
>  * Detect functions from the Windows POSIX wrappers, to avoid false
>    positives for redundant 'extern "C"'.
> ---

Hi Mattias,

I have some remarks on this script. Please see below:

>  buildtools/chkincs/chkextern.py | 88 +++++++++++++++++++++++++++++++++
>  buildtools/chkincs/meson.build  | 14 +++---
>  2 files changed, 95 insertions(+), 7 deletions(-)
>  create mode 100755 buildtools/chkincs/chkextern.py
>
> diff --git a/buildtools/chkincs/chkextern.py b/buildtools/chkincs/chkextern.py
> new file mode 100755
> index 0000000000..5374ce1c72
> --- /dev/null
> +++ b/buildtools/chkincs/chkextern.py
> @@ -0,0 +1,88 @@
> +#! /usr/bin/env python3
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2024 Ericsson AB
> +
> +import sys
> +import re
> +
> +def strip_cpp(header):
> +    no_cpp = ""
> +    header = header.replace("\\\n", " ")
> +
> +    for line in header.split("\n"):
> +        if re.match(r'^\s*#.*', line) is None and len(line) > 0:
> +            no_cpp += "%s\n" % line
> +
> +    return no_cpp
> +
> +
> +def strip_comments(header):
> +    no_c_comments = re.sub(r'/\*.*?\*/', '', header, flags=re.DOTALL)
> +    no_cxx_comments = re.sub(r'//.*', '', no_c_comments)
> +    return no_cxx_comments
> +
> +
> +def strip(header):
> +    header = strip_comments(header)
> +    header = strip_cpp(header)
> +    return header

None of these three strip* functions are used.

> +
> +
> +def has_extern_c(header):
> +    return header.find('extern "C"') != -1
> +
> +
> +def has_vars(header):
> +    return re.search(r'^extern\s+[a-z0-9_]+\s.*;', header, flags=re.MULTILINE) is not None
> +
> +
> +FUNCTION_RES = [
> +    r'rte_[a-z0-9_]+\(',
> +    r'cmdline_[a-z0-9_]+\(',
> +    r'vt100_[a-z0-9_]+\(',
> +    r'rdline_[a-z0-9_]+\(',
> +    r'cirbuf_[a-z0-9_]+\(',
> +    # Windows UNIX compatibility
> +    r'pthread_[a-z0-9_]+\(',
> +    r'regcomp\(',
> +    r'count_cpu\('
> +]
> +
> +
> +def has_functions(header):
> +    for function_re in FUNCTION_RES:
> +        if re.search(function_re, header) is not None:
> +            return True
> +    return False
> +
> +
> +def has_symbols(header):
> +    return has_functions(header) or has_vars(header)
> +
> +
> +def chk_missing(filename):
> +    header = open(filename).read()

with open(filename) as f:
    header = f.read()

> +    if has_symbols(header) and not has_extern_c(header):
> +        print(filename)
> +
> +
> +def chk_redundant(filename):
> +    header = open(filename).read()

with open(filename) as f:
    header = f.read()

> +    if not has_symbols(header) and has_extern_c(header):
> +        print(filename)

Can we rename these functions to check_missing and check_redundant? 
I don't think the abbreviation is needed here.

> +
> +if len(sys.argv) < 3:
> +    print("%s missing|redundant <header-file> ..." % sys.argv[0])
> +    sys.exit(1)
> +
> +op = sys.argv[1]
> +headers = sys.argv[2:]
> +
> +for header in headers:
> +    if op == 'missing':
> +        chk_missing(header)
> +    elif op == 'redundant':
> +        chk_redundant(header)

I don't see the 'redundant' op being used here.

> +    else:
> +        print("Unknown operation.")
> +        sys.exit(1)

I know it is a simple python script but it would be better to add 
a proper main() function and use argparse to handle errors. E.g.:

def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("op", choices=("missing", "redundant"))
    parser.add_argument("headers", nargs="+")
    args = parser.parse_args()

    for h in args.headers:
        if op == "missing":
            chk_missing(h)
        elif op == "redundant":
            chk_redundant(h)

if __name__ == "__main__":
    main()


> diff --git a/buildtools/chkincs/meson.build b/buildtools/chkincs/meson.build
> index f2dadcae18..762f85efe5 100644
> --- a/buildtools/chkincs/meson.build
> +++ b/buildtools/chkincs/meson.build
> @@ -38,13 +38,13 @@ if not add_languages('cpp', required: false)
>  endif
>  
>  # check for extern C in files, since this is not detected as an error by the compiler
> -grep = find_program('grep', required: false)
> -if grep.found()
> -    errlist = run_command([grep, '--files-without-match', '^extern "C"', dpdk_chkinc_headers],
> -            check: false, capture: true).stdout().split()
> -    if errlist != []
> -        error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- '.join(errlist))
> -    endif
> +chkextern = find_program('chkextern.py')
> +
> +missing_extern_headers = run_command(chkextern, 'missing', dpdk_chkinc_headers,
> +      capture: true, check: true).stdout().split()
> +
> +if missing_extern_headers != []
> +    error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- '.join(missing_extern_headers))

Instead of just relying on this script output, it would be better if it 
exited with a non-zero status when something is wrong. That way you 
would not have to capture stdout at all and you could leave meson do the 
work.

>  endif
>  
>  gen_cpp_files = generator(gen_c_file_for_header,
> -- 
> 2.43.0
  
Mattias Rönnblom Oct. 4, 2024, 7:31 a.m. UTC | #2
On 2024-10-03 18:47, Robin Jarry wrote:
> Mattias Rönnblom, Sep 20, 2024 at 12:47:
>> Relax chkincs requirement of all DPDK header files having to contain
>> 'extern "C"'.
>>
>> Instructing a C++ toolchain to use C linkage is only necessarily if the
>> header file declares symbols (i.e., functions or global variables).
>>
>> With this change, chkincs tries to find if any functions or references
>> to global variables are declared in the header file, and if not, no C
>> linkage is required.
>>
>> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>>
>> -- 
>>
>> PATCH v11:
>>  * Detect functions from the Windows POSIX wrappers, to avoid false
>>    positives for redundant 'extern "C"'.
>> ---
> 
> Hi Mattias,
> 
> I have some remarks on this script. Please see below:
> 
>>  buildtools/chkincs/chkextern.py | 88 +++++++++++++++++++++++++++++++++
>>  buildtools/chkincs/meson.build  | 14 +++---
>>  2 files changed, 95 insertions(+), 7 deletions(-)
>>  create mode 100755 buildtools/chkincs/chkextern.py
>>
>> diff --git a/buildtools/chkincs/chkextern.py b/buildtools/chkincs/ 
>> chkextern.py
>> new file mode 100755
>> index 0000000000..5374ce1c72
>> --- /dev/null
>> +++ b/buildtools/chkincs/chkextern.py
>> @@ -0,0 +1,88 @@
>> +#! /usr/bin/env python3
>> +# SPDX-License-Identifier: BSD-3-Clause
>> +# Copyright(c) 2024 Ericsson AB
>> +
>> +import sys
>> +import re
>> +
>> +def strip_cpp(header):
>> +    no_cpp = ""
>> +    header = header.replace("\\\n", " ")
>> +
>> +    for line in header.split("\n"):
>> +        if re.match(r'^\s*#.*', line) is None and len(line) > 0:
>> +            no_cpp += "%s\n" % line
>> +
>> +    return no_cpp
>> +
>> +
>> +def strip_comments(header):
>> +    no_c_comments = re.sub(r'/\*.*?\*/', '', header, flags=re.DOTALL)
>> +    no_cxx_comments = re.sub(r'//.*', '', no_c_comments)
>> +    return no_cxx_comments
>> +
>> +
>> +def strip(header):
>> +    header = strip_comments(header)
>> +    header = strip_cpp(header)
>> +    return header
> 
> None of these three strip* functions are used.
> 

Oops.

I think I'll just remove the strip_cpp() function (not sure why I 
thought that was ever needed). strip_comments() may be somewhat useful 
when looking for the 'extern "C"' string, if for some reason that string 
would be in a comment somewhere.

>> +
>> +
>> +def has_extern_c(header):
>> +    return header.find('extern "C"') != -1
>> +
>> +
>> +def has_vars(header):
>> +    return re.search(r'^extern\s+[a-z0-9_]+\s.*;', header, 
>> flags=re.MULTILINE) is not None
>> +
>> +
>> +FUNCTION_RES = [
>> +    r'rte_[a-z0-9_]+\(',
>> +    r'cmdline_[a-z0-9_]+\(',
>> +    r'vt100_[a-z0-9_]+\(',
>> +    r'rdline_[a-z0-9_]+\(',
>> +    r'cirbuf_[a-z0-9_]+\(',
>> +    # Windows UNIX compatibility
>> +    r'pthread_[a-z0-9_]+\(',
>> +    r'regcomp\(',
>> +    r'count_cpu\('
>> +]
>> +
>> +
>> +def has_functions(header):
>> +    for function_re in FUNCTION_RES:
>> +        if re.search(function_re, header) is not None:
>> +            return True
>> +    return False
>> +
>> +
>> +def has_symbols(header):
>> +    return has_functions(header) or has_vars(header)
>> +
>> +
>> +def chk_missing(filename):
>> +    header = open(filename).read()
> 
> with open(filename) as f:
>     header = f.read()
> 

What benefit would that construct be to this program?

>> +    if has_symbols(header) and not has_extern_c(header):
>> +        print(filename)
>> +
>> +
>> +def chk_redundant(filename):
>> +    header = open(filename).read()
> 
> with open(filename) as f:
>     header = f.read()
> 
>> +    if not has_symbols(header) and has_extern_c(header):
>> +        print(filename)
> 
> Can we rename these functions to check_missing and check_redundant? I 
> don't think the abbreviation is needed here.
> 

Sure.

(The function name was derived from the script name, which in turn got 
its name inspired by chkincs "tool" (directory) name.)

>> +
>> +if len(sys.argv) < 3:
>> +    print("%s missing|redundant <header-file> ..." % sys.argv[0])
>> +    sys.exit(1)
>> +
>> +op = sys.argv[1]
>> +headers = sys.argv[2:]
>> +
>> +for header in headers:
>> +    if op == 'missing':
>> +        chk_missing(header)
>> +    elif op == 'redundant':
>> +        chk_redundant(header)
> 
> I don't see the 'redundant' op being used here.
> 

It's used a patch further down the set.

>> +    else:
>> +        print("Unknown operation.")
>> +        sys.exit(1)
> 
> I know it is a simple python script but it would be better to add a 
> proper main() function and use argparse to handle errors. E.g.:
> 
> def main():
>     parser = argparse.ArgumentParser(description=__doc__)
>     parser.add_argument("op", choices=("missing", "redundant"))
>     parser.add_argument("headers", nargs="+")
>     args = parser.parse_args()
> 
>     for h in args.headers:
>         if op == "missing":
>             chk_missing(h)
>         elif op == "redundant":
>             chk_redundant(h)
> 
> if __name__ == "__main__":
>     main()
> 
> 

I don't think you need to bring in the usual Python boiler plate. This 
script is not supposed to be invoked directly by a user - it's just an 
extension of the meson build scripts.

>> diff --git a/buildtools/chkincs/meson.build b/buildtools/chkincs/ 
>> meson.build
>> index f2dadcae18..762f85efe5 100644
>> --- a/buildtools/chkincs/meson.build
>> +++ b/buildtools/chkincs/meson.build
>> @@ -38,13 +38,13 @@ if not add_languages('cpp', required: false)
>>  endif
>>
>>  # check for extern C in files, since this is not detected as an error 
>> by the compiler
>> -grep = find_program('grep', required: false)
>> -if grep.found()
>> -    errlist = run_command([grep, '--files-without-match', '^extern 
>> "C"', dpdk_chkinc_headers],
>> -            check: false, capture: true).stdout().split()
>> -    if errlist != []
>> -        error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- 
>> '.join(errlist))
>> -    endif
>> +chkextern = find_program('chkextern.py')
>> +
>> +missing_extern_headers = run_command(chkextern, 'missing', 
>> dpdk_chkinc_headers,
>> +      capture: true, check: true).stdout().split()
>> +
>> +if missing_extern_headers != []
>> +    error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- 
>> '.join(missing_extern_headers))
> 
> Instead of just relying on this script output, it would be better if it 
> exited with a non-zero status when something is wrong. That way you 
> would not have to capture stdout at all and you could leave meson do the 
> work.
> 

Sure, but it would be required to invoke the script for every header 
file in the tree. Not sure I think that would be a net gain.

Thanks for the review.

>>  endif
>>
>>  gen_cpp_files = generator(gen_c_file_for_header,
>> -- 
>> 2.43.0
>
  
Robin Jarry Oct. 4, 2024, 8:05 a.m. UTC | #3
Mattias Rönnblom, Oct 04, 2024 at 09:31:
> On 2024-10-03 18:47, Robin Jarry wrote:
>>> +def chk_missing(filename):
>>> +    header = open(filename).read()
>> 
>> with open(filename) as f:
>>     header = f.read()
>> 
>
> What benefit would that construct be to this program?

open(filename).read() leaks open file descriptors. This construct 
ensures the file is closed after reading is complete.

I understand that this script isn't mission critical, but let's avoid 
leaving bad examples in the code that others may follow by accident :)

>> I know it is a simple python script but it would be better to add a 
>> proper main() function and use argparse to handle errors. E.g.:
>> 
>> def main():
>>     parser = argparse.ArgumentParser(description=__doc__)
>>     parser.add_argument("op", choices=("missing", "redundant"))
>>     parser.add_argument("headers", nargs="+")
>>     args = parser.parse_args()
>> 
>>     for h in args.headers:
>>         if op == "missing":
>>             chk_missing(h)
>>         elif op == "redundant":
>>             chk_redundant(h)
>> 
>> if __name__ == "__main__":
>>     main()
>> 
>> 
>
> I don't think you need to bring in the usual Python boiler plate. This 
> script is not supposed to be invoked directly by a user - it's just an 
> extension of the meson build scripts.

Same as above, I know it is not a user facing script. But I think it 
would be much better to write proper python code to have good examples 
for newcomers to follow.

>>> diff --git a/buildtools/chkincs/meson.build b/buildtools/chkincs/ 
>>> meson.build
>>> index f2dadcae18..762f85efe5 100644
>>> --- a/buildtools/chkincs/meson.build
>>> +++ b/buildtools/chkincs/meson.build
>>> @@ -38,13 +38,13 @@ if not add_languages('cpp', required: false)
>>>  endif
>>>
>>>  # check for extern C in files, since this is not detected as an error 
>>> by the compiler
>>> -grep = find_program('grep', required: false)
>>> -if grep.found()
>>> -    errlist = run_command([grep, '--files-without-match', '^extern 
>>> "C"', dpdk_chkinc_headers],
>>> -            check: false, capture: true).stdout().split()
>>> -    if errlist != []
>>> -        error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- 
>>> '.join(errlist))
>>> -    endif
>>> +chkextern = find_program('chkextern.py')
>>> +
>>> +missing_extern_headers = run_command(chkextern, 'missing', 
>>> dpdk_chkinc_headers,
>>> +      capture: true, check: true).stdout().split()
>>> +
>>> +if missing_extern_headers != []
>>> +    error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- 
>>> '.join(missing_extern_headers))
>> 
>> Instead of just relying on this script output, it would be better if it 
>> exited with a non-zero status when something is wrong. That way you 
>> would not have to capture stdout at all and you could leave meson do the 
>> work.
>> 
>
> Sure, but it would be required to invoke the script for every header 
> file in the tree. Not sure I think that would be a net gain.

You can store a global exit status in the script and process all headers 
before exiting with an error if any.

> Thanks for the review.
>
>>>  endif
>>>
>>>  gen_cpp_files = generator(gen_c_file_for_header,
>>> -- 
>>> 2.43.0
>>
  
Mattias Rönnblom Oct. 4, 2024, 8:40 a.m. UTC | #4
On 2024-10-04 10:05, Robin Jarry wrote:
> Mattias Rönnblom, Oct 04, 2024 at 09:31:
>> On 2024-10-03 18:47, Robin Jarry wrote:
>>>> +def chk_missing(filename):
>>>> +    header = open(filename).read()
>>>
>>> with open(filename) as f:
>>>     header = f.read()
>>>
>>
>> What benefit would that construct be to this program?
> 
> open(filename).read() leaks open file descriptors. This construct 
> ensures the file is closed after reading is complete.
> 

I know what it does. I was asking why it matters in this context. But 
sure, I guess you could hit the per-process fd limit before the script 
exists.

> I understand that this script isn't mission critical, but let's avoid 
> leaving bad examples in the code that others may follow by accident :)
> 
>>> I know it is a simple python script but it would be better to add a 
>>> proper main() function and use argparse to handle errors. E.g.:
>>>
>>> def main():
>>>     parser = argparse.ArgumentParser(description=__doc__)
>>>     parser.add_argument("op", choices=("missing", "redundant"))
>>>     parser.add_argument("headers", nargs="+")
>>>     args = parser.parse_args()
>>>
>>>     for h in args.headers:
>>>         if op == "missing":
>>>             chk_missing(h)
>>>         elif op == "redundant":
>>>             chk_redundant(h)
>>>
>>> if __name__ == "__main__":
>>>     main()
>>>
>>>
>>
>> I don't think you need to bring in the usual Python boiler plate. This 
>> script is not supposed to be invoked directly by a user - it's just an 
>> extension of the meson build scripts.
> 
> Same as above, I know it is not a user facing script. But I think it 
> would be much better to write proper python code to have good examples 
> for newcomers to follow.
> 

Making small scripts needlessly complicated is not good example, it's a 
bad one.

>>>> diff --git a/buildtools/chkincs/meson.build b/buildtools/chkincs/ 
>>>> meson.build
>>>> index f2dadcae18..762f85efe5 100644
>>>> --- a/buildtools/chkincs/meson.build
>>>> +++ b/buildtools/chkincs/meson.build
>>>> @@ -38,13 +38,13 @@ if not add_languages('cpp', required: false)
>>>>  endif
>>>>
>>>>  # check for extern C in files, since this is not detected as an 
>>>> error by the compiler
>>>> -grep = find_program('grep', required: false)
>>>> -if grep.found()
>>>> -    errlist = run_command([grep, '--files-without-match', '^extern 
>>>> "C"', dpdk_chkinc_headers],
>>>> -            check: false, capture: true).stdout().split()
>>>> -    if errlist != []
>>>> -        error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- 
>>>> '.join(errlist))
>>>> -    endif
>>>> +chkextern = find_program('chkextern.py')
>>>> +
>>>> +missing_extern_headers = run_command(chkextern, 'missing', 
>>>> dpdk_chkinc_headers,
>>>> +      capture: true, check: true).stdout().split()
>>>> +
>>>> +if missing_extern_headers != []
>>>> +    error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- 
>>>> '.join(missing_extern_headers))
>>>
>>> Instead of just relying on this script output, it would be better if 
>>> it exited with a non-zero status when something is wrong. That way 
>>> you would not have to capture stdout at all and you could leave meson 
>>> do the work.
>>>
>>
>> Sure, but it would be required to invoke the script for every header 
>> file in the tree. Not sure I think that would be a net gain.
> 
> You can store a global exit status in the script and process all headers 
> before exiting with an error if any.
> 

You will need to give the user a list of offending header files.

>> Thanks for the review.
>>
>>>>  endif
>>>>
>>>>  gen_cpp_files = generator(gen_c_file_for_header,
>>>> -- 
>>>> 2.43.0
>>>
>
  
Robin Jarry Oct. 4, 2024, 11:51 a.m. UTC | #5
Mattias Rönnblom, Oct 04, 2024 at 10:40:
> Making small scripts needlessly complicated is not good example, it's a 
> bad one.

I don't find adding argument checks needlessly complicated but this is 
a matter of preference. To me, Python is not shell script. If you want 
something small, shell might be more appropriate?

>>> Sure, but it would be required to invoke the script for every header 
>>> file in the tree. Not sure I think that would be a net gain.
>> 
>> You can store a global exit status in the script and process all headers 
>> before exiting with an error if any.
>
> You will need to give the user a list of offending header files.

I'm not suggesting to avoid printing the offending file names. I'm only 
suggesting to exit(1) if there were *any* offending file names. That way 
you don't have to check *in meson* if the script did output anything. 
Checking the exit status is simpler.

Sorry for being pedantic, but Python code in DPDK is already treated 
badly. I wish we could improve the quality a bit.

Cheers.
  
Thomas Monjalon Oct. 4, 2024, 12:19 p.m. UTC | #6
04/10/2024 13:51, Robin Jarry:
> Mattias Rönnblom, Oct 04, 2024 at 10:40:
> > Making small scripts needlessly complicated is not good example, it's a 
> > bad one.
> 
> I don't find adding argument checks needlessly complicated but this is 
> a matter of preference. To me, Python is not shell script. If you want 
> something small, shell might be more appropriate?
> 
> >>> Sure, but it would be required to invoke the script for every header 
> >>> file in the tree. Not sure I think that would be a net gain.
> >> 
> >> You can store a global exit status in the script and process all headers 
> >> before exiting with an error if any.
> >
> > You will need to give the user a list of offending header files.
> 
> I'm not suggesting to avoid printing the offending file names. I'm only 
> suggesting to exit(1) if there were *any* offending file names. That way 
> you don't have to check *in meson* if the script did output anything. 
> Checking the exit status is simpler.
> 
> Sorry for being pedantic, but Python code in DPDK is already treated 
> badly. I wish we could improve the quality a bit.

Thank you Robin for your detailed review.
I support this effort of improving our Python scripts.
  
Mattias Rönnblom Oct. 6, 2024, 8:55 a.m. UTC | #7
On 2024-10-04 13:51, Robin Jarry wrote:
> Mattias Rönnblom, Oct 04, 2024 at 10:40:
>> Making small scripts needlessly complicated is not good example, it's 
>> a bad one.
> 
> I don't find adding argument checks needlessly complicated but this is a 
> matter of preference. To me, Python is not shell script. If you want 
> something small, shell might be more appropriate?
> 

Python can serve in many roles. I suggest you be more pragmatic and 
sensitive to the context. Sometimes the non-existence/non-use of doc 
strings and other language features that makes sense in larger programs 
is not a sign of the author being out to "treat Python badly".

>>>> Sure, but it would be required to invoke the script for every header 
>>>> file in the tree. Not sure I think that would be a net gain.
>>>
>>> You can store a global exit status in the script and process all 
>>> headers before exiting with an error if any.
>>
>> You will need to give the user a list of offending header files.
> 
> I'm not suggesting to avoid printing the offending file names. I'm only 
> suggesting to exit(1) if there were *any* offending file names. That way 
> you don't have to check *in meson* if the script did output anything. 
> Checking the exit status is simpler.
> 

What you wrote was "/../ That way you would not have to capture stdout 
at all and you could leave meson do the work.".

Can you elaborate on this? I'm not sure I follow. Is there a way to not 
capture the script output, invoke the script only once per build, and 
yet produce a fine-grained error message to the user?

I agree properly setting the exit code is an improvement. I just don't 
see how that materially changes anything on the meson side of things. 
But then, I know nothing about meson.

> Sorry for being pedantic, but Python code in DPDK is already treated 
> badly. I wish we could improve the quality a bit.
> 
> Cheers.
>
  
Mattias Rönnblom Oct. 6, 2024, 9:47 a.m. UTC | #8
On 2024-10-06 10:55, Mattias Rönnblom wrote:
> On 2024-10-04 13:51, Robin Jarry wrote:
>> Mattias Rönnblom, Oct 04, 2024 at 10:40:
>>> Making small scripts needlessly complicated is not good example, it's 
>>> a bad one.
>>
>> I don't find adding argument checks needlessly complicated but this is 
>> a matter of preference. To me, Python is not shell script. If you want 
>> something small, shell might be more appropriate?
>>
> 
> Python can serve in many roles. I suggest you be more pragmatic and 
> sensitive to the context. Sometimes the non-existence/non-use of doc 
> strings and other language features that makes sense in larger programs 
> is not a sign of the author being out to "treat Python badly".
> 
>>>>> Sure, but it would be required to invoke the script for every 
>>>>> header file in the tree. Not sure I think that would be a net gain.
>>>>
>>>> You can store a global exit status in the script and process all 
>>>> headers before exiting with an error if any.
>>>
>>> You will need to give the user a list of offending header files.
>>
>> I'm not suggesting to avoid printing the offending file names. I'm 
>> only suggesting to exit(1) if there were *any* offending file names. 
>> That way you don't have to check *in meson* if the script did output 
>> anything. Checking the exit status is simpler.
>>
> 
> What you wrote was "/../ That way you would not have to capture stdout 
> at all and you could leave meson do the work.".
> 
> Can you elaborate on this? I'm not sure I follow. Is there a way to not 
> capture the script output, invoke the script only once per build, and 
> yet produce a fine-grained error message to the user?
> 
> I agree properly setting the exit code is an improvement. I just don't 
> see how that materially changes anything on the meson side of things. 
> But then, I know nothing about meson.
> 

OK, so it does change something, but not for the better, seemingly.

meson.build now somehow has to distinguished between two different "errors":
1) One or more offending header files were found.
2) Any other kind of error (file not found, access control issues, 
internal script error, etc).

For 1), it should print a list of the offending header files.

If you just set "check: true" in run_command(), and have the 
chkextern.py script return non-zero on error, the offending header files 
will not be printed on stdout.

My guess would be that this complicating factor is the reason for this 
pattern of not (in/from meson) depending on the script delegate exit 
code, but rather the output.

May I should rename the script to "scanextern.py" to better reflect that 
a failure to find an issue is not an error (=non-zero exit code).

>> Sorry for being pedantic, but Python code in DPDK is already treated 
>> badly. I wish we could improve the quality a bit.
>>
>> Cheers.
>>
>
  
Robin Jarry Oct. 6, 2024, 12:25 p.m. UTC | #9
Mattias Rönnblom, Oct 06, 2024 at 11:47:
> OK, so it does change something, but not for the better, seemingly.
>
> meson.build now somehow has to distinguished between two different "errors":
> 1) One or more offending header files were found.
> 2) Any other kind of error (file not found, access control issues, 
> internal script error, etc).
>
> For 1), it should print a list of the offending header files.
>
> If you just set "check: true" in run_command(), and have the 
> chkextern.py script return non-zero on error, the offending header files 
> will not be printed on stdout.
>
> My guess would be that this complicating factor is the reason for this 
> pattern of not (in/from meson) depending on the script delegate exit 
> code, but rather the output.
>
> May I should rename the script to "scanextern.py" to better reflect that 
> a failure to find an issue is not an error (=non-zero exit code).

I think you need to change run_command() to custom_target(). I was 
thinking of this patch to be much simpler as follows:

diff --git a/buildtools/chkincs/check_extern_c.sh b/buildtools/chkincs/check_extern_c.sh
new file mode 100755
index 000000000000..96ace5ffd248
--- /dev/null
+++ b/buildtools/chkincs/check_extern_c.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+# SPDX-License-Identifier: BSD-3-Clause
+
+has_symbols() {
+	grep -Eq \
+		-e 'rte_[a-z0-9_]+\(' \
+		-e 'cmdline_[a-z0-9_]+\(' \
+		-e 'vt100_[a-z0-9_]+\(' \
+		-e 'rdline_[a-z0-9_]+\(' \
+		-e 'cirbuf_[a-z0-9_]+\(' \
+		-e 'pthread_[a-z0-9_]+\(' \
+		-e 'regcomp\(' \
+		-e 'count_cpu\(' \
+		-e '^extern[[:space:]]+[a-z0-9_]+[[:space:]]' \
+		"$1"
+}
+
+has_extern_c() {
+	grep -q 'extern "C"' "$1"
+}
+
+mode=$1
+shift
+ret=0
+
+case "$mode" in
+--missing)
+	for header in "$@"; do
+		if has_symbols "$header" && ! has_extern_c "$header"; then
+			echo "error: missing extern \"C\": $header" >&2
+			ret=1
+		fi
+	done
+	;;
+--redundant)
+	for header in "$@"; do
+		if ! has_symbols "$header" && has_extern_c "$header"; then
+			echo "error: missing extern \"C\": $header" >&2
+			exit 1
+		fi
+	done
+	;;
+*)
+	echo "error: unsupported mode '$mode'" >&2
+	ret=1
+esac
+
+exit $ret
diff --git a/buildtools/chkincs/meson.build b/buildtools/chkincs/meson.build
index f2dadcae18ef..b463845083eb 100644
--- a/buildtools/chkincs/meson.build
+++ b/buildtools/chkincs/meson.build
@@ -38,14 +38,11 @@ if not add_languages('cpp', required: false)
 endif
 
 # check for extern C in files, since this is not detected as an error by the compiler
-grep = find_program('grep', required: false)
-if grep.found()
-    errlist = run_command([grep, '--files-without-match', '^extern "C"', dpdk_chkinc_headers],
-            check: false, capture: true).stdout().split()
-    if errlist != []
-        error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- '.join(errlist))
-    endif
-endif
+custom_target('check-extern-c',
+        command: files('check_extern_c.sh') + ['--missing', '@INPUT@'],
+        input: dpdk_chkinc_headers,
+        output: 'check-extern-c',
+        build_by_default: true)
 
 gen_cpp_files = generator(gen_c_file_for_header,
         output: '@BASENAME@.cpp',
  
Robin Jarry Oct. 6, 2024, 1:09 p.m. UTC | #10
Robin Jarry, Oct 06, 2024 at 14:25:
> I think you need to change run_command() to custom_target(). I was 
> thinking of this patch to be much simpler as follows:
>
> diff --git a/buildtools/chkincs/check_extern_c.sh b/buildtools/chkincs/check_extern_c.sh

Sorry, I didn't think about windows compatibility which probably 
requires python. Please disregard my previous message. Here is 
a simplified python version:

diff --git a/buildtools/chkincs/check_extern_c.py b/buildtools/chkincs/check_extern_c.py
new file mode 100755
index 000000000000..9534ef10ba4e
--- /dev/null
+++ b/buildtools/chkincs/check_extern_c.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+
+import argparse
+import re
+import sys
+
+SYMBOL_EXPR = [
+    # external variable definitions
+    re.compile(r"^extern\s+[a-z0-9_]+\s", re.MULTILINE),
+    # exported functions
+    re.compile(r"rte_[a-z0-9_]+\("),
+    re.compile(r"cmdline_[a-z0-9_]+\("),
+    re.compile(r"vt100_[a-z0-9_]+\("),
+    re.compile(r"rdline_[a-z0-9_]+\("),
+    re.compile(r"cirbuf_[a-z0-9_]+\("),
+    # windows compatibility
+    re.compile(r"pthread_[a-z0-9_]+\("),
+    re.compile(r"regcomp\("),
+    re.compile(r"count_cpu\("),
+]
+
+
+def has_symbols(buf):
+    for expr in SYMBOL_EXPR:
+        if expr.search(buf):
+            return True
+    return False
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("mode", choices=("redundant", "missing"))
+    parser.add_argument("headers", nargs="+")
+    args = parser.parse_args()
+
+    ret = 0
+
+    for h in args.headers:
+        with open(h) as f:
+            buf = f.read()
+        if args.mode == "missing":
+            if has_symbols(buf) and 'extern "C"' not in buf:
+                print('error: missing extern "C":', h)
+                ret = 1
+        elif args.mode == "redundant":
+            if not has_symbols(buf) and 'extern "C"' in buf:
+                print('error: redundant extern "C":', h)
+                ret = 1
+
+    return ret
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/buildtools/chkincs/meson.build b/buildtools/chkincs/meson.build
index f2dadcae18ef..381f30f42431 100644
--- a/buildtools/chkincs/meson.build
+++ b/buildtools/chkincs/meson.build
@@ -38,14 +38,11 @@ if not add_languages('cpp', required: false)
 endif
 
 # check for extern C in files, since this is not detected as an error by the compiler
-grep = find_program('grep', required: false)
-if grep.found()
-    errlist = run_command([grep, '--files-without-match', '^extern "C"', dpdk_chkinc_headers],
-            check: false, capture: true).stdout().split()
-    if errlist != []
-        error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- '.join(errlist))
-    endif
-endif
+custom_target('check-extern-c',
+        command: files('check_extern_c.py') + ['missing', '@INPUT@'],
+        input: dpdk_chkinc_headers,
+        output: 'check-extern-c',
+        build_by_default: true)
 
 gen_cpp_files = generator(gen_c_file_for_header,
         output: '@BASENAME@.cpp',
  
Mattias Rönnblom Oct. 6, 2024, 2:17 p.m. UTC | #11
On 2024-10-06 14:25, Robin Jarry wrote:
> Mattias Rönnblom, Oct 06, 2024 at 11:47:
>> OK, so it does change something, but not for the better, seemingly.
>>
>> meson.build now somehow has to distinguished between two different 
>> "errors":
>> 1) One or more offending header files were found.
>> 2) Any other kind of error (file not found, access control issues, 
>> internal script error, etc).
>>
>> For 1), it should print a list of the offending header files.
>>
>> If you just set "check: true" in run_command(), and have the 
>> chkextern.py script return non-zero on error, the offending header 
>> files will not be printed on stdout.
>>
>> My guess would be that this complicating factor is the reason for this 
>> pattern of not (in/from meson) depending on the script delegate exit 
>> code, but rather the output.
>>
>> May I should rename the script to "scanextern.py" to better reflect 
>> that a failure to find an issue is not an error (=non-zero exit code).
> 
> I think you need to change run_command() to custom_target(). I was 
> thinking of this patch to be much simpler as follows:

I started off with a shell script, but I switched to Python when I 
wanted to strip off comments. (Not that that can't be done in bourne shell.)

The below script doesn't strip off comments, and provides no usage 
information. Earlier I got the impression you wanted to improve 
command-line usage.

I think we need to decide if this thing is an integral part of the build 
system, custom-made for its needs, or if its something that should also 
be friendly to a command-line human user.

I could go either way on that.

> 
> diff --git a/buildtools/chkincs/check_extern_c.sh b/buildtools/chkincs/ 
> check_extern_c.sh
> new file mode 100755
> index 000000000000..96ace5ffd248
> --- /dev/null
> +++ b/buildtools/chkincs/check_extern_c.sh
> @@ -0,0 +1,49 @@
> +#!/bin/sh
> +# SPDX-License-Identifier: BSD-3-Clause
> +
> +has_symbols() {
> +    grep -Eq \
> +        -e 'rte_[a-z0-9_]+\(' \
> +        -e 'cmdline_[a-z0-9_]+\(' \
> +        -e 'vt100_[a-z0-9_]+\(' \
> +        -e 'rdline_[a-z0-9_]+\(' \
> +        -e 'cirbuf_[a-z0-9_]+\(' \
> +        -e 'pthread_[a-z0-9_]+\(' \
> +        -e 'regcomp\(' \
> +        -e 'count_cpu\(' \
> +        -e '^extern[[:space:]]+[a-z0-9_]+[[:space:]]' \
> +        "$1"
> +}
> +
> +has_extern_c() {
> +    grep -q 'extern "C"' "$1"
> +}
> +
> +mode=$1
> +shift
> +ret=0
> +
> +case "$mode" in

Here the shell script purist notes that getopt(1) is not used. :)

> +--missing)
> +    for header in "$@"; do
> +        if has_symbols "$header" && ! has_extern_c "$header"; then
> +            echo "error: missing extern \"C\": $header" >&2
> +            ret=1
> +        fi
> +    done
> +    ;;
> +--redundant)
> +    for header in "$@"; do
> +        if ! has_symbols "$header" && has_extern_c "$header"; then
> +            echo "error: missing extern \"C\": $header" >&2
> +            exit 1
> +        fi
> +    done
> +    ;;
> +*)
> +    echo "error: unsupported mode '$mode'" >&2
> +    ret=1
> +esac
> +
> +exit $ret
> diff --git a/buildtools/chkincs/meson.build b/buildtools/chkincs/ 
> meson.build
> index f2dadcae18ef..b463845083eb 100644
> --- a/buildtools/chkincs/meson.build
> +++ b/buildtools/chkincs/meson.build
> @@ -38,14 +38,11 @@ if not add_languages('cpp', required: false)
> endif
> 
> # check for extern C in files, since this is not detected as an error by 
> the compiler
> -grep = find_program('grep', required: false)
> -if grep.found()
> -    errlist = run_command([grep, '--files-without-match', '^extern 
> "C"', dpdk_chkinc_headers],
> -            check: false, capture: true).stdout().split()
> -    if errlist != []
> -        error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- 
> '.join(errlist))
> -    endif
> -endif
> +custom_target('check-extern-c',
> +        command: files('check_extern_c.sh') + ['--missing', '@INPUT@'],
> +        input: dpdk_chkinc_headers,
> +        output: 'check-extern-c',
> +        build_by_default: true)
> 
> gen_cpp_files = generator(gen_c_file_for_header,
>          output: '@BASENAME@.cpp',
>
  
Robin Jarry Oct. 6, 2024, 3:58 p.m. UTC | #12
Mattias Rönnblom, Oct 06, 2024 at 16:17:
>> I think you need to change run_command() to custom_target(). I was 
>> thinking of this patch to be much simpler as follows:
>
> I started off with a shell script, but I switched to Python when I 
> wanted to strip off comments. (Not that that can't be done in bourne shell.)
>
> The below script doesn't strip off comments, and provides no usage 
> information. Earlier I got the impression you wanted to improve 
> command-line usage.
>
> I think we need to decide if this thing is an integral part of the build 
> system, custom-made for its needs, or if its something that should also 
> be friendly to a command-line human user.
>
> I could go either way on that.

We don't have to choose. Being part of the build system does not mean 
the script cannot use the standard python tools to parse arguments. Here 
is what I came up with. It is shorter, strips comments and deals with 
arguments in a simpler way. We don't need to pass the "missing" or 
"redundant" operation to the script, it can figure out what to do on its 
own (see inline comment in main()):

diff --git a/buildtools/chkincs/check_extern_c.py b/buildtools/chkincs/check_extern_c.py
new file mode 100755
index 000000000000..3e61719a5ea5
--- /dev/null
+++ b/buildtools/chkincs/check_extern_c.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Detect missing or redundant `extern "C"` statements in headers.
+"""
+
+import argparse
+import re
+import sys
+
+
+# regular expressions
+EXTERN_C_RE = re.compile(r'^extern\s+"C"', re.MULTILINE)
+CPP_LINE_COMMENT_RE = re.compile(r"//.*$", re.MULTILINE)
+C_BLOCK_COMMENT_RE = re.compile(r"/\*.+?\*/", re.DOTALL)
+C_SYMBOL_RE = [
+    # external variable definitions
+    re.compile(r"^extern\s+[a-z0-9_]+\s", re.MULTILINE),
+    # exported functions
+    re.compile(r"\brte_[a-z0-9_]+\("),
+    re.compile(r"\bcmdline_[a-z0-9_]+\("),
+    re.compile(r"\bvt100_[a-z0-9_]+\("),
+    re.compile(r"\brdline_[a-z0-9_]+\("),
+    re.compile(r"\bcirbuf_[a-z0-9_]+\("),
+    # windows compatibility
+    re.compile(r"\bpthread_[a-z0-9_]+\("),
+    re.compile(r"\bregcomp\("),
+    re.compile(r"\bcount_cpu\("),
+]
+
+
+def strip_comments(buf: str) -> str:
+    buf = CPP_LINE_COMMENT_RE.sub("", buf, re.MULTILINE)
+    return C_BLOCK_COMMENT_RE.sub("", buf, re.DOTALL)
+
+
+def has_c_symbols(buf: str) -> bool:
+    for expr in C_SYMBOL_RE:
+        if expr.search(buf, re.MULTILINE):
+            return True
+    return False
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("headers", nargs="+")
+    args = parser.parse_args()
+
+    ret = 0
+
+    for h in args.headers:
+        with open(h) as f:
+            buf = f.read()
+
+        buf = strip_comments(buf)
+
+        if has_c_symbols(buf):
+            if not EXTERN_C_RE.search(buf):
+                print('error: missing extern "C" in', h)
+                ret = 1
+        # Uncomment next block once all extraneous extern "C" have been removed
+        #else:
+        #    if EXTERN_C_RE.search(buf):
+        #        print('error: extraneous extern "C" in', h)
+        #        ret = 1
+
+    return ret
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/buildtools/chkincs/meson.build b/buildtools/chkincs/meson.build
index f2dadcae18ef..a551b2df9268 100644
--- a/buildtools/chkincs/meson.build
+++ b/buildtools/chkincs/meson.build
@@ -38,14 +38,11 @@ if not add_languages('cpp', required: false)
 endif
 
 # check for extern C in files, since this is not detected as an error by the compiler
-grep = find_program('grep', required: false)
-if grep.found()
-    errlist = run_command([grep, '--files-without-match', '^extern "C"', dpdk_chkinc_headers],
-            check: false, capture: true).stdout().split()
-    if errlist != []
-        error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- '.join(errlist))
-    endif
-endif
+custom_target('check-extern-c',
+        command: files('check_extern_c.py') + ['@INPUT@'],
+        input: dpdk_chkinc_headers,
+        output: 'check-extern-c',
+        build_by_default: true)
 
 gen_cpp_files = generator(gen_c_file_for_header,
         output: '@BASENAME@.cpp',
  
Mattias Rönnblom Oct. 10, 2024, 10:24 a.m. UTC | #13
On 2024-10-06 17:58, Robin Jarry wrote:
> Mattias Rönnblom, Oct 06, 2024 at 16:17:
>>> I think you need to change run_command() to custom_target(). I was 
>>> thinking of this patch to be much simpler as follows:
>>
>> I started off with a shell script, but I switched to Python when I 
>> wanted to strip off comments. (Not that that can't be done in bourne 
>> shell.)
>>
>> The below script doesn't strip off comments, and provides no usage 
>> information. Earlier I got the impression you wanted to improve 
>> command-line usage.
>>
>> I think we need to decide if this thing is an integral part of the 
>> build system, custom-made for its needs, or if its something that 
>> should also be friendly to a command-line human user.
>>
>> I could go either way on that.
> 
> We don't have to choose. Being part of the build system does not mean 

If it's an integral part of the build system, and the only user is 
another program (e.g., meson.build), the script can be made simpler. For 
example, you need not bother with usage information, since a computer 
program will not have any use of it.

It would be helpful if Bruce could comment on this. How should we think 
about small tools like this? That are more-or-less just an artifact of 
the lack of expressiveness in meson.

I'm leaning toward having them as proper human-usable CLI tools, 
provided it doesn't cause too much fuzz on the meson.build side of things.

> the script cannot use the standard python tools to parse arguments. Here 
> is what I came up with. It is shorter, strips comments and deals with 
> arguments in a simpler way. We don't need to pass the "missing" or 
> "redundant" operation to the script, it can figure out what to do on its 
> own (see inline comment in main()):

Sure, you could check for both issues at the same time. The primary 
reason for doing otherwise was that the code base wouldn't survive the 
"redundant" check at the time of the script's introduction (in the patch 
set).

> 
> diff --git a/buildtools/chkincs/check_extern_c.py b/buildtools/chkincs/ 
> check_extern_c.py
> new file mode 100755
> index 000000000000..3e61719a5ea5
> --- /dev/null
> +++ b/buildtools/chkincs/check_extern_c.py
> @@ -0,0 +1,72 @@
> +#!/usr/bin/env python3
> +# SPDX-License-Identifier: BSD-3-Clause
> +
> +"""
> +Detect missing or redundant `extern "C"` statements in headers.
> +"""
> +
> +import argparse
> +import re
> +import sys
> +
> +
> +# regular expressions
> +EXTERN_C_RE = re.compile(r'^extern\s+"C"', re.MULTILINE)
> +CPP_LINE_COMMENT_RE = re.compile(r"//.*$", re.MULTILINE)
> +C_BLOCK_COMMENT_RE = re.compile(r"/\*.+?\*/", re.DOTALL)
> +C_SYMBOL_RE = [
> +    # external variable definitions
> +    re.compile(r"^extern\s+[a-z0-9_]+\s", re.MULTILINE),
> +    # exported functions
> +    re.compile(r"\brte_[a-z0-9_]+\("),
> +    re.compile(r"\bcmdline_[a-z0-9_]+\("),
> +    re.compile(r"\bvt100_[a-z0-9_]+\("),
> +    re.compile(r"\brdline_[a-z0-9_]+\("),
> +    re.compile(r"\bcirbuf_[a-z0-9_]+\("),
> +    # windows compatibility
> +    re.compile(r"\bpthread_[a-z0-9_]+\("),
> +    re.compile(r"\bregcomp\("),
> +    re.compile(r"\bcount_cpu\("),
> +]
> +
> +
> +def strip_comments(buf: str) -> str:
> +    buf = CPP_LINE_COMMENT_RE.sub("", buf, re.MULTILINE)
> +    return C_BLOCK_COMMENT_RE.sub("", buf, re.DOTALL)
> +
> +
> +def has_c_symbols(buf: str) -> bool:
> +    for expr in C_SYMBOL_RE:
> +        if expr.search(buf, re.MULTILINE):
> +            return True
> +    return False
> +
> +
> +def main() -> int:
> +    parser = argparse.ArgumentParser(description=__doc__)
> +    parser.add_argument("headers", nargs="+")
> +    args = parser.parse_args()
> +
> +    ret = 0
> +
> +    for h in args.headers:
> +        with open(h) as f:
> +            buf = f.read()
> +
> +        buf = strip_comments(buf)
> +
> +        if has_c_symbols(buf):
> +            if not EXTERN_C_RE.search(buf):
> +                print('error: missing extern "C" in', h)
> +                ret = 1
> +        # Uncomment next block once all extraneous extern "C" have been 
> removed
> +        #else:
> +        #    if EXTERN_C_RE.search(buf):
> +        #        print('error: extraneous extern "C" in', h)
> +        #        ret = 1
> +
> +    return ret
> +
> +
> +if __name__ == "__main__":
> +    sys.exit(main())
> diff --git a/buildtools/chkincs/meson.build b/buildtools/chkincs/ 
> meson.build
> index f2dadcae18ef..a551b2df9268 100644
> --- a/buildtools/chkincs/meson.build
> +++ b/buildtools/chkincs/meson.build
> @@ -38,14 +38,11 @@ if not add_languages('cpp', required: false)
> endif
> 
> # check for extern C in files, since this is not detected as an error by 
> the compiler
> -grep = find_program('grep', required: false)
> -if grep.found()
> -    errlist = run_command([grep, '--files-without-match', '^extern 
> "C"', dpdk_chkinc_headers],
> -            check: false, capture: true).stdout().split()
> -    if errlist != []
> -        error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- 
> '.join(errlist))
> -    endif
> -endif
> +custom_target('check-extern-c',
> +        command: files('check_extern_c.py') + ['@INPUT@'],
> +        input: dpdk_chkinc_headers,
> +        output: 'check-extern-c',
> +        build_by_default: true)
> 
> gen_cpp_files = generator(gen_c_file_for_header,
>          output: '@BASENAME@.cpp',
> 

I won't review this in detail, but you can always submit a new patch 
against what's on main. What is there now is somewhat half-way between 
"proper CLI tool" and "meson.build delegate".
  
Bruce Richardson Oct. 10, 2024, 10:39 a.m. UTC | #14
On Thu, Oct 10, 2024 at 12:24:25PM +0200, Mattias Rönnblom wrote:
> On 2024-10-06 17:58, Robin Jarry wrote:
> > Mattias Rönnblom, Oct 06, 2024 at 16:17:
> > > > I think you need to change run_command() to custom_target(). I
> > > > was thinking of this patch to be much simpler as follows:
> > > 
> > > I started off with a shell script, but I switched to Python when I
> > > wanted to strip off comments. (Not that that can't be done in bourne
> > > shell.)
> > > 
> > > The below script doesn't strip off comments, and provides no usage
> > > information. Earlier I got the impression you wanted to improve
> > > command-line usage.
> > > 
> > > I think we need to decide if this thing is an integral part of the
> > > build system, custom-made for its needs, or if its something that
> > > should also be friendly to a command-line human user.
> > > 
> > > I could go either way on that.
> > 
> > We don't have to choose. Being part of the build system does not mean
> 
> If it's an integral part of the build system, and the only user is another
> program (e.g., meson.build), the script can be made simpler. For example,
> you need not bother with usage information, since a computer program will
> not have any use of it.
> 
> It would be helpful if Bruce could comment on this. How should we think
> about small tools like this? That are more-or-less just an artifact of the
> lack of expressiveness in meson.
> 
> I'm leaning toward having them as proper human-usable CLI tools, provided it
> doesn't cause too much fuzz on the meson.build side of things.
> 

I don't have a strong opinion either way. Meson indeed (like make before
it) has a limited set of basic things it can do, and requires scripting to
augment that. Whether those script are only for meson use or should be
usable outside the build I think we can determine on a case-by-case basis.
Initially, I would myself tend toward having them build-system-only for
simplicity, and only later generalise if there is a case for it (YAGNI
principle). However, if you think that a script may be better usable both
as part of a build and as a standalone tool, I see no issues with early
generalization - especially if the added complexity is minimal!

/Bruce
  

Patch

diff --git a/buildtools/chkincs/chkextern.py b/buildtools/chkincs/chkextern.py
new file mode 100755
index 0000000000..5374ce1c72
--- /dev/null
+++ b/buildtools/chkincs/chkextern.py
@@ -0,0 +1,88 @@ 
+#! /usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2024 Ericsson AB
+
+import sys
+import re
+
+def strip_cpp(header):
+    no_cpp = ""
+    header = header.replace("\\\n", " ")
+
+    for line in header.split("\n"):
+        if re.match(r'^\s*#.*', line) is None and len(line) > 0:
+            no_cpp += "%s\n" % line
+
+    return no_cpp
+
+
+def strip_comments(header):
+    no_c_comments = re.sub(r'/\*.*?\*/', '', header, flags=re.DOTALL)
+    no_cxx_comments = re.sub(r'//.*', '', no_c_comments)
+    return no_cxx_comments
+
+
+def strip(header):
+    header = strip_comments(header)
+    header = strip_cpp(header)
+    return header
+
+
+def has_extern_c(header):
+    return header.find('extern "C"') != -1
+
+
+def has_vars(header):
+    return re.search(r'^extern\s+[a-z0-9_]+\s.*;', header, flags=re.MULTILINE) is not None
+
+
+FUNCTION_RES = [
+    r'rte_[a-z0-9_]+\(',
+    r'cmdline_[a-z0-9_]+\(',
+    r'vt100_[a-z0-9_]+\(',
+    r'rdline_[a-z0-9_]+\(',
+    r'cirbuf_[a-z0-9_]+\(',
+    # Windows UNIX compatibility
+    r'pthread_[a-z0-9_]+\(',
+    r'regcomp\(',
+    r'count_cpu\('
+]
+
+
+def has_functions(header):
+    for function_re in FUNCTION_RES:
+        if re.search(function_re, header) is not None:
+            return True
+    return False
+
+
+def has_symbols(header):
+    return has_functions(header) or has_vars(header)
+
+
+def chk_missing(filename):
+    header = open(filename).read()
+    if has_symbols(header) and not has_extern_c(header):
+        print(filename)
+
+
+def chk_redundant(filename):
+    header = open(filename).read()
+    if not has_symbols(header) and has_extern_c(header):
+        print(filename)
+
+if len(sys.argv) < 3:
+    print("%s missing|redundant <header-file> ..." % sys.argv[0])
+    sys.exit(1)
+
+op = sys.argv[1]
+headers = sys.argv[2:]
+
+for header in headers:
+    if op == 'missing':
+        chk_missing(header)
+    elif op == 'redundant':
+        chk_redundant(header)
+    else:
+        print("Unknown operation.")
+        sys.exit(1)
diff --git a/buildtools/chkincs/meson.build b/buildtools/chkincs/meson.build
index f2dadcae18..762f85efe5 100644
--- a/buildtools/chkincs/meson.build
+++ b/buildtools/chkincs/meson.build
@@ -38,13 +38,13 @@  if not add_languages('cpp', required: false)
 endif
 
 # check for extern C in files, since this is not detected as an error by the compiler
-grep = find_program('grep', required: false)
-if grep.found()
-    errlist = run_command([grep, '--files-without-match', '^extern "C"', dpdk_chkinc_headers],
-            check: false, capture: true).stdout().split()
-    if errlist != []
-        error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- '.join(errlist))
-    endif
+chkextern = find_program('chkextern.py')
+
+missing_extern_headers = run_command(chkextern, 'missing', dpdk_chkinc_headers,
+      capture: true, check: true).stdout().split()
+
+if missing_extern_headers != []
+    error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- '.join(missing_extern_headers))
 endif
 
 gen_cpp_files = generator(gen_c_file_for_header,