diff mbox series

[RFC] devtools: script to check meson indentation of lists

Message ID 20210422090211.320855-1-bruce.richardson@intel.com (mailing list archive)
State Superseded
Delegated to: Thomas Monjalon
Headers show
Series [RFC] devtools: script to check meson indentation of lists | expand

Checks

Context Check Description
ci/intel-Testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/checkpatch warning coding style issues

Commit Message

Bruce Richardson April 22, 2021, 9:02 a.m. UTC
This is a draft script developed when I was working on the whitespace rework
changes, since extended a little to attempt to fix some trailing comma issues.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 devtools/dpdk_meson_check.py | 106 +++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100755 devtools/dpdk_meson_check.py

--
2.27.0

Comments

Burakov, Anatoly April 22, 2021, 9:40 a.m. UTC | #1
On 22-Apr-21 10:02 AM, Bruce Richardson wrote:
> This is a draft script developed when I was working on the whitespace rework
> changes, since extended a little to attempt to fix some trailing comma issues.
> 
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
>   devtools/dpdk_meson_check.py | 106 +++++++++++++++++++++++++++++++++++
>   1 file changed, 106 insertions(+)
>   create mode 100755 devtools/dpdk_meson_check.py
> 
> diff --git a/devtools/dpdk_meson_check.py b/devtools/dpdk_meson_check.py
> new file mode 100755
> index 000000000..dc4c714ad
> --- /dev/null
> +++ b/devtools/dpdk_meson_check.py
> @@ -0,0 +1,106 @@
> +#!/usr/bin/env python3
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2021 Intel Corporation
> +
> +'''
> +A Python script to run some checks on meson.build files in DPDK
> +'''
> +
> +import sys
> +import os
> +from os.path import relpath, join
> +from argparse import ArgumentParser
> +
> +VERBOSE = False
> +FIX = False
> +
> +def scan_dir(path):
> +    '''return meson.build files found in path'''
> +    for root, dirs, files in os.walk(path):
> +        if 'meson.build' in files:
> +            yield(relpath(join(root, 'meson.build')))
> +
> +
> +def check_indentation(filename, contents):
> +    '''check that a list or files() is correctly indented'''
> +    infiles = False
> +    inlist = False
> +    edit_count = 0
> +    for lineno in range(len(contents)):

for lineno, line in enumerate(contents)

?

> +        line = contents[lineno].rstrip()
> +        if not line:
> +            continue
> +        if line.endswith('files('):
> +            if infiles:
> +                raise(f'Error parsing {filename}:{lineno}, got "files(" when already parsing files list')
> +            if inlist:
> +                print(f'Error parsing {filename}:{lineno}, got "files(" when already parsing array list')
> +            infiles = True
> +            indent = 0
> +            while line[indent] == ' ':
> +                indent += 1

Here and in other places, if this is measuring length of indent, maybe 
do something like:

indent = len(line) - len(line.lstrip(' '))

?

> +            indent += 8  # double indent required
> +        elif line.endswith('= ['):
> +            if infiles:
> +                raise(f'Error parsing {filename}:{lineno}, got start of array when already parsing files list')
> +            if inlist:
> +                print(f'Error parsing {filename}:{lineno}, got start of array when already parsing array list')
> +            inlist = True
> +            indent = 0
> +            while line[indent] == ' ':
> +                indent += 1
> +            indent += 8  # double indent required
> +        elif infiles and (line.endswith(')') or line.strip().startswith(')')):

It's kinda hard to read with all the endswith/startswith, maybe extract 
those into a function? e.g. 'elif infiles and is_file_start(line)'

> +            infiles = False
> +            continue
> +        elif inlist and line.endswith(']') or line.strip().startswith(']'):
> +            inlist = False
> +            continue
> +        elif inlist or infiles:
> +            # skip further subarrays or lists
> +            if '[' in line  or ']' in line:
> +                continue

I guess you could make it recursive instead of giving up? Does this 
happen with any kind of regularity?

> +            if not line.startswith(' ' * indent) or line[indent] == ' ':
> +                print(f'Error: Incorrect indent at {filename}:{lineno + 1}')
> +                contents[lineno] = (' ' * indent) + line.strip() + '\n'
> +                line = contents[lineno].rstrip()
> +                edit_count += 1
> +            if not line.endswith(',') and '#' not in line:
> +                # TODO: support stripping comment and adding ','
> +                print(f'Error: Missing trailing "," in list at {filename}:{lineno + 1}')
> +                contents[lineno] = line + ',\n'
> +                line = contents[lineno].rstrip()

What is the point of setting `line` here?

> +                edit_count += 1
> +    return edit_count
> +
> +
> +def process_file(filename):
> +    '''run checks on file "filename"'''
> +    if VERBOSE:
> +        print(f'Processing {filename}')
> +    with open(filename) as f:
> +        contents = f.readlines()

I guess meson build files don't get too big so it's OK to read the 
entire file in memory and then work on it, rather than go line by line...

> +
> +    if check_indentation(filename, contents) > 0 and FIX:
> +        print(f"Fixing {filename}")
> +        with open(filename, 'w') as f:
> +            f.writelines(contents)
> +
> +
> +def main():
> +    '''parse arguments and then call other functions to do work'''
> +    global VERBOSE
> +    global FIX

Seems like globals are unnecessary here when you can just pass them into 
process_file?

> +    parser = ArgumentParser(description='Run syntax checks on DPDK meson.build files')
> +    parser.add_argument('-d', metavar='directory', default='.', help='Directory to process')
> +    parser.add_argument('--fix', action='store_true', help='Attempt to fix errors')
> +    parser.add_argument('-v', action='store_true', help='Verbose output')
> +    args = parser.parse_args()
> +
> +    VERBOSE = args.v
> +    FIX = args.fix
> +    for f in scan_dir(args.d):
> +        process_file(f)
> +
> +if __name__ == "__main__":
> +    main()
> --
> 2.27.0
>
Bruce Richardson April 22, 2021, 9:58 a.m. UTC | #2
On Thu, Apr 22, 2021 at 10:40:37AM +0100, Burakov, Anatoly wrote:
> On 22-Apr-21 10:02 AM, Bruce Richardson wrote:
> > This is a draft script developed when I was working on the whitespace rework
> > changes, since extended a little to attempt to fix some trailing comma issues.
> > 
> > Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> > ---
> >   devtools/dpdk_meson_check.py | 106 +++++++++++++++++++++++++++++++++++
> >   1 file changed, 106 insertions(+)
> >   create mode 100755 devtools/dpdk_meson_check.py
> > 
> > diff --git a/devtools/dpdk_meson_check.py b/devtools/dpdk_meson_check.py
> > new file mode 100755
> > index 000000000..dc4c714ad
> > --- /dev/null
> > +++ b/devtools/dpdk_meson_check.py
> > @@ -0,0 +1,106 @@
> > +#!/usr/bin/env python3
> > +# SPDX-License-Identifier: BSD-3-Clause
> > +# Copyright(c) 2021 Intel Corporation
> > +
> > +'''
> > +A Python script to run some checks on meson.build files in DPDK
> > +'''
> > +
> > +import sys
> > +import os
> > +from os.path import relpath, join
> > +from argparse import ArgumentParser
> > +
> > +VERBOSE = False
> > +FIX = False
> > +
> > +def scan_dir(path):
> > +    '''return meson.build files found in path'''
> > +    for root, dirs, files in os.walk(path):
> > +        if 'meson.build' in files:
> > +            yield(relpath(join(root, 'meson.build')))
> > +
> > +
> > +def check_indentation(filename, contents):
> > +    '''check that a list or files() is correctly indented'''
> > +    infiles = False
> > +    inlist = False
> > +    edit_count = 0
> > +    for lineno in range(len(contents)):
> 
> for lineno, line in enumerate(contents)
> 
Yep, that's a good idea. Wasn't aware of enumerate. [Learn something new
every day, eh? :-)]

> ?
> 
> > +        line = contents[lineno].rstrip()
> > +        if not line:
> > +            continue
> > +        if line.endswith('files('):
> > +            if infiles:
> > +                raise(f'Error parsing {filename}:{lineno}, got "files(" when already parsing files list')
> > +            if inlist:
> > +                print(f'Error parsing {filename}:{lineno}, got "files(" when already parsing array list')
> > +            infiles = True
> > +            indent = 0
> > +            while line[indent] == ' ':
> > +                indent += 1
> 
> Here and in other places, if this is measuring length of indent, maybe do
> something like:
> 
> indent = len(line) - len(line.lstrip(' '))
> 
Yep, that is cleaner

> ?
> 
> > +            indent += 8  # double indent required
> > +        elif line.endswith('= ['):
> > +            if infiles:
> > +                raise(f'Error parsing {filename}:{lineno}, got start of array when already parsing files list')
> > +            if inlist:
> > +                print(f'Error parsing {filename}:{lineno}, got start of array when already parsing array list')
> > +            inlist = True
> > +            indent = 0
> > +            while line[indent] == ' ':
> > +                indent += 1
> > +            indent += 8  # double indent required
> > +        elif infiles and (line.endswith(')') or line.strip().startswith(')')):
> 
> It's kinda hard to read with all the endswith/startswith, maybe extract
> those into a function? e.g. 'elif infiles and is_file_start(line)'
> 
It is all a bit of a mess, yes, and needs cleaning up - which is why I
didn't previously send it with the patchset. Splitting into functions is
something I'll look at.

> > +            infiles = False
> > +            continue
> > +        elif inlist and line.endswith(']') or line.strip().startswith(']'):
> > +            inlist = False
> > +            continue
> > +        elif inlist or infiles:
> > +            # skip further subarrays or lists
> > +            if '[' in line  or ']' in line:
> > +                continue
> 
> I guess you could make it recursive instead of giving up? Does this happen
> with any kind of regularity?
> 
No, not that much, which is why I haven't explicitly tried to deal with it.
It would be good to support in future.

> > +            if not line.startswith(' ' * indent) or line[indent] == ' ':
> > +                print(f'Error: Incorrect indent at {filename}:{lineno + 1}')
> > +                contents[lineno] = (' ' * indent) + line.strip() + '\n'
> > +                line = contents[lineno].rstrip()
> > +                edit_count += 1
> > +            if not line.endswith(',') and '#' not in line:
> > +                # TODO: support stripping comment and adding ','
> > +                print(f'Error: Missing trailing "," in list at {filename}:{lineno + 1}')
> > +                contents[lineno] = line + ',\n'
> > +                line = contents[lineno].rstrip()
> 
> What is the point of setting `line` here?
> 
Because it allows us to make further checks using "line" as we add them
later.
The one question is whether it's worth using line as a shortcut for
contents[lineno] or not. I'd tend to keep it, as it also allows us not to
worry about the '\n' at the end. Then again, other rework might just change
the whole script to strip off all the '\n' post-read and add them back
again pre-write.

Again, further cleanup work.

> > +                edit_count += 1
> > +    return edit_count
> > +
> > +
> > +def process_file(filename):
> > +    '''run checks on file "filename"'''
> > +    if VERBOSE:
> > +        print(f'Processing {filename}')
> > +    with open(filename) as f:
> > +        contents = f.readlines()
> 
> I guess meson build files don't get too big so it's OK to read the entire
> file in memory and then work on it, rather than go line by line...
> 
This was a deliberate choice when starting the script. For now the script
only checks indentation and formatting of lists, but ideally in future it
should check other things, e.g. alphabetical order of lists, or formatting
of other parts. Rather than checking it all in one go, the script is
structured so that we can call multiple functions with "contents" each of
which does the processing without constantly re-reading and rewriting the
file. Something like sorting a list alphabetically also requires changing
multiple lines at a time, which is easier to do with lists that when
streaming input.

> > +
> > +    if check_indentation(filename, contents) > 0 and FIX:
> > +        print(f"Fixing {filename}")
> > +        with open(filename, 'w') as f:
> > +            f.writelines(contents)
> > +
> > +
> > +def main():
> > +    '''parse arguments and then call other functions to do work'''
> > +    global VERBOSE
> > +    global FIX
> 
> Seems like globals are unnecessary here when you can just pass them into
> process_file?
> 
Yes, they can just be passed, but I actually prefer to have those as
globals rather than having larger parameter lists. It's also possible that
e.g. verbose, should need to be passed through multiple levels of functions.
Personal preference, though really.

> > +    parser = ArgumentParser(description='Run syntax checks on DPDK meson.build files')
> > +    parser.add_argument('-d', metavar='directory', default='.', help='Directory to process')
> > +    parser.add_argument('--fix', action='store_true', help='Attempt to fix errors')
> > +    parser.add_argument('-v', action='store_true', help='Verbose output')
> > +    args = parser.parse_args()
> > +
> > +    VERBOSE = args.v
> > +    FIX = args.fix
> > +    for f in scan_dir(args.d):
> > +        process_file(f)
> > +
> > +if __name__ == "__main__":
> > +    main()
> > --
> > 2.27.0
> > 
> 
> 
> -- 
> Thanks,

Thanks for the review Anatoly.

/Bruce
Burakov, Anatoly April 22, 2021, 10:21 a.m. UTC | #3
On 22-Apr-21 10:58 AM, Bruce Richardson wrote:
> On Thu, Apr 22, 2021 at 10:40:37AM +0100, Burakov, Anatoly wrote:
>> On 22-Apr-21 10:02 AM, Bruce Richardson wrote:
>>> This is a draft script developed when I was working on the whitespace rework
>>> changes, since extended a little to attempt to fix some trailing comma issues.
>>>
>>> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
>>> ---
>>>    devtools/dpdk_meson_check.py | 106 +++++++++++++++++++++++++++++++++++
>>>    1 file changed, 106 insertions(+)
>>>    create mode 100755 devtools/dpdk_meson_check.py
>>>
>>> diff --git a/devtools/dpdk_meson_check.py b/devtools/dpdk_meson_check.py
>>> new file mode 100755
>>> index 000000000..dc4c714ad
>>> --- /dev/null
>>> +++ b/devtools/dpdk_meson_check.py
>>> @@ -0,0 +1,106 @@
>>> +#!/usr/bin/env python3
>>> +# SPDX-License-Identifier: BSD-3-Clause
>>> +# Copyright(c) 2021 Intel Corporation
>>> +
>>> +'''
>>> +A Python script to run some checks on meson.build files in DPDK
>>> +'''
>>> +
>>> +import sys
>>> +import os
>>> +from os.path import relpath, join
>>> +from argparse import ArgumentParser
>>> +
>>> +VERBOSE = False
>>> +FIX = False
>>> +
>>> +def scan_dir(path):
>>> +    '''return meson.build files found in path'''
>>> +    for root, dirs, files in os.walk(path):
>>> +        if 'meson.build' in files:
>>> +            yield(relpath(join(root, 'meson.build')))
>>> +
>>> +
>>> +def check_indentation(filename, contents):
>>> +    '''check that a list or files() is correctly indented'''
>>> +    infiles = False
>>> +    inlist = False
>>> +    edit_count = 0
>>> +    for lineno in range(len(contents)):
>>
>> for lineno, line in enumerate(contents)
>>
> Yep, that's a good idea. Wasn't aware of enumerate. [Learn something new
> every day, eh? :-)]
> 
>> ?
>>
>>> +        line = contents[lineno].rstrip()
>>> +        if not line:
>>> +            continue
>>> +        if line.endswith('files('):
>>> +            if infiles:
>>> +                raise(f'Error parsing {filename}:{lineno}, got "files(" when already parsing files list')
>>> +            if inlist:
>>> +                print(f'Error parsing {filename}:{lineno}, got "files(" when already parsing array list')
>>> +            infiles = True
>>> +            indent = 0
>>> +            while line[indent] == ' ':
>>> +                indent += 1
>>
>> Here and in other places, if this is measuring length of indent, maybe do
>> something like:
>>
>> indent = len(line) - len(line.lstrip(' '))
>>
> Yep, that is cleaner
> 
>> ?
>>
>>> +            indent += 8  # double indent required
>>> +        elif line.endswith('= ['):
>>> +            if infiles:
>>> +                raise(f'Error parsing {filename}:{lineno}, got start of array when already parsing files list')
>>> +            if inlist:
>>> +                print(f'Error parsing {filename}:{lineno}, got start of array when already parsing array list')
>>> +            inlist = True
>>> +            indent = 0
>>> +            while line[indent] == ' ':
>>> +                indent += 1
>>> +            indent += 8  # double indent required
>>> +        elif infiles and (line.endswith(')') or line.strip().startswith(')')):
>>
>> It's kinda hard to read with all the endswith/startswith, maybe extract
>> those into a function? e.g. 'elif infiles and is_file_start(line)'
>>
> It is all a bit of a mess, yes, and needs cleaning up - which is why I
> didn't previously send it with the patchset. Splitting into functions is
> something I'll look at.
> 
>>> +            infiles = False
>>> +            continue
>>> +        elif inlist and line.endswith(']') or line.strip().startswith(']'):
>>> +            inlist = False
>>> +            continue
>>> +        elif inlist or infiles:
>>> +            # skip further subarrays or lists
>>> +            if '[' in line  or ']' in line:
>>> +                continue
>>
>> I guess you could make it recursive instead of giving up? Does this happen
>> with any kind of regularity?
>>
> No, not that much, which is why I haven't explicitly tried to deal with it.
> It would be good to support in future.
> 
>>> +            if not line.startswith(' ' * indent) or line[indent] == ' ':
>>> +                print(f'Error: Incorrect indent at {filename}:{lineno + 1}')
>>> +                contents[lineno] = (' ' * indent) + line.strip() + '\n'
>>> +                line = contents[lineno].rstrip()
>>> +                edit_count += 1
>>> +            if not line.endswith(',') and '#' not in line:
>>> +                # TODO: support stripping comment and adding ','
>>> +                print(f'Error: Missing trailing "," in list at {filename}:{lineno + 1}')
>>> +                contents[lineno] = line + ',\n'
>>> +                line = contents[lineno].rstrip()
>>
>> What is the point of setting `line` here?
>>
> Because it allows us to make further checks using "line" as we add them
> later.
> The one question is whether it's worth using line as a shortcut for
> contents[lineno] or not. I'd tend to keep it, as it also allows us not to
> worry about the '\n' at the end. Then again, other rework might just change
> the whole script to strip off all the '\n' post-read and add them back
> again pre-write.
> 
> Again, further cleanup work.

Well, yes, i got that, it's just that as far as i can tell, the last 
"line = ..." is at the end of the iteration, so there's no point in 
setting it. Maybe i'm missing something :)

> 
>>> +                edit_count += 1
>>> +    return edit_count
>>> +
>>> +
>>> +def process_file(filename):
>>> +    '''run checks on file "filename"'''
>>> +    if VERBOSE:
>>> +        print(f'Processing {filename}')
>>> +    with open(filename) as f:
>>> +        contents = f.readlines()
>>
>> I guess meson build files don't get too big so it's OK to read the entire
>> file in memory and then work on it, rather than go line by line...
>>
> This was a deliberate choice when starting the script. For now the script
> only checks indentation and formatting of lists, but ideally in future it
> should check other things, e.g. alphabetical order of lists, or formatting
> of other parts. Rather than checking it all in one go, the script is
> structured so that we can call multiple functions with "contents" each of
> which does the processing without constantly re-reading and rewriting the
> file. Something like sorting a list alphabetically also requires changing
> multiple lines at a time, which is easier to do with lists that when
> streaming input.

Right, gotcha.

> 
>>> +
>>> +    if check_indentation(filename, contents) > 0 and FIX:
>>> +        print(f"Fixing {filename}")
>>> +        with open(filename, 'w') as f:
>>> +            f.writelines(contents)
>>> +
>>> +
>>> +def main():
>>> +    '''parse arguments and then call other functions to do work'''
>>> +    global VERBOSE
>>> +    global FIX
>>
>> Seems like globals are unnecessary here when you can just pass them into
>> process_file?
>>
> Yes, they can just be passed, but I actually prefer to have those as
> globals rather than having larger parameter lists. It's also possible that
> e.g. verbose, should need to be passed through multiple levels of functions.
> Personal preference, though really.

Static analyzers (e.g. pylint) will complain about it, which is why i 
usually avoid those unless really necessary :) For something like 
"verbose", sure, one could argue that it's OK to have it as a global, 
but FIX is definitely something that could be a parameter, as you don't 
seem to use it anywhere other than in process_file(), nor would it seem 
likely that it will be used in the future.

> 
>>> +    parser = ArgumentParser(description='Run syntax checks on DPDK meson.build files')
>>> +    parser.add_argument('-d', metavar='directory', default='.', help='Directory to process')
>>> +    parser.add_argument('--fix', action='store_true', help='Attempt to fix errors')
>>> +    parser.add_argument('-v', action='store_true', help='Verbose output')
>>> +    args = parser.parse_args()
>>> +
>>> +    VERBOSE = args.v
>>> +    FIX = args.fix
>>> +    for f in scan_dir(args.d):
>>> +        process_file(f)
>>> +
>>> +if __name__ == "__main__":
>>> +    main()
>>> --
>>> 2.27.0
>>>
>>
>>
>> -- 
>> Thanks,
> 
> Thanks for the review Anatoly.
> 
> /Bruce
>
Bruce Richardson April 22, 2021, 10:31 a.m. UTC | #4
On Thu, Apr 22, 2021 at 11:21:26AM +0100, Burakov, Anatoly wrote:
> On 22-Apr-21 10:58 AM, Bruce Richardson wrote:
> > On Thu, Apr 22, 2021 at 10:40:37AM +0100, Burakov, Anatoly wrote:
> > > On 22-Apr-21 10:02 AM, Bruce Richardson wrote:
> > > > This is a draft script developed when I was working on the whitespace rework
> > > > changes, since extended a little to attempt to fix some trailing comma issues.
> > > > 
> > > > Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> > > > ---

<snip>

> > > > +            if not line.endswith(',') and '#' not in line:
> > > > +                # TODO: support stripping comment and adding ','
> > > > +                print(f'Error: Missing trailing "," in list at {filename}:{lineno + 1}')
> > > > +                contents[lineno] = line + ',\n'
> > > > +                line = contents[lineno].rstrip()
> > > 
> > > What is the point of setting `line` here?
> > > 
> > Because it allows us to make further checks using "line" as we add them
> > later.
> > The one question is whether it's worth using line as a shortcut for
> > contents[lineno] or not. I'd tend to keep it, as it also allows us not to
> > worry about the '\n' at the end. Then again, other rework might just change
> > the whole script to strip off all the '\n' post-read and add them back
> > again pre-write.
> > 
> > Again, further cleanup work.
> 
> Well, yes, i got that, it's just that as far as i can tell, the last "line =
> ..." is at the end of the iteration, so there's no point in setting it.
> Maybe i'm missing something :)

No, you are not, and indeed it is at the very end. The fact it's there is
to make sure it's not forgotten when adding in new checks. It also allows
the checks to be reordered easier. Think of it like the trailing comma on a
list! :-)
 
> > 
> > > > +                edit_count += 1
> > > > +    return edit_count
> > > > +
> > > > +
> > > > +def process_file(filename):
> > > > +    '''run checks on file "filename"'''
> > > > +    if VERBOSE:
> > > > +        print(f'Processing {filename}')
> > > > +    with open(filename) as f:
> > > > +        contents = f.readlines()
> > > 
> > > I guess meson build files don't get too big so it's OK to read the entire
> > > file in memory and then work on it, rather than go line by line...
> > > 
> > This was a deliberate choice when starting the script. For now the script
> > only checks indentation and formatting of lists, but ideally in future it
> > should check other things, e.g. alphabetical order of lists, or formatting
> > of other parts. Rather than checking it all in one go, the script is
> > structured so that we can call multiple functions with "contents" each of
> > which does the processing without constantly re-reading and rewriting the
> > file. Something like sorting a list alphabetically also requires changing
> > multiple lines at a time, which is easier to do with lists that when
> > streaming input.
> 
> Right, gotcha.
> 
> > 
> > > > +
> > > > +    if check_indentation(filename, contents) > 0 and FIX:
> > > > +        print(f"Fixing {filename}")
> > > > +        with open(filename, 'w') as f:
> > > > +            f.writelines(contents)
> > > > +
> > > > +
> > > > +def main():
> > > > +    '''parse arguments and then call other functions to do work'''
> > > > +    global VERBOSE
> > > > +    global FIX
> > > 
> > > Seems like globals are unnecessary here when you can just pass them into
> > > process_file?
> > > 
> > Yes, they can just be passed, but I actually prefer to have those as
> > globals rather than having larger parameter lists. It's also possible that
> > e.g. verbose, should need to be passed through multiple levels of functions.
> > Personal preference, though really.
> 
> Static analyzers (e.g. pylint) will complain about it, which is why i
> usually avoid those unless really necessary :) For something like "verbose",
> sure, one could argue that it's OK to have it as a global, but FIX is
> definitely something that could be a parameter, as you don't seem to use it
> anywhere other than in process_file(), nor would it seem likely that it will
> be used in the future.
> 

Yep, agreed. Fix can be just a parameter. It may be that it's kept as a
global here is for consistency with verbose, but more likely it's just an
oversight on my part. I will change it to parameter instead in any next
revision.

/Bruce
diff mbox series

Patch

diff --git a/devtools/dpdk_meson_check.py b/devtools/dpdk_meson_check.py
new file mode 100755
index 000000000..dc4c714ad
--- /dev/null
+++ b/devtools/dpdk_meson_check.py
@@ -0,0 +1,106 @@ 
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2021 Intel Corporation
+
+'''
+A Python script to run some checks on meson.build files in DPDK
+'''
+
+import sys
+import os
+from os.path import relpath, join
+from argparse import ArgumentParser
+
+VERBOSE = False
+FIX = False
+
+def scan_dir(path):
+    '''return meson.build files found in path'''
+    for root, dirs, files in os.walk(path):
+        if 'meson.build' in files:
+            yield(relpath(join(root, 'meson.build')))
+
+
+def check_indentation(filename, contents):
+    '''check that a list or files() is correctly indented'''
+    infiles = False
+    inlist = False
+    edit_count = 0
+    for lineno in range(len(contents)):
+        line = contents[lineno].rstrip()
+        if not line:
+            continue
+        if line.endswith('files('):
+            if infiles:
+                raise(f'Error parsing {filename}:{lineno}, got "files(" when already parsing files list')
+            if inlist:
+                print(f'Error parsing {filename}:{lineno}, got "files(" when already parsing array list')
+            infiles = True
+            indent = 0
+            while line[indent] == ' ':
+                indent += 1
+            indent += 8  # double indent required
+        elif line.endswith('= ['):
+            if infiles:
+                raise(f'Error parsing {filename}:{lineno}, got start of array when already parsing files list')
+            if inlist:
+                print(f'Error parsing {filename}:{lineno}, got start of array when already parsing array list')
+            inlist = True
+            indent = 0
+            while line[indent] == ' ':
+                indent += 1
+            indent += 8  # double indent required
+        elif infiles and (line.endswith(')') or line.strip().startswith(')')):
+            infiles = False
+            continue
+        elif inlist and line.endswith(']') or line.strip().startswith(']'):
+            inlist = False
+            continue
+        elif inlist or infiles:
+            # skip further subarrays or lists
+            if '[' in line  or ']' in line:
+                continue
+            if not line.startswith(' ' * indent) or line[indent] == ' ':
+                print(f'Error: Incorrect indent at {filename}:{lineno + 1}')
+                contents[lineno] = (' ' * indent) + line.strip() + '\n'
+                line = contents[lineno].rstrip()
+                edit_count += 1
+            if not line.endswith(',') and '#' not in line:
+                # TODO: support stripping comment and adding ','
+                print(f'Error: Missing trailing "," in list at {filename}:{lineno + 1}')
+                contents[lineno] = line + ',\n'
+                line = contents[lineno].rstrip()
+                edit_count += 1
+    return edit_count
+
+
+def process_file(filename):
+    '''run checks on file "filename"'''
+    if VERBOSE:
+        print(f'Processing {filename}')
+    with open(filename) as f:
+        contents = f.readlines()
+
+    if check_indentation(filename, contents) > 0 and FIX:
+        print(f"Fixing {filename}")
+        with open(filename, 'w') as f:
+            f.writelines(contents)
+
+
+def main():
+    '''parse arguments and then call other functions to do work'''
+    global VERBOSE
+    global FIX
+    parser = ArgumentParser(description='Run syntax checks on DPDK meson.build files')
+    parser.add_argument('-d', metavar='directory', default='.', help='Directory to process')
+    parser.add_argument('--fix', action='store_true', help='Attempt to fix errors')
+    parser.add_argument('-v', action='store_true', help='Verbose output')
+    args = parser.parse_args()
+
+    VERBOSE = args.v
+    FIX = args.fix
+    for f in scan_dir(args.d):
+        process_file(f)
+
+if __name__ == "__main__":
+    main()