455 lines
20 KiB
Python
Executable file
455 lines
20 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
|
|
Taken from Mathlib4.
|
|
|
|
Lint a file or files from mathlib for style.
|
|
|
|
Sample usage:
|
|
|
|
$ ./scripts/lint-style.py $(find Mathlib -name '*.lean')
|
|
|
|
which will lint all of the Lean files in the specified directories.
|
|
|
|
The resulting error output will contain one line for each style error
|
|
encountered that isn't in the list of allowed / ignored style exceptions.
|
|
|
|
Paths with no errors will not appear in the output, and the script will
|
|
exit with successful return code if there are no errors encountered in
|
|
any provided paths.
|
|
|
|
Paths emitted in the output will match the paths provided on the
|
|
command line for any files containing errors -- in particular, linting
|
|
a relative path (like ``Mathlib/Foo/Bar.lean``) will produce errors
|
|
that contain the relative path, whilst linting absolute paths (like
|
|
``/root/mathlib4/Mathlib/Foo/Bar.lean``) will produce errors with the
|
|
absolute path.
|
|
|
|
This script can also be used to regenerate the list of allowed / ignored style
|
|
exceptions by redirecting the output to ``style-exceptions.txt``. Use:
|
|
|
|
$ ./scripts/update-style-exceptions.sh
|
|
|
|
to perform this update.
|
|
"""
|
|
|
|
# TODO: This is adapted from the linter for mathlib3. It should be rewritten in Lean.
|
|
|
|
from pathlib import Path
|
|
import sys
|
|
import re
|
|
import shutil
|
|
|
|
ERR_COP = 0 # copyright header
|
|
ERR_MOD = 2 # module docstring
|
|
ERR_LIN = 3 # line length
|
|
ERR_OPT = 6 # set_option
|
|
ERR_AUT = 7 # malformed authors list
|
|
ERR_TAC = 9 # imported Mathlib.Tactic
|
|
ERR_IBY = 11 # isolated by
|
|
ERR_DOT = 12 # isolated or low focusing dot
|
|
ERR_SEM = 13 # the substring " ;"
|
|
ERR_WIN = 14 # Windows line endings "\r\n"
|
|
ERR_TWS = 15 # trailing whitespace
|
|
ERR_CLN = 16 # line starts with a colon
|
|
ERR_IND = 17 # second line not correctly indented
|
|
ERR_ARR = 18 # space after "←"
|
|
ERR_NUM_LIN = 19 # file is too large
|
|
ERR_NSP = 20 # non-terminal simp
|
|
|
|
exceptions = []
|
|
|
|
SCRIPTS_DIR = Path(__file__).parent.resolve()
|
|
ROOT_DIR = SCRIPTS_DIR.parent
|
|
|
|
|
|
with SCRIPTS_DIR.joinpath("style-exceptions.txt").open(encoding="utf-8") as f:
|
|
for exline in f:
|
|
filename, _, _, _, _, errno, *extra = exline.split()
|
|
path = ROOT_DIR / filename
|
|
if errno == "ERR_COP":
|
|
exceptions += [(ERR_COP, path, None)]
|
|
elif errno == "ERR_MOD":
|
|
exceptions += [(ERR_MOD, path, None)]
|
|
elif errno == "ERR_LIN":
|
|
exceptions += [(ERR_LIN, path, None)]
|
|
elif errno == "ERR_OPT":
|
|
exceptions += [(ERR_OPT, path, None)]
|
|
elif errno == "ERR_AUT":
|
|
exceptions += [(ERR_AUT, path, None)]
|
|
elif errno == "ERR_TAC":
|
|
exceptions += [(ERR_TAC, path, None)]
|
|
elif errno == "ERR_NUM_LIN":
|
|
exceptions += [(ERR_NUM_LIN, path, extra[1])]
|
|
else:
|
|
print(f"Error: unexpected errno in style-exceptions.txt: {errno}")
|
|
sys.exit(1)
|
|
|
|
new_exceptions = False
|
|
|
|
def annotate_comments(enumerate_lines):
|
|
"""
|
|
Take a list of tuples of enumerated lines of the form
|
|
(line_number, line, ...)
|
|
and return a list of
|
|
(line_number, line, ..., True/False)
|
|
where lines have True attached when they are in comments.
|
|
"""
|
|
nesting_depth = 0 # We're in a comment when `nesting_depth > 0`.
|
|
starts_in_comment = False # Whether we're in a comment when starting the line.
|
|
for line_nr, line, *rem in enumerate_lines:
|
|
# We assume multiline comments do not begin or end within single-line comments.
|
|
if line == "\n" or line.lstrip().startswith("--"):
|
|
yield line_nr, line, *rem, True
|
|
continue
|
|
# We assume that "/-/" and "-/-" never occur outside of "--" comments.
|
|
# We assume that we do not encounter "... -/ <term> /- ...".
|
|
# We also don't account for "/-" and "-/" appearing in strings.
|
|
starts_in_comment = (nesting_depth > 0)
|
|
nesting_depth = nesting_depth + line.count("/-") - line.count("-/")
|
|
in_comment = (starts_in_comment or line.lstrip().startswith("/-")) and \
|
|
(nesting_depth > 0 or line.rstrip().endswith("-/"))
|
|
yield line_nr, line, *rem, in_comment
|
|
|
|
def annotate_strings(enumerate_lines):
|
|
"""
|
|
Take a list of tuples of enumerated lines of the form
|
|
(line_number, line, ...)
|
|
and return a list of
|
|
(line_number, line, ..., True/False)
|
|
where lines have True attached when they are in strings.
|
|
"""
|
|
in_string = False
|
|
in_comment = False
|
|
for line_nr, line, *rem in enumerate_lines:
|
|
# ignore comment markers inside string literals
|
|
if not in_string:
|
|
if "/-" in line:
|
|
in_comment = True
|
|
if "-/" in line:
|
|
in_comment = False
|
|
# ignore quotes inside comments
|
|
if not in_comment:
|
|
# crude heuristic: if the number of non-escaped quote signs is odd,
|
|
# we're starting / ending a string literal
|
|
if line.count("\"") - line.count("\\\"") % 2 == 1:
|
|
in_string = not in_string
|
|
# if there are quote signs in this line,
|
|
# a string literal probably begins and / or ends here,
|
|
# so we skip this line
|
|
if line.count("\"") > 0:
|
|
yield line_nr, line, *rem, True
|
|
continue
|
|
if in_string:
|
|
yield line_nr, line, *rem, True
|
|
continue
|
|
yield line_nr, line, *rem, False
|
|
|
|
def set_option_check(lines, path):
|
|
errors = []
|
|
newlines = []
|
|
for line_nr, line, in_comment, in_string in annotate_strings(annotate_comments(lines)):
|
|
if line.strip().startswith('set_option') and not in_comment and not in_string:
|
|
option_prefix = line.strip().split(' ', 2)[1].split('.', 1)[0]
|
|
# forbidden options: pp, profiler, trace
|
|
if option_prefix in {'pp', 'profiler', 'trace'}:
|
|
errors += [(ERR_OPT, line_nr, path)]
|
|
# skip adding this line to newlines so that we suggest removal
|
|
continue
|
|
newlines.append((line_nr, line))
|
|
return errors, newlines
|
|
|
|
def line_endings_check(lines, path):
|
|
errors = []
|
|
newlines = []
|
|
for line_nr, line in lines:
|
|
if "\r\n" in line:
|
|
errors += [(ERR_WIN, line_nr, path)]
|
|
line = line.replace("\r\n", "\n")
|
|
if line.endswith(" \n"):
|
|
errors += [(ERR_TWS, line_nr, path)]
|
|
line = line.rstrip() + "\n"
|
|
newlines.append((line_nr, line))
|
|
return errors, newlines
|
|
|
|
def four_spaces_in_second_line(lines, path):
|
|
# TODO: also fix the space for all lines before ":=", right now we only fix the line after
|
|
# the first line break
|
|
errors = []
|
|
# We never alter the first line, as it does not occur as next_line in the iteration over the
|
|
# zipped lines below, hence we add it here
|
|
newlines = [lines[0]]
|
|
annotated_lines = list(annotate_comments(lines))
|
|
for (_, line, is_comment), (next_line_nr, next_line, _) in zip(annotated_lines,
|
|
annotated_lines[1:]):
|
|
# Check if the current line matches "(lemma|theorem) .* :"
|
|
new_next_line = next_line
|
|
if (not is_comment) and re.search(r"^(protected )?(def|lemma|theorem) (?!.*:=).*(where)?$",
|
|
line):
|
|
# Calculate the number of spaces before the first non-space character in the next line
|
|
stripped_next_line = next_line.lstrip()
|
|
if not (next_line == '\n' or next_line.startswith("#") or stripped_next_line.startswith("--")):
|
|
num_spaces = len(next_line) - len(stripped_next_line)
|
|
# The match with "| " could potentially match with a different usage of the same
|
|
# symbol, e.g. some sort of norm. In that case a space is not necessary, so
|
|
# looking for "| " should be enough.
|
|
if stripped_next_line.startswith("| ") or line.endswith("where\n"):
|
|
# Check and fix if the number of leading space is not 2
|
|
if num_spaces != 2:
|
|
errors += [(ERR_IND, next_line_nr, path)]
|
|
new_next_line = ' ' * 2 + stripped_next_line
|
|
# Check and fix if the number of leading spaces is not 4
|
|
else:
|
|
if num_spaces != 4:
|
|
errors += [(ERR_IND, next_line_nr, path)]
|
|
new_next_line = ' ' * 4 + stripped_next_line
|
|
newlines.append((next_line_nr, new_next_line))
|
|
return errors, newlines
|
|
|
|
def nonterminal_simp_check(lines, path):
|
|
errors = []
|
|
newlines = []
|
|
annotated_lines = list(annotate_comments(lines))
|
|
for (line_nr, line, is_comment), (_, next_line, _) in zip(annotated_lines,
|
|
annotated_lines[1:]):
|
|
# Check if the current line matches whitespace followed by "simp"
|
|
new_line = line
|
|
# TODO it would be better to use a regex like r"^\s*simp( \[.*\])?( at .*)?$" and thereby
|
|
# catch all possible simp invocations. Adding this will require more initial cleanup or
|
|
# nolint.
|
|
if (not is_comment) and re.search(r"^\s*simp$", line):
|
|
# Calculate the number of spaces before the first non-space character in the line
|
|
num_spaces = len(line) - len(line.lstrip())
|
|
# Calculate the number of spaces before the first non-space character in the next line
|
|
stripped_next_line = next_line.lstrip()
|
|
if not (next_line == '\n' or next_line.startswith("#") or stripped_next_line.startswith("--") or "rfl" in next_line):
|
|
num_next_spaces = len(next_line) - len(stripped_next_line)
|
|
# Check if the number of leading spaces is the same
|
|
if num_spaces == num_next_spaces:
|
|
# If so, the simp is nonterminal
|
|
errors += [(ERR_NSP, line_nr, path)]
|
|
new_line = line.replace("simp", "simp?")
|
|
newlines.append((line_nr, new_line))
|
|
newlines.append(lines[-1])
|
|
return errors, newlines
|
|
|
|
def long_lines_check(lines, path):
|
|
errors = []
|
|
# TODO: find a good way to break long lines
|
|
# TODO: some string literals (in e.g. tactic output messages) can be excepted from this rule
|
|
for line_nr, line in lines:
|
|
if "http" in line or "#align" in line:
|
|
continue
|
|
if len(line) > 101:
|
|
errors += [(ERR_LIN, line_nr, path)]
|
|
return errors, lines
|
|
|
|
def import_only_check(lines, path):
|
|
for _, line, is_comment in annotate_comments(lines):
|
|
if is_comment:
|
|
continue
|
|
imports = line.split()
|
|
if imports[0] == "#align_import":
|
|
continue
|
|
if imports[0] != "import":
|
|
return False
|
|
return True
|
|
|
|
def regular_check(lines, path):
|
|
errors = []
|
|
copy_started = False
|
|
copy_done = False
|
|
copy_start_line_nr = 1
|
|
copy_lines = ""
|
|
for line_nr, line in lines:
|
|
if not copy_started and line == "\n":
|
|
errors += [(ERR_COP, copy_start_line_nr, path)]
|
|
continue
|
|
if not copy_started and line == "/-\n":
|
|
copy_started = True
|
|
copy_start_line_nr = line_nr
|
|
continue
|
|
if not copy_started:
|
|
errors += [(ERR_COP, line_nr, path)]
|
|
if copy_started and not copy_done:
|
|
copy_lines += line
|
|
if "Author" in line:
|
|
# Validating names is not a reasonable thing to do,
|
|
# so we just look for the two common variations:
|
|
# using ' and ' between names, and a '.' at the end of line.
|
|
if ((not line.startswith("Authors: ")) or
|
|
(" " in line) or
|
|
(" and " in line) or
|
|
(line[-2] == '.')):
|
|
errors += [(ERR_AUT, line_nr, path)]
|
|
if line == "-/\n":
|
|
if ((not "Copyright" in copy_lines) or
|
|
(not "Apache" in copy_lines) or
|
|
(not "Authors: " in copy_lines)):
|
|
errors += [(ERR_COP, copy_start_line_nr, path)]
|
|
copy_done = True
|
|
continue
|
|
if copy_done and line == "\n":
|
|
continue
|
|
words = line.split()
|
|
if words[0] != "import" and words[0] != "--" and words[0] != "/-!" and words[0] != "#align_import":
|
|
errors += [(ERR_MOD, line_nr, path)]
|
|
break
|
|
if words[0] == "/-!":
|
|
break
|
|
return errors, lines
|
|
|
|
def banned_import_check(lines, path):
|
|
errors = []
|
|
for line_nr, line, is_comment in annotate_comments(lines):
|
|
if is_comment:
|
|
continue
|
|
imports = line.split()
|
|
if imports[0] != "import":
|
|
break
|
|
if imports[1] in ["Mathlib.Tactic"]:
|
|
errors += [(ERR_TAC, line_nr, path)]
|
|
return errors, lines
|
|
|
|
def isolated_by_dot_semicolon_check(lines, path):
|
|
errors = []
|
|
newlines = []
|
|
for line_nr, line in lines:
|
|
if line.strip() == "by":
|
|
# We excuse those "by"s following a comma or ", fun ... =>", since generally hanging "by"s
|
|
# should not be used in the second or later arguments of a tuple/anonymous constructor
|
|
# See https://github.com/leanprover-community/mathlib4/pull/3825#discussion_r1186702599
|
|
prev_line = lines[line_nr - 2][1].rstrip()
|
|
if not prev_line.endswith(",") and not re.search(", fun [^,]* (=>|↦)$", prev_line):
|
|
errors += [(ERR_IBY, line_nr, path)]
|
|
if line.lstrip().startswith(". "):
|
|
errors += [(ERR_DOT, line_nr, path)]
|
|
line = line.replace(". ", "· ", 1)
|
|
if line.strip() in (".", "·"):
|
|
errors += [(ERR_DOT, line_nr, path)]
|
|
if " ;" in line:
|
|
errors += [(ERR_SEM, line_nr, path)]
|
|
line = line.replace(" ;", ";")
|
|
if line.lstrip().startswith(":"):
|
|
errors += [(ERR_CLN, line_nr, path)]
|
|
newlines.append((line_nr, line))
|
|
return errors, newlines
|
|
|
|
def left_arrow_check(lines, path):
|
|
errors = []
|
|
newlines = []
|
|
for line_nr, line, is_comment, in_string in annotate_strings(annotate_comments(lines)):
|
|
if is_comment or in_string:
|
|
newlines.append((line_nr, line))
|
|
continue
|
|
# Allow "←" to be followed by "%" or "`", but not by "`(" or "``(" (since "`()" and "``()"
|
|
# are used for syntax quotations). Otherwise, insert a space after "←".
|
|
new_line = re.sub(r'←(?:(?=``?\()|(?![%`]))(\S)', r'← \1', line)
|
|
if new_line != line:
|
|
errors += [(ERR_ARR, line_nr, path)]
|
|
newlines.append((line_nr, new_line))
|
|
return errors, newlines
|
|
|
|
def output_message(path, line_nr, code, msg):
|
|
if len(exceptions) == 0:
|
|
# we are generating a new exceptions file
|
|
# filename first, then line so that we can call "sort" on the output
|
|
#print(f"{path} : line {line_nr} : {code} : {msg}")
|
|
print(f"{path}:{line_nr} : {code} : {msg}")
|
|
else:
|
|
if code.startswith("ERR"):
|
|
msg_type = "error"
|
|
if code.startswith("WRN"):
|
|
msg_type = "warning"
|
|
# We are outputting for github. We duplicate path, line_nr and code,
|
|
# so that they are also visible in the plaintext output.
|
|
print(f"::{msg_type} file={path},line={line_nr},code={code}::{path}#L{line_nr}: {code}: {msg}")
|
|
|
|
def format_errors(errors):
|
|
global new_exceptions
|
|
for errno, line_nr, path in errors:
|
|
if (errno, path.resolve(), None) in exceptions:
|
|
continue
|
|
new_exceptions = True
|
|
if errno == ERR_COP:
|
|
output_message(path, line_nr, "ERR_COP", "Malformed or missing copyright header")
|
|
if errno == ERR_MOD:
|
|
output_message(path, line_nr, "ERR_MOD", "Module docstring missing, or too late")
|
|
if errno == ERR_LIN:
|
|
output_message(path, line_nr, "ERR_LIN", "Line has more than 100 characters")
|
|
if errno == ERR_OPT:
|
|
output_message(path, line_nr, "ERR_OPT", "Forbidden set_option command")
|
|
if errno == ERR_AUT:
|
|
output_message(path, line_nr, "ERR_AUT", "Authors line should look like: 'Authors: Jean Dupont, Иван Иванович Иванов'")
|
|
if errno == ERR_TAC:
|
|
output_message(path, line_nr, "ERR_TAC", "Files in mathlib cannot import the whole tactic folder")
|
|
if errno == ERR_IBY:
|
|
output_message(path, line_nr, "ERR_IBY", "Line is an isolated 'by'")
|
|
if errno == ERR_DOT:
|
|
output_message(path, line_nr, "ERR_DOT", "Line is an isolated focusing dot or uses . instead of ·")
|
|
if errno == ERR_SEM:
|
|
output_message(path, line_nr, "ERR_SEM", "Line contains a space before a semicolon")
|
|
if errno == ERR_WIN:
|
|
output_message(path, line_nr, "ERR_WIN", "Windows line endings (\\r\\n) detected")
|
|
if errno == ERR_TWS:
|
|
output_message(path, line_nr, "ERR_TWS", "Trailing whitespace detected on line")
|
|
if errno == ERR_CLN:
|
|
output_message(path, line_nr, "ERR_CLN", "Put : and := before line breaks, not after")
|
|
if errno == ERR_IND:
|
|
output_message(path, line_nr, "ERR_IND", "If the theorem/def statement requires multiple lines, indent it correctly (4 spaces or 2 for `|`)")
|
|
if errno == ERR_ARR:
|
|
output_message(path, line_nr, "ERR_ARR", "Missing space after '←'.")
|
|
if errno == ERR_NSP:
|
|
output_message(path, line_nr, "ERR_NSP", "Non-terminal simp. Replace with `simp?` and use the suggested output")
|
|
|
|
def lint(path, fix=False):
|
|
global new_exceptions
|
|
with path.open(encoding="utf-8", newline="") as f:
|
|
# We enumerate the lines so that we can report line numbers in the error messages correctly
|
|
# we will modify lines as we go, so we need to keep track of the original line numbers
|
|
lines = f.readlines()
|
|
enum_lines = enumerate(lines, 1)
|
|
newlines = enum_lines
|
|
for error_check in [line_endings_check,
|
|
four_spaces_in_second_line,
|
|
long_lines_check,
|
|
isolated_by_dot_semicolon_check,
|
|
set_option_check,
|
|
left_arrow_check,
|
|
nonterminal_simp_check]:
|
|
errs, newlines = error_check(newlines, path)
|
|
format_errors(errs)
|
|
|
|
if not import_only_check(newlines, path):
|
|
# Check for too long files: either longer than 1500 lines, or not covered by an exception.
|
|
# Each exception contains a "watermark". If the file is longer than that, we also complain.
|
|
if len(lines) > 1500:
|
|
ex = [e for e in exceptions if e[1] == path.resolve()]
|
|
if ex:
|
|
(_ERR_NUM, _path, watermark) = list(ex)[0]
|
|
assert int(watermark) > 500 # protect against parse error
|
|
is_too_long = len(lines) > int(watermark)
|
|
else:
|
|
is_too_long = True
|
|
if is_too_long:
|
|
new_exceptions = True
|
|
# add up to 200 lines of slack, so simple PRs don't trigger this right away
|
|
watermark = len(lines) // 100 * 100 + 200
|
|
output_message(path, 1, "ERR_NUM_LIN", f"{watermark} file contains {len(lines)} lines, try to split it up")
|
|
errs, newlines = regular_check(newlines, path)
|
|
format_errors(errs)
|
|
errs, newlines = banned_import_check(newlines, path)
|
|
format_errors(errs)
|
|
# if we haven't been asked to fix errors, or there are no errors or no fixes, we're done
|
|
if fix and new_exceptions and enum_lines != newlines:
|
|
path.with_name(path.name + '.bak').write_text("".join(l for _,l in newlines), encoding = "utf8")
|
|
shutil.move(path.with_name(path.name + '.bak'), path)
|
|
|
|
fix = "--fix" in sys.argv
|
|
argv = (arg for arg in sys.argv[1:] if arg != "--fix")
|
|
|
|
for filename in argv:
|
|
lint(Path(filename), fix=fix)
|
|
|
|
if new_exceptions:
|
|
exit(1)
|