diff --git a/Makefile b/Makefile
index 15704319..3b4d6654 100644
--- a/Makefile
+++ b/Makefile
@@ -116,7 +116,7 @@ progress:
 
 .PHONY: spellcheck
 spellcheck:
-	$(PYTHON) scripts/check_spelling.py $(filter-out $@, $(MAKECMDGOALS))
+	$(PYTHON) scripts/check_spelling.py --print-errors --po-files $(filter-out $@, $(MAKECMDGOALS))
 
 .PHONY: lint
 lint:
diff --git a/scripts/check_spelling.py b/scripts/check_spelling.py
index 3b55c155..b532f88e 100644
--- a/scripts/check_spelling.py
+++ b/scripts/check_spelling.py
@@ -1,9 +1,17 @@
 """
 Script to check the spelling of one, many or all .po files based
 on the custom dictionaries under the 'dictionaries/' directory.
+
+Gives the option to print the detected errors and add new entries to the dictionary file.
+
+Display information about usage with `python scripts/check_spelling.py --help`
 """
 
+import argparse
+import contextlib
+import functools
 import os
+import multiprocessing
 from pathlib import Path
 import sys
 import tempfile
@@ -11,22 +19,92 @@
 import pospell
 
 
-def check_spell(po_files=None):
+def main():
+    parser = create_parser()
+    args = parser.parse_args()
+
+    errors = check_spell(args.po_files)
+
+    if args.print_errors:
+        print_errors(errors)
+
+    if args.write_entries:
+        write_new_entries({e[-1] for e in errors})
+
+    sys.exit(0 if not errors else -1)
+
+
+def create_parser():
     """
-    Check spell in the given list of po_files and log the spell errors details.
+    Creates and configures the command line argument parser.
+
+    returns:
+        - argparse.ArgumentParser: the argument parser containing the passed arguments and flags.
+    """
+    parser = argparse.ArgumentParser(
+        usage="python check_spelling.py [options]",
+        description="spell-check translated .po files and add new entries to the dictionary if needed.",
+    )
+
+    parser.add_argument(
+        "-p",
+        "--print-errors",
+        action="store_true",
+        dest="print_errors",
+        help="print the detected errors of the spell-check",
+    )
+    parser.add_argument(
+        "-w",
+        "--write-entries",
+        action="store_true",
+        dest="write_entries",
+        help="write the new detected entries in the dictionary file",
+    )
+    parser.add_argument(
+        "-f",
+        "--po-files",
+        dest="po_files",
+        nargs="*",
+        default=[],
+        help="list of .po files to spell-check, if not given checks all po files",
+    )
+
+    return parser
 
-    If no po_files are given, check spell in all files.
+
+def check_spell(po_files=None):
+    """
+    Check spell in the given list of po_files, or in all po files if none are given.
 
     args:
         po_files: list of po_files paths.
 
     returns:
-        - int: spell errors count.
+        - list: list of tuples containing detected errors.
+    """
+    entries = read_dictionary_entries()
+
+    with write_entries_to_tmp_file(entries) as named_tmp_file:
+        # Run pospell either against all files or the files given on the command line
+        if not po_files:
+            po_files = Path(".").glob("*/*.po")
+
+        detected_errors = detect_errors(po_files, named_tmp_file.name)
+    return detected_errors
+
+
+def read_dictionary_entries():
+    """
+    Read the entries in the dictionary files under `dictionaries` directory.
+
+    returns:
+        - set: a set of string entries
 
     """
-    # Read custom dictionaries
     entries = set()
-    for filename in Path("dictionaries").glob("*.txt"):
-        with open(filename, "r") as f:
+    dictionaries = Path("dictionaries").glob("*.txt")
+
+    for filename in dictionaries:
+        with open(filename, "r", encoding="utf-8") as f:
             entries.update(
                 stripped_line
@@ -34,7 +112,20 @@ def check_spell(po_files=None):
                 if stripped_line
             )
 
-    # Write merged dictionary file
+    return entries
+
+
+@contextlib.contextmanager
+def write_entries_to_tmp_file(entries):
+    """
+    Write the given entries to a named temporary file and yield the file.
+
+    args:
+        entries: a set of entries (strings) to write to the temporary file.
+
+    yields:
+        - tempfile.NamedTemporaryFile: the temporary file with the given entries.
+    """
     with tempfile.NamedTemporaryFile(suffix="_merged_dict.txt") as named_tmp_file:
         for e in entries:
             named_tmp_file.write(f"{e}\n".encode())
@@ -44,17 +135,98 @@ def check_spell(po_files=None):
 
         named_tmp_file.seek(0)
 
-        # Run pospell either against all files or the file given on the command line
-        if not po_files:
-            po_files = Path(".").glob("*/*.po")
+        yield named_tmp_file
+
+
+# Clone of pospell.spell_check tailored to current needs.
+# source: https://git.afpy.org/AFPy/pospell/src/branch/main/pospell.py
+def detect_errors(po_files, personal_dict):
+    """
+    Check for spelling mistakes in the given po_files.
+
+    args:
+        po_files: list of strings or Path objects pointing to po files.
+        personal_dict: name of file containing dictionary entries.
+
+    returns:
+        - list: a list of tuples with the detected errors
+    """
+    # Pool.__exit__ calls terminate() instead of close(), we need the latter,
+    # which ensures the processes' atexit handlers execute fully, which in
+    # turn lets coverage write the sub-processes' coverage information
+    jobs = os.cpu_count() or 1
+    pool = multiprocessing.Pool(jobs)
 
-        detected_errors = pospell.spell_check(
-            po_files, personal_dict=named_tmp_file.name, language="el_GR"
+    try:
+        input_lines = pospell.flatten(
+            pool.map(
+                functools.partial(pospell.po_to_text, drop_capitalized=False),
+                po_files,
+            )
         )
-    return detected_errors
+
+        if not input_lines:
+            return []
+
+        # Distribute input lines across workers
+        lines_per_job = (len(input_lines) + jobs - 1) // jobs
+        chunked_inputs = [
+            input_lines[i : i + lines_per_job]
+            for i in range(0, len(input_lines), lines_per_job)
+        ]
+        errors = pospell.flatten(
+            pool.map(
+                functools.partial(pospell.run_hunspell, "el_GR", personal_dict),
+                chunked_inputs,
+            )
+        )
+    finally:
+        pool.close()
+        pool.join()
+
+    return errors
+
+
+def print_errors(errors):
+    """
+    Print the given errors with the following format:
+    filename:linenumber:word
+
+    args:
+        errors: list of tuples with detected errors.
+    """
+    if errors:
+        print("\nDetected errors:")
+
+    for error in errors:
+        print("\t" + ":".join(map(str, error)))
+
+
+def write_new_entries(new_entries):
+    """
+    Write the given entries to the dictionary file respecting the
+    alphabetical sorting.
+
+    args:
+        new_entries: set of entries (strings) to write to the dictionary file.
+    """
+    # NOTE(review): read_dictionary_entries() merges every dictionaries/*.txt,
+    # so entries from other dictionary files are also copied into main.txt.
+    entries = read_dictionary_entries()
+
+    entries.update(new_entries)
+    entries = sorted(entries)
+
+    with open(Path("dictionaries", "main.txt"), "w", encoding="utf-8") as file:
+        for e in entries:
+            file.write(f"{e}\n")
+
+    if new_entries:
+        print("\nWrote the below new entries to main.txt:")
+
+    for e in sorted(new_entries):
+        print(f"\t {e}")
 
 
 if __name__ == "__main__":
-    po_files = sys.argv[1:]
-    errors = check_spell(po_files)
-    sys.exit(0 if errors == 0 else -1)
+    main()