From 212f0d3c3f250817cd20ce0798b537954ba219d9 Mon Sep 17 00:00:00 2001 From: Kurt McKee Date: Wed, 26 Jul 2023 19:28:58 -0500 Subject: [PATCH 1/3] Use pre-commit to sort the dictionaries This changes the sort order of specific characters (like `"_"`) but is cross-platform and can be fixed by pre-commit.ci. Closes #2689 --- .pre-commit-config.yaml | 3 +++ Makefile | 8 +------- codespell_lib/data/dictionary.txt | 14 +++++++------- codespell_lib/tests/test_dictionary.py | 15 +++++++++++++++ 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 491371d902..08f82000d9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -46,6 +46,9 @@ repos: - id: fix-byte-order-marker - id: check-case-conflict - id: check-toml + - id: file-contents-sorter + files: dictionary.*\.txt$ + args: [--ignore-case] - repo: https://github.com/adrienverge/yamllint.git rev: v1.32.0 hooks: diff --git a/Makefile b/Makefile index 0c239e5938..18cf94cf2b 100644 --- a/Makefile +++ b/Makefile @@ -14,10 +14,6 @@ codespell.1: codespell.1.include Makefile check-dictionaries: @for dictionary in ${DICTIONARIES}; do \ - if ! LC_ALL=C sort ${SORT_ARGS} -c $$dictionary; then \ - echo "Dictionary $$dictionary not sorted. Sort with 'make sort-dictionaries'"; \ - exit 1; \ - fi; \ if grep -E -n "^\s*$$|\s$$|^\s" $$dictionary; then \ echo "Dictionary $$dictionary contains leading/trailing whitespace and/or blank lines. Trim with 'make trim-dictionaries'"; \ exit 1; \ @@ -31,9 +27,7 @@ check-dictionaries: fi sort-dictionaries: - @for dictionary in ${DICTIONARIES}; do \ - LC_ALL=C sort ${SORT_ARGS} -u -o $$dictionary $$dictionary; \ - done + pre-commit run --all-files file-contents-sorter trim-dictionaries: @for dictionary in ${DICTIONARIES}; do \ diff --git a/codespell_lib/data/dictionary.txt b/codespell_lib/data/dictionary.txt index f2bad8f511..a54e70d18c 100644 --- a/codespell_lib/data/dictionary.txt +++ b/codespell_lib/data/dictionary.txt @@ -5,6 +5,9 @@ 3rt->3rd 3st->3rd 4rd->4th +__attribyte__->__attribute__ +__cpluspus->__cplusplus +__cpusplus->__cplusplus a-diaerers->a-diaereses aaccess->access aaccessibility->accessibility @@ -8230,9 +8233,9 @@ cllouds->clouds cloack->cloak cloacks->cloaks cloberring->clobbering +clock_getttime->clock_gettime clocksourc->clocksource clockwíse->clockwise -clock_getttime->clock_gettime clodes->closed, clothes, cloding->closing cloes->close @@ -11247,8 +11250,8 @@ cought->caught, cough, fought, coul->could could'nt->couldn't could't->couldn't -couldent->couldn't coulden`t->couldn't +couldent->couldn't couldn->could, couldn't, couldn;t->couldn't couldnt'->couldn't @@ -31326,6 +31329,7 @@ phsyically->physically phsyics->physics phtread->pthread phtreads->pthreads +phy_interace->phy_interface phyiscal->physical phyiscally->physically phyiscs->physics @@ -31352,7 +31356,6 @@ physisions->physicians physisist->physicist phython->python phyton->python -phy_interace->phy_interface piar->pair, pier, pliers, piars->pairs, piers, pliers, piblisher->publisher @@ -47125,8 +47128,8 @@ woudl->would woudn't->wouldn't would'nt->wouldn't would't->wouldn't -wouldent->wouldn't woulden`t->wouldn't +wouldent->wouldn't wouldn;t->wouldn't wouldnt'->wouldn't wouldnt->wouldn't @@ -47370,9 +47373,6 @@ zukeeni->zucchini zuser->user zylophone->xylophone zylophones->xylophones -__attribyte__->__attribute__ -__cpluspus->__cplusplus -__cpusplus->__cplusplus évaluate->evaluate сontain->contain сontained->contained diff --git a/codespell_lib/tests/test_dictionary.py b/codespell_lib/tests/test_dictionary.py index 059b7f46a4..8b41ad52ab 100644 --- a/codespell_lib/tests/test_dictionary.py +++ b/codespell_lib/tests/test_dictionary.py @@ -77,6 +77,21 @@ def test_dictionary_formatting( raise AssertionError("\n" + "\n".join(errors)) +@fname_params +def test_dictionary_sorting( + fname: str, + in_aspell: Tuple[bool, bool], + in_dictionary: Tuple[Iterable[str], Iterable[str]], +) -> None: + previous_line = None + with open(fname, encoding="utf-8") as file: + for current_line in file: + current_line = current_line.strip().lower() + if previous_line is not None: + assert previous_line < current_line, f"{fname} is not sorted" + previous_line = current_line + + def _check_aspell( phrase: str, msg: str, From 10757a84024d0ae5c2709bbad3c23b2e0a8c5f08 Mon Sep 17 00:00:00 2001 From: Kurt McKee Date: Thu, 27 Jul 2023 08:33:02 -0500 Subject: [PATCH 2/3] Feedback: Remove `SORT_ARGS`, which is now unused --- Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Makefile b/Makefile index 18cf94cf2b..9c7e9b8f98 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,3 @@ -SORT_ARGS := -f -b - DICTIONARIES := codespell_lib/data/dictionary*.txt PHONY := all check check-dictionaries sort-dictionaries trim-dictionaries check-dist pytest pypi ruff clean From 732d0b9c41ca6737fb7a7a86da2c9bbe8acabc51 Mon Sep 17 00:00:00 2001 From: Kurt McKee Date: Thu, 27 Jul 2023 08:52:05 -0500 Subject: [PATCH 3/3] Feedback: Sort additional `.wordlist` files, too --- .pre-commit-config.yaml | 4 ++-- Makefile | 2 +- codespell_lib/tests/test_dictionary.py | 22 ++++++++++++++-------- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 08f82000d9..bc6b9109d8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,5 @@ --- -files: ^(.*\.(py|json|md|sh|yaml|yml|in|cfg|txt|rst|toml|precommit-toml))$ +files: ^(.*\.(py|json|md|sh|yaml|yml|in|cfg|txt|rst|toml|precommit-toml|wordlist))$ exclude: ^(\.[^/]*cache/.*)$ repos: - repo: https://github.com/executablebooks/mdformat @@ -47,7 +47,7 @@ repos: - id: check-case-conflict - id: check-toml - id: file-contents-sorter - files: dictionary.*\.txt$ + files: dictionary.*\.txt$|\.wordlist$ args: [--ignore-case] - repo: https://github.com/adrienverge/yamllint.git rev: v1.32.0 diff --git a/Makefile b/Makefile index 9c7e9b8f98..d05f425b63 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -DICTIONARIES := codespell_lib/data/dictionary*.txt +DICTIONARIES := codespell_lib/data/dictionary*.txt codespell_lib/tests/data/*.wordlist PHONY := all check check-dictionaries sort-dictionaries trim-dictionaries check-dist pytest pypi ruff clean diff --git a/codespell_lib/tests/test_dictionary.py b/codespell_lib/tests/test_dictionary.py index 8b41ad52ab..07b8e868e5 100644 --- a/codespell_lib/tests/test_dictionary.py +++ b/codespell_lib/tests/test_dictionary.py @@ -1,6 +1,7 @@ import glob import os import os.path as op +import pathlib import re import warnings from typing import Any, Dict, Iterable, Optional, Set, Tuple @@ -11,6 +12,8 @@ spellers = {} +root = pathlib.Path(__file__).parent.parent + try: import aspell # type: ignore[import] @@ -77,18 +80,21 @@ def test_dictionary_formatting( raise AssertionError("\n" + "\n".join(errors)) -@fname_params -def test_dictionary_sorting( - fname: str, - in_aspell: Tuple[bool, bool], - in_dictionary: Tuple[Iterable[str], Iterable[str]], -) -> None: +@pytest.mark.parametrize( + "filename", + [ + *(root / "data").rglob("dictionary*.txt"), + *(root / "tests/data").rglob("*.wordlist"), + ], +) +def test_dictionary_sorting(filename: pathlib.Path) -> None: + relative_path = filename.relative_to(root) previous_line = None - with open(fname, encoding="utf-8") as file: + with filename.open(encoding="utf-8") as file: for current_line in file: current_line = current_line.strip().lower() if previous_line is not None: - assert previous_line < current_line, f"{fname} is not sorted" + assert previous_line < current_line, f"{relative_path} is not sorted" previous_line = current_line