diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 491371d902..bc6b9109d8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,5 @@ --- -files: ^(.*\.(py|json|md|sh|yaml|yml|in|cfg|txt|rst|toml|precommit-toml))$ +files: ^(.*\.(py|json|md|sh|yaml|yml|in|cfg|txt|rst|toml|precommit-toml|wordlist))$ exclude: ^(\.[^/]*cache/.*)$ repos: - repo: https://github.com/executablebooks/mdformat @@ -46,6 +46,9 @@ repos: - id: fix-byte-order-marker - id: check-case-conflict - id: check-toml + - id: file-contents-sorter + files: dictionary.*\.txt$|\.wordlist$ + args: [--ignore-case] - repo: https://github.com/adrienverge/yamllint.git rev: v1.32.0 hooks: diff --git a/Makefile b/Makefile index 0c239e5938..d05f425b63 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,4 @@ -SORT_ARGS := -f -b - -DICTIONARIES := codespell_lib/data/dictionary*.txt +DICTIONARIES := codespell_lib/data/dictionary*.txt codespell_lib/tests/data/*.wordlist PHONY := all check check-dictionaries sort-dictionaries trim-dictionaries check-dist pytest pypi ruff clean @@ -14,10 +12,6 @@ codespell.1: codespell.1.include Makefile check-dictionaries: @for dictionary in ${DICTIONARIES}; do \ - if ! LC_ALL=C sort ${SORT_ARGS} -c $$dictionary; then \ - echo "Dictionary $$dictionary not sorted. Sort with 'make sort-dictionaries'"; \ - exit 1; \ - fi; \ if grep -E -n "^\s*$$|\s$$|^\s" $$dictionary; then \ echo "Dictionary $$dictionary contains leading/trailing whitespace and/or blank lines. Trim with 'make trim-dictionaries'"; \ exit 1; \ @@ -31,9 +25,7 @@ check-dictionaries: fi sort-dictionaries: - @for dictionary in ${DICTIONARIES}; do \ - LC_ALL=C sort ${SORT_ARGS} -u -o $$dictionary $$dictionary; \ - done + pre-commit run --all-files file-contents-sorter trim-dictionaries: @for dictionary in ${DICTIONARIES}; do \ diff --git a/codespell_lib/data/dictionary.txt b/codespell_lib/data/dictionary.txt index f2bad8f511..a54e70d18c 100644 --- a/codespell_lib/data/dictionary.txt +++ b/codespell_lib/data/dictionary.txt @@ -5,6 +5,9 @@ 3rt->3rd 3st->3rd 4rd->4th +__attribyte__->__attribute__ +__cpluspus->__cplusplus +__cpusplus->__cplusplus a-diaerers->a-diaereses aaccess->access aaccessibility->accessibility @@ -8230,9 +8233,9 @@ cllouds->clouds cloack->cloak cloacks->cloaks cloberring->clobbering +clock_getttime->clock_gettime clocksourc->clocksource clockwíse->clockwise -clock_getttime->clock_gettime clodes->closed, clothes, cloding->closing cloes->close @@ -11247,8 +11250,8 @@ cought->caught, cough, fought, coul->could could'nt->couldn't could't->couldn't -couldent->couldn't coulden`t->couldn't +couldent->couldn't couldn->could, couldn't, couldn;t->couldn't couldnt'->couldn't @@ -31326,6 +31329,7 @@ phsyically->physically phsyics->physics phtread->pthread phtreads->pthreads +phy_interace->phy_interface phyiscal->physical phyiscally->physically phyiscs->physics @@ -31352,7 +31356,6 @@ physisions->physicians physisist->physicist phython->python phyton->python -phy_interace->phy_interface piar->pair, pier, pliers, piars->pairs, piers, pliers, piblisher->publisher @@ -47125,8 +47128,8 @@ woudl->would woudn't->wouldn't would'nt->wouldn't would't->wouldn't -wouldent->wouldn't woulden`t->wouldn't +wouldent->wouldn't wouldn;t->wouldn't wouldnt'->wouldn't wouldnt->wouldn't @@ -47370,9 +47373,6 @@ zukeeni->zucchini zuser->user zylophone->xylophone zylophones->xylophones -__attribyte__->__attribute__ -__cpluspus->__cplusplus -__cpusplus->__cplusplus évaluate->evaluate сontain->contain сontained->contained diff --git a/codespell_lib/tests/test_dictionary.py b/codespell_lib/tests/test_dictionary.py index 059b7f46a4..07b8e868e5 100644 --- a/codespell_lib/tests/test_dictionary.py +++ b/codespell_lib/tests/test_dictionary.py @@ -1,6 +1,7 @@ import glob import os import os.path as op +import pathlib import re import warnings from typing import Any, Dict, Iterable, Optional, Set, Tuple @@ -11,6 +12,8 @@ spellers = {} +root = pathlib.Path(__file__).parent.parent + try: import aspell # type: ignore[import] @@ -77,6 +80,24 @@ def test_dictionary_formatting( raise AssertionError("\n" + "\n".join(errors)) +@pytest.mark.parametrize( + "filename", + [ + *(root / "data").rglob("dictionary*.txt"), + *(root / "tests/data").rglob("*.wordlist"), + ], +) +def test_dictionary_sorting(filename: pathlib.Path) -> None: + relative_path = filename.relative_to(root) + previous_line = None + with filename.open(encoding="utf-8") as file: + for current_line in file: + current_line = current_line.strip().lower() + if previous_line is not None: + assert previous_line < current_line, f"{relative_path} is not sorted" + previous_line = current_line + + def _check_aspell( phrase: str, msg: str,