From 3aa8d69ab74555a0c6ca1ea87e42a9f4957e72f3 Mon Sep 17 00:00:00 2001 From: "Colin B. Macdonald" Date: Fri, 17 May 2024 14:31:33 -0700 Subject: [PATCH 01/18] CI: v4 of checkout to avoid node warnings --- .github/workflows/test_quick.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_quick.yml b/.github/workflows/test_quick.yml index 31d9d42da..d3c1d2bfe 100644 --- a/.github/workflows/test_quick.yml +++ b/.github/workflows/test_quick.yml @@ -3,7 +3,7 @@ name: Test quick on: pull_request: branches: [main] - + workflow_dispatch: jobs: @@ -22,9 +22,9 @@ jobs: steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: actions/setup-python@v2 - + - name: test_quick env: From 6870ade90b15099b7d52d8188e6282c04a9905aa Mon Sep 17 00:00:00 2001 From: "Colin B. Macdonald" Date: Fri, 17 May 2024 14:33:17 -0700 Subject: [PATCH 02/18] CI: Add a codespell job --- .codespell-ignorelines | 10 ++++++++++ .codespell-ignorewords | 10 ++++++++++ .github/workflows/test_quick.yml | 12 ++++++++++++ 3 files changed, 32 insertions(+) create mode 100644 .codespell-ignorelines create mode 100644 .codespell-ignorewords diff --git a/.codespell-ignorelines b/.codespell-ignorelines new file mode 100644 index 000000000..776816d39 --- /dev/null +++ b/.codespell-ignorelines @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: FSFAP +# Copyright (C) 2024 Colin B. Macdonald +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. This file is offered as-is, +# without any warranty. + +# lines that codespell should ignore: whitespace matters! + diff --git a/.codespell-ignorewords b/.codespell-ignorewords new file mode 100644 index 000000000..1de8d1be2 --- /dev/null +++ b/.codespell-ignorewords @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: FSFAP +# Copyright (C) 2024 Colin B. 
Macdonald +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. This file is offered as-is, +# without any warranty. + +# words that codespell should not complain about + diff --git a/.github/workflows/test_quick.yml b/.github/workflows/test_quick.yml index d3c1d2bfe..a68689190 100644 --- a/.github/workflows/test_quick.yml +++ b/.github/workflows/test_quick.yml @@ -38,3 +38,15 @@ jobs: inputs_wheels_windows_auto: "1" run: python scripts/gh_release.py + + + codespell: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: codespell-project/actions-codespell@v2 + with: + ignore_words_file: .codespell-ignorewords + exclude_file: .codespell-ignorelines + check_filenames: true + check_hidden: true From 873321d8865bd74c21a606984c35cdc54fec6320 Mon Sep 17 00:00:00 2001 From: "Colin B. Macdonald" Date: Fri, 17 May 2024 15:43:52 -0700 Subject: [PATCH 03/18] Fix some spelling errors --- .codespell-ignorewords | 2 +- src/utils.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.codespell-ignorewords b/.codespell-ignorewords index 1de8d1be2..f16a3377e 100644 --- a/.codespell-ignorewords +++ b/.codespell-ignorewords @@ -7,4 +7,4 @@ # without any warranty. # words that codespell should not complain about - +re-use diff --git a/src/utils.py b/src/utils.py index b4fdd43ea..7fca326f0 100644 --- a/src/utils.py +++ b/src/utils.py @@ -582,7 +582,7 @@ def get_textpage_ocr( Args: flags: (int) control content becoming part of the result. - language: (str) specify expected language(s). Deafault is "eng" (English). + language: (str) specify expected language(s). Default is "eng" (English). dpi: (int) resolution in dpi, default 72. full: (bool) whether to OCR the full page image, or only its images (default) """ @@ -1106,7 +1106,7 @@ def set_toc_item( (str) the new title. No change if None. 
to: (point-like) destination on the target page. If omitted, (72, 36) - will be used as taget coordinates. + will be used as target coordinates. filename: (str) destination filename, required for pymupdf.LINK_GOTOR and pymupdf.LINK_LAUNCH. @@ -1373,7 +1373,7 @@ def set_toc( # ------------------------------------------------------------------------------ olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}] # ------------------------------------------------------------------------------ - # build olitems as a list of PDF-like connnected dictionaries + # build olitems as a list of PDF-like connected dictionaries # ------------------------------------------------------------------------------ for i in range(toclen): o = toc[i] @@ -5106,7 +5106,7 @@ def set_page_labels(doc, labels): # William Chapman, 2021-01-06 def create_label_str(label): - """Convert Python label dict to correspnding PDF rule string. + """Convert Python label dict to corresponding PDF rule string. Args: label: (dict) build rule for the label. @@ -5194,7 +5194,7 @@ def recover_bbox_quad(line_dir: tuple, span: dict, bbox: tuple) -> pymupdf.Quad: d = span["ascender"] - span["descender"] height = d * span["size"] # the quad's rectangle height - # The following are distances from the bbox corners, at wich we find the + # The following are distances from the bbox corners, at which we find the # respective quad points. The computation depends on in which quadrant # the text writing angle is located. hs = height * sin From dd409af058b29412ef79af5c4aac03facc40c2be Mon Sep 17 00:00:00 2001 From: "Colin B. 
Macdonald" Date: Fri, 17 May 2024 15:48:14 -0700 Subject: [PATCH 04/18] Skip pdf files --- .codespell-ignorewords | 1 + .github/workflows/test_quick.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.codespell-ignorewords b/.codespell-ignorewords index f16a3377e..a2a6732cd 100644 --- a/.codespell-ignorewords +++ b/.codespell-ignorewords @@ -8,3 +8,4 @@ # words that codespell should not complain about re-use +flate diff --git a/.github/workflows/test_quick.yml b/.github/workflows/test_quick.yml index a68689190..9821867d2 100644 --- a/.github/workflows/test_quick.yml +++ b/.github/workflows/test_quick.yml @@ -50,3 +50,4 @@ jobs: exclude_file: .codespell-ignorelines check_filenames: true check_hidden: true + skip: *.pdf From 2eb1abfc0cc6ac7debb8abed8a0b2c58924d0d67 Mon Sep 17 00:00:00 2001 From: "Colin B. Macdonald" Date: Fri, 17 May 2024 15:50:22 -0700 Subject: [PATCH 05/18] Fix typos --- scripts/test.py | 4 ++-- setup.py | 8 ++++---- src/extra.i | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/test.py b/scripts/test.py index ed74c676c..62e0e291e 100755 --- a/scripts/test.py +++ b/scripts/test.py @@ -58,7 +58,7 @@ Location of local mupdf/ directory or 'git:...' to be used when building PyMuPDF. [This sets environment variable PYMUPDF_SETUP_MUPDF_BUILD, which is used by PyMuPDF/setup.py. If not - specifed PyMuPDF will download its default mupdf .tgz.] + specified PyMuPDF will download its default mupdf .tgz.] -p Set pytest options; default is ''. -t @@ -422,7 +422,7 @@ def test( elif gdb: command = f'{python} {pymupdf_dir_rel}/tests/run_compound.py{run_compound_args} gdb --args {python} -m pytest {pytest_options} {pytest_arg}' elif platform.system() == 'Windows': - # `python -m pytest` doesn' seem to work. + # `python -m pytest` doesn't seem to work. 
command = f'{python} {pymupdf_dir_rel}/tests/run_compound.py{run_compound_args} pytest {pytest_options} {pytest_arg}' else: # On OpenBSD `pip install pytest` doesn't seem to install the pytest diff --git a/setup.py b/setup.py index e2b426015..ad2375167 100755 --- a/setup.py +++ b/setup.py @@ -365,7 +365,7 @@ def get_git_id( directory): def get_mupdf_internal(out, location=None, sha=None, local_tgz=None): ''' Gets MuPDF as either a .tgz or a local directory. - + Args: out: Either 'dir' (we return name of local directory containing mupdf) or 'tgz' (we return @@ -373,8 +373,8 @@ def get_mupdf_internal(out, location=None, sha=None, local_tgz=None): location: First, if None we set to hard-coded default URL or git location. If starts with 'git:', should be remote git location. - Otherwise if containg '://' should be URL for .tgz. - Otherwise shuld path of local mupdf checkout. + Otherwise if containing '://' should be URL for .tgz. + Otherwise should be path of local mupdf checkout. sha: If not None and we use git clone, we checkout this sha. local_tgz: @@ -668,7 +668,7 @@ def env_add(env, name, value, sep=' ', prepend=False, verbose=False): ''' Appends/prepends `` to `env[name]`. - If `name` is not in `env`, we use os.environ[nane] if it exists. + If `name` is not in `env`, we use os.environ[name] if it exists. 
''' v = env.get(name) if verbose: diff --git a/src/extra.i b/src/extra.i index 0b8c0ea53..6748fa2e0 100644 --- a/src/extra.i +++ b/src/extra.i @@ -732,7 +732,7 @@ static int DICT_SETITEMSTR_DROP(PyObject *dict, const char *key, PyObject *value //----------------------------------------------------------------------------- -// Functions converting betwenn PySequences and pymupdf geometry objects +// Functions converting between PySequences and pymupdf geometry objects //----------------------------------------------------------------------------- static int jm_init_item(PyObject* obj, Py_ssize_t idx, int* result) @@ -2300,7 +2300,7 @@ void JM_print_stext_page_as_text(mupdf::FzBuffer& res, mupdf::FzStextPage& page) #define CLIP_PATH 3 #define CLIP_STROKE_PATH 4 -// Every scissor of a clip is a sub rectangle of the preceeding clip +// Every scissor of a clip is a sub rectangle of the preceding clip // scissor if the clip level is larger. static fz_rect compute_scissor(jm_lineart_device *dev) { From 812b8ea53ab36d217b2c449d58cf014aa9916dcf Mon Sep 17 00:00:00 2001 From: "Colin B. Macdonald" Date: Fri, 17 May 2024 15:52:20 -0700 Subject: [PATCH 06/18] quotes on globs --- .github/workflows/test_quick.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_quick.yml b/.github/workflows/test_quick.yml index 9821867d2..2e3ed0bd5 100644 --- a/.github/workflows/test_quick.yml +++ b/.github/workflows/test_quick.yml @@ -50,4 +50,4 @@ jobs: exclude_file: .codespell-ignorelines check_filenames: true check_hidden: true - skip: *.pdf + skip: "*.pdf" From 5fefa539923cf45eef83904718de899a43b12c93 Mon Sep 17 00:00:00 2001 From: "Colin B. 
Macdonald" Date: Fri, 17 May 2024 16:06:27 -0700 Subject: [PATCH 07/18] Fix spelling errors or typos --- changes.txt | 50 +++++++++++++++---------------- docs/_static/custom.css | 4 +-- docs/annot.rst | 2 +- docs/footer.rst | 2 +- docs/glossary.rst | 2 +- docs/page.rst | 2 +- docs/recipes-optional-content.rst | 2 +- docs/recipes-stories.rst | 2 +- docs/recipes-text.rst | 2 +- docs/the-basics.rst | 2 +- docs/vars.rst | 2 +- src/__init__.py | 40 ++++++++++++------------- tests/test_drawings.py | 2 +- 13 files changed, 57 insertions(+), 57 deletions(-) diff --git a/changes.txt b/changes.txt index f94e01d73..f3feddaf2 100644 --- a/changes.txt +++ b/changes.txt @@ -43,7 +43,7 @@ Change Log * Fixed some problems when checking for PDF properties. * Fixed pip builds from sdist (see discussion `3360 `_: - Alpine linux docker build failing "No matching distribution found for pymupdfb==1.24.1"). + Alpine Linux docker build failing "No matching distribution found for pymupdfb==1.24.1"). **Changes in version 1.24.2 (2024-04-17)** @@ -94,13 +94,13 @@ Change Log * **Fixed** `3281 `_: Preparing metadata (pyproject.toml) did not run successfully * **Fixed** `3279 `_: PyMuPDF no longer builds in Alpine Linux - * **Fixed** `3257 `_: apply_redactions() deleting text outside of annoted box + * **Fixed** `3257 `_: apply_redactions() deleting text outside of annotated box * **Fixed** `3216 `_: AttributeError: 'Annot' object has no attribute '__del__' * **Fixed** `3207 `_: get_drawings's items is missing line from h path operator * **Fixed** `3201 `_: Memory leaks when merging PDFs * **Fixed** `3197 `_: page.get_text() returns hexadecimal text for some characters * **Fixed** `3196 `_: Remove text not working in 1.23.25 version vs 1.20.2 - * **Fixed** `3172 `_: PDF's 45º lines dissapearing in png conversion + * **Fixed** `3172 `_: PDF's 45º lines disappearing in png conversion * **Fixed** `3135 `_: Do not log warnings to stdout * **Fixed** `3125 `_: get_pixmap method stuck on one page and 
runs forever * **Fixed** `2964 `_: There is an issue with the image generated by the page.get_pixmap() function @@ -517,7 +517,7 @@ Change Log * Bug fixes: - * **Fixed** `2556 `_: Segmentation fault at caling get_cdrawings(extended=True) + * **Fixed** `2556 `_: Segmentation fault at calling get_cdrawings(extended=True) * **Fixed** `2637 `_: Page.insert_textbox incorrectly handles the last word if it starts a new line * **Fixed** `2683 `_: Windows sdist build failure - non-quoting of path and using UNIX which command * **Fixed** `2691 `_: Page.get_textpage_ocr() bug in rebased fitz_new version @@ -797,7 +797,7 @@ Change Log * Improve ``insert_file()`` documentation. - * ``get_bboxlog()``: aded optional ``layers`` to ``get_bboxlog()``. + * ``get_bboxlog()``: added optional ``layers`` to ``get_bboxlog()``. * ``Page.get_texttrace()``: add new dictionary key ``layer``, name of Optional Content Group. * Mention use of Python venv in installation documentation. @@ -977,7 +977,7 @@ Changes to build/release process: * **Added** new constants defining the default text extraction flags for more comfortable handling. Their naming convention is like :data:`TEXTFLAGS_WORDS` for ``page.get_text("words")``. See :ref:`text_extraction_flags`. -* **Changed** :meth:`Page.annots` and :meth:`Page.widgets` to detect and prevent reloading the page (illegally) inside the iterator loops via :meth:`Document.reload_page`. Doing this brings down the interpretor. Documented clean ways to do annotation and widget mass updates within properly designed loops. +* **Changed** :meth:`Page.annots` and :meth:`Page.widgets` to detect and prevent reloading the page (illegally) inside the iterator loops via :meth:`Document.reload_page`. Doing this brings down the interpreter. Documented clean ways to do annotation and widget mass updates within properly designed loops. * **Changed** several internal utility functions to become standalone ("SWIG inline") as opposed to be part of the :ref:`Tools` class. 
This, among other things, increases the performance of geometry object creation. @@ -1038,11 +1038,11 @@ This patch version implements minor improvements for :ref:`Pixmap` and also some * **Fixed** `#1351 `_. Reverted code that introduced the memory growth in v1.18.15. -* **Fixed** `#1417 `_. Developped circumvention for growth of open file handles using :meth:`Document.insert_pdf`. +* **Fixed** `#1417 `_. Developed circumvention for growth of open file handles using :meth:`Document.insert_pdf`. -* **Fixed** `#1418 `_. Developped circumvention for memory growth using :meth:`Document.insert_pdf`. +* **Fixed** `#1418 `_. Developed circumvention for memory growth using :meth:`Document.insert_pdf`. -* **Fixed** `#1430 `_. Developped circumvention for mass pixmap generations of document pages. +* **Fixed** `#1430 `_. Developed circumvention for mass pixmap generations of document pages. * **Fixed** `#1433 `_. Solves a bbox error for some Type 3 font in PyMuPDF text processing. @@ -1114,7 +1114,7 @@ A new MuPDF feature is **journalling PDF updates**, which is also supported by t A third feature (unrelated to the new MuPDF version) includes the ability to detect when page **objects cover or hide each other**. It is now e.g. possible to see that text is covered by a drawing or an image. -* **Changed** terminology and meaning of important geometry concepts: Rectangles are now characterized as *finite*, *valid* or *empty*, while the definitions of these terms have also changed. Rectangles specifically are now thought of being "open": not all corners and sides are considered part of the retangle. Please do read the :ref:`Rect` section for details. +* **Changed** terminology and meaning of important geometry concepts: Rectangles are now characterized as *finite*, *valid* or *empty*, while the definitions of these terms have also changed. Rectangles specifically are now thought of being "open": not all corners and sides are considered part of the rectangle. 
Please do read the :ref:`Rect` section for details. * **Added** new parameter `"no_new_id"` to :meth:`Document.save` / :meth:`Document.tobytes` methods. Use it to suppress updating the second item of the document ``/ID`` which in PDF indicates that the original file has been updated. If the PDF has no ``/ID`` at all yet, then no new one will be created either. @@ -1244,7 +1244,7 @@ Focus of this version are major performance improvements of selected functions. * **Added** documentation for handling transparent image insertions, :meth:`Page.insert_image`. * **Added** :meth:`Page.get_image_rects`, an improved version of :meth:`Page.get_image_bbox`. * **Changed** :meth:`Document.delete_pages` to support various ways of specifying pages to delete. Implements `#1042 `_. -* **Changed** :meth:`Page.insert_image` to also accept the xref of an existing image in the file. This allows "copying" images between pages, and extremely fast mutiple insertions. +* **Changed** :meth:`Page.insert_image` to also accept the xref of an existing image in the file. This allows "copying" images between pages, and extremely fast multiple insertions. * **Changed** :meth:`Page.insert_image` to also accept the integer parameter ``alpha``. To be used for performance improvements. * **Changed** :meth:`Pixmap.set_alpha` to support new parameters for pre-multiplying colors with their alpha values and setting a specific color to fully transparent (e.g. white). * **Changed** :meth:`Document.embfile_add` to automatically set creation and modification date-time. Correspondingly, :meth:`Document.embfile_upd` automatically maintains modification date-time (``/ModDate`` PDF key), and :meth:`Document.embfile_info` correspondingly reports these data. In addition, the embedded file's associated "collection item" is included via its :data:`xref`. This supports the development of PDF portfolio applications. @@ -1282,7 +1282,7 @@ Focus of this version are major performance improvements of selected functions. 
* **Fixed** issue `#895 `_. * **Fixed** issue `#896 `_. Since v1.17.6 PyMuPDF suppresses the font subset tags and only reports the base fontname in text extraction outputs "dict" / "json" / "rawdict" / "rawjson". Now a new global parameter can request the old behaviour, :meth:`Tools.set_subset_fontnames`. * **Fixed** issue `#885 `_. Pixmap creation now also works with filenames given as ``pathlib.Paths``. -* **Changed** :meth:`Document.subset_fonts`: Text is **not rewritten** any more and should therefore **retain all its origial properties** -- like being hidden or being controlled by Optional Content mechanisms. +* **Changed** :meth:`Document.subset_fonts`: Text is **not rewritten** any more and should therefore **retain all its original properties** -- like being hidden or being controlled by Optional Content mechanisms. * **Changed** :ref:`TextWriter` output to also accept text in right to left mode (Arabian, Hebrew): :meth:`TextWriter.fill_textbox`, :meth:`TextWriter.append`. These methods now accept a new boolean parameter `right_to_left`, which is *False* by default. Implements `#897 `_. * **Changed** :meth:`TextWriter.fill_textbox` to return all lines of text, that did not fit in the given rectangle. Also changed the default of the ``warn`` parameter to no longer print a warning message in overflow situations. * **Added** a utility function :meth:`recover_quad`, which computes the quadrilateral of a span. This function can be used for correctly marking text extracted with the "dict" or "rawdict" options of :meth:`Page.get_text`. @@ -1458,8 +1458,8 @@ This is the first PyMuPDF version supporting MuPDF v1.18. The focus here is on e * **Fixed** issue `#651 `_. An upstream bug causing interpreter crashes in corner case redaction processings was fixed by backporting MuPDF changes from their development repo. * **Fixed** issue `#645 `_. Pixmap top-left coordinates can be set (again) by their own method, :meth:`Pixmap.set_origin`. * **Fixed** issue `#622 `_. 
:meth:`Page.insertImage` again accepts a :data:`rect_like` parameter. -* **Added** severeal new methods to improve and speed-up table of contents (TOC) handling. Among other things, TOC items can now changed or deleted individually -- without always replacing the complete TOC. Furthermore, access to some PDF page attributes is now possible without first **loading** the page. This has a very significant impact on the performance of TOC manipulation. -* **Added** an option to :meth:`Document.insert_pdf` which allows displaying progress messages. Adresses `#640 `_. +* **Added** several new methods to improve and speed-up table of contents (TOC) handling. Among other things, TOC items can now be changed or deleted individually -- without always replacing the complete TOC. Furthermore, access to some PDF page attributes is now possible without first **loading** the page. This has a very significant impact on the performance of TOC manipulation. +* **Added** an option to :meth:`Document.insert_pdf` which allows displaying progress messages. Addresses `#640 `_. * **Added** :meth:`Page.getTextbox` which extracts text contained in a rectangle. In many cases, this should obsolete writing your own script for this type of thing. * **Added** new ``clip`` parameter to :meth:`Page.getText` to simplify and speed up text extraction of page sub areas. * **Added** :meth:`TextWriter.appendv` to add text in **vertical write mode**. Addresses issue `#653 `_ @@ -1538,9 +1538,9 @@ This version is based on MuPDF v1.17. Following are highlights of new and change * **Added** extended language support for annotations and widgets: a mixture of Latin, Greece, Russian, Chinese, Japanese and Korean characters can now be used in 'FreeText' annotations and text widgets. No special arrangement is required to use it. -* Faster page access is implemented for documents supporting a "chapter" structure. This applies to EPUB documents currently. 
This comes with several new :ref:`Document` methods and changes for :meth:`Document.loadPage` and the "indexed" page access *doc[n]*: In addition to specifying a page number as before, a tuple *(chaper, pno)* can be specified to identify the desired page. +* Faster page access is implemented for documents supporting a "chapter" structure. This applies to EPUB documents currently. This comes with several new :ref:`Document` methods and changes for :meth:`Document.loadPage` and the "indexed" page access *doc[n]*: In addition to specifying a page number as before, a tuple *(chapter, pno)* can be specified to identify the desired page. -* **Changed:** Improved support of redaction annotations: images overlapped by redactions are **permanantly modified** by erasing the overlap areas. Also links are removed if overlapped by redactions. This is now fully in sync with PDF specifications. +* **Changed:** Improved support of redaction annotations: images overlapped by redactions are **permanently modified** by erasing the overlap areas. Also links are removed if overlapped by redactions. This is now fully in sync with PDF specifications. Other changes: @@ -1564,7 +1564,7 @@ Potential code breaking changes: This version introduces several new features around PDF text output. The motivation is to simplify this task, while at the same time offering extending features. -One major achievement is using MuPDF's capabilities to dynamically choosing fallback fonts whenever a character cannot be found in the current one. This seemlessly works for Base-14 fonts in combination with CJK fonts (China, Japan, Korea). So a text may contain **any combination of characters** from the Latin, Greek, Russian, Chinese, Japanese and Korean languages. +One major achievement is using MuPDF's capabilities to dynamically choose fallback fonts whenever a character cannot be found in the current one. This seamlessly works for Base-14 fonts in combination with CJK fonts (China, Japan, Korea). 
So a text may contain **any combination of characters** from the Latin, Greek, Russian, Chinese, Japanese and Korean languages. * **Fixed** issue `#493 `_. ``Pixmap(doc, xref)`` should now again correctly resemble the loaded image object. * **Fixed** issue `#488 `_. Widget names are now modifiable. @@ -1861,7 +1861,7 @@ List of change details: **Changes in Version 1.14.10** -* **Changed** :meth:`Page.show_pdf_page` to support rotation of the source rectangle. Fixes #261 ("Cannot rotate insterted pages"). +* **Changed** :meth:`Page.show_pdf_page` to support rotation of the source rectangle. Fixes #261 ("Cannot rotate inserted pages"). * **Fixed** a bug in :meth:`Page.insertImage` which prevented insertion of multiple images provided as streams. @@ -1983,7 +1983,7 @@ This version contains some technical / performance improvements and bug fixes. **Changes in Version 1.13.17** * **Fixed** an error that intermittently caused an exception in :meth:`Page.show_pdf_page`, when pages from many different source PDFs were shown. -* **Changed** method :meth:`Document.extractImage` to now return more meta information about the extracted imgage. Also, its performance has been greatly improved. Several demo scripts have been changed to make use of this method. +* **Changed** method :meth:`Document.extractImage` to now return more meta information about the extracted image. Also, its performance has been greatly improved. Several demo scripts have been changed to make use of this method. * **Changed** method :meth:`Document._getXrefStream` to now return *None* if the object is no stream and no longer raise an exception if otherwise. * **Added** method :meth:`Document._deleteObject` which deletes a PDF object identified by its :data:`xref`. Only to be used by the experienced PDF expert. * **Added** a method :meth:`paper_rect` which returns a :ref:`Rect` for a supplied paper format string. Example: *fitz.paper_rect("letter") = fitz.Rect(0.0, 0.0, 612.0, 792.0)*. 
@@ -2044,9 +2044,9 @@ This patch version contains several improvements for embedded files and file att **Changes in Version 1.13.11** -While the preceeding patch subversions only contained various fixes, this version again introduces major new features: +While the preceding patch subversions only contained various fixes, this version again introduces major new features: -* **Added** basic support for PDF widget annotations. You can now add PDF form fields of types Text, CheckBox, ListBox and ComboBox. Where necessary, the PDF is tranformed to a Form PDF with the first added widget. +* **Added** basic support for PDF widget annotations. You can now add PDF form fields of types Text, CheckBox, ListBox and ComboBox. Where necessary, the PDF is transformed to a Form PDF with the first added widget. * **Fixed** issues #176 ("wrong file embedding"), #177 ("segment fault when invoking page.getText()")and #179 ("Segmentation fault using page.getLinks() on encrypted PDF"). @@ -2102,7 +2102,7 @@ The major enhancement is PDF form field support. Form fields are annotations of **Changes in Version 1.13.1** -* :meth:`TextPage.extractDICT` is a new method to extract the contents of a document page (text and images). All document types are supported as with the other :ref:`TextPage` *extract*()* methods. The returned object is a dictionary of nested lists and other dictionaries, and **exactly equal** to the JSON-deserialization of the old :meth:`TextPage.extractJSON`. The difference is that the result is created directly -- no JSON module is used. Because the user needs no JSON module to interpet the information, it should be easier to use, and also have a better performance, because it contains images in their original **binary format** -- they need not be base64-decoded. +* :meth:`TextPage.extractDICT` is a new method to extract the contents of a document page (text and images). All document types are supported as with the other :ref:`TextPage` *extract*()* methods. 
The returned object is a dictionary of nested lists and other dictionaries, and **exactly equal** to the JSON-deserialization of the old :meth:`TextPage.extractJSON`. The difference is that the result is created directly -- no JSON module is used. Because the user needs no JSON module to interpret the information, it should be easier to use, and also have a better performance, because it contains images in their original **binary format** -- they need not be base64-decoded. * :meth:`Page.getText` correspondingly supports the new parameter value *"dict"* to invoke the above method. * :meth:`TextPage.extractJSON` (resp. *Page.getText("json")*) is still supported for convenience, but its use is expected to decline. @@ -2274,15 +2274,15 @@ Though MuPDF has declared it as being mostly a bug fix version, one major new fe MuPDF version 1.10 has a significant impact on our bindings. Some of the changes also affect the API -- in other words, **you** as a PyMuPDF user. -* Link destination information has been reduced. Several properties of the *linkDest* class no longer contain valuable information. In fact, this class as a whole has been deleted from MuPDF's library and we in PyMuPDF only maintain it to provide compatibilty to existing code. +* Link destination information has been reduced. Several properties of the *linkDest* class no longer contain valuable information. In fact, this class as a whole has been deleted from MuPDF's library and we in PyMuPDF only maintain it to provide compatibility to existing code. * In an effort to minimize memory requirements, several improvements have been built into MuPDF v1.10: - A new *config.h* file can be used to de-select unwanted features in the C base code. Using this feature we have been able to reduce the size of our binary *_fitz.o* / *_fitz.pyd* by about 50% (from 9 MB to 4.5 MB). When UPX-ing this, the size goes even further down to a very handy 2.3 MB. - - The alpha (transparency) channel for pixmaps is now optional. 
Letting alpha default to *False* significantly reduces pixmap sizes (by 20% -- CMYK, 25% -- RGB, 50% -- GRAY). Many *Pixmap* constructors therefore now accept an *alpha* boolean to control inclusion of this channel. Other pixmap constructors (e.g. those for file and image input) create pixmaps with no alpha alltogether. On the downside, save methods for pixmaps no longer accept a *savealpha* option: this channel will always be saved when present. To minimize code breaks, we have left this parameter in the call patterns -- it will just be ignored. + - The alpha (transparency) channel for pixmaps is now optional. Letting alpha default to *False* significantly reduces pixmap sizes (by 20% -- CMYK, 25% -- RGB, 50% -- GRAY). Many *Pixmap* constructors therefore now accept an *alpha* boolean to control inclusion of this channel. Other pixmap constructors (e.g. those for file and image input) create pixmaps with no alpha altogether. On the downside, save methods for pixmaps no longer accept a *savealpha* option: this channel will always be saved when present. To minimize code breaks, we have left this parameter in the call patterns -- it will just be ignored. -* *DisplayList* and *TextPage* class constructors now **require the mediabox** of the page they are referring to (i.e. the *page.bound()* rectangle). There is no way to construct this information from other sources, therefore a source code change cannot be avoided in these cases. We assume however, that not many users are actually employing these rather low level classes explixitely. So the impact of that change should be minor. +* *DisplayList* and *TextPage* class constructors now **require the mediabox** of the page they are referring to (i.e. the *page.bound()* rectangle). There is no way to construct this information from other sources, therefore a source code change cannot be avoided in these cases. We assume however, that not many users are actually employing these rather low level classes explicitly. 
So the impact of that change should be minor. **Other Changes compared to Version 1.9.3** diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 9ba00cc3d..3aff79a98 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -172,7 +172,7 @@ button.cta a { :target>h1:first-of-type, span:target~h1:first-of-type { background-color: #007aff !important; color: #fff !important; - padding-top: 40px; /* accomodates header search blocking target */ + padding-top: 40px; /* accommodates header search blocking target */ margin-top: -40px; } @@ -182,7 +182,7 @@ button.cta a { span:target~h2:first-of-type, span:target~h3:first-of-type, span:target~h4:first-of-type, span:target~h5:first-of-type, span:target~h6:first-of-type { background-color: transparent !important; - padding-top: 40px; /* accomodates header search blocking target */ + padding-top: 40px; /* accommodates header search blocking target */ margin-top: -40px; text-decoration: underline; } diff --git a/docs/annot.rst b/docs/annot.rst index 578e02bed..ac8eea942 100644 --- a/docs/annot.rst +++ b/docs/annot.rst @@ -15,7 +15,7 @@ There is a parent-child relationship between an annotation and its page. 
If the ================================== ============================================================== **Attribute** **Short Description** ================================== ============================================================== -:meth:`Annot.delete_responses` delete all responding annotions +:meth:`Annot.delete_responses` delete all responding annotations :meth:`Annot.get_file` get attached file content :meth:`Annot.get_oc` get :data:`xref` of an :data:`OCG` / :data:`OCMD` :meth:`Annot.get_pixmap` image of the annotation as a pixmap diff --git a/docs/footer.rst b/docs/footer.rst index bbce184ed..9acacaf63 100644 --- a/docs/footer.rst +++ b/docs/footer.rst @@ -44,7 +44,7 @@ document.getElementById("footerDisclaimer").innerHTML = getHeaderAndFooterTranslation("This software is provided AS-IS with no warranty, either express or implied. This software is distributed under license and may not be copied, modified or distributed except as expressly authorized under the terms of that license. Refer to licensing information at artifex.com or contact Artifex Software Inc., 39 Mesa Street, Suite 108A, San Francisco CA 94129, United States for further information."); - // more tranlsation for admonition-title as the in-built translation isn't great, needs: 注釈 -> 注 + // more translation for admonition-title as the in-built translation isn't great, needs: 注釈 -> 注 if (docLanguage == "ja") { const collection = document.getElementsByClassName("admonition-title"); for (var i=0;i= (1, 23, 8): return mupdf.fz_pixmap_size( self.this) - # fz_pixmap_size() is not publically visible, so we implement it + # fz_pixmap_size() is not publicly visible, so we implement it # ourselves. fixme: we don't add on sizeof(fz_pixmap). 
pm = self.this return pm.n() * pm.w() * pm.h() @@ -10859,7 +10859,7 @@ def is_convex(self): if p1.y * p2.y > 0: return False m = planish_line(self.ll, self.ur) # puts other diagonal on x-axis - p1 = self.lr * m # tranform the + p1 = self.lr * m # transform the p2 = self.ul * m # remaining points if p1.y * p2.y > 0: return False @@ -11294,8 +11294,8 @@ def draw_bezier( def draw_circle(self, center: point_like, radius: float):# -> Point: """Draw a circle given its center and radius.""" - if not radius > EPSILON: - raise ValueError("radius must be postive") + if not radius > EPSILON: + raise ValueError("radius must be positive") center = Point(center) p1 = center - (radius, 0) return self.draw_sector(center, p1, 360, fullSector=False) @@ -11884,7 +11884,7 @@ def add_pdf_links(document_or_stream, positions): `document_or_stream` if a `Document` instance, otherwise a new `Document` instance. We raise an exception if an `href` in `positions` refers to an - internal position `#` but no item in `postions` has `id = + internal position `#` but no item in `positions` has `id = name`. """ if isinstance(document_or_stream, Document): @@ -12216,7 +12216,7 @@ def ret(): def update(parameter): ''' Evaluates `more, _ = self.place(fn(parameter))`. If `more` is - false, then `rect` is big enought to contain `self` and we + false, then `rect` is big enough to contain `self` and we set `state.pmax=parameter` and return True. Otherwise we set `state.pmin=parameter` and return False. ''' @@ -12402,7 +12402,7 @@ def _extractText(self, format_): # fixme: mupdfwrap.py thinks fz_output is not copyable, possibly # because there is no .refs member visible and no fz_keep_output() fn, # although there is an fz_drop_output(). So mupdf.fz_new_output_with_buffer() - # doesn't convert the returnd fz_output* into a mupdf.FzOutput. + # doesn't convert the returned fz_output* into a mupdf.FzOutput. 
#out = mupdf.FzOutput(out) if format_ == 1: mupdf.fz_print_stext_page_as_html(out, this_tpage, 0) @@ -17002,7 +17002,7 @@ def JM_quad_from_py(r): def JM_read_contents(pageref): ''' - Read and concatenate a PDF page's /Conents object(s) in a buffer + Read and concatenate a PDF page's /Contents object(s) in a buffer ''' assert isinstance(pageref, mupdf.PdfObj), f'{type(pageref)}' contents = mupdf.pdf_dict_get(pageref, mupdf.PDF_ENUM_NAME_Contents) @@ -19147,7 +19147,7 @@ def write(self, ctx, data_raw, data_length): def compute_scissor(dev): ''' - Every scissor of a clip is a sub rectangle of the preceeding clip scissor + Every scissor of a clip is a sub rectangle of the preceding clip scissor if the clip level is larger. ''' if dev.scissors is None: @@ -19892,7 +19892,7 @@ def annot_preprocess(page: "Page") -> int: def annot_postprocess(page: "Page", annot: "Annot") -> None: - """Clean up after annotation inertion. + """Clean up after annotation insertion. Set ownership flag and store annotation in page annotation dictionary. """ @@ -20341,10 +20341,10 @@ def util_invert_matrix(matrix): or abs( matrix.c - 0) >= sys.float_info.epsilon or abs( matrix.d - 1) >= sys.float_info.epsilon ): - # Invertion not possible. + # Inversion not possible. return 1, () return 0, (ret.a, ret.b, ret.c, ret.d, ret.e, ret.f) - # Do invertion in python. + # Do inversion in python. src = JM_matrix_from_py(matrix) a = src.a det = a * src.d - src.b * src.c @@ -20811,7 +20811,7 @@ def sRGB_to_pdf(srgb: int) -> tuple: Args: srgb: (int) RRGGBB (red, green, blue), each color in range(255). Returns: - Tuple (red, green, blue) each item in intervall 0 <= item <= 1. + Tuple (red, green, blue) each item in interval 0 <= item <= 1. """ t = sRGB_to_rgb(srgb) return t[0] / 255.0, t[1] / 255.0, t[2] / 255.0 @@ -20825,7 +20825,7 @@ def sRGB_to_rgb(srgb: int) -> tuple: Args: srgb: (int) RRGGBB (red, green, blue), each color in range(255). 
Returns: - Tuple (red, green, blue) each item in intervall 0 <= item <= 255. + Tuple (red, green, blue) each item in interval 0 <= item <= 255. """ r = srgb >> 16 g = (srgb - (r << 16)) >> 8 diff --git a/tests/test_drawings.py b/tests/test_drawings.py index 92e52c7e3..459747a6f 100644 --- a/tests/test_drawings.py +++ b/tests/test_drawings.py @@ -180,7 +180,7 @@ def test_3207(): """Example graphics with multiple "close path" commands within same path. The fix translates a close-path commands into an additional line - which connects the current point with a preceeding "move" target. + which connects the current point with a preceding "move" target. The example page has 2 paths which each contain 2 close-path commands after 2 normal "line" commands, i.e. 2 command sequences "move-to, line-to, line-to, close-path". From 6f3a96b1badd52ec0f694e1aeac53ad587a62332 Mon Sep 17 00:00:00 2001 From: "Colin B. Macdonald" Date: Fri, 17 May 2024 16:11:55 -0700 Subject: [PATCH 08/18] CI: skip spell checking src_classic Assuming its not long for this mortal world (?). Also check locales for now. --- .github/workflows/test_quick.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_quick.yml b/.github/workflows/test_quick.yml index 2e3ed0bd5..bb429ac4b 100644 --- a/.github/workflows/test_quick.yml +++ b/.github/workflows/test_quick.yml @@ -50,4 +50,6 @@ jobs: exclude_file: .codespell-ignorelines check_filenames: true check_hidden: true - skip: "*.pdf" + skip: "*.pdf,src_classic,locales" + # if we don't fix everything we can set this + # only_warn: 1 From 873e4b10e6d51372c5bd5ef57ed2f7ff3e817eab Mon Sep 17 00:00:00 2001 From: "Colin B. 
Macdonald" Date: Fri, 17 May 2024 16:26:48 -0700 Subject: [PATCH 09/18] CI: ignore some lines that aren't errors --- .codespell-ignorelines | 5 +++++ .github/workflows/test_quick.yml | 2 +- docs/recipes-common-issues-and-their-solutions.rst | 4 +++- pipcl.py | 4 ++-- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.codespell-ignorelines b/.codespell-ignorelines index 776816d39..33c0b1bfd 100644 --- a/.codespell-ignorelines +++ b/.codespell-ignorelines @@ -8,3 +8,8 @@ # lines that codespell should ignore: whitespace matters! + function pressEnter() { + setTimeout(pressEnter,1000); +see `here `_. +Wallis and Futuna (France);Mata Utu;1029;8.9%;2018 + needle = "¡La práctica hace el campeón!" diff --git a/.github/workflows/test_quick.yml b/.github/workflows/test_quick.yml index bb429ac4b..18db2563a 100644 --- a/.github/workflows/test_quick.yml +++ b/.github/workflows/test_quick.yml @@ -50,6 +50,6 @@ jobs: exclude_file: .codespell-ignorelines check_filenames: true check_hidden: true - skip: "*.pdf,src_classic,locales" + skip: "*.pdf,src_classic,locales,prism.js" # if we don't fix everything we can set this # only_warn: 1 diff --git a/docs/recipes-common-issues-and-their-solutions.rst b/docs/recipes-common-issues-and-their-solutions.rst index 8317a4aa5..aaace7d3a 100644 --- a/docs/recipes-common-issues-and-their-solutions.rst +++ b/docs/recipes-common-issues-and-their-solutions.rst @@ -47,7 +47,9 @@ If a clean, non-corrupt / decompressed PDF is needed, one could dynamically invo print pdf.Info # do further processing -With the command line utility *pdftk* (`available `_ for Windows only, but reported to also run under `Wine `_) a similar result can be achieved, see `here `_. However, you must invoke it as a separate process via *subprocess.Popen*, using stdin and stdout as communication vehicles. +With the command line utility *pdftk* (`available `_ for Windows only, but reported to also run under `Wine `_) a similar result can be achieved, +see `here `_. 
+However, you must invoke it as a separate process via *subprocess.Popen*, using stdin and stdout as communication vehicles. diff --git a/pipcl.py b/pipcl.py index 0434e8af9..51f25f845 100644 --- a/pipcl.py +++ b/pipcl.py @@ -1471,7 +1471,7 @@ def build_extension( debug2 = '' if debug: debug2 = '/Zi' # Generate .pdb. - # debug2 = '/Z7' # Embded debug info in .obj files. + # debug2 = '/Z7' # Embed debug info in .obj files. # As of 2023-08-23, it looks like VS tools create slightly # .dll's each time, even with identical inputs. @@ -1786,7 +1786,7 @@ def git_items( directory, submodules=False): ret = [] for path in text.decode('utf8').strip().split( '\n'): path2 = os.path.join(directory, path) - # Sometimes git ls-files seems to list empty/non-existant directories + # Sometimes git ls-files seems to list empty/non-existent directories # within submodules. # if not os.path.exists(path2): From 36903ce757cb58e7049f26e2e50af5b05220b06b Mon Sep 17 00:00:00 2001 From: "Colin B. Macdonald" Date: Fri, 17 May 2024 16:31:17 -0700 Subject: [PATCH 10/18] Fix typo --- .github/workflows/cla.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml index 7620b285a..91a337f53 100644 --- a/.github/workflows/cla.yml +++ b/.github/workflows/cla.yml @@ -24,7 +24,7 @@ jobs: branch: 'CLA' allowlist: - # the followings are the optional inputs - If the optional inputs are not given, then default values will be taken + # the following are the optional inputs - If the optional inputs are not given, then default values will be taken #remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository) #remote-repository-name: enter the remote repository name where the signatures should be stored (Default is storing the signatures in the same repository) #create-file-commit-message: 'For example: Creating file for storing CLA Signatures' From 
ca4192664a15d77a381ee329556dcfb97d68ed29 Mon Sep 17 00:00:00 2001 From: "Colin B. Macdonald" Date: Fri, 17 May 2024 16:31:32 -0700 Subject: [PATCH 11/18] Fix a few typos in tests then remove from codespell Too many false positive from non-English for example. --- .github/workflows/test_quick.yml | 2 +- tests/test_general.py | 2 +- tests/test_pagedelete.py | 2 +- tests/test_remove-rotation.py | 2 +- tests/test_showpdfpage.py | 2 +- tests/test_story.py | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test_quick.yml b/.github/workflows/test_quick.yml index 18db2563a..5aecf7ce3 100644 --- a/.github/workflows/test_quick.yml +++ b/.github/workflows/test_quick.yml @@ -50,6 +50,6 @@ jobs: exclude_file: .codespell-ignorelines check_filenames: true check_hidden: true - skip: "*.pdf,src_classic,locales,prism.js" + skip: "*.pdf,src_classic,locales,prism.js,tests" # if we don't fix everything we can set this # only_warn: 1 diff --git a/tests/test_general.py b/tests/test_general.py index 3eac3727d..385bb796c 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -809,7 +809,7 @@ def test_2957_2(): page.apply_redactions() # remove/redact the word "longer" words1 = page.get_text("words") # extract words again assert len(words1) == len(words0) - 1 # must be one word less - assert words0[3][4] == "longer" # just confirm test file is correc one + assert words0[3][4] == "longer" # just confirm test file is correct one del words0[3] # remove the redacted word from first list for i in range(len(words1)): # compare words w1 = words1[i] # word after redaction diff --git a/tests/test_pagedelete.py b/tests/test_pagedelete.py index cde6812b3..73593fa29 100644 --- a/tests/test_pagedelete.py +++ b/tests/test_pagedelete.py @@ -83,7 +83,7 @@ def test_3094(): def test_3150(): """Assert correct functioning for problem file. 
- Implicitely also check use of new MuPDF function + Implicitly also check use of new MuPDF function pdf_rearrange_pages() since version 1.23.9. """ filename = os.path.join(scriptdir, "resources", "test-3150.pdf") diff --git a/tests/test_remove-rotation.py b/tests/test_remove-rotation.py index a25aa5a28..423c88113 100644 --- a/tests/test_remove-rotation.py +++ b/tests/test_remove-rotation.py @@ -10,7 +10,7 @@ def test_remove_rotation(): filename = os.path.join(scriptdir, "resources", "test-2812.pdf") doc = pymupdf.open(filename) - # We always create fresh pages to avoid false positves from cache content. + # We always create fresh pages to avoid false positives from cache content. # Text on these pages consists of pairwise different strings, sorting by # these strings must therefore yield identical bounding boxes. for i in range(1, doc.page_count): diff --git a/tests/test_showpdfpage.py b/tests/test_showpdfpage.py index fbfdbcb04..2e6b27a18 100644 --- a/tests/test_showpdfpage.py +++ b/tests/test_showpdfpage.py @@ -3,7 +3,7 @@ * Convert some image to a PDF * Insert it rotated in some rectangle of a PDF page * Assert PDF Form XObject has been created - * Assert that image contained in inserted PDF is inside given retangle + * Assert that image contained in inserted PDF is inside given rectangle """ import os diff --git a/tests/test_story.py b/tests/test_story.py index 824835658..adcb9d8c7 100644 --- a/tests/test_story.py +++ b/tests/test_story.py @@ -19,8 +19,8 @@ def test_story(): WHERE = MEDIABOX + (36, 36, -36, -36) # the font files are located in /home/chinese arch = pymupdf.Archive(".") - # if not specfied user_css, the output pdf has content - story = pymupdf.Story(HTML, user_css=CSS, archive=arch) + # if not specified user_css, the output pdf has content + story = pymupdf.Story(HTML, user_css=CSS, archive=arch) writer = pymupdf.DocumentWriter("output.pdf") From 4ffddf02cc1154a96b6718d486edc00dcc68ee35 Mon Sep 17 00:00:00 2001 From: "Colin B. 
Macdonald" Date: Fri, 17 May 2024 16:38:21 -0700 Subject: [PATCH 12/18] Few more typos and exceptions --- .codespell-ignorewords | 1 + changes.txt | 2 +- src/__init__.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.codespell-ignorewords b/.codespell-ignorewords index a2a6732cd..4861f910a 100644 --- a/.codespell-ignorewords +++ b/.codespell-ignorewords @@ -9,3 +9,4 @@ # words that codespell should not complain about re-use flate +fo diff --git a/changes.txt b/changes.txt index f3feddaf2..dab44cb92 100644 --- a/changes.txt +++ b/changes.txt @@ -1733,7 +1733,7 @@ Minor changes compared to version 1.16.2. The code of the "dict" and "rawdict" v * **Changed** text extraction methods of :ref:`Page` to allow detail control of the amount of extracted data. * **Added** :meth:`planish_line` which maps a given line (defined as a pair of points) to the x-axis. -* **Fixed** an issue (w/o Github number) which brought down the interpreter when encountering certain non-UTF-8 encodable characters while using :meth:`Page.getText` with te "dict" option. +* **Fixed** an issue (w/o Github number) which brought down the interpreter when encountering certain non-UTF-8 encodable characters while using :meth:`Page.getText` with the "dict" option. * **Fixed** issue #362 ("Memory Leak with getText('rawDICT')"). ------ diff --git a/src/__init__.py b/src/__init__.py index 768779a58..992e56111 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -17786,9 +17786,9 @@ def CheckMorph(o: typing.Any) -> bool: if not (type(o) in (list, tuple) and len(o) == 2): raise ValueError("morph must be a sequence of length 2") if not (len(o[0]) == 2 and len(o[1]) == 6): - raise ValueError("invalid morph parm 0") + raise ValueError("invalid morph param 0") if not o[1][4] == o[1][5] == 0: - raise ValueError("invalid morph parm 1") + raise ValueError("invalid morph param 1") return True From 82aa73a14d79e33a60118203de650895748637ab Mon Sep 17 00:00:00 2001 From: "Colin B. 
Macdonald" Date: Fri, 17 May 2024 16:42:49 -0700 Subject: [PATCH 13/18] Probably this is "Response" --- changes.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changes.txt b/changes.txt index dab44cb92..b32c60a54 100644 --- a/changes.txt +++ b/changes.txt @@ -1050,7 +1050,7 @@ This patch version implements minor improvements for :ref:`Pixmap` and also some * **Added** :meth:`Pixmap.warp` which makes a new pixmap from a given arbitrary convex quad inside the pixmap. -* **Added** :attr:`Annot.irt_xref` and :meth:`Annot.set_irt_xref` to inquire or set the `/IRT` ("In Responde To") property of an annotation. Implements `#1450 `_. +* **Added** :attr:`Annot.irt_xref` and :meth:`Annot.set_irt_xref` to inquire or set the `/IRT` ("In Response To") property of an annotation. Implements `#1450 `_. * **Added** :meth:`Rect.torect` and :meth:`IRect.torect` which compute a matrix that transforms to a given other rectangle. From 56502dc6022fbccc6c9fe2ac00430e896a706785 Mon Sep 17 00:00:00 2001 From: "Colin B. Macdonald" Date: Fri, 17 May 2024 16:43:05 -0700 Subject: [PATCH 14/18] Help anyone modifying this file It fails confusingly if you don't have a newline at the end of a file, and since not everyone's editor does that automatically, let's leave a comment for end of file. --- .codespell-ignorelines | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.codespell-ignorelines b/.codespell-ignorelines index 33c0b1bfd..f3a896e74 100644 --- a/.codespell-ignorelines +++ b/.codespell-ignorelines @@ -13,3 +13,5 @@ see `here `_. Wallis and Futuna (France);Mata Utu;1029;8.9%;2018 needle = "¡La práctica hace el campeón!" + +# end of file From f109d4f025e3e7718b7b430b6754755489bd65a9 Mon Sep 17 00:00:00 2001 From: "Colin B. 
Macdonald" Date: Wed, 22 May 2024 10:17:02 -0700 Subject: [PATCH 15/18] Undo whitespace change inconsistent with other code --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ad2375167..367b491f5 100755 --- a/setup.py +++ b/setup.py @@ -365,7 +365,7 @@ def get_git_id( directory): def get_mupdf_internal(out, location=None, sha=None, local_tgz=None): ''' Gets MuPDF as either a .tgz or a local directory. - + Args: out: Either 'dir' (we return name of local directory containing mupdf) or 'tgz' (we return From 02c3eaf8f709199e59018055b91155fbbf97a2dc Mon Sep 17 00:00:00 2001 From: "Colin B. Macdonald" Date: Wed, 22 May 2024 10:36:14 -0700 Subject: [PATCH 16/18] Add codespell to unit testing framework --- tests/test_codespell.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/test_codespell.py diff --git a/tests/test_codespell.py b/tests/test_codespell.py new file mode 100644 index 000000000..c38103d00 --- /dev/null +++ b/tests/test_codespell.py @@ -0,0 +1,24 @@ +import pymupdf + +import os +import subprocess + + +def test_codespell(): + ''' + Check spelling of the source tree with codespell. + ''' + if not hasattr(pymupdf, 'mupdf'): + print('Not running codespell with classic implementation.') + return + root = os.path.abspath(f'{__file__}/../..') + def run(command): + print(f'test_codespell(): Running: {command}') + prev_workdir = os.getcwd() + os.chdir(root) + subprocess.run(command, shell=1, check=1) + os.chdir(prev_workdir) + # careful: I don't think paths like `docs/locales` work + skips = "*.pdf,src_classic,locales,prism.js,tests" + run(f'codespell -x .codespell-ignorelines -I .codespell-ignorewords --skip {skips}') + print('test_codespell(): codespell succeeded.') From 3776f8c92f6d2757e0165f2e8ec10668ccb06569 Mon Sep 17 00:00:00 2001 From: "Colin B. 
Macdonald" Date: Wed, 22 May 2024 10:43:09 -0700 Subject: [PATCH 17/18] Remove comment that fails spell check --- src/__init__.py | 2 +- src/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/__init__.py b/src/__init__.py index 992e56111..b3a8f5f1c 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -11432,7 +11432,7 @@ def l5(a, b): list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm) )) - betar -= w90 # reduce parm angle by 90 deg + betar -= w90 alfa += w90 # advance start angle by 90 deg P = Q # advance to arc end point # draw (remaining) arc diff --git a/src/utils.py b/src/utils.py index 7fca326f0..dd647b066 100644 --- a/src/utils.py +++ b/src/utils.py @@ -3428,7 +3428,7 @@ def draw_sector( list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm) )) - betar -= w90 # reduce parm angle by 90 deg + betar -= w90 alfa += w90 # advance start angle by 90 deg P = Q # advance to arc end point # draw (remaining) arc From 1b2515262e8cbd6f6eb6fcf448607c0f9832b398 Mon Sep 17 00:00:00 2001 From: "Colin B. Macdonald" Date: Wed, 22 May 2024 10:47:28 -0700 Subject: [PATCH 18/18] Add codespell to deps for release --- scripts/gh_release.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gh_release.py b/scripts/gh_release.py index c72e5cc40..ef0660603 100755 --- a/scripts/gh_release.py +++ b/scripts/gh_release.py @@ -697,7 +697,7 @@ def platform_tag(): assert 0, f'Unrecognised: {platform.system()=}' -test_packages = 'pytest fontTools pymupdf-fonts flake8 pylint' +test_packages = 'pytest fontTools pymupdf-fonts flake8 pylint codespell' if platform.system() == 'Windows' and cpu_bits() == 32: # No pillow wheel available, and doesn't build easily. pass