diff --git a/.codespell-ignorelines b/.codespell-ignorelines
new file mode 100644
index 000000000..f3a896e74
--- /dev/null
+++ b/.codespell-ignorelines
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: FSFAP
+# Copyright (C) 2024 Colin B. Macdonald
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved. This file is offered as-is,
+# without any warranty.
+
+# lines that codespell should ignore: whitespace matters!
+
+ function pressEnter() {
+ setTimeout(pressEnter,1000);
+see `here `_.
+Wallis and Futuna (France);Mata Utu;1029;8.9%;2018
+ needle = "¡La práctica hace el campeón!"
+
+# end of file
diff --git a/.codespell-ignorewords b/.codespell-ignorewords
new file mode 100644
index 000000000..4861f910a
--- /dev/null
+++ b/.codespell-ignorewords
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: FSFAP
+# Copyright (C) 2024 Colin B. Macdonald
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved. This file is offered as-is,
+# without any warranty.
+
+# words that codespell should not complain about
+re-use
+flate
+fo
diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml
index 7620b285a..91a337f53 100644
--- a/.github/workflows/cla.yml
+++ b/.github/workflows/cla.yml
@@ -24,7 +24,7 @@ jobs:
branch: 'CLA'
allowlist:
- # the followings are the optional inputs - If the optional inputs are not given, then default values will be taken
+ # the following are the optional inputs - If the optional inputs are not given, then default values will be taken
#remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository)
#remote-repository-name: enter the remote repository name where the signatures should be stored (Default is storing the signatures in the same repository)
#create-file-commit-message: 'For example: Creating file for storing CLA Signatures'
diff --git a/.github/workflows/test_quick.yml b/.github/workflows/test_quick.yml
index 31d9d42da..5aecf7ce3 100644
--- a/.github/workflows/test_quick.yml
+++ b/.github/workflows/test_quick.yml
@@ -3,7 +3,7 @@ name: Test quick
on:
pull_request:
branches: [main]
-
+
workflow_dispatch:
jobs:
@@ -22,9 +22,9 @@ jobs:
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- uses: actions/setup-python@v2
-
+
- name: test_quick
env:
@@ -38,3 +38,18 @@ jobs:
inputs_wheels_windows_auto: "1"
run:
python scripts/gh_release.py
+
+
+ codespell:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: codespell-project/actions-codespell@v2
+ with:
+ ignore_words_file: .codespell-ignorewords
+ exclude_file: .codespell-ignorelines
+ check_filenames: true
+ check_hidden: true
+ skip: "*.pdf,src_classic,locales,prism.js,tests"
+ # if we don't fix everything we can set this
+ # only_warn: 1
diff --git a/changes.txt b/changes.txt
index f94e01d73..b32c60a54 100644
--- a/changes.txt
+++ b/changes.txt
@@ -43,7 +43,7 @@ Change Log
* Fixed some problems when checking for PDF properties.
* Fixed pip builds from sdist
(see discussion `3360 `_:
- Alpine linux docker build failing "No matching distribution found for pymupdfb==1.24.1").
+ Alpine Linux docker build failing "No matching distribution found for pymupdfb==1.24.1").
**Changes in version 1.24.2 (2024-04-17)**
@@ -94,13 +94,13 @@ Change Log
* **Fixed** `3281 `_: Preparing metadata (pyproject.toml) did not run successfully
* **Fixed** `3279 `_: PyMuPDF no longer builds in Alpine Linux
- * **Fixed** `3257 `_: apply_redactions() deleting text outside of annoted box
+ * **Fixed** `3257 `_: apply_redactions() deleting text outside of annotated box
* **Fixed** `3216 `_: AttributeError: 'Annot' object has no attribute '__del__'
* **Fixed** `3207 `_: get_drawings's items is missing line from h path operator
* **Fixed** `3201 `_: Memory leaks when merging PDFs
* **Fixed** `3197 `_: page.get_text() returns hexadecimal text for some characters
* **Fixed** `3196 `_: Remove text not working in 1.23.25 version vs 1.20.2
- * **Fixed** `3172 `_: PDF's 45º lines dissapearing in png conversion
+ * **Fixed** `3172 `_: PDF's 45º lines disappearing in png conversion
* **Fixed** `3135 `_: Do not log warnings to stdout
* **Fixed** `3125 `_: get_pixmap method stuck on one page and runs forever
* **Fixed** `2964 `_: There is an issue with the image generated by the page.get_pixmap() function
@@ -517,7 +517,7 @@ Change Log
* Bug fixes:
- * **Fixed** `2556 `_: Segmentation fault at caling get_cdrawings(extended=True)
+ * **Fixed** `2556 `_: Segmentation fault at calling get_cdrawings(extended=True)
* **Fixed** `2637 `_: Page.insert_textbox incorrectly handles the last word if it starts a new line
* **Fixed** `2683 `_: Windows sdist build failure - non-quoting of path and using UNIX which command
* **Fixed** `2691 `_: Page.get_textpage_ocr() bug in rebased fitz_new version
@@ -797,7 +797,7 @@ Change Log
* Improve ``insert_file()`` documentation.
- * ``get_bboxlog()``: aded optional ``layers`` to ``get_bboxlog()``.
+ * ``get_bboxlog()``: added optional ``layers`` to ``get_bboxlog()``.
* ``Page.get_texttrace()``: add new dictionary key ``layer``, name of Optional Content Group.
* Mention use of Python venv in installation documentation.
@@ -977,7 +977,7 @@ Changes to build/release process:
* **Added** new constants defining the default text extraction flags for more comfortable handling. Their naming convention is like :data:`TEXTFLAGS_WORDS` for ``page.get_text("words")``. See :ref:`text_extraction_flags`.
-* **Changed** :meth:`Page.annots` and :meth:`Page.widgets` to detect and prevent reloading the page (illegally) inside the iterator loops via :meth:`Document.reload_page`. Doing this brings down the interpretor. Documented clean ways to do annotation and widget mass updates within properly designed loops.
+* **Changed** :meth:`Page.annots` and :meth:`Page.widgets` to detect and prevent reloading the page (illegally) inside the iterator loops via :meth:`Document.reload_page`. Doing this brings down the interpreter. Documented clean ways to do annotation and widget mass updates within properly designed loops.
* **Changed** several internal utility functions to become standalone ("SWIG inline") as opposed to be part of the :ref:`Tools` class. This, among other things, increases the performance of geometry object creation.
@@ -1038,11 +1038,11 @@ This patch version implements minor improvements for :ref:`Pixmap` and also some
* **Fixed** `#1351 `_. Reverted code that introduced the memory growth in v1.18.15.
-* **Fixed** `#1417 `_. Developped circumvention for growth of open file handles using :meth:`Document.insert_pdf`.
+* **Fixed** `#1417 `_. Developed circumvention for growth of open file handles using :meth:`Document.insert_pdf`.
-* **Fixed** `#1418 `_. Developped circumvention for memory growth using :meth:`Document.insert_pdf`.
+* **Fixed** `#1418 `_. Developed circumvention for memory growth using :meth:`Document.insert_pdf`.
-* **Fixed** `#1430 `_. Developped circumvention for mass pixmap generations of document pages.
+* **Fixed** `#1430 `_. Developed circumvention for mass pixmap generations of document pages.
* **Fixed** `#1433 `_. Solves a bbox error for some Type 3 font in PyMuPDF text processing.
@@ -1050,7 +1050,7 @@ This patch version implements minor improvements for :ref:`Pixmap` and also some
* **Added** :meth:`Pixmap.warp` which makes a new pixmap from a given arbitrary convex quad inside the pixmap.
-* **Added** :attr:`Annot.irt_xref` and :meth:`Annot.set_irt_xref` to inquire or set the `/IRT` ("In Responde To") property of an annotation. Implements `#1450 `_.
+* **Added** :attr:`Annot.irt_xref` and :meth:`Annot.set_irt_xref` to inquire or set the `/IRT` ("In Response To") property of an annotation. Implements `#1450 `_.
* **Added** :meth:`Rect.torect` and :meth:`IRect.torect` which compute a matrix that transforms to a given other rectangle.
@@ -1114,7 +1114,7 @@ A new MuPDF feature is **journalling PDF updates**, which is also supported by t
A third feature (unrelated to the new MuPDF version) includes the ability to detect when page **objects cover or hide each other**. It is now e.g. possible to see that text is covered by a drawing or an image.
-* **Changed** terminology and meaning of important geometry concepts: Rectangles are now characterized as *finite*, *valid* or *empty*, while the definitions of these terms have also changed. Rectangles specifically are now thought of being "open": not all corners and sides are considered part of the retangle. Please do read the :ref:`Rect` section for details.
+* **Changed** terminology and meaning of important geometry concepts: Rectangles are now characterized as *finite*, *valid* or *empty*, while the definitions of these terms have also changed. Rectangles specifically are now thought of as being "open": not all corners and sides are considered part of the rectangle. Please do read the :ref:`Rect` section for details.
* **Added** new parameter `"no_new_id"` to :meth:`Document.save` / :meth:`Document.tobytes` methods. Use it to suppress updating the second item of the document ``/ID`` which in PDF indicates that the original file has been updated. If the PDF has no ``/ID`` at all yet, then no new one will be created either.
@@ -1244,7 +1244,7 @@ Focus of this version are major performance improvements of selected functions.
* **Added** documentation for handling transparent image insertions, :meth:`Page.insert_image`.
* **Added** :meth:`Page.get_image_rects`, an improved version of :meth:`Page.get_image_bbox`.
* **Changed** :meth:`Document.delete_pages` to support various ways of specifying pages to delete. Implements `#1042 `_.
-* **Changed** :meth:`Page.insert_image` to also accept the xref of an existing image in the file. This allows "copying" images between pages, and extremely fast mutiple insertions.
+* **Changed** :meth:`Page.insert_image` to also accept the xref of an existing image in the file. This allows "copying" images between pages, and extremely fast multiple insertions.
* **Changed** :meth:`Page.insert_image` to also accept the integer parameter ``alpha``. To be used for performance improvements.
* **Changed** :meth:`Pixmap.set_alpha` to support new parameters for pre-multiplying colors with their alpha values and setting a specific color to fully transparent (e.g. white).
* **Changed** :meth:`Document.embfile_add` to automatically set creation and modification date-time. Correspondingly, :meth:`Document.embfile_upd` automatically maintains modification date-time (``/ModDate`` PDF key), and :meth:`Document.embfile_info` correspondingly reports these data. In addition, the embedded file's associated "collection item" is included via its :data:`xref`. This supports the development of PDF portfolio applications.
@@ -1282,7 +1282,7 @@ Focus of this version are major performance improvements of selected functions.
* **Fixed** issue `#895 `_.
* **Fixed** issue `#896 `_. Since v1.17.6 PyMuPDF suppresses the font subset tags and only reports the base fontname in text extraction outputs "dict" / "json" / "rawdict" / "rawjson". Now a new global parameter can request the old behaviour, :meth:`Tools.set_subset_fontnames`.
* **Fixed** issue `#885 `_. Pixmap creation now also works with filenames given as ``pathlib.Paths``.
-* **Changed** :meth:`Document.subset_fonts`: Text is **not rewritten** any more and should therefore **retain all its origial properties** -- like being hidden or being controlled by Optional Content mechanisms.
+* **Changed** :meth:`Document.subset_fonts`: Text is **not rewritten** any more and should therefore **retain all its original properties** -- like being hidden or being controlled by Optional Content mechanisms.
* **Changed** :ref:`TextWriter` output to also accept text in right to left mode (Arabian, Hebrew): :meth:`TextWriter.fill_textbox`, :meth:`TextWriter.append`. These methods now accept a new boolean parameter `right_to_left`, which is *False* by default. Implements `#897 `_.
* **Changed** :meth:`TextWriter.fill_textbox` to return all lines of text, that did not fit in the given rectangle. Also changed the default of the ``warn`` parameter to no longer print a warning message in overflow situations.
* **Added** a utility function :meth:`recover_quad`, which computes the quadrilateral of a span. This function can be used for correctly marking text extracted with the "dict" or "rawdict" options of :meth:`Page.get_text`.
@@ -1458,8 +1458,8 @@ This is the first PyMuPDF version supporting MuPDF v1.18. The focus here is on e
* **Fixed** issue `#651 `_. An upstream bug causing interpreter crashes in corner case redaction processings was fixed by backporting MuPDF changes from their development repo.
* **Fixed** issue `#645 `_. Pixmap top-left coordinates can be set (again) by their own method, :meth:`Pixmap.set_origin`.
* **Fixed** issue `#622 `_. :meth:`Page.insertImage` again accepts a :data:`rect_like` parameter.
-* **Added** severeal new methods to improve and speed-up table of contents (TOC) handling. Among other things, TOC items can now changed or deleted individually -- without always replacing the complete TOC. Furthermore, access to some PDF page attributes is now possible without first **loading** the page. This has a very significant impact on the performance of TOC manipulation.
-* **Added** an option to :meth:`Document.insert_pdf` which allows displaying progress messages. Adresses `#640 `_.
+* **Added** several new methods to improve and speed up table of contents (TOC) handling. Among other things, TOC items can now be changed or deleted individually -- without always replacing the complete TOC. Furthermore, access to some PDF page attributes is now possible without first **loading** the page. This has a very significant impact on the performance of TOC manipulation.
+* **Added** an option to :meth:`Document.insert_pdf` which allows displaying progress messages. Addresses `#640 `_.
* **Added** :meth:`Page.getTextbox` which extracts text contained in a rectangle. In many cases, this should obsolete writing your own script for this type of thing.
* **Added** new ``clip`` parameter to :meth:`Page.getText` to simplify and speed up text extraction of page sub areas.
* **Added** :meth:`TextWriter.appendv` to add text in **vertical write mode**. Addresses issue `#653 `_
@@ -1538,9 +1538,9 @@ This version is based on MuPDF v1.17. Following are highlights of new and change
* **Added** extended language support for annotations and widgets: a mixture of Latin, Greece, Russian, Chinese, Japanese and Korean characters can now be used in 'FreeText' annotations and text widgets. No special arrangement is required to use it.
-* Faster page access is implemented for documents supporting a "chapter" structure. This applies to EPUB documents currently. This comes with several new :ref:`Document` methods and changes for :meth:`Document.loadPage` and the "indexed" page access *doc[n]*: In addition to specifying a page number as before, a tuple *(chaper, pno)* can be specified to identify the desired page.
+* Faster page access is implemented for documents supporting a "chapter" structure. This applies to EPUB documents currently. This comes with several new :ref:`Document` methods and changes for :meth:`Document.loadPage` and the "indexed" page access *doc[n]*: In addition to specifying a page number as before, a tuple *(chapter, pno)* can be specified to identify the desired page.
-* **Changed:** Improved support of redaction annotations: images overlapped by redactions are **permanantly modified** by erasing the overlap areas. Also links are removed if overlapped by redactions. This is now fully in sync with PDF specifications.
+* **Changed:** Improved support of redaction annotations: images overlapped by redactions are **permanently modified** by erasing the overlap areas. Also links are removed if overlapped by redactions. This is now fully in sync with PDF specifications.
Other changes:
@@ -1564,7 +1564,7 @@ Potential code breaking changes:
This version introduces several new features around PDF text output. The motivation is to simplify this task, while at the same time offering extending features.
-One major achievement is using MuPDF's capabilities to dynamically choosing fallback fonts whenever a character cannot be found in the current one. This seemlessly works for Base-14 fonts in combination with CJK fonts (China, Japan, Korea). So a text may contain **any combination of characters** from the Latin, Greek, Russian, Chinese, Japanese and Korean languages.
+One major achievement is using MuPDF's capabilities to dynamically choose fallback fonts whenever a character cannot be found in the current one. This seamlessly works for Base-14 fonts in combination with CJK fonts (China, Japan, Korea). So a text may contain **any combination of characters** from the Latin, Greek, Russian, Chinese, Japanese and Korean languages.
* **Fixed** issue `#493 `_. ``Pixmap(doc, xref)`` should now again correctly resemble the loaded image object.
* **Fixed** issue `#488 `_. Widget names are now modifiable.
@@ -1733,7 +1733,7 @@ Minor changes compared to version 1.16.2. The code of the "dict" and "rawdict" v
* **Changed** text extraction methods of :ref:`Page` to allow detail control of the amount of extracted data.
* **Added** :meth:`planish_line` which maps a given line (defined as a pair of points) to the x-axis.
-* **Fixed** an issue (w/o Github number) which brought down the interpreter when encountering certain non-UTF-8 encodable characters while using :meth:`Page.getText` with te "dict" option.
+* **Fixed** an issue (w/o GitHub number) which brought down the interpreter when encountering certain non-UTF-8 encodable characters while using :meth:`Page.getText` with the "dict" option.
* **Fixed** issue #362 ("Memory Leak with getText('rawDICT')").
------
@@ -1861,7 +1861,7 @@ List of change details:
**Changes in Version 1.14.10**
-* **Changed** :meth:`Page.show_pdf_page` to support rotation of the source rectangle. Fixes #261 ("Cannot rotate insterted pages").
+* **Changed** :meth:`Page.show_pdf_page` to support rotation of the source rectangle. Fixes #261 ("Cannot rotate inserted pages").
* **Fixed** a bug in :meth:`Page.insertImage` which prevented insertion of multiple images provided as streams.
@@ -1983,7 +1983,7 @@ This version contains some technical / performance improvements and bug fixes.
**Changes in Version 1.13.17**
* **Fixed** an error that intermittently caused an exception in :meth:`Page.show_pdf_page`, when pages from many different source PDFs were shown.
-* **Changed** method :meth:`Document.extractImage` to now return more meta information about the extracted imgage. Also, its performance has been greatly improved. Several demo scripts have been changed to make use of this method.
+* **Changed** method :meth:`Document.extractImage` to now return more meta information about the extracted image. Also, its performance has been greatly improved. Several demo scripts have been changed to make use of this method.
* **Changed** method :meth:`Document._getXrefStream` to now return *None* if the object is no stream and no longer raise an exception if otherwise.
* **Added** method :meth:`Document._deleteObject` which deletes a PDF object identified by its :data:`xref`. Only to be used by the experienced PDF expert.
* **Added** a method :meth:`paper_rect` which returns a :ref:`Rect` for a supplied paper format string. Example: *fitz.paper_rect("letter") = fitz.Rect(0.0, 0.0, 612.0, 792.0)*.
@@ -2044,9 +2044,9 @@ This patch version contains several improvements for embedded files and file att
**Changes in Version 1.13.11**
-While the preceeding patch subversions only contained various fixes, this version again introduces major new features:
+While the preceding patch subversions only contained various fixes, this version again introduces major new features:
-* **Added** basic support for PDF widget annotations. You can now add PDF form fields of types Text, CheckBox, ListBox and ComboBox. Where necessary, the PDF is tranformed to a Form PDF with the first added widget.
+* **Added** basic support for PDF widget annotations. You can now add PDF form fields of types Text, CheckBox, ListBox and ComboBox. Where necessary, the PDF is transformed to a Form PDF with the first added widget.
* **Fixed** issues #176 ("wrong file embedding"), #177 ("segment fault when invoking page.getText()")and #179 ("Segmentation fault using page.getLinks() on encrypted PDF").
@@ -2102,7 +2102,7 @@ The major enhancement is PDF form field support. Form fields are annotations of
**Changes in Version 1.13.1**
-* :meth:`TextPage.extractDICT` is a new method to extract the contents of a document page (text and images). All document types are supported as with the other :ref:`TextPage` *extract*()* methods. The returned object is a dictionary of nested lists and other dictionaries, and **exactly equal** to the JSON-deserialization of the old :meth:`TextPage.extractJSON`. The difference is that the result is created directly -- no JSON module is used. Because the user needs no JSON module to interpet the information, it should be easier to use, and also have a better performance, because it contains images in their original **binary format** -- they need not be base64-decoded.
+* :meth:`TextPage.extractDICT` is a new method to extract the contents of a document page (text and images). All document types are supported as with the other :ref:`TextPage` *extract*()* methods. The returned object is a dictionary of nested lists and other dictionaries, and **exactly equal** to the JSON-deserialization of the old :meth:`TextPage.extractJSON`. The difference is that the result is created directly -- no JSON module is used. Because the user needs no JSON module to interpret the information, it should be easier to use, and also have better performance, because it contains images in their original **binary format** -- they need not be base64-decoded.
* :meth:`Page.getText` correspondingly supports the new parameter value *"dict"* to invoke the above method.
* :meth:`TextPage.extractJSON` (resp. *Page.getText("json")*) is still supported for convenience, but its use is expected to decline.
@@ -2274,15 +2274,15 @@ Though MuPDF has declared it as being mostly a bug fix version, one major new fe
MuPDF version 1.10 has a significant impact on our bindings. Some of the changes also affect the API -- in other words, **you** as a PyMuPDF user.
-* Link destination information has been reduced. Several properties of the *linkDest* class no longer contain valuable information. In fact, this class as a whole has been deleted from MuPDF's library and we in PyMuPDF only maintain it to provide compatibilty to existing code.
+* Link destination information has been reduced. Several properties of the *linkDest* class no longer contain valuable information. In fact, this class as a whole has been deleted from MuPDF's library and we in PyMuPDF only maintain it to provide compatibility to existing code.
* In an effort to minimize memory requirements, several improvements have been built into MuPDF v1.10:
- A new *config.h* file can be used to de-select unwanted features in the C base code. Using this feature we have been able to reduce the size of our binary *_fitz.o* / *_fitz.pyd* by about 50% (from 9 MB to 4.5 MB). When UPX-ing this, the size goes even further down to a very handy 2.3 MB.
- - The alpha (transparency) channel for pixmaps is now optional. Letting alpha default to *False* significantly reduces pixmap sizes (by 20% -- CMYK, 25% -- RGB, 50% -- GRAY). Many *Pixmap* constructors therefore now accept an *alpha* boolean to control inclusion of this channel. Other pixmap constructors (e.g. those for file and image input) create pixmaps with no alpha alltogether. On the downside, save methods for pixmaps no longer accept a *savealpha* option: this channel will always be saved when present. To minimize code breaks, we have left this parameter in the call patterns -- it will just be ignored.
+ - The alpha (transparency) channel for pixmaps is now optional. Letting alpha default to *False* significantly reduces pixmap sizes (by 20% -- CMYK, 25% -- RGB, 50% -- GRAY). Many *Pixmap* constructors therefore now accept an *alpha* boolean to control inclusion of this channel. Other pixmap constructors (e.g. those for file and image input) create pixmaps with no alpha altogether. On the downside, save methods for pixmaps no longer accept a *savealpha* option: this channel will always be saved when present. To minimize code breaks, we have left this parameter in the call patterns -- it will just be ignored.
-* *DisplayList* and *TextPage* class constructors now **require the mediabox** of the page they are referring to (i.e. the *page.bound()* rectangle). There is no way to construct this information from other sources, therefore a source code change cannot be avoided in these cases. We assume however, that not many users are actually employing these rather low level classes explixitely. So the impact of that change should be minor.
+* *DisplayList* and *TextPage* class constructors now **require the mediabox** of the page they are referring to (i.e. the *page.bound()* rectangle). There is no way to construct this information from other sources, therefore a source code change cannot be avoided in these cases. We assume however, that not many users are actually employing these rather low level classes explicitly. So the impact of that change should be minor.
**Other Changes compared to Version 1.9.3**
diff --git a/docs/_static/custom.css b/docs/_static/custom.css
index 9ba00cc3d..3aff79a98 100644
--- a/docs/_static/custom.css
+++ b/docs/_static/custom.css
@@ -172,7 +172,7 @@ button.cta a {
:target>h1:first-of-type, span:target~h1:first-of-type {
background-color: #007aff !important;
color: #fff !important;
- padding-top: 40px; /* accomodates header search blocking target */
+ padding-top: 40px; /* accommodates header search blocking target */
margin-top: -40px;
}
@@ -182,7 +182,7 @@ button.cta a {
span:target~h2:first-of-type, span:target~h3:first-of-type,
span:target~h4:first-of-type, span:target~h5:first-of-type, span:target~h6:first-of-type {
background-color: transparent !important;
- padding-top: 40px; /* accomodates header search blocking target */
+ padding-top: 40px; /* accommodates header search blocking target */
margin-top: -40px;
text-decoration: underline;
}
diff --git a/docs/annot.rst b/docs/annot.rst
index 578e02bed..ac8eea942 100644
--- a/docs/annot.rst
+++ b/docs/annot.rst
@@ -15,7 +15,7 @@ There is a parent-child relationship between an annotation and its page. If the
================================== ==============================================================
**Attribute** **Short Description**
================================== ==============================================================
-:meth:`Annot.delete_responses` delete all responding annotions
+:meth:`Annot.delete_responses` delete all responding annotations
:meth:`Annot.get_file` get attached file content
:meth:`Annot.get_oc` get :data:`xref` of an :data:`OCG` / :data:`OCMD`
:meth:`Annot.get_pixmap` image of the annotation as a pixmap
diff --git a/docs/footer.rst b/docs/footer.rst
index bbce184ed..9acacaf63 100644
--- a/docs/footer.rst
+++ b/docs/footer.rst
@@ -44,7 +44,7 @@
document.getElementById("footerDisclaimer").innerHTML = getHeaderAndFooterTranslation("This software is provided AS-IS with no warranty, either express or implied. This software is distributed under license and may not be copied, modified or distributed except as expressly authorized under the terms of that license. Refer to licensing information at artifex.com or contact Artifex Software Inc., 39 Mesa Street, Suite 108A, San Francisco CA 94129, United States for further information.");
- // more tranlsation for admonition-title as the in-built translation isn't great, needs: 注釈 -> 注
+ // more translation for admonition-title as the in-built translation isn't great, needs: 注釈 -> 注
if (docLanguage == "ja") {
const collection = document.getElementsByClassName("admonition-title");
for (var i=0;i`_ for Windows only, but reported to also run under `Wine `_) a similar result can be achieved, see `here `_. However, you must invoke it as a separate process via *subprocess.Popen*, using stdin and stdout as communication vehicles.
+With the command line utility *pdftk* (`available `_ for Windows only, but reported to also run under `Wine `_) a similar result can be achieved,
+see `here `_.
+However, you must invoke it as a separate process via *subprocess.Popen*, using stdin and stdout as communication vehicles.
diff --git a/docs/recipes-optional-content.rst b/docs/recipes-optional-content.rst
index 17b5fae25..6e201e642 100644
--- a/docs/recipes-optional-content.rst
+++ b/docs/recipes-optional-content.rst
@@ -42,7 +42,7 @@ If you want to put an **existing** image under the control of an OCG, you must f
To **remove** an OCG from an image, do `doc.set_oc(img_xref, 0)`.
-One single OCG can be assigned to mutiple PDF objects to control their visibility.
+One single OCG can be assigned to multiple PDF objects to control their visibility.
How to Define Complex Optional Content Conditions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/recipes-stories.rst b/docs/recipes-stories.rst
index e0f4ffa49..d633b9281 100644
--- a/docs/recipes-stories.rst
+++ b/docs/recipes-stories.rst
@@ -335,7 +335,7 @@ Outputting HTML tables is supported as follows:
* Column widths are computed automatically based on column content. They cannot be directly set.
* Table **cells may contain images** which will be considered in the column width calculation magic.
* Row heights are computed automatically based on row content - leading to multi-line rows where needed.
-* The potentially multiple lines of a table row will always be kept together on one page (respectively "where" rectangle) and not be splitted.
+* The potentially multiple lines of a table row will always be kept together on one page (respectively "where" rectangle) and not be split.
* Table header rows are only **shown on the first page / "where" rectangle.**
* The "style" attribute is ignored when given directly in HTML table elements. Styling for a table and its elements must happen separately, in CSS source or within the :htmlTag:`style` tag.
* Styling for :htmlTag:`tr` elements is not supported and ignored. Therefore, a table-wide grid or alternating row background colors are not supported. One of the following example scripts however shows an easy way to deal with this limitation.
diff --git a/docs/recipes-text.rst b/docs/recipes-text.rst
index ccc4cb8ad..46d67f34f 100644
--- a/docs/recipes-text.rst
+++ b/docs/recipes-text.rst
@@ -612,7 +612,7 @@ As these four font files are located in the system's folder `C:/Windows/Fonts` t
# These statements define which font file to use for regular, bold,
# italic and bold-italic text.
- # We assign an arbitary common font-family for all 4 font files.
+ # We assign an arbitrary common font-family for all 4 font files.
# The Story algorithm will select the right file as required.
# We request to use "comic" throughout the text.
css = """
diff --git a/docs/the-basics.rst b/docs/the-basics.rst
index 1699960f8..731afb852 100644
--- a/docs/the-basics.rst
+++ b/docs/the-basics.rst
@@ -1001,7 +1001,7 @@ Annotations (:ref:`Annot`) on pages can be retrieved with the `page.annots()` me
Redacting content from a **PDF**
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Redactions are special types of annotations which can be marked onto a document page to denote an area on the page which should be securely removed. After marking an area with a rectangle then this area will be marked for *redaction*, once the redaction is *applied* then the content is securly removed.
+Redactions are special types of annotations which can be marked onto a document page to denote an area on the page which should be securely removed. After marking an area with a rectangle then this area will be marked for *redaction*, once the redaction is *applied* then the content is securely removed.
For example if we wanted to redact all instances of the name "Jane Doe" from a document we could do the following:
diff --git a/docs/vars.rst b/docs/vars.rst
index 5d5868f0a..1a60e1d09 100644
--- a/docs/vars.rst
+++ b/docs/vars.rst
@@ -274,7 +274,7 @@ Possible values of :attr:`linkDest.kind` (link destination kind).
3 -- Launch (open) another file (of any "executable" type).
- * |PyMuPDF| treats any external link that starts wth `file:` or doesn't
+ * |PyMuPDF| treats any external link that starts with `file:` or doesn't
contain a colon, as `LINK_LAUNCH`.
:rtype: int
diff --git a/pipcl.py b/pipcl.py
index 0434e8af9..51f25f845 100644
--- a/pipcl.py
+++ b/pipcl.py
@@ -1471,7 +1471,7 @@ def build_extension(
debug2 = ''
if debug:
debug2 = '/Zi' # Generate .pdb.
- # debug2 = '/Z7' # Embded debug info in .obj files.
+ # debug2 = '/Z7' # Embed debug info in .obj files.
# As of 2023-08-23, it looks like VS tools create slightly
# .dll's each time, even with identical inputs.
@@ -1786,7 +1786,7 @@ def git_items( directory, submodules=False):
ret = []
for path in text.decode('utf8').strip().split( '\n'):
path2 = os.path.join(directory, path)
- # Sometimes git ls-files seems to list empty/non-existant directories
+ # Sometimes git ls-files seems to list empty/non-existent directories
# within submodules.
#
if not os.path.exists(path2):
diff --git a/scripts/gh_release.py b/scripts/gh_release.py
index c72e5cc40..ef0660603 100755
--- a/scripts/gh_release.py
+++ b/scripts/gh_release.py
@@ -697,7 +697,7 @@ def platform_tag():
assert 0, f'Unrecognised: {platform.system()=}'
-test_packages = 'pytest fontTools pymupdf-fonts flake8 pylint'
+test_packages = 'pytest fontTools pymupdf-fonts flake8 pylint codespell'
if platform.system() == 'Windows' and cpu_bits() == 32:
# No pillow wheel available, and doesn't build easily.
pass
diff --git a/scripts/test.py b/scripts/test.py
index ed74c676c..62e0e291e 100755
--- a/scripts/test.py
+++ b/scripts/test.py
@@ -58,7 +58,7 @@
Location of local mupdf/ directory or 'git:...' to be used
when building PyMuPDF. [This sets environment variable
PYMUPDF_SETUP_MUPDF_BUILD, which is used by PyMuPDF/setup.py. If not
- specifed PyMuPDF will download its default mupdf .tgz.]
+ specified PyMuPDF will download its default mupdf .tgz.]
-p
Set pytest options; default is ''.
-t
@@ -422,7 +422,7 @@ def test(
elif gdb:
command = f'{python} {pymupdf_dir_rel}/tests/run_compound.py{run_compound_args} gdb --args {python} -m pytest {pytest_options} {pytest_arg}'
elif platform.system() == 'Windows':
- # `python -m pytest` doesn' seem to work.
+ # `python -m pytest` doesn't seem to work.
command = f'{python} {pymupdf_dir_rel}/tests/run_compound.py{run_compound_args} pytest {pytest_options} {pytest_arg}'
else:
# On OpenBSD `pip install pytest` doesn't seem to install the pytest
diff --git a/setup.py b/setup.py
index e2b426015..367b491f5 100755
--- a/setup.py
+++ b/setup.py
@@ -373,8 +373,8 @@ def get_mupdf_internal(out, location=None, sha=None, local_tgz=None):
location:
First, if None we set to hard-coded default URL or git location.
If starts with 'git:', should be remote git location.
- Otherwise if containg '://' should be URL for .tgz.
- Otherwise shuld path of local mupdf checkout.
+ Otherwise if containing '://' should be URL for .tgz.
+ Otherwise should be path of local mupdf checkout.
sha:
If not None and we use git clone, we checkout this sha.
local_tgz:
@@ -668,7 +668,7 @@ def env_add(env, name, value, sep=' ', prepend=False, verbose=False):
'''
 Appends/prepends `<value>` to `env[name]`.
- If `name` is not in `env`, we use os.environ[nane] if it exists.
+ If `name` is not in `env`, we use os.environ[name] if it exists.
'''
v = env.get(name)
if verbose:
diff --git a/src/__init__.py b/src/__init__.py
index 3f307a7db..b3a8f5f1c 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -2369,7 +2369,7 @@ def set_properties(
):
"""Set any or all properties of a node.
- To be used for existing nodes preferrably.
+ To be used for existing nodes preferably.
"""
root = self.root
temp = root.add_division()
@@ -2659,7 +2659,7 @@ def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0
on open (e.g. EPUB). Ignored if n/a.
"""
# We temporarily set JM_mupdf_show_errors=0 while we are constructing,
- # then restore its orginal value in a `finally:` block.
+ # then restore its original value in a `finally:` block.
#
global JM_mupdf_show_errors
JM_mupdf_show_errors_old = JM_mupdf_show_errors
@@ -4393,7 +4393,7 @@ def insert_file(self,
Insert an arbitrary supported document to an existing PDF.
The infile may be given as a filename, a Document or a Pixmap.
- Other paramters - where applicable - equal those of insert_pdf().
+ Other parameters - where applicable - equal those of insert_pdf().
'''
src = None
if isinstance(infile, Pixmap):
@@ -6909,7 +6909,7 @@ def __init__(self):
self.text_fontsize = 0
self.text_maxlen = 0 # text fields only
self.text_format = 0 # text fields only
- self._text_da = "" # /DA = default apparance
+ self._text_da = "" # /DA = default appearance
self.script = None # JavaScript (/A)
self.script_stroke = None # JavaScript (/AA/K)
@@ -7949,7 +7949,7 @@ def _insert_image(self,
#log( 'do_have_imask')
# mupdf.FzCompressedBuffer is not copyable, so
# mupdf.fz_compressed_image_buffer() does not work - it cannot
- # return by value. And sharing a fz_compressed_buffer betwen two
+ # return by value. And sharing a fz_compressed_buffer between two
# `fz_image`'s doesn't work, so we use a raw fz_compressed_buffer
# here, not a mupdf.FzCompressedBuffer.
#
@@ -7981,7 +7981,7 @@ def _insert_image(self,
# `fz_compressed_buffer`, which is not reference counted, and they
# both think that they own it.
#
- # So we do what the classic implementataion does, and simply ensure
+ # So we do what the classic implementation does, and simply ensure
# that `fz_drop_image(image)` is never called. This will leak
# some of `image`'s allocations (for example the main `fz_image`
# allocation), but it's not trivial to avoid this.
@@ -10468,7 +10468,7 @@ def size(self):
"""Pixmap size."""
if mupdf_version_tuple >= (1, 23, 8):
return mupdf.fz_pixmap_size( self.this)
- # fz_pixmap_size() is not publically visible, so we implement it
+ # fz_pixmap_size() is not publicly visible, so we implement it
# ourselves. fixme: we don't add on sizeof(fz_pixmap).
pm = self.this
return pm.n() * pm.w() * pm.h()
@@ -10859,7 +10859,7 @@ def is_convex(self):
if p1.y * p2.y > 0:
return False
m = planish_line(self.ll, self.ur) # puts other diagonal on x-axis
- p1 = self.lr * m # tranform the
+ p1 = self.lr * m # transform the
p2 = self.ul * m # remaining points
if p1.y * p2.y > 0:
return False
@@ -11294,8 +11294,8 @@ def draw_bezier(
def draw_circle(self, center: point_like, radius: float):# -> Point:
"""Draw a circle given its center and radius."""
- if not radius > EPSILON:
- raise ValueError("radius must be postive")
+ if not radius > EPSILON:
+ raise ValueError("radius must be positive")
center = Point(center)
p1 = center - (radius, 0)
return self.draw_sector(center, p1, 360, fullSector=False)
@@ -11432,7 +11432,7 @@ def l5(a, b):
list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
))
- betar -= w90 # reduce parm angle by 90 deg
+ betar -= w90 # reduce param angle by 90 deg
alfa += w90 # advance start angle by 90 deg
P = Q # advance to arc end point
# draw (remaining) arc
@@ -11884,7 +11884,7 @@ def add_pdf_links(document_or_stream, positions):
`document_or_stream` if a `Document` instance, otherwise a
new `Document` instance.
We raise an exception if an `href` in `positions` refers to an
- internal position `#<name>` but no item in `postions` has `id =
+ internal position `#<name>` but no item in `positions` has `id =
name`.
"""
if isinstance(document_or_stream, Document):
@@ -12216,7 +12216,7 @@ def ret():
def update(parameter):
'''
Evaluates `more, _ = self.place(fn(parameter))`. If `more` is
- false, then `rect` is big enought to contain `self` and we
+ false, then `rect` is big enough to contain `self` and we
set `state.pmax=parameter` and return True. Otherwise we set
`state.pmin=parameter` and return False.
'''
@@ -12402,7 +12402,7 @@ def _extractText(self, format_):
# fixme: mupdfwrap.py thinks fz_output is not copyable, possibly
# because there is no .refs member visible and no fz_keep_output() fn,
# although there is an fz_drop_output(). So mupdf.fz_new_output_with_buffer()
- # doesn't convert the returnd fz_output* into a mupdf.FzOutput.
+ # doesn't convert the returned fz_output* into a mupdf.FzOutput.
#out = mupdf.FzOutput(out)
if format_ == 1:
mupdf.fz_print_stext_page_as_html(out, this_tpage, 0)
@@ -17002,7 +17002,7 @@ def JM_quad_from_py(r):
def JM_read_contents(pageref):
'''
- Read and concatenate a PDF page's /Conents object(s) in a buffer
+ Read and concatenate a PDF page's /Contents object(s) in a buffer
'''
assert isinstance(pageref, mupdf.PdfObj), f'{type(pageref)}'
contents = mupdf.pdf_dict_get(pageref, mupdf.PDF_ENUM_NAME_Contents)
@@ -17786,9 +17786,9 @@ def CheckMorph(o: typing.Any) -> bool:
if not (type(o) in (list, tuple) and len(o) == 2):
raise ValueError("morph must be a sequence of length 2")
if not (len(o[0]) == 2 and len(o[1]) == 6):
- raise ValueError("invalid morph parm 0")
+ raise ValueError("invalid morph param 0")
if not o[1][4] == o[1][5] == 0:
- raise ValueError("invalid morph parm 1")
+ raise ValueError("invalid morph param 1")
return True
@@ -19147,7 +19147,7 @@ def write(self, ctx, data_raw, data_length):
def compute_scissor(dev):
'''
- Every scissor of a clip is a sub rectangle of the preceeding clip scissor
+ Every scissor of a clip is a sub rectangle of the preceding clip scissor
if the clip level is larger.
'''
if dev.scissors is None:
@@ -19892,7 +19892,7 @@ def annot_preprocess(page: "Page") -> int:
def annot_postprocess(page: "Page", annot: "Annot") -> None:
- """Clean up after annotation inertion.
+ """Clean up after annotation insertion.
Set ownership flag and store annotation in page annotation dictionary.
"""
@@ -20341,10 +20341,10 @@ def util_invert_matrix(matrix):
or abs( matrix.c - 0) >= sys.float_info.epsilon
or abs( matrix.d - 1) >= sys.float_info.epsilon
):
- # Invertion not possible.
+ # Inversion not possible.
return 1, ()
return 0, (ret.a, ret.b, ret.c, ret.d, ret.e, ret.f)
- # Do invertion in python.
+ # Do inversion in python.
src = JM_matrix_from_py(matrix)
a = src.a
det = a * src.d - src.b * src.c
@@ -20811,7 +20811,7 @@ def sRGB_to_pdf(srgb: int) -> tuple:
Args:
srgb: (int) RRGGBB (red, green, blue), each color in range(255).
Returns:
- Tuple (red, green, blue) each item in intervall 0 <= item <= 1.
+ Tuple (red, green, blue) each item in interval 0 <= item <= 1.
"""
t = sRGB_to_rgb(srgb)
return t[0] / 255.0, t[1] / 255.0, t[2] / 255.0
@@ -20825,7 +20825,7 @@ def sRGB_to_rgb(srgb: int) -> tuple:
Args:
srgb: (int) RRGGBB (red, green, blue), each color in range(255).
Returns:
- Tuple (red, green, blue) each item in intervall 0 <= item <= 255.
+ Tuple (red, green, blue) each item in interval 0 <= item <= 255.
"""
r = srgb >> 16
g = (srgb - (r << 16)) >> 8
diff --git a/src/extra.i b/src/extra.i
index 0b8c0ea53..6748fa2e0 100644
--- a/src/extra.i
+++ b/src/extra.i
@@ -732,7 +732,7 @@ static int DICT_SETITEMSTR_DROP(PyObject *dict, const char *key, PyObject *value
//-----------------------------------------------------------------------------
-// Functions converting betwenn PySequences and pymupdf geometry objects
+// Functions converting between PySequences and pymupdf geometry objects
//-----------------------------------------------------------------------------
static int
jm_init_item(PyObject* obj, Py_ssize_t idx, int* result)
@@ -2300,7 +2300,7 @@ void JM_print_stext_page_as_text(mupdf::FzBuffer& res, mupdf::FzStextPage& page)
#define CLIP_PATH 3
#define CLIP_STROKE_PATH 4
-// Every scissor of a clip is a sub rectangle of the preceeding clip
+// Every scissor of a clip is a sub rectangle of the preceding clip
// scissor if the clip level is larger.
static fz_rect compute_scissor(jm_lineart_device *dev)
{
diff --git a/src/utils.py b/src/utils.py
index b4fdd43ea..dd647b066 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -582,7 +582,7 @@ def get_textpage_ocr(
Args:
flags: (int) control content becoming part of the result.
- language: (str) specify expected language(s). Deafault is "eng" (English).
+ language: (str) specify expected language(s). Default is "eng" (English).
dpi: (int) resolution in dpi, default 72.
full: (bool) whether to OCR the full page image, or only its images (default)
"""
@@ -1106,7 +1106,7 @@ def set_toc_item(
(str) the new title. No change if None.
to:
(point-like) destination on the target page. If omitted, (72, 36)
- will be used as taget coordinates.
+ will be used as target coordinates.
filename:
(str) destination filename, required for pymupdf.LINK_GOTOR and
pymupdf.LINK_LAUNCH.
@@ -1373,7 +1373,7 @@ def set_toc(
# ------------------------------------------------------------------------------
olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}]
# ------------------------------------------------------------------------------
- # build olitems as a list of PDF-like connnected dictionaries
+ # build olitems as a list of PDF-like connected dictionaries
# ------------------------------------------------------------------------------
for i in range(toclen):
o = toc[i]
@@ -3428,7 +3428,7 @@ def draw_sector(
list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
))
- betar -= w90 # reduce parm angle by 90 deg
+ betar -= w90 # reduce param angle by 90 deg
alfa += w90 # advance start angle by 90 deg
P = Q # advance to arc end point
# draw (remaining) arc
@@ -5106,7 +5106,7 @@ def set_page_labels(doc, labels):
# William Chapman, 2021-01-06
def create_label_str(label):
- """Convert Python label dict to correspnding PDF rule string.
+ """Convert Python label dict to corresponding PDF rule string.
Args:
label: (dict) build rule for the label.
@@ -5194,7 +5194,7 @@ def recover_bbox_quad(line_dir: tuple, span: dict, bbox: tuple) -> pymupdf.Quad:
d = span["ascender"] - span["descender"]
height = d * span["size"] # the quad's rectangle height
- # The following are distances from the bbox corners, at wich we find the
+ # The following are distances from the bbox corners, at which we find the
# respective quad points. The computation depends on in which quadrant
# the text writing angle is located.
hs = height * sin
diff --git a/tests/test_codespell.py b/tests/test_codespell.py
new file mode 100644
index 000000000..c38103d00
--- /dev/null
+++ b/tests/test_codespell.py
@@ -0,0 +1,24 @@
+import pymupdf
+
+import os
+import subprocess
+
+
+def test_codespell():
+ '''
+ Check spelling across the repository with codespell.
+ '''
+ if not hasattr(pymupdf, 'mupdf'):
+ print('Not running codespell with classic implementation.')
+ return
+ root = os.path.abspath(f'{__file__}/../..')
+ def run(command):
+ print(f'test_codespell(): Running: {command}')
+ prev_workdir = os.getcwd()
+ os.chdir(root)
+ subprocess.run(command, shell=1, check=1)
+ os.chdir(prev_workdir)
+ # careful: I don't think paths like `docs/locales` work
+ skips = "*.pdf,src_classic,locales,prism.js,tests"
+ run(f'codespell -x .codespell-ignorelines -I .codespell-ignorewords --skip {skips}')
+ print('test_codespell(): codespell succeeded.')
diff --git a/tests/test_drawings.py b/tests/test_drawings.py
index 92e52c7e3..459747a6f 100644
--- a/tests/test_drawings.py
+++ b/tests/test_drawings.py
@@ -180,7 +180,7 @@ def test_3207():
"""Example graphics with multiple "close path" commands within same path.
The fix translates a close-path commands into an additional line
- which connects the current point with a preceeding "move" target.
+ which connects the current point with a preceding "move" target.
The example page has 2 paths which each contain 2 close-path
commands after 2 normal "line" commands, i.e. 2 command sequences
"move-to, line-to, line-to, close-path".
diff --git a/tests/test_general.py b/tests/test_general.py
index 3eac3727d..385bb796c 100644
--- a/tests/test_general.py
+++ b/tests/test_general.py
@@ -809,7 +809,7 @@ def test_2957_2():
page.apply_redactions() # remove/redact the word "longer"
words1 = page.get_text("words") # extract words again
assert len(words1) == len(words0) - 1 # must be one word less
- assert words0[3][4] == "longer" # just confirm test file is correc one
+ assert words0[3][4] == "longer" # just confirm test file is correct one
del words0[3] # remove the redacted word from first list
for i in range(len(words1)): # compare words
w1 = words1[i] # word after redaction
diff --git a/tests/test_pagedelete.py b/tests/test_pagedelete.py
index cde6812b3..73593fa29 100644
--- a/tests/test_pagedelete.py
+++ b/tests/test_pagedelete.py
@@ -83,7 +83,7 @@ def test_3094():
def test_3150():
"""Assert correct functioning for problem file.
- Implicitely also check use of new MuPDF function
+ Implicitly also check use of new MuPDF function
pdf_rearrange_pages() since version 1.23.9.
"""
filename = os.path.join(scriptdir, "resources", "test-3150.pdf")
diff --git a/tests/test_remove-rotation.py b/tests/test_remove-rotation.py
index a25aa5a28..423c88113 100644
--- a/tests/test_remove-rotation.py
+++ b/tests/test_remove-rotation.py
@@ -10,7 +10,7 @@ def test_remove_rotation():
filename = os.path.join(scriptdir, "resources", "test-2812.pdf")
doc = pymupdf.open(filename)
- # We always create fresh pages to avoid false positves from cache content.
+ # We always create fresh pages to avoid false positives from cache content.
# Text on these pages consists of pairwise different strings, sorting by
# these strings must therefore yield identical bounding boxes.
for i in range(1, doc.page_count):
diff --git a/tests/test_showpdfpage.py b/tests/test_showpdfpage.py
index fbfdbcb04..2e6b27a18 100644
--- a/tests/test_showpdfpage.py
+++ b/tests/test_showpdfpage.py
@@ -3,7 +3,7 @@
* Convert some image to a PDF
* Insert it rotated in some rectangle of a PDF page
* Assert PDF Form XObject has been created
- * Assert that image contained in inserted PDF is inside given retangle
+ * Assert that image contained in inserted PDF is inside given rectangle
"""
import os
diff --git a/tests/test_story.py b/tests/test_story.py
index 824835658..adcb9d8c7 100644
--- a/tests/test_story.py
+++ b/tests/test_story.py
@@ -19,8 +19,8 @@ def test_story():
WHERE = MEDIABOX + (36, 36, -36, -36)
# the font files are located in /home/chinese
arch = pymupdf.Archive(".")
- # if not specfied user_css, the output pdf has content
- story = pymupdf.Story(HTML, user_css=CSS, archive=arch)
+ # if not specified user_css, the output pdf has content
+ story = pymupdf.Story(HTML, user_css=CSS, archive=arch)
writer = pymupdf.DocumentWriter("output.pdf")