diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml index 7620b285a..91a337f53 100644 --- a/.github/workflows/cla.yml +++ b/.github/workflows/cla.yml @@ -24,7 +24,7 @@ jobs: branch: 'CLA' allowlist: - # the followings are the optional inputs - If the optional inputs are not given, then default values will be taken + # the following are the optional inputs - If the optional inputs are not given, then default values will be taken #remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository) #remote-repository-name: enter the remote repository name where the signatures should be stored (Default is storing the signatures in the same repository) #create-file-commit-message: 'For example: Creating file for storing CLA Signatures' diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 9ba00cc3d..3aff79a98 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -172,7 +172,7 @@ button.cta a { :target>h1:first-of-type, span:target~h1:first-of-type { background-color: #007aff !important; color: #fff !important; - padding-top: 40px; /* accomodates header search blocking target */ + padding-top: 40px; /* accommodates header search blocking target */ margin-top: -40px; } @@ -182,7 +182,7 @@ button.cta a { span:target~h2:first-of-type, span:target~h3:first-of-type, span:target~h4:first-of-type, span:target~h5:first-of-type, span:target~h6:first-of-type { background-color: transparent !important; - padding-top: 40px; /* accomodates header search blocking target */ + padding-top: 40px; /* accommodates header search blocking target */ margin-top: -40px; text-decoration: underline; } diff --git a/docs/_static/pymupdf-console.html b/docs/_static/pymupdf-console.html index 4decadd0f..e2502aae6 100644 --- a/docs/_static/pymupdf-console.html +++ b/docs/_static/pymupdf-console.html @@ -266,15 +266,15 @@ window.term.exec("data = await r.bytes()"); 
window.term.exec("doc = fitz.Document(stream=data)"); - function pressEnter() { + function pressEnter() { // codespell:ignore window.term.invoke_key("ENTER"); } - setTimeout(pressEnter,1000); + setTimeout(pressEnter,1000); // codespell:ignore } - \ No newline at end of file + diff --git a/docs/annot.rst b/docs/annot.rst index 578e02bed..ac8eea942 100644 --- a/docs/annot.rst +++ b/docs/annot.rst @@ -15,7 +15,7 @@ There is a parent-child relationship between an annotation and its page. If the ================================== ============================================================== **Attribute** **Short Description** ================================== ============================================================== -:meth:`Annot.delete_responses` delete all responding annotions +:meth:`Annot.delete_responses` delete all responding annotations :meth:`Annot.get_file` get attached file content :meth:`Annot.get_oc` get :data:`xref` of an :data:`OCG` / :data:`OCMD` :meth:`Annot.get_pixmap` image of the annotation as a pixmap diff --git a/docs/footer.rst b/docs/footer.rst index bbce184ed..9acacaf63 100644 --- a/docs/footer.rst +++ b/docs/footer.rst @@ -44,7 +44,7 @@ document.getElementById("footerDisclaimer").innerHTML = getHeaderAndFooterTranslation("This software is provided AS-IS with no warranty, either express or implied. This software is distributed under license and may not be copied, modified or distributed except as expressly authorized under the terms of that license. 
Refer to licensing information at artifex.com or contact Artifex Software Inc., 39 Mesa Street, Suite 108A, San Francisco CA 94129, United States for further information."); - // more tranlsation for admonition-title as the in-built translation isn't great, needs: 注釈 -> 注 + // more translation for admonition-title as the in-built translation isn't great, needs: 注釈 -> 注 if (docLanguage == "ja") { const collection = document.getElementsByClassName("admonition-title"); for (var i=0;i Set pytest options; default is ''. -t @@ -363,7 +363,7 @@ def build( env_extra['PYMUPDF_SETUP_MUPDF_REBUILD'] = '0' if build_type: env_extra['PYMUPDF_SETUP_MUPDF_BUILD_TYPE'] = build_type - gh_release.run(f'pip install{build_isolation_text} -vv {pymupdf_dir}', env_extra=env_extra) + gh_release.run(f'pip install{build_isolation_text} -v {pymupdf_dir}', env_extra=env_extra) def test( @@ -440,7 +440,7 @@ def test( elif gdb: command = f'{python} {pymupdf_dir_rel}/tests/run_compound.py{run_compound_args} gdb --args {python} -m pytest {pytest_options} {pytest_arg}' elif platform.system() == 'Windows': - # `python -m pytest` doesn' seem to work. + # `python -m pytest` doesn't seem to work. command = f'{python} {pymupdf_dir_rel}/tests/run_compound.py{run_compound_args} pytest {pytest_options} {pytest_arg}' else: # On OpenBSD `pip install pytest` doesn't seem to install the pytest diff --git a/setup.py b/setup.py index dacf53733..6330e4348 100755 --- a/setup.py +++ b/setup.py @@ -367,8 +367,8 @@ def get_mupdf_internal(out, location=None, sha=None, local_tgz=None): location: First, if None we set to hard-coded default URL or git location. If starts with 'git:', should be remote git location. - Otherwise if containg '://' should be URL for .tgz. - Otherwise shuld path of local mupdf checkout. + Otherwise if containing '://' should be URL for .tgz. + Otherwise should be path of local mupdf checkout. sha: If not None and we use git clone, we checkout this sha. 
local_tgz: @@ -658,7 +658,7 @@ def env_add(env, name, value, sep=' ', prepend=False, verbose=False): ''' Appends/prepends `` to `env[name]`. - If `name` is not in `env`, we use os.environ[nane] if it exists. + If `name` is not in `env`, we use os.environ[name] if it exists. ''' v = env.get(name) if verbose: @@ -822,7 +822,7 @@ def get_mupdf_version(mupdf_dir): v0 = re.search('#define FZ_VERSION_MAJOR ([0-9]+)', text) v1 = re.search('#define FZ_VERSION_MINOR ([0-9]+)', text) v2 = re.search('#define FZ_VERSION_PATCH ([0-9]+)', text) - assert v0 and v1 and v2, f'Cannot find MuPDF version numers in {path=}.' + assert v0 and v1 and v2, f'Cannot find MuPDF version numbers in {path=}.' v0 = int(v0.group(1)) v1 = int(v1.group(1)) v2 = int(v2.group(1)) @@ -1245,7 +1245,7 @@ def platform_release_tuple(): print(f'MacOS/arm64: forcing use of libclang 16.0.6 because 18.1.1 known to fail with `clang.cindex.TranslationUnitLoadError: Error parsing translation unit.`') ret.append('libclang==16.0.6') elif darwin and platform_release_tuple() < (18,): - # There are still of ptoblems when building on old macos. + # There are still problems when building on old macos. ret.append('libclang==14.0.6') else: ret.append('libclang') diff --git a/src/__init__.py b/src/__init__.py index 5810df098..c51dd2560 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -2404,7 +2404,7 @@ def set_properties( ): """Set any or all properties of a node. - To be used for existing nodes preferrably. + To be used for existing nodes preferably. """ root = self.root temp = root.add_division() @@ -2694,7 +2694,7 @@ def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0 on open (e.g. EPUB). Ignored if n/a. """ # We temporarily set JM_mupdf_show_errors=0 while we are constructing, - # then restore its orginal value in a `finally:` block. + # then restore its original value in a `finally:` block. 
# global JM_mupdf_show_errors JM_mupdf_show_errors_old = JM_mupdf_show_errors @@ -4412,8 +4412,8 @@ def insert_file(self, ''' Insert an arbitrary supported document to an existing PDF. - The infile may be given as a filename, a Document or a Pixmap. - Other paramters - where applicable - equal those of insert_pdf(). + The infile may be given as a filename, a Document or a Pixmap. Other + parameters - where applicable - equal those of insert_pdf(). ''' src = None if isinstance(infile, Pixmap): @@ -6897,7 +6897,7 @@ def __init__(self): self.text_fontsize = 0 self.text_maxlen = 0 # text fields only self.text_format = 0 # text fields only - self._text_da = "" # /DA = default apparance + self._text_da = "" # /DA = default appearance self.script = None # JavaScript (/A) self.script_stroke = None # JavaScript (/AA/K) @@ -6905,7 +6905,7 @@ def __init__(self): self.script_change = None # JavaScript (/AA/V) self.script_calc = None # JavaScript (/AA/C) self.script_blur = None # JavaScript (/AA/Bl) - self.script_focus = None # JavaScript (/AA/Fo) + self.script_focus = None # JavaScript (/AA/Fo) codespell:ignore self.rect = None # annot value self.xref = 0 # annot value @@ -7494,7 +7494,6 @@ def _add_ink_annot(self, list): def _add_line_annot(self, p1, p2): page = self._pdf_page() - ASSERT_PDF(page) annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_LINE) a = JM_point_from_py(p1) b = JM_point_from_py(p2) @@ -7580,7 +7579,6 @@ def _add_stamp_annot(self, rect, stamp=0): ] n = len(stamp_id) name = stamp_id[0] - ASSERT_PDF(page) r = JM_rect_from_py(rect) if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r): raise ValueError( MSG_BAD_RECT) @@ -7605,7 +7603,6 @@ def _add_stamp_annot(self, rect, stamp=0): def _add_text_annot(self, point, text, icon=None): page = self._pdf_page() p = JM_point_from_py( point) - ASSERT_PDF(page) annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_TEXT) r = mupdf.pdf_annot_rect(annot) r = mupdf.fz_make_rect(p.x, p.y, p.x + r.x1 - r.x0, p.y + 
r.y1 - r.y0) @@ -7681,7 +7678,6 @@ def _apply_redactions(self, text, images, graphics): opts.text = text # how to treat text opts.image_method = images # how to treat images opts.line_art = graphics # how to treat vector graphics - ASSERT_PDF(page) success = mupdf.pdf_redact_page(page.doc(), page, opts) return success @@ -7749,7 +7745,6 @@ def _get_resource_properties(self): page list Resource/Properties ''' page = self._pdf_page() - ASSERT_PDF(page) rc = JM_get_resource_properties(page.obj()) return rc @@ -7924,7 +7919,7 @@ def _insert_image(self, #log( 'do_have_imask') # mupdf.FzCompressedBuffer is not copyable, so # mupdf.fz_compressed_image_buffer() does not work - it cannot - # return by value. And sharing a fz_compressed_buffer betwen two + # return by value. And sharing a fz_compressed_buffer between two # `fz_image`'s doesn't work, so we use a raw fz_compressed_buffer # here, not a mupdf.FzCompressedBuffer. # @@ -7956,10 +7951,10 @@ def _insert_image(self, # `fz_compressed_buffer`, which is not reference counted, and they # both think that they own it. # - # So we do what the classic implementataion does, and simply ensure - # that `fz_drop_image(image)` is never called. This will leak - # some of `image`'s allocations (for example the main `fz_image` - # allocation), but it's not trivial to avoid this. + # So we do what the classic implementation does, and simply + # ensure that `fz_drop_image(image)` is never called. This will + # leak some of `image`'s allocations (for example the main + # `fz_image` allocation), but it's not trivial to avoid this. # # Perhaps we could manually set `fz_image`'s # `fz_compressed_buffer*` to null? 
Trouble is we'd have to @@ -8003,7 +7998,6 @@ def _insert_image(self, def _insertFont(self, fontname, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering): page = self._pdf_page() - ASSERT_PDF(page) pdf = page.doc() value = JM_insert_font(pdf, bfname, fontfile,fontbuffer, set_simple, idx, wmode, serif, encoding, ordering) @@ -8023,7 +8017,6 @@ def _insertFont(self, fontname, bfname, fontfile, fontbuffer, set_simple, idx, w def _load_annot(self, name, xref): page = self._pdf_page() - ASSERT_PDF(page) if xref == 0: annot = JM_get_annot_by_name(page, name) else: @@ -8046,8 +8039,8 @@ def _other_box(self, boxtype): return return JM_py_from_rect(rect) - def _pdf_page(self): - return _as_pdf_page(self.this) + def _pdf_page(self, required=True): + return _as_pdf_page(self.this, required=required) def _reset_annot_refs(self): """Invalidate / delete all annots of this page.""" @@ -8109,7 +8102,6 @@ def _set_pagebox(self, boxtype, rect): def _set_resource_property(self, name, xref): page = self._pdf_page() - assert page.m_internal JM_set_resource_property(page.obj(), name, xref) def _show_pdf_page(self, fz_srcpage, overlay=1, matrix=None, xref=0, oc=0, clip=None, graftmap=None, _imgname=None): @@ -8453,7 +8445,7 @@ def annot_names(self): ''' """List of names of annotations, fields and links.""" CheckParent(self) - page = self._pdf_page() + page = self._pdf_page(required=False) if not page.m_internal: return [] return JM_get_annot_id_list(page) @@ -8531,7 +8523,7 @@ def clean_contents(self, sanitize=1): def cropbox(self): """The CropBox.""" CheckParent(self) - page = self._pdf_page() + page = self._pdf_page(required=False) if not page.m_internal: val = mupdf.fz_bound_page(self.this) else: @@ -8616,7 +8608,7 @@ def derotation_matrix(self) -> Matrix: """Reflects page de-rotation.""" if g_use_extra: return Matrix(extra.Page_derotate_matrix( self.this)) - pdfpage = self._pdf_page() + pdfpage = self._pdf_page(required=False) if not pdfpage.m_internal: 
return Matrix(mupdf.FzRect(mupdf.FzRect.UNIT)) return Matrix(JM_derotate_page_matrix(pdfpage)) @@ -8636,7 +8628,7 @@ def extend_textpage(self, tpage, flags=0, matrix=None): def first_annot(self): """First annotation.""" CheckParent(self) - page = self._pdf_page() + page = self._pdf_page(required=False) if not page.m_internal: return annot = mupdf.pdf_first_annot(page) @@ -8660,7 +8652,7 @@ def first_widget(self): """First widget/field.""" CheckParent(self) annot = 0 - page = self._pdf_page() + page = self._pdf_page(required=False) if not page.m_internal: return annot = mupdf.pdf_first_widget(page) @@ -9451,7 +9443,7 @@ def load_widget( self, xref): def mediabox(self): """The MediaBox.""" CheckParent(self) - page = self._pdf_page() + page = self._pdf_page(required=False) if not page.m_internal: rect = mupdf.fz_bound_page( self.this) else: @@ -9547,7 +9539,6 @@ def set_mediabox(self, rect): """Set the MediaBox.""" CheckParent(self) page = self._pdf_page() - ASSERT_PDF(page) mediabox = JM_rect_from_py(rect) if (mupdf.fz_is_empty_rect(mediabox) or mupdf.fz_is_infinite_rect(mediabox) @@ -9576,7 +9567,7 @@ def transformation_matrix(self): CheckParent(self) ctm = mupdf.FzMatrix() - page = self._pdf_page() + page = self._pdf_page(required=False) if not page.m_internal: return JM_py_from_matrix(ctm) mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT) # fixme: original code passed mediabox=NULL. @@ -10439,7 +10430,7 @@ def size(self): """Pixmap size.""" if mupdf_version_tuple >= (1, 23, 8): return mupdf.fz_pixmap_size( self.this) - # fz_pixmap_size() is not publically visible, so we implement it + # fz_pixmap_size() is not publicly visible, so we implement it # ourselves. fixme: we don't add on sizeof(fz_pixmap). 
pm = self.this return pm.n() * pm.w() * pm.h() @@ -10830,7 +10821,7 @@ def is_convex(self): if p1.y * p2.y > 0: return False m = planish_line(self.ll, self.ur) # puts other diagonal on x-axis - p1 = self.lr * m # tranform the + p1 = self.lr * m # transform the p2 = self.ul * m # remaining points if p1.y * p2.y > 0: return False @@ -11266,7 +11257,7 @@ def draw_bezier( def draw_circle(self, center: point_like, radius: float):# -> Point: """Draw a circle given its center and radius.""" if not radius > EPSILON: - raise ValueError("radius must be postive") + raise ValueError("radius must be positive") center = Point(center) p1 = center - (radius, 0) return self.draw_sector(center, p1, 360, fullSector=False) @@ -11403,7 +11394,7 @@ def l5(a, b): list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm) )) - betar -= w90 # reduce parm angle by 90 deg + betar -= w90 # reduce param angle by 90 deg alfa += w90 # advance start angle by 90 deg P = Q # advance to arc end point # draw (remaining) arc @@ -11855,7 +11846,7 @@ def add_pdf_links(document_or_stream, positions): `document_or_stream` if a `Document` instance, otherwise a new `Document` instance. We raise an exception if an `href` in `positions` refers to an - internal position `#` but no item in `postions` has `id = + internal position `#` but no item in `positions` has `id = name`. """ if isinstance(document_or_stream, Document): @@ -12187,7 +12178,7 @@ def ret(): def update(parameter): ''' Evaluates `more, _ = self.place(fn(parameter))`. If `more` is - false, then `rect` is big enought to contain `self` and we + false, then `rect` is big enough to contain `self` and we set `state.pmax=parameter` and return True. Otherwise we set `state.pmin=parameter` and return False. ''' @@ -12373,7 +12364,7 @@ def _extractText(self, format_): # fixme: mupdfwrap.py thinks fz_output is not copyable, possibly # because there is no .refs member visible and no fz_keep_output() fn, # although there is an fz_drop_output(). 
So mupdf.fz_new_output_with_buffer() - # doesn't convert the returnd fz_output* into a mupdf.FzOutput. + # doesn't convert the returned fz_output* into a mupdf.FzOutput. #out = mupdf.FzOutput(out) if format_ == 1: mupdf.fz_print_stext_page_as_html(out, this_tpage, 0) @@ -12860,7 +12851,6 @@ def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix else: colorspace = mupdf.fz_device_gray() - ASSERT_PDF(pdfpage) resources = mupdf.pdf_new_dict(pdfpage.doc(), 5) contents = mupdf.fz_new_buffer(1024) dev = mupdf.pdf_new_pdf_device( pdfpage.doc(), mupdf.FzMatrix(), resources, contents) @@ -15392,7 +15382,7 @@ def JM_get_annot_xref_list( page_obj): def JM_get_annot_xref_list2(page): - page = page._pdf_page() + page = page._pdf_page(required=False) if not page.m_internal: return list() return JM_get_annot_xref_list( page.obj()) @@ -16973,7 +16963,7 @@ def JM_quad_from_py(r): def JM_read_contents(pageref): ''' - Read and concatenate a PDF page's /Conents object(s) in a buffer + Read and concatenate a PDF page's /Contents object(s) in a buffer ''' assert isinstance(pageref, mupdf.PdfObj), f'{type(pageref)}' contents = mupdf.pdf_dict_get(pageref, mupdf.PDF_ENUM_NAME_Contents) @@ -17508,7 +17498,7 @@ def GETATTR(name): value = GETATTR("script_blur") JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl'), value) - # script (/AA/Fo) ------------------------------------------------------- + # script (/AA/Fo) codespell:ignore -------------------------------------- value = GETATTR("script_focus") JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo'), value) @@ -17757,9 +17747,9 @@ def CheckMorph(o: typing.Any) -> bool: if not (type(o) in (list, tuple) and len(o) == 2): raise ValueError("morph must be a sequence of length 2") if not (len(o[0]) == 2 and len(o[1]) == 6): - raise ValueError("invalid morph parm 0") + raise ValueError("invalid morph param 0") if not o[1][4] == o[1][5] == 0: - raise ValueError("invalid morph parm 1") + raise 
ValueError("invalid morph param 1") return True @@ -19120,7 +19110,7 @@ def write(self, ctx, data_raw, data_length): def compute_scissor(dev): ''' - Every scissor of a clip is a sub rectangle of the preceeding clip scissor + Every scissor of a clip is a sub rectangle of the preceding clip scissor if the clip level is larger. ''' if dev.scissors is None: @@ -19865,7 +19855,7 @@ def annot_preprocess(page: "Page") -> int: def annot_postprocess(page: "Page", annot: "Annot") -> None: - """Clean up after annotation inertion. + """Clean up after annotation insertion. Set ownership flag and store annotation in page annotation dictionary. """ @@ -20314,10 +20304,10 @@ def util_invert_matrix(matrix): or abs( matrix.c - 0) >= sys.float_info.epsilon or abs( matrix.d - 1) >= sys.float_info.epsilon ): - # Invertion not possible. + # Inversion not possible. return 1, () return 0, (ret.a, ret.b, ret.c, ret.d, ret.e, ret.f) - # Do invertion in python. + # Do inversion in python. src = JM_matrix_from_py(matrix) a = src.a det = a * src.d - src.b * src.c @@ -20784,7 +20774,7 @@ def sRGB_to_pdf(srgb: int) -> tuple: Args: srgb: (int) RRGGBB (red, green, blue), each color in range(255). Returns: - Tuple (red, green, blue) each item in intervall 0 <= item <= 1. + Tuple (red, green, blue) each item in interval 0 <= item <= 1. """ t = sRGB_to_rgb(srgb) return t[0] / 255.0, t[1] / 255.0, t[2] / 255.0 @@ -20798,7 +20788,7 @@ def sRGB_to_rgb(srgb: int) -> tuple: Args: srgb: (int) RRGGBB (red, green, blue), each color in range(255). Returns: - Tuple (red, green, blue) each item in intervall 0 <= item <= 255. + Tuple (red, green, blue) each item in interval 0 <= item <= 255. 
""" r = srgb >> 16 g = (srgb - (r << 16)) >> 8 @@ -21452,7 +21442,7 @@ def _reset_widget(annot): @staticmethod def _rotate_matrix(page): - pdfpage = page._pdf_page() + pdfpage = page._pdf_page(required=False) if not pdfpage.m_internal: return JM_py_from_matrix(mupdf.FzMatrix()) return JM_py_from_matrix(JM_rotate_page_matrix(pdfpage)) diff --git a/src/_apply_pages.py b/src/_apply_pages.py index 73a702382..119a51edf 100644 --- a/src/_apply_pages.py +++ b/src/_apply_pages.py @@ -240,7 +240,7 @@ def childfn(): return ret finally: - # Join all child proceses. + # Join all child processes. if stats: t = time.time() for pid in pids: diff --git a/src/extra.i b/src/extra.i index 3222f9354..1e4be61df 100644 --- a/src/extra.i +++ b/src/extra.i @@ -732,7 +732,7 @@ static int DICT_SETITEMSTR_DROP(PyObject *dict, const char *key, PyObject *value //----------------------------------------------------------------------------- -// Functions converting betwenn PySequences and pymupdf geometry objects +// Functions converting between PySequences and pymupdf geometry objects //----------------------------------------------------------------------------- static int jm_init_item(PyObject* obj, Py_ssize_t idx, int* result) @@ -2261,8 +2261,8 @@ void JM_print_stext_page_as_text(mupdf::FzBuffer& res, mupdf::FzStextPage& page) #define CLIP_PATH 3 #define CLIP_STROKE_PATH 4 -// Every scissor of a clip is a sub rectangle of the preceeding clip -// scissor if the clip level is larger. +// Every scissor of a clip is a sub rectangle of the preceding clip scissor if +// the clip level is larger. static fz_rect compute_scissor(jm_lineart_device *dev) { PyObject *last_scissor = NULL; diff --git a/src/utils.py b/src/utils.py index 45290ca3c..670403d92 100644 --- a/src/utils.py +++ b/src/utils.py @@ -582,7 +582,7 @@ def get_textpage_ocr( Args: flags: (int) control content becoming part of the result. - language: (str) specify expected language(s). Deafault is "eng" (English). 
+ language: (str) specify expected language(s). Default is "eng" (English). dpi: (int) resolution in dpi, default 72. full: (bool) whether to OCR the full page image, or only its images (default) """ @@ -1107,7 +1107,7 @@ def set_toc_item( (str) the new title. No change if None. to: (point-like) destination on the target page. If omitted, (72, 36) - will be used as taget coordinates. + will be used as target coordinates. filename: (str) destination filename, required for pymupdf.LINK_GOTOR and pymupdf.LINK_LAUNCH. @@ -1377,7 +1377,7 @@ def set_toc( # ------------------------------------------------------------------------------ olitems = [{"count": 0, "first": -1, "last": -1, "xref": xref[0]}] # ------------------------------------------------------------------------------ - # build olitems as a list of PDF-like connnected dictionaries + # build olitems as a list of PDF-like connected dictionaries # ------------------------------------------------------------------------------ for i in range(toclen): o = toc[i] @@ -3435,7 +3435,7 @@ def draw_sector( list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm) )) - betar -= w90 # reduce parm angle by 90 deg + betar -= w90 # reduce param angle by 90 deg alfa += w90 # advance start angle by 90 deg P = Q # advance to arc end point # draw (remaining) arc @@ -5114,7 +5114,7 @@ def set_page_labels(doc, labels): # William Chapman, 2021-01-06 def create_label_str(label): - """Convert Python label dict to correspnding PDF rule string. + """Convert Python label dict to corresponding PDF rule string. Args: label: (dict) build rule for the label. @@ -5202,9 +5202,9 @@ def recover_bbox_quad(line_dir: tuple, span: dict, bbox: tuple) -> pymupdf.Quad: d = span["ascender"] - span["descender"] height = d * span["size"] # the quad's rectangle height - # The following are distances from the bbox corners, at wich we find the - # respective quad points. 
The computation depends on in which quadrant - # the text writing angle is located. + # The following are distances from the bbox corners, at which we find the + # respective quad points. The computation depends on in which quadrant the + # text writing angle is located. hs = height * sin hc = height * cos if hc >= 0 and hs <= 0: # quadrant 1 diff --git a/tests/resources/test_3654.docx b/tests/resources/test_3654.docx new file mode 100644 index 000000000..3794ec39b Binary files /dev/null and b/tests/resources/test_3654.docx differ diff --git a/tests/test_codespell.py b/tests/test_codespell.py new file mode 100644 index 000000000..9a3509853 --- /dev/null +++ b/tests/test_codespell.py @@ -0,0 +1,66 @@ +import pymupdf + +import os +import platform +import shlex +import subprocess +import sys +import textwrap + + +def test_codespell(): + ''' + Check rebased Python code with codespell. + ''' + if not hasattr(pymupdf, 'mupdf'): + print('Not running codespell with classic implementation.') + return + + if platform.system() == 'Windows': + # Git commands seem to fail on Github Windows runners. + print(f'test_codespell(): Not running on Widows') + return + + root = os.path.abspath(f'{__file__}/../..') + + # For now we ignore files that we would ideally still look at, because it + # is difficult to exclude some text sections. 
+ skips = textwrap.dedent(''' + *.pdf + changes.txt + docs/_static/prism/prism.js + docs/_static/prism/prism.js + docs/locales/ja/LC_MESSAGES/changes.po + docs/locales/ja/LC_MESSAGES/recipes-common-issues-and-their-solutions.po + docs/recipes-common-issues-and-their-solutions.rst + docs/recipes-text.rst + docs/samples/national-capitals.py + locales + src_classic/* + tests + tests/test_story.py + tests/test_textbox.py + tests/test_textextract.py + ''') + skips = skips.strip().replace('\n', ',') + + command = f'cd {root} && codespell --skip {shlex.quote(skips)} --count' + command += f' --ignore-words-list re-use,flate,thirdparty' + + sys.path.append(root) + try: + import pipcl + finally: + del sys.path[0] + git_files = pipcl.git_items(root) + + for p in git_files: + _, ext = os.path.splitext(p) + if ext in ('.png', '.pdf'): + pass + else: + command += f' {p}' + + print(f'test_codespell(): Running: {command}') + subprocess.run(command, shell=1, check=1) + print('test_codespell(): codespell succeeded.') diff --git a/tests/test_drawings.py b/tests/test_drawings.py index 37641dfef..1c2681b44 100644 --- a/tests/test_drawings.py +++ b/tests/test_drawings.py @@ -180,7 +180,7 @@ def test_3207(): """Example graphics with multiple "close path" commands within same path. The fix translates a close-path commands into an additional line - which connects the current point with a preceeding "move" target. + which connects the current point with a preceding "move" target. The example page has 2 paths which each contain 2 close-path commands after 2 normal "line" commands, i.e. 2 command sequences "move-to, line-to, line-to, close-path". 
diff --git a/tests/test_general.py b/tests/test_general.py index 6e29e552c..57397df77 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -809,7 +809,7 @@ def test_2957_2(): page.apply_redactions() # remove/redact the word "longer" words1 = page.get_text("words") # extract words again assert len(words1) == len(words0) - 1 # must be one word less - assert words0[3][4] == "longer" # just confirm test file is correc one + assert words0[3][4] == "longer" # just confirm test file is correct one del words0[3] # remove the redacted word from first list for i in range(len(words1)): # compare words w1 = words1[i] # word after redaction @@ -969,8 +969,8 @@ def next_fd(): shape = page.new_shape() # create Shape for i in range(5): for j in range(3): - qtext = "" + "Ques #" + str(i*3+j+1) + ": " + "" - atext = "" + "Ans:" + "" + qtext = "" + "Ques #" + str(i*3+j+1) + ": " + "" # codespell:ignore + atext = "" + "Ans:" + "" # codespell:ignore qtext = qtext + '
' + atext shape.draw_rect(CELLS[i][j]) # draw rectangle page.insert_htmlbox(CELLS[i][j], qtext, css=css2, scale_low=0) @@ -1246,3 +1246,16 @@ def test_3615(): print(doc.pagelayout) wt = pymupdf.TOOLS.mupdf_warnings() assert wt + +def test_3654(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_3654.docx') + content = "" + with pymupdf.open(path) as document: + for page in document: + content += page.get_text() + '\n\n' + content = content.strip() + + # As of 2024-07-04 we get a warning for this input file. + wt = pymupdf.TOOLS.mupdf_warnings() + assert wt == 'dropping unclosed output' + diff --git a/tests/test_pagedelete.py b/tests/test_pagedelete.py index cde6812b3..73593fa29 100644 --- a/tests/test_pagedelete.py +++ b/tests/test_pagedelete.py @@ -83,7 +83,7 @@ def test_3094(): def test_3150(): """Assert correct functioning for problem file. - Implicitely also check use of new MuPDF function + Implicitly also check use of new MuPDF function pdf_rearrange_pages() since version 1.23.9. """ filename = os.path.join(scriptdir, "resources", "test-3150.pdf") diff --git a/tests/test_remove-rotation.py b/tests/test_remove-rotation.py index a25aa5a28..423c88113 100644 --- a/tests/test_remove-rotation.py +++ b/tests/test_remove-rotation.py @@ -10,7 +10,7 @@ def test_remove_rotation(): filename = os.path.join(scriptdir, "resources", "test-2812.pdf") doc = pymupdf.open(filename) - # We always create fresh pages to avoid false positves from cache content. + # We always create fresh pages to avoid false positives from cache content. # Text on these pages consists of pairwise different strings, sorting by # these strings must therefore yield identical bounding boxes. 
for i in range(1, doc.page_count): diff --git a/tests/test_showpdfpage.py b/tests/test_showpdfpage.py index fbfdbcb04..2e6b27a18 100644 --- a/tests/test_showpdfpage.py +++ b/tests/test_showpdfpage.py @@ -3,7 +3,7 @@ * Convert some image to a PDF * Insert it rotated in some rectangle of a PDF page * Assert PDF Form XObject has been created - * Assert that image contained in inserted PDF is inside given retangle + * Assert that image contained in inserted PDF is inside given rectangle """ import os diff --git a/tests/test_story.py b/tests/test_story.py index 824835658..a711f60eb 100644 --- a/tests/test_story.py +++ b/tests/test_story.py @@ -19,7 +19,7 @@ def test_story(): WHERE = MEDIABOX + (36, 36, -36, -36) # the font files are located in /home/chinese arch = pymupdf.Archive(".") - # if not specfied user_css, the output pdf has content + # if not specified user_css, the output pdf has content story = pymupdf.Story(HTML, user_css=CSS, archive=arch) writer = pymupdf.DocumentWriter("output.pdf") diff --git a/tests/test_textextract.py b/tests/test_textextract.py index 771ea9dfe..2a8a25e49 100644 --- a/tests/test_textextract.py +++ b/tests/test_textextract.py @@ -1,5 +1,5 @@ """ -Exract page text in various formats. +Extract page text in various formats. No checks performed - just contribute to code coverage. 
""" import os @@ -34,7 +34,7 @@ def _test_extract2(): import time path = f'{scriptdir}/../../PyMuPDF-performance/adobe.pdf' if not os.path.exists(path): - print(f'test_extract2(): not running becase does not exist: {path}') + print(f'test_extract2(): not running because does not exist: {path}') return doc = pymupdf.open( path) for opt in ( @@ -63,7 +63,7 @@ def _test_extract3(): import time path = f'{scriptdir}/../../PyMuPDF-performance/adobe.pdf' if not os.path.exists(path): - print(f'test_extract3(): not running becase does not exist: {path}') + print(f'test_extract3(): not running because does not exist: {path}') return doc = pymupdf.open( path) t0 = time.time() @@ -131,7 +131,7 @@ def get_text(page, space_guess): def test_2954(): ''' - Check handling of unknow unicode characters, issue #2954, fixed in + Check handling of unknown unicode characters, issue #2954, fixed in mupdf-1.23.9 with addition of FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE. ''' path = os.path.abspath(f'{__file__}/../../tests/resources/test_2954.pdf')