Parse lua filters in filters argument (#284)

domvwt · NicklasTegner · web-flow · commit 4c00aba359fb · 2022-07-23T09:44:19.000+02:00
* Add support for Lua filters

* Update README

* Formatting

* Fix test for Windows

* Fix text for Windows (remove trailing whitespace)

* Add tests for mixed filters

* Update test

* Change tests so ordering is explicit

* Tidy up test code

Co-authored-by: NicklasTegner <NicklasMCHD@live.dk>
diff --git a/README.md b/README.md
@@ -305,8 +305,10 @@ Note that for citeproc tests to pass you'll need to have [pandoc-citeproc](https
 * [Kolen Cheung](https://github.com/ickc) - Implement `_get_pandoc_urls` for installing arbitrary version as well as the latest version of pandoc. Minor: README, Travis, setup.py.
 * [Rebecca Heineman](https://github.com/burgerbecky) - Added scanning code for finding pandoc in Windows
 * [Andrew Barraford](https://github.com/abarrafo) - Download destination.
+* [Jesse Widner](https://github.com/jwidner) & [Dominic Thorn](https://github.com/domvwt) - Add support for Lua filters
 * [Alex Kneisel](https://github.com/hey-thanks/) - Added pathlib.Path support to convert_file.
- 
+
+
 ## License
 
 Pypandoc is available under MIT license. See LICENSE for more details. Pandoc itself is [available under the GPL2 license](https://github.com/jgm/pandoc/blob/master/COPYING.md).
diff --git a/poetry.lock b/poetry.lock
diff --git a/pypandoc/__init__.py b/pypandoc/__init__.py
@@ -356,7 +356,7 @@ def _convert_input(source, format, input_type, to, extra_args=(),
     if filters is not None:
         if isinstance(filters, string_types):
             filters = filters.split()
-        f = ['--filter=' + x for x in filters]
+        f = ['--lua-filter=' + x if x.endswith(".lua") else '--filter=' + x for x in filters]
         args.extend(f)
 
     # To get access to pandoc-citeproc when we use a included copy of pandoc,
diff --git a/pyproject.toml b/pyproject.toml
@@ -34,6 +34,7 @@ packages = [
 python = "^3.6"
 
 [tool.poetry.dev-dependencies]
+pandocfilters = "^1.5.0"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]
diff --git a/tests.py b/tests.py
@@ -10,6 +10,7 @@
 import subprocess
 import sys
 import tempfile
+import textwrap
 import unittest
 import warnings
 from pathlib import Path
@@ -307,6 +308,87 @@ def test_conversion_with_empty_filter(self):
         found = re.search(r'10.1038', written)
         self.assertTrue(found is None)
 
+    def test_conversion_with_python_filter(self):
+        """A ``.py`` filter path is run as a regular pandoc filter and upper-cases the text."""
+        markdown_source = "**Here comes the content.**"
+        python_source = textwrap.dedent('''\
+        #!/usr/bin/env python
+
+        """
+        Pandoc filter to convert all regular text to uppercase.
+        Code, link URLs, etc. are not affected.
+        """
+
+        from pandocfilters import toJSONFilter, Str
+
+        def caps(key, value, format, meta):
+            if key == 'Str':
+                return Str(value.upper())
+
+        if __name__ == "__main__":
+            toJSONFilter(caps)
+        ''')
+        with closed_tempfile(".py", python_source) as filter_path:
+            output = pypandoc.convert_text(
+                markdown_source, to='html', format='md', outputfile=None, filters=filter_path
+            ).strip()
+            expected = '<p><strong>HERE COMES THE CONTENT.</strong></p>'
+            self.assertEqual(output, expected)
+
+    def test_conversion_with_lua_filter(self):
+        """A ``.lua`` filter path is run as a Lua filter and turns strong text into small caps."""
+        markdown_source = "**Here comes the content.**"
+        lua_source = textwrap.dedent("""\
+        -- taken from: https://pandoc.org/lua-filters.html
+        function Strong(elem)
+            return pandoc.SmallCaps(elem.c)
+        end
+        """)
+        with closed_tempfile(".lua", lua_source) as filter_path:
+            output = pypandoc.convert_text(
+                markdown_source, to='html', format='md', outputfile=None, filters=filter_path
+            ).strip()
+            expected = '<p><span class="smallcaps">Here comes the content.</span></p>'
+            self.assertEqual(output, expected)
+
+    def test_conversion_with_mixed_filters(self):
+        """Lua and Python filters can be mixed; each appends its number, exposing the run order."""
+        markdown_source = "-0-"
+
+        lua = textwrap.dedent("""\
+        function Para(elem)
+            return pandoc.Para(elem.content .. {{"{0}-"}})
+        end
+        """)
+
+        python = textwrap.dedent("""\
+        #!/usr/bin/env python
+
+        from pandocfilters import toJSONFilter, Para, Str
+
+        def func(key, value, format, meta):
+            if key == "Para":
+                return Para(value + [Str("{0}-")])
+
+        if __name__ == "__main__":
+            toJSONFilter(func)
+        
+        """)
+
+        with closed_tempfile(".lua", lua.format(1)) as temp1, closed_tempfile(".py", python.format(2)) as temp2:
+            with closed_tempfile(".lua", lua.format(3)) as temp3, closed_tempfile(".py", python.format(4)) as temp4:
+                output = pypandoc.convert_text(
+                    markdown_source, to="html", format="md", outputfile=None, filters=[temp1, temp2, temp3, temp4]
+                ).strip()
+                expected = "<p>-0-1-2-3-4-</p>"
+                self.assertEqual(output, expected)
+
+                # Same four filters in a different order: the output must follow the order passed in.
+                output = pypandoc.convert_text(
+                    markdown_source, to="html", format="md", outputfile=None, filters=[temp3, temp1, temp4, temp2]
+                ).strip()
+                expected = "<p>-0-3-1-4-2-</p>"
+                self.assertEqual(output, expected)
 
     def test_classify_pandoc_logging(self):