diff --git a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/extend/TextRenderer.java b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/extend/TextRenderer.java index 9fd9fed76..e86abf9fa 100755 --- a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/extend/TextRenderer.java +++ b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/extend/TextRenderer.java @@ -24,26 +24,48 @@ import com.openhtmltopdf.render.FSFont; import com.openhtmltopdf.render.FSFontMetrics; import com.openhtmltopdf.render.JustificationInfo; +import com.openhtmltopdf.util.OpenUtil; + +import static com.openhtmltopdf.util.OpenUtil.areAllCharactersPrintable; public interface TextRenderer { - public void setup(FontContext context); - public void drawString(OutputDevice outputDevice, String string, float x, float y); - public void drawString( + /** + * Returns a string containing printable characters only. + * + * @param input The string can be null + * @return The cleaned string or null if the input is null + * @see com.openhtmltopdf.util.OpenUtil#isCodePointPrintable(int) + */ + static String getEffectivePrintableString(String input) { + if (input == null || input.isEmpty() || areAllCharactersPrintable(input)) { + return input; + } + + StringBuilder effective = new StringBuilder(input.length()); + input.codePoints().filter(OpenUtil::isCodePointPrintable).forEach(effective::appendCodePoint); + + return effective.toString(); + } + + void setup(FontContext context); + + void drawString(OutputDevice outputDevice, String string, float x, float y); + void drawString( OutputDevice outputDevice, String string, float x, float y, JustificationInfo info); - public FSFontMetrics getFSFontMetrics( + FSFontMetrics getFSFontMetrics( FontContext context, FSFont font, String string ); /** * Rarely need to use this method directly. * Instead favor {@link Breaker} static method instead. */ - public int getWidth(FontContext context, FSFont font, String string); + int getWidth(FontContext context, FSFont font, String string); - public void setFontScale(float scale); + void setFontScale(float scale); - public float getFontScale(); + float getFontScale(); /** * Set the smoothing threashold. This is a font size above which @@ -53,15 +75,15 @@ public FSFontMetrics getFSFontMetrics( * Else, set to the threshold font size. does not take font scaling * into account. */ - public void setSmoothingThreshold(float fontsize); + void setSmoothingThreshold(float fontsize); - public int getSmoothingLevel(); + int getSmoothingLevel(); /** * @deprecated no-op, will be removed in a future release. Anti-aliasing is now controlled via the smoothing * threshhold. * @param level no-op */ - public void setSmoothingLevel(int level); + void setSmoothingLevel(int level); } diff --git a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/util/OpenUtil.java b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/util/OpenUtil.java index 78f049962..46c4d3f97 100644 --- a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/util/OpenUtil.java +++ b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/util/OpenUtil.java @@ -1,5 +1,7 @@ package com.openhtmltopdf.util; +import java.util.Objects; + public class OpenUtil { private OpenUtil() {} @@ -23,7 +25,17 @@ public static boolean isCodePointPrintable(int codePoint) { category == Character.PRIVATE_USE || category == Character.SURROGATE); } - + + /** + * Returns true, when all characters of the given string are printable. + * @param str a non-null string to test + * @return whether all characters are printable + */ + public static boolean areAllCharactersPrintable(String str) { + Objects.requireNonNull(str, "str"); + return str.codePoints().allMatch(OpenUtil::isCodePointPrintable); + } + public static Integer parseIntegerOrNull(String possibleInteger) { try { return Integer.parseInt(possibleInteger); diff --git a/openhtmltopdf-core/src/test/java/com/openhtmltopdf/extend/TextRendererTest.java b/openhtmltopdf-core/src/test/java/com/openhtmltopdf/extend/TextRendererTest.java new file mode 100644 index 000000000..4221d1856 --- /dev/null +++ b/openhtmltopdf-core/src/test/java/com/openhtmltopdf/extend/TextRendererTest.java @@ -0,0 +1,21 @@ +package com.openhtmltopdf.extend; + +import org.junit.Test; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.nullValue; +import static org.hamcrest.MatcherAssert.assertThat; + + +public class TextRendererTest { + + @Test + public void getEffectivePrintableString() { + assertThat(TextRenderer.getEffectivePrintableString(null), nullValue()); + assertThat(TextRenderer.getEffectivePrintableString(""), is("")); + + assertThat(TextRenderer.getEffectivePrintableString("abc"), is("abc")); + + assertThat(TextRenderer.getEffectivePrintableString("ab\u00adc"), is("abc")); + } +} diff --git a/openhtmltopdf-core/src/test/java/com/openhtmltopdf/util/OpenUtilTest.java b/openhtmltopdf-core/src/test/java/com/openhtmltopdf/util/OpenUtilTest.java new file mode 100644 index 000000000..dec064128 --- /dev/null +++ b/openhtmltopdf-core/src/test/java/com/openhtmltopdf/util/OpenUtilTest.java @@ -0,0 +1,20 @@ +package com.openhtmltopdf.util; + +import org.junit.Test; + +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.*; + +/** + * @author schrader + */ +public class OpenUtilTest { + + @Test + public void areAllCharactersPrintable() { + String text = "abc 123 \uD844\uDCC1"; + boolean printable = OpenUtil.areAllCharactersPrintable(text); + assertThat(printable, is(true)); + } + +} \ No newline at end of file diff --git a/openhtmltopdf-examples/pom.xml b/openhtmltopdf-examples/pom.xml index e18d37da6..2d43605be 100644 --- a/openhtmltopdf-examples/pom.xml +++ b/openhtmltopdf-examples/pom.xml @@ -23,6 +23,11 @@ + + 1.25.2 + benchmarks + + com.openhtmltopdf @@ -115,6 +120,18 @@ 1.13.1 + + + org.openjdk.jmh + jmh-core + ${jmh.version} + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + + junit junit @@ -154,6 +171,51 @@ + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + + + org.apache.maven.plugins + maven-shade-plugin + 2.2 + + + package + + shade + + + ${uberjar.name} + + + org.openjdk.jmh.Main + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + diff --git a/openhtmltopdf-examples/src/main/java/com/openhtmltopdf/benchmark/NoopLogger.java b/openhtmltopdf-examples/src/main/java/com/openhtmltopdf/benchmark/NoopLogger.java new file mode 100644 index 000000000..a3612d80d --- /dev/null +++ b/openhtmltopdf-examples/src/main/java/com/openhtmltopdf/benchmark/NoopLogger.java @@ -0,0 +1,31 @@ +package com.openhtmltopdf.benchmark; + +import com.openhtmltopdf.util.Diagnostic; +import com.openhtmltopdf.util.XRLogger; + +import java.util.logging.Level; + +/** + * @author schrader + */ +class NoopLogger implements XRLogger { + @Override + public void log(String where, Level level, String msg) { + + } + + @Override + public void log(String where, Level level, String msg, Throwable th) { + + } + + @Override + public void setLevel(String logger, Level level) { + + } + + @Override + public boolean isLogLevelEnabled(Diagnostic diagnostic) { + return false; + } +} diff --git a/openhtmltopdf-examples/src/main/java/com/openhtmltopdf/benchmark/RenderTextBenchmark.java b/openhtmltopdf-examples/src/main/java/com/openhtmltopdf/benchmark/RenderTextBenchmark.java new file mode 100644 index 000000000..c86ad89ee --- /dev/null +++ b/openhtmltopdf-examples/src/main/java/com/openhtmltopdf/benchmark/RenderTextBenchmark.java @@ -0,0 +1,86 @@ +package com.openhtmltopdf.benchmark; + +import com.openhtmltopdf.pdfboxout.PdfRendererBuilder; +import com.openhtmltopdf.util.XRLog; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.util.Charsets; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +/** + * @author schrader + */ +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Thread) +public class RenderTextBenchmark { + + public static void main(String[] args) throws Exception { + Options opt = new OptionsBuilder() + .include(RenderTextBenchmark.class.getSimpleName()) + .forks(1) + .build(); + + new Runner(opt).run(); + } + + private Map contents = new HashMap<>(); + + @Setup + public void setUp() { + XRLog.setLoggerImpl(new NoopLogger()); + + Arrays.asList( + "/benchmark/render-text-plain.html", + "/benchmark/render-text-soft-hyphens.html" + ).forEach(path -> contents.put(path, readContent(path))); + } + + @Benchmark + public void renderText_Plain() throws Exception { + runRenderer(contents.get("/benchmark/render-text-plain.html")); + } + + @Benchmark + public void renderText_SoftHyphens() throws Exception { + runRenderer(contents.get("/benchmark/render-text-soft-hyphens.html")); + } + + private void runRenderer(String html) throws IOException { + ByteArrayOutputStream actual = new ByteArrayOutputStream(); + + PdfRendererBuilder builder = new PdfRendererBuilder(); + builder.withHtmlContent(html, null); + builder.toStream(actual); + builder.useFastMode(); + builder.testMode(true); + + builder.run(); + } + + private String readContent(String path) { + try (InputStream htmlIs = RenderTextBenchmark.class.getResourceAsStream(path)) { + byte[] htmlBytes = IOUtils.toByteArray(htmlIs); + return new String(htmlBytes, Charsets.UTF_8); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/openhtmltopdf-examples/src/main/resources/benchmark/render-text-plain.html b/openhtmltopdf-examples/src/main/resources/benchmark/render-text-plain.html new file mode 100644 index 000000000..b415083a0 --- /dev/null +++ b/openhtmltopdf-examples/src/main/resources/benchmark/render-text-plain.html @@ -0,0 +1,11 @@ + + + + + + +

Li Europan lingues es membres del sam familie. Lor separat existentie es un myth. Por scientie, musica, sport etc, litot Europa usa li sam vocabular.

+ + diff --git a/openhtmltopdf-examples/src/main/resources/benchmark/render-text-soft-hyphens.html b/openhtmltopdf-examples/src/main/resources/benchmark/render-text-soft-hyphens.html new file mode 100644 index 000000000..a8628b78d --- /dev/null +++ b/openhtmltopdf-examples/src/main/resources/benchmark/render-text-soft-hyphens.html @@ -0,0 +1,11 @@ + + + + + + +

Li Eu­ro­pan lin­gues es mem­bres del sam fa­mi­lie. Lor se­pa­rat exi­sten­tie es un myth. Por sci­en­tie, mu­si­ca, spo­rt etc.

+ + diff --git a/openhtmltopdf-examples/src/main/resources/visualtest/expected/issue-482-infinite-loop-table.pdf b/openhtmltopdf-examples/src/main/resources/visualtest/expected/issue-482-infinite-loop-table.pdf index 664c98497..cb34896e2 100644 Binary files a/openhtmltopdf-examples/src/main/resources/visualtest/expected/issue-482-infinite-loop-table.pdf and b/openhtmltopdf-examples/src/main/resources/visualtest/expected/issue-482-infinite-loop-table.pdf differ diff --git a/openhtmltopdf-examples/src/main/resources/visualtest/expected/text/soft-hyphens.pdf b/openhtmltopdf-examples/src/main/resources/visualtest/expected/text/soft-hyphens.pdf index 100cf11ae..d42dc0db9 100644 Binary files a/openhtmltopdf-examples/src/main/resources/visualtest/expected/text/soft-hyphens.pdf and b/openhtmltopdf-examples/src/main/resources/visualtest/expected/text/soft-hyphens.pdf differ diff --git a/openhtmltopdf-examples/src/main/resources/visualtest/html/text/soft-hyphens.html b/openhtmltopdf-examples/src/main/resources/visualtest/html/text/soft-hyphens.html index 116faf7f9..482380855 100644 --- a/openhtmltopdf-examples/src/main/resources/visualtest/html/text/soft-hyphens.html +++ b/openhtmltopdf-examples/src/main/resources/visualtest/html/text/soft-hyphens.html @@ -114,5 +114,11 @@ +
+

No Hyphen Signs visible

+

Haft­pflicht­ver­si­che­rung

+

Hähn­chen­mast

+
+ diff --git a/openhtmltopdf-examples/src/test/java/com/openhtmltopdf/visualregressiontests/TextVisualRegressionTest.java b/openhtmltopdf-examples/src/test/java/com/openhtmltopdf/visualregressiontests/TextVisualRegressionTest.java index 4238893d1..237345a7f 100644 --- a/openhtmltopdf-examples/src/test/java/com/openhtmltopdf/visualregressiontests/TextVisualRegressionTest.java +++ b/openhtmltopdf-examples/src/test/java/com/openhtmltopdf/visualregressiontests/TextVisualRegressionTest.java @@ -553,7 +553,10 @@ public void testJustifySpaceAtEnd() throws IOException { */ @Test public void testSoftHyphens() throws IOException { - assertTrue(vtester.runTest("soft-hyphens", TestSupport.WITH_COLLAPSED_LINE_BREAKER)); + assertTrue(vtester.runTest("soft-hyphens", builder -> { + TestSupport.WITH_COLLAPSED_LINE_BREAKER.configure(builder); + builder.useFont(new File("target/test/visual-tests/SourceSansPro-Regular.ttf"), "ExtraFont"); + })); } /** diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxFastOutputDevice.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxFastOutputDevice.java index 3df87cdb5..39fe81543 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxFastOutputDevice.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxFastOutputDevice.java @@ -33,6 +33,7 @@ import com.openhtmltopdf.extend.OutputDevice; import com.openhtmltopdf.extend.OutputDeviceGraphicsDrawer; import com.openhtmltopdf.extend.StructureType; +import com.openhtmltopdf.extend.TextRenderer; import com.openhtmltopdf.layout.SharedContext; import com.openhtmltopdf.outputdevice.helper.FontResolverHelper; import com.openhtmltopdf.pdfboxout.PdfBoxFontResolver.FontDescription; @@ -42,6 +43,7 @@ import com.openhtmltopdf.simple.extend.ReplacedElementScaleHelper; import com.openhtmltopdf.util.ArrayUtil; import com.openhtmltopdf.util.LogMessageId; +import com.openhtmltopdf.util.OpenUtil; import com.openhtmltopdf.util.XRLog; import de.rototor.pdfbox.graphics2d.PdfBoxGraphics2D; import de.rototor.pdfbox.graphics2d.PdfBoxGraphics2DFontTextDrawer; @@ -72,6 +74,10 @@ import java.util.Map.Entry; import java.util.logging.Level; import java.util.regex.Pattern; +import java.util.stream.IntStream; +import java.util.stream.StreamSupport; + +import static com.openhtmltopdf.util.OpenUtil.areAllCharactersPrintable; public class PdfBoxFastOutputDevice extends AbstractOutputDevice implements OutputDevice, PdfBoxOutputDevice { // @@ -390,19 +396,22 @@ private AffineTransform normalizeMatrix(AffineTransform current) { public void drawString(String s, float x, float y, JustificationInfo info) { PDFont firstFont = _font.getFontDescription().get(0).getFont(); - - // First check if the string will print with the current font entirely. + + String effectiveString = TextRenderer.getEffectivePrintableString(s); + + // First check if the string contains printable characters only and + // will print with the current font entirely. try { - firstFont.getStringWidth(s); + firstFont.getStringWidth(effectiveString); // We got here, so all is good. - drawStringFast(s, x, y, info, _font.getFontDescription().get(0), _font.getSize2D()); + drawStringFast(effectiveString, x, y, info, _font.getFontDescription().get(0), _font.getSize2D()); return; - } + } catch (Exception e) { // Fallthrough, we'll have to process the string into font runs. } - List fontRuns = PdfBoxTextRenderer.divideIntoFontRuns(_font, s, _reorderer); + List fontRuns = PdfBoxTextRenderer.divideIntoFontRuns(_font, effectiveString, _reorderer); float xOffset = 0f; for (FontRun run : fontRuns) { @@ -420,7 +429,7 @@ public void drawString(String s, float x, float y, JustificationInfo info) { } } } - + public void drawStringFast(String s, float x, float y, JustificationInfo info, FontDescription desc, float fontSize) { if (s.length() == 0) return; diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxSlowOutputDevice.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxSlowOutputDevice.java index c01e25691..56a74bbf9 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxSlowOutputDevice.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxSlowOutputDevice.java @@ -40,6 +40,7 @@ import com.openhtmltopdf.util.ArrayUtil; import com.openhtmltopdf.util.Configuration; import com.openhtmltopdf.util.LogMessageId; +import com.openhtmltopdf.util.OpenUtil; import com.openhtmltopdf.util.XRLog; import de.rototor.pdfbox.graphics2d.PdfBoxGraphics2D; import de.rototor.pdfbox.graphics2d.PdfBoxGraphics2DFontTextDrawer; @@ -86,7 +87,7 @@ public class PdfBoxSlowOutputDevice extends AbstractOutputDevice implements Outp // Dividing by _dotsPerPoint will convert OpenHTMLtoPDF dots to PDF points. // Theoretically, this is all configurable, but not tested at all with other values. // - + private static final int FILL = 1; private static final int STROKE = 2; private static final int CLIP = 3; @@ -98,11 +99,11 @@ public class PdfBoxSlowOutputDevice extends AbstractOutputDevice implements Outp // The current PDF page. private PDPage _page; - + // A wrapper around the IOException throwing content stream methods which only throws runtime exceptions. // Created for every page. private PdfContentStreamAdapter _cp; - + // We need the page height because the project uses top down units which PDFs use bottom up units. // This is in PDF points unit (1/72 inch). private float _pageHeight; @@ -115,7 +116,7 @@ public class PdfBoxSlowOutputDevice extends AbstractOutputDevice implements Outp // It scales from internal dots to PDF points. // It translates positions to implement page margins. private AffineTransform _transform = new AffineTransform(); - + // A stack of currently in force transforms on the PDF graphics state. // NOTE: Transforms are cumulative and order is important. // After the graphics state is restored in setClip we must appropriately reapply the transforms @@ -125,11 +126,11 @@ public class PdfBoxSlowOutputDevice extends AbstractOutputDevice implements Outp // An index into the transformStack. When we save state we set this to the length of transformStack // then we know we have to reapply those transforms set after saving state upon restoring state. private int clipTransformIndex; - + // We use these to keep track of where the current transform-origin is in absolute internal dots units. private float _absoluteTransformOriginX = 0; private float _absoluteTransformOriginY = 0; - + // The desired color as set by setColor. // To make sure this color is set on the PDF graphics stream call ensureFillColor or ensureStrokeColor. private FSColor _color = FSRGBColor.BLACK; @@ -143,10 +144,10 @@ public class PdfBoxSlowOutputDevice extends AbstractOutputDevice implements Outp // This is already transformed to PDF points units. // Call setStrokeDiff to set this on the PDF graphics stream. private Stroke _stroke = null; - + // Same as _stroke, but not transformed. That is, it is in internal dots units. private Stroke _originalStroke = null; - + // The currently set stroke on the PDF graphics stream. When we call setStokeDiff // this is compared with _stroke and only the differences are output to the graphics stream. private Stroke _oldStroke = null; @@ -156,7 +157,7 @@ public class PdfBoxSlowOutputDevice extends AbstractOutputDevice implements Outp // Essentially per-run global variables. private SharedContext _sharedContext; - + // The project internal dots per PDF point unit. See discussion of units above. private float _dotsPerPoint; @@ -175,7 +176,7 @@ public class PdfBoxSlowOutputDevice extends AbstractOutputDevice implements Outp // Contains all the state needed to manage form controls private final PdfBoxPerDocumentFormState _formState = new PdfBoxPerDocumentFormState(); - + // The root box in the document. We keep this so we can search for specific boxes below it // such as links or form controls which we need to position. private Box _root; @@ -184,24 +185,24 @@ public class PdfBoxSlowOutputDevice extends AbstractOutputDevice implements Outp // so we can use it to offset when we need to know the PDF page number. // NOTE: Not tested recently, this feature may be broken. private int _startPageNo; - + // Whether we are in test mode, currently not used here, but keep around in case we need it down the track. @SuppressWarnings("unused") private final boolean _testMode; - + // Link manage handles a links. We add the link in paintBackground and then output links when the document is finished. private PdfBoxLinkManager _linkManager; - + // Not used currently. private RenderingContext _renderingContext; - - // The bidi reorderer is responsible for shaping Arabic text, deshaping and + + // The bidi reorderer is responsible for shaping Arabic text, deshaping and // converting RTL text into its visual order. private BidiReorderer _reorderer = new SimpleBidiReorderer(); // Font Mapping for the Graphics2D output private PdfBoxGraphics2DFontTextDrawer _fontTextDrawer; - + public PdfBoxSlowOutputDevice(float dotsPerPoint, boolean testMode) { _dotsPerPoint = dotsPerPoint; _testMode = testMode; @@ -216,7 +217,7 @@ public PDDocument getWriter() { } /** - * Start a page. A new PDF page starts a new content stream so all graphics state has to be + * Start a page. A new PDF page starts a new content stream so all graphics state has to be * set back to default. */ public void initializePage(PDPageContentStream currentPage, PDPage page, float height) { @@ -229,7 +230,7 @@ public void initializePage(PDPageContentStream currentPage, PDPage page, float h // restoreGraphics is only used by setClip and page finish (unless the fast renderer is in use). _cp.saveGraphics(); } - + _transform = new AffineTransform(); _transform.scale(1.0d / _dotsPerPoint, 1.0d / _dotsPerPoint); @@ -253,7 +254,7 @@ public void finishPage() { if (!isFastRenderer()) { _cp.restoreGraphics(); } - + _cp.closeContent(); } @@ -271,7 +272,7 @@ public void paintBackground(RenderingContext c, Box box) { // processLinkLater will take care of making sure it is actually a link. _linkManager.processLinkLater(c, box, _page, _pageHeight, _transform); - + if (box.getElement() != null && box.getElement().getNodeName().equals("form")) { _formState.addFormIfRequired(box, this); } else if (box.getElement() != null && @@ -285,7 +286,7 @@ public void paintBackground(RenderingContext c, Box box) { private void processControls() { _formState.processControls(_sharedContext, _writer, _root); } - + /** @@ -375,7 +376,7 @@ private AffineTransform normalizeMatrix(AffineTransform current) { public void drawString(String s, float x, float y, JustificationInfo info) { PDFont firstFont = _font.getFontDescription().get(0).getFont(); - + // First check if the string will print with the current font entirely. try { firstFont.getStringWidth(s); diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxTextRenderer.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxTextRenderer.java index 2e7405640..6e109db50 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxTextRenderer.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxTextRenderer.java @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.List; import java.util.logging.Level; +import java.util.stream.IntStream; import com.openhtmltopdf.bidi.BidiReorderer; import com.openhtmltopdf.extend.FontContext; @@ -34,13 +35,14 @@ import com.openhtmltopdf.render.FSFontMetrics; import com.openhtmltopdf.render.JustificationInfo; import com.openhtmltopdf.util.LogMessageId; -import com.openhtmltopdf.util.OpenUtil; import com.openhtmltopdf.util.ThreadCtx; import com.openhtmltopdf.util.XRLog; +import static com.openhtmltopdf.util.OpenUtil.isCodePointPrintable; + public class PdfBoxTextRenderer implements TextRenderer { private static float TEXT_MEASURING_DELTA = 0.01f; - + private BidiReorderer _reorderer; public void setup(FontContext context, BidiReorderer reorderer) { @@ -191,6 +193,11 @@ public static List divideIntoFontRuns(FSFont font, String str, BidiReor int unicode = str.codePointAt(i); i += Character.charCount(unicode); String ch = String.valueOf(Character.toChars(unicode)); + + if (!isCodePointPrintable(unicode)) { + continue; + } + boolean gotChar = false; FONT_LOOP: @@ -277,9 +284,6 @@ else if (replace.fontDescription != current.des) { current.spaceCharacterCount++; sb.append(' '); } - else if (!OpenUtil.isCodePointPrintable(unicode)) { - // Do nothing - } else { current.otherCharacterCount++; sb.append(replace.replacement); @@ -314,6 +318,8 @@ private float getStringWidthSlow(FSFont bf, String str) { public int getWidth(FontContext context, FSFont font, String string) { float result = 0f; + String effectiveString = TextRenderer.getEffectivePrintableString(string); + try { if (((PdfBoxFSFont) font).getFontDescription() == null || ((PdfBoxFSFont) font).getFontDescription().isEmpty()) { @@ -322,7 +328,7 @@ public int getWidth(FontContext context, FSFont font, String string) { // Go through the list of font descriptions for (FontDescription fd : ((PdfBoxFSFont) font).getFontDescription()) { if (fd.getFont() != null) { - result = fd.getFont().getStringWidth(string) / 1000f * font.getSize2D(); + result = fd.getFont().getStringWidth(effectiveString) / 1000f * font.getSize2D(); break; } else { XRLog.log(Level.WARNING, LogMessageId.LogMessageId0Param.RENDER_FONT_IS_NULL); @@ -332,7 +338,7 @@ public int getWidth(FontContext context, FSFont font, String string) { } catch (IllegalArgumentException e2) { // PDFont::getStringWidth throws an IllegalArgumentException if the character doesn't exist in the font. // So we do it one character by character instead. - result = getStringWidthSlow(font, string) / 1000f * font.getSize2D(); + result = getStringWidthSlow(font, effectiveString) / 1000f * font.getSize2D(); } catch (IOException e) { throw new PdfContentStreamAdapter.PdfException("getWidth", e); }