diff --git a/.editorconfig b/.editorconfig
index 238988221..d43b16be5 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -34,5 +34,8 @@ indent_size = 2
[metafacture-io/src/test/resources/org/metafacture/io/compressed.txt]
insert_final_newline = false
+[metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test.txt]
+trim_trailing_whitespace = false
+
[metafacture-runner/src/main/dist/config/java-options.conf]
end_of_line = crlf
diff --git a/metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java b/metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java
index 878397fc0..1972e362a 100644
--- a/metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java
+++ b/metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2013, 2014 Deutsche Nationalbibliothek
+ * Copyright 2013, 2014, 2021 Deutsche Nationalbibliothek et al
*
* Licensed under the Apache License, Version 2.0 the "License";
* you may not use this file except in compliance with the License.
@@ -29,6 +29,7 @@
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -37,9 +38,16 @@
import java.util.regex.Pattern;
/**
- * Provides a {@link Map} based on a file. The file is supposed to be UTF-8
- * encoded. The default separator is {@code \t}. Important:
- * Lines that are not split in two parts by the separator are ignored!
+ * Provides a {@link Map} based on files. Can be one file or a comma-separated list of files.
+ * The files are supposed to be UTF-8 encoded. The default separator is {@code \t}.
+ *
+ * By setting {@link #setAllowEmptyValues allowEmptyValues} to {@code true}, the values in the
+ * {@link Map} can be empty, thus enabling e.g.
+ * {@link org.metafacture.metamorph.functions.SetReplace} to remove matching
+ * keys.
+ *
+ * Important: All other lines that are not split into exactly two parts
+ * by the separator are ignored!
*
* @author Markus Michael Geipel
*/
@@ -48,6 +56,9 @@ public final class FileMap extends AbstractReadOnlyMap {
private final Map map = new HashMap<>();
private Pattern split = Pattern.compile("\t", Pattern.LITERAL);
+ private boolean allowEmptyValues;
+ private boolean isUninitialized = true;
+ private ArrayList filenames = new ArrayList<>();
/**
* Creates an instance of {@link FileMap}.
@@ -55,27 +66,45 @@ public final class FileMap extends AbstractReadOnlyMap {
public FileMap() {
}
+ private void init() {
+ loadFiles();
+ isUninitialized = false;
+ }
+
+ /**
+ * Sets whether to allow empty values in the {@link Map} or ignore these
+ * entries.
+ *
+ * Default value: false
+ *
+ * @param allowEmptyValues true if empty values in the Map are allowed
+ */
+ public void setAllowEmptyValues(final boolean allowEmptyValues) {
+ this.allowEmptyValues = allowEmptyValues;
+ }
+
/**
- * Sets a comma separated list of files which are then passed to
- * {@link #setFile}.
+ * Sets a comma-separated list of files which provide the {@link Map}.
*
* @param files a comma separated list of files
*/
public void setFiles(final String files) {
- final String[] parts = files.split("\\s*,\\s*");
- for (final String part : parts) {
- setFile(part);
- }
+ Collections.addAll(filenames, files.split("\\s*,\\s*"));
}
/**
- * Provides a {@link Map} based on a file. The file is supposed to be UTF-8
- * encoded. The default separator is {@code \t}. Important:
- * Lines that are not split in two parts by the separator are ignored!
- *
+ * Sets a file which provides the {@link Map}.
* @param file the file
*/
public void setFile(final String file) {
+ Collections.addAll(filenames, file);
+ }
+
+ private void loadFiles() {
+ filenames.forEach(this::loadFile);
+ }
+
+ private void loadFile(final String file) {
try (
InputStream stream = openStream(file);
BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))
@@ -85,7 +114,7 @@ public void setFile(final String file) {
if (line.isEmpty()) {
continue;
}
- final String[] parts = split.split(line);
+ final String[] parts = allowEmptyValues ? split.split(line, -1) : split.split(line);
if (parts.length == 2) {
map.put(parts[0], parts[1]);
}
@@ -147,11 +176,17 @@ public void setSeparator(final String delimiter) {
@Override
public String get(final Object key) {
+ if (isUninitialized) {
+ init();
+ }
return map.get(key);
}
@Override
public Set keySet() {
+ if (isUninitialized) {
+ init();
+ }
return Collections.unmodifiableSet(map.keySet());
}
diff --git a/metamorph/src/main/resources/schemata/metamorph.xsd b/metamorph/src/main/resources/schemata/metamorph.xsd
index 65ec1a0f2..7d3cc7cd0 100644
--- a/metamorph/src/main/resources/schemata/metamorph.xsd
+++ b/metamorph/src/main/resources/schemata/metamorph.xsd
@@ -587,7 +587,6 @@
-
Lookup table defined by text files
@@ -598,16 +597,21 @@
Unique name of the lookup table
+
+
+ Allow empty values in Map.
+
+
- Filenames
+ Filename(s) referencing the lookup table(s). Can be one
+ filename or a comma-separated list of filenames.
-
+
- String used in the files to separate key from value.
-
+ String used in the files to separate keys from values.
+ The default separator is the tab character.
@@ -795,7 +799,7 @@
- Relace strings based on a replacement table.
+ Replace strings based on a replacement table.
diff --git a/metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java b/metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java
index 388b5389c..e55b06679 100644
--- a/metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java
+++ b/metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java
@@ -47,12 +47,12 @@ public final class FileMapTest {
"" +
"" +
" " +
+ "file-map-test.txt' %s/>" +
"";
@Test
public void shouldLookupValuesInFileBasedMap() {
- assertMorph(receiver, String.format(MORPH, "lookup in"),
+ assertMorph(receiver, String.format(MORPH, "lookup in", ""),
i -> {
i.startRecord("1");
i.literal("1", "gw");
@@ -70,7 +70,7 @@ public void shouldLookupValuesInFileBasedMap() {
@Test
public void shouldWhitelistValuesInFileBasedMap() {
- assertMorph(receiver, String.format(MORPH, "whitelist map"),
+ assertMorph(receiver, String.format(MORPH, "whitelist map", ""),
i -> {
i.startRecord("1");
i.literal("1", "gw");
@@ -89,7 +89,7 @@ public void shouldWhitelistValuesInFileBasedMap() {
@Test
public void shouldReplaceValuesUsingFileBasedMap() {
- assertMorph(receiver, String.format(MORPH, "setreplace map"),
+ assertMorph(receiver, String.format(MORPH, "setreplace map", ""),
i -> {
i.startRecord("1");
i.literal("1", "gw-fj: 1:1");
@@ -105,4 +105,53 @@ public void shouldReplaceValuesUsingFileBasedMap() {
);
}
+ @Test
+ public void shouldReplaceCommaSeparatedValuesUsingFileBasedMapSetting() {
+ assertMorph(receiver, String.format(MORPH, "setreplace map", "separator=\",\""),
+ i -> {
+ i.startRecord("1");
+ i.literal("1", "gw");
+ i.literal("1", "ry\tRyukyuIslands");
+ i.endRecord();
+ },
+ o -> {
+ o.get().startRecord("1");
+ o.get().literal("1", "gw");
+ o.get().literal("1", "Southern");
+ o.get().endRecord();
+ }
+ );
+ }
+
+ @Test
+ public void shouldReplaceEmptyValuesUsingFileBasedMapSetting() {
+ assertMorph(receiver, String.format(MORPH, "setreplace map", "allowEmptyValues=\"true\""),
+ i -> {
+ i.startRecord("1");
+ i.literal("1", "zz");
+ i.endRecord();
+ },
+ o -> {
+ o.get().startRecord("1");
+ o.get().literal("1", "");
+ o.get().endRecord();
+ }
+ );
+ }
+
+ @Test
+ public void shouldNotReplaceEmptyValuesUsingFileBasedMapSetting() {
+ assertMorph(receiver, String.format(MORPH, "setreplace map", ""),
+ i -> {
+ i.startRecord("1");
+ i.literal("1", "zz");
+ i.endRecord();
+ },
+ o -> {
+ o.get().startRecord("1");
+ o.get().literal("1", "zz");
+ o.get().endRecord();
+ }
+ );
+ }
}
diff --git a/metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test.txt b/metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test.txt
index ce9843b05..800404ffe 100644
--- a/metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test.txt
+++ b/metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test.txt
@@ -378,3 +378,4 @@ ykc YukonTerritory
ys Yemen(People'sDemocraticRepublic)
yu SerbiaandMontenegro
za Zambia
+zz