Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,8 @@ indent_size = 2
[metafacture-io/src/test/resources/org/metafacture/io/compressed.txt]
insert_final_newline = false

[metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test.txt]
trim_trailing_whitespace = false

[metafacture-runner/src/main/dist/config/java-options.conf]
end_of_line = crlf
65 changes: 50 additions & 15 deletions metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2013, 2014 Deutsche Nationalbibliothek
* Copyright 2013, 2014, 2021 Deutsche Nationalbibliothek et al
*
* Licensed under the Apache License, Version 2.0 the "License";
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,6 +29,7 @@
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
Expand All @@ -37,9 +38,16 @@
import java.util.regex.Pattern;

/**
* Provides a {@link Map} based on a file. The file is supposed to be UTF-8
* encoded. The default separator is {@code \t}. <strong>Important:</strong>
* Lines that are not split in two parts by the separator are ignored!
* Provides a {@link Map} based on one or more files, given either as a single
* file or as a comma-separated list of files. The files are expected to be
* UTF-8 encoded. The default separator is {@code \t}.
*
* By setting {@link #setAllowEmptyValues allowEmptyValues} to {@code true},
* the values in the {@link Map} can be empty, thus enabling e.g.
* {@link org.metafacture.metamorph.functions.SetReplace} to remove matching
* keys.
*
* <strong>Important:</strong> All other lines that the separator does not
* split into exactly two parts are ignored!
*
* @author Markus Michael Geipel
*/
Expand All @@ -48,34 +56,55 @@ public final class FileMap extends AbstractReadOnlyMap<String, String> {
private final Map<String, String> map = new HashMap<>();

private Pattern split = Pattern.compile("\t", Pattern.LITERAL);
private boolean allowEmptyValues;
private boolean isUninitialized = true;
private ArrayList<String> filenames = new ArrayList<>();

/**
* Creates an instance of {@link FileMap}. No file is read at construction
* time; files registered via {@link #setFile} or {@link #setFiles} are
* loaded on the first call to {@link #get} or {@link #keySet}.
*/
public FileMap() {
}

/**
* Loads all registered files into the map and marks this instance as
* initialized. The flag is cleared only after {@code loadFiles()} has
* returned, so if loading throws, the flag stays set and the next access
* runs the initialization again.
*/
private void init() {
loadFiles();
isUninitialized = false;
}

/**
* Sets whether to allow empty values in the {@link Map} or ignore these
* entries. When enabled, a line consisting of a key followed by the
* separator and nothing else is stored with an empty string as its value;
* otherwise such a line is ignored.
*
* The setting is applied when the files are loaded, which happens on the
* first lookup.
*
* <strong>Default value: false</strong>
*
* @param allowEmptyValues true if empty values in the Map are allowed
*/
public void setAllowEmptyValues(final boolean allowEmptyValues) {
this.allowEmptyValues = allowEmptyValues;
}

/**
* Sets a comma separated list of files which are then passed to
* {@link #setFile}.
* Sets a comma-separated list of files which provide the {@link Map}.
* Whitespace around the commas is ignored.
*
* @param files a comma separated list of files
*/
public void setFiles(final String files) {
final String[] parts = files.split("\\s*,\\s*");
for (final String part : parts) {
setFile(part);
}
Collections.addAll(filenames, files.split("\\s*,\\s*"));
}

/**
* Provides a {@link Map} based on a file. The file is supposed to be UTF-8
* encoded. The default separator is {@code \t}. <strong>Important:</strong>
* Lines that are not split in two parts by the separator are ignored!
*
* Sets a file which provides the {@link Map}.
* @param file the file
*/
public void setFile(final String file) {
    // The file is only registered here; it is read when the map is first accessed.
    filenames.add(file);
}

/**
 * Reads every registered file, in registration order, into the map.
 */
private void loadFiles() {
    for (final String filename : filenames) {
        loadFile(filename);
    }
}

private void loadFile(final String file) {
try (
InputStream stream = openStream(file);
BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))
Expand All @@ -85,7 +114,7 @@ public void setFile(final String file) {
if (line.isEmpty()) {
continue;
}
final String[] parts = split.split(line);
final String[] parts = allowEmptyValues ? split.split(line, -1) : split.split(line);
if (parts.length == 2) {
map.put(parts[0], parts[1]);
}
Expand Down Expand Up @@ -147,11 +176,17 @@ public void setSeparator(final String delimiter) {

@Override
public String get(final Object key) {
    // Files are read lazily: the first access triggers loading.
    final boolean loadPending = isUninitialized;
    if (loadPending) {
        init();
    }
    final String value = map.get(key);
    return value;
}

@Override
public Set<String> keySet() {
    // Files are read lazily: the first access triggers loading.
    final boolean loadPending = isUninitialized;
    if (loadPending) {
        init();
    }
    // Hand out a read-only view so callers cannot modify the backing map.
    final Set<String> keys = map.keySet();
    return Collections.unmodifiableSet(keys);
}

Expand Down
18 changes: 11 additions & 7 deletions metamorph/src/main/resources/schemata/metamorph.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,6 @@
</complexType>
</element>


<element name="filemap">
<annotation>
<documentation>Lookup table defined by text files</documentation>
Expand All @@ -598,16 +597,21 @@
<documentation>Unique name of the lookup table</documentation>
</annotation>
</attribute>
<attribute name="allowEmptyValues" type="boolean" use="optional" default="false">
<annotation>
<documentation>Allow empty values in Map.</documentation>
</annotation>
</attribute>
<attribute name="files" type="string" use="required">
<annotation>
<documentation>Filenames</documentation>
<documentation>Filename(s) referencing the lookup table(s). Can be one
filename or a comma separated list of filenames.</documentation>
</annotation>
</attribute>
<attribute name="separator" type="string" use="optional"
default="\t">
<attribute name="separator" type="string" use="optional" default="&#09;">
<annotation>
<documentation>String used in the files to separate key from value.
</documentation>
<documentation>String used in the files to separate keys from values.
The default separator is the tab character.</documentation>
</annotation>
</attribute>
<attribute ref="xml:base" />
Expand Down Expand Up @@ -795,7 +799,7 @@

<element name="setreplace">
<annotation>
<documentation>Relace strings based on a replacement table.
<documentation>Replace strings based on a replacement table.
</documentation>
</annotation>
<complexType>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ public final class FileMapTest {
"</rules>" +
"<maps>" +
" <filemap name='map1' files='org/metafacture/metamorph/maps/" +
"file-map-test.txt' />" +
"file-map-test.txt' %s/>" +
"</maps>";

@Test
public void shouldLookupValuesInFileBasedMap() {
assertMorph(receiver, String.format(MORPH, "lookup in"),
assertMorph(receiver, String.format(MORPH, "lookup in", ""),
i -> {
i.startRecord("1");
i.literal("1", "gw");
Expand All @@ -70,7 +70,7 @@ public void shouldLookupValuesInFileBasedMap() {

@Test
public void shouldWhitelistValuesInFileBasedMap() {
assertMorph(receiver, String.format(MORPH, "whitelist map"),
assertMorph(receiver, String.format(MORPH, "whitelist map", ""),
i -> {
i.startRecord("1");
i.literal("1", "gw");
Expand All @@ -89,7 +89,7 @@ public void shouldWhitelistValuesInFileBasedMap() {

@Test
public void shouldReplaceValuesUsingFileBasedMap() {
assertMorph(receiver, String.format(MORPH, "setreplace map"),
assertMorph(receiver, String.format(MORPH, "setreplace map", ""),
i -> {
i.startRecord("1");
i.literal("1", "gw-fj: 1:1");
Expand All @@ -105,4 +105,53 @@ public void shouldReplaceValuesUsingFileBasedMap() {
);
}

@Test
public void shouldReplaceCommaSeparatedValuesUsingFileBasedMapSetting() {
    // Configure the file map to split lines on a comma instead of the default tab.
    // NOTE(review): presumably the test file contains a line whose only comma
    // separates "ry\tRyukyuIslands" from "Southern" -- confirm against file-map-test.txt.
    final String morphDef = String.format(MORPH, "setreplace map", "separator=\",\"");
    assertMorph(receiver, morphDef,
        in -> {
            in.startRecord("1");
            in.literal("1", "gw");
            in.literal("1", "ry\tRyukyuIslands");
            in.endRecord();
        },
        out -> {
            out.get().startRecord("1");
            out.get().literal("1", "gw");
            out.get().literal("1", "Southern");
            out.get().endRecord();
        }
    );
}

@Test
public void shouldReplaceEmptyValuesUsingFileBasedMapSetting() {
    // With empty values allowed, the value-less "zz" entry of the test file
    // is expected to map to the empty string.
    final String morphDef = String.format(MORPH, "setreplace map", "allowEmptyValues=\"true\"");
    assertMorph(receiver, morphDef,
        in -> {
            in.startRecord("1");
            in.literal("1", "zz");
            in.endRecord();
        },
        out -> {
            out.get().startRecord("1");
            out.get().literal("1", "");
            out.get().endRecord();
        }
    );
}

@Test
public void shouldNotReplaceEmptyValuesUsingFileBasedMapSetting() {
    // Without the allowEmptyValues attribute the value-less "zz" entry is
    // expected to be skipped, so the literal passes through unchanged.
    final String morphDef = String.format(MORPH, "setreplace map", "");
    assertMorph(receiver, morphDef,
        in -> {
            in.startRecord("1");
            in.literal("1", "zz");
            in.endRecord();
        },
        out -> {
            out.get().startRecord("1");
            out.get().literal("1", "zz");
            out.get().endRecord();
        }
    );
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -378,3 +378,4 @@ ykc YukonTerritory
ys Yemen(People'sDemocraticRepublic)
yu SerbiaandMontenegro
za Zambia
zz