Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 38 additions & 22 deletions metafacture-io/src/main/java/org/metafacture/io/FileOpener.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
*/
@Description("Opens a file.")
@In(String.class)
@Out(java.io.Reader.class)
@Out(Reader.class)
@FluxCommand("open-file")
public final class FileOpener extends DefaultObjectPipe<String, ObjectReceiver<Reader>> {

Expand All @@ -66,8 +66,7 @@ public String getEncoding() {
/**
* Sets the encoding used to open the resource.
*
* @param encoding
* new encoding
* @param encoding new encoding
*/
public void setEncoding(final String encoding) {
this.encoding = encoding;
Expand All @@ -83,7 +82,7 @@ public FileCompression getCompression() {
}

/**
* * Sets the compression of the file.
* Sets the compression of the file.
*
* @param compression the {@link FileCompression}
*/
Expand All @@ -94,7 +93,7 @@ public void setCompression(final FileCompression compression) {
/**
* Sets the compression of the file.
*
* @param compression the name of the compression.
* @param compression the name of the compression
*/
public void setCompression(final String compression) {
setCompression(FileCompression.valueOf(compression.toUpperCase()));
Expand All @@ -112,35 +111,52 @@ public boolean getDecompressConcatenated() {
/**
* Flags whether concatenated compressed content should be decompressed.
*
* @param decompressConcatenated true if file compression should be decompresses
* concatenated
* @param decompressConcatenated true to enable decompression of concatenated compressed content
*/
public void setDecompressConcatenated(final boolean decompressConcatenated) {
// Only stored here; the flag is handed to compression.createDecompressor(...) when a stream is opened.
this.decompressConcatenated = decompressConcatenated;
}

@Override
public void process(final String file) {
/**
* Opens the file with the given name and returns a {@link Reader} for it.
*
* Creates a {@link FileInputStream} for {@code file} and delegates to
* {@link #open(InputStream)}, which applies the configured compression
* and encoding.
*
* @param file the name of the file to open
* @return a Reader for the opened file
* @throws IOException if an I/O error occurs
*/
public Reader open(final String file) throws IOException {
return open(new FileInputStream(file));
}

/**
* Opens a file stream.
*
* @param stream the stream
* @return a Reader
* @throws IOException if an I/O error occurs
*/
public Reader open(final InputStream stream) throws IOException {
try {
final InputStream fileStream = new FileInputStream(file);
final InputStream decompressor = compression.createDecompressor(stream, decompressConcatenated);
try {
final InputStream decompressor = compression.createDecompressor(fileStream, decompressConcatenated);
try {

final Reader reader = new InputStreamReader(new BOMInputStream(
decompressor), encoding);
getReceiver().process(reader);
}
catch (final IOException | MetafactureException e) {
decompressor.close();
throw e;
}
return new InputStreamReader(new BOMInputStream(decompressor), encoding);
}
catch (final IOException | MetafactureException e) {
fileStream.close();
decompressor.close();
throw e;
}
}
catch (final IOException | MetafactureException e) {
stream.close();
throw e;
}
}

@Override
public void process(final String file) {
try {
getReceiver().process(open(file));
}
catch (final IOException e) {
throw new MetafactureException(e);
}
Expand Down
1 change: 1 addition & 0 deletions metamorph/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies {
api project(':metamorph-api')
implementation project(':metafacture-commons')
implementation project(':metafacture-flowcontrol')
implementation project(':metafacture-io')
implementation project(':metafacture-mangling')
implementation project(':metafacture-javaintegration')
implementation 'org.slf4j:slf4j-api:1.7.21'
Expand Down
53 changes: 42 additions & 11 deletions metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package org.metafacture.metamorph.maps;

import org.metafacture.io.FileOpener;
import org.metafacture.metamorph.api.MorphExecutionException;
import org.metafacture.metamorph.api.helpers.AbstractReadOnlyMap;

Expand All @@ -24,11 +25,10 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
Expand All @@ -38,8 +38,11 @@
import java.util.regex.Pattern;

/**
* Provides a {@link Map} based on files. Can be one file or a comma separated list of files.
* The files are supposed to be UTF-8 encoded. The default separator is {@code \t}.
* Provides a {@link Map} based on files. Can be a single file or a
* comma-separated list of files.
*
* The default {@link #setEncoding encoding} is UTF-8.
* The default {@link #setSeparator separator} is {@code \t}.
*
* By setting {@link #allowEmptyValues} to {@code true} the values in the
* {@link Map} can be empty thus enabling e.g.
Expand All @@ -53,6 +56,7 @@
*/
public final class FileMap extends AbstractReadOnlyMap<String, String> {

private final FileOpener fileOpener = new FileOpener();
private final Map<String, String> map = new HashMap<>();

private Pattern split = Pattern.compile("\t", Pattern.LITERAL);
Expand Down Expand Up @@ -100,17 +104,45 @@ public void setFile(final String file) {
Collections.addAll(filenames, file);
}

/**
* Sets the encoding used to open the map file(s).
*
* The value is forwarded unchanged to the internal {@link FileOpener}.
*
* @param encoding new encoding
*/
public void setEncoding(final String encoding) {
fileOpener.setEncoding(encoding);
}

/**
* Sets the compression of the map file(s) by name.
*
* The name is forwarded to {@link FileOpener#setCompression(String)}, which
* upper-cases it and resolves it via {@code FileCompression.valueOf}.
*
* NOTE(review): the upper-casing in {@code FileOpener} calls
* {@code toUpperCase()} without an explicit locale, so the lookup may
* depend on the default locale - confirm whether that is intended.
*
* @param compression the name of the compression
*/
public void setCompression(final String compression) {
fileOpener.setCompression(compression);
}

/**
* Flags whether concatenated compressed content should be decompressed.
*
* The flag is forwarded unchanged to the internal {@link FileOpener}.
*
* @param decompressConcatenated true to enable decompression of concatenated compressed content
*/
public void setDecompressConcatenated(final boolean decompressConcatenated) {
fileOpener.setDecompressConcatenated(decompressConcatenated);
}

/**
 * Invokes {@link #loadFile(String)} once for every entry of
 * {@code filenames}, in the collection's iteration order.
 */
private void loadFiles() {
    for (final String filename : filenames) {
        loadFile(filename);
    }
}

private void loadFile(final String file) {
try (
InputStream stream = openStream(file);
BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))
Reader reader = fileOpener.open(stream);
BufferedReader br = new BufferedReader(reader)
) {
String line;
while ((line = reader.readLine()) != null) {
while ((line = br.readLine()) != null) {
if (line.isEmpty()) {
continue;
}
Expand All @@ -127,10 +159,9 @@ private void loadFile(final String file) {

private InputStream openStream(final String file) {
return openAsFile(file)
.orElseGet(() -> openAsResource(file)
.orElseGet(() -> openAsUrl(file)
.orElseThrow(() -> new MorphExecutionException(
"File not found: " + file))));
.orElseGet(() -> openAsResource(file)
.orElseGet(() -> openAsUrl(file)
.orElseThrow(() -> new MorphExecutionException("File not found: " + file))));
}

private Optional<InputStream> openAsFile(final String file) {
Expand Down Expand Up @@ -166,7 +197,7 @@ private Optional<InputStream> openAsUrl(final String file) {
/**
* Sets the separator.
*
* <strong>Default value: {@code \t} </strong>
* <strong>Default value: {@code \t}</strong>
*
* @param delimiter the separator
*/
Expand Down
15 changes: 15 additions & 0 deletions metamorph/src/main/resources/schemata/metamorph.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,21 @@
<documentation>Allow empty values in Map.</documentation>
</annotation>
</attribute>
<attribute name="compression" type="string" use="optional" default="auto">
<annotation>
<documentation>Sets the compression of the file.</documentation>
</annotation>
</attribute>
<attribute name="decompressConcatenated" type="boolean" use="optional" default="false">
<annotation>
<documentation>Flags whether concatenated compressed content should be decompressed.</documentation>
</annotation>
</attribute>
<attribute name="encoding" type="string" use="optional" default="UTF-8">
<annotation>
<documentation>Sets the encoding used to open the resource.</documentation>
</annotation>
</attribute>
<attribute name="files" type="string" use="required">
<annotation>
<documentation>Filename(s) referencing the lookup table(s). Can be one
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,12 @@ public final class FileMapTest {
" </data>" +
"</rules>" +
"<maps>" +
" <filemap name='map1' files='org/metafacture/metamorph/maps/" +
"file-map-test.txt' %s/>" +
" <filemap name='map1' files='org/metafacture/metamorph/maps/%s' %s/>" +
"</maps>";

@Test
public void shouldLookupValuesInFileBasedMap() {
assertMorph(receiver, String.format(MORPH, "lookup in", ""),
assertMorph(receiver, buildMorph("lookup in", ""),
i -> {
i.startRecord("1");
i.literal("1", "gw");
Expand All @@ -70,7 +69,7 @@ public void shouldLookupValuesInFileBasedMap() {

@Test
public void shouldWhitelistValuesInFileBasedMap() {
assertMorph(receiver, String.format(MORPH, "whitelist map", ""),
assertMorph(receiver, buildMorph("whitelist map", ""),
i -> {
i.startRecord("1");
i.literal("1", "gw");
Expand All @@ -89,7 +88,7 @@ public void shouldWhitelistValuesInFileBasedMap() {

@Test
public void shouldReplaceValuesUsingFileBasedMap() {
assertMorph(receiver, String.format(MORPH, "setreplace map", ""),
assertMorph(receiver, buildMorph("setreplace map", ""),
i -> {
i.startRecord("1");
i.literal("1", "gw-fj: 1:1");
Expand All @@ -107,7 +106,7 @@ public void shouldReplaceValuesUsingFileBasedMap() {

@Test
public void shouldReplaceCommaSeparatedValuesUsingFileBasedMapSetting() {
assertMorph(receiver, String.format(MORPH, "setreplace map", "separator=\",\""),
assertMorph(receiver, buildMorph("setreplace map", "separator=\",\""),
i -> {
i.startRecord("1");
i.literal("1", "gw");
Expand All @@ -125,7 +124,7 @@ public void shouldReplaceCommaSeparatedValuesUsingFileBasedMapSetting() {

@Test
public void shouldReplaceEmptyValuesUsingFileBasedMapSetting() {
assertMorph(receiver, String.format(MORPH, "setreplace map", "allowEmptyValues=\"true\""),
assertMorph(receiver, buildMorph("setreplace map", "allowEmptyValues=\"true\""),
i -> {
i.startRecord("1");
i.literal("1", "zz");
Expand All @@ -141,7 +140,7 @@ public void shouldReplaceEmptyValuesUsingFileBasedMapSetting() {

@Test
public void shouldNotReplaceEmptyValuesUsingFileBasedMapSetting() {
assertMorph(receiver, String.format(MORPH, "setreplace map", ""),
assertMorph(receiver, buildMorph("setreplace map", ""),
i -> {
i.startRecord("1");
i.literal("1", "zz");
Expand All @@ -154,4 +153,65 @@ public void shouldNotReplaceEmptyValuesUsingFileBasedMapSetting() {
}
);
}

// Uses the map file "file-map-test.txt.gz" with no extra filemap options and
// expects the input literals "gw" and "fj" to be emitted as "Germany" and "Fiji".
@Test
public void shouldLookupValuesInGzipFileMap() {
assertMorph(receiver, buildMorph("lookup in", "file-map-test.txt.gz", ""),
i -> {
i.startRecord("1");
i.literal("1", "gw");
i.literal("1", "fj");
i.endRecord();
},
o -> {
o.get().startRecord("1");
o.get().literal("1", "Germany");
o.get().literal("1", "Fiji");
o.get().endRecord();
}
);
}

// Uses the map file "file-map-test.txt.bgzf" without the decompressConcatenated
// option and expects the output record to contain no literals for "gw" and "fj".
@Test
public void shouldNotLookupValuesInBlockedGzipFileMapWithoutDecompressConcatenated() {
assertMorph(receiver, buildMorph("lookup in", "file-map-test.txt.bgzf", ""),
i -> {
i.startRecord("1");
i.literal("1", "gw");
i.literal("1", "fj");
i.endRecord();
},
o -> {
o.get().startRecord("1");
o.get().endRecord();
}
);
}

// Uses the map file "file-map-test.txt.bgzf" with decompressConcatenated="true"
// and expects the input literals "gw" and "fj" to be emitted as "Germany" and "Fiji".
@Test
public void shouldLookupValuesInBlockedGzipFileMap() {
assertMorph(receiver, buildMorph("lookup in", "file-map-test.txt.bgzf", "decompressConcatenated=\"true\""),
i -> {
i.startRecord("1");
i.literal("1", "gw");
i.literal("1", "fj");
i.endRecord();
},
o -> {
o.get().startRecord("1");
o.get().literal("1", "Germany");
o.get().literal("1", "Fiji");
o.get().endRecord();
}
);
}

// Builds the morph definition for the default map file "file-map-test.txt";
// delegates to the three-argument overload.
private String buildMorph(final String data, final String options) {
return buildMorph(data, "file-map-test.txt", options);
}

// Fills the MORPH template with, in order: data, map and options.
// In the visible part of the template, map is the file name appended to
// "org/metafacture/metamorph/maps/" and options is inserted as additional
// attribute text of the filemap element; the placeholder for data lies in a
// part of the template not shown here (presumably the data element - confirm).
private String buildMorph(final String data, final String map, final String options) {
return String.format(MORPH, data, map, options);
}

}
Binary file not shown.
Binary file not shown.