Skip to content
This repository was archived by the owner on May 16, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 52 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -356,14 +356,16 @@ Parameters:

Options:

- `append`: Whether to open files in append mode if they exist. (Default: `false`)
- `compression` (file output only): Compression mode. (Default: `auto`)
- `destination`: Destination to write the record to; may include [format directives](https://docs.oracle.com/javase/8/docs/api/java/util/Formatter.html#syntax) for counter and record ID (in that order). (Default: `stdout`)
- `encoding` (file output only): Encoding used by the underlying writer. (Default: `UTF-8`)
- `footer`: Footer which is output after the record. (Default: `\n`)
- `header`: Header which is output before the record. (Default: Empty string)
- `footer`: Footer which is written at the end of the output. (Default: `\n`)
- `header`: Header which is written at the beginning of the output. (Default: Empty string)
- `id`: Field name which contains the record ID; if found, will be available for inclusion in `prefix` and `destination`. (Default: `_id`)
- `internal`: Whether to print the record's internal representation instead of JSON. (Default: `false`)
- `pretty`: Whether to use pretty printing. (Default: `false`)
- `separator`: Separator which is written after the record. (Default: `\n`)

```perl
print_record(["<prefix>"][, <options>...])
Expand Down Expand Up @@ -553,10 +555,55 @@ join_field("<sourceField>", "<separator>")

Looks up matching values in a map and replaces the field value with this match. External files as well as internal maps can be used.

Parameters:

- `path` (required): Field path to look up.
- `map` (optional): Name or path of the map in which to look up values.

Options:

- `__default`: Default value to use for unknown values. (Default: Old value)
- `delete`: Whether to delete unknown values. (Default: `false`)
- `print_unknown`: Whether to print unknown values. (Default: `false`)

Additional options when printing unknown values:

- `append`: Whether to open files in append mode if they exist. (Default: `true`)
- `compression` (file output only): Compression mode. (Default: `auto`)
- `destination`: Destination to write unknown values to; may include [format directives](https://docs.oracle.com/javase/8/docs/api/java/util/Formatter.html#syntax) for counter and record ID (in that order). (Default: `stdout`)
- `encoding` (file output only): Encoding used by the underlying writer. (Default: `UTF-8`)
- `footer`: Footer which is written at the end of the output. (Default: `\n`)
- `header`: Header which is written at the beginning of the output. (Default: Empty string)
- `id`: Field name which contains the record ID; if found, will be available for inclusion in `destination`. (Default: `_id`)
- `prefix`: Prefix to print before the unknown value; may include [format directives](https://docs.oracle.com/javase/8/docs/api/java/util/Formatter.html#syntax) for counter and record ID (in that order). (Default: Empty string)
- `separator`: Separator which is written after the unknown value. (Default: `\n`)

```perl
lookup("<sourceField>", "<mapFile>", sep_char: ",")
lookup("<sourceField>", "<mapName>")
lookup("<sourceField>", "<mapName>", default: "NA")
lookup("<sourceField>"[, <mapName>][, <options>...])
```

E.g.:

```perl
# local (unnamed) map
lookup("path.to.field", key_1: "value_1", ...)

# internal (named) map
put_map("internal-map", key_1: "value_1", ...)
lookup("path.to.field", "internal-map")

# external file map (implicit)
lookup("path.to.field", "path/to/file", sep_char: ";")

# external file map (explicit)
put_filemap("path/to/file", "file-map", sep_char: ";")
lookup("path.to.field", "file-map")

# with default value
lookup("path.to.field", "map-name", __default: "NA")

# with printing unknown values to a file
lookup("path.to.field", "map-name", print_unknown: "true", destination: "unknown.txt")
```

##### `prepend`
Expand Down
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ subprojects {
'jquery': '3.3.1-1',
'junit_jupiter': '5.8.2',
'junit_platform': '1.4.2',
'metafacture': 'metafacture-core-5.4.1-rc1',
'metafacture': 'metafacture-core-5.4.1-rc3',
'mockito': '2.27.0',
'requirejs': '2.3.6',
'slf4j': '1.7.21',
Expand Down
78 changes: 44 additions & 34 deletions metafix/src/main/java/org/metafacture/metafix/FixMethod.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

package org.metafacture.metafix;

import org.metafacture.framework.StandardEventNames;
import org.metafacture.io.ObjectWriter;
import org.metafacture.metafix.api.FixFunction;
import org.metafacture.metamorph.api.Maps;
import org.metafacture.metamorph.functions.ISBN;
Expand All @@ -34,6 +32,7 @@
import java.util.Map;
import java.util.Random;
import java.util.concurrent.atomic.LongAdder;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.UnaryOperator;
Expand Down Expand Up @@ -241,42 +240,31 @@ private boolean literalString(final String s) {

@Override
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
final String destination = options.getOrDefault("destination", ObjectWriter.STDOUT);
final Value idValue = record.get(options.getOrDefault("id", StandardEventNames.ID));

final boolean internal = getBoolean(options, "internal");
final boolean pretty = getBoolean(options, "pretty");

final LongAdder counter = scopedCounter.computeIfAbsent(metafix, k -> new LongAdder());
counter.increment();

final String id = Value.isNull(idValue) ? "" : idValue.toString();
final String prefix = params.isEmpty() ? "" : String.format(params.get(0), counter.sum(), id);
final ObjectWriter<String> writer = new ObjectWriter<>(String.format(destination, counter.sum(), id));

withOption(options, "compression", writer::setCompression);
withOption(options, "encoding", writer::setEncoding);
withOption(options, "footer", writer::setFooter);
withOption(options, "header", writer::setHeader);
if (!params.isEmpty()) {
options.put("prefix", params.get(0));
}

if (internal) {
if (pretty) {
record.forEach((f, v) -> writer.process(prefix + f + "=" + v));
withWriter(metafix, record, options, scopedCounter, c -> {
if (internal) {
if (pretty) {
record.forEach((f, v) -> c.accept(f + "=" + v));
}
else {
c.accept(record.toString());
}
}
else {
writer.process(prefix + record);
}
}
else {
try {
writer.process(prefix + record.toJson(pretty));
}
catch (final IOException e) {
// Log a warning? Print string representation instead?
try {
c.accept(record.toJson(pretty));
}
catch (final IOException e) {
// Log a warning? Print string representation instead?
}
}
}

writer.closeStream();
});
}
},
random {
Expand Down Expand Up @@ -478,6 +466,8 @@ public void apply(final Metafix metafix, final Record record, final List<String>
}
},
lookup {
private final Map<Metafix, LongAdder> scopedCounter = new HashMap<>();

@Override
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
final Map<String, String> map;
Expand All @@ -501,10 +491,30 @@ public void apply(final Metafix metafix, final Record record, final List<String>
}

final String defaultValue = map.get(Maps.DEFAULT_MAP_KEY); // TODO: Catmandu uses 'default'
record.transform(params.get(0), oldValue -> {
final String newValue = map.getOrDefault(oldValue, defaultValue);
return newValue != null ? newValue : getBoolean(options, "delete") ? null : oldValue;
final boolean delete = getBoolean(options, "delete");
final boolean printUnknown = getBoolean(options, "print_unknown");

final Consumer<Consumer<String>> consumer = c -> record.transform(params.get(0), oldValue -> {
final String newValue = map.get(oldValue);
if (newValue != null) {
return newValue;
}
else {
if (c != null) {
c.accept(oldValue);
}

return defaultValue != null ? defaultValue : delete ? null : oldValue;
}
});

if (printUnknown) {
options.putIfAbsent("append", "true");
withWriter(metafix, record, options, scopedCounter, consumer);
}
else {
consumer.accept(null);
}
}
},
prepend {
Expand Down
36 changes: 36 additions & 0 deletions metafix/src/main/java/org/metafacture/metafix/api/FixFunction.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

package org.metafacture.metafix.api;

import org.metafacture.framework.StandardEventNames;
import org.metafacture.io.ObjectWriter;
import org.metafacture.metafix.Metafix;
import org.metafacture.metafix.Record;
import org.metafacture.metafix.Value;
Expand All @@ -24,8 +26,10 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.LongAdder;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.UnaryOperator;
import java.util.stream.Stream;

@FunctionalInterface
Expand All @@ -43,6 +47,38 @@ default <T> void withOption(final Map<String, String> options, final String key,
}
}

/**
 * Creates an {@link ObjectWriter} for the configured destination, applies the
 * writer-related options to it, hands it to the given consumer and closes the
 * writer's stream afterwards (also when the consumer throws).
 *
 * The options {@code append}, {@code compression}, {@code encoding},
 * {@code footer}, {@code header} and {@code separator} are each passed on to
 * the corresponding writer setter via {@code withOption}.
 *
 * @param options  the options; {@code destination} selects the target and
 *                 falls back to {@link ObjectWriter#STDOUT}
 * @param operator transformation applied to the destination before the writer
 *                 is created; may be {@code null} to use the destination as is
 * @param consumer the action that receives the configured writer
 */
default void withWriter(final Map<String, String> options, final UnaryOperator<String> operator, final Consumer<ObjectWriter<String>> consumer) {
    final String rawDestination = options.getOrDefault("destination", ObjectWriter.STDOUT);

    final String resolvedDestination;
    if (operator == null) {
        resolvedDestination = rawDestination;
    }
    else {
        resolvedDestination = operator.apply(rawDestination);
    }

    final ObjectWriter<String> objectWriter = new ObjectWriter<>(resolvedDestination);

    withOption(options, "append", objectWriter::setAppendIfFileExists, this::getBoolean);
    withOption(options, "compression", objectWriter::setCompression);
    withOption(options, "encoding", objectWriter::setEncoding);
    withOption(options, "footer", objectWriter::setFooter);
    withOption(options, "header", objectWriter::setHeader);
    withOption(options, "separator", objectWriter::setSeparator);

    try {
        consumer.accept(objectWriter);
    }
    finally {
        // Always release the underlying stream, even if the consumer fails.
        objectWriter.closeStream();
    }
}

/**
 * Opens a writer for the current record and passes a string sink to the given
 * consumer; every string sent to the sink is written with the formatted
 * {@code prefix} option prepended.
 *
 * Increments the counter kept for the given Metafix instance in
 * {@code scopedCounter} (creating it on first use). Both the {@code prefix}
 * option and the destination are run through {@link String#format} with the
 * counter value and the record ID (in that order) as arguments. The record ID
 * is read from the field named by the {@code id} option (falling back to
 * {@link StandardEventNames#ID}); an empty string is used if it is null.
 *
 * @param metafix       the Metafix instance that scopes the counter
 * @param record        the record from which the ID value is read
 * @param options       the options ({@code id}, {@code prefix} and the writer
 *                      options)
 * @param scopedCounter the counters, keyed by Metafix instance
 * @param consumer      the action that receives the prefixing string sink
 */
default void withWriter(final Metafix metafix, final Record record, final Map<String, String> options, final Map<Metafix, LongAdder> scopedCounter, final Consumer<Consumer<String>> consumer) {
    final String idField = options.getOrDefault("id", StandardEventNames.ID);
    final Value idValue = record.get(idField);

    final LongAdder counter = scopedCounter.computeIfAbsent(metafix, key -> new LongAdder());
    counter.increment();

    // Format arguments are resolved on each application: counter first, then record ID.
    final UnaryOperator<String> formatter = template -> {
        final long count = counter.sum();

        final String id;
        if (Value.isNull(idValue)) {
            id = "";
        }
        else {
            id = idValue.toString();
        }

        return String.format(template, count, id);
    };

    final String prefixTemplate = options.getOrDefault("prefix", "");
    final String prefix = formatter.apply(prefixTemplate);

    withWriter(options, formatter, writer -> {
        final Consumer<String> sink = line -> writer.process(prefix + line);
        consumer.accept(sink);
    });
}

/**
 * Reads an option as a boolean.
 *
 * @param options the options
 * @param key     the name of the option to read
 * @return the result of {@link Boolean#parseBoolean(String)} for the option
 *         value, i.e. {@code true} only if the value is present and equals
 *         "true" ignoring case
 */
default boolean getBoolean(final Map<String, String> options, final String key) {
    final String value = options.get(key);
    return Boolean.parseBoolean(value);
}
Expand Down
110 changes: 110 additions & 0 deletions metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

import java.io.IOException;
import java.util.Arrays;

/**
Expand Down Expand Up @@ -846,6 +847,115 @@ public void shouldFailLookupInUnknownExternalMap() {
);
}

/**
 * Shared driver for the "print_unknown" lookup tests.
 *
 * Runs a lookup with the local map "Aloha -> Alohaeha, Moin -> Moin zäme" and
 * "print_unknown: 'true'" over two records and checks both the emitted
 * records and the output passed to MetafixTestHelpers.assertStdout.
 *
 * NOTE(review): the looked-up field path comes from the LOOKUP constant,
 * which is defined outside this method; presumably it targets the "title"
 * values - confirm.
 *
 * @param args         additional lookup options, inserted verbatim into the
 *                     fix expression before "print_unknown" (must start with
 *                     ", " when non-empty)
 * @param defaultValue selects the expected "title" literals for unmapped
 *                     values: null expects them unchanged, "__delete"
 *                     expects none of them, any other value expects that
 *                     value in their place
 * @param expected     the expected output handed to assertStdout
 */
private void shouldPrintUnknown(final String args, final String defaultValue, final String expected) {
MetafixTestHelpers.assertStdout(expected, () ->
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
LOOKUP + " Aloha: Alohaeha, 'Moin': 'Moin zäme'" + args + ", print_unknown: 'true')"
),
i -> {
// rec1: two mapped titles (Aloha, Moin) and three unmapped ones
i.startRecord("rec1");
i.literal("name", "moe");
i.literal("title", "Aloha");
i.literal("title", "Moin");
i.literal("title", "Hey");
i.literal("title", "you");
i.literal("title", "there");
i.endRecord();

// rec2: one mapped title (Aloha) and two unmapped ones
i.startRecord("rec2");
i.literal("name", "joe");
i.literal("title", "Aloha");
i.literal("title", "you");
i.literal("title", "too");
i.endRecord();
},
(o, f) -> {
// "__delete" is a test-local marker meaning: expect no literal for unmapped titles
final boolean delete = "__delete".equals(defaultValue);

o.get().startRecord("rec1");
o.get().literal("name", "moe");
o.get().literal("title", "Alohaeha");
o.get().literal("title", "Moin zäme");

if (defaultValue == null) {
o.get().literal("title", "Hey");
o.get().literal("title", "you");
o.get().literal("title", "there");
}
else if (!delete) {
// presumably f.apply(3) expects the literal three times (one per unmapped title in rec1) - confirm
f.apply(3).literal("title", defaultValue);
}

o.get().endRecord();

o.get().startRecord("rec2");
o.get().literal("name", "joe");
o.get().literal("title", "Alohaeha");

if (defaultValue == null) {
o.get().literal("title", "you");
o.get().literal("title", "too");
}
else if (!delete) {
// presumably f.apply(2) expects the literal twice (one per unmapped title in rec2) - confirm
f.apply(2).literal("title", defaultValue);
}

o.get().endRecord();
}
)
);
}

// Without further options, each unmapped value is expected on its own line.
@Test
public void shouldPrintUnknown() {
shouldPrintUnknown("", null, "Hey\nyou\nthere\nyou\ntoo\n");
}

// A default value changes the emitted records, but the expected printed output stays the same.
@Test
public void shouldPrintUnknownWithDefault() {
shouldPrintUnknown(", __default: Tach", "Tach", "Hey\nyou\nthere\nyou\ntoo\n");
}

// Deleting unknown values changes the emitted records, but the expected printed output stays the same.
@Test
public void shouldPrintUnknownWithDelete() {
shouldPrintUnknown(", delete: 'true'", "__delete", "Hey\nyou\nthere\nyou\ntoo\n");
}

// Format directives in the prefix are expected to be filled with the counter (1, 2) and the record ID (rec1, rec2).
@Test
public void shouldPrintUnknownWithPrefix() {
shouldPrintUnknown(", prefix: '<%d:%s>'", null, "<1:rec1>Hey\n<1:rec1>you\n<1:rec1>there\n<2:rec2>you\n<2:rec2>too\n");
}

// With "id: 'name'", the value of the "name" field (moe, joe) is expected in place of the record ID.
@Test
public void shouldPrintUnknownWithPrefixAndIdField() {
shouldPrintUnknown(", prefix: '<%d:%s>', id: 'name'", null, "<1:moe>Hey\n<1:moe>you\n<1:moe>there\n<2:joe>you\n<2:joe>too\n");
}

// The header is expected verbatim (format directives not expanded) and once per record.
@Test
public void shouldPrintUnknownWithHeader() {
shouldPrintUnknown(", header: '<%d:%s>'", null, "<%d:%s>Hey\nyou\nthere\n<%d:%s>you\ntoo\n");
}

// The footer is expected verbatim (format directives not expanded) at the end of each record's output.
@Test
public void shouldPrintUnknownWithFooter() {
shouldPrintUnknown(", footer: '<%d:%s>'", null, "Hey\nyou\nthere<%d:%s>you\ntoo<%d:%s>");
}

// The separator is expected verbatim (format directives not expanded) between the values of a record.
@Test
public void shouldPrintUnknownWithSeparator() {
shouldPrintUnknown(", separator: '<%d:%s>'", null, "Hey<%d:%s>you<%d:%s>there\nyou<%d:%s>too\n");
}

// With a destination, nothing is expected on stdout; the values of both records are expected in the file.
// NOTE(review): presumably `p` is the temp file path supplied by assertTempFile - confirm.
@Test
public void shouldPrintUnknownToFile() throws IOException {
MetafixTestHelpers.assertTempFile("Hey\nyou\nthere\nyou\ntoo\n", p -> shouldPrintUnknown(", destination: '" + p + "'", null, ""));
}

// With "append: 'false'", only the values of the second record are expected in the file.
@Test
public void shouldPrintUnknownToFileWithoutAppend() throws IOException {
MetafixTestHelpers.assertTempFile("you\ntoo\n", p -> shouldPrintUnknown(", destination: '" + p + "', append: 'false'", null, ""));
}

private void assertMap(final String... fixDef) {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(fixDef),
i -> {
Expand Down