Skip to content

Commit 97d89c6

Browse files
authored
Refactor FST.saveMetadata() to FSTMetadata.save() (#13549)
* lazily write the FST padding byte
* Also write the pad byte when there is emptyOutput
* add comment
* Make Lucene90BlockTreeTermsWriter to write FST off-heap
* Add change log
* Tidy code & Add comments
* use temp IndexOutput for FST writing
* Use IOUtils to delete files
* Update CHANGES.txt
* Update CHANGES.txt
1 parent af9a2b9 commit 97d89c6

File tree

1 file changed

+55
-47
lines changed
  • lucene/core/src/java/org/apache/lucene/util/fst

1 file changed

+55
-47
lines changed

lucene/core/src/java/org/apache/lucene/util/fst/FST.java

Lines changed: 55 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -528,56 +528,10 @@ public FSTMetadata<T> getMetadata() {
528528
* @param out the DataOutput to write the FST bytes to
529529
*/
530530
public void save(DataOutput metaOut, DataOutput out) throws IOException {
531-
saveMetadata(metaOut);
531+
metadata.save(metaOut);
532532
fstReader.writeTo(out);
533533
}
534534

535-
/**
536-
* Save the metadata to a DataOutput
537-
*
538-
* @param metaOut the DataOutput to write the metadata to
539-
*/
540-
public void saveMetadata(DataOutput metaOut) throws IOException {
541-
CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT);
542-
// TODO: really we should encode this as an arc, arriving
543-
// to the root node, instead of special casing here:
544-
if (metadata.emptyOutput != null) {
545-
// Accepts empty string
546-
metaOut.writeByte((byte) 1);
547-
548-
// Serialize empty-string output:
549-
ByteBuffersDataOutput ros = new ByteBuffersDataOutput();
550-
outputs.writeFinalOutput(metadata.emptyOutput, ros);
551-
byte[] emptyOutputBytes = ros.toArrayCopy();
552-
int emptyLen = emptyOutputBytes.length;
553-
554-
// reverse
555-
final int stopAt = emptyLen / 2;
556-
int upto = 0;
557-
while (upto < stopAt) {
558-
final byte b = emptyOutputBytes[upto];
559-
emptyOutputBytes[upto] = emptyOutputBytes[emptyLen - upto - 1];
560-
emptyOutputBytes[emptyLen - upto - 1] = b;
561-
upto++;
562-
}
563-
metaOut.writeVInt(emptyLen);
564-
metaOut.writeBytes(emptyOutputBytes, 0, emptyLen);
565-
} else {
566-
metaOut.writeByte((byte) 0);
567-
}
568-
final byte t;
569-
if (metadata.inputType == INPUT_TYPE.BYTE1) {
570-
t = 0;
571-
} else if (metadata.inputType == INPUT_TYPE.BYTE2) {
572-
t = 1;
573-
} else {
574-
t = 2;
575-
}
576-
metaOut.writeByte(t);
577-
metaOut.writeVLong(metadata.startNode);
578-
metaOut.writeVLong(numBytes());
579-
}
580-
581535
/** Writes an automaton to a file. */
582536
public void save(final Path path) throws IOException {
583537
try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(path))) {
@@ -1249,5 +1203,59 @@ public FSTMetadata(
12491203
public int getVersion() {
12501204
return version;
12511205
}
1206+
1207+
/**
 * Returns the output stored for the empty string.
 *
 * @return the empty-string output; {@code null} is what {@code save(DataOutput)} treats as "the
 *     empty string is not accepted"
 */
public T getEmptyOutput() {
1208+
return emptyOutput;
1209+
}
1210+
1211+
/**
 * Returns the {@code numBytes} value held by this metadata, i.e. the value that {@code
 * save(DataOutput)} writes as its final vLong.
 *
 * <p>NOTE(review): presumably the size in bytes of the FST body that accompanies this metadata —
 * confirm against the code that populates the field.
 */
public long getNumBytes() {
1212+
return numBytes;
1213+
}
1214+
1215+
/**
1216+
* Save the metadata to a DataOutput
1217+
*
* <p>Written in this order: the codec header ({@code FILE_FORMAT_NAME}, {@code VERSION_CURRENT});
* one flag byte (1 if there is an empty-string output, 0 otherwise); when the flag is 1, a vInt
* length followed by the serialized empty-string output with its bytes reversed; one input-type
* byte (0 = BYTE1, 1 = BYTE2, 2 = any other input type); {@code startNode} as a vLong; {@code
* numBytes} as a vLong.
*
1218+
* @param metaOut the DataOutput to write the metadata to
* @throws IOException if writing to {@code metaOut} fails
1219+
*/
1220+
public void save(DataOutput metaOut) throws IOException {
1221+
CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT);
1222+
// TODO: really we should encode this as an arc, arriving
1223+
// to the root node, instead of special casing here:
1224+
if (emptyOutput != null) {
1225+
// Accepts empty string
1226+
metaOut.writeByte((byte) 1);
1227+
1228+
// Serialize empty-string output:
// (into a temporary in-memory buffer first, so its byte length is known before it is written)
1229+
ByteBuffersDataOutput ros = new ByteBuffersDataOutput();
1230+
outputs.writeFinalOutput(emptyOutput, ros);
1231+
byte[] emptyOutputBytes = ros.toArrayCopy();
1232+
int emptyLen = emptyOutputBytes.length;
1233+
1234+
// reverse the serialized bytes in place, swapping from both ends toward the middle
// NOTE(review): presumably the loader consumes these bytes back-to-front — confirm against
// the code that reads this metadata.
1235+
final int stopAt = emptyLen / 2;
1236+
int upto = 0;
1237+
while (upto < stopAt) {
1238+
final byte b = emptyOutputBytes[upto];
1239+
emptyOutputBytes[upto] = emptyOutputBytes[emptyLen - upto - 1];
1240+
emptyOutputBytes[emptyLen - upto - 1] = b;
1241+
upto++;
1242+
}
1243+
// Length-prefixed payload: vInt byte count, then the reversed bytes.
metaOut.writeVInt(emptyLen);
1244+
metaOut.writeBytes(emptyOutputBytes, 0, emptyLen);
1245+
} else {
1246+
// No empty-string output: write only the 0 flag byte.
metaOut.writeByte((byte) 0);
1247+
}
1248+
// Encode the input type as one byte: 0 = BYTE1, 1 = BYTE2, 2 = anything else.
final byte t;
1249+
if (inputType == INPUT_TYPE.BYTE1) {
1250+
t = 0;
1251+
} else if (inputType == INPUT_TYPE.BYTE2) {
1252+
t = 1;
1253+
} else {
1254+
t = 2;
1255+
}
1256+
metaOut.writeByte(t);
1257+
metaOut.writeVLong(startNode);
1258+
metaOut.writeVLong(numBytes);
1259+
}
12521260
}
12531261
}

0 commit comments

Comments
 (0)