Skip to content

Commit 8ec88db

Browse files
Fokkonastra
authored andcommitted
Core,Open-API: Don't expose the last-column-id (apache#11514)
* Core,Open-API: Don't expose the `last-column-id` Okay, I've added this to the spec a while ago: apache#7445 But I think this was a mistake, and we should not expose this to the public APIs, as it is much better to track this internally. I noticed this while reviewing apache/iceberg-rust#587 Removing this as part of the APIs in Java, and the Open-API update makes it much more resilient, and don't require the clients to compute this value. For example. when there are two conflicting schema changes, the last-column-id must be recomputed correctly when doing the retry operation. * Update the tests as well * Add `deprecation` flag * Wording Co-authored-by: Eduard Tudenhoefner <[email protected]> * Wording Co-authored-by: Eduard Tudenhoefner <[email protected]> * Wording * Thanks Ryan! * Remove `LOG` --------- Co-authored-by: Eduard Tudenhoefner <[email protected]>
1 parent 1db36dc commit 8ec88db

File tree

13 files changed

+67
-51
lines changed

13 files changed

+67
-51
lines changed

aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ public void testSetTableInputInformationWithRemovedColumns() {
238238

239239
Schema newSchema =
240240
new Schema(Types.NestedField.required(1, "x", Types.StringType.get(), "comment1"));
241-
tableMetadata = tableMetadata.updateSchema(newSchema, 3);
241+
tableMetadata = tableMetadata.updateSchema(newSchema);
242242
IcebergToGlueConverter.setTableInputInformation(actualTableInputBuilder, tableMetadata);
243243
TableInput actualTableInput = actualTableInputBuilder.build();
244244

core/src/main/java/org/apache/iceberg/MetadataUpdate.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@ class AddSchema implements MetadataUpdate {
8686
private final Schema schema;
8787
private final int lastColumnId;
8888

89+
public AddSchema(Schema schema) {
90+
this(schema, schema.highestFieldId());
91+
}
92+
93+
/**
94+
* Set the schema
95+
*
96+
* @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use AddSchema(schema).
97+
*/
98+
@Deprecated
8999
public AddSchema(Schema schema, int lastColumnId) {
90100
this.schema = schema;
91101
this.lastColumnId = lastColumnId;

core/src/main/java/org/apache/iceberg/SchemaUpdate.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ public Schema apply() {
444444

445445
@Override
446446
public void commit() {
447-
TableMetadata update = applyChangesToMetadata(base.updateSchema(apply(), lastColumnId));
447+
TableMetadata update = applyChangesToMetadata(base.updateSchema(apply()));
448448
ops.commit(base, update);
449449
}
450450

core/src/main/java/org/apache/iceberg/TableMetadata.java

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -563,10 +563,23 @@ public TableMetadata withUUID() {
563563
return new Builder(this).assignUUID().build();
564564
}
565565

566+
/**
567+
* Updates the schema
568+
*
569+
* @deprecated since 1.8.0, will be removed in 1.9.0 or 2.0.0, use updateSchema(schema).
570+
*/
571+
@Deprecated
566572
public TableMetadata updateSchema(Schema newSchema, int newLastColumnId) {
567573
return new Builder(this).setCurrentSchema(newSchema, newLastColumnId).build();
568574
}
569575

576+
/** Updates the schema */
577+
public TableMetadata updateSchema(Schema newSchema) {
578+
return new Builder(this)
579+
.setCurrentSchema(newSchema, Math.max(this.lastColumnId, newSchema.highestFieldId()))
580+
.build();
581+
}
582+
570583
// The caller is responsible to pass a newPartitionSpec with correct partition field IDs
571584
public TableMetadata updatePartitionSpec(PartitionSpec newPartitionSpec) {
572585
return new Builder(this).setDefaultPartitionSpec(newPartitionSpec).build();
@@ -1082,8 +1095,18 @@ public Builder setCurrentSchema(int schemaId) {
10821095
return this;
10831096
}
10841097

1098+
public Builder addSchema(Schema schema) {
1099+
addSchemaInternal(schema, Math.max(lastColumnId, schema.highestFieldId()));
1100+
return this;
1101+
}
1102+
1103+
/**
1104+
* Add a new schema.
1105+
*
1106+
* @deprecated since 1.8.0, will be removed in 1.9.0 or 2.0.0, use AddSchema(schema).
1107+
*/
1108+
@Deprecated
10851109
public Builder addSchema(Schema schema, int newLastColumnId) {
1086-
// TODO: remove requirement for newLastColumnId
10871110
addSchemaInternal(schema, newLastColumnId);
10881111
return this;
10891112
}

core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -954,7 +954,7 @@ private static List<MetadataUpdate> createChanges(TableMetadata meta) {
954954
changes.add(new MetadataUpdate.UpgradeFormatVersion(meta.formatVersion()));
955955

956956
Schema schema = meta.schema();
957-
changes.add(new MetadataUpdate.AddSchema(schema, schema.highestFieldId()));
957+
changes.add(new MetadataUpdate.AddSchema(schema));
958958
changes.add(new MetadataUpdate.SetCurrentSchema(-1));
959959

960960
PartitionSpec spec = meta.spec();

core/src/main/java/org/apache/iceberg/view/ViewMetadata.java

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -372,20 +372,15 @@ private int addSchemaInternal(Schema schema) {
372372
newSchema = schema;
373373
}
374374

375-
int highestFieldId = Math.max(highestFieldId(), newSchema.highestFieldId());
376375
schemas.add(newSchema);
377376
schemasById.put(newSchema.schemaId(), newSchema);
378-
changes.add(new MetadataUpdate.AddSchema(newSchema, highestFieldId));
377+
changes.add(new MetadataUpdate.AddSchema(newSchema));
379378

380379
this.lastAddedSchemaId = newSchemaId;
381380

382381
return newSchemaId;
383382
}
384383

385-
private int highestFieldId() {
386-
return schemas.stream().map(Schema::highestFieldId).max(Integer::compareTo).orElse(0);
387-
}
388-
389384
private int reuseOrCreateNewSchemaId(Schema newSchema) {
390385
// if the schema already exists, use its id; otherwise use the highest id + 1
391386
int newSchemaId = INITIAL_SCHEMA_ID;

core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -112,23 +112,9 @@ public void testUpgradeFormatVersionFromJson() {
112112
public void testAddSchemaFromJson() {
113113
String action = MetadataUpdateParser.ADD_SCHEMA;
114114
Schema schema = ID_DATA_SCHEMA;
115-
int lastColumnId = schema.highestFieldId();
116-
String json =
117-
String.format(
118-
"{\"action\":\"add-schema\",\"schema\":%s,\"last-column-id\":%d}",
119-
SchemaParser.toJson(schema), lastColumnId);
120-
MetadataUpdate actualUpdate = new MetadataUpdate.AddSchema(schema, lastColumnId);
121-
assertEquals(action, actualUpdate, MetadataUpdateParser.fromJson(json));
122-
}
123-
124-
@Test
125-
public void testAddSchemaFromJsonWithoutLastColumnId() {
126-
String action = MetadataUpdateParser.ADD_SCHEMA;
127-
Schema schema = ID_DATA_SCHEMA;
128-
int lastColumnId = schema.highestFieldId();
129115
String json =
130116
String.format("{\"action\":\"add-schema\",\"schema\":%s}", SchemaParser.toJson(schema));
131-
MetadataUpdate actualUpdate = new MetadataUpdate.AddSchema(schema, lastColumnId);
117+
MetadataUpdate actualUpdate = new MetadataUpdate.AddSchema(schema);
132118
assertEquals(action, actualUpdate, MetadataUpdateParser.fromJson(json));
133119
}
134120

@@ -140,7 +126,7 @@ public void testAddSchemaToJson() {
140126
String.format(
141127
"{\"action\":\"add-schema\",\"schema\":%s,\"last-column-id\":%d}",
142128
SchemaParser.toJson(schema), lastColumnId);
143-
MetadataUpdate update = new MetadataUpdate.AddSchema(schema, lastColumnId);
129+
MetadataUpdate update = new MetadataUpdate.AddSchema(schema);
144130
String actual = MetadataUpdateParser.toJson(update);
145131
assertThat(actual)
146132
.as("Add schema should convert to the correct JSON value")

core/src/test/java/org/apache/iceberg/TestTableMetadata.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1425,7 +1425,7 @@ public void testUpdateSchemaIdentifierFields() {
14251425
new Schema(
14261426
Lists.newArrayList(Types.NestedField.required(1, "x", Types.StringType.get())),
14271427
Sets.newHashSet(1));
1428-
TableMetadata newMeta = meta.updateSchema(newSchema, 1);
1428+
TableMetadata newMeta = meta.updateSchema(newSchema);
14291429
assertThat(newMeta.schemas()).hasSize(2);
14301430
assertThat(newMeta.schema().identifierFieldIds()).containsExactly(1);
14311431
}
@@ -1447,7 +1447,7 @@ public void testUpdateSchema() {
14471447
new Schema(
14481448
Types.NestedField.required(1, "y", Types.LongType.get(), "comment"),
14491449
Types.NestedField.required(2, "x", Types.StringType.get()));
1450-
TableMetadata twoSchemasTable = freshTable.updateSchema(schema2, 2);
1450+
TableMetadata twoSchemasTable = freshTable.updateSchema(schema2);
14511451
assertThat(twoSchemasTable.currentSchemaId()).isEqualTo(1);
14521452
assertSameSchemaList(
14531453
ImmutableList.of(schema, new Schema(1, schema2.columns())), twoSchemasTable.schemas());
@@ -1459,34 +1459,34 @@ public void testUpdateSchema() {
14591459
new Schema(
14601460
Types.NestedField.required(1, "y", Types.LongType.get(), "comment"),
14611461
Types.NestedField.required(2, "x", Types.StringType.get()));
1462-
TableMetadata sameSchemaTable = twoSchemasTable.updateSchema(sameSchema2, 2);
1462+
TableMetadata sameSchemaTable = twoSchemasTable.updateSchema(sameSchema2);
14631463
assertThat(sameSchemaTable).isSameAs(twoSchemasTable);
14641464

14651465
// update schema with the same schema and different last column ID as current should create
14661466
// a new table
1467-
TableMetadata differentColumnIdTable = sameSchemaTable.updateSchema(sameSchema2, 3);
1467+
TableMetadata differentColumnIdTable = sameSchemaTable.updateSchema(sameSchema2);
14681468
assertThat(differentColumnIdTable.currentSchemaId()).isEqualTo(1);
14691469
assertSameSchemaList(
14701470
ImmutableList.of(schema, new Schema(1, schema2.columns())),
14711471
differentColumnIdTable.schemas());
14721472
assertThat(differentColumnIdTable.schema().asStruct()).isEqualTo(schema2.asStruct());
1473-
assertThat(differentColumnIdTable.lastColumnId()).isEqualTo(3);
1473+
assertThat(differentColumnIdTable.lastColumnId()).isEqualTo(2);
14741474

14751475
// update schema with old schema does not change schemas
1476-
TableMetadata revertSchemaTable = differentColumnIdTable.updateSchema(schema, 3);
1476+
TableMetadata revertSchemaTable = differentColumnIdTable.updateSchema(schema);
14771477
assertThat(revertSchemaTable.currentSchemaId()).isEqualTo(0);
14781478
assertSameSchemaList(
14791479
ImmutableList.of(schema, new Schema(1, schema2.columns())), revertSchemaTable.schemas());
14801480
assertThat(revertSchemaTable.schema().asStruct()).isEqualTo(schema.asStruct());
1481-
assertThat(revertSchemaTable.lastColumnId()).isEqualTo(3);
1481+
assertThat(revertSchemaTable.lastColumnId()).isEqualTo(2);
14821482

14831483
// create new schema will use the largest schema id + 1
14841484
Schema schema3 =
14851485
new Schema(
14861486
Types.NestedField.required(2, "y", Types.LongType.get(), "comment"),
14871487
Types.NestedField.required(4, "x", Types.StringType.get()),
14881488
Types.NestedField.required(6, "z", Types.IntegerType.get()));
1489-
TableMetadata threeSchemaTable = revertSchemaTable.updateSchema(schema3, 6);
1489+
TableMetadata threeSchemaTable = revertSchemaTable.updateSchema(schema3);
14901490
assertThat(threeSchemaTable.currentSchemaId()).isEqualTo(2);
14911491
assertSameSchemaList(
14921492
ImmutableList.of(

core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -223,9 +223,9 @@ public void addSchema() {
223223
UpdateRequirements.forUpdateTable(
224224
metadata,
225225
ImmutableList.of(
226-
new MetadataUpdate.AddSchema(new Schema(), lastColumnId),
227-
new MetadataUpdate.AddSchema(new Schema(), lastColumnId + 1),
228-
new MetadataUpdate.AddSchema(new Schema(), lastColumnId + 2)));
226+
new MetadataUpdate.AddSchema(new Schema()),
227+
new MetadataUpdate.AddSchema(new Schema()),
228+
new MetadataUpdate.AddSchema(new Schema())));
229229
requirements.forEach(req -> req.validate(metadata));
230230

231231
assertThat(requirements)
@@ -253,9 +253,9 @@ public void addSchemaFailure() {
253253
UpdateRequirements.forUpdateTable(
254254
metadata,
255255
ImmutableList.of(
256-
new MetadataUpdate.AddSchema(new Schema(), 1),
257-
new MetadataUpdate.AddSchema(new Schema(), 2),
258-
new MetadataUpdate.AddSchema(new Schema(), 3)));
256+
new MetadataUpdate.AddSchema(new Schema()),
257+
new MetadataUpdate.AddSchema(new Schema()),
258+
new MetadataUpdate.AddSchema(new Schema())));
259259

260260
assertThatThrownBy(() -> requirements.forEach(req -> req.validate(updated)))
261261
.isInstanceOf(CommitFailedException.class)
@@ -269,9 +269,9 @@ public void addSchemaForView() {
269269
UpdateRequirements.forReplaceView(
270270
viewMetadata,
271271
ImmutableList.of(
272-
new MetadataUpdate.AddSchema(new Schema(), lastColumnId),
273-
new MetadataUpdate.AddSchema(new Schema(), lastColumnId + 1),
274-
new MetadataUpdate.AddSchema(new Schema(), lastColumnId + 2)));
272+
new MetadataUpdate.AddSchema(new Schema()),
273+
new MetadataUpdate.AddSchema(new Schema()),
274+
new MetadataUpdate.AddSchema(new Schema())));
275275
requirements.forEach(req -> req.validate(viewMetadata));
276276

277277
assertThat(requirements)

open-api/rest-catalog-open-api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1152,7 +1152,7 @@ class AddSchemaUpdate(BaseUpdate):
11521152
last_column_id: Optional[int] = Field(
11531153
None,
11541154
alias='last-column-id',
1155-
description='The highest assigned column ID for the table. This is used to ensure columns are always assigned an unused ID when evolving schemas. When omitted, it will be computed on the server side.',
1155+
description="This optional field is **DEPRECATED for REMOVAL** since it more safe to handle this internally, and shouldn't be exposed to the clients.\nThe highest assigned column ID for the table. This is used to ensure columns are always assigned an unused ID when evolving schemas. When omitted, it will be computed on the server side.",
11561156
)
11571157

11581158

0 commit comments

Comments
 (0)