@@ -23,6 +23,7 @@
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.io.File;
 import java.io.IOException;
 import java.time.LocalDateTime;
 import java.time.OffsetDateTime;
@@ -33,14 +34,18 @@
 import org.apache.avro.LogicalTypes;
 import org.apache.avro.Schema;
 import org.apache.avro.SchemaBuilder;
+import org.apache.avro.file.DataFileWriter;
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericDatumWriter;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.io.BinaryDecoder;
 import org.apache.avro.io.BinaryEncoder;
 import org.apache.avro.io.DecoderFactory;
 import org.apache.avro.io.EncoderFactory;
+import org.apache.iceberg.Files;
 import org.apache.iceberg.MetadataColumns;
+import org.apache.iceberg.avro.Avro;
+import org.apache.iceberg.avro.AvroIterable;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
@@ -305,6 +310,70 @@ public void testMixedRowLineageValues() throws IOException {
         .isEqualTo(10L);
   }
 
+  @Test
+  public void testLineageColumnsNotProjected() throws Exception {
+
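+    // projection schema contains only the data column, not the row lineage metadata columns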
+    org.apache.iceberg.Schema icebergSchema =
+        new org.apache.iceberg.Schema(
+            Types.NestedField.required(1, "data", Types.StringType.get()));
+
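+    // Avro file schema that carries the row lineage metadata columns alongside the data column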
+    Schema fileSchema =
+        SchemaBuilder.record("test")
+            .fields()
+            .name("data")
+            .type()
+            .stringType()
+            .noDefault()
+            .name(MetadataColumns.ROW_ID.name())
+            .type()
+            .optional()
+            .longType()
+            .name(MetadataColumns.LAST_UPDATED_SEQUENCE_NUMBER.name())
+            .type()
+            .optional()
+            .longType()
+            .endRecord();
+
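+    // attach Iceberg field IDs so the reader can resolve the file columns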
+    fileSchema.getField("data").addProp("field-id", 1);
+    fileSchema
+        .getField(MetadataColumns.ROW_ID.name())
+        .addProp("field-id", MetadataColumns.ROW_ID.fieldId());
+    fileSchema
+        .getField(MetadataColumns.LAST_UPDATED_SEQUENCE_NUMBER.name())
+        .addProp("field-id", MetadataColumns.LAST_UPDATED_SEQUENCE_NUMBER.fieldId());
+
+    File file = File.createTempFile("test", ".avro");
+
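+    // write a single record that populates both lineage columns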
+    try (DataFileWriter<GenericRecord> writer =
+        new DataFileWriter<>(new GenericDatumWriter<>(fileSchema))) {
+
+      writer.create(fileSchema, file);
+
+      GenericRecord rec = new GenericData.Record(fileSchema);
+      rec.put("data", "a");
+      rec.put(MetadataColumns.ROW_ID.name(), 10L);
+      rec.put(MetadataColumns.LAST_UPDATED_SEQUENCE_NUMBER.name(), 5L);
+
+      writer.append(rec);
+    }
+
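+    // reading with a projection that omits the lineage columns should drop them cleanly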
+    try (AvroIterable<Record> reader =
+        Avro.read(Files.localInput(file))
+            .createResolvingReader(schema -> PlannedDataReader.create(icebergSchema))
+            .project(icebergSchema)
+            .build()) {
+
+      List<Record> rows = Lists.newArrayList(reader);
+      assertThat(rows).hasSize(1);
+      assertThat(rows.get(0).getField("data")).isEqualTo("a");
+    }
+  }
+
   private Record readRecord(
       PlannedDataReader<Record> reader, Schema avroSchema, GenericRecord avroRecord)
       throws IOException {