diff --git a/src/iceberg/avro/avro_data_util.cc b/src/iceberg/avro/avro_data_util.cc index f2bcc29f8..b44e4b110 100644 --- a/src/iceberg/avro/avro_data_util.cc +++ b/src/iceberg/avro/avro_data_util.cc @@ -35,6 +35,7 @@ #include "iceberg/avro/avro_schema_util_internal.h" #include "iceberg/metadata_columns.h" #include "iceberg/schema.h" +#include "iceberg/schema_internal.h" #include "iceberg/schema_util.h" #include "iceberg/util/checked_cast.h" #include "iceberg/util/macros.h" @@ -620,7 +621,9 @@ Status ExtractDatumFromArray(const ::arrow::Array& array, int64_t index, } case ::arrow::Type::EXTENSION: { - if (array.type()->name() == "arrow.uuid") { + const auto& extension_type = + internal::checked_cast(*array.type()); + if (extension_type.extension_name() == kArrowUuidExtensionName) { const auto& extension_array = internal::checked_cast(array); const auto& fixed_array = @@ -632,7 +635,8 @@ Status ExtractDatumFromArray(const ::arrow::Array& array, int64_t index, return {}; } - return NotSupported("Unsupported Arrow extension type: {}", array.type()->name()); + return NotSupported("Unsupported Arrow extension type: {}", + extension_type.extension_name()); } case ::arrow::Type::STRUCT: { diff --git a/src/iceberg/avro/avro_reader.cc b/src/iceberg/avro/avro_reader.cc index 1d431c46b..95218ccbd 100644 --- a/src/iceberg/avro/avro_reader.cc +++ b/src/iceberg/avro/avro_reader.cc @@ -20,9 +20,11 @@ #include "iceberg/avro/avro_reader.h" #include +#include #include #include +#include #include #include #include @@ -42,6 +44,7 @@ #include "iceberg/metadata_columns.h" #include "iceberg/name_mapping.h" #include "iceberg/schema_internal.h" +#include "iceberg/util/checked_cast.h" #include "iceberg/util/macros.h" namespace iceberg::avro { @@ -209,6 +212,63 @@ struct ReadContext { std::shared_ptr<::arrow::ArrayBuilder> builder_; }; +std::shared_ptr<::arrow::DataType> StorageTypeForBuilder( + const std::shared_ptr<::arrow::DataType>& type); + +std::shared_ptr<::arrow::Field> StorageFieldForBuilder( + const std::shared_ptr<::arrow::Field>& field) { + return ::arrow::field(field->name(), StorageTypeForBuilder(field->type()), + field->nullable(), field->metadata()); +} + +// Arrow cannot construct builders for arrow.uuid extension arrays yet, so build +// with the UUID storage type while keeping the public schema unchanged. +std::shared_ptr<::arrow::DataType> StorageTypeForBuilder( + const std::shared_ptr<::arrow::DataType>& type) { + switch (type->id()) { + case ::arrow::Type::EXTENSION: { + const auto& extension_type = + internal::checked_cast(*type); + if (extension_type.extension_name() == kArrowUuidExtensionName) { + return extension_type.storage_type(); + } + return type; + } + case ::arrow::Type::STRUCT: { + const auto& struct_type = internal::checked_cast(*type); + std::vector> fields; + fields.reserve(struct_type.num_fields()); + for (const auto& field : struct_type.fields()) { + fields.emplace_back(StorageFieldForBuilder(field)); + } + return ::arrow::struct_(std::move(fields)); + } + case ::arrow::Type::LIST: { + const auto& list_type = internal::checked_cast(*type); + return ::arrow::list(StorageFieldForBuilder(list_type.value_field())); + } + case ::arrow::Type::LARGE_LIST: { + const auto& list_type = + internal::checked_cast(*type); + return ::arrow::large_list(StorageFieldForBuilder(list_type.value_field())); + } + case ::arrow::Type::FIXED_SIZE_LIST: { + const auto& list_type = + internal::checked_cast(*type); + return ::arrow::fixed_size_list(StorageFieldForBuilder(list_type.value_field()), + list_type.list_size()); + } + case ::arrow::Type::MAP: { + const auto& map_type = internal::checked_cast(*type); + return ::arrow::map(StorageTypeForBuilder(map_type.key_type()), + StorageFieldForBuilder(map_type.item_field()), + map_type.keys_sorted()); + } + default: + return type; + } +} + } // namespace // TODO(gang.wu): collect basic reader metrics @@ -349,7 +409,8 @@ class AvroReader::Impl { context_->arrow_schema_ = import_result.MoveValueUnsafe(); auto arrow_struct_type = - std::make_shared<::arrow::StructType>(context_->arrow_schema_->fields()); + internal::checked_pointer_cast<::arrow::StructType>(StorageTypeForBuilder( + std::make_shared<::arrow::StructType>(context_->arrow_schema_->fields()))); auto builder_result = ::arrow::MakeBuilder(arrow_struct_type); if (!builder_result.ok()) { return InvalidSchema("Failed to make the arrow builder: {}", diff --git a/src/iceberg/parquet/parquet_schema_util.cc b/src/iceberg/parquet/parquet_schema_util.cc index a5629198f..09b1f9636 100644 --- a/src/iceberg/parquet/parquet_schema_util.cc +++ b/src/iceberg/parquet/parquet_schema_util.cc @@ -28,6 +28,7 @@ #include "iceberg/metadata_columns.h" #include "iceberg/parquet/parquet_schema_util_internal.h" #include "iceberg/result.h" +#include "iceberg/schema_internal.h" #include "iceberg/schema_util_internal.h" #include "iceberg/util/checked_cast.h" #include "iceberg/util/formatter.h" // IWYU pragma: keep @@ -222,7 +223,7 @@ Status ValidateParquetSchemaEvolution( if (arrow_type->id() == ::arrow::Type::EXTENSION) { const auto& extension_type = internal::checked_cast(*arrow_type); - if (extension_type.extension_name() == "arrow.uuid") { + if (extension_type.extension_name() == kArrowUuidExtensionName) { return {}; } } diff --git a/src/iceberg/row/struct_like.cc b/src/iceberg/row/struct_like.cc index 355af84c5..430fd22f0 100644 --- a/src/iceberg/row/struct_like.cc +++ b/src/iceberg/row/struct_like.cc @@ -62,6 +62,12 @@ Result LiteralToScalar(const Literal& literal) { return Scalar{ std::string_view(reinterpret_cast(bytes.data()), bytes.size())}; } + case TypeId::kUuid: { + const auto& uuid = std::get(literal.value()); + const auto& bytes = uuid.bytes(); + return Scalar{ + std::string_view(reinterpret_cast(bytes.data()), bytes.size())}; + } case TypeId::kDecimal: return Scalar{std::get(literal.value())}; default: @@ -162,8 +168,14 @@ Result StructLikeAccessor::GetLiteral(const StructLike& struct_like) co const auto& fixed_data = std::get(scalar); return Literal::Fixed(std::vector(fixed_data.cbegin(), fixed_data.cend())); } - case TypeId::kUuid: - // TODO(gangwu): Implement UUID type + case TypeId::kUuid: { + const auto& uuid_data = std::get(scalar); + ICEBERG_ASSIGN_OR_RAISE( + auto uuid, + Uuid::FromBytes(std::span( + reinterpret_cast(uuid_data.data()), uuid_data.size()))); + return Literal::UUID(uuid); + } default: return NotSupported("Cannot convert scalar to literal of type {}", type_->ToString()); diff --git a/src/iceberg/schema_internal.cc b/src/iceberg/schema_internal.cc index 792341adf..5dac0d3bf 100644 --- a/src/iceberg/schema_internal.cc +++ b/src/iceberg/schema_internal.cc @@ -37,7 +37,6 @@ namespace { // Constants for Arrow schema metadata constexpr const char* kArrowExtensionName = "ARROW:extension:name"; constexpr const char* kArrowExtensionMetadata = "ARROW:extension:metadata"; -constexpr const char* kArrowUuidExtensionName = "arrow.uuid"; constexpr int32_t kUnknownFieldId = -1; Status CheckArrowCompatible(const Type& type) { diff --git a/src/iceberg/schema_internal.h b/src/iceberg/schema_internal.h index 5c7209d64..fd9ec6266 100644 --- a/src/iceberg/schema_internal.h +++ b/src/iceberg/schema_internal.h @@ -30,6 +30,9 @@ namespace iceberg { +/// \brief Canonical Arrow extension name used for Iceberg UUID values. +inline constexpr const char* kArrowUuidExtensionName = "arrow.uuid"; + /// \brief Convert an Iceberg schema to an Arrow schema. /// /// \param[in] schema The Iceberg schema to convert. diff --git a/src/iceberg/test/avro_test.cc b/src/iceberg/test/avro_test.cc index ef86ef9e2..3ae1696d0 100644 --- a/src/iceberg/test/avro_test.cc +++ b/src/iceberg/test/avro_test.cc @@ -17,6 +17,8 @@ * under the License. */ +#include +#include #include #include #include @@ -25,7 +27,10 @@ #include #include +#include +#include #include +#include #include #include #include @@ -46,6 +51,7 @@ #include "iceberg/test/temp_file_test_base.h" #include "iceberg/type.h" #include "iceberg/util/checked_cast.h" +#include "iceberg/util/uuid.h" namespace iceberg::avro { @@ -76,6 +82,41 @@ std::optional FieldIdAt(const ::avro::NodePtr& node, size_t index) { return std::stoi(field_id.value()); } +constexpr std::array kUuidBytes1 = { + 0x12, 0x3e, 0x45, 0x67, 0xe8, 0x9b, 0x12, 0xd3, + 0xa4, 0x56, 0x42, 0x66, 0x14, 0x17, 0x40, 0x00}; +constexpr std::array kUuidBytes2 = { + 0xf7, 0x9c, 0x3e, 0x09, 0x67, 0x7c, 0x4b, 0xbd, + 0xa4, 0x79, 0x3f, 0x34, 0x9c, 0xb7, 0x85, 0xe7}; + +std::shared_ptr<::arrow::Array> MakeUuidArray( + std::initializer_list*> values) { + ::arrow::FixedSizeBinaryBuilder uuid_storage_builder( + ::arrow::fixed_size_binary(Uuid::kLength)); + for (const auto* value : values) { + EXPECT_TRUE(uuid_storage_builder.Append(value->data()).ok()); + } + auto uuid_storage = uuid_storage_builder.Finish().ValueOrDie(); + return ::arrow::ExtensionType::WrapArray(::arrow::extension::uuid(), uuid_storage); +} + +std::shared_ptr<::arrow::Array> MakeInt32Array(std::initializer_list values) { + ::arrow::Int32Builder builder; + for (auto value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return builder.Finish().ValueOrDie(); +} + +std::shared_ptr<::arrow::Array> MakeStringArray( + std::initializer_list values) { + ::arrow::StringBuilder builder; + for (auto value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return builder.Finish().ValueOrDie(); +} + } // namespace class AvroReaderTest : public TempFileTestBase { @@ -781,6 +822,41 @@ class AvroWriterTest : public ::testing::Test, return avro_reader.dataSchema(); } + void WriteArrowArrayAndVerify(std::shared_ptr schema, + const std::shared_ptr<::arrow::Array>& array) { + ArrowArray arrow_array; + ASSERT_TRUE(::arrow::ExportArray(*array, &arrow_array).ok()); + + WriterProperties writer_properties; + writer_properties.Set(WriterProperties::kAvroSkipDatum, skip_datum_); + + ICEBERG_UNWRAP_OR_FAIL(writer_, WriterFactoryRegistry::Open( + FileFormatType::kAvro, + {.path = temp_avro_file_, + .schema = schema, + .io = file_io_, + .properties = std::move(writer_properties)})); + ASSERT_THAT(writer_->Write(&arrow_array), IsOk()); + ASSERT_THAT(writer_->Close(), IsOk()); + + ICEBERG_UNWRAP_OR_FAIL(auto written_length, writer_->length()); + ICEBERG_UNWRAP_OR_FAIL( + auto reader, + ReaderFactoryRegistry::Open(FileFormatType::kAvro, {.path = temp_avro_file_, + .length = written_length, + .io = file_io_, + .projection = schema})); + ICEBERG_UNWRAP_OR_FAIL(auto data, reader->Next()); + ASSERT_TRUE(data.has_value()); + + ICEBERG_UNWRAP_OR_FAIL(auto arrow_c_schema, reader->Schema()); + auto read_array = ::arrow::ImportArray(&data.value(), &arrow_c_schema).ValueOrDie(); + ASSERT_TRUE(read_array->Equals(*array)) << "actual:\n" + << read_array->ToString() << "\nexpected:\n" + << array->ToString(); + ASSERT_NO_FATAL_FAILURE(VerifyExhausted(*reader)); + } + std::shared_ptr file_io_; std::string temp_avro_file_; bool skip_datum_{true}; @@ -806,6 +882,135 @@ TEST_P(AvroWriterTest, WritePrimitiveTypes) { VerifyWrittenData(test_data); } +TEST_P(AvroWriterTest, WriteUuidType) { + auto schema = std::make_shared(std::vector{ + SchemaField::MakeRequired(1, "uuid_col", iceberg::uuid())}); + + auto uuid_array = MakeUuidArray({&kUuidBytes1, &kUuidBytes2}); + auto array = + ::arrow::StructArray::Make( + {uuid_array}, + {::arrow::field("uuid_col", ::arrow::extension::uuid(), /*nullable=*/false)}) + .ValueOrDie(); + + ArrowArray arrow_array; + ASSERT_TRUE(::arrow::ExportArray(*array, &arrow_array).ok()); + + WriterProperties writer_properties; + writer_properties.Set(WriterProperties::kAvroSkipDatum, skip_datum_); + + ICEBERG_UNWRAP_OR_FAIL( + writer_, WriterFactoryRegistry::Open(FileFormatType::kAvro, + {.path = temp_avro_file_, + .schema = schema, + .io = file_io_, + .properties = std::move(writer_properties)})); + ASSERT_THAT(writer_->Write(&arrow_array), IsOk()); + ASSERT_THAT(writer_->Close(), IsOk()); + + auto avro_schema = PhysicalAvroSchema(); + auto root = avro_schema.root(); + ASSERT_EQ(root->type(), ::avro::AVRO_RECORD); + ASSERT_EQ(root->leaves(), 1); + auto uuid_node = root->leafAt(0); + EXPECT_EQ(uuid_node->type(), ::avro::AVRO_FIXED); + EXPECT_EQ(uuid_node->logicalType().type(), ::avro::LogicalType::UUID); + EXPECT_EQ(uuid_node->fixedSize(), Uuid::kLength); + + ICEBERG_UNWRAP_OR_FAIL(auto written_length, writer_->length()); + ICEBERG_UNWRAP_OR_FAIL( + auto reader, + ReaderFactoryRegistry::Open(FileFormatType::kAvro, {.path = temp_avro_file_, + .length = written_length, + .io = file_io_, + .projection = schema})); + ICEBERG_UNWRAP_OR_FAIL(auto data, reader->Next()); + ASSERT_TRUE(data.has_value()); + + ICEBERG_UNWRAP_OR_FAIL(auto arrow_c_schema, reader->Schema()); + auto read_array = ::arrow::ImportArray(&data.value(), &arrow_c_schema).ValueOrDie(); + ASSERT_TRUE(read_array->Equals(*array)) << "actual:\n" + << read_array->ToString() << "\nexpected:\n" + << array->ToString(); + ASSERT_NO_FATAL_FAILURE(VerifyExhausted(*reader)); +} + +TEST_P(AvroWriterTest, WriteUuidListType) { + auto schema = std::make_shared(std::vector{ + SchemaField::MakeRequired(1, "uuid_list", + std::make_shared(SchemaField::MakeRequired( + 2, ListType::kElementName, iceberg::uuid())))}); + + auto list_values = MakeUuidArray({&kUuidBytes1, &kUuidBytes2, &kUuidBytes1}); + auto list_offsets = MakeInt32Array({0, 2, 3}); + auto list_type = ::arrow::list(::arrow::field(std::string(ListType::kElementName), + ::arrow::extension::uuid(), + /*nullable=*/false)); + auto list_array = + ::arrow::ListArray::FromArrays(list_type, *list_offsets, *list_values).ValueOrDie(); + + auto array = + ::arrow::StructArray::Make( + {list_array}, {::arrow::field("uuid_list", list_type, /*nullable=*/false)}) + .ValueOrDie(); + + ASSERT_NO_FATAL_FAILURE(WriteArrowArrayAndVerify(schema, array)); +} + +TEST_P(AvroWriterTest, WriteUuidMapType) { + auto schema = std::make_shared( + std::vector{SchemaField::MakeRequired( + 1, "uuid_map", + std::make_shared( + SchemaField::MakeRequired(2, MapType::kKeyName, iceberg::string()), + SchemaField::MakeRequired(3, MapType::kValueName, iceberg::uuid())))}); + + auto map_offsets = MakeInt32Array({0, 2, 3}); + auto map_keys = MakeStringArray({"first", "second", "only"}); + auto map_items = MakeUuidArray({&kUuidBytes1, &kUuidBytes2, &kUuidBytes1}); + auto map_type = + ::arrow::map(::arrow::utf8(), ::arrow::field(std::string(MapType::kValueName), + ::arrow::extension::uuid(), + /*nullable=*/false)); + auto map_array = + ::arrow::MapArray::FromArrays(map_type, map_offsets, map_keys, map_items) + .ValueOrDie(); + + auto array = + ::arrow::StructArray::Make( + {map_array}, {::arrow::field("uuid_map", map_type, /*nullable=*/false)}) + .ValueOrDie(); + + ASSERT_NO_FATAL_FAILURE(WriteArrowArrayAndVerify(schema, array)); +} + +TEST_P(AvroWriterTest, WriteUuidMapKeyType) { + auto schema = std::make_shared( + std::vector{SchemaField::MakeRequired( + 1, "uuid_key_map", + std::make_shared( + SchemaField::MakeRequired(2, MapType::kKeyName, iceberg::uuid()), + SchemaField::MakeRequired(3, MapType::kValueName, iceberg::string())))}); + + auto map_offsets = MakeInt32Array({0, 2, 3}); + auto map_keys = MakeUuidArray({&kUuidBytes1, &kUuidBytes2, &kUuidBytes1}); + auto map_items = MakeStringArray({"first", "second", "only"}); + auto map_type = + ::arrow::map(::arrow::extension::uuid(), + ::arrow::field(std::string(MapType::kValueName), ::arrow::utf8(), + /*nullable=*/false)); + auto map_array = + ::arrow::MapArray::FromArrays(map_type, map_offsets, map_keys, map_items) + .ValueOrDie(); + + auto array = + ::arrow::StructArray::Make( + {map_array}, {::arrow::field("uuid_key_map", map_type, /*nullable=*/false)}) + .ValueOrDie(); + + ASSERT_NO_FATAL_FAILURE(WriteArrowArrayAndVerify(schema, array)); +} + TEST_P(AvroWriterTest, WriteTemporalTypes) { auto schema = std::make_shared(std::vector{ SchemaField::MakeRequired(1, "date_col", std::make_shared()), diff --git a/src/iceberg/test/parquet_test.cc b/src/iceberg/test/parquet_test.cc index ee1cbc931..88c337603 100644 --- a/src/iceberg/test/parquet_test.cc +++ b/src/iceberg/test/parquet_test.cc @@ -17,13 +17,16 @@ * under the License. */ +#include #include #include #include #include #include +#include #include +#include #include #include #include @@ -52,6 +55,7 @@ #include "iceberg/type.h" #include "iceberg/util/checked_cast.h" #include "iceberg/util/macros.h" +#include "iceberg/util/uuid.h" namespace iceberg::parquet { @@ -150,6 +154,13 @@ std::optional FirstUnavailableParquetCodec() { return std::nullopt; } +constexpr std::array kUuidBytes1 = { + 0x12, 0x3e, 0x45, 0x67, 0xe8, 0x9b, 0x12, 0xd3, + 0xa4, 0x56, 0x42, 0x66, 0x14, 0x17, 0x40, 0x00}; +constexpr std::array kUuidBytes2 = { + 0xf7, 0x9c, 0x3e, 0x09, 0x67, 0x7c, 0x4b, 0xbd, + 0xa4, 0x79, 0x3f, 0x34, 0x9c, 0xb7, 0x85, 0xe7}; + } // namespace class ParquetReaderTest : public TempFileTestBase { @@ -771,4 +782,29 @@ TEST_F(ParquetReadWrite, SimpleTypeRoundTrip) { ASSERT_TRUE(out->Equals(*array)); } +TEST_F(ParquetReadWrite, UuidRoundTrip) { + auto schema = std::make_shared( + std::vector{SchemaField::MakeRequired(1, "uuid_col", uuid())}); + + ::arrow::FixedSizeBinaryBuilder uuid_storage_builder( + ::arrow::fixed_size_binary(Uuid::kLength)); + ASSERT_TRUE(uuid_storage_builder.Append(kUuidBytes1.data()).ok()); + ASSERT_TRUE(uuid_storage_builder.Append(kUuidBytes2.data()).ok()); + auto uuid_storage = uuid_storage_builder.Finish().ValueOrDie(); + auto uuid_array = + ::arrow::ExtensionType::WrapArray(::arrow::extension::uuid(), uuid_storage); + auto array = + ::arrow::StructArray::Make( + {uuid_array}, + {::arrow::field("uuid_col", ::arrow::extension::uuid(), /*nullable=*/false)}) + .ValueOrDie(); + + std::shared_ptr<::arrow::Array> out; + DoRoundtrip(array, schema, out); + + ASSERT_TRUE(out->Equals(*array)) << "actual:\n" + << out->ToString() << "\nexpected:\n" + << array->ToString(); +} + } // namespace iceberg::parquet diff --git a/src/iceberg/test/struct_like_test.cc b/src/iceberg/test/struct_like_test.cc index 97df61b8d..ad19d6a09 100644 --- a/src/iceberg/test/struct_like_test.cc +++ b/src/iceberg/test/struct_like_test.cc @@ -17,6 +17,11 @@ * under the License. */ +#include "iceberg/row/struct_like.h" + +#include +#include + #include #include #include @@ -24,6 +29,7 @@ #include #include "iceberg/arrow_c_data_guard_internal.h" +#include "iceberg/expression/literal.h" #include "iceberg/manifest/manifest_list.h" #include "iceberg/manifest/manifest_reader_internal.h" #include "iceberg/row/arrow_array_wrapper.h" @@ -32,6 +38,7 @@ #include "iceberg/schema_internal.h" #include "iceberg/test/matchers.h" #include "iceberg/type.h" +#include "iceberg/util/uuid.h" namespace iceberg { @@ -59,6 +66,27 @@ namespace iceberg { ASSERT_TRUE(std::holds_alternative(scalar)); \ } while (0) +namespace { + +class SingleFieldStructLike : public StructLike { + public: + explicit SingleFieldStructLike(Scalar value) : value_(std::move(value)) {} + + Result GetField(size_t pos) const override { + if (pos != 0) { + return InvalidArgument("Field index {} out of range", pos); + } + return value_; + } + + size_t num_fields() const override { return 1; } + + private: + Scalar value_; +}; + +} // namespace + TEST(ManifestFileStructLike, BasicFields) { ManifestFile manifest_file{ .manifest_path = "/path/to/manifest.avro", @@ -99,6 +127,49 @@ TEST(ManifestFileStructLike, BasicFields) { EXPECT_THAT(struct_like.GetField(100), IsError(ErrorKind::kInvalidArgument)); } +TEST(LiteralToScalarTest, Uuid) { + ICEBERG_UNWRAP_OR_FAIL(auto uuid, + Uuid::FromString("123e4567-e89b-12d3-a456-426614174000")); + auto literal = Literal::UUID(uuid); + + ICEBERG_UNWRAP_OR_FAIL(auto scalar, LiteralToScalar(literal)); + ASSERT_TRUE(std::holds_alternative(scalar)); + + auto value = std::get(scalar); + ASSERT_EQ(value.size(), Uuid::kLength); + + const auto& expected_bytes = uuid.bytes(); + for (size_t i = 0; i < expected_bytes.size(); ++i) { + EXPECT_EQ(static_cast(static_cast(value[i])), + expected_bytes[i]); + } +} + +TEST(StructLikeAccessorTest, GetLiteralUuid) { + ICEBERG_UNWRAP_OR_FAIL(auto uuid, + Uuid::FromString("123e4567-e89b-12d3-a456-426614174000")); + const auto& bytes = uuid.bytes(); + std::string_view uuid_data(reinterpret_cast(bytes.data()), bytes.size()); + SingleFieldStructLike row(Scalar{uuid_data}); + std::array path = {0}; + StructLikeAccessor accessor(iceberg::uuid(), path); + + ICEBERG_UNWRAP_OR_FAIL(auto literal, accessor.GetLiteral(row)); + EXPECT_EQ(literal.type()->type_id(), TypeId::kUuid); + ASSERT_TRUE(std::holds_alternative(literal.value())); + EXPECT_EQ(std::get(literal.value()), uuid); +} + +TEST(StructLikeAccessorTest, GetLiteralUuidRejectsWrongLength) { + SingleFieldStructLike row(Scalar{std::string_view("not-a-uuid")}); + std::array path = {0}; + StructLikeAccessor accessor(iceberg::uuid(), path); + + auto result = accessor.GetLiteral(row); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(result, HasErrorMessage("UUID byte array must be exactly 16 bytes")); +} + TEST(ManifestFileStructLike, OptionalFields) { ManifestFile manifest_file{.manifest_path = "/path/to/manifest2.avro", .manifest_length = 54321,