From 967973fbe0161569b6cb9ac9fcfdbfe747ead47d Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 12 Jan 2026 14:24:07 -0800 Subject: [PATCH 1/6] Geo type changes --- Cargo.lock | 77 ++++++-- Cargo.toml | 3 +- crates/iceberg/Cargo.toml | 1 + crates/iceberg/src/arrow/schema.rs | 7 + crates/iceberg/src/arrow/value.rs | 6 +- crates/iceberg/src/avro/schema.rs | 1 + crates/iceberg/src/spec/datatypes.rs | 225 ++++++++++++++++++++++ crates/iceberg/src/spec/values/datum.rs | 14 ++ crates/iceberg/src/spec/values/literal.rs | 39 +++- crates/iceberg/src/spec/values/serde.rs | 10 +- crates/iceberg/src/spec/values/tests.rs | 86 ++++++++- 11 files changed, 449 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 51af571d5a..3f4ac36ca5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -194,6 +194,15 @@ dependencies = [ "zstd", ] +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + [[package]] name = "ar_archive_writer" version = "0.2.0" @@ -2497,7 +2506,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2639,7 +2648,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2945,6 +2954,26 @@ dependencies = [ "version_check", ] +[[package]] +name = "geo-traits" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7c353d12a704ccfab1ba8bfb1a7fe6cb18b665bf89d37f4f7890edcd260206" +dependencies = [ + "geo-types", +] + +[[package]] +name = "geo-types" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24f8647af4005fa11da47cd56252c6ef030be8fa97bdbf355e7dfb6348f0a82c" +dependencies = [ + "approx", + "num-traits", + "serde", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -3312,7 +3341,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.1", "tokio", "tower-service", "tracing", @@ -3381,6 +3410,7 @@ dependencies = [ "opendal", "ordered-float 4.6.0", "parquet", + "parquet-geospatial", "pretty_assertions", "rand 0.8.5", "regex", @@ -3844,7 +3874,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4365,7 +4395,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4670,6 +4700,19 @@ dependencies = [ "zstd", ] +[[package]] +name = "parquet-geospatial" +version = "57.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a379b66a70361d96d1dd4805904bd3180f62d6b1b3a18203cd89ee5ac87a963b" +dependencies = [ + "arrow-schema", + "geo-traits", + "serde", + "serde_json", + "wkb", +] + [[package]] name = "paste" version = "1.0.15" @@ -5165,7 +5208,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.35", - "socket2 0.5.10", + "socket2 0.6.1", "thiserror 2.0.17", "tokio", "tracing", @@ -5202,9 +5245,9 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.1", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -5696,7 +5739,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6698,7 +6741,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -7513,7 +7556,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -7821,6 +7864,18 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "wkb" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a120b336c7ad17749026d50427c23d838ecb50cd64aaea6254b5030152f890a9" +dependencies = [ + "byteorder", + "geo-traits", + "num_enum", + "thiserror 1.0.69", +] + [[package]] name = "writeable" version = "0.6.2" diff --git a/Cargo.toml b/Cargo.toml index 56cd1801cc..c9732093b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -102,6 +102,7 @@ once_cell = "1.20" opendal = "0.55.0" ordered-float = "4" parquet = "57.0" +parquet-geospatial = "57.0" pilota = "0.11.10" port_scanner = "0.1.5" pretty_assertions = "1.4" @@ -131,4 +132,4 @@ url = "2.5.7" uuid = { version = "1.18", features = ["v7"] } volo = "0.10.6" volo-thrift = "0.10.8" -zstd = "0.13.3" \ No newline at end of file +zstd = "0.13.3" diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 6f1332a444..2c03472de6 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -72,6 +72,7 @@ once_cell = { workspace = true } opendal = { workspace = true } ordered-float = { workspace = true } parquet = { workspace = true, features = ["async"] } +parquet-geospatial = { workspace = true } rand = { workspace = true } reqsign = { version = "0.16.3", optional = true, default-features = false } reqwest = { workspace = true } diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs index 9ee7897cb6..ec6a638c8f 100644 --- a/crates/iceberg/src/arrow/schema.rs +++ b/crates/iceberg/src/arrow/schema.rs @@ -687,6 +687,10 @@ impl SchemaVisitor for ToArrowSchemaConverter { crate::spec::PrimitiveType::Binary => { Ok(ArrowSchemaOrFieldOrType::Type(DataType::LargeBinary)) } + crate::spec::PrimitiveType::Geometry { .. } + | crate::spec::PrimitiveType::Geography { .. } => { + Ok(ArrowSchemaOrFieldOrType::Type(DataType::LargeBinary)) + } } } } @@ -1119,6 +1123,9 @@ pub fn datum_to_arrow_type_with_ree(datum: &Datum) -> DataType { PrimitiveType::Uuid => make_ree(DataType::Binary), PrimitiveType::Fixed(_) => make_ree(DataType::Binary), PrimitiveType::Binary => make_ree(DataType::Binary), + PrimitiveType::Geometry { .. } | PrimitiveType::Geography { .. } => { + make_ree(DataType::Binary) + } PrimitiveType::Decimal { precision, scale } => { make_ree(DataType::Decimal128(*precision as u8, *scale as i8)) } diff --git a/crates/iceberg/src/arrow/value.rs b/crates/iceberg/src/arrow/value.rs index 30b47d83fc..0912a8a274 100644 --- a/crates/iceberg/src/arrow/value.rs +++ b/crates/iceberg/src/arrow/value.rs @@ -406,7 +406,9 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { .map(|v| v.map(|v| Literal::fixed(v.iter().cloned()))) .collect()) } - PrimitiveType::Binary => { + PrimitiveType::Binary + | PrimitiveType::Geometry { .. } + | PrimitiveType::Geography { .. } => { if let Some(array) = partner.as_any().downcast_ref::() { Ok(array .iter() @@ -420,7 +422,7 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { } else { Err(Error::new( ErrorKind::DataInvalid, - "The partner is not a binary array", + format!("The partner is not a binary array: {:?}", p), )) } } diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index fdbc680977..1e8bf82967 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -240,6 +240,7 @@ impl SchemaVisitor for SchemaToAvroSchema { PrimitiveType::Decimal { precision, scale } => { avro_decimal_schema(*precision as usize, *scale as usize)? } + PrimitiveType::Geometry { .. } | PrimitiveType::Geography { .. } => todo!(), }; Ok(Either::Left(avro_schema)) } diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index 0379465584..354bc32598 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -247,6 +247,40 @@ pub enum PrimitiveType { Fixed(u64), /// Arbitrary-length byte array. Binary, + /// Geometry type + Geometry { + /// CRS + crs: String, + }, + /// Geography type + Geography { + /// CRS + crs: String, + /// Edge algorithm + algorithm: EdgeAlgorithm, + }, +} + +/// Edge algorithm for geography type. +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Hash, Copy)] +#[serde(rename_all = "lowercase")] +pub enum EdgeAlgorithm { + /// Planar + Planar, + /// Spherical + Spherical, + /// Vincenty + Vincenty, +} + +impl fmt::Display for EdgeAlgorithm { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + EdgeAlgorithm::Planar => write!(f, "planar"), + EdgeAlgorithm::Spherical => write!(f, "spherical"), + EdgeAlgorithm::Vincenty => write!(f, "vincenty"), + } + } } impl PrimitiveType { @@ -270,6 +304,8 @@ impl PrimitiveType { | (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(_)) | (PrimitiveType::Fixed(_), PrimitiveLiteral::Binary(_)) | (PrimitiveType::Binary, PrimitiveLiteral::Binary(_)) + | (PrimitiveType::Geometry { .. }, PrimitiveLiteral::Binary(_)) + | (PrimitiveType::Geography { .. }, PrimitiveLiteral::Binary(_)) ) } } @@ -298,6 +334,10 @@ impl<'de> Deserialize<'de> for PrimitiveType { deserialize_decimal(s.into_deserializer()) } else if s.starts_with("fixed") { deserialize_fixed(s.into_deserializer()) + } else if s.starts_with("geometry") { + deserialize_geometry(s.into_deserializer()) + } else if s.starts_with("geography") { + deserialize_geography(s.into_deserializer()) } else { PrimitiveType::deserialize(s.into_deserializer()) } @@ -312,6 +352,10 @@ impl Serialize for PrimitiveType { serialize_decimal(precision, scale, serializer) } PrimitiveType::Fixed(l) => serialize_fixed(l, serializer), + PrimitiveType::Geometry { crs } => serialize_geometry(crs, serializer), + PrimitiveType::Geography { crs, algorithm } => { + serialize_geography(crs, algorithm, serializer) + } _ => PrimitiveType::serialize(self, serializer), } } @@ -361,6 +405,88 @@ where S: Serializer { serializer.serialize_str(&format!("fixed[{value}]")) } +fn deserialize_geometry<'de, D>(deserializer: D) -> std::result::Result +where + D: Deserializer<'de>, +{ + let s = String::deserialize(deserializer)?; + if s == "geometry" { + return Ok(PrimitiveType::Geometry { + crs: "OGC:CRS84".to_string(), + }); + } + let crs = s + .trim_start_matches(r"geometry(") + .trim_end_matches(')') + .to_owned(); + + Ok(PrimitiveType::Geometry { crs }) +} + +fn serialize_geometry(crs: &str, serializer: S) -> std::result::Result +where + S: Serializer, +{ + if crs == "OGC:CRS84" { + serializer.serialize_str("geometry") + } else { + serializer.serialize_str(&format!("geometry({crs})")) + } +} + +fn deserialize_geography<'de, D>(deserializer: D) -> std::result::Result +where + D: Deserializer<'de>, +{ + let s = String::deserialize(deserializer)?; + if s == "geography" { + return Ok(PrimitiveType::Geography { + crs: "OGC:CRS84".to_string(), + algorithm: EdgeAlgorithm::Spherical, + }); + } + let params = s.trim_start_matches(r"geography(").trim_end_matches(')'); + + if let Some((crs, algorithm)) = params.split_once(',') { + let algorithm = match algorithm.trim().to_lowercase().as_str() { + "planar" => EdgeAlgorithm::Planar, + "spherical" => EdgeAlgorithm::Spherical, + "vincenty" => EdgeAlgorithm::Vincenty, + _ => { + return Err(D::Error::custom(format!( + "Invalid edge algorithm: {algorithm}" + ))); + } + }; + Ok(PrimitiveType::Geography { + crs: crs.trim().to_string(), + algorithm, + }) + } else { + Ok(PrimitiveType::Geography { + crs: params.trim().to_string(), + algorithm: EdgeAlgorithm::Spherical, + }) + } +} + +fn serialize_geography( + crs: &str, + algorithm: &EdgeAlgorithm, + serializer: S, +) -> std::result::Result +where + S: Serializer, +{ + if crs == "OGC:CRS84" && matches!(algorithm, EdgeAlgorithm::Spherical) { + serializer.serialize_str("geography") + } else if matches!(algorithm, EdgeAlgorithm::Spherical) { + serializer.serialize_str(&format!("geography({crs})")) + } else { + serializer.serialize_str(&format!("geography({crs}, {algorithm})")) + } +} + impl fmt::Display for PrimitiveType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -382,6 +508,22 @@ impl fmt::Display for PrimitiveType { PrimitiveType::Uuid => write!(f, "uuid"), PrimitiveType::Fixed(size) => write!(f, "fixed({size})"), PrimitiveType::Binary => write!(f, "binary"), + PrimitiveType::Geometry { crs } => { + if crs == "OGC:CRS84" { + write!(f, "geometry") + } else { + write!(f, "geometry({crs})") + } + } + PrimitiveType::Geography { crs, algorithm } => { + if crs == "OGC:CRS84" && matches!(algorithm, EdgeAlgorithm::Spherical) { + write!(f, "geography") + } else if matches!(algorithm, EdgeAlgorithm::Spherical) { + write!(f, "geography({crs})") + } else { + write!(f, "geography({crs}, {algorithm})") + } + } } } } @@ -1183,6 +1325,76 @@ mod tests { assert_eq!(16, Type::decimal_required_bytes(38).unwrap()); } + #[test] + fn geometry_geography_type_serde() { + let pairs = vec![ + ( + "geometry", + PrimitiveType::Geometry { + crs: "OGC:CRS84".to_string(), + }, + None, + ), + ( + "geometry(EPSG:4326)", + PrimitiveType::Geometry { + crs: "EPSG:4326".to_string(), + }, + None, + ), + ( + "geography", + PrimitiveType::Geography { + crs: "OGC:CRS84".to_string(), + algorithm: EdgeAlgorithm::Spherical, + }, + None, + ), + ( + "geography(EPSG:4326)", + PrimitiveType::Geography { + crs: "EPSG:4326".to_string(), + algorithm: EdgeAlgorithm::Spherical, + }, + None, + ), + ( + "geography(EPSG:4326, planar)", + PrimitiveType::Geography { + crs: "EPSG:4326".to_string(), + algorithm: EdgeAlgorithm::Planar, + }, + None, + ), + ( + "geography(EPSG:4326, spherical)", + PrimitiveType::Geography { + crs: "EPSG:4326".to_string(), + algorithm: EdgeAlgorithm::Spherical, + }, + Some("geography(EPSG:4326)"), + ), + ( + "geography(EPSG:4326, vincenty)", + PrimitiveType::Geography { + crs: "EPSG:4326".to_string(), + algorithm: EdgeAlgorithm::Vincenty, + }, + None, + ), + ]; + + for (json, expected_type, expected_json) in pairs { + let json_input = format!("\"{}\"", json); + let desered_type: PrimitiveType = serde_json::from_str(&json_input).unwrap(); + assert_eq!(desered_type, expected_type); + + let sered_json = serde_json::to_string(&expected_type).unwrap(); + let expected_output = expected_json.unwrap_or(json); + assert_eq!(sered_json, format!("\"{}\"", expected_output)); + } + } + #[test] fn test_primitive_type_compatible() { let pairs = vec![ @@ -1210,6 +1422,19 @@ mod tests { ), (PrimitiveType::Fixed(8), PrimitiveLiteral::Binary(vec![1])), (PrimitiveType::Binary, PrimitiveLiteral::Binary(vec![1])), + ( + PrimitiveType::Geometry { + crs: "OGC:CRS84".to_string(), + }, + PrimitiveLiteral::Binary(vec![1]), + ), + ( + PrimitiveType::Geography { + crs: "OGC:CRS84".to_string(), + algorithm: EdgeAlgorithm::Spherical, + }, + PrimitiveLiteral::Binary(vec![1]), + ), ]; for (ty, literal) in pairs { assert!(ty.compatible(&literal)); diff --git a/crates/iceberg/src/spec/values/datum.rs b/crates/iceberg/src/spec/values/datum.rs index 88209ae95c..2ef13b170d 100644 --- a/crates/iceberg/src/spec/values/datum.rs +++ b/crates/iceberg/src/spec/values/datum.rs @@ -256,6 +256,18 @@ impl PartialOrd for Datum { PrimitiveType::Binary, PrimitiveType::Binary, ) => val.partial_cmp(other_val), + ( + PrimitiveLiteral::Binary(val), + PrimitiveLiteral::Binary(other_val), + PrimitiveType::Geometry { .. }, + PrimitiveType::Geometry { .. }, + ) => val.partial_cmp(other_val), + ( + PrimitiveLiteral::Binary(val), + PrimitiveLiteral::Binary(other_val), + PrimitiveType::Geography { .. }, + PrimitiveType::Geography { .. }, + ) => val.partial_cmp(other_val), ( PrimitiveLiteral::Int128(val), PrimitiveLiteral::Int128(other_val), @@ -406,6 +418,8 @@ impl Datum { } PrimitiveType::Fixed(_) => PrimitiveLiteral::Binary(Vec::from(bytes)), PrimitiveType::Binary => PrimitiveLiteral::Binary(Vec::from(bytes)), + PrimitiveType::Geometry { .. } => PrimitiveLiteral::Binary(Vec::from(bytes)), + PrimitiveType::Geography { .. } => PrimitiveLiteral::Binary(Vec::from(bytes)), PrimitiveType::Decimal { .. } => { let unscaled_value = BigInt::from_signed_bytes_be(bytes); PrimitiveLiteral::Int128(unscaled_value.to_i128().ok_or_else(|| { diff --git a/crates/iceberg/src/spec/values/literal.rs b/crates/iceberg/src/spec/values/literal.rs index d6e502e8fd..e60211515c 100644 --- a/crates/iceberg/src/spec/values/literal.rs +++ b/crates/iceberg/src/spec/values/literal.rs @@ -500,8 +500,26 @@ impl Literal { (PrimitiveType::Uuid, JsonValue::String(s)) => Ok(Some(Literal::Primitive( PrimitiveLiteral::UInt128(Uuid::parse_str(&s)?.as_u128()), ))), - (PrimitiveType::Fixed(_), JsonValue::String(_)) => todo!(), - (PrimitiveType::Binary, JsonValue::String(_)) => todo!(), + (PrimitiveType::Fixed(len), JsonValue::String(s)) => { + let bytes = parse_hex_string(&s)?; + if bytes.len() as u64 != *len { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Fixed literal length {} doesn't match type length {}", + bytes.len(), + len + ), + )); + } + Ok(Some(Literal::Primitive(PrimitiveLiteral::Binary(bytes)))) + } + (PrimitiveType::Binary, JsonValue::String(s)) + | (PrimitiveType::Geometry { .. }, JsonValue::String(s)) + | (PrimitiveType::Geography { .. }, JsonValue::String(s)) => { + let bytes = parse_hex_string(&s)?; + Ok(Some(Literal::Primitive(PrimitiveLiteral::Binary(bytes)))) + } ( PrimitiveType::Decimal { precision: _, @@ -663,7 +681,7 @@ impl Literal { (_, PrimitiveLiteral::Binary(val)) => Ok(JsonValue::String(val.iter().fold( String::new(), |mut acc, x| { - acc.push_str(&format!("{x:x}")); + acc.push_str(&format!("{x:02X}")); acc }, ))), @@ -745,3 +763,18 @@ impl Literal { } } } + +fn parse_hex_string(s: &str) -> Result> { + if s.len() % 2 != 0 { + return Err(Error::new( + ErrorKind::DataInvalid, + "Hex string must have even length", + )); + } + + (0..s.len()) + .step_by(2) + .map(|i| u8::from_str_radix(&s[i..i + 2], 16)) + .collect::, _>>() + .map_err(|e| Error::new(ErrorKind::DataInvalid, "Invalid hex string").with_source(e)) +} diff --git a/crates/iceberg/src/spec/values/serde.rs b/crates/iceberg/src/spec/values/serde.rs index 053acca8b0..f91305fff2 100644 --- a/crates/iceberg/src/spec/values/serde.rs +++ b/crates/iceberg/src/spec/values/serde.rs @@ -509,6 +509,12 @@ pub(crate) mod _serde { )) } } + Type::Primitive(PrimitiveType::Geometry { .. }) => { + Ok(Some(Literal::Primitive(PrimitiveLiteral::Binary(v.to_vec())))) + } + Type::Primitive(PrimitiveType::Geography { .. }) => { + Ok(Some(Literal::Primitive(PrimitiveLiteral::Binary(v.to_vec())))) + } _ => Err(invalid_err("bytes")), }, RawLiteralEnum::List(v) => match ty { @@ -626,7 +632,9 @@ pub(crate) mod _serde { } Ok(Some(Literal::decimal(i128::from_be_bytes(bytes)))) } - Type::Primitive(PrimitiveType::Binary) => { + Type::Primitive(PrimitiveType::Binary) + | Type::Primitive(PrimitiveType::Geometry { .. }) + | Type::Primitive(PrimitiveType::Geography { .. }) => { let bytes = v .list .into_iter() diff --git a/crates/iceberg/src/spec/values/tests.rs b/crates/iceberg/src/spec/values/tests.rs index bb10701d87..7c97267488 100644 --- a/crates/iceberg/src/spec/values/tests.rs +++ b/crates/iceberg/src/spec/values/tests.rs @@ -29,7 +29,9 @@ use crate::ErrorKind; use crate::avro::schema_to_avro_schema; use crate::spec::Schema; use crate::spec::Type::Primitive; -use crate::spec::datatypes::{ListType, MapType, NestedField, PrimitiveType, StructType, Type}; +use crate::spec::datatypes::{ + EdgeAlgorithm, ListType, MapType, NestedField, PrimitiveType, StructType, Type, +}; use crate::spec::values::datum::{INT_MAX, INT_MIN, LONG_MAX, LONG_MIN}; use crate::spec::values::serde::_serde; use crate::spec::values::{Datum, Literal, Map, PrimitiveLiteral, RawLiteral, Struct}; @@ -324,6 +326,33 @@ fn json_map() { ); } +#[test] +fn json_geometry() { + let record = r#""0001020304""#; + + check_json_serde( + record, + Literal::Primitive(PrimitiveLiteral::Binary(vec![0, 1, 2, 3, 4])), + &Type::Primitive(PrimitiveType::Geometry { + crs: "OGC:CRS84".to_string(), + }), + ); +} + +#[test] +fn json_geography() { + let record = r#""0001020304""#; + + check_json_serde( + record, + Literal::Primitive(PrimitiveLiteral::Binary(vec![0, 1, 2, 3, 4])), + &Type::Primitive(PrimitiveType::Geography { + crs: "OGC:CRS84".to_string(), + algorithm: EdgeAlgorithm::Spherical, + }), + ); +} + #[test] fn avro_bytes_boolean() { let bytes = vec![1u8]; @@ -427,6 +456,42 @@ fn avro_bytes_decimal_expect_error() { } } +#[test] +fn avro_bytes_geometry() { + let bytes = vec![1u8, 2u8, 3u8, 4u8, 5u8]; + check_avro_bytes_serde( + bytes.clone(), + Datum::new( + PrimitiveType::Geometry { + crs: "OGC:CRS84".to_string(), + }, + PrimitiveLiteral::Binary(bytes.clone()), + ), + &PrimitiveType::Geometry { + crs: "OGC:CRS84".to_string(), + }, + ); +} + +#[test] +fn avro_bytes_geography() { + let bytes = vec![1u8, 2u8, 3u8, 4u8, 5u8]; + check_avro_bytes_serde( + bytes.clone(), + Datum::new( + PrimitiveType::Geography { + crs: "OGC:CRS84".to_string(), + algorithm: EdgeAlgorithm::Spherical, + }, + PrimitiveLiteral::Binary(bytes.clone()), + ), + &PrimitiveType::Geography { + crs: "OGC:CRS84".to_string(), + algorithm: EdgeAlgorithm::Spherical, + }, + ); +} + fn check_raw_literal_bytes_serde_via_avro( input_bytes: Vec, expected_literal: Literal, @@ -1059,6 +1124,23 @@ fn test_datum_ser_deser() { test_fn(datum); let datum = Datum::fixed(vec![1, 2, 3, 4, 5]); test_fn(datum); + + let datum = Datum::new( + PrimitiveType::Geometry { + crs: "OGC:CRS84".to_string(), + }, + PrimitiveLiteral::Binary(vec![1, 2, 3, 4, 5]), + ); + test_fn(datum); + + let datum = Datum::new( + PrimitiveType::Geography { + crs: "OGC:CRS84".to_string(), + algorithm: EdgeAlgorithm::Spherical, + }, + PrimitiveLiteral::Binary(vec![1, 2, 3, 4, 5]), + ); + test_fn(datum); } #[test] @@ -1356,4 +1438,4 @@ fn test_date_from_json_as_number() { ); // Both formats should produce the same Literal value -} +} \ No newline at end of file From 651bf82ec950151bf111393a72b374c16d04883f Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 12 Jan 2026 14:48:36 -0800 Subject: [PATCH 2/6] lint issues --- crates/iceberg/src/spec/datatypes.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index 354bc32598..8781a77d8d 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -406,9 +406,7 @@ where S: Serializer { } fn deserialize_geometry<'de, D>(deserializer: D) -> std::result::Result -where - D: Deserializer<'de>, -{ +where D: Deserializer<'de> { let s = String::deserialize(deserializer)?; if s == "geometry" { return Ok(PrimitiveType::Geometry { @@ -424,9 +422,7 @@ where } fn serialize_geometry(crs: &str, serializer: S) -> std::result::Result -where - S: Serializer, -{ +where S: Serializer { if crs == "OGC:CRS84" { serializer.serialize_str("geometry") } else { From 625baf26011f99212305441d6c9defd40abb8a7a Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 12 Jan 2026 14:53:26 -0800 Subject: [PATCH 3/6] fmt --- crates/iceberg/src/spec/datatypes.rs | 4 +--- crates/iceberg/src/spec/values/serde.rs | 12 ++++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index 8781a77d8d..16c1421fb1 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -431,9 +431,7 @@ where S: Serializer { } fn deserialize_geography<'de, D>(deserializer: D) -> std::result::Result -where - D: Deserializer<'de>, -{ +where D: Deserializer<'de> { let s = String::deserialize(deserializer)?; if s == "geography" { return Ok(PrimitiveType::Geography { diff --git a/crates/iceberg/src/spec/values/serde.rs b/crates/iceberg/src/spec/values/serde.rs index f91305fff2..6084dc3673 100644 --- a/crates/iceberg/src/spec/values/serde.rs +++ b/crates/iceberg/src/spec/values/serde.rs @@ -509,12 +509,12 @@ pub(crate) mod _serde { )) } } - Type::Primitive(PrimitiveType::Geometry { .. }) => { - Ok(Some(Literal::Primitive(PrimitiveLiteral::Binary(v.to_vec())))) - } - Type::Primitive(PrimitiveType::Geography { .. }) => { - Ok(Some(Literal::Primitive(PrimitiveLiteral::Binary(v.to_vec())))) - } + Type::Primitive(PrimitiveType::Geometry { .. }) => Ok(Some( + Literal::Primitive(PrimitiveLiteral::Binary(v.to_vec())), + )), + Type::Primitive(PrimitiveType::Geography { .. }) => Ok(Some( + Literal::Primitive(PrimitiveLiteral::Binary(v.to_vec())), + )), _ => Err(invalid_err("bytes")), }, RawLiteralEnum::List(v) => match ty { From 61d786a7277e57492f14a273e389d476370a5f5e Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 12 Jan 2026 14:57:31 -0800 Subject: [PATCH 4/6] fmt --- crates/iceberg/src/spec/values/tests.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/iceberg/src/spec/values/tests.rs b/crates/iceberg/src/spec/values/tests.rs index 7c97267488..fbc8f68f54 100644 --- a/crates/iceberg/src/spec/values/tests.rs +++ b/crates/iceberg/src/spec/values/tests.rs @@ -1438,4 +1438,5 @@ fn test_date_from_json_as_number() { ); // Both formats should produce the same Literal value -} \ No newline at end of file +} + From 16a32c75d3fc05747e3badd5671aca92db623eaa Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 12 Jan 2026 15:04:44 -0800 Subject: [PATCH 5/6] finally have the env working properly --- crates/iceberg/src/spec/values/tests.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/iceberg/src/spec/values/tests.rs b/crates/iceberg/src/spec/values/tests.rs index fbc8f68f54..ca1fd4b37b 100644 --- a/crates/iceberg/src/spec/values/tests.rs +++ b/crates/iceberg/src/spec/values/tests.rs @@ -1439,4 +1439,3 @@ fn test_date_from_json_as_number() { // Both formats should produce the same Literal value } - From acfa045bc8d473dfbf7a1218a7bb8cc35d383b6d Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 12 Jan 2026 15:17:30 -0800 Subject: [PATCH 6/6] make build is passing now --- crates/catalog/glue/src/schema.rs | 5 ++++- crates/catalog/hms/src/schema.rs | 5 ++++- crates/integrations/datafusion/src/physical_plan/scan.rs | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/crates/catalog/glue/src/schema.rs b/crates/catalog/glue/src/schema.rs index 864320dae4..99a33687d2 100644 --- a/crates/catalog/glue/src/schema.rs +++ b/crates/catalog/glue/src/schema.rs @@ -165,7 +165,10 @@ impl SchemaVisitor for GlueSchemaBuilder { PrimitiveType::Date => "date".to_string(), PrimitiveType::Timestamp => "timestamp".to_string(), PrimitiveType::TimestampNs => "timestamp_ns".to_string(), - PrimitiveType::Timestamptz | PrimitiveType::TimestamptzNs => { + PrimitiveType::Timestamptz + | PrimitiveType::TimestamptzNs + | PrimitiveType::Geometry { .. } + | PrimitiveType::Geography { .. } => { return Err(Error::new( ErrorKind::FeatureUnsupported, format!("Conversion from {p:?} is not supported"), diff --git a/crates/catalog/hms/src/schema.rs b/crates/catalog/hms/src/schema.rs index c23b48719d..ec0eaae8c5 100644 --- a/crates/catalog/hms/src/schema.rs +++ b/crates/catalog/hms/src/schema.rs @@ -122,7 +122,10 @@ impl SchemaVisitor for HiveSchemaBuilder { PrimitiveType::Date => "date".to_string(), PrimitiveType::Timestamp => "timestamp".to_string(), PrimitiveType::TimestampNs => "timestamp_ns".to_string(), - PrimitiveType::Timestamptz | PrimitiveType::TimestamptzNs => { + PrimitiveType::Timestamptz + | PrimitiveType::TimestamptzNs + | PrimitiveType::Geometry { .. } + | PrimitiveType::Geography { .. } => { return Err(Error::new( ErrorKind::FeatureUnsupported, format!("Conversion from {p:?} is not supported"), diff --git a/crates/integrations/datafusion/src/physical_plan/scan.rs b/crates/integrations/datafusion/src/physical_plan/scan.rs index d627b6a63d..7bb4e7ed0e 100644 --- a/crates/integrations/datafusion/src/physical_plan/scan.rs +++ b/crates/integrations/datafusion/src/physical_plan/scan.rs @@ -126,7 +126,7 @@ impl ExecutionPlan for IcebergTableScan { self } - fn children(&self) -> Vec<&Arc<(dyn ExecutionPlan + 'static)>> { + fn children(&self) -> Vec<&Arc> { vec![] }