From 5fe0852988677504bea67ecdacfe01d40a3659dd Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Sun, 2 Mar 2025 18:24:15 -0500 Subject: [PATCH] implement iceberg catalog into optd e2e demo --- Cargo.lock | 2003 +++++++++++++++-- optd-core/Cargo.toml | 5 +- optd-core/src/catalog/mod.rs | 312 +-- optd-core/src/catalog/sqlite_memo.rs | 305 +++ optd-datafusion/Cargo.toml | 3 + optd-datafusion/src/converter/mod.rs | 35 - optd-datafusion/src/df_conversion/context.rs | 31 + .../{converter => df_conversion}/from_optd.rs | 11 +- .../{converter => df_conversion}/into_optd.rs | 12 +- optd-datafusion/src/df_conversion/mod.rs | 5 + .../session.rs} | 0 optd-datafusion/src/iceberg_conversion.rs | 116 + optd-datafusion/src/lib.rs | 6 +- optd-datafusion/src/mock.rs | 64 +- 14 files changed, 2393 insertions(+), 515 deletions(-) create mode 100644 optd-core/src/catalog/sqlite_memo.rs delete mode 100644 optd-datafusion/src/converter/mod.rs create mode 100644 optd-datafusion/src/df_conversion/context.rs rename optd-datafusion/src/{converter => df_conversion}/from_optd.rs (97%) rename optd-datafusion/src/{converter => df_conversion}/into_optd.rs (96%) create mode 100644 optd-datafusion/src/df_conversion/mod.rs rename optd-datafusion/src/{optd_utils.rs => df_conversion/session.rs} (100%) create mode 100644 optd-datafusion/src/iceberg_conversion.rs diff --git a/Cargo.lock b/Cargo.lock index fd2d5dd..3ba4728 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,23 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "adler32" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" + +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom 0.2.15", + "once_cell", + "version_check", +] + [[package]] name = "ahash" version = "0.8.11" @@ -132,6 +149,30 @@ version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4" +[[package]] +name = "apache-avro" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aef82843a0ec9f8b19567445ad2421ceeb1d711514384bdd3d49fe37102ee13" +dependencies = [ + "bigdecimal", + "digest", + "libflate", + "log", + "num-bigint", + "quad-rand", + "rand", + "regex-lite", + "serde", + "serde_bytes", + "serde_json", + "strum", + "strum_macros", + "thiserror 1.0.69", + "typed-builder 0.19.1", + "uuid", +] + [[package]] name = "ariadne" version = "0.5.0" @@ -142,6 +183,12 @@ dependencies = [ "yansi", ] +[[package]] +name = "array-init" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d62b7694a562cdf5a74227903507c56ab2cc8bdd1f781ed5cb4cf9c9f810bfc" + [[package]] name = "arrayref" version = "0.3.9" @@ -160,19 +207,34 @@ version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc208515aa0151028e464cc94a692156e945ce5126abd3537bb7fd6ba2143ed1" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", + "arrow-arith 54.2.1", + "arrow-array 54.2.1", + "arrow-buffer 54.2.1", + "arrow-cast 54.2.1", "arrow-csv", - "arrow-data", - "arrow-ipc", + "arrow-data 54.2.1", + "arrow-ipc 54.2.1", "arrow-json", - "arrow-ord", + "arrow-ord 54.2.1", "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-schema 54.2.1", + "arrow-select 54.2.1", + "arrow-string 54.2.1", +] + +[[package]] +name = "arrow-arith" +version = "53.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31dce77d2985522288edae7206bffd5fc4996491841dda01a13a58415867e681" +dependencies = [ + "arrow-array 53.4.0", + "arrow-buffer 53.4.0", + "arrow-data 53.4.0", + "arrow-schema 53.4.0", + "chrono", + "half", + "num", ] [[package]] @@ -181,11 +243,27 @@ version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e07e726e2b3f7816a85c6a45b6ec118eeeabf0b2a8c208122ad949437181f49a" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-buffer 54.2.1", + "arrow-data 54.2.1", + "arrow-schema 54.2.1", + "chrono", + "num", +] + +[[package]] +name = "arrow-array" +version = "53.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d45fe6d3faed0435b7313e59a02583b14c6c6339fa7729e94c32a20af319a79" +dependencies = [ + "ahash 0.8.11", + "arrow-buffer 53.4.0", + "arrow-data 53.4.0", + "arrow-schema 53.4.0", "chrono", + "half", + "hashbrown 0.15.2", "num", ] @@ -195,10 +273,10 @@ version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2262eba4f16c78496adfd559a29fe4b24df6088efc9985a873d58e92be022d5" dependencies = [ - "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "ahash 0.8.11", + "arrow-buffer 54.2.1", + "arrow-data 54.2.1", + "arrow-schema 54.2.1", "chrono", "chrono-tz", "half", @@ -206,6 +284,17 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "53.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b02656a35cc103f28084bc80a0159668e0a680d919cef127bd7e0aaccb06ec1" +dependencies = [ + "bytes", + "half", + "num", +] + [[package]] name = "arrow-buffer" version = "54.2.1" @@ -217,17 +306,37 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-cast" +version = "53.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c73c6233c5b5d635a56f6010e6eb1ab9e30e94707db21cea03da317f67d84cf3" +dependencies = [ + "arrow-array 53.4.0", + "arrow-buffer 53.4.0", + "arrow-data 53.4.0", + "arrow-schema 53.4.0", + "arrow-select 53.4.0", + "atoi", + "base64", + "chrono", + "half", + "lexical-core", + "num", + "ryu", +] + [[package]] name = "arrow-cast" version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4103d88c5b441525ed4ac23153be7458494c2b0c9a11115848fdb9b81f6f886a" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 54.2.1", + "arrow-buffer 54.2.1", + "arrow-data 54.2.1", + "arrow-schema 54.2.1", + "arrow-select 54.2.1", "atoi", "base64", "chrono", @@ -244,9 +353,9 @@ version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43d3cb0914486a3cae19a5cad2598e44e225d53157926d0ada03c20521191a65" dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-cast 54.2.1", + "arrow-schema 54.2.1", "chrono", "csv", "csv-core", @@ -254,28 +363,54 @@ dependencies = [ "regex", ] +[[package]] +name = "arrow-data" +version = "53.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f2861ffa86f107b8ab577d86cff7c7a490243eabe961ba1e1af4f27542bb79" +dependencies = [ + "arrow-buffer 53.4.0", + "arrow-schema 53.4.0", + "half", + "num", +] + [[package]] name = "arrow-data" version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a329fb064477c9ec5f0870d2f5130966f91055c7c5bce2b3a084f116bc28c3b" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 54.2.1", + "arrow-schema 54.2.1", "half", "num", ] +[[package]] +name = "arrow-ipc" +version = "53.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0270dc511f11bb5fa98a25020ad51a99ca5b08d8a8dfbd17503bb9dba0388f0b" +dependencies = [ + "arrow-array 53.4.0", + "arrow-buffer 53.4.0", + "arrow-cast 53.4.0", + "arrow-data 53.4.0", + "arrow-schema 53.4.0", + "flatbuffers", +] + [[package]] name = "arrow-ipc" version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ddecdeab02491b1ce88885986e25002a3da34dd349f682c7cfe67bab7cc17b86" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-buffer 54.2.1", + "arrow-data 54.2.1", + "arrow-schema 54.2.1", "flatbuffers", "lz4_flex", ] @@ -286,31 +421,46 @@ version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d03b9340013413eb84868682ace00a1098c81a5ebc96d279f7ebf9a4cac3c0fd" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-buffer 54.2.1", + "arrow-cast 54.2.1", + "arrow-data 54.2.1", + "arrow-schema 54.2.1", "chrono", "half", - "indexmap", + "indexmap 2.7.1", "lexical-core", "num", "serde", "serde_json", ] +[[package]] +name = "arrow-ord" +version = "53.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6f202a879d287099139ff0d121e7f55ae5e0efe634b8cf2106ebc27a8715dee" +dependencies = [ + "arrow-array 53.4.0", + "arrow-buffer 53.4.0", + "arrow-data 53.4.0", + "arrow-schema 53.4.0", + "arrow-select 53.4.0", + "half", + "num", +] + [[package]] name = "arrow-ord" version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f841bfcc1997ef6ac48ee0305c4dfceb1f7c786fe31e67c1186edf775e1f1160" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 54.2.1", + "arrow-buffer 54.2.1", + "arrow-data 54.2.1", + "arrow-schema 54.2.1", + "arrow-select 54.2.1", ] [[package]] @@ -319,31 +469,68 @@ version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1eeb55b0a0a83851aa01f2ca5ee5648f607e8506ba6802577afdda9d75cdedcd" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-buffer 54.2.1", + "arrow-data 54.2.1", + "arrow-schema 54.2.1", "half", ] +[[package]] +name = "arrow-schema" +version = "53.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9579b9d8bce47aa41389fe344f2c6758279983b7c0ebb4013e283e3e91bb450e" + [[package]] name = "arrow-schema" version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85934a9d0261e0fa5d4e2a5295107d743b543a6e0484a835d4b8db2da15306f9" +[[package]] +name = "arrow-select" +version = "53.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7471ba126d0b0aaa24b50a36bc6c25e4e74869a1fd1a5553357027a0b1c8d1f1" +dependencies = [ + "ahash 0.8.11", + "arrow-array 53.4.0", + "arrow-buffer 53.4.0", + "arrow-data 53.4.0", + "arrow-schema 53.4.0", + "num", +] + [[package]] name = "arrow-select" version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e2932aece2d0c869dd2125feb9bd1709ef5c445daa3838ac4112dcfa0fda52c" dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "ahash 0.8.11", + "arrow-array 54.2.1", + "arrow-buffer 54.2.1", + "arrow-data 54.2.1", + "arrow-schema 54.2.1", + "num", +] + +[[package]] +name = "arrow-string" +version = "53.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72993b01cb62507b06f1fb49648d7286c8989ecfabdb7b77a750fcb54410731b" +dependencies = [ + "arrow-array 53.4.0", + "arrow-buffer 53.4.0", + "arrow-data 53.4.0", + "arrow-schema 53.4.0", + "arrow-select 53.4.0", + "memchr", "num", + "regex", + "regex-syntax 0.8.5", ] [[package]] @@ -352,15 +539,15 @@ version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "912e38bd6a7a7714c1d9b61df80315685553b7455e8a6045c27531d8ecd5b458" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 54.2.1", + "arrow-buffer 54.2.1", + "arrow-data 54.2.1", + "arrow-schema 54.2.1", + "arrow-select 54.2.1", "memchr", "num", "regex", - "regex-syntax", + "regex-syntax 0.8.5", ] [[package]] @@ -380,6 +567,17 @@ dependencies = [ "zstd-safe", ] +[[package]] +name = "async-lock" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff6e472cdea888a4bd64f342f09b3f50e1886d32afe8df3d663c01140b811b18" +dependencies = [ + "event-listener", + "event-listener-strategy", + "pin-project-lite", +] + [[package]] name = "async-recursion" version = "1.1.1" @@ -388,7 +586,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", ] [[package]] @@ -399,7 +597,7 @@ checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", ] [[package]] @@ -417,6 +615,17 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "backon" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49fef586913a57ff189f25c9b3d034356a5bf6b3fa9a7f067588fe1698ba1f5d" +dependencies = [ + "fastrand", + "gloo-timers", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.74" @@ -455,8 +664,15 @@ dependencies = [ "num-bigint", "num-integer", "num-traits", + "serde", ] +[[package]] +name = "bimap" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" + [[package]] name = "bitflags" version = "1.3.2" @@ -472,6 +688,18 @@ dependencies = [ "serde", ] +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "blake2" version = "0.10.6" @@ -503,6 +731,29 @@ dependencies = [ "generic-array", ] +[[package]] +name = "borsh" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5430e3be710b68d984d1391c854eb431a9d548640711faa54eecb1df93db91cc" +dependencies = [ + "borsh-derive", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8b668d39970baad5356d7c83a86fee3a539e6f93bf6764c97368243e17a0487" +dependencies = [ + "once_cell", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "brotli" version = "7.0.0" @@ -530,6 +781,28 @@ version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "byteorder" version = "1.5.0" @@ -578,6 +851,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.39" @@ -586,7 +865,10 @@ checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" dependencies = [ "android-tzdata", "iana-time-zone", + "js-sys", "num-traits", + "serde", + "wasm-bindgen", "windows-targets 0.52.6", ] @@ -652,7 +934,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.98", ] [[package]] @@ -724,6 +1006,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" +dependencies = [ + "memchr", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -748,6 +1039,15 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "crc32c" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" +dependencies = [ + "rustc_version", +] + [[package]] name = "crc32fast" version = "1.4.2" @@ -757,6 +1057,24 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-queue" version = "0.3.12" @@ -809,6 +1127,47 @@ dependencies = [ "memchr", ] +[[package]] +name = "darling" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.98", +] + +[[package]] +name = "darling_macro" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.98", +] + +[[package]] +name = "dary_heap" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" + [[package]] name = "dashmap" version = "6.1.0" @@ -830,9 +1189,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" dependencies = [ "arrow", - "arrow-array", - "arrow-ipc", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-ipc 54.2.1", + "arrow-schema 54.2.1", "async-compression", "async-trait", "bytes", @@ -861,7 +1220,7 @@ dependencies = [ "log", "object_store", "parking_lot", - "parquet", + "parquet 54.2.1", "rand", "regex", "sqlparser", @@ -901,20 +1260,20 @@ version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" dependencies = [ - "ahash", + "ahash 0.8.11", "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ipc", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-buffer 54.2.1", + "arrow-ipc 54.2.1", + "arrow-schema 54.2.1", "base64", "half", "hashbrown 0.14.5", - "indexmap", + "indexmap 2.7.1", "libc", "log", "object_store", - "parquet", + "parquet 54.2.1", "paste", "recursive", "sqlparser", @@ -971,7 +1330,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap", + "indexmap 2.7.1", "paste", "recursive", "serde_json", @@ -997,7 +1356,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" dependencies = [ "arrow", - "arrow-buffer", + "arrow-buffer 54.2.1", "base64", "blake2", "blake3", @@ -1026,10 +1385,10 @@ version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" dependencies = [ - "ahash", + "ahash 0.8.11", "arrow", - "arrow-buffer", - "arrow-schema", + "arrow-buffer 54.2.1", + "arrow-schema 54.2.1", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1049,7 +1408,7 @@ version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" dependencies = [ - "ahash", + "ahash 0.8.11", "arrow", "datafusion-common", "datafusion-expr-common", @@ -1063,10 +1422,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" dependencies = [ "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-buffer 54.2.1", + "arrow-ord 54.2.1", + "arrow-schema 54.2.1", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1131,7 +1490,7 @@ checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" dependencies = [ "datafusion-expr", "quote", - "syn", + "syn 2.0.98", ] [[package]] @@ -1145,12 +1504,12 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "indexmap", + "indexmap 2.7.1", "itertools 0.14.0", "log", "recursive", "regex", - "regex-syntax", + "regex-syntax 0.8.5", ] [[package]] @@ -1159,11 +1518,11 @@ version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" dependencies = [ - "ahash", + "ahash 0.8.11", "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-buffer 54.2.1", + "arrow-schema 54.2.1", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1171,7 +1530,7 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "indexmap", + "indexmap 2.7.1", "itertools 0.14.0", "log", "paste", @@ -1184,9 +1543,9 @@ version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" dependencies = [ - "ahash", + "ahash 0.8.11", "arrow", - "arrow-buffer", + "arrow-buffer 54.2.1", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", @@ -1200,7 +1559,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" dependencies = [ "arrow", - "arrow-schema", + "arrow-schema 54.2.1", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -1221,12 +1580,12 @@ version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" dependencies = [ - "ahash", + "ahash 0.8.11", "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-buffer 54.2.1", + "arrow-ord 54.2.1", + "arrow-schema 54.2.1", "async-trait", "chrono", "datafusion-common", @@ -1239,7 +1598,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap", + "indexmap 2.7.1", "itertools 0.14.0", "log", "parking_lot", @@ -1254,12 +1613,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" dependencies = [ "arrow", - "arrow-array", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-schema 54.2.1", "bigdecimal", "datafusion-common", "datafusion-expr", - "indexmap", + "indexmap 2.7.1", "log", "recursive", "regex", @@ -1277,6 +1636,47 @@ dependencies = [ "zeroize", ] +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", + "serde", +] + +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.98", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 2.0.98", +] + [[package]] name = "digest" version = "0.10.7" @@ -1297,7 +1697,16 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", +] + +[[package]] +name = "dlv-list" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" +dependencies = [ + "const-random", ] [[package]] @@ -1324,7 +1733,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.98", ] [[package]] @@ -1365,6 +1774,16 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "event-listener-strategy" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c3e4e0dd3673c1139bf041f3008816d9cf2946bbfac2945c09e523b8d7b05b2" +dependencies = [ + "event-listener", + "pin-project-lite", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -1377,6 +1796,12 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" +[[package]] +name = "flagset" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3ea1ec5f8307826a5b71094dd91fc04d4ae75d5709b20ad351c7fb4815c86ec" + [[package]] name = "flatbuffers" version = "24.12.23" @@ -1408,6 +1833,12 @@ dependencies = [ "spin", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foldhash" version = "0.1.4" @@ -1423,6 +1854,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "futures" version = "0.3.31" @@ -1490,7 +1927,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", ] [[package]] @@ -1523,6 +1960,19 @@ dependencies = [ "slab", ] +[[package]] +name = "generator" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6bd114ceda131d3b1d665eba35788690ad37f5916457286b32ab6fd3c438dd" +dependencies = [ + "cfg-if", + "libc", + "log", + "rustversion", + "windows", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -1540,8 +1990,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi 0.11.0+wasi-snapshot-preview1", + "wasm-bindgen", ] [[package]] @@ -1568,6 +2020,18 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +[[package]] +name = "gloo-timers" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "half" version = "2.4.1" @@ -1579,13 +2043,22 @@ dependencies = [ "num-traits", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] + [[package]] name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ - "ahash", + "ahash 0.8.11", "allocator-api2", ] @@ -1648,12 +2121,108 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "http" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a" + [[package]] name = "humantime" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "hyper" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "httparse", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2" +dependencies = [ + "futures-util", + "http", + "hyper", + "hyper-util", + "rustls", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", + "webpki-roots", +] + +[[package]] +name = "hyper-util" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", +] + [[package]] name = "iana-time-zone" version = "0.1.61" @@ -1665,7 +2234,7 @@ dependencies = [ "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows-core", + "windows-core 0.52.0", ] [[package]] @@ -1677,6 +2246,69 @@ dependencies = [ "cc", ] +[[package]] +name = "iceberg" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcf75614057c573c1f0bb4904ad012d9aa65b5aebbabea7096d99e4f1a3fc9b" +dependencies = [ + "anyhow", + "apache-avro", + "array-init", + "arrow-arith 53.4.0", + "arrow-array 53.4.0", + "arrow-cast 53.4.0", + "arrow-ord 53.4.0", + "arrow-schema 53.4.0", + "arrow-select 53.4.0", + "arrow-string 53.4.0", + "async-trait", + "bimap", + "bitvec", + "bytes", + "chrono", + "derive_builder", + "fnv", + "futures", + "itertools 0.13.0", + "moka", + "murmur3", + "num-bigint", + "once_cell", + "opendal", + "ordered-float 4.6.0", + "parquet 53.4.0", + "paste", + "rand", + "reqwest", + "rust_decimal", + "serde", + "serde_bytes", + "serde_derive", + "serde_json", + "serde_repr", + "serde_with", + "tokio", + "typed-builder 0.20.0", + "url", + "uuid", + "zstd", +] + +[[package]] +name = "iceberg-catalog-memory" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94438e70602c1cc37c8de0e0f8925a703092be41da5046c9252fb9859b58a908" +dependencies = [ + "async-trait", + "futures", + "iceberg", + "itertools 0.13.0", + "serde_json", + "uuid", +] + [[package]] name = "icu_collections" version = "1.5.0" @@ -1792,9 +2424,15 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.0.3" @@ -1816,6 +2454,17 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", + "serde", +] + [[package]] name = "indexmap" version = "2.7.1" @@ -1824,6 +2473,7 @@ checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" dependencies = [ "equivalent", "hashbrown 0.15.2", + "serde", ] [[package]] @@ -1832,6 +2482,12 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -1960,6 +2616,30 @@ version = "0.2.170" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" +[[package]] +name = "libflate" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45d9dfdc14ea4ef0900c1cddbc8dcd553fbaacd8a4a282cf4018ae9dd04fb21e" +dependencies = [ + "adler32", + "core2", + "crc32fast", + "dary_heap", + "libflate_lz77", +] + +[[package]] +name = "libflate_lz77" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e0d73b369f386f1c44abd9c570d5318f55ccde816ff4b562fa452e5182863d" +dependencies = [ + "core2", + "hashbrown 0.14.5", + "rle-decode-fast", +] + [[package]] name = "libm" version = "0.2.11" @@ -2005,6 +2685,19 @@ version = "0.4.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e" +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + [[package]] name = "lz4_flex" version = "0.11.3" @@ -2025,6 +2718,15 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "md-5" version = "0.10.6" @@ -2041,6 +2743,12 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + [[package]] name = "miniz_oxide" version = "0.8.5" @@ -2061,6 +2769,44 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "moka" +version = "0.12.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9321642ca94a4282428e6ea4af8cc2ca4eac48ac7a6a4ea8f33f76d0ce70926" +dependencies = [ + "async-lock", + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "event-listener", + "futures-util", + "loom", + "parking_lot", + "portable-atomic", + "rustc_version", + "smallvec", + "tagptr", + "thiserror 1.0.69", + "uuid", +] + +[[package]] +name = "murmur3" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b" + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num" version = "0.4.3" @@ -2083,6 +2829,7 @@ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", + "serde", ] [[package]] @@ -2111,6 +2858,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-integer" version = "0.1.46" @@ -2188,13 +2941,45 @@ version = "1.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +[[package]] +name = "opendal" +version = "0.50.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb28bb6c64e116ceaf8dd4e87099d3cfea4a58e85e62b104fef74c91afba0f44" +dependencies = [ + "anyhow", + "async-trait", + "backon", + "base64", + "bytes", + "chrono", + "crc32c", + "flagset", + "futures", + "getrandom 0.2.15", + "http", + "log", + "md-5", + "once_cell", + "percent-encoding", + "quick-xml 0.36.2", + "reqsign", + "reqwest", + "serde", + "serde_json", + "tokio", + "uuid", +] + [[package]] name = "optd-core" version = "0.1.0" dependencies = [ "anyhow", "async-recursion", + "async-trait", "dotenvy", + "iceberg", "ordered-float 5.0.0", "proc-macro2", "serde", @@ -2213,6 +2998,8 @@ dependencies = [ "async-trait", "datafusion", "futures", + "iceberg", + "iceberg-catalog-memory", "itertools 0.14.0", "optd-core", "proc-macro2", @@ -2244,6 +3031,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ordered-float" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" +dependencies = [ + "num-traits", +] + [[package]] name = "ordered-float" version = "5.0.0" @@ -2255,6 +3051,22 @@ dependencies = [ "serde", ] +[[package]] +name = "ordered-multimap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" +dependencies = [ + "dlv-list", + "hashbrown 0.14.5", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "parking" version = "2.2.1" @@ -2286,18 +3098,18 @@ dependencies = [ [[package]] name = "parquet" -version = "54.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f88838dca3b84d41444a0341b19f347e8098a3898b0f21536654b8b799e11abd" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", +version = "53.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8957c0c95a6a1804f3e51a18f69df29be53856a8c5768cc9b6d00fcafcd2917c" +dependencies = [ + "ahash 0.8.11", + "arrow-array 53.4.0", + "arrow-buffer 53.4.0", + "arrow-cast 53.4.0", + "arrow-data 53.4.0", + "arrow-ipc 53.4.0", + "arrow-schema 53.4.0", + "arrow-select 53.4.0", "base64", "brotli", "bytes", @@ -2309,10 +3121,8 @@ dependencies = [ "lz4_flex", "num", "num-bigint", - "object_store", "paste", "seq-macro", - "simdutf8", "snap", "thrift", "tokio", @@ -2322,10 +3132,47 @@ dependencies = [ ] [[package]] -name = "parse-zoneinfo" -version = "0.3.1" +name = "parquet" +version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +checksum = "f88838dca3b84d41444a0341b19f347e8098a3898b0f21536654b8b799e11abd" +dependencies = [ + "ahash 0.8.11", + "arrow-array 54.2.1", + "arrow-buffer 54.2.1", + "arrow-cast 54.2.1", + "arrow-data 54.2.1", + "arrow-ipc 54.2.1", + "arrow-schema 54.2.1", + "arrow-select 54.2.1", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.15.2", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", + "zstd-sys", +] + +[[package]] +name = "parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" dependencies = [ "regex", ] @@ -2358,7 +3205,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", - "indexmap", + "indexmap 2.7.1", ] [[package]] @@ -2438,6 +3285,18 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +[[package]] +name = "portable-atomic" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.20" @@ -2447,6 +3306,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "proc-macro-crate" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" +dependencies = [ + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.93" @@ -2465,6 +3333,104 @@ dependencies = [ "cc", ] +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "quad-rand" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" + +[[package]] +name = "quick-xml" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86e446ed58cef1bbfe847bc2fda0e2e4ea9f0e57b90c507d4781292590d72a4e" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "quick-xml" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7649a7b4df05aed9ea7ec6f628c67c9953a43869b8bc50929569b2999d443fe" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "quinn" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" +dependencies = [ + "bytes", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls", + "socket2", + "thiserror 2.0.11", + "tokio", + "tracing", +] + +[[package]] +name = "quinn-proto" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" +dependencies = [ + "bytes", + "getrandom 0.2.15", + "rand", + "ring", + "rustc-hash", + "rustls", + "rustls-pki-types", + "slab", + "thiserror 2.0.11", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.59.0", +] + [[package]] name = "quote" version = "1.0.38" @@ -2474,6 +3440,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.8.5" @@ -2523,7 +3495,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn", + "syn 2.0.98", ] [[package]] @@ -2543,8 +3515,17 @@ checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.9", + "regex-syntax 0.8.5", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", ] [[package]] @@ -2555,15 +3536,158 @@ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.5", ] +[[package]] +name = "regex-lite" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "reqsign" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb0075a66c8bfbf4cc8b70dca166e722e1f55a3ea9250ecbb85f4d92a5f64149" +dependencies = [ + "anyhow", + "async-trait", + "base64", + "chrono", + "form_urlencoded", + "getrandom 0.2.15", + "hex", + "hmac", + "home", + "http", + "log", + "percent-encoding", + "quick-xml 0.35.0", + "rand", + "reqwest", + "rust-ini", + "serde", + "serde_json", + "sha1", + "sha2", +] + +[[package]] +name = "reqwest" +version = "0.12.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" +dependencies = [ + "base64", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-pemfile", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls", + "tokio-util", + "tower", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "webpki-roots", + "windows-registry", +] + +[[package]] +name = "ring" +version = "0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da5349ae27d3887ca812fb375b45a4fbb36d8d12d2df394968cd86e35683fe73" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.15", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rkyv" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "rle-decode-fast" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" + [[package]] name = "rsa" version = "0.9.7" @@ -2584,12 +3708,45 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rust-ini" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e310ef0e1b6eeb79169a1171daf9abcb87a2e17c03bee2c4bb100b55c75409f" +dependencies = [ + "cfg-if", + "ordered-multimap", + "trim-in-place", +] + +[[package]] +name = "rust_decimal" +version = "1.36.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b082d80e3e3cc52b2ed634388d436fe1f4de6af5786cc2de9ba9737527bdf555" +dependencies = [ + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand", + "rkyv", + "serde", + "serde_json", +] + [[package]] name = "rustc-demangle" version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustc_version" version = "0.4.1" @@ -2612,6 +3769,49 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "rustls" +version = "0.23.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47796c98c480fce5406ef69d1c76378375492c3b0a0de587be0c1d9feb12f395" +dependencies = [ + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" +dependencies = [ + "web-time", +] + +[[package]] +name = "rustls-webpki" +version = "0.102.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.19" @@ -2633,12 +3833,24 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "semver" version = "1.0.25" @@ -2660,6 +3872,15 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_bytes" +version = "0.11.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "387cc504cb06bb40a96c8e04e951fe01854cf6bc921053c954e4a606d9675c6a" +dependencies = [ + "serde", +] + [[package]] name = "serde_derive" version = "1.0.218" @@ -2668,7 +3889,7 @@ checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", ] [[package]] @@ -2683,6 +3904,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_repr" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -2695,6 +3927,36 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_with" +version = "3.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6b6f7f2fcb69f747921f79f3926bd1e203fce4fef62c268dd3abfb6d86029aa" +dependencies = [ + "base64", + "chrono", + "hex", + "indexmap 1.9.3", + "indexmap 2.7.1", + "serde", + "serde_derive", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d00caa5193a3c8362ac2b73be6b9e768aa5a4b2f721d8f4b339600c3cb51f8e" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "sha1" version = "0.10.6" @@ -2717,6 +3979,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -2790,7 +4061,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.98", ] [[package]] @@ -2846,7 +4117,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", ] [[package]] @@ -2879,7 +4150,7 @@ dependencies = [ "futures-util", "hashbrown 0.15.2", "hashlink", - "indexmap", + "indexmap 2.7.1", "log", "memchr", "once_cell", @@ -2888,7 +4159,7 @@ dependencies = [ "serde_json", "sha2", "smallvec", - "thiserror", + "thiserror 2.0.11", "tokio", "tokio-stream", "tracing", @@ -2905,7 +4176,7 @@ dependencies = [ "quote", "sqlx-core", "sqlx-macros-core", - "syn", + "syn 2.0.98", ] [[package]] @@ -2928,7 +4199,7 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn", + "syn 2.0.98", "tempfile", "tokio", "url", @@ -2971,7 +4242,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 2.0.11", "tracing", "whoami", ] @@ -3008,7 +4279,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 2.0.11", "tracing", "whoami", ] @@ -3078,12 +4349,42 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.98", +] + [[package]] name = "subtle" version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.98" @@ -3095,6 +4396,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + [[package]] name = "synstructure" version = "0.13.1" @@ -3103,9 +4413,21 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", ] +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tempfile" version = "3.17.1" @@ -3120,13 +4442,33 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + [[package]] name = "thiserror" version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.11", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", ] [[package]] @@ -3137,7 +4479,17 @@ checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", ] [[package]] @@ -3151,6 +4503,37 @@ dependencies = [ "ordered-float 2.10.1", ] +[[package]] +name = "time" +version = "0.3.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tiny-keccak" version = "2.0.2" @@ -3211,7 +4594,17 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" +dependencies = [ + "rustls", + "tokio", ] [[package]] @@ -3238,6 +4631,50 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml_datetime" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" + +[[package]] +name = "toml_edit" +version = "0.22.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" +dependencies = [ + "indexmap 2.7.1", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tower" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + [[package]] name = "tracing" version = "0.1.41" @@ -3258,7 +4695,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", ] [[package]] @@ -3268,6 +4705,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -3278,9 +4745,21 @@ checksum = "70977707304198400eb4835a78f6a9f928bf41bba420deb8fdb175cd965d77a7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", ] +[[package]] +name = "trim-in-place" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "343e926fc669bc8cde4fa3129ab681c63671bae288b1f1081ceee6d9d37904fc" + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + [[package]] name = "twox-hash" version = "1.6.3" @@ -3291,6 +4770,46 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "typed-builder" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06fbd5b8de54c5f7c91f6fe4cebb949be2125d7758e630bb58b1d831dbce600" +dependencies = [ + "typed-builder-macro 0.19.1", +] + +[[package]] +name = "typed-builder" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e14ed59dc8b7b26cacb2a92bad2e8b1f098806063898ab42a3bd121d7d45e75" +dependencies = [ + "typed-builder-macro 0.20.0", +] + +[[package]] +name = "typed-builder-macro" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + +[[package]] +name = "typed-builder-macro" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "560b82d656506509d43abe30e0ba64c56b1953ab3d4fe7ba5902747a7a3cedd5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "typenum" version = "1.18.0" @@ -3342,6 +4861,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "url" version = "2.5.4" @@ -3378,8 +4903,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0f540e3240398cce6128b64ba83fdbdd86129c16a3aa1a3a252efd66eb3d587" dependencies = [ "getrandom 0.3.1", + "serde", ] +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" @@ -3402,6 +4934,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -3445,10 +4986,23 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn", + "syn 2.0.98", "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.100" @@ -3467,7 +5021,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3481,6 +5035,29 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "web-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "web-time" version = "1.1.0" @@ -3491,6 +5068,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-roots" +version = "0.26.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2210b291f7ea53617fbafcc4939f10914214ec15aace5ba62293a668f322c5c9" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "whoami" version = "1.5.2" @@ -3501,6 +5087,22 @@ dependencies = [ "wasite", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.9" @@ -3510,6 +5112,22 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" +dependencies = [ + "windows-core 0.58.0", + "windows-targets 0.52.6", +] + [[package]] name = "windows-core" version = "0.52.0" @@ -3519,6 +5137,71 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-core" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-implement" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + +[[package]] +name = "windows-interface" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -3667,6 +5350,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "winnow" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7f4ea97f6f78012141bcdb6a216b2609f0979ada50b20ca5b52dde2eac2bb1" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen-rt" version = "0.33.0" @@ -3688,6 +5380,15 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "xz2" version = "0.1.7" @@ -3723,7 +5424,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", "synstructure", ] @@ -3745,7 +5446,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", ] [[package]] @@ -3765,7 +5466,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", "synstructure", ] @@ -3794,7 +5495,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.98", ] [[package]] diff --git a/optd-core/Cargo.toml b/optd-core/Cargo.toml index bb66730..c4a64fe 100644 --- a/optd-core/Cargo.toml +++ b/optd-core/Cargo.toml @@ -15,4 +15,7 @@ sqlx = { version = "0.8", features = [ "sqlite", "runtime-tokio", "migrate" ] } serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1", features = ["raw_value"] } dotenvy = "0.15" -ordered-float = { version = "5.0.0", features = ["serde"] } \ No newline at end of file +ordered-float = { version = "5.0.0", features = ["serde"] } + +async-trait = "0.1.85" +iceberg = "0.4.0" diff --git a/optd-core/src/catalog/mod.rs b/optd-core/src/catalog/mod.rs index 71abe14..84337af 100644 --- a/optd-core/src/catalog/mod.rs +++ b/optd-core/src/catalog/mod.rs @@ -1,305 +1,37 @@ -use std::sync::Arc; - -use sqlx::prelude::FromRow; - use crate::storage::memo::SqliteMemo; +use iceberg::{spec::Schema, Catalog, Result, TableIdent}; +use std::sync::Arc; -#[trait_variant::make(Send)] -pub trait Catalog { - async fn create_database(&self, db_name: &str) -> anyhow::Result>; - - async fn get_database(&self, db_name: &str) -> anyhow::Result>; - - async fn create_namespace( - &self, - database_id: DatabaseId, - namespace_name: &str, - ) -> anyhow::Result>; - - async fn get_namespace( - &self, - db_name: &str, - namespace_name: &str, - ) -> anyhow::Result>; - - async fn create_table( - &self, - namespace_id: NamespaceId, - table_name: &str, - schema: &Schema, - ) -> anyhow::Result>; - - async fn get_table( - &self, - db_name: &str, - namespace_name: &str, - table_name: &str, - ) -> anyhow::Result>; - - async fn get_schema(&self, table_id: TableId) -> anyhow::Result; -} - -pub struct OptdCatalog { - storage: Arc, +#[derive(Debug)] +pub struct OptdCatalog { + // TODO(connor): Do we even need this if `SqliteMemo` is going to implement `Catalog`? + _memo: Arc, + catalog: C, } -impl Catalog for OptdCatalog { - async fn create_database(&self, db_name: &str) -> anyhow::Result> { - let mut txn = self.storage.begin().await?; - let db: DatabaseMetadata = - sqlx::query_as("INSERT INTO database_metadata (name) VALUES (?) RETURNING id, name") - .bind(db_name) - .fetch_one(&mut *txn) - .await - .map_err(|e| { - anyhow::anyhow!("Failed to create database metadata for {}: {}", db_name, e) - })?; - txn.commit().await?; - Ok(Arc::new(db)) - } - - async fn get_database(&self, db_name: &str) -> anyhow::Result> { - let mut txn = self.storage.begin().await?; - let db: DatabaseMetadata = - sqlx::query_as("SELECT id, name FROM database_metadata WHERE name = ?") - .bind(db_name) - .fetch_one(&mut *txn) - .await - .map_err(|e| { - anyhow::anyhow!("Failed to get database metadata for {}: {}", db_name, e) - })?; - txn.commit().await?; - Ok(Arc::new(db)) - } - - async fn create_namespace( - &self, - database_id: DatabaseId, - namespace_name: &str, - ) -> anyhow::Result> { - let mut txn = self.storage.begin().await?; - let namespace: NamespaceMetadata = sqlx::query_as( - "INSERT INTO namespace_metadata (name, database_id) VALUES (?, ?) RETURNING id, name", - ) - .bind(namespace_name) - .bind(database_id) - .fetch_one(&mut *txn) - .await - .map_err(|e| { - anyhow::anyhow!( - "Failed to create namespace metadata for {}: {}", - namespace_name, - e - ) - })?; - txn.commit().await?; - Ok(Arc::new(namespace)) - } - - async fn get_namespace( - &self, - db_name: &str, - namespace_name: &str, - ) -> anyhow::Result> { - let mut txn = self.storage.begin().await?; - let namespace: NamespaceMetadata = sqlx::query_as( - "SELECT namespace_metadata.id, namespace_metadata.name FROM namespace_metadata, database_metadata WHERE database_metadata.name = ? and namspace_metadata.name = ? and namespace_metadata.database_id = database_metadata.id", - ) - .bind(db_name) - .bind(namespace_name) - .fetch_one(&mut *txn) - .await - .map_err(|e| { - anyhow::anyhow!( - "Failed to get namespace metadata for {}.{}: {}", - db_name, - namespace_name, - e - ) - })?; - txn.commit().await?; - Ok(Arc::new(namespace)) - } - - async fn create_table( - &self, - namespace_id: NamespaceId, - table_name: &str, - schema: &Schema, - ) -> anyhow::Result> { - let mut txn = self.storage.begin().await?; - let table: TableMetadata = sqlx::query_as( - "INSERT INTO table_metadata (name, namespace_id) VALUES (?, ?) RETURNING id, name", - ) - .bind(table_name) - .bind(namespace_id) - .fetch_one(&mut *txn) - .await - .map_err(|e| { - anyhow::anyhow!("Failed to create table metadata for {}: {}", table_name, e) - })?; - for (i, attribute) in schema.attributes.iter().enumerate() { - sqlx::query("INSERT INTO attributes (name, kind, table_id, base_attribute_number) VALUES (?, ?, ?, ?)") - .bind(&attribute.name) - .bind(&attribute.kind) - .bind(table.id) - .bind(i as i64) - .execute(&mut *txn) - .await - .map_err(|e| { - anyhow::anyhow!( - "Failed to create attribute metadata for {}.{}: {}", - table_name, - attribute.name, - e - ) - })?; +impl OptdCatalog { + pub fn new(memo: Arc, catalog: C) -> Self { + Self { + _memo: memo, + catalog, } - txn.commit().await?; - Ok(Arc::new(table)) } - async fn get_table( - &self, - db_name: &str, - namespace_name: &str, - table_name: &str, - ) -> anyhow::Result> { - let mut txn = self.storage.begin().await?; - let table: TableMetadata = sqlx::query_as( - "SELECT table_metadata.id, table_metadata.name FROM table_metadata, namespace_metadata, database_metadata WHERE database_metadata.name = ? and namspace_metadata.name = ? and namespace_metadata.database_id = database_metadata.id and table_metadata.namespace_id = namespace_metadata.id and table_metadata.name = ?", - ) - .bind(db_name) - .bind(namespace_name) - .bind(table_name) - .fetch_one(&mut *txn) - .await - .map_err(|e| { - anyhow::anyhow!( - "Failed to get table metadata for {}.{}.{}: {}", - db_name, - namespace_name, - table_name, - e - ) - })?; - txn.commit().await?; - Ok(Arc::new(table)) + pub fn catalog(&self) -> &C { + &self.catalog } - async fn get_schema(&self, table_id: TableId) -> anyhow::Result { - let mut txn = self.storage.begin().await?; - let attributes: Vec = sqlx::query_as( - "SELECT attributes.id, attributes.name, attributes.kind FROM attributes WHERE attributes.table_id = ?", - ) - .bind(table_id) - .fetch_all(&mut *txn) - .await - .map_err(|e| { - anyhow::anyhow!( - "Failed to get schema metadata for table {:?}: {}", - table_id, - e - ) - })?; + pub async fn get_current_table_schema(&self, table_id: &TableIdent) -> Result> { + let table = self.catalog.load_table(table_id).await?; + let table_metadata = table.metadata(); - Ok(Schema { attributes }) + Ok(table_metadata.current_schema().clone()) } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] -#[repr(transparent)] -#[sqlx(transparent)] -pub struct DatabaseId(i64); - -#[derive(Debug, Clone, PartialEq, Eq, FromRow, sqlx::Type)] -pub struct DatabaseMetadata { - pub id: DatabaseId, - pub name: String, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] -#[repr(transparent)] -#[sqlx(transparent)] -pub struct NamespaceId(i64); - -#[derive(Debug, Clone, PartialEq, Eq, FromRow, sqlx::Type)] -pub struct NamespaceMetadata { - pub id: NamespaceId, - pub name: String, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] -#[repr(transparent)] -#[sqlx(transparent)] -pub struct TableId(i64); - -#[derive(Debug, Clone, PartialEq, Eq, FromRow, sqlx::Type)] -pub struct TableMetadata { - pub id: TableId, - pub name: String, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] -#[repr(transparent)] -#[sqlx(transparent)] -pub struct AttributeId(i64); - -#[derive(Debug, Clone, PartialEq, Eq, FromRow, sqlx::Type)] -pub struct Attribute { - /// The unique identifier for the attribute. - pub id: AttributeId, - /// The name of the attribute. - pub name: String, - /// The kind (data type) of the attribute. - pub kind: String, -} - -pub struct Schema { - /// The attributes in the schema. - pub attributes: Vec, -} - -#[cfg(test)] -mod tests { - - use super::*; - - #[tokio::test] - async fn test_schema() -> anyhow::Result<()> { - let storage = Arc::new(SqliteMemo::new_in_memory().await?); - let catalog = OptdCatalog { storage }; - - let db = catalog.create_database("test_db").await?; - let namespace = catalog.create_namespace(db.id, "test_namespace").await?; - let schema = Schema { - attributes: vec![ - Attribute { - id: AttributeId(1), - name: "id".to_string(), - kind: "INTEGER".to_string(), - }, - Attribute { - id: AttributeId(2), - name: "name".to_string(), - kind: "TEXT".to_string(), - }, - ], - }; - let table = catalog - .create_table(namespace.id, "test_table", &schema) - .await?; - - assert_eq!(table.name, "test_table"); - assert_eq!(table.id, TableId(1)); - let schema = catalog.get_schema(table.id).await?; - assert_eq!(schema.attributes.len(), 2); - assert_eq!(schema.attributes[0].name, "id"); - assert_eq!(schema.attributes[0].kind, "INTEGER"); - assert_eq!(schema.attributes[1].name, "name"); - assert_eq!(schema.attributes[1].kind, "TEXT"); + pub async fn num_columns(&self, table_id: &TableIdent) -> Result { + let schema = self.get_current_table_schema(table_id).await?; + let field_ids = schema.identifier_field_ids(); - Ok(()) + Ok(field_ids.len()) } } diff --git a/optd-core/src/catalog/sqlite_memo.rs b/optd-core/src/catalog/sqlite_memo.rs new file mode 100644 index 0000000..71abe14 --- /dev/null +++ b/optd-core/src/catalog/sqlite_memo.rs @@ -0,0 +1,305 @@ +use std::sync::Arc; + +use sqlx::prelude::FromRow; + +use crate::storage::memo::SqliteMemo; + +#[trait_variant::make(Send)] +pub trait Catalog { + async fn create_database(&self, db_name: &str) -> anyhow::Result>; + + async fn get_database(&self, db_name: &str) -> anyhow::Result>; + + async fn create_namespace( + &self, + database_id: DatabaseId, + namespace_name: &str, + ) -> anyhow::Result>; + + async fn get_namespace( + &self, + db_name: &str, + namespace_name: &str, + ) -> anyhow::Result>; + + async fn create_table( + &self, + namespace_id: NamespaceId, + table_name: &str, + schema: &Schema, + ) -> anyhow::Result>; + + async fn get_table( + &self, + db_name: &str, + namespace_name: &str, + table_name: &str, + ) -> anyhow::Result>; + + async fn get_schema(&self, table_id: TableId) -> anyhow::Result; +} + +pub struct OptdCatalog { + storage: Arc, +} + +impl Catalog for OptdCatalog { + async fn create_database(&self, db_name: &str) -> anyhow::Result> { + let mut txn = self.storage.begin().await?; + let db: DatabaseMetadata = + sqlx::query_as("INSERT INTO database_metadata (name) VALUES (?) RETURNING id, name") + .bind(db_name) + .fetch_one(&mut *txn) + .await + .map_err(|e| { + anyhow::anyhow!("Failed to create database metadata for {}: {}", db_name, e) + })?; + txn.commit().await?; + Ok(Arc::new(db)) + } + + async fn get_database(&self, db_name: &str) -> anyhow::Result> { + let mut txn = self.storage.begin().await?; + let db: DatabaseMetadata = + sqlx::query_as("SELECT id, name FROM database_metadata WHERE name = ?") + .bind(db_name) + .fetch_one(&mut *txn) + .await + .map_err(|e| { + anyhow::anyhow!("Failed to get database metadata for {}: {}", db_name, e) + })?; + txn.commit().await?; + Ok(Arc::new(db)) + } + + async fn create_namespace( + &self, + database_id: DatabaseId, + namespace_name: &str, + ) -> anyhow::Result> { + let mut txn = self.storage.begin().await?; + let namespace: NamespaceMetadata = sqlx::query_as( + "INSERT INTO namespace_metadata (name, database_id) VALUES (?, ?) RETURNING id, name", + ) + .bind(namespace_name) + .bind(database_id) + .fetch_one(&mut *txn) + .await + .map_err(|e| { + anyhow::anyhow!( + "Failed to create namespace metadata for {}: {}", + namespace_name, + e + ) + })?; + txn.commit().await?; + Ok(Arc::new(namespace)) + } + + async fn get_namespace( + &self, + db_name: &str, + namespace_name: &str, + ) -> anyhow::Result> { + let mut txn = self.storage.begin().await?; + let namespace: NamespaceMetadata = sqlx::query_as( + "SELECT namespace_metadata.id, namespace_metadata.name FROM namespace_metadata, database_metadata WHERE database_metadata.name = ? and namspace_metadata.name = ? and namespace_metadata.database_id = database_metadata.id", + ) + .bind(db_name) + .bind(namespace_name) + .fetch_one(&mut *txn) + .await + .map_err(|e| { + anyhow::anyhow!( + "Failed to get namespace metadata for {}.{}: {}", + db_name, + namespace_name, + e + ) + })?; + txn.commit().await?; + Ok(Arc::new(namespace)) + } + + async fn create_table( + &self, + namespace_id: NamespaceId, + table_name: &str, + schema: &Schema, + ) -> anyhow::Result> { + let mut txn = self.storage.begin().await?; + let table: TableMetadata = sqlx::query_as( + "INSERT INTO table_metadata (name, namespace_id) VALUES (?, ?) RETURNING id, name", + ) + .bind(table_name) + .bind(namespace_id) + .fetch_one(&mut *txn) + .await + .map_err(|e| { + anyhow::anyhow!("Failed to create table metadata for {}: {}", table_name, e) + })?; + for (i, attribute) in schema.attributes.iter().enumerate() { + sqlx::query("INSERT INTO attributes (name, kind, table_id, base_attribute_number) VALUES (?, ?, ?, ?)") + .bind(&attribute.name) + .bind(&attribute.kind) + .bind(table.id) + .bind(i as i64) + .execute(&mut *txn) + .await + .map_err(|e| { + anyhow::anyhow!( + "Failed to create attribute metadata for {}.{}: {}", + table_name, + attribute.name, + e + ) + })?; + } + txn.commit().await?; + Ok(Arc::new(table)) + } + + async fn get_table( + &self, + db_name: &str, + namespace_name: &str, + table_name: &str, + ) -> anyhow::Result> { + let mut txn = self.storage.begin().await?; + let table: TableMetadata = sqlx::query_as( + "SELECT table_metadata.id, table_metadata.name FROM table_metadata, namespace_metadata, database_metadata WHERE database_metadata.name = ? and namspace_metadata.name = ? and namespace_metadata.database_id = database_metadata.id and table_metadata.namespace_id = namespace_metadata.id and table_metadata.name = ?", + ) + .bind(db_name) + .bind(namespace_name) + .bind(table_name) + .fetch_one(&mut *txn) + .await + .map_err(|e| { + anyhow::anyhow!( + "Failed to get table metadata for {}.{}.{}: {}", + db_name, + namespace_name, + table_name, + e + ) + })?; + txn.commit().await?; + Ok(Arc::new(table)) + } + + async fn get_schema(&self, table_id: TableId) -> anyhow::Result { + let mut txn = self.storage.begin().await?; + let attributes: Vec = sqlx::query_as( + "SELECT attributes.id, attributes.name, attributes.kind FROM attributes WHERE attributes.table_id = ?", + ) + .bind(table_id) + .fetch_all(&mut *txn) + .await + .map_err(|e| { + anyhow::anyhow!( + "Failed to get schema metadata for table {:?}: {}", + table_id, + e + ) + })?; + + Ok(Schema { attributes }) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[repr(transparent)] +#[sqlx(transparent)] +pub struct DatabaseId(i64); + +#[derive(Debug, Clone, PartialEq, Eq, FromRow, sqlx::Type)] +pub struct DatabaseMetadata { + pub id: DatabaseId, + pub name: String, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[repr(transparent)] +#[sqlx(transparent)] +pub struct NamespaceId(i64); + +#[derive(Debug, Clone, PartialEq, Eq, FromRow, sqlx::Type)] +pub struct NamespaceMetadata { + pub id: NamespaceId, + pub name: String, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[repr(transparent)] +#[sqlx(transparent)] +pub struct TableId(i64); + +#[derive(Debug, Clone, PartialEq, Eq, FromRow, sqlx::Type)] +pub struct TableMetadata { + pub id: TableId, + pub name: String, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[repr(transparent)] +#[sqlx(transparent)] +pub struct AttributeId(i64); + +#[derive(Debug, Clone, PartialEq, Eq, FromRow, sqlx::Type)] +pub struct Attribute { + /// The unique identifier for the attribute. + pub id: AttributeId, + /// The name of the attribute. + pub name: String, + /// The kind (data type) of the attribute. + pub kind: String, +} + +pub struct Schema { + /// The attributes in the schema. + pub attributes: Vec, +} + +#[cfg(test)] +mod tests { + + use super::*; + + #[tokio::test] + async fn test_schema() -> anyhow::Result<()> { + let storage = Arc::new(SqliteMemo::new_in_memory().await?); + let catalog = OptdCatalog { storage }; + + let db = catalog.create_database("test_db").await?; + let namespace = catalog.create_namespace(db.id, "test_namespace").await?; + let schema = Schema { + attributes: vec![ + Attribute { + id: AttributeId(1), + name: "id".to_string(), + kind: "INTEGER".to_string(), + }, + Attribute { + id: AttributeId(2), + name: "name".to_string(), + kind: "TEXT".to_string(), + }, + ], + }; + let table = catalog + .create_table(namespace.id, "test_table", &schema) + .await?; + + assert_eq!(table.name, "test_table"); + assert_eq!(table.id, TableId(1)); + + let schema = catalog.get_schema(table.id).await?; + assert_eq!(schema.attributes.len(), 2); + assert_eq!(schema.attributes[0].name, "id"); + assert_eq!(schema.attributes[0].kind, "INTEGER"); + assert_eq!(schema.attributes[1].name, "name"); + assert_eq!(schema.attributes[1].kind, "TEXT"); + + Ok(()) + } +} diff --git a/optd-datafusion/Cargo.toml b/optd-datafusion/Cargo.toml index 0055bc7..8cfd7f1 100644 --- a/optd-datafusion/Cargo.toml +++ b/optd-datafusion/Cargo.toml @@ -16,3 +16,6 @@ async-trait = "0.1.85" datafusion.workspace = true futures = "0.3.31" itertools = "0.14.0" + +iceberg = "0.4.0" +iceberg-catalog-memory = "0.4.0" diff --git a/optd-datafusion/src/converter/mod.rs b/optd-datafusion/src/converter/mod.rs deleted file mode 100644 index ba07afa..0000000 --- a/optd-datafusion/src/converter/mod.rs +++ /dev/null @@ -1,35 +0,0 @@ -use datafusion::{execution::SessionState, logical_expr::TableSource}; -use std::fmt::Debug; -use std::{collections::HashMap, sync::Arc}; - -pub mod from_optd; -pub mod into_optd; - -/// A context for converting between optd and datafusion. -/// The map is used to lookup table sources when converting TableScan operators from optd to -/// datafusion. -pub(crate) struct OptdContext { - /// Maps table names to table sources. - tables: HashMap>, - /// DataFusion session state. - session_state: SessionState, -} - -impl OptdContext { - /// Creates a new empty `OptdDataFusionContext` with the provided session state. - pub(crate) fn new(session_state: &SessionState) -> OptdContext { - OptdContext { - tables: HashMap::new(), - session_state: session_state.clone(), - } - } -} - -impl Debug for OptdContext { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("OptdContext") - .field("tables", &self.tables.keys()) - .field("session_state", &self.session_state) - .finish() - } -} diff --git a/optd-datafusion/src/df_conversion/context.rs b/optd-datafusion/src/df_conversion/context.rs new file mode 100644 index 0000000..ce3eba8 --- /dev/null +++ b/optd-datafusion/src/df_conversion/context.rs @@ -0,0 +1,31 @@ +use datafusion::catalog::TableProvider; +use datafusion::execution::SessionState; +use std::fmt::Debug; +use std::{collections::HashMap, sync::Arc}; + +/// A context for converting plans and expressions between `optd` and DataFusion. +pub(crate) struct OptdDFContext { + /// Maps table names to DataFusion [`TableProvider`]s. + pub(crate) providers: HashMap>, + /// DataFusion session state. + pub(crate) session_state: SessionState, +} + +impl OptdDFContext { + /// Creates a new empty `OptdDataFusionContext` with the provided session state. + pub(crate) fn new(session_state: &SessionState) -> OptdDFContext { + OptdDFContext { + providers: HashMap::new(), + session_state: session_state.clone(), + } + } +} + +impl Debug for OptdDFContext { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("OptdContext") + .field("tables", &self.providers.keys()) + .field("session_state", &self.session_state) + .finish() + } +} diff --git a/optd-datafusion/src/converter/from_optd.rs b/optd-datafusion/src/df_conversion/from_optd.rs similarity index 97% rename from optd-datafusion/src/converter/from_optd.rs rename to optd-datafusion/src/df_conversion/from_optd.rs index 9de171c..b361235 100644 --- a/optd-datafusion/src/converter/from_optd.rs +++ b/optd-datafusion/src/df_conversion/from_optd.rs @@ -1,9 +1,9 @@ +use super::context::OptdDFContext; use anyhow::bail; use async_recursion::async_recursion; use datafusion::{ arrow::datatypes::{Schema, SchemaRef}, common::JoinType, - datasource::source_as_provider, logical_expr::Operator, physical_plan::{ expressions::{BinaryExpr, Column, Literal, NegativeExpr, NotExpr}, @@ -20,9 +20,7 @@ use optd_core::{ }; use std::{collections::HashMap, str::FromStr, sync::Arc}; -use super::OptdContext; - -impl OptdContext { +impl OptdDFContext { #[async_recursion] pub(crate) async fn optd_to_df_relational( &self, @@ -30,8 +28,8 @@ impl OptdContext { ) -> anyhow::Result> { match &optimized_plan.operator { PhysicalOperator::TableScan(table_scan) => { - let source = self - .tables + let provider = self + .providers .get( table_scan .table_name @@ -39,7 +37,6 @@ impl OptdContext { .expect("Table name is not valid"), ) .ok_or_else(|| anyhow::anyhow!("Table not found"))?; - let provider = source_as_provider(source)?; // TODO(yuchen): support filters inside table scan. let filters = vec![]; diff --git a/optd-datafusion/src/converter/into_optd.rs b/optd-datafusion/src/df_conversion/into_optd.rs similarity index 96% rename from optd-datafusion/src/converter/into_optd.rs rename to optd-datafusion/src/df_conversion/into_optd.rs index 5bce3fa..570444d 100644 --- a/optd-datafusion/src/converter/into_optd.rs +++ b/optd-datafusion/src/df_conversion/into_optd.rs @@ -1,3 +1,4 @@ +use super::context::OptdDFContext; use anyhow::bail; use datafusion::{ common::DFSchema, @@ -21,9 +22,7 @@ use optd_core::{ }; use std::sync::Arc; -use super::OptdContext; - -impl OptdContext { +impl OptdDFContext { /// Given a DataFusion logical plan, returns an `optd` [`LogicalPlan`]. pub(crate) fn df_to_optd_relational( &mut self, @@ -33,8 +32,11 @@ impl OptdContext { DataFusionLogicalPlan::TableScan(table_scan) => { let table_name = table_scan.table_name.to_quoted_string(); - // Record the table name and source into the context. - self.tables.insert(table_name, table_scan.source.clone()); + // Record the table name and provider into the context. + self.providers.insert( + table_name, + datafusion::datasource::source_as_provider(&table_scan.source)?, + ); let combine_filters = conjunction(table_scan.filters.to_vec()); let predicate = match combine_filters { diff --git a/optd-datafusion/src/df_conversion/mod.rs b/optd-datafusion/src/df_conversion/mod.rs new file mode 100644 index 0000000..0381f2b --- /dev/null +++ b/optd-datafusion/src/df_conversion/mod.rs @@ -0,0 +1,5 @@ +mod from_optd; +mod into_optd; + +pub(crate) mod context; +pub(crate) mod session; diff --git a/optd-datafusion/src/optd_utils.rs b/optd-datafusion/src/df_conversion/session.rs similarity index 100% rename from optd-datafusion/src/optd_utils.rs rename to optd-datafusion/src/df_conversion/session.rs diff --git a/optd-datafusion/src/iceberg_conversion.rs b/optd-datafusion/src/iceberg_conversion.rs new file mode 100644 index 0000000..78856ae --- /dev/null +++ b/optd-datafusion/src/iceberg_conversion.rs @@ -0,0 +1,116 @@ +use datafusion::catalog::TableProvider; +use datafusion::common::arrow::datatypes::{DataType as DFType, Schema as DFSchema}; +use iceberg::spec::{NestedField, PrimitiveType, Schema, Type}; +use iceberg::{Catalog, NamespaceIdent, Result, TableCreation, TableIdent}; +use std::sync::atomic::{AtomicI32, Ordering}; +use std::{collections::HashMap, sync::Arc}; + +static NAMESPACE: &str = "default"; +static FIELD_ID: AtomicI32 = AtomicI32::new(0); + +// Given a map of table names to [`TableProvider`]s, ingest them into an Iceberg [`Catalog`]. +pub(crate) async fn ingest_providers( + catalog: &C, + providers: &HashMap>, +) -> Result<()> +where + C: Catalog, +{ + let namespace_ident = NamespaceIdent::from_vec(vec![NAMESPACE.to_string()]).unwrap(); + + for (name, provider) in providers { + // Create the table identifier. + let table_ident = TableIdent::new(namespace_ident.clone(), name.clone()); + + if catalog.table_exists(&table_ident).await? { + unimplemented!("Table update unimplemented") + } else { + let df_schema = provider.schema(); + let iceberg_schema = df_to_iceberg_schema(&df_schema); + + let create_table = TableCreation { + name: name.clone(), + schema: iceberg_schema, + location: None, + properties: df_schema.metadata.clone(), + partition_spec: None, + sort_order: None, + }; + + catalog.create_table(&namespace_ident, create_table).await?; + } + } + + todo!() +} + +/// Converts a DataFusion [`DFSchema`] to an Iceberg [`Schema`]. +fn df_to_iceberg_schema(df_schema: &DFSchema) -> Schema { + let fields = &df_schema.fields; + + let fields = fields.iter().map(|field| { + let field_name = field.name(); + let iceberg_type = df_to_iceberg_datatype(field.data_type()); + + Arc::new(NestedField { + id: FIELD_ID.fetch_add(1, Ordering::Relaxed), + name: field_name.clone(), + required: true, + field_type: Box::new(iceberg_type), + doc: None, + initial_default: None, + write_default: None, + }) + }); + + Schema::builder() + .with_fields(fields) + .build() + .expect("Failed to convert DataFusion schema to Iceberg schema") +} + +/// Converts a DataFusion [`DFType`] to an Iceberg [`Type`]. +/// +/// TODO(connor): Some of these are probably wrong. +/// +/// See: +/// - https://docs.rs/datafusion/latest/datafusion/common/arrow/datatypes/enum.DataType.html +/// - https://docs.rs/iceberg/latest/iceberg/spec/enum.Type.html +fn df_to_iceberg_datatype(df_datatype: &DFType) -> Type { + match df_datatype { + DFType::Null => unimplemented!("All Iceberg types are nullable"), + DFType::Boolean => Type::Primitive(PrimitiveType::Boolean), + DFType::Int8 => Type::Primitive(PrimitiveType::Int), + DFType::Int16 => Type::Primitive(PrimitiveType::Int), + DFType::Int32 => Type::Primitive(PrimitiveType::Int), + DFType::Int64 => Type::Primitive(PrimitiveType::Long), + DFType::UInt8 => Type::Primitive(PrimitiveType::Int), + DFType::UInt16 => Type::Primitive(PrimitiveType::Int), + DFType::UInt32 => Type::Primitive(PrimitiveType::Int), + DFType::UInt64 => Type::Primitive(PrimitiveType::Long), + DFType::Float16 => Type::Primitive(PrimitiveType::Float), + DFType::Float32 => Type::Primitive(PrimitiveType::Float), + DFType::Float64 => Type::Primitive(PrimitiveType::Double), + DFType::Timestamp(_, _) => Type::Primitive(PrimitiveType::Timestamp), + DFType::Date32 => Type::Primitive(PrimitiveType::Date), + DFType::Date64 => Type::Primitive(PrimitiveType::Date), + DFType::Time32(_) => Type::Primitive(PrimitiveType::Timestamp), + DFType::Time64(_) => Type::Primitive(PrimitiveType::Timestamp), + DFType::Binary => Type::Primitive(PrimitiveType::Binary), + DFType::FixedSizeBinary(bytes) => Type::Primitive(PrimitiveType::Fixed(*bytes as u64)), + DFType::LargeBinary => Type::Primitive(PrimitiveType::Binary), + DFType::BinaryView => Type::Primitive(PrimitiveType::Binary), + DFType::Utf8 => Type::Primitive(PrimitiveType::String), + DFType::LargeUtf8 => Type::Primitive(PrimitiveType::String), + DFType::Utf8View => Type::Primitive(PrimitiveType::String), + DFType::Decimal128(precision, scale) => Type::Primitive(PrimitiveType::Decimal { + precision: *precision as u32, + scale: *scale as u32, + }), + DFType::Decimal256(precision, scale) => Type::Primitive(PrimitiveType::Decimal { + precision: *precision as u32, + scale: *scale as u32, + }), + dt => unimplemented!("Unsupported data type: {:?}", dt), + } +} diff --git a/optd-datafusion/src/lib.rs b/optd-datafusion/src/lib.rs index c91187b..7511aeb 100644 --- a/optd-datafusion/src/lib.rs +++ b/optd-datafusion/src/lib.rs @@ -11,9 +11,9 @@ use datafusion::prelude::*; use futures::StreamExt; use std::time::SystemTime; -mod converter; +mod df_conversion; +mod iceberg_conversion; mod mock; -mod optd_utils; /// Given a string of SQL queries, run them pub async fn run_queries(queries: &[&str]) -> Result<()> { @@ -22,7 +22,7 @@ pub async fn run_queries(queries: &[&str]) -> Result<()> { // Create a DataFusion `SessionContext` that uses the `optd` optimizer to help created optimized // `ExecutionPlan`s. - let ctx = optd_utils::create_optd_session(Some(session_config), None, None) + let ctx = df_conversion::session::create_optd_session(Some(session_config), None, None) .await .unwrap(); diff --git a/optd-datafusion/src/mock.rs b/optd-datafusion/src/mock.rs index 656ffba..1c09d71 100644 --- a/optd-datafusion/src/mock.rs +++ b/optd-datafusion/src/mock.rs @@ -1,17 +1,19 @@ -use crate::converter::OptdContext; +use crate::df_conversion::context::OptdDFContext; use async_trait::async_trait; use datafusion::{ common::Result as DataFusionResult, execution::{context::QueryPlanner, SessionState}, logical_expr::{ - Explain, LogicalPlan as DataFusionLogicalPlan, PlanType as DataFusionPlanType, - ToStringifiedPlan, + Explain, LogicalPlan as DFLogicalPlan, PlanType as DFPlanType, ToStringifiedPlan, }, physical_plan::{displayable, explain::ExplainExec, ExecutionPlan}, physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner}, }; +use iceberg::io::FileIOBuilder; +use iceberg_catalog_memory::MemoryCatalog; use optd_core::{ cascades, + catalog::OptdCatalog, plans::{logical::LogicalPlan, physical::PhysicalPlan}, storage::memo::SqliteMemo, }; @@ -21,15 +23,20 @@ use std::sync::Arc; #[derive(Debug)] pub(crate) struct MockOptdOptimizer { /// The memo table used for dynamic programming during query optimization. - memo: SqliteMemo, + memo: Arc, + catalog: Arc>, } impl MockOptdOptimizer { /// Creates a new `optd` optimizer with an in-memory memo table. pub async fn new_in_memory() -> anyhow::Result { - Ok(Self { - memo: SqliteMemo::new_in_memory().await?, - }) + let memo = Arc::new(SqliteMemo::new_in_memory().await?); + + let file_io = FileIOBuilder::new("memory").build()?; + let memory_catalog = MemoryCatalog::new(file_io, None); + let catalog = Arc::new(OptdCatalog::new(memo.clone(), memory_catalog)); + + Ok(Self { memo, catalog }) } /// A mock optimization function for testing purposes. @@ -52,9 +59,13 @@ impl MockOptdOptimizer { &self, logical_plan: &LogicalPlan, ) -> anyhow::Result> { - let root_group_id = cascades::ingest_full_logical_plan(&self.memo, logical_plan).await?; - let goal_id = cascades::mock_optimize_relation_group(&self.memo, root_group_id).await?; - let optimized_plan = cascades::match_any_physical_plan(&self.memo, goal_id).await?; + let root_group_id = + cascades::ingest_full_logical_plan(self.memo.as_ref(), logical_plan).await?; + let goal_id = + cascades::mock_optimize_relation_group(self.memo.as_ref(), root_group_id).await?; + let optimized_plan = cascades::match_any_physical_plan(self.memo.as_ref(), goal_id).await?; + + std::hint::black_box(&self.catalog); Ok(optimized_plan) } @@ -63,11 +74,11 @@ impl MockOptdOptimizer { #[async_trait] impl QueryPlanner for MockOptdOptimizer { /// This function is the entry point for the physical planner. It will attempt to optimize the - /// given DataFusion [`DataFusionLogicalPlan`] using the `optd` optimizer. + /// given DataFusion [`DFLogicalPlan`] using the `optd` optimizer. /// - /// If the [`DataFusionLogicalPlan`] is a DML/DDL operation, it will fall back to the DataFusion planner. + /// If the [`DFLogicalPlan`] is a DML/DDL operation, it will fall back to the DataFusion planner. /// - /// Otherwise, this function will convert the DataFusion [`DataFusionLogicalPlan`] into an + /// Otherwise, this function will convert the DataFusion [`DFLogicalPlan`] into an /// `optd` [`LogicalPlan`] in order to pass it to the `optd` optimizer. /// /// Once `optd` has finished optimization, it will convert the output `optd` [`PhysicalPlan`] @@ -83,13 +94,12 @@ impl QueryPlanner for MockOptdOptimizer { /// DataFusion. async fn create_physical_plan( &self, - datafusion_logical_plan: &DataFusionLogicalPlan, + datafusion_logical_plan: &DFLogicalPlan, session_state: &SessionState, ) -> DataFusionResult> { // Fallback to the default DataFusion planner for DML/DDL operations. - if let DataFusionLogicalPlan::Dml(_) - | DataFusionLogicalPlan::Ddl(_) - | DataFusionLogicalPlan::EmptyRelation(_) = datafusion_logical_plan + if let DFLogicalPlan::Dml(_) | DFLogicalPlan::Ddl(_) | DFLogicalPlan::EmptyRelation(_) = + datafusion_logical_plan { return DefaultPhysicalPlanner::default() .create_physical_plan(datafusion_logical_plan, session_state) @@ -97,7 +107,7 @@ impl QueryPlanner for MockOptdOptimizer { } let (datafusion_logical_plan, _verbose, mut explains) = match datafusion_logical_plan { - DataFusionLogicalPlan::Explain(Explain { plan, verbose, .. }) => { + DFLogicalPlan::Explain(Explain { plan, verbose, .. }) => { (plan.as_ref(), *verbose, Some(Vec::new())) } _ => (datafusion_logical_plan, false, None), @@ -105,19 +115,28 @@ impl QueryPlanner for MockOptdOptimizer { if let Some(explains) = &mut explains { explains.push(datafusion_logical_plan.to_stringified( - DataFusionPlanType::OptimizedLogicalPlan { + DFPlanType::OptimizedLogicalPlan { optimizer_name: "datafusion".to_string(), }, )); } - let mut optd_ctx = OptdContext::new(session_state); + let mut optd_ctx = OptdDFContext::new(session_state); // convert the DataFusion logical plan to `optd`'s version of a `LogicalPlan`. let logical_plan = optd_ctx .df_to_optd_relational(datafusion_logical_plan) .expect("TODO FIX ERROR HANDLING"); + // The DataFusion to `optd` conversion will have read in all of the tables necessary to + // execute the query. Now we can update our own catalog with any new tables. + crate::iceberg_conversion::ingest_providers( + self.catalog.as_ref().catalog(), + &optd_ctx.providers, + ) + .await + .expect("Unable to ingest providers"); + // Run the `optd` optimizer on the `LogicalPlan`. let optd_optimized_physical_plan = self .mock_optimize(&logical_plan) @@ -132,12 +151,11 @@ impl QueryPlanner for MockOptdOptimizer { if let Some(mut explains) = explains { explains.push( - displayable(&*physical_plan) - .to_stringified(false, DataFusionPlanType::FinalPhysicalPlan), + displayable(&*physical_plan).to_stringified(false, DFPlanType::FinalPhysicalPlan), ); return Ok(Arc::new(ExplainExec::new( - DataFusionLogicalPlan::explain_schema(), + DFLogicalPlan::explain_schema(), explains, true, )));