From 2acb62849aafd69e5ec766016a951bf964f4eb1b Mon Sep 17 00:00:00 2001
From: "R. Tyler Croy"
Date: Fri, 2 Feb 2024 21:45:05 -0800
Subject: [PATCH] fix: optimize tables written by Spark

Fixes #1648
---
 crates/core/Cargo.toml                 |  1 +
 crates/core/src/operations/optimize.rs | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml
index 4bd58a492c..d394c8a027 100644
--- a/crates/core/Cargo.toml
+++ b/crates/core/Cargo.toml
@@ -105,6 +105,7 @@ criterion = "0.5"
 ctor = "0"
 deltalake-test = { path = "../test", features = ["datafusion"] }
 dotenvy = "0"
+fs_extra = "1.2.0"
 hyper = { version = "0.14", features = ["server"] }
 maplit = "1"
 pretty_assertions = "1.2.1"
diff --git a/crates/core/src/operations/optimize.rs b/crates/core/src/operations/optimize.rs
index 4aa64ba962..099d0b40ca 100644
--- a/crates/core/src/operations/optimize.rs
+++ b/crates/core/src/operations/optimize.rs
@@ -1514,5 +1514,30 @@ pub(super) mod zorder {
             assert_eq!(data.value_data().len(), 3 * 16 * 3);
             assert!(data.iter().all(|x| x.unwrap().len() == 3 * 16));
         }
+
+        #[tokio::test]
+        async fn works_on_spark_table() {
+            use crate::DeltaOps;
+            use tempfile::TempDir;
+            // Regression test for #1648: optimize runs on a scratch copy of a fixture table, not on the checked-in data
+            let tmp_dir = TempDir::new().expect("Failed to make temp dir");
+            let table_name = "delta-1.2.1-only-struct-stats";
+
+            // Copy the fixture into the scratch dir; it is opened below at <tmp_dir>/<table_name>. The relative source path assumes the test runs from crates/core -- TODO confirm
+            let source_path = format!("../test/tests/data/{table_name}");
+            fs_extra::dir::copy(source_path, tmp_dir.path(), &Default::default()).unwrap();
+
+            // Open the copied table and run optimize with no builder options set; any error panics via unwrap and fails the test
+            let (_, metrics) =
+                DeltaOps::try_from_uri(tmp_dir.path().join(table_name).to_str().unwrap())
+                    .await
+                    .unwrap()
+                    .optimize()
+                    .await
+                    .unwrap();
+
+            // The optimize metrics must report exactly one added file
+            assert_eq!(metrics.num_files_added, 1);
+        }
     }
 }