Skip to content

Commit

Permalink
fix: optimize tables written by Spark
Browse files Browse the repository at this point in the history
Fixes #1648
  • Loading branch information
rtyler committed Feb 3, 2024
1 parent f85152b commit 2acb628
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 0 deletions.
1 change: 1 addition & 0 deletions crates/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ criterion = "0.5"
ctor = "0"
deltalake-test = { path = "../test", features = ["datafusion"] }
dotenvy = "0"
fs_extra = "1.2.0"
hyper = { version = "0.14", features = ["server"] }
maplit = "1"
pretty_assertions = "1.2.1"
Expand Down
25 changes: 25 additions & 0 deletions crates/core/src/operations/optimize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1514,5 +1514,30 @@ pub(super) mod zorder {
assert_eq!(data.value_data().len(), 3 * 16 * 3);
assert!(data.iter().all(|x| x.unwrap().len() == 3 * 16));
}

/// Regression test: running `optimize` against a copy of the
/// `delta-1.2.1-only-struct-stats` fixture table must succeed and
/// report exactly one added file.
///
/// The fixture is copied into a scratch directory first so the
/// checked-in test data is never modified by the operation.
#[tokio::test]
async fn works_on_spark_table() {
    use crate::DeltaOps;
    use tempfile::TempDir;

    let table_name = "delta-1.2.1-only-struct-stats";

    // Scratch location; it must stay bound until the end of the test
    // so the directory outlives the optimize run.
    let tmp_dir = TempDir::new().expect("Failed to make temp dir");

    // Recursively copy the fixture table into the scratch directory.
    // The source path is relative to the working directory the test
    // runs from.
    let copy_options: fs_extra::dir::CopyOptions = Default::default();
    let source_path = format!("../test/tests/data/{table_name}");
    fs_extra::dir::copy(source_path, tmp_dir.path(), &copy_options).unwrap();

    // The copy lands in a sub-directory named after the table.
    let copied_table = tmp_dir.path().join(table_name);
    let table_uri = copied_table.to_str().unwrap();

    // Open the copied table and run optimize on it.
    let ops = DeltaOps::try_from_uri(table_uri).await.unwrap();
    let (_, metrics) = ops.optimize().await.unwrap();

    // Verify the operation produced a single new file.
    assert_eq!(metrics.num_files_added, 1);
}
}
}

0 comments on commit 2acb628

Please sign in to comment.