Skip to content

Commit

Permalink
add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
guilhem-dvr committed Oct 3, 2023
1 parent fdabd1a commit 92e0587
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 2 deletions.
4 changes: 2 additions & 2 deletions python/deltalake/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,8 +674,8 @@ def delete(self, predicate: Optional[str] = None) -> Dict[str, Any]:
that contain records that satisfy the predicate. Once files are determined
they are rewritten without the records.
:param predicate: a logical expression, defaults to None
:return: the metrics from delete
:param predicate: a SQL where clause. If not passed, will delete all rows.
:return: the metrics from delete.
"""
metrics = self._table.delete(predicate)
return json.loads(metrics)
Expand Down
62 changes: 62 additions & 0 deletions python/tests/test_delete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# test delete with:
# 1. no predicates
# 2. partition where predicate
# 3. in-file where predicate


import pathlib

import pyarrow as pa
import pyarrow.compute as pc

from deltalake.table import DeltaTable
from deltalake.writer import write_deltalake


def test_delete_no_predicates(existing_table: DeltaTable):
    """Check that ``delete()`` without a predicate leaves the table empty.

    Also verifies that the call is recorded as a ``DELETE`` operation and
    that the table version advances by exactly one.
    """
    version_before = existing_table.version()

    existing_table.delete()

    # The most recent history entry must be the delete that was just issued.
    assert existing_table.history(1)[0]["operation"] == "DELETE"
    assert existing_table.version() == version_before + 1

    assert existing_table.to_pyarrow_dataset().count_rows() == 0


def test_delete_a_partition(tmp_path: pathlib.Path, sample_data: pa.Table):
    """Check a delete whose predicate targets the partition column.

    The table is written partitioned by ``bool``; rows matching
    ``bool = true`` are deleted, and the remaining data is compared against
    the sample data filtered the same way with pyarrow.
    """
    write_deltalake(tmp_path, sample_data, partition_by=["bool"])
    dt = DeltaTable(tmp_path)
    version_before = dt.version()

    # Rows expected to survive: those whose "bool" value is not True.
    keep_rows = pc.field("bool") != pc.scalar(True)
    remaining = sample_data.filter(keep_rows)

    dt.delete(predicate="bool = true")

    assert dt.history(1)[0]["operation"] == "DELETE"
    assert dt.version() == version_before + 1

    assert dt.to_pyarrow_table().equals(remaining)
    # Only one data file is expected to remain after the delete.
    assert len(dt.files()) == 1


def test_delete_some_rows(existing_table: DeltaTable):
    """Check a delete whose predicate matches individual rows by ``utf8`` value.

    Rows with ``utf8`` in ``('0', '1')`` are deleted; the result is compared
    against a pyarrow-filtered snapshot taken before the delete.
    """
    version_before = existing_table.version()

    # Snapshot the expected result *before* deleting: every row whose "utf8"
    # value is not one of the values targeted by the predicate.
    keep_rows = ~pc.field("utf8").isin(["0", "1"])
    remaining = existing_table.to_pyarrow_table().filter(keep_rows)

    existing_table.delete(predicate="utf8 in ('0', '1')")

    assert existing_table.history(1)[0]["operation"] == "DELETE"
    assert existing_table.version() == version_before + 1

    assert existing_table.to_pyarrow_table().equals(remaining)

0 comments on commit 92e0587

Please sign in to comment.