Skip to content

Commit

Permalink
remove dependency on six, drop support for python 3.7, update dev dep…
Browse files Browse the repository at this point in the history
…endencies

update pytest describe

add flexible terminal string formatter

start swapping in default formats

start phasing out underline_cells

wip

wip

demonstrate how custom formatting can be injected via pytest fixtures

wip

make sure format_string works with no formats

add new custom formatter to readme

bump to v0.10.0

New Chispa interface (#94)

* add formats to dataframe comparer

* add new chispa interface

lock

run tests for multiple python versions

small fix

add runs-on argument

fix

reset ci
  • Loading branch information
fpgmaas committed Jul 14, 2024
1 parent bd822a4 commit 4b7d453
Show file tree
Hide file tree
Showing 9 changed files with 437 additions and 1,282 deletions.
49 changes: 49 additions & 0 deletions .github/actions/setup-poetry-env/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Composite action: installs the requested Python, installs Poetry, and then
# restores (or builds) the project's in-tree virtualenv at .venv.
name: "setup-poetry-env"
description: "Composite action to setup the Python and poetry environment."

inputs:
  python-version:
    required: false
    description: "The python version to use"
    default: "3.11"

runs:
  using: "composite"
  steps:
    - name: Set up python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ inputs.python-version }}

    - name: Install Poetry
      env:
        # renovate: datasource=pypi depName=poetry
        POETRY_VERSION: "1.5.1"
      run: curl -sSL https://install.python-poetry.org | python - -y
      shell: bash

    # The two PATH steps are mutually exclusive and keyed on runner.os, which
    # is always set by the runner ('Linux', 'macOS' or 'Windows'). matrix.os is
    # only defined when the calling workflow declares a matrix key named "os",
    # and would then hold a runner label such as "windows-latest", so it cannot
    # be compared against 'Windows' reliably.
    - name: Add Poetry to Path
      run: echo "$HOME/.local/bin" >> $GITHUB_PATH
      if: ${{ runner.os != 'Windows' }}
      shell: bash

    - name: Add Poetry to Path
      run: echo "$APPDATA\Python\Scripts" >> $GITHUB_PATH
      if: ${{ runner.os == 'Windows' }}
      shell: bash

    # Keep the virtualenv inside the checkout so the cache step below can
    # address it with a fixed relative path.
    - name: Configure Poetry virtual environment in project
      run: poetry config virtualenvs.in-project true
      shell: bash

    # Cache key covers the OS, the Python version and the lock file contents,
    # so any change to one of them produces a fresh environment.
    - name: Load cached venv
      id: cached-poetry-dependencies
      uses: actions/cache@v3
      with:
        path: .venv
        key: venv-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('poetry.lock') }}

    # Only install when the cache did not restore an exact match.
    - name: Install dependencies
      if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
      run: poetry install --no-interaction
      shell: bash
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ chispa.egg-info/
tmp/
.idea/
.DS_Store
.python-version

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
1 change: 0 additions & 1 deletion .python-version

This file was deleted.

3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -475,9 +475,6 @@ TODO: Need to benchmark these methods vs. the spark-testing-base ones

## Vendored dependencies

These dependencies are vendored:

* [six](https://github.com/benjaminp/six)
* [PrettyTable](https://github.com/jazzband/prettytable)

The dependencies are vendored to save you from dependency hell.
Expand Down
11 changes: 6 additions & 5 deletions chispa/rows_comparer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import chispa.six as six
from itertools import zip_longest
from chispa.prettytable import PrettyTable
from chispa.bcolors import *
import chispa
Expand All @@ -11,8 +11,9 @@
def assert_basic_rows_equality(rows1, rows2, underline_cells=False, formats=DefaultFormats()):
if rows1 != rows2:
t = PrettyTable(["df1", "df2"])
zipped = list(six.moves.zip_longest(rows1, rows2))
zipped = list(zip_longest(rows1, rows2))
all_rows_equal = True

for r1, r2 in zipped:
if r1 is None and r2 is not None:
t.add_row([None, format_string(r2, formats.mismatched_rows)])
Expand All @@ -21,7 +22,7 @@ def assert_basic_rows_equality(rows1, rows2, underline_cells=False, formats=Defa
t.add_row([format_string(r1, formats.mismatched_rows), None])
all_rows_equal = False
else:
r_zipped = list(six.moves.zip_longest(r1.__fields__, r2.__fields__))
r_zipped = list(zip_longest(r1.__fields__, r2.__fields__))
r1_string = []
r2_string = []
for r1_field, r2_field in r_zipped:
Expand All @@ -43,7 +44,7 @@ def assert_basic_rows_equality(rows1, rows2, underline_cells=False, formats=Defa
def assert_generic_rows_equality(rows1, rows2, row_equality_fun, row_equality_fun_args, underline_cells=False, formats=DefaultFormats()):
df1_rows = rows1
df2_rows = rows2
zipped = list(six.moves.zip_longest(df1_rows, df2_rows))
zipped = list(zip_longest(df1_rows, df2_rows))
t = PrettyTable(["df1", "df2"])
all_rows_equal = True
for r1, r2 in zipped:
Expand All @@ -58,7 +59,7 @@ def assert_generic_rows_equality(rows1, rows2, row_equality_fun, row_equality_fu
t.add_row([format_string(r1_string, formats.matched_rows), format_string(r2_string, formats.matched_rows)])
# otherwise, rows aren't equal
else:
r_zipped = list(six.moves.zip_longest(r1.__fields__, r2.__fields__))
r_zipped = list(zip_longest(r1.__fields__, r2.__fields__))
r1_string = []
r2_string = []
for r1_field, r2_field in r_zipped:
Expand Down
12 changes: 6 additions & 6 deletions chispa/schema_comparer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from chispa.prettytable import PrettyTable
from chispa.bcolors import *
import chispa.six as six
from itertools import zip_longest


class SchemasNotEqualError(Exception):
Expand All @@ -19,15 +19,15 @@ def assert_schema_equality_full(s1, s2, ignore_nullable=False, ignore_metadata=F
def inner(s1, s2, ignore_nullable, ignore_metadata):
if len(s1) != len(s2):
return False
zipped = list(six.moves.zip_longest(s1, s2))
zipped = list(zip_longest(s1, s2))
for sf1, sf2 in zipped:
if not are_structfields_equal(sf1, sf2, ignore_nullable, ignore_metadata):
return False
return True

if not inner(s1, s2, ignore_nullable, ignore_metadata):
t = PrettyTable(["schema1", "schema2"])
zipped = list(six.moves.zip_longest(s1, s2))
zipped = list(zip_longest(s1, s2))
for sf1, sf2 in zipped:
if are_structfields_equal(sf1, sf2, True):
t.add_row([blue(sf1), blue(sf2)])
Expand All @@ -42,7 +42,7 @@ def inner(s1, s2, ignore_nullable, ignore_metadata):
def assert_basic_schema_equality(s1, s2):
if s1 != s2:
t = PrettyTable(["schema1", "schema2"])
zipped = list(six.moves.zip_longest(s1, s2))
zipped = list(zip_longest(s1, s2))
for sf1, sf2 in zipped:
if sf1 == sf2:
t.add_row([blue(sf1), blue(sf2)])
Expand All @@ -56,7 +56,7 @@ def assert_basic_schema_equality(s1, s2):
def assert_schema_equality_ignore_nullable(s1, s2):
if not are_schemas_equal_ignore_nullable(s1, s2):
t = PrettyTable(["schema1", "schema2"])
zipped = list(six.moves.zip_longest(s1, s2))
zipped = list(zip_longest(s1, s2))
for sf1, sf2 in zipped:
if are_structfields_equal(sf1, sf2, True):
t.add_row([blue(sf1), blue(sf2)])
Expand All @@ -69,7 +69,7 @@ def assert_schema_equality_ignore_nullable(s1, s2):
def are_schemas_equal_ignore_nullable(s1, s2):
if len(s1) != len(s2):
return False
zipped = list(six.moves.zip_longest(s1, s2))
zipped = list(zip_longest(s1, s2))
for sf1, sf2 in zipped:
if not are_structfields_equal(sf1, sf2, True):
return False
Expand Down
Loading

0 comments on commit 4b7d453

Please sign in to comment.