remove dependency on six, drop support for python 3.7, update dev dep…

…dendencies update pytest describe add flexible terminal string formatter start swapping in default formats start phasing out underline_cells wip wip demonstrate how custom formatting can be injected via pytest fixtures wip make sure format_string works with no formats add new custom formatter to readme bump to v0.10.0 New Chispa interface (#94) * add formats to dataframe comparer * add new chispa interface lock run tests for multiple python versions small fix add runs-on argument fix reset ci
MrPowers · Jul 14, 2024 · 4b7d453 · 4b7d453
1 parent bd822a4
commit 4b7d453
Show file tree

Hide file tree

Showing 9 changed files with 437 additions and 1,282 deletions.
diff --git a/.github/actions/setup-poetry-env/action.yml b/.github/actions/setup-poetry-env/action.yml
@@ -0,0 +1,49 @@
+name: "setup-poetry-env"
+description: "Composite action to setup the Python and poetry environment."
+
+inputs:
+  python-version:
+    required: false
+    description: "The python version to use"
+    default: "3.11"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Set up python
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ inputs.python-version }}
+
+    - name: Install Poetry
+      env:
+        # renovate: datasource=pypi depName=poetry
+        POETRY_VERSION: "1.5.1"
+      run: curl -sSL https://install.python-poetry.org | python - -y
+      shell: bash
+
+    - name: Add Poetry to Path
+      run: echo "$HOME/.local/bin" >> $GITHUB_PATH
+      if: ${{ runner.os != 'Windows' }}  # runner.os is always set; matrix.os depends on the calling workflow
+      shell: bash
+
+    - name: Add Poetry to Path
+      run: echo "$APPDATA\Python\Scripts" >> $GITHUB_PATH
+      if: ${{ runner.os == 'Windows' }}  # same context the cache key below uses
+      shell: bash
+
+    - name: Configure Poetry virtual environment in project
+      run: poetry config virtualenvs.in-project true  # puts the venv at ./.venv so it can be cached below
+      shell: bash
+
+    - name: Load cached venv
+      id: cached-poetry-dependencies
+      uses: actions/cache@v3
+      with:
+        path: .venv
+        key: venv-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('poetry.lock') }}  # one cache per OS / Python version / lock file
+
+    - name: Install dependencies
+      if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'  # skip the install when .venv was restored from cache
+      run: poetry install --no-interaction
+      shell: bash
diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,7 @@ chispa.egg-info/
 tmp/
 .idea/
 .DS_Store
+.python-version
 
 # Byte-compiled / optimized / DLL files
 __pycache__/

diff --git a/.python-version b/.python-version
diff --git a/README.md b/README.md
@@ -475,9 +475,6 @@ TODO: Need to benchmark these methods vs. the spark-testing-base ones
 
 ## Vendored dependencies
 
-These dependencies are vendored:
-
-* [six](https://github.com/benjaminp/six)
 * [PrettyTable](https://github.com/jazzband/prettytable)
 
 The dependencies are vendored to save you from dependency hell.

diff --git a/chispa/rows_comparer.py b/chispa/rows_comparer.py
@@ -1,4 +1,4 @@
-import chispa.six as six
+from itertools import zip_longest
 from chispa.prettytable import PrettyTable
 from chispa.bcolors import *
 import chispa
@@ -11,8 +11,9 @@
 def assert_basic_rows_equality(rows1, rows2, underline_cells=False, formats=DefaultFormats()):
  if rows1 != rows2:
  t = PrettyTable(["df1", "df2"])
- zipped = list(six.moves.zip_longest(rows1, rows2))
+ zipped = list(zip_longest(rows1, rows2))
  all_rows_equal = True
+
  for r1, r2 in zipped:
  if r1 is None and r2 is not None:
  t.add_row([None, format_string(r2, formats.mismatched_rows)])
@@ -21,7 +22,7 @@ def assert_basic_rows_equality(rows1, rows2, underline_cells=False, formats=Defa
  t.add_row([format_string(r1, formats.mismatched_rows), None])
  all_rows_equal = False
  else:
- r_zipped = list(six.moves.zip_longest(r1.__fields__, r2.__fields__))
+ r_zipped = list(zip_longest(r1.__fields__, r2.__fields__))
  r1_string = []
  r2_string = []
  for r1_field, r2_field in r_zipped:
@@ -43,7 +44,7 @@ def assert_basic_rows_equality(rows1, rows2, underline_cells=False, formats=Defa
 def assert_generic_rows_equality(rows1, rows2, row_equality_fun, row_equality_fun_args, underline_cells=False, formats=DefaultFormats()):
  df1_rows = rows1
  df2_rows = rows2
- zipped = list(six.moves.zip_longest(df1_rows, df2_rows))
+ zipped = list(zip_longest(df1_rows, df2_rows))
  t = PrettyTable(["df1", "df2"])
  all_rows_equal = True
  for r1, r2 in zipped:
@@ -58,7 +59,7 @@ def assert_generic_rows_equality(rows1, rows2, row_equality_fun, row_equality_fu
  t.add_row([format_string(r1_string, formats.matched_rows), format_string(r2_string, formats.matched_rows)])
  # otherwise, rows aren't equal
  else:
- r_zipped = list(six.moves.zip_longest(r1.__fields__, r2.__fields__))
+ r_zipped = list(zip_longest(r1.__fields__, r2.__fields__))
  r1_string = []
  r2_string = []
  for r1_field, r2_field in r_zipped:

diff --git a/chispa/schema_comparer.py b/chispa/schema_comparer.py
@@ -1,6 +1,6 @@
 from chispa.prettytable import PrettyTable
 from chispa.bcolors import *
-import chispa.six as six
+from itertools import zip_longest
 
 
 class SchemasNotEqualError(Exception):
@@ -19,15 +19,15 @@ def assert_schema_equality_full(s1, s2, ignore_nullable=False, ignore_metadata=F
  def inner(s1, s2, ignore_nullable, ignore_metadata):
  if len(s1) != len(s2):
  return False
- zipped = list(six.moves.zip_longest(s1, s2))
+ zipped = list(zip_longest(s1, s2))
  for sf1, sf2 in zipped:
  if not are_structfields_equal(sf1, sf2, ignore_nullable, ignore_metadata):
  return False
  return True
 
  if not inner(s1, s2, ignore_nullable, ignore_metadata):
  t = PrettyTable(["schema1", "schema2"])
- zipped = list(six.moves.zip_longest(s1, s2))
+ zipped = list(zip_longest(s1, s2))
  for sf1, sf2 in zipped:
  if are_structfields_equal(sf1, sf2, True):
  t.add_row([blue(sf1), blue(sf2)])
@@ -42,7 +42,7 @@ def inner(s1, s2, ignore_nullable, ignore_metadata):
 def assert_basic_schema_equality(s1, s2):
  if s1 != s2:
  t = PrettyTable(["schema1", "schema2"])
- zipped = list(six.moves.zip_longest(s1, s2))
+ zipped = list(zip_longest(s1, s2))
  for sf1, sf2 in zipped:
  if sf1 == sf2:
  t.add_row([blue(sf1), blue(sf2)])
@@ -56,7 +56,7 @@ def assert_basic_schema_equality(s1, s2):
 def assert_schema_equality_ignore_nullable(s1, s2):
  if not are_schemas_equal_ignore_nullable(s1, s2):
  t = PrettyTable(["schema1", "schema2"])
- zipped = list(six.moves.zip_longest(s1, s2))
+ zipped = list(zip_longest(s1, s2))
  for sf1, sf2 in zipped:
  if are_structfields_equal(sf1, sf2, True):
  t.add_row([blue(sf1), blue(sf2)])
@@ -69,7 +69,7 @@ def assert_schema_equality_ignore_nullable(s1, s2):
 def are_schemas_equal_ignore_nullable(s1, s2):
  if len(s1) != len(s2):
  return False
- zipped = list(six.moves.zip_longest(s1, s2))
+ zipped = list(zip_longest(s1, s2))
  for sf1, sf2 in zipped:
  if not are_structfields_equal(sf1, sf2, True):
  return False