ci(mypy): add mypy check and adjust code for types (#439)

* ci(mypy): add mypy check and adjust code for types
* remove redefinitions config
* update stubs and ignores
* update output for compression typing
* update expanduser types for cell_locations
* add ignore for change in types
* adjust for typing
* update celllocations typing
* remove ignores
* add missing library ignore settings; fix aggregate
* add test condition for aggregate
cytomining · Sep 28, 2024 · af07d49 · af07d49
1 parent 2fd5ca3
commit af07d49
Show file tree

Hide file tree

Showing 11 changed files with 257 additions and 25 deletions.
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
@@ -56,6 +56,28 @@ jobs:
  uses: pre-commit/[email protected]
  with:
  extra_args: --all-files
+ python-type-checks:
+ # This job is used to check Python types
+ name: Python type checks
+ # Avoid fail-fast to retain output
+ strategy:
+ fail-fast: false
+ runs-on: ubuntu-22.04
+ if: github.event_name != 'schedule'
+ steps:
+ - name: Checkout repo
+ uses: actions/checkout@v4
+ - name: Setup python, and check pre-commit cache
+ uses: ./.github/actions/setup-env
+ with:
+ python-version: ${{ env.TARGET_PYTHON_VERSION }}
+ cache-pre-commit: false
+ cache-venv: true
+ setup-poetry: true
+ install-deps: true
+ - name: Run mypy
+ run: |
+ poetry run mypy .
  integration-test:
  name: Pytest (Python ${{ matrix.python-version }} on ${{ matrix.os }})
  # Runs pytest on all tested versions of python and OSes

diff --git a/poetry.lock b/poetry.lock
diff --git a/pycytominer/aggregate.py b/pycytominer/aggregate.py
@@ -83,7 +83,7 @@ def aggregate(
  # Only extract single object column in preparation for count
  if compute_object_count:
  count_object_df = (
- population_df.loc[:, np.union1d(strata, [object_feature])]
+ population_df.loc[:, list(np.union1d(strata, [object_feature]))]
  .groupby(strata)[object_feature]
  .count()
  .reset_index()
@@ -92,7 +92,9 @@ def aggregate(
 
  if features == "infer":
  features = infer_cp_features(population_df)
- population_df = population_df[features]
+
+ # recast as dataframe to protect against scenarios where a series may be returned
+ population_df = pd.DataFrame(population_df[features])
 
  # Fix dtype of input features (they should all be floats!)
  population_df = population_df.astype(float)
@@ -101,7 +103,9 @@ def aggregate(
  population_df = pd.concat([strata_df, population_df], axis="columns")
 
  # Perform aggregating function
- population_df = population_df.groupby(strata, dropna=False)
+ # Note: type ignore added below to address the change in variable types for
+ # label `population_df`.
+ population_df = population_df.groupby(strata, dropna=False) # type: ignore[assignment]
 
  if operation == "median":
  population_df = population_df.median().reset_index()
@@ -118,10 +122,10 @@ def aggregate(
  for column in population_df.columns
  if column in ["ImageNumber", "ObjectNumber"]
  ]:
- population_df = population_df.drop([columns_to_drop], axis="columns")
+ population_df = population_df.drop(columns=columns_to_drop, axis="columns")
 
  if output_file is not None:
- output(
+ return output(
  df=population_df,
  output_filename=output_file,
  output_type=output_type,

diff --git a/pycytominer/cyto_utils/DeepProfiler_processing.py b/pycytominer/cyto_utils/DeepProfiler_processing.py
@@ -7,8 +7,9 @@
 import pandas as pd
 import warnings
 
-from pycytominer import aggregate, normalize
-from pycytominer.cyto_utils import (
+# use mypy ignores below to avoid duplicate import warnings
+from pycytominer import aggregate, normalize # type: ignore[no-redef]
+from pycytominer.cyto_utils import ( # type: ignore[no-redef]
  load_npz_features,
  load_npz_locations,
  infer_cp_features,

diff --git a/pycytominer/cyto_utils/cell_locations.py b/pycytominer/cyto_utils/cell_locations.py
@@ -106,7 +106,9 @@ def __init__(
  "s3", config=botocore.config.Config(signature_version=botocore.UNSIGNED)
  )
 
- def _expanduser(self, obj: Union[str, None]):
+ def _expanduser(
+ self, obj: Union[str, pd.DataFrame, sqlalchemy.engine.Engine, None]
+ ):
  """Expand the user home directory in a path"""
  if obj is not None and isinstance(obj, str) and not obj.startswith("s3://"):
  return pathlib.Path(obj).expanduser().as_posix()

diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py
@@ -714,7 +714,7 @@ def merge_single_cells(
  """
 
  # Load the single cell dataframe by merging on the specific linking columns
- sc_df = ""
+ left_compartment_loaded = False
  linking_check_cols = []
  merge_suffix_rename = []
  for left_compartment in self.compartment_linking_cols:
@@ -737,7 +737,7 @@ def merge_single_cells(
  left_compartment
  ]
 
- if isinstance(sc_df, str):
+ if not left_compartment_loaded:
  sc_df = self.load_compartment(compartment=left_compartment)
 
  if compute_subsample:
@@ -752,6 +752,8 @@ def merge_single_cells(
  sc_df, how="left", on=subset_logic_df.columns.tolist()
  ).reindex(sc_df.columns, axis="columns")
 
+ left_compartment_loaded = True
+
  sc_df = sc_df.merge(
  self.load_compartment(compartment=right_compartment),
  left_on=[*self.merge_cols, left_link_col],
@@ -804,11 +806,13 @@ def merge_single_cells(
 
  normalize_args["features"] = features
 
- sc_df = normalize(profiles=sc_df, **normalize_args)
+ # ignore mypy warnings below as these reference root package imports
+ sc_df = normalize(profiles=sc_df, **normalize_args) # type: ignore[operator]
 
  # In case platemap metadata is provided, use pycytominer.annotate for metadata
  if platemap is not None:
- sc_df = annotate(
+ # ignore mypy warnings below as these reference root package imports
+ sc_df = annotate( # type: ignore[operator]
  profiles=sc_df, platemap=platemap, output_file=None, **kwargs
  )
 

diff --git a/pycytominer/cyto_utils/collate.py b/pycytominer/cyto_utils/collate.py
@@ -131,7 +131,7 @@ def collate(
  with sqlite3.connect(cache_backend_file, isolation_level=None) as connection:
  cursor = connection.cursor()
  if column:
- if print:
+ if printtoscreen:
  print(f"Adding a Metadata_Plate column based on column {column}")
  cursor.execute("ALTER TABLE Image ADD COLUMN Metadata_Plate TEXT;")
  cursor.execute(f"UPDATE image SET Metadata_Plate ={column};")