TST: Test, document case of residue ID >99,999

openforcefield · Aug 6, 2024 · 197240d · 197240d
1 parent ba56c58
commit 197240d
Show file tree

Hide file tree

Showing 5 changed files with 54 additions and 8 deletions.
diff --git a/devtools/conda-envs/dev_env.yaml b/devtools/conda-envs/dev_env.yaml
@@ -4,7 +4,7 @@ channels:
  - openeye
 dependencies:
  # Core
- - python =3.10
+ - python
  - pip
  - numpy
  - pydantic =2
@@ -34,6 +34,8 @@ dependencies:
  # Drivers
  - gromacs
  - lammps >=2023.08.02
+ # https:/conda-forge/lammps-feedstock/issues/207
+ - openmpi =4
  - panedr
  # Typing
  - mypy

diff --git a/openff/interchange/_tests/unit_tests/interop/gromacs/export/test_export.py b/openff/interchange/_tests/unit_tests/interop/gromacs/export/test_export.py
@@ -45,7 +45,7 @@ class _NeedsGROMACS:
 def parse_residue_ids(file) -> list[str]:
  """Parse residue IDs, and only the residue IDs, from a GROMACS .gro file."""
  with open(file) as f:
- ids = [int(line[:5]) for line in f.readlines()[2:-1]]
+ ids = [line[:5].strip() for line in f.readlines()[2:-1]]
 
  return ids
 
@@ -205,13 +205,44 @@ def test_fill_in_residue_ids(self, sage):
  ),
  ).to_gro("fill.gro")
 
- expected_residue_ids = 8 * [1] + 3 * [2] + 5 * [3]
+ expected_residue_ids = 8 * ["1"] + 3 * ["2"] + 5 * ["3"]
 
  found_residue_ids = parse_residue_ids("fill.gro")
 
  for expected, found in zip(expected_residue_ids, found_residue_ids):
  assert expected == found
 
+ def test_atom_index_gt_100_000(self, water, sage):
+ """Ensure that atom indices are written correctly for large numbers."""
+ water.add_hierarchy_scheme(
+ ("residue_name", "residue_number"),
+ "residues",
+ )
+
+ topology = Topology.from_molecules(2 * [water])
+
+ topology.box_vectors = unit.Quantity([4, 4, 4], unit.nanometer)
+
+ # Can't just update atoms' metadata, neeed to create these scheme/element objects
+ # and need to also modify the residue objects themselves
+ for molecule_index, molecule in enumerate(topology.molecules):
+ for atom in molecule.atoms:
+ atom.metadata["residue_number"] = str(molecule_index + 123_456)
+
+ for residue_index, residue in enumerate(topology.residues):
+ residue.residue_number = str(residue_index + 123_456)
+
+ interchange = sage.create_interchange(topology)
+
+ interchange.to_gro("large.gro")
+
+ expected_residue_ids = 3 * ["23456"] + 3 * ["23457"]
+
+ found_residue_ids = parse_residue_ids("large.gro")
+
+ for expected, found in zip(expected_residue_ids, found_residue_ids):
+ assert expected == found
+
  @pytest.mark.slow
  def test_atom_names_pdb(self):
  peptide = Molecule.from_polymer_pdb(

diff --git a/openff/interchange/_tests/unit_tests/interop/openmm/test_virtual_sites.py b/openff/interchange/_tests/unit_tests/interop/openmm/test_virtual_sites.py
@@ -330,7 +330,6 @@ def test_tip5p_num_exceptions(self, water, tip5p, combine, n_molecules):
  # Safeguard against some of the behavior seen in #919
  for index in range(num_exceptions):
  p1, p2, *_ = force.getExceptionParameters(index)
- print(p1, p2)
 
  if sorted([p1, p2]) == [0, 3]:
  raise Exception(

diff --git a/openff/interchange/components/interchange.py b/openff/interchange/components/interchange.py
@@ -446,6 +446,8 @@ def to_gromacs(
  Molecule names in written files are not guaranteed to match the `Moleclue.name` attribute of the
  molecules in the topology, especially if they are empty strings or not unique.
 
+ See `to_gro` and `to_top` for more details.
+
  """
  from openff.interchange.interop.gromacs.export._export import GROMACSWriter
  from openff.interchange.smirnoff._gromacs import _convert
@@ -505,6 +507,16 @@ def to_gro(self, file_path: Path | str, decimal: int = 3):
  decimal: int, default=3
  The number of decimal places to use when writing the GROMACS coordinate file.
 
+ Residue IDs must be positive integers (or string representations thereof).
+
+ Residue IDs greater than 99,999 are reduced modulo 100,000 in line with common GROMACS practice.
+
+ Residue names and IDs from the topology are used, if present, and otherwise are generated internally.
+
+ Behavior when some, but not all, residue names and IDs are present in the topology is undefined.
+
+ If residue IDs are generated internally, they are assigned sequentially to each molecule starting at 1.
+
  """
  from openff.interchange.interop.gromacs.export._export import GROMACSWriter
  from openff.interchange.smirnoff._gromacs import _convert

diff --git a/openff/interchange/interop/gromacs/export/_export.py b/openff/interchange/interop/gromacs/export/_export.py
@@ -200,7 +200,7 @@ def _write_atoms(
  top.write(
  f"{atom.index :6d} "
  f"{mapping_to_reduced_atom_types[atom.atom_type] :6s}"
- f"{atom.residue_index :8d} "
+ f"{atom.residue_index % 100_000 :8d} "
  f"{atom.residue_name :8s} "
  f"{atom.name :6s}"
  f"{atom.charge_group_number :6d}"
@@ -211,7 +211,7 @@ def _write_atoms(
  top.write(
  f"{atom.index :6d} "
  f"{atom.atom_type :6s}"
- f"{atom.residue_index :8d} "
+ f"{atom.residue_index % 100_000 :8d} "
  f"{atom.residue_name :8s} "
  f"{atom.name :6s}"
  f"{atom.charge_group_number :6d}"
@@ -448,15 +448,17 @@ def _write_gro(self, gro, decimal: int):
  for molecule_name, molecule in self.system.molecule_types.items():
  n_copies = self.system.molecules[molecule_name]
 
- for _ in range(n_copies):
+ for copy_index in range(n_copies):
+
  for atom in molecule.atoms:
+
  gro.write(
  f"%5d%-5s%5s%5d"
  f"%{decimal + 5}.{decimal}f"
  f"%{decimal + 5}.{decimal}f"
  f"%{decimal + 5}.{decimal}f\n"
  % (
- atom.residue_index, # This needs to be looked up from a different data structure
+ (atom.residue_index + copy_index) % 100_000,
  atom.residue_name[:5],
  atom.name[:5],
  (count + 1) % 100000,