diff --git a/devtools/conda-envs/dev_env.yaml b/devtools/conda-envs/dev_env.yaml index db912c9c..2699c5e3 100644 --- a/devtools/conda-envs/dev_env.yaml +++ b/devtools/conda-envs/dev_env.yaml @@ -4,7 +4,7 @@ channels: - openeye dependencies: # Core - - python =3.10 + - python - pip - numpy - pydantic =2 @@ -34,6 +34,8 @@ dependencies: # Drivers - gromacs - lammps >=2023.08.02 + # https://github.com/conda-forge/lammps-feedstock/issues/207 + - openmpi =4 - panedr # Typing - mypy diff --git a/openff/interchange/_tests/interoperability_tests/internal/test_gromacs.py b/openff/interchange/_tests/interoperability_tests/internal/test_gromacs.py index b59bcc19..ea381712 100644 --- a/openff/interchange/_tests/interoperability_tests/internal/test_gromacs.py +++ b/openff/interchange/_tests/interoperability_tests/internal/test_gromacs.py @@ -98,57 +98,6 @@ def test_vaccum_warning(self, sage): with pytest.warns(UserWarning, match="gitlab"): out.to_gro("tmp.gro") - @pytest.mark.slow - @skip_if_missing("openmm") - def test_residue_info(self, sage): - """Test that residue information is passed through to .gro files.""" - import mdtraj - - protein = Molecule.from_polymer_pdb( - get_data_file_path( - "proteins/MainChain_HIE.pdb", - "openff.toolkit", - ), - ) - - ff14sb = ForceField("ff14sb_off_impropers_0.0.3.offxml") - - out = Interchange.from_smirnoff( - force_field=ff14sb, - topology=[protein], - ) - - out.to_gro("tmp.gro") - - mdtraj_topology = mdtraj.load("tmp.gro").topology - - for found_residue, original_residue in zip( - mdtraj_topology.residues, - out.topology.hierarchy_iterator("residues"), - ): - assert found_residue.name == original_residue.residue_name - assert str(found_residue.resSeq) == original_residue.residue_number - - @pytest.mark.slow - def test_atom_names_pdb(self): - peptide = Molecule.from_polymer_pdb( - get_data_file_path("proteins/MainChain_ALA_ALA.pdb", "openff.toolkit"), - ) - ff14sb = ForceField("ff14sb_off_impropers_0.0.3.offxml") - - Interchange.from_smirnoff(ff14sb, peptide.to_topology()).to_gro( - "atom_names.gro", - ) - - pdb_object = openmm.app.PDBFile( - get_data_file_path("proteins/MainChain_ALA_ALA.pdb", "openff.toolkit"), - ) - pdb_atom_names = [atom.name for atom in pdb_object.topology.atoms()] - - openmm_atom_names = openmm.app.GromacsGroFile("atom_names.gro").atomNames - - assert openmm_atom_names == pdb_atom_names - @needs_gmx class TestGROMACS: diff --git a/openff/interchange/_tests/unit_tests/interop/gromacs/export/test_export.py b/openff/interchange/_tests/unit_tests/interop/gromacs/export/test_export.py index fbfcaca1..bd087465 100644 --- a/openff/interchange/_tests/unit_tests/interop/gromacs/export/test_export.py +++ b/openff/interchange/_tests/unit_tests/interop/gromacs/export/test_export.py @@ -1,3 +1,4 @@ +import random from importlib import resources from math import exp @@ -41,6 +42,14 @@ class _NeedsGROMACS: pass +def parse_residue_ids(file) -> list[str]: + """Parse residue IDs, and only the residue IDs, from a GROMACS .gro file.""" + with open(file) as f: + ids = [line[:5].strip() for line in f.readlines()[2:-1]] + + return ids + + class TestToGro(_NeedsGROMACS): def test_residue_names(self, sage): """Reproduce issue #642.""" @@ -143,8 +152,9 @@ def test_vaccum_warning(self, sage): out.to_gro("tmp.gro") @pytest.mark.slow + @pytest.mark.parametrize("offset_residue_ids", [True, False]) @skip_if_missing("openmm") - def test_residue_info(self, sage): + def test_residue_info(self, offset_residue_ids): """Test that residue information is passed through to .gro files.""" import mdtraj @@ -155,6 +165,19 @@ def test_residue_info(self, sage): ), ) + if offset_residue_ids: + offset = random.randint(10, 20) + + for atom in protein.atoms: + atom.metadata["residue_number"] = str( + int(atom.metadata["residue_number"]) + offset, + ) + + # Need to manually update residues _and_ atoms + # https://github.com/openforcefield/openff-toolkit/issues/1921 + for residue in protein.residues: + residue.residue_number = str(int(residue.residue_number) + offset) + ff14sb = ForceField("ff14sb_off_impropers_0.0.3.offxml") out = Interchange.from_smirnoff( @@ -171,7 +194,54 @@ def test_residue_info(self, sage): out.topology.hierarchy_iterator("residues"), ): assert found_residue.name == original_residue.residue_name - assert str(found_residue.resSeq) == original_residue.residue_number + found_index = [*original_residue.atoms][0].metadata["residue_number"] + assert str(found_residue.resSeq) == found_index + + def test_fill_in_residue_ids(self, sage): + """Ensure that, if inputs have no residue_number, they are generated on-the-fly.""" + sage.create_interchange( + Topology.from_molecules( + [MoleculeWithConformer.from_smiles(smi) for smi in ["CC", "O", "C"]], + ), + ).to_gro("fill.gro") + + expected_residue_ids = 8 * ["1"] + 3 * ["2"] + 5 * ["3"] + + found_residue_ids = parse_residue_ids("fill.gro") + + for expected, found in zip(expected_residue_ids, found_residue_ids): + assert expected == found + + def test_atom_index_gt_100_000(self, water, sage): + """Ensure that atom indices are written correctly for large numbers.""" + water.add_hierarchy_scheme( + ("residue_name", "residue_number"), + "residues", + ) + + topology = Topology.from_molecules(2 * [water]) + + topology.box_vectors = unit.Quantity([4, 4, 4], unit.nanometer) + + # Can't just update atoms' metadata, neeed to create these scheme/element objects + # and need to also modify the residue objects themselves + for molecule_index, molecule in enumerate(topology.molecules): + for atom in molecule.atoms: + atom.metadata["residue_number"] = str(molecule_index + 123_456) + + for residue_index, residue in enumerate(topology.residues): + residue.residue_number = str(residue_index + 123_456) + + interchange = sage.create_interchange(topology) + + interchange.to_gro("large.gro") + + expected_residue_ids = 3 * ["23456"] + 3 * ["23457"] + + found_residue_ids = parse_residue_ids("large.gro") + + for expected, found in zip(expected_residue_ids, found_residue_ids): + assert expected == found @pytest.mark.slow def test_atom_names_pdb(self): diff --git a/openff/interchange/components/interchange.py b/openff/interchange/components/interchange.py index 6fea38b6..ef168176 100644 --- a/openff/interchange/components/interchange.py +++ b/openff/interchange/components/interchange.py @@ -446,6 +446,8 @@ def to_gromacs( Molecule names in written files are not guaranteed to match the `Moleclue.name` attribute of the molecules in the topology, especially if they are empty strings or not unique. + See `to_gro` and `to_top` for more details. + """ from openff.interchange.interop.gromacs.export._export import GROMACSWriter from openff.interchange.smirnoff._gromacs import _convert @@ -505,6 +507,16 @@ def to_gro(self, file_path: Path | str, decimal: int = 3): decimal: int, default=3 The number of decimal places to use when writing the GROMACS coordinate file. + Residue IDs must be positive integers (or string representations thereof). + + Residue IDs greater than 99,999 are reduced modulo 100,000 in line with common GROMACS practice. + + Residue names and IDs from the topology are used, if present, and otherwise are generated internally. + + Behavior when some, but not all, residue names and IDs are present in the topology is undefined. + + If residue IDs are generated internally, they are assigned sequentially to each molecule starting at 1. + """ from openff.interchange.interop.gromacs.export._export import GROMACSWriter from openff.interchange.smirnoff._gromacs import _convert diff --git a/openff/interchange/interop/gromacs/export/_export.py b/openff/interchange/interop/gromacs/export/_export.py index e16f375b..6deb0aae 100644 --- a/openff/interchange/interop/gromacs/export/_export.py +++ b/openff/interchange/interop/gromacs/export/_export.py @@ -200,7 +200,7 @@ def _write_atoms( top.write( f"{atom.index :6d} " f"{mapping_to_reduced_atom_types[atom.atom_type] :6s}" - f"{atom.residue_index :8d} " + f"{atom.residue_index % 100_000 :8d} " f"{atom.residue_name :8s} " f"{atom.name :6s}" f"{atom.charge_group_number :6d}" @@ -211,7 +211,7 @@ def _write_atoms( top.write( f"{atom.index :6d} " f"{atom.atom_type :6s}" - f"{atom.residue_index :8d} " + f"{atom.residue_index % 100_000 :8d} " f"{atom.residue_name :8s} " f"{atom.name :6s}" f"{atom.charge_group_number :6d}" @@ -448,15 +448,17 @@ def _write_gro(self, gro, decimal: int): for molecule_name, molecule in self.system.molecule_types.items(): n_copies = self.system.molecules[molecule_name] - for _ in range(n_copies): + for copy_index in range(n_copies): + for atom in molecule.atoms: + gro.write( f"%5d%-5s%5s%5d" f"%{decimal + 5}.{decimal}f" f"%{decimal + 5}.{decimal}f" f"%{decimal + 5}.{decimal}f\n" % ( - atom.residue_index, # This needs to be looked up from a different data structure + (atom.residue_index + copy_index) % 100_000, atom.residue_name[:5], atom.name[:5], (count + 1) % 100000,