RMSD

RMSD#

autodE can be used to calculate RMSD values between different molecules. For example, a script that compares .xyz files in the current directory and copies them to a folder (unique_conformers) if they are unique based on an RMSD threshold

#!/usr/bin/env python3
from autode.species import Species
from autode.geom import calc_rmsd
from autode.input_output import xyz_file_to_atoms
from shutil import copyfile
import os
import argparse

folder_name = "unique_conformers"


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "filenames", action="store", nargs="+", help=".xyz files to compare"
    )

    parser.add_argument(
        "-t",
        action="store",
        default=1.0,
        type=float,
        help="RMSD threshold (Å)",
    )

    parser.add_argument(
        "-oh",
        action="store_true",
        default=False,
        help="Only compare heavy atoms",
    )

    return parser.parse_args()


def get_molecules_no_hydrogens(molecules):
    """Remove all hydrogen atoms from  a list of molecules"""

    no_h_molecules = []

    for molecule in molecules:
        molecule.atoms = [atom for atom in molecule.atoms if atom.label != "H"]

        no_h_molecules.append(molecule)

    return no_h_molecules


def get_and_copy_unique_confs(xyz_filenames, only_heavy_atoms, threshold_rmsd):
    """For each xyz file generate a species and copy it to a folder if it is
    unique based on an RMSD threshold"""

    molecules = [
        Species(
            name=fn.rstrip(".xyz"),
            atoms=xyz_file_to_atoms(fn),
            charge=0,
            mult=1,
        )
        for fn in xyz_filenames
    ]

    if only_heavy_atoms:
        molecules = get_molecules_no_hydrogens(molecules)

    unique_mol_ids = []

    for i in range(len(molecules)):
        mol = molecules[i]

        is_unique = True

        for j in unique_mol_ids:
            rmsd = calc_rmsd(mol.coordinates, molecules[j].coordinates)

            if rmsd < threshold_rmsd:
                is_unique = False
                break

        if is_unique:
            unique_mol_ids.append(i)

    print("Number of unique molecules = ", len(unique_mol_ids))

    # Copy all the unique .xyz files to a new folder
    if not os.path.exists(folder_name):
        os.mkdir(folder_name)

    for i in unique_mol_ids:
        xyz_filename = xyz_filenames[i]
        copyfile(xyz_filename, os.path.join(folder_name, xyz_filename))

    return None


if __name__ == "__main__":
    args = get_args()
    assert all(fn.endswith(".xyz") for fn in args.filenames)

    get_and_copy_unique_confs(
        xyz_filenames=args.filenames,
        only_heavy_atoms=args.oh,
        threshold_rmsd=args.t,
    )

which can be used:

$ python rmsd.py conformer1.xyz. conformer2.xyz -t 0.1

Note

There are many other Python packages to calculate RMSD e.g. rmsd and spyrmsd which may be better!