Source code for nanoCAT.recipes.mol_filter

"""Recipes for filtering molecules.

Index
-----
.. currentmodule:: nanoCAT.recipes
.. autosummary::
    get_mol_length
    filter_mol
    filter_data

API
---
.. autofunction:: get_mol_length
.. autofunction:: filter_mol
.. autofunction:: filter_data

"""

from typing import Union, Iterable, Dict, TypeVar, Callable

import numpy as np
from scipy.spatial.distance import cdist
from scm.plams import Molecule, Atom

# Public names of this module (the ones picked up by a star-import).
__all__ = ['get_mol_length', 'filter_mol', 'filter_data']

# Generic element type of the ``data`` iterables accepted by the filter functions.
T = TypeVar('T')


def get_mol_length(mol: Union[np.ndarray, Molecule],
                   atom: Union[np.ndarray, Atom]) -> float:
    """Return the distance between **atom** and the atom in **mol** which it is furthest removed from.

    Examples
    --------
    Use a molecule's length for filtering a list of molecules:

    .. code:: python

        >>> from CAT.recipes import get_mol_length, filter_mol
        >>> from scm.plams import Molecule

        >>> mol_list = [Molecule(...), ...]
        >>> data = [...]
        >>> filter = lambda mol: get_mol_length(mol, mol.properties.get('anchor')) < 10
        >>> mol_dict = filter_mol(mol_list, data, filter=filter)

    Parameters
    ----------
    mol : :class:`~scm.plams.mol.molecule.Molecule` or :class:`numpy.ndarray`
        A PLAMS molecule or a 2D numpy array with a molecule's Cartesian coordinates.
    atom : :class:`~scm.plams.mol.atom.Atom` or :class:`numpy.ndarray`
        A PLAMS atom or a 1D numpy array with an atom's Cartesian coordinates.

    Returns
    -------
    :class:`float`
        The largest distance between **atom** and all atoms in **mol**.

    Raises
    ------
    :exc:`ValueError`
        Raised if the coordinates of **atom** cannot be reshaped into a single
        row of three values.

    See Also
    --------
    :func:`filter_mol`
        Filter **mol_list** and **data** based on elements from **mol_list**.

    """  # noqa: E501
    # A PLAMS atom carries its position in ``coords``; any other input is
    # treated as array-like.  Both cases go through ``np.asarray`` so there is
    # a single conversion path (``np.fromiter`` takes ``dtype`` as its second
    # positional argument and is easy to call incorrectly here).
    coords = atom.coords if isinstance(atom, Atom) else atom

    # ``cdist`` requires two 2D arrays, hence the (1, 3) shape.
    atom_xyz = np.asarray(coords, dtype=float).reshape((1, 3))

    # NOTE(review): **mol** is passed through unchanged; ``cdist`` performs its
    # own array conversion, so a PLAMS molecule is presumably array-convertible
    # -- confirm against the PLAMS version in use.
    dist = cdist(atom_xyz, mol)
    return dist.max()
def filter_mol(mol_list: Iterable[Molecule], data: Iterable[T],
               filter: Callable[[Molecule], bool]) -> Dict[Molecule, T]:
    """Filter **mol_list** and **data** based on elements from **mol_list**.

    Examples
    --------
    .. code:: python

        >>> from scm.plams import Molecule
        >>> from CAT.recipes import filter_mol

        >>> mol_list = [Molecule(...), ...]
        >>> data = [...]
        >>> keep = lambda mol: ...  # Any callable mapping a molecule to a bool
        >>> mol_dict1 = filter_mol(mol_list, data, filter=keep)

        >>> prop1 = [...]
        >>> prop2 = [...]
        >>> prop3 = [...]
        >>> multi_data = zip(prop1, prop2, prop3)
        >>> mol_dict2 = filter_mol(mol_list, multi_data, filter=keep)

        >>> keys = mol_dict1.keys()
        >>> values = mol_dict1.values()
        >>> mol_dict3 = filter_mol(keys, values, filter=keep)

    Parameters
    ----------
    mol_list : :class:`~collections.abc.Iterable` [:class:`~scm.plams.mol.molecule.Molecule`]
        An iterable of the, to-be filtered, PLAMS molecules.
    data : :class:`Iterable[T]<collections.abc.Iterable>`
        An iterable which will be assigned as values to the to-be returned dict.
        These parameters will be filtered in conjunction with **mol_list**.
        Note that **mol_list** and **data** *should* be of the same length;
        surplus elements of the longer iterable are ignored.
    filter : :class:`Callable[[Molecule], bool]<collections.abc.Callable>`
        A callable which is applied to each molecule in **mol_list**.
        Only molecules for which it returns a truthy value are kept.

    Returns
    -------
    :class:`dict` [:class:`~scm.plams.mol.molecule.Molecule`, :class:`T<typing.TypeVar>`]
        A dictionary with all (filtered) molecules as keys and elements from **data** as values.

    See Also
    --------
    :func:`filter_data`
        Filter **mol_list** and **data** based on elements from **data**.

    """
    selected = {}
    # Walk both iterables in lockstep; the predicate only ever sees the molecule.
    for molecule, value in zip(mol_list, data):
        if filter(molecule):
            selected[molecule] = value
    return selected
def filter_data(mol_list: Iterable[Molecule], data: Iterable[T],
                filter: Callable[[T], bool]) -> Dict[Molecule, T]:
    """Filter **mol_list** and **data** based on elements from **data**.

    Examples
    --------
    See :func:`filter_mol` for a number of input examples.

    Parameters
    ----------
    mol_list : :class:`~collections.abc.Iterable` [:class:`~scm.plams.mol.molecule.Molecule`]
        An iterable of the, to-be filtered, PLAMS molecules.
    data : :class:`Iterable[T]<collections.abc.Iterable>`
        An iterable which will be assigned as values to the to-be returned dict.
        These parameters will be filtered in conjunction with **mol_list**.
        Note that **mol_list** and **data** *should* be of the same length;
        surplus elements of the longer iterable are ignored.
    filter : :class:`Callable[[T], bool]<collections.abc.Callable>`
        A callable which is applied to each element of **data**.
        Only pairs for which it returns a truthy value are kept.
        An example would be :code:`lambda n: n < 10`.

    Returns
    -------
    :class:`dict` [:class:`~scm.plams.mol.molecule.Molecule`, :class:`T<typing.TypeVar>`]
        A dictionary with all (filtered) molecules as keys and elements from **data** as values.

    See Also
    --------
    :func:`filter_mol`
        Filter **mol_list** and **data** based on elements from **mol_list**.

    """
    kept = {}
    # Walk both iterables in lockstep; the predicate only ever sees the data element.
    for molecule, value in zip(mol_list, data):
        if not filter(value):
            continue
        kept[molecule] = value
    return kept