Source code for nanoCAT.recipes.mark_surface

"""
nanoCAT.recipes.mark_surface
============================

A recipe for identifying surface-atom subsets.

Index
-----
.. currentmodule:: nanoCAT.recipes
.. autosummary::
    replace_surface

API
---
.. autofunction:: replace_surface

"""

from typing import Any, Union, Iterable
from collections import abc

import numpy as np
import pandas as pd

from scm.plams import Molecule, MoleculeError

from CAT.mol_utils import to_atnum, to_symbol
from CAT.attachment.distribution import distribute_idx
from nanoCAT.bde.identify_surface import identify_surface_ch

__all__ = ['replace_surface']


[docs]def replace_surface(mol: Molecule,
                    symbol: Union[str, int],
                    symbol_new: Union[str, int] = 'Cl',
                    nth_shell: Union[int, Iterable[int]] = 0,
                    f: float = 0.5,
                    mode: str = 'uniform',
                    displacement_factor: float = 0.5,
                    **kwargs: Any) -> Molecule:
    r"""A workflow for identifying all surface atoms in **mol** and replacing a subset of them.

    Consists of three distinct steps:

    1. Identifying which atoms, with a user-specified atomic **symbol**,
       are located on the surface of **mol** rather than in the bulk.
    2. Define a subset of the newly identified surface atoms using one of CAT's
       distribution algorithms.
    3. Create and return a molecule where the atom subset defined in step 2
       has its atomic symbols replaced with **symbol_new**.

    Examples
    --------
    Replace 75% of all surface ``"Cl"`` atoms with ``"I"``.

    .. code:: python

        >>> from scm.plams import Molecule
        >>> from CAT.recipes import replace_surface

        >>> mol = Molecule(...)  # Read an .xyz file
        >>> mol_new = replace_surface(mol, symbol='Cl', symbol_new='I', f=0.75)
        >>> mol_new.write(...)  # Write an .xyz file

    The same as above, except this time the new ``"I"`` atoms are all deleted.

    .. code:: python

        >>> from scm.plams import Molecule
        >>> from CAT.recipes import replace_surface

        >>> mol = Molecule(...)  # Read an .xyz file
        >>> mol_new = replace_surface(mol, symbol='Cl', symbol_new='I', f=0.75)

        >>> del_atom = [at for at in mol_new if at.symbol == 'I']
        >>> for at in del_atom:
        ...     mol_new.delete_atom(at)
        >>> mol_new.write(...)  # Write an .xyz file

    Parameters
    ----------
    mol : :class:`~scm.plams.mol.molecule.Molecule`
        The input molecule.

    symbol : :class:`str` or :class:`int`
        An atomic symbol or number defining the super-set of the surface atoms.

    symbol_new : :class:`str` or :class:`int`
        An atomic symbol or number which will be assigned to the new surface-atom subset.

    nth_shell : :class:`int` or :class:`~collections.abc.Iterable` [:class:`int`]
        One or more integers denoting along which shell-surface(s) to search.
        For example, if ``symbol = "Cd"`` then ``nth_shell = 0`` represents the surface,
        ``nth_shell = 1`` is the first sub-surface ``"Cd"`` shell and
        ``nth_shell = 2`` is the second sub-surface ``"Cd"`` shell.
        Using ``nth_shell = [1, 2]`` will search along both the first and second
        ``"Cd"`` sub-surface shells.
        Note that a :exc:`Zscm.plams.core.errors.MoleculeError` will be raised if
        the specified **nth_shell** is larger than the actual number of available
        sub-surface shells.

    f : :class:`float`
        The fraction of surface atoms whose atom types will be replaced with **symbol_new**.
        Must obey the following condition: :math:`0 < f \le 1`.

    mode : :class:`str`
        How the subset of surface atoms will be generated.
        Accepts one of the following values:

        * ``"random"``: A random distribution.
        * ``"uniform"``: A uniform distribution; maximizes the nearest-neighbor distance.
        * ``"cluster"``: A clustered distribution; minimizes the nearest-neighbor distance.

    displacement_factor : :class:`float`
        The smoothing factor :math:`n` for constructing a convex hull;
        should obey :math:`0 <= n <= 1`.
        Represents the degree of displacement of all atoms with respect to a spherical surface;
        :math:`n = 1` is a complete projection while :math:`n = 0` means no displacement at all.

        A non-zero value is generally recomended here,
        as the herein utilized :class:`~scipy.spatial.ConvexHull` class
        requires an adequate degree of surface-convexness,
        lest it fails to properly identify all valid surface points.

    \**kwargs : :data:`~typing.Any`
        Further keyword arguments for
        :func:`~CAT.attachment.distribution.distribute_idx`.

    Returns
    -------
    :class:`~scm.plams.mol.molecule.Molecule`
        A new Molecule with a subset of its surface atoms replaced with **symbol_new**.

    See Also
    --------
    :func:`~CAT.attachment.distribution.distribute_idx`
        Create a new distribution of atomic indices from **idx** of length :code:`f * len(idx)`.

    :func:`~nanoCAT.bde.identify_surface.identify_surface`
        Take a molecule and identify which atoms are located on the surface,
        rather than in the bulk.

    :func:`~nanoCAT.bde.identify_surface.identify_surface_ch`
        Identify the surface of a molecule using a convex hull-based approach.

    """
    # Parse input arguments
    xyz = np.array(mol, dtype=float, ndmin=2, copy=False)
    atnum = to_atnum(symbol)
    atnum_new = to_atnum(symbol_new)

    # Define the surface-atom subset
    idx = np.fromiter((i for i, at in enumerate(mol) if at.atnum == atnum), dtype=int)
    try:
        idx_surface = idx[_collect_surface(xyz[idx], displacement_factor, nth_shell)]
    except MoleculeError as exc:
        nth = exc.args[0]
        if nth == 0:
            exc = MoleculeError(f"No atoms with atomic symbol {to_symbol(symbol)!r} available in "
                                f"{mol.get_formula()!r}")
        else:
            exc = MoleculeError(f"No subsurface shell >= {nth!r} of atom {to_symbol(symbol)!r} "
                                f"available in {mol.get_formula()!r}")
        exc.__cause__ = None
        raise exc

    try:
        idx_surface_subset = distribute_idx(xyz, idx_surface, f=f, mode=mode, **kwargs)
    except ValueError as ex:
        raise MoleculeError("Failed to identify any surface atoms with atomic symbol "
                            f"{to_symbol(symbol)!r} in {mol.get_formula()!r}") from ex
    else:
        idx_surface_subset += 1

    # Update atomic symbols and return
    ret = mol.copy()
    for i in idx_surface_subset:
        ret[i].atnum = atnum_new
    return ret


def _collect_surface(xyz: np.ndarray, displacement_factor: float,
                     nth_shell: Union[int, Iterable[int]] = 0) -> np.ndarray:
    """Collect all user-specified surface and/or sub-surface shells of **xyz**."""
    n = -1
    xyz_df = pd.DataFrame(xyz)
    n_set = {nth_shell} if not isinstance(nth_shell, abc.Iterable) else set(nth_shell)

    ret = []
    while True:
        n += 1

        # Check if the DataFrame is empty
        if not xyz_df.size:
            raise MoleculeError(n)

        idx = identify_surface_ch(xyz_df, n=displacement_factor)
        bool_ar = np.ones(len(xyz_df), dtype=bool)
        bool_ar[idx] = False

        if n not in n_set:
            xyz_df = xyz_df.loc[bool_ar, :]
            continue
        else:
            index = xyz_df.index[~bool_ar]
            xyz_df = xyz_df.loc[bool_ar, :]

        n_set.remove(n)
        ret += index.tolist()
        if not n_set:
            return np.fromiter(ret, count=len(ret), dtype=int)