Source code for nanoCAT.cdft

from math import nan
from typing import Iterable, Any, Type, List, Union

import yaml
import pandas as pd

from qmflows import templates as _templates
from qmflows.packages.SCM import ADF_Result
from scm.plams import Molecule, Settings, ADFJob, ADFResults, Units, Results
from scm.plams.core.basejob import Job
from CAT.workflows import WorkFlow, JOB_SETTINGS_CDFT, MOL
from CAT.jobs import job_single_point  # noqa: F401
from CAT.settings_dataframe import SettingsDataFrame

__all__ = ['init_cdft', 'get_global_descriptors', 'cdft']

_CDFT: str = """specific:
    adf:
        symmetry: nosym
        conceptualdft:
            enabled: yes
            analysislevel: extended
            electronegativity: yes
            domains:
                enabled: yes
        qtaim:
            enabled: yes
            analysislevel: extended
            energy: yes
        basis:
            core: none
            type: DZP
        xc:
            libxc:
                CAM-B3LYP
        numericalquality: good
"""

#: A QMFlows-style template for conceptual DFT calculations.
cdft = Settings(yaml.safe_load(_CDFT))
cdft.specific.adf += _templates.singlepoint.specific.adf.copy()


def init_cdft(ligand_df: SettingsDataFrame) -> None:
    r"""Initialize the ligand conceptual dft (CDFT) workflow.

    Parameters
    ----------
    ligand_df : |CAT.SettingsDataFrame|
        A DataFrame of ligands.

    """
    workflow = WorkFlow.from_template(ligand_df, name='cdft')

    # Import from the database and start the calculation
    idx = workflow.from_db(ligand_df)
    workflow(start_crs_jobs, ligand_df, index=idx)

    # Sets a nested list with the filenames of .in files
    # This cannot be done with loc is it will try to expand the list into a 2D array
    ligand_df[JOB_SETTINGS_CDFT] = workflow.pop_job_settings(ligand_df[MOL])

    # Export to the database
    job_recipe = workflow.get_recipe()
    workflow.to_db(ligand_df, index=idx, job_recipe=job_recipe)


def start_crs_jobs(mol_list: Iterable[Molecule],
                   jobs: Iterable[Type[Job]], settings: Iterable[Settings],
                   **kwargs: Any) -> List[pd.Series]:
    # Parse the job type
    job, *_ = jobs
    if job is not ADFJob:
        raise NotImplementedError(f"job: {job.__class__.__name__} = {job!r}")

    # Parse and update the input settings
    _s, *_ = settings
    s = Settings(_s)
    s.input += cdft.specific.adf

    ret = []
    for mol in mol_list:
        ret.append(run_cdft_job(mol, job, s))
    return ret


_BACKUP = pd.Series({
    'Electronic chemical potential (mu)': nan,
    'Electronegativity (chi=-mu)': nan,
    'Hardness (eta)': nan,
    'Softness (S)': nan,
    'Hyperhardness (gamma)': nan,
    'Electrophilicity index (w=omega)': nan,
    'Dissocation energy (nucleofuge)': nan,
    'Dissociation energy (electrofuge)': nan,
    'Electrodonating power (w-)': nan,
    'Electroaccepting power(w+)': nan,
    'Net Electrophilicity': nan,
    'Global Dual Descriptor Deltaf+': nan,
    'Global Dual Descriptor Deltaf-': nan,
    'Electronic chemical potential (mu+)': nan,
    'Electronic chemical potential (mu-)': nan
})
_BACKUP.index = pd.MultiIndex.from_product(
    [['cdft'], _BACKUP.index], names=['index', 'sub index']
)


def run_cdft_job(mol: Molecule, job: Type[ADFJob], s: Settings) -> pd.Series:
    """Run a conceptual DFT job and extract & return all global descriptors."""
    results = mol.job_single_point(job, s.copy(), name='CDFT',
                                   ret_results=True, read_template=False)

    if results.job.status in {'crashed', 'failed'}:
        return _BACKUP

    ret = get_global_descriptors(results)
    ret.index = pd.MultiIndex.from_product(
        [['cdft'], ret.index], names=['index', 'sub index']
    )
    return ret


[docs]def get_global_descriptors(results: Union[ADFResults, ADF_Result]) -> pd.Series:
    """Extract a dictionary with all ADF conceptual DFT global descriptors from **results**.

    Examples
    --------
    .. code:: python

        >>> import pandas as pd
        >>> from scm.plams import ADFResults
        >>> from CAT.recipes import get_global_descriptors

        >>> results = ADFResults(...)

        >>> series: pd.Series = get_global_descriptors(results)
        >>> print(dct)
        Electronic chemical potential (mu)     -0.113
        Electronegativity (chi=-mu)             0.113
        Hardness (eta)                          0.090
        Softness (S)                           11.154
        Hyperhardness (gamma)                  -0.161
        Electrophilicity index (w=omega)        0.071
        Dissocation energy (nucleofuge)         0.084
        Dissociation energy (electrofuge)       6.243
        Electrodonating power (w-)              0.205
        Electroaccepting power(w+)              0.092
        Net Electrophilicity                    0.297
        Global Dual Descriptor Deltaf+          0.297
        Global Dual Descriptor Deltaf-         -0.297
        Electronic chemical potential (mu+)    -0.068
        Electronic chemical potential (mu-)    -0.158
        Name: global descriptors, dtype: float64


    Parameters
    ----------
    results : :class:`plams.ADFResults` or :class:`qmflows.ADF_Result`
        A PLAMS Results or QMFlows Result instance of an ADF calculation.

    Returns
    -------
    :class:`pandas.Series`
        A Series with all ADF global decsriptors as extracted from **results**.

    """
    if not isinstance(results, Results):
        results = results.results
    file = results['$JN.out']

    with open(file) as f:
        # Identify the GLOBAL DESCRIPTORS block
        for item in f:
            if item == ' GLOBAL DESCRIPTORS\n':
                next(f)
                next(f)
                break
        else:
            raise ValueError(f"Failed to identify the 'GLOBAL DESCRIPTORS' block in {file!r}")

        # Extract the descriptors
        ret = {}
        for item in f:
            item = item.rstrip('\n')
            if not item:
                break

            _key, _value = item.rsplit('=', maxsplit=1)
            key = _key.strip()
            try:
                value = float(_value)
            except ValueError:
                value = float(_value.rstrip('(eV)')) * Units.conversion_ratio('ev', 'au')
            ret[key] = value

    # Fix the names of "mu+" and "mu-"
    ret['Electronic chemical potential (mu+)'] = ret.pop('mu+', nan)
    ret['Electronic chemical potential (mu-)'] = ret.pop('mu-', nan)
    return pd.Series(ret, name='global descriptors')