Source code for dataCAT.df_collection

"""
dataCAT.df_collection
=====================

A module which holds the :class:`DFCollection` class, a mutable collection for holding DataFrames.

Index
-----
.. currentmodule:: dataCAT.df_collection
.. autosummary::
    DFCollection
    get_df_collection

API
---
.. autoclass:: _DFCollection
    :members:
    :private-members:
    :special-members:
.. autofunction:: get_df_collection

"""

from typing import (Any, Iterator)
from textwrap import indent
from collections.abc import Collection

import pandas as pd

__all__ = ['get_df_collection']

_MAGIC_METHODS = (
    '__abs__', '__add__', '__and__', '__contains__', '__div__', '__eq__', '__floordiv__', '__ge__',
    '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__imod__',
    '__imul__', '__ior__', '__ipow__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__',
    '__len__', '__lt__', '__matmul__', '__mod__', '__mul__', '__or__', '__pow__', '__setitem__',
    '__sub__', '__truediv__', '__xor__', '__contains__', '__iter__', '__len__'
)


def _is_magic(key: str) -> bool:
    """Check if **key** belongs to a magic method."""
    return key.startswith('__') and key.endswith('__')


[docs]class _DFCollection(Collection): """A mutable collection for holding dataframes. Parameters ---------- df : |pd.DataFrame|_ A Pandas DataFrame (see :attr:`_DFCollection.df`). Attributes ---------- df : |pd.DataFrame|_ A Pandas DataFrame. Warning ------- The :class:`._DFCollection` class should never be directly called on its own. See :func:`.get_df_collection`, which returns an actually usable :class:`.DFCollection` instance (a subclass). """ def __init__(self, df: pd.DataFrame) -> None: """Initialize the :class:`DFCollection` instance.""" object.__setattr__(self, 'df', df) def __getattribute__(self, key: str) -> Any: """Call :meth:`pandas.DataFrame.__getattribute__` unless a magic method is provided.""" if key == 'df' or _is_magic(key): return object.__getattribute__(self, key) else: df = object.__getattribute__(self, 'df') return df.__getattribute__(key) def __setattr__(self, key: str, value: Any) -> None: """Call :meth:`pandas.DataFrame.__setattr__` unless ``"df"`` is provided as **key**.""" if key == 'df': if not isinstance(value, pd.DataFrame): raise TypeError(f"The {self.__class__.__name__}.df key expects an instance of " f"'pandas.DataFrame'; observed type '{value.__class__.__name__}'") object.__setattr__(self, key, value) for k in _MAGIC_METHODS: # Populate this instance with **df** magic methods method = getattr(value, k) setattr(self.__class__, k, method) else: self.df.__setattr__(key, value) def __repr__(self) -> str: """Return a machine readable string representation of this instance.""" df = self.df args = self.__class__.__name__, df.__class__.__module__, df.__class__.__name__, hex(id(df)) return '{}(df=<{}.{} at {}>)'.format(*args) def __str__(self) -> str: """Return a human readable string representation of this instance.""" df = indent(str(self.df), ' ', bool) return f'{self.__class__.__name__}(\n{df}\n)' def __contains__(self, value: Any) -> bool: pass def __len__(self) -> int: pass def __iter__(self) -> Iterator[pd.Series]: pass
[docs]def get_df_collection(df: pd.DataFrame) -> 'DFCollection': """Return a mutable collection for holding dataframes. Parameters ---------- df : |pd.DataFrame|_ A Pandas DataFrame. Returns ------- |dataCAT.DFCollection|_ A :class:`DFCollection` instance. The class is described in more detail in the documentation of its superclass: :class:`_DFCollection`. Note ---- As the :class:`DFCollection` class is defined within the scope of this function, two instances of :class:`DFCollection` will *not* belong to the same class (see example below). In more technical terms: The class bound to a particular :class:`DFCollection` instance is a unique instance of :class:`type`. .. code:: python >>> import numpy as np >>> import pandas as pd >>> df = pd.DataFrame(np.random.rand(5, 5)) >>> collection1 = get_df_collection(df) >>> collection2 = get_df_collection(df) >>> print(df is collection1.df is collection2.df) True >>> print(collection1.__class__.__name__ == collection2.__class__.__name__) True >>> print(collection1.__class__ == collection2.__class__) False """ class DFCollection(_DFCollection): pass if not isinstance(df, pd.DataFrame): raise TypeError("df expects an instance of 'pandas.DataFrame'; " f"observed type: '{df.__class__.__name__}'") ret = DFCollection ret.__doc__ = _DFCollection.__doc__.rsplit('\n', 7)[0] for key in _MAGIC_METHODS: method = getattr(df, key) setattr(ret, key, method) return ret(df)