#! /usr/bin/env python
#
# Copyright (C) 2015-2016 Rich Lewis <rl403@cam.ac.uk>
# License: 3-clause BSD
"""
# skchem.filters
Chemical filters are defined.
"""
import pandas as pd
from ..base import BaseTransformer, Transformer
from .. import core
from ..utils import iterable_to_series, Defaults, optional_second_method
def not_all(x):
    """ Not all x.

    Args:
        x (iterable):
            The values to test for truthiness.

    Returns:
        bool:
            `True` if at least one element of `x` is falsy, `False` if
            every element is truthy.  An empty iterable gives `False`,
            because `all` of nothing is `True`.
    """
    return not all(x)
def not_any(x):
    """ Not any x.

    Args:
        x (iterable):
            The values to test for truthiness.

    Returns:
        bool:
            `True` if no element of `x` is truthy, `False` if at least one
            is.  An empty iterable gives `True`, because `any` of nothing is
            `False`.
    """
    return not any(x)
def identity(x):
    """ The identity.

    Args:
        x (object):
            Any value.

    Returns:
        object:
            The very same object that was passed in, untouched.
    """
    return x
# Registry of the aggregation callables, keyed by their string alias.  The
# `agg` setter of `BaseFilter` resolves its argument through `AGGS.get`.
AGGS = Defaults(defaults=dict([
    ('none', identity),
    ('any', any),
    ('all', all),
    ('not all', not_all),
    ('not any', not_any),
]))
class BaseFilter(BaseTransformer):

    """ The base Filter class.

    Args:
        agg (str or callable):
            The aggregate function used to reduce several results to a single
            pass/fail value.  It is resolved through `AGGS.get`.
        **kwargs:
            Passed on unchanged to the parent class initializer.
    """

    def __init__(self, agg='any', **kwargs):
        # Create the backing attribute before the parent initializer runs, so
        # that it exists whatever the parent does with the instance.
        self._agg = None
        super(BaseFilter, self).__init__(**kwargs)
        self.agg = agg

    def axes_names(self):
        """ Get the names of the axes.

        Returns:
            tuple:
                The string `'batch'` and the `name` of `columns`.
        """
        return 'batch', self.columns.name

    @property
    def agg(self):
        """ callable: The aggregate function to use. String aliases
        for `'any'`, `'not any'`, `'all'`, `'not all'` are available."""
        return self._agg

    @agg.setter
    def agg(self, val):
        # NOTE(review): presumably `AGGS.get` hands back a callable that is
        # not a registered alias unchanged - confirm against `Defaults.get`.
        self._agg = AGGS.get(val)

    @property
    def columns(self):
        """ pd.Index: The column index to use. """
        return pd.Index([self.__class__.__name__])

    def _mask(self, mols=None, res=None, neg=False):
        """ Generate a mask from molecules, or from their result after transform.

        Args:
            mols (pd.Series<skchem.Mol>):
                The molecules to use to generate the mask.
            res (pd.Series):
                The result of a transform. Overrides mols.
            neg (bool):
                Whether the mask should be inversed.

        Returns:
            pd.Series<bool>
        """
        # Only transform when no precomputed result was handed in.
        res = self.transform(mols, agg=False) if res is None else res

        # An entry passes when it is both non-zero and non-null.
        res = (res != 0) & pd.notnull(res)

        # A single molecule that produced a series of results is reduced to
        # one value with the aggregate function.
        if isinstance(res, pd.Series) and isinstance(mols, core.Mol):
            res = self.agg(res)

        # A frame of results is reduced row by row with the aggregate function.
        if isinstance(res, pd.DataFrame):
            res = res.apply(self.agg, axis=1)

        # Comparing against zero inverts plain bools and pandas objects alike.
        return res == 0 if neg else res

    @optional_second_method
    def filter(self, mols, y=None, neg=False):
        """ Keep only the molecules that pass the filter.

        Args:
            mols (skchem.Mol or iterable<skchem.Mol>):
                The molecule or molecules to filter.  Anything that is
                neither a `skchem.Mol` nor a `pd.Series` is converted with
                `iterable_to_series` before it is indexed.
            y (pd.Series):
                Optional values that are indexed with the same mask as
                `mols`.
            neg (bool):
                Whether to keep the molecules that fail instead.

        Returns:
            For a single `skchem.Mol`: the molecule itself if it is kept,
            otherwise `None`.  For several molecules: the retained molecules,
            or a tuple of the retained molecules and the retained `y` when
            `y` is given.
        """
        mask = self._mask(mols=mols, neg=neg)

        if isinstance(mols, core.Mol):
            # For a single molecule the mask is one truth value.
            return mols if mask else None
        elif not isinstance(mols, pd.Series):
            mols = iterable_to_series(mols)

        if y is None:
            return mols[mask]
        else:
            return mols[mask], y[mask]
class Filter(BaseFilter, Transformer):

    """ Filter base class.

    Examples:

        >>> import skchem

        Initialize the filter with a function:

        >>> is_named = skchem.filters.Filter(lambda m: m.name is not None)

        Filter results can be found with `transform`:

        >>> ethane = skchem.Mol.from_smiles('CC', name='ethane')
        >>> is_named.transform(ethane)
        True

        >>> anonymous = skchem.Mol.from_smiles('c1ccccc1')
        >>> is_named.transform(anonymous)
        False

        Can take a series or dataframe:

        >>> mols = pd.Series({'anonymous': anonymous, 'ethane': ethane})
        >>> is_named.transform(mols)
        anonymous    False
        ethane        True
        Name: Filter, dtype: bool

        Using `filter` will drop out molecules that fail the test:

        >>> is_named.filter(mols)
        ethane    <Mol: CC>
        dtype: object

        Only failed are retained with the `neg` keyword argument:

        >>> is_named.filter(mols, neg=True)
        anonymous    <Mol: c1ccccc1>
        dtype: object
    """

    def __init__(self, func=None, agg='any', n_jobs=1, verbose=True):
        """ Initialize a `Filter` object.

        Args:
            func (function: Mol => bool):
                The function to use to filter the arguments.  When omitted,
                `_transform_mol` is left as defined on the class.
            agg (str or function: iterable<bool> => bool):
                The aggregation to use in the filter. Can be 'any', 'all',
                'not any', 'not all' or a callable, for example `any` or `all`.
            n_jobs (int):
                Passed on unchanged to the parent initializer.
            verbose (bool):
                Passed on unchanged to the parent initializer.
        """
        super(Filter, self).__init__(agg=agg, n_jobs=n_jobs, verbose=verbose)

        if func is not None:
            # Stored on the instance, so it is called as `func(mol)` without
            # `self` and takes the place of the method defined below.
            self._transform_mol = func

    def _transform_mol(self, mol):
        """ Test a single molecule.

        This placeholder is only reached when no `func` was supplied and the
        method was not overridden.

        Raises:
            NotImplementedError: Always.
        """
        # `NotImplemented` is a comparison sentinel, not an exception; raising
        # it fails with a `TypeError` instead of signalling a missing method.
        raise NotImplementedError