Source code for skchem.pandas_ext.structure_methods

#! /usr/bin/env python
#
# Copyright (C) 2015-2016 Rich Lewis <rl403@cam.ac.uk>
# License: 3-clause BSD

""" # skchem.pandas.structure_methods

 Tools for adding a default attribute to pandas objects."""


from sklearn.manifold import TSNE, MDS
from sklearn.decomposition import PCA

import pandas as pd

from pandas.core.base import NoNewAttributesMixin, AccessorProperty
from pandas.core.series import Series
from pandas.core.index import Index

from .. import core
from .. import features

# Lookup table from the lower-case names accepted by
# ``StructureMethods.visualize(dim_red=...)`` to the class that is
# instantiated to perform the dimensionality reduction.
DIM_RED = dict(tsne=TSNE, pca=PCA, mds=MDS)


class StructureMethods(NoNewAttributesMixin):

    """ Accessor for calling chemical methods on series of molecules.

    The accessor wraps a series and forwards each operation to every
    element of that series through ``apply``.
    """

    def __init__(self, data):
        """ Store the series the accessor operates on.

        Args:
            data: The series of molecules to wrap.
        """
        self._data = data

    def add_hs(self, **kwargs):
        """ Call ``add_hs`` on every molecule in the series.

        Args:
            **kwargs: Forwarded unchanged to each molecule's ``add_hs``.

        Returns:
            The result of applying ``add_hs`` element-wise to the series.
        """
        return self._data.apply(lambda m: m.add_hs(**kwargs))

    def remove_hs(self, **kwargs):
        """ Call ``remove_hs`` on every molecule in the series.

        Args:
            **kwargs: Forwarded unchanged to each molecule's ``remove_hs``.

        Returns:
            The result of applying ``remove_hs`` element-wise to the series.
        """
        return self._data.apply(lambda m: m.remove_hs(**kwargs))

    def visualize(self, fper='morgan', dim_red='tsne', dim_red_kw=None,
                  **kwargs):
        """ Scatter-plot the molecules after reducing their features.

        The series is featurized, missing feature values are replaced by
        the column mean, the features are passed through the reducer and
        the first two columns of the result are plotted against each other.

        Args:
            fper: Passed to ``features.get`` to obtain the featurizer.  The
                featurizer's ``verbose`` attribute is set to ``False``.
            dim_red: Either a key of ``DIM_RED`` (matched case-insensitively),
                in which case the mapped class is instantiated with
                ``dim_red_kw``, or an object providing ``fit_transform``,
                which is used as given.
            dim_red_kw (dict): Keyword arguments for the reducer constructor.
                Only used when ``dim_red`` is a string.
            **kwargs: Forwarded to ``DataFrame.plot.scatter``.

        Returns:
            The object returned by ``DataFrame.plot.scatter``, with its tick
            labels and axis labels blanked.

        Raises:
            ValueError: If ``dim_red`` is a string that is not a key of
                ``DIM_RED``.
        """
        if dim_red_kw is None:
            dim_red_kw = {}

        if isinstance(dim_red, str):
            reducer_cls = DIM_RED.get(dim_red.lower())
            if reducer_cls is None:
                # Fail with an actionable message instead of the opaque
                # "'NoneType' object is not callable" from calling the
                # missing entry.
                raise ValueError(
                    'Unknown dim_red {!r}; expected one of {} or an object '
                    'with a fit_transform method.'.format(dim_red,
                                                          sorted(DIM_RED)))
            dim_red = reducer_cls(**dim_red_kw)

        fper = features.get(fper)
        fper.verbose = False
        feats = fper.transform(self._data)
        # The reducer is fed a frame without missing values: gaps are filled
        # with the per-column mean.
        feats = feats.fillna(feats.mean())

        twod = pd.DataFrame(dim_red.fit_transform(feats))
        ax = twod.plot.scatter(x=0, y=1, **kwargs)
        ax.set_xticklabels([])
        ax.set_xlabel('')
        ax.set_yticklabels([])
        ax.set_ylabel('')
        # Hand the axes back so callers can annotate or save the figure.
        return ax

    @property
    def atoms(self):
        """ The ``atoms`` attribute of every molecule, as a series. """
        return self._data.apply(lambda m: m.atoms)
def only_contains_mols(ser):
    """ Report whether every element of a series is a ``core.Mol``.

    Args:
        ser: The series to inspect; it must provide ``apply``.

    Returns:
        The outcome of ``.all()`` over the element-wise ``isinstance``
        checks.
    """

    def _is_mol(entry):
        return isinstance(entry, core.Mol)

    mol_flags = ser.apply(_is_mol)
    return mol_flags.all()
class StructureAccessorMixin(object):

    """ Mixin to bind chemical methods to objects.

    Classes that inherit from this mixin expose a ``mol`` attribute that
    resolves to a ``StructureMethods`` accessor for the object.
    """

    def _make_structure_accessor(self):
        """ Validate the object and build its ``StructureMethods`` accessor.

        Returns:
            StructureMethods: An accessor wrapping ``self``.

        Raises:
            AttributeError: If ``self`` is an ``Index``, or if
                ``only_contains_mols`` reports that it holds anything other
                than molecules.
        """
        if isinstance(self, Index):
            # The two literals are concatenated implicitly, so the first one
            # must end with a space to keep the message readable.
            raise AttributeError('Can only use .mol accessor with molecules, '
                                 'which use np.object_ in scikit-chem.')
        if not only_contains_mols(self):
            raise AttributeError('Can only use .mol accessor with '
                                 'Series that only contain mols.')
        return StructureMethods(self)

    # Expose the accessor as ``<object>.mol``.
    mol = AccessorProperty(StructureMethods, _make_structure_accessor)
# Append the mixin to the base classes of pandas' Series at import time so
# that Series objects pick up the ``mol`` attribute defined on the mixin.
Series.__bases__ = Series.__bases__ + (StructureAccessorMixin,)