# Source code for skchem.descriptors.physicochemical

#! /usr/bin/env python
#
# Copyright (C) 2015-Rich Lewis <rl403@cam.ac.uk>
# License: 3-clause BSD

"""
## skchem.descriptors.physicochemical

Physicochemical descriptors and associated functions are defined.

"""

from rdkit.Chem import Descriptors
import pandas as pd
import numpy as np

from ..base import Transformer, Featurizer
from ..utils import camel_to_snail

# Lookup table of descriptor functions: every (name, function) pair in RDKit's
# ``Descriptors.descList`` is keyed by its name passed through
# ``camel_to_snail``.
DESCRIPTORS = dict(
    (camel_to_snail(name), func) for name, func in Descriptors.descList
)


class PhysicochemicalFeaturizer(Transformer, Featurizer):

    """ Physicochemical descriptor generator using RDKit descriptors.

    The descriptors to calculate are held in ``features``, a ``pd.Series``
    of descriptor functions indexed by descriptor name.
    """

    def __init__(self, features='all', **kwargs):

        """ Create a physicochemical descriptor generator.

        Args:
            features (str, list<str>, dict<str, func>, pd.Series or 'all'):
                Descriptors to calculate.  A name or list of names is looked
                up in ``DESCRIPTORS``; a dict or series mapping names to
                functions is used as given; 'all' uses every entry of
                ``DESCRIPTORS``.

            **kwargs:
                Forwarded unchanged to the parent class initializer.
        """

        super(PhysicochemicalFeaturizer, self).__init__(**kwargs)
        self.features = features

    @property
    def features(self):
        """ pd.Series: descriptor functions indexed by descriptor name. """
        return self._features

    @features.setter
    def features(self, features):
        """ Normalize the requested features to a series of functions.

        Raises:
            KeyError: If a requested name is not a key of ``DESCRIPTORS``.
            NotImplementedError: If ``features`` is of an unsupported type.
        """

        # Container types are tested before the ``== 'all'`` comparison:
        # comparing a pd.Series with a string is elementwise, and taking the
        # truth value of the result raises ValueError, which previously made
        # the series branch unreachable.
        if isinstance(features, (dict, pd.Series)):
            selected = features
        elif isinstance(features, list):
            selected = {name: DESCRIPTORS[name] for name in features}
        elif features == 'all':
            selected = DESCRIPTORS
        elif isinstance(features, str):
            selected = {features: DESCRIPTORS[features]}
        else:
            raise NotImplementedError('Cannot use features {}'.format(features))

        self._features = pd.Series(selected)
        self._features.index.name = 'physicochemical_features'

    @property
    def name(self):
        """ str: short identifier of this featurizer. """
        return 'physchem'

    @property
    def columns(self):
        """ pd.Index: names of the descriptors that are calculated. """
        return self.features.index

    def _transform_mol(self, mol):
        """ Calculate every selected descriptor for a single molecule.

        Args:
            mol: The molecule passed to each descriptor function.

        Returns:
            np.ndarray: One value per descriptor, in the order of
                ``self.features``.  A descriptor whose function raises
                ``ValueError`` contributes NaN.
        """

        res = []
        # ``.values`` rather than the removed ``Series.iteritems()``: only the
        # functions are needed here, not their names.
        for func in self.features.values:
            try:
                res.append(func(mol))
            except ValueError:
                # Record the failure and keep going.  The previous
                # ``return res.append(...)`` returned None for the whole
                # molecule as soon as one descriptor failed.
                res.append(np.nan)

        return np.array(res)