A couple of chemical feature related enhancements (#2078)

* fixes #1636
a bit of refactoring of the tests

* Fixes #2077
Add better confId handling to the chemical feature machinery
This commit is contained in:
Greg Landrum
2018-10-01 17:38:36 +02:00
committed by GitHub
parent 848aacc231
commit 3e00673a28
7 changed files with 110 additions and 29 deletions

View File

@@ -24,7 +24,7 @@
namespace RDKit {
FeatSPtrList MolChemicalFeatureFactory::getFeaturesForMol(
const ROMol &mol, const char *includeOnly) const {
const ROMol &mol, const char *includeOnly, int confId) const {
PRECONDITION(includeOnly, "bad limits");
std::string limits(includeOnly);
@@ -33,8 +33,7 @@ FeatSPtrList MolChemicalFeatureFactory::getFeaturesForMol(
#endif
FeatSPtrList res;
int idx = 1;
typedef std::vector<std::pair<std::string, std::set<int> > >
MatchSetCollection;
typedef std::vector<std::pair<std::string, std::set<int>>> MatchSetCollection;
MatchSetCollection matchSets;
for (auto featDefIt = beginFeatureDefs(); featDefIt != endFeatureDefs();
featDefIt++) {
@@ -73,6 +72,7 @@ FeatSPtrList MolChemicalFeatureFactory::getFeaturesForMol(
// Set up the feature:
auto *newFeat =
new MolChemicalFeature(&mol, this, featDef.get(), idx++);
newFeat->setActiveConformer(confId);
MolChemicalFeature::AtomPtrContainer &atoms = newFeat->d_atoms;
atoms.resize(match.size());
@@ -119,4 +119,4 @@ MolChemicalFeatureFactory *buildFeatureFactory(std::istream &inStream) {
return res;
}
}
} // namespace RDKit

View File

@@ -57,18 +57,21 @@ class RDKIT_MOLCHEMICALFEATURES_EXPORT MolChemicalFeatureFactory {
\param mol The molecule of interest
\param includeOnly (optional) if this is non-null, only features in this
family will be returned
\param confId (optional) the conformer id to use
*/
FeatSPtrList getFeaturesForMol(const ROMol &mol,
const char *includeOnly = "") const;
FeatSPtrList getFeaturesForMol(const ROMol &mol, const char *includeOnly = "",
int confId = -1) const;
private:
MolChemicalFeatureDef::CollectionType d_featDefs;
};
//! constructs a MolChemicalFeatureFactory from the data in a stream
RDKIT_MOLCHEMICALFEATURES_EXPORT MolChemicalFeatureFactory *buildFeatureFactory(std::istream &inStream);
RDKIT_MOLCHEMICALFEATURES_EXPORT MolChemicalFeatureFactory *buildFeatureFactory(
std::istream &inStream);
//! constructs a MolChemicalFeatureFactory from the data in a string
RDKIT_MOLCHEMICALFEATURES_EXPORT MolChemicalFeatureFactory *buildFeatureFactory(const std::string &featureData);
RDKIT_MOLCHEMICALFEATURES_EXPORT MolChemicalFeatureFactory *buildFeatureFactory(
const std::string &featureData);
} // end of namespace RDKit

View File

@@ -51,8 +51,9 @@ struct feat_wrapper {
.def("GetType", &MolChemicalFeature::getType,
"Get the specific type for the feature",
python::return_value_policy<python::copy_const_reference>())
.def("GetPos", (RDGeom::Point3D (MolChemicalFeature::*)(int) const) &
MolChemicalFeature::getPos,
.def("GetPos",
(RDGeom::Point3D(MolChemicalFeature::*)(int) const) &
MolChemicalFeature::getPos,
(python::arg("self"), python::arg("confId") = -1),
"Get the location of the chemical feature")
.def("GetAtomIds", getFeatAtomIds,
@@ -64,8 +65,12 @@ struct feat_wrapper {
"Get the factory used to generate this feature",
python::return_value_policy<python::reference_existing_object>())
.def("ClearCache", &MolChemicalFeature::clearCache,
"Clears the cache used to store position information.");
"Clears the cache used to store position information.")
.def("SetActiveConformer", &MolChemicalFeature::setActiveConformer,
"Sets the conformer to use (must be associated with a molecule).")
.def("GetActiveConformer", &MolChemicalFeature::getActiveConformer,
"Gets the conformer to use.");
};
};
}
} // namespace RDKit
void wrap_MolChemicalFeat() { RDKit::feat_wrapper::wrap(); }

View File

@@ -44,11 +44,11 @@ int getNumMolFeatures(const MolChemicalFeatureFactory &factory,
}
FeatSPtr getMolFeature(const MolChemicalFeatureFactory &factory,
const ROMol &mol, int idx, std::string includeOnly = "",
bool recompute = true) {
const ROMol &mol, int idx, std::string includeOnly,
bool recompute, int confId) {
static FeatSPtrList feats;
if (recompute) {
feats = factory.getFeaturesForMol(mol, includeOnly.c_str());
feats = factory.getFeaturesForMol(mol, includeOnly.c_str(), confId);
}
if (idx < 0 || idx >= static_cast<int>(feats.size())) {
throw IndexErrorException(idx);
@@ -102,11 +102,11 @@ struct featfactory_wrapper {
.def("GetMolFeature", getMolFeature,
(python::arg("mol"), python::arg("idx"),
python::arg("includeOnly") = std::string(""),
python::arg("recompute") = true),
python::arg("recompute") = true, python::arg("confId") = -1),
python::with_custodian_and_ward_postcall<0, 2>(),
"returns a particular feature (by index)");
};
};
}
} // namespace RDKit
void wrap_factory() { RDKit::featfactory_wrapper::wrap(); }

View File

@@ -53,19 +53,18 @@ class TestCase(unittest.TestCase):
positions = [[1.3041, -0.6079, 0.0924], [-0.7066, 0.5994, 0.1824], [1.3041, -0.6079, 0.0924]]
targetAids = [[3], [1], [3]]
i = 0
for feat in feats:
self.failUnless(feat.GetFamily() == fTypes[i])
for i,feat in enumerate(feats):
self.assertEqual(feat.GetFamily(),fTypes[i])
pos = list(feat.GetPos())
aids = list(feat.GetAtomIds())
self.failUnless(aids == targetAids[i])
self.failUnless(lstFeq(pos, positions[i]))
self.assertEqual(aids,targetAids[i])
self.assertTrue(lstFeq(pos, positions[i]))
nmol = feat.GetMol()
self.failUnless(Chem.MolToSmiles(nmol) == "COCN")
self.assertEqual(Chem.MolToSmiles(nmol),"COCN")
ncfac = feat.GetFactory()
self.failUnless(ncfac.GetNumFeatureDefs() == 2)
i += 1
self.assertEqual(ncfac.GetNumFeatureDefs(), 2)
self.assertEqual(feat.GetActiveConformer(),-1)
def testIncludeOnly(self):
cfac = ChemicalFeatures.BuildFeatureFactory(
os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolChemicalFeatures', 'test_data',

View File

@@ -809,7 +809,8 @@ void testNestedAtomTypes() {
void testGithub252() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << "Testing Github Issue #252: crash when calling "
"getPos() with no conformer." << std::endl;
"getPos() with no conformer."
<< std::endl;
BOOST_LOG(rdErrorLog) << " expect a precondition failure message below"
<< std::endl;
@@ -846,6 +847,78 @@ void testGithub252() {
BOOST_LOG(rdErrorLog) << " Done" << std::endl;
}
void testGithub2077() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << "Testing github 2077: add confId argument to "
"MolChemicalFeatureFactor::getFeaturesForMol()"
<< std::endl;
ROMol *testMol;
Conformer *conf;
std::string inText;
FeatSPtrList featSPtrs;
boost::shared_ptr<MolChemicalFeature> featSPtr;
MolChemicalFeatureFactory *factory;
MolChemicalFeatureDef::CollectionType::value_type featDef;
inText =
"DefineFeature HDonor1 [N,O;!H0]\n"
" Family HBondDonor\n"
" Weights 1.0\n"
"EndFeature\n"
"DefineFeature Carboxyl1 C(=O)[O;H1,-]\n"
" Family ZnBinder\n"
" Weights 1.0,1.0,1.0\n"
"EndFeature\n";
factory = buildFeatureFactory(inText);
TEST_ASSERT(factory);
TEST_ASSERT(factory->getNumFeatureDefs() == 2);
testMol = SmilesToMol("C(=O)O");
TEST_ASSERT(testMol);
conf = new Conformer(3);
testMol->addConformer(conf, true);
conf->setAtomPos(0, RDGeom::Point3D(0, 0, 0.0));
conf->setAtomPos(1, RDGeom::Point3D(1.2, 0, 0.0));
conf->setAtomPos(2, RDGeom::Point3D(0, 1.5, 0.0));
conf = new Conformer(3);
testMol->addConformer(conf, true);
conf->setAtomPos(0, RDGeom::Point3D(1, 0, 0.0));
conf->setAtomPos(1, RDGeom::Point3D(2.2, 0, 0.0));
conf->setAtomPos(2, RDGeom::Point3D(1, 1.5, 0.0));
{
featSPtrs = factory->getFeaturesForMol(*testMol);
TEST_ASSERT(featSPtrs.size() == 2);
featSPtr = *featSPtrs.begin();
TEST_ASSERT(featSPtr->getActiveConformer() == -1);
TEST_ASSERT(featSPtr->getFamily() == "HBondDonor");
TEST_ASSERT(featSPtr->getType() == "HDonor1");
TEST_ASSERT(feq(featSPtr->getPos().x, 0.0));
TEST_ASSERT(feq(featSPtr->getPos().y, 1.5));
TEST_ASSERT(feq(featSPtr->getPos().z, 0.0));
}
{
featSPtrs = factory->getFeaturesForMol(*testMol, "", 1);
TEST_ASSERT(featSPtrs.size() == 2);
featSPtr = *featSPtrs.begin();
TEST_ASSERT(featSPtr->getActiveConformer() == 1);
TEST_ASSERT(featSPtr->getFamily() == "HBondDonor");
TEST_ASSERT(featSPtr->getType() == "HDonor1");
TEST_ASSERT(feq(featSPtr->getPos().x, 1.0));
TEST_ASSERT(feq(featSPtr->getPos().y, 1.5));
TEST_ASSERT(feq(featSPtr->getPos().z, 0.0));
}
delete testMol;
delete factory;
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
//
//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
@@ -867,4 +940,5 @@ int main() {
#endif
testNestedAtomTypes();
testGithub252();
testGithub2077();
}

View File

@@ -11,11 +11,11 @@ from rdkit.Chem.rdChemicalFeatures import *
from rdkit.Chem.rdMolChemicalFeatures import *
def MCFF_GetFeaturesForMol(self, mol, includeOnly=""):
def MCFF_GetFeaturesForMol(self, mol, includeOnly="", confId=-1):
res = []
count = self.GetNumMolFeatures(mol, includeOnly=includeOnly)
for i in range(count):
res.append(self.GetMolFeature(mol, i, includeOnly=includeOnly))
res.append(self.GetMolFeature(mol, i, includeOnly=includeOnly, confId=confId))
return tuple(res)