- switched to using std::vector<unsigned int> for the _isotopicHs property (#3878)

- switched to using std::vector<int> instead of std::list<int> for _ErGAtomTypes property
- added RegisterListConverter<std::vector<unsigned int>>

Co-authored-by: Paolo Tosco <paolo.tosco@novartis.com>
This commit is contained in:
Paolo Tosco
2021-03-05 16:37:43 +01:00
committed by GitHub
parent f5fab0485e
commit f04a9c9a42
7 changed files with 44 additions and 39 deletions

View File

@@ -14,9 +14,6 @@
#include "MonomerInfo.h"
#include <Geometry/Transform3D.h>
#include <Geometry/point.h>
#include <boost/lexical_cast.hpp>
#include <boost/tokenizer.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <boost/algorithm/string/classification.hpp>
#include <boost/dynamic_bitset.hpp>
#include <boost/range/iterator_range.hpp>
@@ -439,15 +436,6 @@ void AssignHsResidueInfo(RWMol &mol) {
}
}
// Formats the values in isoHs as a single space-separated string,
// e.g. {2, 2, 3} -> "2 2 3". An empty vector yields an empty string.
//
// The separator is written only *between* elements, so the result never
// carries leading or trailing whitespace and no trim pass is needed.
std::string isoHsToString(const std::vector<unsigned int> &isoHs) {
  std::stringstream ss;
  // Empty before the first element, a single space before every later one.
  const char *separator = "";
  for (const unsigned int isoH : isoHs) {
    ss << separator << isoH;
    separator = " ";
  }
  return ss.str();
}
std::map<unsigned int, std::vector<unsigned int>> getIsoMap(const ROMol &mol) {
std::map<unsigned int, std::vector<unsigned int>> isoMap;
for (auto atom : mol.atoms()) {
@@ -522,18 +510,9 @@ void addHs(RWMol &mol, bool explicitOnly, bool addCoords,
Atom *newAt = mol.getAtomWithIdx(aidx);
std::vector<unsigned int> isoHs;
std::string isotopicHsProp;
if (newAt->getPropIfPresent(common_properties::_isotopicHs,
isotopicHsProp)) {
isoHs)) {
newAt->clearProp(common_properties::_isotopicHs);
// be lenient on input, even if we write only space-separated
// strings of indices
boost::trim_if(isotopicHsProp, boost::is_any_of(" \t\r\n,()[]{}"));
boost::tokenizer<> tokens(isotopicHsProp);
std::transform(tokens.begin(), tokens.end(), std::back_inserter(isoHs),
[](const std::string &t) {
return boost::lexical_cast<unsigned int>(t);
});
}
std::vector<unsigned int>::const_iterator isoH = isoHs.begin();
unsigned int newIdx;
@@ -792,7 +771,7 @@ void removeHs(RWMol &mol, const RemoveHsParameters &ps, bool sanitize) {
if (ps.removeAndTrackIsotopes) {
for (const auto &pair : getIsoMap(mol)) {
mol.getAtomWithIdx(pair.first)
->setProp(common_properties::_isotopicHs, isoHsToString(pair.second));
->setProp(common_properties::_isotopicHs, pair.second);
}
}
boost::dynamic_bitset<> atomsToRemove{mol.getNumAtoms(), 0};

View File

@@ -144,12 +144,12 @@ RDNumeric::DoubleVector *generateErGFingerprintForReducedGraph(
double *dm = MolOps::getDistanceMat(mol);
// cache the atom type vectors:
std::vector<std::list<int> > tvs;
std::vector<std::vector<int> > tvs;
tvs.reserve(mol.getNumAtoms());
for (ROMol::ConstAtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
++atIt) {
const std::list<int> &tv =
(*atIt)->getProp<std::list<int> >("_ErGAtomTypes");
const std::vector<int> &tv =
(*atIt)->getProp<std::vector<int> >("_ErGAtomTypes");
tvs.push_back(tv);
}
@@ -213,7 +213,7 @@ ROMol *generateMolExtendedReducedGraph(
for (ROMol::AtomIterator atIt = res->beginAtoms(); atIt != res->endAtoms();
++atIt) {
std::list<int> tv;
std::vector<int> tv;
tv.clear();
for (unsigned int i = 0; i < atomTypes->size(); ++i) {
if ((*atomTypes)[i][(*atIt)->getIdx()]) {
@@ -236,7 +236,7 @@ ROMol *generateMolExtendedReducedGraph(
++nSP2;
}
}
std::list<int> tv;
std::vector<int> tv;
if (nAromatic >= 2 || nSP2 >= rdcast<int>(ring.size() / 2)) {
tv.push_back(aromaticFlag);
} else {
@@ -251,7 +251,7 @@ ROMol *generateMolExtendedReducedGraph(
if (mol.getRingInfo()->numAtomRings(i) &&
mol.getAtomWithIdx(i)->getDegree() == 2 &&
res->getAtomWithIdx(i)
->getProp<std::list<int> >("_ErGAtomTypes")
->getProp<std::vector<int> >("_ErGAtomTypes")
.empty()) {
res->removeAtom(i);
}

View File

@@ -16,23 +16,23 @@ class TestCase(unittest.TestCase):
m = Chem.MolFromSmiles('OCCc1ccccc1')
mrg = rdRG.GenerateMolExtendedReducedGraph(m)
mrg.UpdatePropertyCache(False)
self.failUnlessEqual('*cCCO', Chem.MolToSmiles(mrg))
self.assertEqual('*cCCO', Chem.MolToSmiles(mrg))
m = Chem.MolFromSmiles('OCCC1CCCCC1')
mrg = rdRG.GenerateMolExtendedReducedGraph(m)
mrg.UpdatePropertyCache(False)
self.failUnlessEqual('*CCCO', Chem.MolToSmiles(mrg))
self.assertEqual('*CCCO', Chem.MolToSmiles(mrg))
def test2(self):
m = Chem.MolFromSmiles('OCCc1ccccc1')
mrg = rdRG.GenerateMolExtendedReducedGraph(m)
mrg.UpdatePropertyCache(False)
self.failUnlessEqual('*cCCO', Chem.MolToSmiles(mrg))
self.assertEqual('*cCCO', Chem.MolToSmiles(mrg))
fp1 = rdRG.GenerateErGFingerprintForReducedGraph(mrg)
fp2 = rdRG.GetErGFingerprint(m)
md = max(abs(fp1 - fp2))
self.failUnless(md < 1e-4)
self.assertLess(md, 1e-4)
def test3(self):
m = Chem.MolFromSmiles('OCCc1ccccc1')
@@ -41,7 +41,7 @@ class TestCase(unittest.TestCase):
fp2 = rdRG.GetErGFingerprint(m)
md = max(abs(fp1 - fp2))
self.failUnlessAlmostEqual(0.0, md, 4)
self.assertAlmostEqual(0.0, md, 4)
def test4(self):
m = Chem.MolFromSmiles('OCCc1ccccc1')
@@ -49,7 +49,13 @@ class TestCase(unittest.TestCase):
fp2 = rdRG.GetErGFingerprint(m, fuzzIncrement=0.1)
md = max(abs(fp1 - fp2))
self.failUnlessAlmostEqual(0.2, md, 4)
self.assertAlmostEqual(0.2, md, 4)
# Checks that the _ErGAtomTypes atom property of an extended reduced graph can
# be read back from Python via GetPropsAsDict() as a sequence of ints.
def testCanRetrieveProp(self):
m = Chem.MolFromSmiles('OCCc1ccccc1')
mrg = rdRG.GenerateMolExtendedReducedGraph(m)
# One tuple per reduced-graph atom, in atom iteration order.
# NOTE(review): presumably GetPropsAsDict() returns a plain dict, so .get()
# gives None for a missing key and tuple(None) would raise TypeError - confirm.
erg_types = [tuple(atom.GetPropsAsDict().get('_ErGAtomTypes')) for atom in mrg.GetAtoms()]
# An empty tuple means the atom carries the property with no entries in it.
self.assertEqual(erg_types, [(0, 1), (), (), (), (5,)])
if __name__ == '__main__':

View File

@@ -171,11 +171,29 @@ void test2() {
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
// Checks that the "_ErGAtomTypes" property on every atom of an extended
// reduced graph can be retrieved as a std::vector<int>, and that the retrieved
// values match the expected per-atom lists.
void testCanRetrieveProp() {
BOOST_LOG(rdInfoLog)
<< "testing retrieving _ErGAtomTypes from property" << std::endl;
auto m = "OCCc1ccccc1"_smiles;
// Expected property value for each atom, in atom iteration order.
std::vector<std::vector<int>> expected{{0, 1}, {}, {}, {}, {5}};
std::vector<std::vector<int>> res;
// generateMolExtendedReducedGraph() returns a raw ROMol pointer; holding it in
// a unique_ptr frees it when this function exits.
std::unique_ptr<ROMol> mrg(
ReducedGraphs::generateMolExtendedReducedGraph(*m));
for (const auto atom : mrg->atoms()) {
std::vector<int> atomTypes;
// The property has to be present on every atom, including atoms whose list
// is expected to be empty.
TEST_ASSERT(atom->getPropIfPresent("_ErGAtomTypes", atomTypes));
res.push_back(atomTypes);
}
TEST_ASSERT(res == expected);
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
// Test driver: calls RDLog::InitLogs() and then runs each test function in
// order. Returns 0 once all of them have returned.
int main(int argc, char *argv[]) {
// The command-line arguments are not used; the void casts mark them as
// intentionally unused.
(void)argc;
(void)argv;
RDLog::InitLogs();
test1();
test2();
testCanRetrieveProp();
return 0;
}

View File

@@ -5904,8 +5904,7 @@ M END
m_noh = Chem.RemoveHs(m, ps)
self.assertEqual(m_noh.GetNumAtoms(), m.GetNumAtoms() - 2)
self.assertTrue(m_noh.GetAtomWithIdx(2).HasProp("_isotopicHs"))
self.assertEqual(tuple(map(int,
m_noh.GetAtomWithIdx(2).GetProp("_isotopicHs").split())), (2, 2))
self.assertEqual(tuple(m_noh.GetAtomWithIdx(2).GetPropsAsDict().get("_isotopicHs")), (2, 2))
m_h = Chem.AddHs(m_noh)
self.assertFalse(m_h.GetAtomWithIdx(2).HasProp("_isotopicHs"))
self.assertEqual(

View File

@@ -7715,8 +7715,10 @@ void testRemoveAndTrackIsotopes() {
std::unique_ptr<ROMol> mNoH(removeHs(*static_cast<ROMol *>(m.get()), ps));
TEST_ASSERT(mNoH->getAtomWithIdx(0)->getAtomicNum() == 6);
TEST_ASSERT(mNoH->getAtomWithIdx(0)->hasProp(common_properties::_isotopicHs));
TEST_ASSERT(mNoH->getAtomWithIdx(0)->getProp<std::string>(
common_properties::_isotopicHs) == "2");
std::vector<unsigned int> isoHs;
TEST_ASSERT(mNoH->getAtomWithIdx(0)->getPropIfPresent(common_properties::_isotopicHs, isoHs));
TEST_ASSERT(isoHs.size() == 1);
TEST_ASSERT(isoHs.front() == 2);
TEST_ASSERT(mNoH->getAtomWithIdx(30)->getAtomicNum() == 6);
TEST_ASSERT(
!mNoH->getAtomWithIdx(30)->hasProp(common_properties::_isotopicHs));

View File

@@ -118,6 +118,7 @@ BOOST_PYTHON_MODULE(rdBase) {
RegisterListConverter<int>();
RegisterListConverter<std::vector<int>>();
RegisterListConverter<std::vector<unsigned int>>();
python::register_exception_translator<IndexErrorException>(
&translate_index_error);