ScaffoldNetwork rearrangements (#2985)

* backup

* rearrange the network to make it cleaner

- GenericBond scaffolds are now directly connected to the corresponding Generic scaffold
- Attachment points are no longer removed from Generic (or GenericBond) scaffolds

* remove dead code
This commit is contained in:
Greg Landrum
2020-03-06 13:51:38 +01:00
committed by GitHub
parent 494c28de18
commit fe745cfc04
3 changed files with 102 additions and 68 deletions

View File

@@ -111,7 +111,7 @@ ROMol *flattenMol(const ROMol &mol, const ScaffoldNetworkParams &params) {
res = new RWMol(mol);
}
for (auto atom : res->atoms()) {
if (params.flattenIsotopes){
if (params.flattenIsotopes) {
atom->setIsotope(0);
}
if (params.flattenChirality) {
@@ -188,7 +188,7 @@ size_t addEntryIfMissing(T &vect, const V &e,
if (viter == vect.end()) {
vect.push_back(e);
res = vect.size() - 1;
if (counts){
if (counts) {
counts->push_back(0);
}
} else {
@@ -232,8 +232,10 @@ void addMolToNetwork(const ROMol &mol, ScaffoldNetwork &network,
makeScaffoldGeneric(*fmol, doAtoms, doBonds));
auto gbsmi = MolToSmiles(*gbmol);
auto gbidx = addEntryIfMissing(network.nodes, gbsmi, &network.counts);
addEntryIfMissing(network.edges,
NetworkEdge({fidx, gbidx, EdgeType::GenericBond}));
if (gidx != gbidx) {
addEntryIfMissing(network.edges,
NetworkEdge({gidx, gbidx, EdgeType::GenericBond}));
}
}
}
} else {
@@ -263,15 +265,6 @@ void addMolToNetwork(const ROMol &mol, ScaffoldNetwork &network,
auto gidx = addEntryIfMissing(network.nodes, gsmi, &network.counts);
addEntryIfMissing(network.edges,
NetworkEdge({lidx, gidx, EdgeType::Generic}));
if (params.includeScaffoldsWithAttachments &&
params.includeScaffoldsWithoutAttachments) {
std::unique_ptr<ROMol> amol(removeAttachmentPoints(*gmol, params));
auto asmi = MolToSmiles(*amol);
auto aidx = addEntryIfMissing(network.nodes, asmi, &network.counts);
addEntryIfMissing(
network.edges,
NetworkEdge({gidx, aidx, EdgeType::RemoveAttachment}));
}
if (params.includeGenericBondScaffolds) {
bool doAtoms = true;
bool doBonds = true;
@@ -279,16 +272,9 @@ void addMolToNetwork(const ROMol &mol, ScaffoldNetwork &network,
makeScaffoldGeneric(*fragMol, doAtoms, doBonds));
auto gbsmi = MolToSmiles(*gbmol);
auto gbidx = addEntryIfMissing(network.nodes, gbsmi, &network.counts);
addEntryIfMissing(network.edges,
NetworkEdge({lidx, gbidx, EdgeType::GenericBond}));
if (params.includeScaffoldsWithAttachments &&
params.includeScaffoldsWithoutAttachments) {
std::unique_ptr<ROMol> amol(removeAttachmentPoints(*gbmol, params));
auto asmi = MolToSmiles(*amol);
auto aidx = addEntryIfMissing(network.nodes, asmi, &network.counts);
addEntryIfMissing(
network.edges,
NetworkEdge({gbidx, aidx, EdgeType::RemoveAttachment}));
if (gidx != gbidx) {
addEntryIfMissing(network.edges,
NetworkEdge({gidx, gbidx, EdgeType::GenericBond}));
}
}
}
@@ -301,6 +287,29 @@ void addMolToNetwork(const ROMol &mol, ScaffoldNetwork &network,
auto aidx = addEntryIfMissing(network.nodes, asmi, &network.counts);
addEntryIfMissing(network.edges,
NetworkEdge({lidx, aidx, EdgeType::RemoveAttachment}));
if (params.includeGenericScaffolds) {
bool doAtoms = true;
bool doBonds = false;
std::unique_ptr<ROMol> gmol(
makeScaffoldGeneric(*amol, doAtoms, doBonds));
auto gsmi = MolToSmiles(*gmol);
auto gidx = addEntryIfMissing(network.nodes, gsmi, &network.counts);
addEntryIfMissing(network.edges,
NetworkEdge({aidx, gidx, EdgeType::Generic}));
if (params.includeGenericBondScaffolds) {
bool doAtoms = true;
bool doBonds = true;
std::unique_ptr<ROMol> gbmol(
makeScaffoldGeneric(*amol, doAtoms, doBonds));
auto gbsmi = MolToSmiles(*gbmol);
auto gbidx = addEntryIfMissing(network.nodes, gbsmi, &network.counts);
if (gidx != gbidx) {
addEntryIfMissing(
network.edges,
NetworkEdge({gidx, gbidx, EdgeType::GenericBond}));
}
}
}
}
}
}

View File

@@ -31,23 +31,23 @@ class TestScaffoldNetwork(unittest.TestCase):
net = rdScaffoldNetwork.CreateScaffoldNetwork(ms, params)
self.assertEqual(len(net.nodes), 12)
self.assertEqual(len(net.edges), 12)
self.assertEqual(len(net.edges), 13)
self.assertEqual(len(net.counts), len(net.nodes))
self.assertEqual(len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Fragment]),
4)
self.assertEqual(len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Generic]), 6)
self.assertEqual(
len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Fragment]), 4)
self.assertEqual(len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Generic]), 3)
self.assertEqual(
len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.RemoveAttachment]), 5)
len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.RemoveAttachment]), 3)
net = rdScaffoldNetwork.ScaffoldNetwork()
rdScaffoldNetwork.UpdateScaffoldNetwork(ms, net, params)
self.assertEqual(len(net.nodes), 12)
self.assertEqual(len(net.edges), 12)
self.assertEqual(len(net.edges), 13)
self.assertEqual(len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Fragment]),
4)
self.assertEqual(len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Generic]), 6)
self.assertEqual(
len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Fragment]), 4)
self.assertEqual(len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Generic]), 3)
self.assertEqual(
len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.RemoveAttachment]), 5)
len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.RemoveAttachment]), 3)
def test2Basics(self):
smis = ["c1ccccc1CC1NC(=O)CCC1", "c1cccnc1CC1NC(=O)CCC1"]
@@ -57,8 +57,8 @@ class TestScaffoldNetwork(unittest.TestCase):
net = rdScaffoldNetwork.CreateScaffoldNetwork(ms, params)
self.assertEqual(len(net.nodes), 7)
self.assertEqual(len(net.edges), 7)
self.assertEqual(
len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Fragment]), 4)
self.assertEqual(len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Fragment]),
4)
self.assertEqual(len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Generic]), 3)
def test3Update(self):
@@ -73,23 +73,23 @@ class TestScaffoldNetwork(unittest.TestCase):
self.assertEqual(list(net.counts).count(1), len(net.counts))
rdScaffoldNetwork.UpdateScaffoldNetwork(ms[1:2], net, params)
self.assertEqual(len(net.nodes), 12)
self.assertEqual(len(net.edges), 12)
self.assertEqual(len(net.edges), 13)
self.assertEqual(len(net.counts), len(net.nodes))
self.assertEqual(len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Fragment]),
4)
self.assertEqual(len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Generic]), 6)
self.assertEqual(
len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Fragment]), 4)
self.assertEqual(len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Generic]), 3)
self.assertEqual(
len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.RemoveAttachment]), 5)
len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.RemoveAttachment]), 3)
net = rdScaffoldNetwork.CreateScaffoldNetwork(ms[0:1], params)
rdScaffoldNetwork.UpdateScaffoldNetwork(ms[1:2], net, params)
self.assertEqual(len(net.nodes), 12)
self.assertEqual(len(net.edges), 12)
self.assertEqual(len(net.edges), 13)
self.assertEqual(len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Fragment]),
4)
self.assertEqual(len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Generic]), 6)
self.assertEqual(
len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Fragment]), 4)
self.assertEqual(len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.Generic]), 3)
self.assertEqual(
len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.RemoveAttachment]), 5)
len([x for x in net.edges if x.type == rdScaffoldNetwork.EdgeType.RemoveAttachment]), 3)
def test4Str(self):
smis = ["c1ccccc1CC1NC(=O)CCC1"]
@@ -136,14 +136,7 @@ class TestScaffoldNetwork(unittest.TestCase):
params.includeGenericBondScaffolds = True
net = rdScaffoldNetwork.CreateScaffoldNetwork(ms, params)
self.assertEqual(len(net.nodes), 11)
self.assertEqual(len(net.edges), 11)
self.assertEqual(len(net.edges), 10)
if __name__ == '__main__':

View File

@@ -198,12 +198,12 @@ TEST_CASE("addMolToNetwork", "[unittest, scaffolds]") {
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type == ScaffoldNetwork::EdgeType::Generic;
}) == 2);
}) == 4);
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type ==
ScaffoldNetwork::EdgeType::RemoveAttachment;
}) == 4);
}) == 2);
CHECK(std::count(net.counts.begin(), net.counts.end(), 1) ==
net.counts.size());
@@ -285,7 +285,7 @@ TEST_CASE("Network defaults", "[scaffolds]") {
ScaffoldNetwork::updateScaffoldNetwork(ms, net, ps);
CHECK(net.nodes.size() == 12);
CHECK(net.counts.size() == net.nodes.size());
CHECK(net.edges.size() == 12);
CHECK(net.edges.size() == 13);
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type == ScaffoldNetwork::EdgeType::Fragment;
@@ -293,12 +293,12 @@ TEST_CASE("Network defaults", "[scaffolds]") {
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type == ScaffoldNetwork::EdgeType::Generic;
}) == 3);
}) == 6);
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type ==
ScaffoldNetwork::EdgeType::RemoveAttachment;
}) == 5);
}) == 3);
}
SECTION("don't remove attachments (makes sure parameters actually work)") {
ScaffoldNetwork::ScaffoldNetworkParams ps;
@@ -323,7 +323,7 @@ TEST_CASE("Network defaults", "[scaffolds]") {
ScaffoldNetwork::createScaffoldNetwork(ms, ps);
CHECK(net.nodes.size() == 12);
CHECK(net.counts.size() == net.nodes.size());
CHECK(net.edges.size() == 12);
CHECK(net.edges.size() == 13);
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type == ScaffoldNetwork::EdgeType::Fragment;
@@ -331,12 +331,12 @@ TEST_CASE("Network defaults", "[scaffolds]") {
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type == ScaffoldNetwork::EdgeType::Generic;
}) == 3);
}) == 6);
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type ==
ScaffoldNetwork::EdgeType::RemoveAttachment;
}) == 5);
}) == 3);
}
}
TEST_CASE("ostream integration", "[scaffolds]") {
@@ -413,7 +413,7 @@ TEST_CASE("no attachment points", "[unittest, scaffolds]") {
ScaffoldNetwork::detail::addMolToNetwork(*m, net, ps);
CHECK(net.nodes.size() == 9);
CHECK(net.counts.size() == net.nodes.size());
CHECK(net.edges.size() == 9);
CHECK(net.edges.size() == 8);
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type == ScaffoldNetwork::EdgeType::Fragment;
@@ -431,7 +431,7 @@ TEST_CASE("no attachment points", "[unittest, scaffolds]") {
[](ScaffoldNetwork::NetworkEdge e) {
return e.type ==
ScaffoldNetwork::EdgeType::GenericBond;
}) == 3);
}) == 2);
// std::copy(net.nodes.begin(), net.nodes.end(),
// std::ostream_iterator<std::string>(std::cerr, " "));
// std::cerr << std::endl;
@@ -440,6 +440,38 @@ TEST_CASE("no attachment points", "[unittest, scaffolds]") {
CHECK(std::find(net.nodes.begin(), net.nodes.end(), "**1*****1") !=
net.nodes.end());
}
SECTION("generic bonds 2") {
// Regression test for a very particular case: the generic-bond scaffold is
// identical to the generic scaffold that leads to it, so both resolve to the
// same node. Make sure the network does not end up with a GenericBond edge
// that starts and ends on that one node (a self edge).
auto m = "Cc1ccccc1OC1C(C)C1"_smiles;
REQUIRE(m);
// Only includeGenericBondScaffolds is changed here; every other parameter
// keeps whatever default ScaffoldNetworkParams gives it.
ScaffoldNetwork::ScaffoldNetworkParams ps;
ps.includeGenericBondScaffolds = true;
ScaffoldNetwork::ScaffoldNetwork net;
ScaffoldNetwork::detail::addMolToNetwork(*m, net, ps);
// One count entry is expected per node.
CHECK(net.nodes.size() == 14);
CHECK(net.counts.size() == net.nodes.size());
CHECK(net.edges.size() == 13);
// Per-type edge counts. The four types asserted below add up to 11 of the
// 13 edges; the remaining 2 are of a type not checked in this section
// (presumably RemoveAttachment -- TODO confirm).
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type == ScaffoldNetwork::EdgeType::Fragment;
}) == 2);
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type ==
ScaffoldNetwork::EdgeType::Initialize;
}) == 1);
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type == ScaffoldNetwork::EdgeType::Generic;
}) == 5);
// A self edge from the coinciding Generic/GenericBond scaffold would show up
// as an extra GenericBond edge here.
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type ==
ScaffoldNetwork::EdgeType::GenericBond;
}) == 3);
}
SECTION("generic + no attach") {
auto m = "Cc1ccccc1OC1C(C)C1"_smiles;
REQUIRE(m);
@@ -465,12 +497,12 @@ TEST_CASE("no attachment points", "[unittest, scaffolds]") {
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type == ScaffoldNetwork::EdgeType::Generic;
}) == 3);
}) == 5);
CHECK(std::count_if(net.edges.begin(), net.edges.end(),
[](ScaffoldNetwork::NetworkEdge e) {
return e.type ==
ScaffoldNetwork::EdgeType::RemoveAttachment;
}) == 4);
}) == 2);
// std::copy(net.nodes.begin(), net.nodes.end(),
// std::ostream_iterator<std::string>(std::cerr, " "));
// std::cerr << std::endl;
@@ -730,7 +762,7 @@ TEST_CASE("Serialization", "[serialization]") {
ScaffoldNetwork::updateScaffoldNetwork(ms, net, ps);
CHECK(net.nodes.size() == 12);
CHECK(net.counts.size() == net.nodes.size());
CHECK(net.edges.size() == 12);
CHECK(net.edges.size() == 13);
std::stringstream ss;
boost::archive::text_oarchive oa(ss);
oa << net;
@@ -742,7 +774,7 @@ TEST_CASE("Serialization", "[serialization]") {
ia >> net2;
CHECK(net2.nodes.size() == 12);
CHECK(net2.counts.size() == net2.nodes.size());
CHECK(net2.edges.size() == 12);
CHECK(net2.edges.size() == 13);
CHECK(net2.nodes == net.nodes);
CHECK(net2.counts == net.counts);
CHECK(net2.edges == net.edges);
@@ -753,7 +785,7 @@ TEST_CASE("Serialization", "[serialization]") {
ScaffoldNetwork::ScaffoldNetwork net2(pkl);
CHECK(net2.nodes.size() == 12);
CHECK(net2.counts.size() == net2.nodes.size());
CHECK(net2.edges.size() == 12);
CHECK(net2.edges.size() == 13);
CHECK(net2.nodes == net.nodes);
CHECK(net2.counts == net.counts);
CHECK(net2.edges == net.edges);