diff --git a/Code/GraphMol/RGroupDecomposition/RGroupDecomp.cpp b/Code/GraphMol/RGroupDecomposition/RGroupDecomp.cpp index b3acfd362..1df6c359d 100644 --- a/Code/GraphMol/RGroupDecomposition/RGroupDecomp.cpp +++ b/Code/GraphMol/RGroupDecomposition/RGroupDecomp.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -170,9 +171,6 @@ bool RGroupDecompositionParameters::prepareCore(RWMol &core, MolOps::AdjustQueryParameters adjustParams; adjustParams.makeDummiesQueries = true; adjustParams.adjustDegree = false; - adjustParams.adjustHeavyDegree = onlyMatchAtRGroups; - // if (onlyMatchAtRGroups) - // adjustParams.adjustDegreeFlags |= MolOps::ADJUST_IGNOREHS; adjustQueryProperties(core, &adjustParams); for (auto &it : atomToLabel) @@ -577,13 +575,14 @@ struct RGroupDecompData { auto atm = atoms.find(userLabel); if (atm == atoms.end()) continue; // label not used in the rgroup Atom *atom = atm->second; - mappings[userLabel] = ++count; - + mappings[userLabel] = userLabel; + if(count < userLabel) + count = userLabel; if (atom->getAtomicNum() == 0) { // add to existing dummy/rlabel - setRlabel(atom, count); + setRlabel(atom, userLabel); } else { // adds new rlabel auto *newAt = new Atom(0); - setRlabel(newAt, count); + setRlabel(newAt, userLabel); atomsToAdd.push_back(std::make_pair(atom, newAt)); } } @@ -639,8 +638,6 @@ struct RGroupDecompData { } mol.setProp(done, true); - // std::cerr << "==> relabelling: " << mol.getProp("idx") << " <++idx" - // << std::endl; std::vector> atomsToAdd; // adds -R if necessary @@ -823,6 +820,52 @@ int RGroupDecomposition::add(const ROMol &inmol) { useChirality); } + if (data->params.onlyMatchAtRGroups) { + // First find all the core atoms that have user + // labels and put their indices into core_atoms_with_user_labels + std::set core_atoms_with_user_labels; + + for(auto atom : coreIt->second.atoms()) { + if(atom->hasProp(RLABEL)) { + core_atoms_with_user_labels.insert(atom->getIdx()); + } + } + + std::vector 
tmatches_filtered; + for(auto &mv : tmatches) { + bool passes_filter = true; + boost::dynamic_bitset<> target_match_indices(mol.getNumAtoms()); + for(auto &match : mv) { + target_match_indices[match.second] = 1; + } + + for(auto &match : mv) { + const Atom* atm= mol.getAtomWithIdx(match.second); + // does the matched core atom carry a user rgroup label or not? + if(core_atoms_with_user_labels.find(match.first) == + core_atoms_with_user_labels.end()) { + + // nope... if any neighbor is not part of the substructure + // make sure that neighbor is a hydrogen, otherwise, skip the match + for (const auto &nbri : boost::make_iterator_range(mol.getAtomNeighbors(atm))) { + const auto &nbr = mol[nbri]; + if(nbr->getAtomicNum() != 1 && !target_match_indices[nbr->getIdx()]) { + passes_filter=false; + break; + } + } + } + if(!passes_filter) + break; + } + + if (passes_filter) { + tmatches_filtered.push_back( mv ); + } + } + tmatches = tmatches_filtered; + } + if (!tmatches.size()) { continue; } else { diff --git a/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py b/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py index 4003ab978..135b345fa 100644 --- a/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py +++ b/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017, Novartis Institutes for BioMedical Research Inc. +# Copyright (c) 2018, Novartis Institutes for BioMedical Research Inc. # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -41,7 +41,7 @@ from rdkit.Chem.rdRGroupDecomposition import RGroupDecompose, RGroupDecompositio from collections import OrderedDict class TestCase(unittest.TestCase) : - def atest_multicores(self): + def test_multicores(self): cores_smi_easy = OrderedDict() cores_smi_hard = OrderedDict() @@ -162,9 +162,49 @@ C1CCO[C@@](S)(P)1 Chem.MolFromSmiles("CC")] res, unmatched = RGroupDecompose(cores, mols) - self.assertEquals(len(res), 1) - self.assertEquals(unmatched, [0,1,2,4]) - + self.assertEqual(len(res), 1) + self.assertEqual(unmatched, [0,1,2,4]) + + def test_userlabels(self): + smis = ["C(Cl)N(N)O(O)"] + mols = [Chem.MolFromSmiles(smi) for smi in smis] + smarts = 'C([*:1])N([*:5])O([*:6])' + core = Chem.MolFromSmarts(smarts) + rg = RGroupDecomposition(core) + for m in mols: + rg.Add(m) + rg.Process() + self.assertEqual(rg.GetRGroupsAsColumns(asSmiles=True), + {'Core': ['C(N(O[*:6])[*:5])[*:1]'], + 'R1': ['Cl[*:1]'], + 'R5': ['[H]N([H])[*:5]'], + 'R6': ['[H]O[*:6]']}) + + smarts = 'C([*:4])N([*:5])O([*:6])' + + core = Chem.MolFromSmarts(smarts) + rg = RGroupDecomposition(core) + for m in mols: + rg.Add(m) + rg.Process() + self.assertEqual(rg.GetRGroupsAsColumns(asSmiles=True), + {'Core': ['C(N(O[*:6])[*:5])[*:4]'], + 'R4': ['Cl[*:4]'], + 'R5': ['[H]N([H])[*:5]'], + 'R6': ['[H]O[*:6]']}) + + def test_match_only_at_rgroups(self): + smiles = ['c1ccccc1']#, 'c1(Cl)ccccc1', 'c1(Cl)cc(Br)ccc1'] + mols = [Chem.MolFromSmiles(smi) for smi in smiles] + + core1 = Chem.MolFromSmiles("c1([*:5])cc([*:6])ccc1") + params = RGroupDecompositionParameters() + params.onlyMatchAtRGroups=True + rg = RGroupDecomposition(core1, params) + for smi,m in zip(smiles,mols): + self.assertTrue(rg.Add(m)!=-1, smi) + + if __name__ == '__main__': unittest.main()