RGroupDecomposition fixes: keep user labels and make onlyMatchAtRGroups more robust (#2202)

* Fix onlyMatchAtRGroups
 The adjust-queries approach wasn't working

* Keep user RLabels if present in the core

* Fix tests

* Fix for review comments
This commit is contained in:
Brian Kelley
2019-01-02 12:57:34 -05:00
committed by Greg Landrum
parent a59997b7e4
commit 73e6b751ce
2 changed files with 97 additions and 14 deletions

View File

@@ -38,6 +38,7 @@
#include <GraphMol/ChemTransforms/ChemTransforms.h>
#include <GraphMol/FMCS/FMCS.h>
#include <boost/scoped_ptr.hpp>
#include <boost/dynamic_bitset.hpp>
#include <set>
#include <utility>
#include <vector>
@@ -170,9 +171,6 @@ bool RGroupDecompositionParameters::prepareCore(RWMol &core,
MolOps::AdjustQueryParameters adjustParams;
adjustParams.makeDummiesQueries = true;
adjustParams.adjustDegree = false;
adjustParams.adjustHeavyDegree = onlyMatchAtRGroups;
// if (onlyMatchAtRGroups)
// adjustParams.adjustDegreeFlags |= MolOps::ADJUST_IGNOREHS;
adjustQueryProperties(core, &adjustParams);
for (auto &it : atomToLabel)
@@ -577,13 +575,14 @@ struct RGroupDecompData {
auto atm = atoms.find(userLabel);
if (atm == atoms.end()) continue; // label not used in the rgroup
Atom *atom = atm->second;
mappings[userLabel] = ++count;
mappings[userLabel] = userLabel;
if(count < userLabel)
count = userLabel;
if (atom->getAtomicNum() == 0) { // add to existing dummy/rlabel
setRlabel(atom, count);
setRlabel(atom, userLabel);
} else { // adds new rlabel
auto *newAt = new Atom(0);
setRlabel(newAt, count);
setRlabel(newAt, userLabel);
atomsToAdd.push_back(std::make_pair(atom, newAt));
}
}
@@ -639,8 +638,6 @@ struct RGroupDecompData {
}
mol.setProp(done, true);
// std::cerr << "==> relabelling: " << mol.getProp<int>("idx") << " <++idx"
// << std::endl;
std::vector<std::pair<Atom *, Atom *>> atomsToAdd; // adds -R if necessary
@@ -823,6 +820,52 @@ int RGroupDecomposition::add(const ROMol &inmol) {
useChirality);
}
if (data->params.onlyMatchAtRGroups) {
// First find all the core atoms that have a user
// label and put their indices into core_atoms_with_user_labels
std::set<int> core_atoms_with_user_labels;
for(auto atom : coreIt->second.atoms()) {
if(atom->hasProp(RLABEL)) {
core_atoms_with_user_labels.insert(atom->getIdx());
}
}
std::vector<MatchVectType> tmatches_filtered;
for(auto &mv : tmatches) {
bool passes_filter = true;
boost::dynamic_bitset<> target_match_indices(mol.getNumAtoms());
for(auto &match : mv) {
target_match_indices[match.second] = 1;
}
for(auto &match : mv) {
const Atom* atm= mol.getAtomWithIdx(match.second);
// is this a labelled rgroup or not?
if(core_atoms_with_user_labels.find(match.first) ==
core_atoms_with_user_labels.end()) {
// nope... every neighbor that is not part of the substructure match
// must be a hydrogen; otherwise, skip the match
for (const auto &nbri : boost::make_iterator_range(mol.getAtomNeighbors(atm))) {
const auto &nbr = mol[nbri];
if(nbr->getAtomicNum() != 1 && !target_match_indices[nbr->getIdx()]) {
passes_filter=false;
break;
}
}
}
if(!passes_filter)
break;
}
if (passes_filter) {
tmatches_filtered.push_back( mv );
}
}
tmatches = tmatches_filtered;
}
if (!tmatches.size()) {
continue;
} else {

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2017, Novartis Institutes for BioMedical Research Inc.
# Copyright (c) 2018, Novartis Institutes for BioMedical Research Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -41,7 +41,7 @@ from rdkit.Chem.rdRGroupDecomposition import RGroupDecompose, RGroupDecompositio
from collections import OrderedDict
class TestCase(unittest.TestCase) :
def atest_multicores(self):
def test_multicores(self):
cores_smi_easy = OrderedDict()
cores_smi_hard = OrderedDict()
@@ -162,9 +162,49 @@ C1CCO[C@@](S)(P)1
Chem.MolFromSmiles("CC")]
res, unmatched = RGroupDecompose(cores, mols)
self.assertEquals(len(res), 1)
self.assertEquals(unmatched, [0,1,2,4])
self.assertEqual(len(res), 1)
self.assertEqual(unmatched, [0,1,2,4])
def test_userlabels(self):
# Decomposes a single molecule against cores that carry user-chosen R labels
# and checks that the output columns and the dummy atoms in the core/R-group
# SMILES use the same label numbers as the input core.
smis = ["C(Cl)N(N)O(O)"]
mols = [Chem.MolFromSmiles(smi) for smi in smis]
# Case 1: core labelled 1, 5 and 6 -> expected columns R1, R5, R6.
smarts = 'C([*:1])N([*:5])O([*:6])'
core = Chem.MolFromSmarts(smarts)
rg = RGroupDecomposition(core)
for m in mols:
rg.Add(m)
rg.Process()
self.assertEqual(rg.GetRGroupsAsColumns(asSmiles=True),
{'Core': ['C(N(O[*:6])[*:5])[*:1]'],
'R1': ['Cl[*:1]'],
'R5': ['[H]N([H])[*:5]'],
'R6': ['[H]O[*:6]']})
# Case 2: same molecule, but the first label is 4 instead of 1, so the
# labels do not start at 1 -> expected columns R4, R5, R6.
smarts = 'C([*:4])N([*:5])O([*:6])'
core = Chem.MolFromSmarts(smarts)
rg = RGroupDecomposition(core)
for m in mols:
rg.Add(m)
rg.Process()
self.assertEqual(rg.GetRGroupsAsColumns(asSmiles=True),
{'Core': ['C(N(O[*:6])[*:5])[*:4]'],
'R4': ['Cl[*:4]'],
'R5': ['[H]N([H])[*:5]'],
'R6': ['[H]O[*:6]']})
def test_match_only_at_rgroups(self):
# With params.onlyMatchAtRGroups enabled, adding unsubstituted benzene to a
# decomposition whose core has two labelled positions (5 and 6) must not
# return -1.
# NOTE(review): presumably -1 is Add()'s "molecule not matched" value -- confirm.
# NOTE(review): the substituted benzenes are commented out of the input list
# below, so only the unsubstituted case is exercised -- confirm this is intended.
smiles = ['c1ccccc1']#, 'c1(Cl)ccccc1', 'c1(Cl)cc(Br)ccc1']
mols = [Chem.MolFromSmiles(smi) for smi in smiles]
core1 = Chem.MolFromSmiles("c1([*:5])cc([*:6])ccc1")
params = RGroupDecompositionParameters()
params.onlyMatchAtRGroups=True
rg = RGroupDecomposition(core1, params)
# The SMILES string is passed as the assertion message so a failure names the input.
for smi,m in zip(smiles,mols):
self.assertTrue(rg.Add(m)!=-1, smi)
if __name__ == '__main__':
unittest.main()