Add hasQueryHs (#6702)

This commit is contained in:
Brian Kelley
2023-09-15 02:59:56 -04:00
committed by GitHub
parent 9249ca5cc8
commit cccee15a91
7 changed files with 155 additions and 9 deletions

View File

@@ -1063,7 +1063,13 @@ ROMol *removeAllHs(const ROMol &mol, bool sanitize) {
}
namespace {
bool isQueryH(const Atom *atom) {
enum class HydrogenType {
NotAHydrogen,
UnMergableQueryHydrogen,
QueryHydrogen
};
HydrogenType isQueryH(const Atom *atom) {
PRECONDITION(atom, "bogus atom");
if (atom->getAtomicNum() == 1) {
// the simple case: the atom is flagged as being an H and
@@ -1071,18 +1077,18 @@ bool isQueryH(const Atom *atom) {
if (!atom->hasQuery() ||
(!atom->getQuery()->getNegation() &&
atom->getQuery()->getDescription() == "AtomAtomicNum")) {
return true;
return HydrogenType::QueryHydrogen;
}
}
if (!(atom->getDegree() <= 1)) {
// bonded and unbonded H atoms will continue rest will be returned
return false;
return HydrogenType::NotAHydrogen;
}
if (atom->hasQuery() && atom->getQuery()->getNegation()) {
// we will not merge negated queries
return false;
return HydrogenType::NotAHydrogen;
}
bool hasHQuery = false, hasOr = false;
@@ -1119,10 +1125,10 @@ bool isQueryH(const Atom *atom) {
"in ORs is not supported. This query will not "
"be merged"
<< std::endl;
return false;
return HydrogenType::UnMergableQueryHydrogen;
}
}
return hasHQuery;
return hasHQuery ? HydrogenType::QueryHydrogen : HydrogenType::NotAHydrogen;
}
} // namespace
@@ -1145,7 +1151,7 @@ void mergeQueryHs(RWMol &mol, bool mergeUnmappedOnly, bool mergeIsotopes) {
boost::dynamic_bitset<> hatoms(mol.getNumAtoms());
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
hatoms[i] = isQueryH(mol.getAtomWithIdx(i));
hatoms[i] = isQueryH(mol.getAtomWithIdx(i)) == HydrogenType::QueryHydrogen;
}
unsigned int currIdx = 0, stopIdx = mol.getNumAtoms();
while (currIdx < stopIdx) {
@@ -1263,5 +1269,56 @@ bool needsHs(const ROMol &mol) {
return false;
}
std::pair<bool,bool> hasQueryHs(const ROMol &mol) {
bool queryHs = false;
// We don't care about announcing ORs or other items during isQueryH
RDLog::LogStateSetter blocker;
for (const auto atom : mol.atoms()) {
switch (isQueryH(atom)) {
case HydrogenType::UnMergableQueryHydrogen:
return std::make_pair(true, true);
case HydrogenType::QueryHydrogen:
queryHs = true;
break;
default: // HydrogenType::NotAHydrogen:
break;
}
if (atom->hasQuery()) {
if (atom->getQuery()->getDescription() == "RecursiveStructure") {
auto *rsq = dynamic_cast<RecursiveStructureQuery *>(atom->getQuery());
CHECK_INVARIANT(rsq, "could not convert recursive structure query");
auto res = hasQueryHs(*rsq->getQueryMol());
if(res.second) { // unmergableH implies queryH
return res;
}
queryHs |= res.first;
}
// FIX: shouldn't be repeating this code here -- yet again!
std::list<QueryAtom::QUERYATOM_QUERY::CHILD_TYPE> childStack(
atom->getQuery()->beginChildren(), atom->getQuery()->endChildren());
while (!childStack.empty()) {
QueryAtom::QUERYATOM_QUERY::CHILD_TYPE qry = childStack.front();
childStack.pop_front();
if (qry->getDescription() == "RecursiveStructure") {
auto *rsq = dynamic_cast<RecursiveStructureQuery *>(qry.get());
CHECK_INVARIANT(rsq, "could not convert recursive structure query");
auto res = hasQueryHs(*rsq->getQueryMol());
if(res.second) {
return res;
}
queryHs |= res.first;
} else {
childStack.insert(childStack.end(), qry->beginChildren(),
qry->endChildren());
}
}
}
} // end of recursion loop
return std::make_pair(queryHs, false);
}
} // namespace MolOps
} // namespace RDKit

View File

@@ -309,6 +309,18 @@ RDKIT_GRAPHMOL_EXPORT void mergeQueryHs(RWMol &mol,
bool mergeUnmappedOnly = false,
bool mergeIsotopes = false);
//! returns a pair of booleans (hasQueryHs, hasUnmergaebleQueryHs)
/*!
This is really intended to be used with molecules that contain QueryAtoms
such as when checking smarts patterns for explicit hydrogens
\param mol the molecule to check for query Hs from
\return std::pair if pair.first is true if the molecule has query hydrogens, if pair.second
is true, the queryHs cannot be removed my mergeQueryHs
*/
RDKIT_GRAPHMOL_EXPORT std::pair<bool,bool> hasQueryHs(const ROMol &mol);
typedef enum {
ADJUST_IGNORENONE = 0x0,
ADJUST_IGNORECHAINS = 0x1,

View File

@@ -964,6 +964,14 @@ ROMol *molzipHelper(python::object &pmols, const MolzipParams &p) {
return molzip(*mols, p).release();
}
python::tuple hasQueryHsHelper(const ROMol &m) {
python::list res;
auto hashs = MolOps::hasQueryHs(m);
res.append(hashs.first);
res.append(hashs.second);
return python::tuple(res);
}
// we can really only set some of these types from C++ which means
// we need a helper function for testing that we can read them
// correctly.
@@ -1333,6 +1341,19 @@ struct molops_wrapper {
"merges hydrogens into their neighboring atoms as queries",
python::return_value_policy<python::manage_new_object>());
docString =
"Check to see if the molecule has query Hs, this is normally used on query molecules\n\
such as thos returned from MolFromSmarts\n\
Example: \n\
(hasQueryHs, hasUnmergableQueryHs) = HasQueryHs(mol)\n\
\n\
if hasUnmergableQueryHs, these query hs cannot be removed by calling\n\
MergeQueryHs";
python::def("HasQueryHs", hasQueryHsHelper,
python::arg("mol"),
docString.c_str());
// ------------------------------------------------------------------------
docString =
"Removes atoms matching a substructure query from a molecule\n\

View File

@@ -7347,6 +7347,17 @@ CAS<~>
self.assertEqual(sgs[1].GetGroupType(), Chem.StereoGroupType.STEREO_OR)
self.assertEqual(len(sgs[1].GetAtoms()), 1)
def testHasQueryHs(self):
for sma, hasQHs in [
("[#1]", (True, False)),
("[#1,N]", (True, True)),
("[$(C-[H])]", (True, False)),
("[$([C,#1])]", (True, True)),
("[$(c([C;!R;!$(C-[N,O,S]);!$(C-[H])](=O))1naaaa1),$(c([C;!R;!$(C-[N,O,S]);!$(C-[H])](=O))1naa[n,s,o]1)]",
(True, False))]:
pat = Chem.MolFromSmarts(sma)
self.assertEqual(Chem.HasQueryHs(pat), hasQHs)
def testMrvHandling(self):
fn1 = os.path.join(RDConfig.RDBaseDir,'Code','GraphMol','MarvinParse','test_data','aspirin.mrv')
mol = Chem.MolFromMrvFile(fn1)
@@ -7369,7 +7380,6 @@ CAS<~>
self.assertEqual(mol.GetNumAtoms(),13)
if __name__ == '__main__':
if "RDTESTCASE" in os.environ:
suite = unittest.TestSuite()

View File

@@ -8437,6 +8437,37 @@ void testGithub5099() {
TEST_ASSERT(m->getNumAtoms() == 5);
}
void testHasQueryHs() {
BOOST_LOG(rdInfoLog)
<< "-----------------------\n Testing hasQueryHs "
<< std::endl;
const auto has_no_query_hs = std::make_pair(false, false);
const auto has_only_query_hs = std::make_pair(true, false);
const auto has_unmergeable_hs = std::make_pair(true, true);
auto m0 = "CCCC"_smarts;
TEST_ASSERT(RDKit::MolOps::hasQueryHs(*m0) == has_no_query_hs);
auto m = "[#1]"_smarts;
TEST_ASSERT(RDKit::MolOps::hasQueryHs(*m) == has_only_query_hs);
auto m2 = "[#1,N]"_smarts;
TEST_ASSERT(RDKit::MolOps::hasQueryHs(*m2) == has_unmergeable_hs);
//remove the negation
auto recursive = "[$(C-[H])]"_smarts;
TEST_ASSERT(RDKit::MolOps::hasQueryHs(*recursive) == has_only_query_hs);
auto recursive_or = "[$([C,#1])]"_smarts;
TEST_ASSERT(RDKit::MolOps::hasQueryHs(*recursive_or) == has_unmergeable_hs);
// from rd_filters for something bigger
auto keto_def_heterocycle = "[$(c([C;!R;!$(C-[N,O,S]);!$(C-[H])](=O))1naaaa1),$(c([C;!R;!$(C-[N,O,S]);!$(C-[H])](=O))1naa[n,s,o]1)]"_smarts;
TEST_ASSERT(RDKit::MolOps::hasQueryHs(*keto_def_heterocycle) == has_only_query_hs);
BOOST_LOG(rdInfoLog) << "\tdone" << std::endl;
}
int main() {
RDLog::InitLogs();
// boost::logging::enable_logs("rdApp.debug");
@@ -8553,6 +8584,6 @@ int main() {
testSetTerminalAtomCoords();
testGet3DDistanceMatrix();
testGithub5099();
testHasQueryHs();
return 0;
}

View File

@@ -29,6 +29,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
%include "std_pair.i"
%{
#include <GraphMol/MolOps.h>
@@ -46,6 +47,7 @@
%ignore RDKit::MolOps::detectChemistryProblems;
%include <GraphMol/MolOps.h>
%ignore RDKit::MolOps::sanitizeMol(RWMol &,unsigned int &,unsigned int &);
%template(BoolPair) std::pair<bool, bool>;
%inline %{
int sanitizeMol(RDKit::RWMol &mol,int sanitizeOps){

View File

@@ -63,6 +63,19 @@ public class MolQueryTests extends GraphMolTest {
assertTrue(m1.hasSubstructMatch(aqmol));
assertFalse(m2.hasSubstructMatch(aqmol));
}
@Test public void testHasQueryHs() {
ROMol m2;
m2 = RWMol.MolFromSmarts("[#1]",0,false);
BoolPair res = RDKFuncs.hasQueryHs(m2);
assertTrue(res.getFirst());
assertFalse(res.getSecond());
m2 = RWMol.MolFromSmarts("[#1,C]",0,false);
res = RDKFuncs.hasQueryHs(m2);
assertTrue(res.getFirst());
assertTrue(res.getSecond());
}
public static void main(String args[]) {
org.junit.runner.JUnitCore.main("org.RDKit.BasicMoleculeTests");
}