mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Add hasQueryHs (#6702)
This commit is contained in:
@@ -1063,7 +1063,13 @@ ROMol *removeAllHs(const ROMol &mol, bool sanitize) {
|
||||
}
|
||||
|
||||
namespace {
|
||||
bool isQueryH(const Atom *atom) {
|
||||
// Classification that isQueryH() assigns to an atom with respect to
// hydrogen queries.
enum class HydrogenType {
|
||||
// the atom is not treated as a query hydrogen
NotAHydrogen,
|
||||
// a hydrogen query that sits inside an OR; isQueryH() warns that these are
// not supported and mergeQueryHs() leaves them alone
UnMergableQueryHydrogen,
|
||||
// a query hydrogen; the only category mergeQueryHs() flags for merging
QueryHydrogen
|
||||
};
|
||||
|
||||
HydrogenType isQueryH(const Atom *atom) {
|
||||
PRECONDITION(atom, "bogus atom");
|
||||
if (atom->getAtomicNum() == 1) {
|
||||
// the simple case: the atom is flagged as being an H and
|
||||
@@ -1071,18 +1077,18 @@ bool isQueryH(const Atom *atom) {
|
||||
if (!atom->hasQuery() ||
|
||||
(!atom->getQuery()->getNegation() &&
|
||||
atom->getQuery()->getDescription() == "AtomAtomicNum")) {
|
||||
return true;
|
||||
return HydrogenType::QueryHydrogen;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(atom->getDegree() <= 1)) {
|
||||
// bonded and unbonded H atoms will continue rest will be returned
|
||||
return false;
|
||||
return HydrogenType::NotAHydrogen;
|
||||
}
|
||||
|
||||
if (atom->hasQuery() && atom->getQuery()->getNegation()) {
|
||||
// we will not merge negated queries
|
||||
return false;
|
||||
return HydrogenType::NotAHydrogen;
|
||||
}
|
||||
|
||||
bool hasHQuery = false, hasOr = false;
|
||||
@@ -1119,10 +1125,10 @@ bool isQueryH(const Atom *atom) {
|
||||
"in ORs is not supported. This query will not "
|
||||
"be merged"
|
||||
<< std::endl;
|
||||
return false;
|
||||
return HydrogenType::UnMergableQueryHydrogen;
|
||||
}
|
||||
}
|
||||
return hasHQuery;
|
||||
return hasHQuery ? HydrogenType::QueryHydrogen : HydrogenType::NotAHydrogen;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@@ -1145,7 +1151,7 @@ void mergeQueryHs(RWMol &mol, bool mergeUnmappedOnly, bool mergeIsotopes) {
|
||||
|
||||
boost::dynamic_bitset<> hatoms(mol.getNumAtoms());
|
||||
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
|
||||
hatoms[i] = isQueryH(mol.getAtomWithIdx(i));
|
||||
hatoms[i] = isQueryH(mol.getAtomWithIdx(i)) == HydrogenType::QueryHydrogen;
|
||||
}
|
||||
unsigned int currIdx = 0, stopIdx = mol.getNumAtoms();
|
||||
while (currIdx < stopIdx) {
|
||||
@@ -1263,5 +1269,56 @@ bool needsHs(const ROMol &mol) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::pair<bool,bool> hasQueryHs(const ROMol &mol) {
|
||||
bool queryHs = false;
|
||||
// We don't care about announcing ORs or other items during isQueryH
|
||||
RDLog::LogStateSetter blocker;
|
||||
|
||||
for (const auto atom : mol.atoms()) {
|
||||
switch (isQueryH(atom)) {
|
||||
case HydrogenType::UnMergableQueryHydrogen:
|
||||
return std::make_pair(true, true);
|
||||
case HydrogenType::QueryHydrogen:
|
||||
queryHs = true;
|
||||
break;
|
||||
default: // HydrogenType::NotAHydrogen:
|
||||
break;
|
||||
}
|
||||
if (atom->hasQuery()) {
|
||||
if (atom->getQuery()->getDescription() == "RecursiveStructure") {
|
||||
auto *rsq = dynamic_cast<RecursiveStructureQuery *>(atom->getQuery());
|
||||
CHECK_INVARIANT(rsq, "could not convert recursive structure query");
|
||||
auto res = hasQueryHs(*rsq->getQueryMol());
|
||||
if(res.second) { // unmergableH implies queryH
|
||||
return res;
|
||||
}
|
||||
queryHs |= res.first;
|
||||
}
|
||||
|
||||
// FIX: shouldn't be repeating this code here -- yet again!
|
||||
std::list<QueryAtom::QUERYATOM_QUERY::CHILD_TYPE> childStack(
|
||||
atom->getQuery()->beginChildren(), atom->getQuery()->endChildren());
|
||||
while (!childStack.empty()) {
|
||||
QueryAtom::QUERYATOM_QUERY::CHILD_TYPE qry = childStack.front();
|
||||
childStack.pop_front();
|
||||
if (qry->getDescription() == "RecursiveStructure") {
|
||||
auto *rsq = dynamic_cast<RecursiveStructureQuery *>(qry.get());
|
||||
CHECK_INVARIANT(rsq, "could not convert recursive structure query");
|
||||
auto res = hasQueryHs(*rsq->getQueryMol());
|
||||
if(res.second) {
|
||||
return res;
|
||||
}
|
||||
queryHs |= res.first;
|
||||
} else {
|
||||
childStack.insert(childStack.end(), qry->beginChildren(),
|
||||
qry->endChildren());
|
||||
}
|
||||
}
|
||||
}
|
||||
} // end of recursion loop
|
||||
|
||||
return std::make_pair(queryHs, false);
|
||||
}
|
||||
|
||||
} // namespace MolOps
|
||||
} // namespace RDKit
|
||||
|
||||
@@ -309,6 +309,18 @@ RDKIT_GRAPHMOL_EXPORT void mergeQueryHs(RWMol &mol,
|
||||
bool mergeUnmappedOnly = false,
|
||||
bool mergeIsotopes = false);
|
||||
|
||||
//! returns a pair of booleans (hasQueryHs, hasUnmergeableQueryHs)
|
||||
/*!
|
||||
This is really intended to be used with molecules that contain QueryAtoms
|
||||
such as when checking smarts patterns for explicit hydrogens
|
||||
|
||||
|
||||
\param mol the molecule to check for query Hs from
|
||||
\return std::pair: pair.first is true if the molecule has query hydrogens; if pair.second
|
||||
is true, the queryHs cannot be removed by mergeQueryHs
|
||||
*/
|
||||
RDKIT_GRAPHMOL_EXPORT std::pair<bool,bool> hasQueryHs(const ROMol &mol);
|
||||
|
||||
typedef enum {
|
||||
ADJUST_IGNORENONE = 0x0,
|
||||
ADJUST_IGNORECHAINS = 0x1,
|
||||
|
||||
@@ -964,6 +964,14 @@ ROMol *molzipHelper(python::object &pmols, const MolzipParams &p) {
|
||||
return molzip(*mols, p).release();
|
||||
}
|
||||
|
||||
// Python-facing wrapper around MolOps::hasQueryHs(): hands the
// (hasQueryHs, hasUnmergeableQueryHs) pair back as a two-element tuple.
python::tuple hasQueryHsHelper(const ROMol &m) {
  const auto flags = MolOps::hasQueryHs(m);
  return python::make_tuple(flags.first, flags.second);
}
|
||||
|
||||
// we can really only set some of these types from C++ which means
|
||||
// we need a helper function for testing that we can read them
|
||||
// correctly.
|
||||
@@ -1333,6 +1341,19 @@ struct molops_wrapper {
|
||||
"merges hydrogens into their neighboring atoms as queries",
|
||||
python::return_value_policy<python::manage_new_object>());
|
||||
|
||||
docString =
|
||||
"Check to see if the molecule has query Hs, this is normally used on query molecules\n\
|
||||
such as thos returned from MolFromSmarts\n\
|
||||
Example: \n\
|
||||
(hasQueryHs, hasUnmergableQueryHs) = HasQueryHs(mol)\n\
|
||||
\n\
|
||||
if hasUnmergableQueryHs, these query hs cannot be removed by calling\n\
|
||||
MergeQueryHs";
|
||||
python::def("HasQueryHs", hasQueryHsHelper,
|
||||
python::arg("mol"),
|
||||
docString.c_str());
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
docString =
|
||||
"Removes atoms matching a substructure query from a molecule\n\
|
||||
|
||||
@@ -7347,6 +7347,17 @@ CAS<~>
|
||||
self.assertEqual(sgs[1].GetGroupType(), Chem.StereoGroupType.STEREO_OR)
|
||||
self.assertEqual(len(sgs[1].GetAtoms()), 1)
|
||||
|
||||
def testHasQueryHs(self):
  """Chem.HasQueryHs returns (hasQueryHs, hasUnmergeableQueryHs) for SMARTS queries."""
  # each entry: (SMARTS, expected result tuple)
  cases = (
    ("[#1]", (True, False)),
    ("[#1,N]", (True, True)),
    ("[$(C-[H])]", (True, False)),
    ("[$([C,#1])]", (True, True)),
    ("[$(c([C;!R;!$(C-[N,O,S]);!$(C-[H])](=O))1naaaa1),$(c([C;!R;!$(C-[N,O,S]);!$(C-[H])](=O))1naa[n,s,o]1)]",
     (True, False)),
  )
  for smarts, expected in cases:
    query = Chem.MolFromSmarts(smarts)
    self.assertEqual(Chem.HasQueryHs(query), expected)
|
||||
|
||||
def testMrvHandling(self):
|
||||
fn1 = os.path.join(RDConfig.RDBaseDir,'Code','GraphMol','MarvinParse','test_data','aspirin.mrv')
|
||||
mol = Chem.MolFromMrvFile(fn1)
|
||||
@@ -7369,7 +7380,6 @@ CAS<~>
|
||||
self.assertEqual(mol.GetNumAtoms(),13)
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if "RDTESTCASE" in os.environ:
|
||||
suite = unittest.TestSuite()
|
||||
|
||||
@@ -8437,6 +8437,37 @@ void testGithub5099() {
|
||||
TEST_ASSERT(m->getNumAtoms() == 5);
|
||||
}
|
||||
|
||||
void testHasQueryHs() {
|
||||
BOOST_LOG(rdInfoLog)
|
||||
<< "-----------------------\n Testing hasQueryHs "
|
||||
<< std::endl;
|
||||
const auto has_no_query_hs = std::make_pair(false, false);
|
||||
const auto has_only_query_hs = std::make_pair(true, false);
|
||||
const auto has_unmergeable_hs = std::make_pair(true, true);
|
||||
|
||||
auto m0 = "CCCC"_smarts;
|
||||
TEST_ASSERT(RDKit::MolOps::hasQueryHs(*m0) == has_no_query_hs);
|
||||
|
||||
auto m = "[#1]"_smarts;
|
||||
TEST_ASSERT(RDKit::MolOps::hasQueryHs(*m) == has_only_query_hs);
|
||||
|
||||
auto m2 = "[#1,N]"_smarts;
|
||||
TEST_ASSERT(RDKit::MolOps::hasQueryHs(*m2) == has_unmergeable_hs);
|
||||
|
||||
//remove the negation
|
||||
auto recursive = "[$(C-[H])]"_smarts;
|
||||
TEST_ASSERT(RDKit::MolOps::hasQueryHs(*recursive) == has_only_query_hs);
|
||||
|
||||
auto recursive_or = "[$([C,#1])]"_smarts;
|
||||
TEST_ASSERT(RDKit::MolOps::hasQueryHs(*recursive_or) == has_unmergeable_hs);
|
||||
|
||||
// from rd_filters for something bigger
|
||||
auto keto_def_heterocycle = "[$(c([C;!R;!$(C-[N,O,S]);!$(C-[H])](=O))1naaaa1),$(c([C;!R;!$(C-[N,O,S]);!$(C-[H])](=O))1naa[n,s,o]1)]"_smarts;
|
||||
TEST_ASSERT(RDKit::MolOps::hasQueryHs(*keto_def_heterocycle) == has_only_query_hs);
|
||||
|
||||
BOOST_LOG(rdInfoLog) << "\tdone" << std::endl;
|
||||
|
||||
}
|
||||
int main() {
|
||||
RDLog::InitLogs();
|
||||
// boost::logging::enable_logs("rdApp.debug");
|
||||
@@ -8553,6 +8584,6 @@ int main() {
|
||||
testSetTerminalAtomCoords();
|
||||
testGet3DDistanceMatrix();
|
||||
testGithub5099();
|
||||
|
||||
testHasQueryHs();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
%include "std_pair.i"
|
||||
|
||||
%{
|
||||
#include <GraphMol/MolOps.h>
|
||||
@@ -46,6 +47,7 @@
|
||||
%ignore RDKit::MolOps::detectChemistryProblems;
|
||||
%include <GraphMol/MolOps.h>
|
||||
%ignore RDKit::MolOps::sanitizeMol(RWMol &,unsigned int &,unsigned int &);
|
||||
%template(BoolPair) std::pair<bool, bool>;
|
||||
|
||||
%inline %{
|
||||
int sanitizeMol(RDKit::RWMol &mol,int sanitizeOps){
|
||||
|
||||
@@ -63,6 +63,19 @@ public class MolQueryTests extends GraphMolTest {
|
||||
assertTrue(m1.hasSubstructMatch(aqmol));
|
||||
assertFalse(m2.hasSubstructMatch(aqmol));
|
||||
}
|
||||
@Test public void testHasQueryHs() {
|
||||
ROMol m2;
|
||||
m2 = RWMol.MolFromSmarts("[#1]",0,false);
|
||||
BoolPair res = RDKFuncs.hasQueryHs(m2);
|
||||
assertTrue(res.getFirst());
|
||||
assertFalse(res.getSecond());
|
||||
|
||||
m2 = RWMol.MolFromSmarts("[#1,C]",0,false);
|
||||
res = RDKFuncs.hasQueryHs(m2);
|
||||
assertTrue(res.getFirst());
|
||||
assertTrue(res.getSecond());
|
||||
}
|
||||
|
||||
public static void main(String args[]) {
|
||||
org.junit.runner.JUnitCore.main("org.RDKit.BasicMoleculeTests");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user