// // Copyright (C) 2013 Greg Landrum // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include "ReducedGraphs.h" #include #include #include namespace RDKit{ namespace { // FIX: this is duplicated here and in the MorganFingerprints code class ss_matcher { public: ss_matcher() {}; ss_matcher(const std::string &pattern){ RDKit::RWMol *p=RDKit::SmartsToMol(pattern); TEST_ASSERT(p); m_matcher.reset(p); }; //const RDKit::ROMOL_SPTR &getMatcher() const { return m_matcher; }; const RDKit::ROMol *getMatcher() const { return m_matcher.get(); }; private: RDKit::ROMOL_SPTR m_matcher; }; } namespace ReducedGraphs { // Definitions for feature points adapted from: // Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998) const char *smartsPatterns[4]={ "[$([N;!H0;v3,v4&+1]),\ $([O,S;H1;+0]),\ n&H1&+0]", // Donor "[$([O,S;H1;v2;!$(*-*=[O,N,P,S])]),\ $([O,S;H0;v2]),\ $([O,S;-]),\ $([N;v3;!$(N-*=[O,N,P,S])]),\ n&H0&+0,\ $([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]", // Acceptor "[#7;+,\ $([N;H2&+0][$([C,a]);!$([C,a](=O))]),\ $([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))]),\ $([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]", // Positive "[$([C,S](=[O,S,P])-[O;H1,-1])]" // Negative }; std::vector defaultFeatureSmarts(smartsPatterns,smartsPatterns+4); typedef boost::flyweight,boost::flyweights::no_tracking > pattern_flyweight; void getErGAtomTypes(const ROMol &mol, std::vector > &types, std::vector *patterns=0){ unsigned int nAtoms=mol.getNumAtoms(); if(!patterns){ std::vector featureMatchers; featureMatchers.reserve(defaultFeatureSmarts.size()); for(std::vector::const_iterator smaIt=defaultFeatureSmarts.begin(); smaIt!=defaultFeatureSmarts.end();++smaIt){ const ROMol *matcher=pattern_flyweight(*smaIt).get().getMatcher(); CHECK_INVARIANT(matcher,"bad smarts"); featureMatchers.push_back(matcher); } patterns=&featureMatchers; } types.resize(patterns->size()); for(unsigned int i=0;isize();++i){ types[i].resize(nAtoms); types[i].reset(); unsigned int mask=1< matchVect; // to maintain thread safety, we have to copy the pattern // molecules: SubstructMatch(mol,ROMol(*(*patterns)[i],true),matchVect); for(std::vector::const_iterator mvIt=matchVect.begin(); mvIt!=matchVect.end();++mvIt){ types[i].set((*mvIt)[0].second); } } } // end of getAtomTypes; } // end of namespace ReducedGraphs ROMol *createMolExtendedReducedGraph(const ROMol &mol, std::vector > *atomTypes ){ std::vector > *latomTypes=0; if(!atomTypes){ latomTypes = new std::vector >(); atomTypes = latomTypes; ReducedGraphs::getErGAtomTypes(mol,*atomTypes); } RWMol *res = new RWMol(mol); const int aromaticFlag = atomTypes->size(); const int aliphaticFlag = atomTypes->size()+1; for(ROMol::AtomIterator atIt=res->beginAtoms();atIt!=res->endAtoms();++atIt){ std::list tv; for(unsigned int i=0;isize();++i){ if((*atomTypes)[i][(*atIt)->getIdx()]) tv.push_back(i); } (*atIt)->setProp("_ErGAtomTypes",tv,true); } // start by adding dummies at the ring centroids BOOST_FOREACH(const INT_VECT &ring,mol.getRingInfo()->atomRings()){ if(ring.size()<8){ int nIdx=res->addAtom(new Atom(0),false,true)-1; int nAromatic=0,nSP2=0; BOOST_FOREACH(int idx,ring){ res->addBond(idx,nIdx,Bond::SINGLE); if(mol.getAtomWithIdx(idx)->getIsAromatic()){ ++nAromatic; } else if(mol.getAtomWithIdx(idx)->getHybridization()==Atom::SP2) { ++nSP2; } } std::list tv; if(nAromatic>=2 || nSP2 >= ring.size()/2) tv.push_back(aromaticFlag); else tv.push_back(aliphaticFlag); res->getAtomWithIdx(nIdx)->setProp("_ErGAtomTypes",tv,true); } } // now remove any degree-two ring atoms that have no features: for(unsigned int i=mol.getNumAtoms()-1;i>=0;++i){ if(mol.getRingInfo()->numAtomRings(i) && mol.getAtomWithIdx(i)->getDegree()==2){ std::list li; mol.getAtomWithIdx(i)->getProp("_ErGAtomTypes",li); if(li.empty()){ res->removeAtom(i); } } } // FIX: still need to do the "highly fused rings" simplification for things like adamantane if(latomTypes) delete latomTypes; return res; } } // end of namespace RDKit