Files
rdkit/Code/GraphMol/Descriptors/Lipinski.cpp
2015-10-18 13:41:03 -04:00

390 lines
14 KiB
C++

//
// Copyright (C) 2011-2013 Greg Landrum
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <GraphMol/RDKitBase.h>
#include <GraphMol/MolPickler.h>
#include <GraphMol/Descriptors/MolDescriptors.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/Substruct/SubstructMatch.h>
#include <RDGeneral/types.h>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/dynamic_bitset.hpp>
#include <boost/foreach.hpp>
#include <boost/flyweight.hpp>
#include <boost/flyweight/key_value.hpp>
#include <boost/flyweight/no_tracking.hpp>
#include <RDGeneral/BoostEndInclude.h>
#include <vector>
#include <string>
namespace {
class ss_matcher {
public:
ss_matcher(const std::string &pattern) : m_pattern(pattern){
m_needCopies=(pattern.find_first_of("$")!=std::string::npos);
RDKit::RWMol *p=RDKit::SmartsToMol(pattern);
m_matcher=p;
POSTCONDITION(m_matcher,"no matcher");
};
const RDKit::ROMol *getMatcher() const { return m_matcher; };
unsigned int countMatches(const RDKit::ROMol &mol) const {
PRECONDITION(m_matcher,"no matcher");
std::vector<RDKit::MatchVectType> matches;
// This is an ugly one. Recursive queries aren't thread safe.
// Unfortunately we have to take a performance hit here in order
// to guarantee thread safety
if(m_needCopies){
const RDKit::ROMol nm(*(m_matcher),true);
RDKit::SubstructMatch(mol,nm,matches);
} else {
const RDKit::ROMol &nm=*m_matcher;
RDKit::SubstructMatch(mol,nm,matches);
}
return matches.size();
}
~ss_matcher() { delete m_matcher; };
private:
ss_matcher() : m_pattern(""), m_needCopies(false), m_matcher(0) {};
std::string m_pattern;
bool m_needCopies;
const RDKit::ROMol *m_matcher;
};
}
typedef boost::flyweight<boost::flyweights::key_value<std::string,ss_matcher>,boost::flyweights::no_tracking > pattern_flyweight;
#define SMARTSCOUNTFUNC(nm,pattern,vers) \
const std::string nm ## Version =vers; \
unsigned int calc##nm(const RDKit::ROMol &mol){ \
pattern_flyweight m(pattern); \
return m.get().countMatches(mol); \
} \
extern int no_such_variable
namespace RDKit{
namespace Descriptors {
unsigned int calcLipinskiHBA(const ROMol &mol){
unsigned int res=0;
for(ROMol::ConstAtomIterator iter=mol.beginAtoms();
iter!=mol.endAtoms();++iter){
if((*iter)->getAtomicNum()==7 || (*iter)->getAtomicNum()==8) ++res;
}
return res;
}
unsigned int calcLipinskiHBD(const ROMol &mol){
unsigned int res=0;
for(ROMol::ConstAtomIterator iter=mol.beginAtoms();
iter!=mol.endAtoms();++iter){
if( ((*iter)->getAtomicNum()==7 || (*iter)->getAtomicNum()==8) ) {
res += (*iter)->getTotalNumHs(true);
}
}
return res;
}
const std::string NumRotatableBondsVersion="2.0.0";
unsigned int calcNumRotatableBonds(const ROMol &mol,bool strict){
if(strict){
std::string strict_pattern="[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])&!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])&!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]";
pattern_flyweight m(strict_pattern);
return m.get().countMatches(mol);
} else {
std::string pattern="[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]";
pattern_flyweight m(pattern);
return m.get().countMatches(mol);
}
}
//SMARTSCOUNTFUNC(NumHBD, "[$([N;!H0;v3]),$([N;!H0;+1;v4]),$([O,S;H1;+0]),$([n;H1;+0])]","2.0.1" ) ;
SMARTSCOUNTFUNC(NumHBD, "[N&!H0&v3,N&!H0&+1&v4,O&H1&+0,S&H1&+0,n&H1&+0]","2.0.1" ) ;
SMARTSCOUNTFUNC(NumHBA, "[$([O,S;H1;v2]-[!$(*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=!@[O,N,P,S])]),$([nH0,o,s;+0])]","2.0.1") ;
SMARTSCOUNTFUNC(NumHeteroatoms,"[!#6;!#1]","1.0.1") ;
SMARTSCOUNTFUNC(NumAmideBonds,"C(=[O;!R])N","1.0.0") ;
const std::string NumRingsVersion="1.0.1";
unsigned int calcNumRings(const ROMol &mol){
return mol.getRingInfo()->numRings();
}
const std::string FractionCSP3Version="1.0.0";
double calcFractionCSP3(const ROMol &mol){
unsigned int nCSP3=0;
unsigned int nC=0;
ROMol::VERTEX_ITER atBegin,atEnd;
boost::tie(atBegin,atEnd) = mol.getVertices();
while(atBegin!=atEnd){
ATOM_SPTR at=mol[*atBegin];
if(at->getAtomicNum()==6){
++nC;
if(at->getTotalDegree()==4){
++nCSP3;
}
}
++atBegin;
}
if(!nC) return 0;
return static_cast<double>(nCSP3)/nC;
}
const std::string NumHeterocyclesVersion="1.0.0";
unsigned int calcNumHeterocycles(const ROMol &mol){
unsigned int res=0;
BOOST_FOREACH(const INT_VECT &iv,mol.getRingInfo()->atomRings()){
BOOST_FOREACH(int i,iv){
if(mol.getAtomWithIdx(i)->getAtomicNum()!=6){
++res;
break;
}
}
}
return res;
}
const std::string NumAromaticRingsVersion="1.0.0";
unsigned int calcNumAromaticRings(const ROMol &mol){
unsigned int res=0;
BOOST_FOREACH(const INT_VECT &iv,mol.getRingInfo()->bondRings()){
++res;
BOOST_FOREACH(int i,iv){
if(!mol.getBondWithIdx(i)->getIsAromatic()){
--res;
break;
}
}
}
return res;
}
const std::string NumSaturatedRingsVersion="1.0.0";
unsigned int calcNumSaturatedRings(const ROMol &mol){
unsigned int res=0;
BOOST_FOREACH(const INT_VECT &iv,mol.getRingInfo()->bondRings()){
++res;
BOOST_FOREACH(int i,iv){
if(mol.getBondWithIdx(i)->getBondType()!=Bond::SINGLE || mol.getBondWithIdx(i)->getIsAromatic()){
--res;
break;
}
}
}
return res;
}
const std::string NumAliphaticRingsVersion="1.0.0";
unsigned int calcNumAliphaticRings(const ROMol &mol){
unsigned int res=0;
BOOST_FOREACH(const INT_VECT &iv,mol.getRingInfo()->bondRings()){
BOOST_FOREACH(int i,iv){
if(!mol.getBondWithIdx(i)->getIsAromatic()){
++res;
break;
}
}
}
return res;
}
const std::string NumAromaticHeterocyclesVersion="1.0.0";
unsigned int calcNumAromaticHeterocycles(const ROMol &mol){
unsigned int res=0;
BOOST_FOREACH(const INT_VECT &iv,mol.getRingInfo()->bondRings()){
bool countIt=false;
BOOST_FOREACH(int i,iv){
if(!mol.getBondWithIdx(i)->getIsAromatic()){
countIt=false;
break;
}
// we're checking each atom twice, which is kind of doofy, but this
// function is hopefully not going to be a big time sink.
if(!countIt &&
(mol.getBondWithIdx(i)->getBeginAtom()->getAtomicNum()!=6 ||
mol.getBondWithIdx(i)->getEndAtom()->getAtomicNum()!=6) ){
countIt=true;
}
}
if(countIt) ++res;
}
return res;
}
const std::string NumAromaticCarbocyclesVersion="1.0.0";
unsigned int calcNumAromaticCarbocycles(const ROMol &mol){
unsigned int res=0;
BOOST_FOREACH(const INT_VECT &iv,mol.getRingInfo()->bondRings()){
bool countIt=true;
BOOST_FOREACH(int i,iv){
if(!mol.getBondWithIdx(i)->getIsAromatic()){
countIt=false;
break;
}
// we're checking each atom twice, which is kind of doofy, but this
// function is hopefully not going to be a big time sync.
if(mol.getBondWithIdx(i)->getBeginAtom()->getAtomicNum()!=6 ||
mol.getBondWithIdx(i)->getEndAtom()->getAtomicNum()!=6 ){
countIt=false;
break;
}
}
if(countIt) ++res;
}
return res;
}
const std::string NumAliphaticHeterocyclesVersion="1.0.0";
unsigned int calcNumAliphaticHeterocycles(const ROMol &mol){
unsigned int res=0;
BOOST_FOREACH(const INT_VECT &iv,mol.getRingInfo()->bondRings()){
bool hasAliph=false;
bool hasHetero=false;
BOOST_FOREACH(int i,iv){
if(!mol.getBondWithIdx(i)->getIsAromatic()){
hasAliph=true;
}
// we're checking each atom twice, which is kind of doofy, but this
// function is hopefully not going to be a big time sink.
if(!hasHetero &&
(mol.getBondWithIdx(i)->getBeginAtom()->getAtomicNum()!=6 ||
mol.getBondWithIdx(i)->getEndAtom()->getAtomicNum()!=6) ){
hasHetero=true;
}
}
if(hasHetero&&hasAliph) ++res;
}
return res;
}
const std::string NumAliphaticCarbocyclesVersion="1.0.0";
unsigned int calcNumAliphaticCarbocycles(const ROMol &mol){
unsigned int res=0;
BOOST_FOREACH(const INT_VECT &iv,mol.getRingInfo()->bondRings()){
bool hasAliph=false;
bool hasHetero=false;
BOOST_FOREACH(int i,iv){
if(!mol.getBondWithIdx(i)->getIsAromatic()){
hasAliph=true;
}
// we're checking each atom twice, which is kind of doofy, but this
// function is hopefully not going to be a big time sync.
if(mol.getBondWithIdx(i)->getBeginAtom()->getAtomicNum()!=6 ||
mol.getBondWithIdx(i)->getEndAtom()->getAtomicNum()!=6 ){
hasHetero=true;
break;
}
}
if(hasAliph&&!hasHetero) ++res;
}
return res;
}
const std::string NumSaturatedHeterocyclesVersion="1.0.0";
unsigned int calcNumSaturatedHeterocycles(const ROMol &mol){
unsigned int res=0;
BOOST_FOREACH(const INT_VECT &iv,mol.getRingInfo()->bondRings()){
bool countIt=false;
BOOST_FOREACH(int i,iv){
if(mol.getBondWithIdx(i)->getBondType()!=Bond::SINGLE || mol.getBondWithIdx(i)->getIsAromatic()){
countIt=false;
break;
}
// we're checking each atom twice, which is kind of doofy, but this
// function is hopefully not going to be a big time sync.
if(!countIt &&
(mol.getBondWithIdx(i)->getBeginAtom()->getAtomicNum()!=6 ||
mol.getBondWithIdx(i)->getEndAtom()->getAtomicNum()!=6) ){
countIt=true;
}
}
if(countIt) ++res;
}
return res;
}
const std::string NumSaturatedCarbocyclesVersion="1.0.0";
unsigned int calcNumSaturatedCarbocycles(const ROMol &mol){
unsigned int res=0;
BOOST_FOREACH(const INT_VECT &iv,mol.getRingInfo()->bondRings()){
bool countIt=true;
BOOST_FOREACH(int i,iv){
if(mol.getBondWithIdx(i)->getBondType()!=Bond::SINGLE || mol.getBondWithIdx(i)->getIsAromatic()){
countIt=false;
break;
}
// we're checking each atom twice, which is kind of doofy, but this
// function is hopefully not going to be a big time sync.
if(mol.getBondWithIdx(i)->getBeginAtom()->getAtomicNum()!=6 ||
mol.getBondWithIdx(i)->getEndAtom()->getAtomicNum()!=6 ){
countIt=false;
break;
}
}
if(countIt) ++res;
}
return res;
}
const std::string NumSpiroAtomsVersion="1.0.0";
unsigned int calcNumSpiroAtoms(const ROMol &mol,std::vector<unsigned int> *atoms){
if(!mol.getRingInfo() || !mol.getRingInfo()->isInitialized()){
MolOps::findSSSR(mol);
}
const RingInfo *rInfo = mol.getRingInfo();
std::vector<unsigned int> lAtoms;
if(!atoms) atoms = &lAtoms;
for(unsigned int i=0;i<rInfo->atomRings().size();++i){
const INT_VECT &ri=rInfo->atomRings()[i];
for(unsigned int j=i+1;j<rInfo->atomRings().size();++j){
const INT_VECT &rj=rInfo->atomRings()[j];
// EFF: using intersect here does more work and memory allocation than is required
INT_VECT inter;
Intersect(ri,rj,inter);
if(inter.size()==1){
if(std::find(atoms->begin(),atoms->end(),inter[0]) == atoms->end()){
atoms->push_back(inter[0]);
}
}
}
}
return atoms->size();
}
const std::string NumBridgeheadAtomsVersion="1.0.0";
unsigned int calcNumBridgeheadAtoms(const ROMol &mol,std::vector<unsigned int> *atoms){
if(!mol.getRingInfo() || !mol.getRingInfo()->isInitialized()){
MolOps::findSSSR(mol);
}
const RingInfo *rInfo = mol.getRingInfo();
std::vector<unsigned int> lAtoms;
if(!atoms) atoms = &lAtoms;
for(unsigned int i=0;i<rInfo->bondRings().size();++i){
const INT_VECT &ri=rInfo->bondRings()[i];
for(unsigned int j=i+1;j<rInfo->bondRings().size();++j){
const INT_VECT &rj=rInfo->bondRings()[j];
// EFF: using intersect here does more work and memory allocation than is required
INT_VECT inter;
Intersect(ri,rj,inter);
if(inter.size()>1){
INT_VECT atomCounts(mol.getNumAtoms(),0);
BOOST_FOREACH(int ii,inter){
atomCounts[mol.getBondWithIdx(ii)->getBeginAtomIdx()] += 1;
atomCounts[mol.getBondWithIdx(ii)->getEndAtomIdx()] += 1;
}
for(unsigned int ti=0;ti<atomCounts.size();++ti){
if(atomCounts[ti]==1){
if(std::find(atoms->begin(),atoms->end(),ti) == atoms->end()){
atoms->push_back(ti);
}
}
}
}
}
}
return atoms->size();
}
} // end of namespace Descriptors
} // end of namespace RDKit