optimization work

This commit is contained in:
Greg Landrum
2012-04-15 06:19:22 +00:00
parent 8f112c708c
commit de2126b641
9 changed files with 66 additions and 87 deletions

View File

@@ -408,7 +408,7 @@ namespace {
narom += aromRings.size();
}
bool isAtomCandForArom(const Atom *at, VECT_EDON_TYPE &edon) {
bool isAtomCandForArom(const Atom *at, const ElectronDonorType edon) {
PRECONDITION(at,"bad atom");
// limit aromaticity to:
// - the first two rows of the periodic table
@@ -418,7 +418,7 @@ namespace {
at->getAtomicNum()!=52 ) {
return false;
}
switch (edon[at->getIdx()]) {
switch (edon) {
case VacantElectronDonorType:
case OneElectronDonorType:
case TwoElectronDonorType:
@@ -601,8 +601,12 @@ namespace RDKit {
// first find the all the simple rings in the molecule
VECT_INT_VECT srings;
symmetrizeSSSR(mol, srings);
if(mol.getRingInfo()->isInitialized()){
srings = mol.getRingInfo()->atomRings();
} else {
MolOps::symmetrizeSSSR(mol, srings);
}
int narom = 0;
// loop over all the atoms in the rings that can be candidates
// for aromaticity
@@ -611,13 +615,20 @@ namespace RDKit {
// - has one or more electron to donate or has empty p-orbitals
int natoms = mol.getNumAtoms();
boost::dynamic_bitset<> acands(natoms);
boost::dynamic_bitset<> aseen(natoms);
VECT_EDON_TYPE edon(natoms);
int ncands = 0;
VECT_INT_VECT cRings; // holder for rings that are candidates for aromaticity
for (VECT_INT_VECT_I vivi = srings.begin();
vivi != srings.end(); ++vivi) {
bool allAromatic=true;
for (INT_VECT_I ivi = (*vivi).begin();
ivi!=(*vivi).end(); ++ivi) {
if(aseen[*ivi]){
if(!acands[*ivi]) allAromatic=false;
continue;
}
aseen[*ivi]=1;
Atom *at = mol.getAtomWithIdx(*ivi);
// now that the atom is part of ring check if it can donate
@@ -625,38 +636,14 @@ namespace RDKit {
// information in 'edon' - we will need it when we get to
// the Huckel rule later
edon[*ivi] = getAtomDonorTypeArom(at);
if (isAtomCandForArom(at, edon)) {
ncands++;
acands[*ivi]=1;
} else {
acands[*ivi]=0;
}
acands[*ivi]=isAtomCandForArom(at, edon[*ivi]);
if(!acands[*ivi]) allAromatic=false;
}
if(allAromatic){
cRings.push_back((*vivi));
}
}
// mark rings in the SSSR list that cannot be candidates for
// aromaticity
// For a ring to be ac andidate for aromaticity: all the atoms
// in the ring need to be aromatic
// Assume here that the SSSR
// function above will return a continuous list of rings
VECT_INT_VECT cRings; // holder for the candidate rings
INT_VECT cring;
for (VECT_INT_VECT_CI vivi = srings.begin();
vivi != srings.end(); ++vivi) {
bool rcand = true;
for (INT_VECT_CI ivi = vivi->begin();
ivi != vivi->end(); ++ivi) {
if (!acands[*ivi]) { // if an atom in the ring is not a candidate
rcand = false; // mark teh entire ring not a candidate
break;
}
}
if (rcand) {
cring = (*vivi);
cRings.push_back(cring);
}
}
// first convert all rings to bonds ids
VECT_INT_VECT brings;
RingUtils::convertToBonds(cRings, brings, mol);
@@ -667,8 +654,6 @@ namespace RDKit {
// useful to figure out fused systems
INT_INT_VECT_MAP neighMap;
RingUtils::makeRingNeighborMap(brings, neighMap);
// now loop over all the candidate rings and check the
// huckel rule - of course paying attention to fused systems.

View File

@@ -14,12 +14,8 @@
namespace Canon {
using namespace RDKit;
PossibleType makePossible(int rank,int atomIdx,Bond *bond) {
return std::make_pair(rank,std::make_pair(atomIdx,bond));
};
int _possibleComp(const PossibleType &arg1,const PossibleType &arg2) {
return (arg1.first < arg2.first);
return (arg1.get<0>() < arg2.get<0>());
};
void switchBondDir(Bond *bond){
@@ -433,7 +429,7 @@ namespace Canon {
rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) *
MAX_NATOMS*MAX_NATOMS;
}
possibles.push_back(makePossible(rank,otherIdx,theBond.get()));
possibles.push_back(PossibleType(rank,otherIdx,theBond.get()));
}
bondsPair.first++;
}
@@ -456,8 +452,12 @@ namespace Canon {
possiblesIt!=possibles.end();
possiblesIt++){
MolStack subStack;
#if 0
int possibleIdx = possiblesIt->second.first;
Bond *bond = possiblesIt->second.second;
#endif
int possibleIdx = possiblesIt->get<1>();
Bond *bond = possiblesIt->get<2>();
Atom *otherAtom=mol.getAtomWithIdx(possibleIdx);
INT_LIST otherTravList;
unsigned int lowestRingIdx;
@@ -487,7 +487,7 @@ namespace Canon {
// -----
cycleEndList.push_back(bond->getIdx());
cAIt=std::find(cyclesAvailable.begin(),
cyclesAvailable.end(),1);
cyclesAvailable.end(),1);
if(cAIt==cyclesAvailable.end()){
throw ValueErrorException("Too many rings open at once. SMILES cannot be generated.");
}
@@ -737,9 +737,11 @@ namespace Canon {
for(vviIt=cycles.begin();vviIt!=cycles.end();++vviIt) vviIt->resize(0);
// make sure that we've done the stereo perception:
MolOps::assignStereochemistry(mol,false);
if(!mol.hasProp("_StereochemDone")){
MolOps::assignStereochemistry(mol,false);
}
// we need ring information make sure findSSSR has been called before
// we need ring information; make sure findSSSR has been called before
// if not call now
if ( !mol.getRingInfo()->isInitialized() ) {
MolOps::findSSSR(mol);

View File

@@ -10,6 +10,8 @@
#ifndef _RD_CANON_H_
#define _RD_CANON_H_
#include <boost/tuple/tuple.hpp>
namespace RDKit {
class ROMol;
class Atom;
@@ -89,7 +91,7 @@ namespace Canon {
//! used to represent possible branches from an atom
typedef std::pair< int, std::pair< int, RDKit::Bond * > > PossibleType;
typedef boost::tuple<int,int,RDKit::Bond *> PossibleType;
//! returns a PossibleType
PossibleType makePossible(int rank,int atomIdx,RDKit::Bond *bond);
//! compare two PossibleTypes

View File

@@ -391,7 +391,9 @@ void test4MultipleProducts(){
TEST_ASSERT(prods[0].size()==2);
TEST_ASSERT(prods[0][0]->getNumAtoms()==8);
TEST_ASSERT(prods[0][1]->getNumAtoms()==2);
TEST_ASSERT(MolToSmiles(*prods[0][0])=="C1NC(=O)CNC1=O");
smi=MolToSmiles(*prods[0][0]);
std::cerr<<"smi: "<<smi<<std::endl;
TEST_ASSERT(smi=="C1NC(=O)CNC1=O");
TEST_ASSERT(MolToSmiles(*prods[0][1])=="OO");
reacts.clear();

View File

@@ -202,7 +202,7 @@ void test1(){
TEST_ASSERT(tmpMol);
catGen.addFragsFromMol(*tmpMol, fcat3);
TEST_ASSERT(fcat3->getNumEntries()==1);
//std::cout << fcat3->getEntryWithBitId(0)->getDescription() << std::endl;
std::cout << fcat3->getEntryWithBitId(0)->getDescription() << std::endl;
TEST_ASSERT(fcat3->getEntryWithBitId(0)->getDescription()=="C<-O>C(<-O>)<-N>");
BOOST_LOG(rdInfoLog) << "---- Done" << std::endl;

View File

@@ -487,11 +487,15 @@ namespace RDKit {
// first find the all the simple rings in the molecule
VECT_INT_VECT arings;
MolOps::findSSSR(mol, arings);
if(mol.getRingInfo()->isInitialized()){
arings = mol.getRingInfo()->atomRings();
} else {
MolOps::findSSSR(mol, arings);
}
// convert the rings to bonds ids
VECT_INT_VECT brings;
RingUtils::convertToBonds(arings, brings, mol);
brings = mol.getRingInfo()->bondRings();
//RingUtils::convertToBonds(arings, brings, mol);
// make a the neighbor map for the rings i.e. a ring is a
// neighbor to another candidate ring if it shares at least
@@ -545,18 +549,6 @@ namespace RDKit {
(*bi)->setIsAromatic(false);
}
}
for (ROMol::BondIterator bi=mol.beginBonds();
bi != mol.endBonds(); ++bi) {
// by now the bondtype should have already changed from aromatic
if (markAtomsBonds && (*bi)->getBondType() == Bond::AROMATIC) {
std::ostringstream errout;
errout << "Kekulization somehow did not convert bond " << (*bi)->getIdx();
std::string msg = errout.str();
BOOST_LOG(rdErrorLog) << msg<< std::endl;
throw MolSanitizeException(msg);
}
}
// ok some error checking here force a implicit valence
// calculation that should do some error checking by itself. In

View File

@@ -115,16 +115,14 @@ namespace RankAtoms{
INT_LIST &indicesInPlay,
double *adjMat,
INT_VECT &ranks,
VECT_INT_VECT *rankHistory=0,unsigned int stagnantTol=1){
VECT_INT_VECT *rankHistory,unsigned int stagnantTol){
PRECONDITION(!rankHistory||rankHistory->size()>=nAtoms,"bad rankHistory size");
bool done = false;
unsigned int numClasses = countClasses(ranks);
unsigned int lastNumClasses = 0;
unsigned int nCycles = 0;
unsigned int nStagnant=0;
if(stagnantTol==0){
stagnantTol = nAtoms;
}
//
// loop until either we finish or no improvement is seen
//

View File

@@ -399,31 +399,17 @@ namespace RDKit{
} // end of namespace SmilesWrite
// NOTE: I did not forget the const here... Producing SMILES for
// a molecule actually can change the molecule. Specifically,
// things like the directionality of bonds may be changed when
// the molecule is canonicalized.
// Odds are good that this may be one of those foot-shooting
// decisions and I'm gonna want to smack myself for doing this,
// but we'll try anyway.
std::string MolToSmiles(ROMol &mol,bool doIsomericSmiles,
std::string MolToSmiles(const ROMol &mol,bool doIsomericSmiles,
bool doKekule,int rootedAtAtom,bool canonical,
bool allBondsExplicit){
PRECONDITION(rootedAtAtom<0||static_cast<unsigned int>(rootedAtAtom)<mol.getNumAtoms(),
"rootedAtomAtom must be less than the number of atoms");
if(!mol.getNumAtoms()) return "";
ROMol tmol(mol);
ROMol tmol(mol,true);
if(doIsomericSmiles){
tmol.setProp("_doIsoSmiles",1);
} else if(tmol.hasProp("_doIsoSmiles")){
tmol.clearProp("_doIsoSmiles");
}
if(tmol.hasProp("_ringStereoWarning")){
tmol.clearProp("_ringStereoWarning");
}
#if 0
std::cout << "----------------------------" << std::endl;
std::cout << "MolToSmiles:"<< std::endl;
@@ -442,7 +428,20 @@ namespace RDKit{
// clean up the chirality on any atom that is marked as chiral,
// but that should not be:
if(doIsomericSmiles){
MolOps::assignStereochemistry(tmol,true);
if(!mol.hasProp("_StereochemDone")){
MolOps::assignStereochemistry(tmol,true);
} else {
tmol.setProp("_StereochemDone",1);
// we need the CIP codes:
for(unsigned int aidx=0;aidx<tmol.getNumAtoms();++aidx){
const Atom *oAt=mol.getAtomWithIdx(aidx);
if(oAt->hasProp("_CIPCode")){
std::string cipCode;
oAt->getProp("_CIPCode",cipCode);
tmol.getAtomWithIdx(aidx)->setProp("_CIPCode",cipCode);
}
}
}
}
if(canonical){
MolOps::rankAtoms(tmol,ranks);

View File

@@ -47,8 +47,7 @@ namespace RDKit{
//! \brief returns canonical SMILES for a molecule
/*!
\param mol : the molecule in question. NOTE that the molecule may
be modified as part of the canonicalization process.
\param mol : the molecule in question.
\param doIsomericSmiles : include stereochemistry and isotope information
in the SMILES
\param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
@@ -57,7 +56,7 @@ namespace RDKit{
\param canonical : if false, no attempt will be made to canonicalize the SMILES
\param allBondsExplicit : if true, symbols will be included for all bonds.
*/
std::string MolToSmiles(ROMol &mol,bool doIsomericSmiles=false,
std::string MolToSmiles(const ROMol &mol,bool doIsomericSmiles=false,
bool doKekule=false,int rootedAtAtom=-1,
bool canonical=true,
bool allBondsExplicit=false);