// $Id$ // // Copyright (C) 2004-2012 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include "Embedder.h" #include #include #include #include #include "BoundsMatrixBuilder.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ERROR_TOL 0.00001 namespace RDKit { namespace DGeomHelpers { typedef std::pair INT_PAIR; typedef std::vector INT_PAIR_VECT; bool _sameSide(const RDGeom::Point3D &v1,const RDGeom::Point3D &v2, const RDGeom::Point3D &v3,const RDGeom::Point3D &v4, const RDGeom::Point3D &p0, double tol=0.1){ RDGeom::Point3D normal = (v2-v1).crossProduct(v3-v1); double d1 = normal.dotProduct(v4-v1); double d2 = normal.dotProduct(p0-v1); //std::cerr<<" "<d_idx0 == chiralSet->d_idx4) { // this happens for three-coordinate centers return true; } RDGeom::Point3D p0((*positions[chiralSet->d_idx0])[0],(*positions[chiralSet->d_idx0])[1],(*positions[chiralSet->d_idx0])[2]); RDGeom::Point3D p1((*positions[chiralSet->d_idx1])[0],(*positions[chiralSet->d_idx1])[1],(*positions[chiralSet->d_idx1])[2]); RDGeom::Point3D p2((*positions[chiralSet->d_idx2])[0],(*positions[chiralSet->d_idx2])[1],(*positions[chiralSet->d_idx2])[2]); RDGeom::Point3D p3((*positions[chiralSet->d_idx3])[0],(*positions[chiralSet->d_idx3])[1],(*positions[chiralSet->d_idx3])[2]); RDGeom::Point3D p4((*positions[chiralSet->d_idx4])[0],(*positions[chiralSet->d_idx4])[1],(*positions[chiralSet->d_idx4])[2]); //RDGeom::Point3D centroid = (p1+p2+p3+p4)/4.; bool res = _sameSide(p1,p2,p3,p4,p0) && _sameSide(p2,p3,p4,p1,p0) && _sameSide(p3,p4,p1,p2,p0) && _sameSide(p4,p1,p2,p3,p0); //std::cerr<<"civ:"<d_idx0<<" "<d_idx1<<" "<d_idx2<<" "<d_idx3<<" "<d_idx4<<"->"< &atoms, const DistGeom::BoundsMatrix &mmat, const RDGeom::PointPtrVect &positions) { //unsigned int N = mmat.numRows(); //std::cerr << N << " " << atoms.size() << std::endl; // loop over all pair of atoms for (unsigned int i = 0; i < atoms.size()-1; ++i) { for (unsigned int j = i+1; j < atoms.size(); ++j) { int a1 = atoms[i]; int a2 = atoms[j]; RDGeom::Point3D p0((*positions[a1])[0],(*positions[a1])[1],(*positions[a1])[2]); RDGeom::Point3D p1((*positions[a2])[0],(*positions[a2])[1],(*positions[a2])[2]); double d2 = (p0-p1).length(); // distance double lb = mmat.getLowerBound(a1,a2); double ub = mmat.getUpperBound(a1,a2); // bounds if (((d2 < lb) && (fabs(d2-lb) > 0.17)) || ((d2 > ub) && (fabs(d2-ub) > 0.17))) { //std::cerr << a1 << " " << a2 << ":" << d2 << " " << lb << " " << ub << " " << fabs(d2-lb) << " " << fabs(d2-ub) << std::endl; return false; } } } return true; } // the minimization using experimental torsion angle preferences void _minimizeWithExpTorsions(RDGeom::PointPtrVect &positions, DistGeom::BoundsMatPtr mmat, double optimizerForceTol, double basinThresh, const std::vector > &bonds, const std::vector > &angles, const std::vector > &expTorsionAtoms, const std::vector, std::vector > > &expTorsionAngles, const std::vector > &improperAtoms, const std::vector &atomNums, bool useBasicKnowledge) { // convert to 3D positions and create coordMap RDGeom::Point3DPtrVect positions3D; for (unsigned int p = 0; p < positions.size(); ++p) { positions3D.push_back(new RDGeom::Point3D((*positions[p])[0], (*positions[p])[1], (*positions[p])[2])); } // create the force field ForceFields::ForceField *field; if (useBasicKnowledge) { // ETKDG or KDG field = DistGeom::construct3DForceField(*mmat, positions3D, bonds, angles, expTorsionAtoms, expTorsionAngles, improperAtoms, atomNums); } else { // plain ETDG field = DistGeom::constructPlain3DForceField(*mmat, positions3D, bonds, angles, expTorsionAtoms, expTorsionAngles, atomNums); } // minimize! int nPasses = 0; field->initialize(); //std::cout << "Field with torsion constraints: " << field->calcEnergy() << " " << ERROR_TOL << std::endl; if (field->calcEnergy() > ERROR_TOL) { int needMore = 1; //while (needMore) { needMore = field->minimize(300, optimizerForceTol); // ++nPasses; //} } //std::cout << field->calcEnergy() << std::endl; delete field; // overwrite positions and delete the 3D ones for (unsigned int i = 0; i < positions3D.size(); ++i) { (*positions[i])[0] = (*positions3D[i])[0]; (*positions[i])[1] = (*positions3D[i])[1]; (*positions[i])[2] = (*positions3D[i])[2]; delete positions3D[i]; } } bool _embedPoints(RDGeom::PointPtrVect *positions, const DistGeom::BoundsMatPtr mmat, bool useRandomCoords,double boxSizeMult, bool randNegEig, unsigned int numZeroFail, double optimizerForceTol, double basinThresh, int seed, unsigned int maxIterations, const DistGeom::VECT_CHIRALSET *chiralCenters, bool enforceChirality, bool useExpTorsionAnglePrefs, bool useBasicKnowledge, const std::vector > &bonds, const std::vector > &angles, const std::vector > &expTorsionAtoms, const std::vector, std::vector > > &expTorsionAngles, const std::vector > &improperAtoms, const std::vector &atomNums){ unsigned int nat = positions->size(); if(maxIterations==0){ maxIterations=10*nat; } RDNumeric::DoubleSymmMatrix distMat(nat, 0.0); // The basin threshold just gets us into trouble when we're using // random coordinates since it ends up ignoring 1-4 (and higher) // interactions. This causes us to get folded-up (and self-penetrating) // conformations for large flexible molecules if(useRandomCoords) basinThresh=1e8; RDKit::double_source_type *rng=0; RDKit::rng_type *generator; RDKit::uniform_double *distrib; if(seed>0){ generator=new RDKit::rng_type(42u); generator->seed(seed); distrib=new RDKit::uniform_double(0.0,1.0); rng = new RDKit::double_source_type(*generator,*distrib); } else { rng = &RDKit::getDoubleRandomSource(); } bool gotCoords = false; unsigned int iter = 0; double largestDistance=-1.0; RDUNUSED_PARAM(largestDistance); while ((gotCoords == false) && (iter < maxIterations)) { ++iter; if(!useRandomCoords){ largestDistance=DistGeom::pickRandomDistMat(*mmat, distMat, *rng); gotCoords = DistGeom::computeInitialCoords(distMat, *positions,*rng, randNegEig, numZeroFail); } else { double boxSize; if(boxSizeMult>0){ boxSize=5.*boxSizeMult; } else { boxSize=-1*boxSizeMult; } gotCoords = DistGeom::computeRandomCoords(*positions,boxSize,*rng); } if (gotCoords) { ForceFields::ForceField *field = DistGeom::constructForceField(*mmat, *positions, *chiralCenters, 1.0, 0.1, 0,basinThresh); unsigned int nPasses=0; field->initialize(); //std::cerr<<"FIELD E: "<calcEnergy()<calcEnergy() > ERROR_TOL){ int needMore = 1; while(needMore){ needMore = field->minimize(400,optimizerForceTol); ++nPasses; } } delete field; field=NULL; //std::cerr<<" "<calcEnergy()<<" after npasses: "<size()>0){ // check the chiral volume: BOOST_FOREACH(DistGeom::ChiralSetPtr chiralSet, *chiralCenters){ double vol = DistGeom::ChiralViolationContrib::calcChiralVolume(chiralSet->d_idx1,chiralSet->d_idx2, chiralSet->d_idx3,chiralSet->d_idx4, *positions); double lb=chiralSet->getLowerVolumeBound(); double ub=chiralSet->getUpperVolumeBound(); if( ( lb>0 && vol < lb && (lb - vol)/lb > .2 ) || ( ub<0 && vol > ub && (vol - ub)/ub > .2 ) ){ //std::cerr<<" fail! ("<d_idx0<<") iter: "<size()>0 || useRandomCoords) ) { ForceFields::ForceField *field2 = DistGeom::constructForceField(*mmat, *positions, *chiralCenters, 0.2, 1.0, 0, basinThresh); field2->initialize(); //std::cerr<<"FIELD2 E: "<calcEnergy()<calcEnergy() > ERROR_TOL){ int needMore = 1; int nPasses2=0; while(needMore){ needMore = field2->minimize(200,optimizerForceTol); ++nPasses2; } //std::cerr<<" "<calcEnergy()<<" after npasses2: "<size() > 0)) { // "distance matrix" chirality test std::set atoms; BOOST_FOREACH(DistGeom::ChiralSetPtr chiralSet, *chiralCenters) { if(chiralSet->d_idx0 != chiralSet->d_idx4) { atoms.insert(chiralSet->d_idx0); atoms.insert(chiralSet->d_idx1); atoms.insert(chiralSet->d_idx2); atoms.insert(chiralSet->d_idx3); atoms.insert(chiralSet->d_idx4); } } std::vector atomsToCheck(atoms.begin(), atoms.end()); if (atomsToCheck.size() > 0) { if (!_boundsFulfilled(atomsToCheck, *mmat, *positions)) { gotCoords=false; } } // "center in volume" chirality test if (gotCoords) { BOOST_FOREACH(DistGeom::ChiralSetPtr chiralSet, *chiralCenters){ // it could happen that the centroid is outside the volume defined by the other // four points. That is also a fail. if(!_centerInVolume(chiralSet,*positions)){ //std::cerr<<" fail2! ("<d_idx0<<") iter: "<0 && rng){ delete rng; delete generator; delete distrib; } return gotCoords; } void _findChiralSets(const ROMol &mol, DistGeom::VECT_CHIRALSET &chiralCenters) { ROMol::ConstAtomIterator ati; INT_VECT nbrs; ROMol::OEDGE_ITER beg,end; //Atom *oatom; for (ati = mol.beginAtoms(); ati != mol.endAtoms(); ati++) { if ((*ati)->getAtomicNum() != 1) { //skip hydrogens Atom::ChiralType chiralType=(*ati)->getChiralTag(); if (chiralType==Atom::CHI_TETRAHEDRAL_CW || chiralType==Atom::CHI_TETRAHEDRAL_CCW) { // make a chiral set from the neighbors nbrs.clear(); nbrs.reserve(4); // find the neighbors of this atom and enter them into the // nbr list boost::tie(beg,end) = mol.getAtomBonds(*ati); while (beg != end) { nbrs.push_back(mol[*beg]->getOtherAtom(*ati)->getIdx()); ++beg; } // if we have less than 4 heavy atoms as neighbors, // we need to include the chiral center into the mix // we should at least have 3 though bool includeSelf = false; RDUNUSED_PARAM(includeSelf); CHECK_INVARIANT(nbrs.size() >= 3, "Cannot be a chiral center"); if (nbrs.size() < 4) { nbrs.insert(nbrs.end(), (*ati)->getIdx()); includeSelf = true; } // now create a chiral set and set the upper and lower bound on the volume if (chiralType == Atom::CHI_TETRAHEDRAL_CCW) { // postive chiral volume DistGeom::ChiralSet *cset = new DistGeom::ChiralSet((*ati)->getIdx(), nbrs[0], nbrs[1], nbrs[2], nbrs[3], 5.0, 100.0); DistGeom::ChiralSetPtr cptr(cset); chiralCenters.push_back(cptr); } else { DistGeom::ChiralSet *cset = new DistGeom::ChiralSet((*ati)->getIdx(), nbrs[0], nbrs[1], nbrs[2], nbrs[3], -100.0, -5.0); DistGeom::ChiralSetPtr cptr(cset); chiralCenters.push_back(cptr); } } // if block -chirality check } // if block - heavy atom check } // for loop over atoms } // end of _findChiralSets void _fillAtomPositions(RDGeom::Point3DConstPtrVect &pts, const Conformer &conf) { unsigned int na = conf.getNumAtoms(); pts.clear(); unsigned int ai; pts.reserve(na); for (ai = 0; ai < na; ++ai) { pts.push_back(&conf.getAtomPos(ai)); } } bool _isConfFarFromRest(const ROMol &mol, const Conformer &conf, double threshold) { // NOTE: it is tempting to use some triangle inequality to prune // conformations here but some basic testing has shown very // little advantage and given that the time for pruning fades in // comparison to embedding - we will use a simple for loop below // over all conformation until we find a match ROMol::ConstConformerIterator confi; RDGeom::Point3DConstPtrVect refPoints, prbPoints; _fillAtomPositions(refPoints, conf); bool res = true; unsigned int na = conf.getNumAtoms(); double ssrThres = na*threshold*threshold; RDGeom::Transform3D trans; double ssr; for (confi = mol.beginConformers(); confi != mol.endConformers(); confi++) { _fillAtomPositions(prbPoints, *(*confi)); ssr = RDNumeric::Alignments::AlignPoints(refPoints, prbPoints, trans); if (ssr < ssrThres) { res = false; break; } } return res; } int EmbedMolecule(ROMol &mol, unsigned int maxIterations, int seed, bool clearConfs, bool useRandomCoords,double boxSizeMult, bool randNegEig, unsigned int numZeroFail, const std::map *coordMap, double optimizerForceTol, bool ignoreSmoothingFailures, bool enforceChirality, bool useExpTorsionAnglePrefs, bool useBasicKnowledge, bool verbose, double basinThresh) { INT_VECT confIds; EmbedMultipleConfs(mol,confIds,1,1,maxIterations,seed,clearConfs, useRandomCoords,boxSizeMult,randNegEig, numZeroFail,-1.0,coordMap,optimizerForceTol, ignoreSmoothingFailures, enforceChirality, useExpTorsionAnglePrefs, useBasicKnowledge, verbose, basinThresh); int res; if(confIds.size()){ res=confIds[0]; } else { res=-1; } return res; } void adjustBoundsMatFromCoordMap(DistGeom::BoundsMatPtr mmat,unsigned int nAtoms, const std::map *coordMap){ RDUNUSED_PARAM(nAtoms); // std::cerr<getVal(i,j); // } // std::cerr<::const_iterator iIt=coordMap->begin(); iIt!=coordMap->end();++iIt){ int iIdx=iIt->first; const RDGeom::Point3D &iPoint=iIt->second; std::map::const_iterator jIt=iIt; while(++jIt != coordMap->end()){ int jIdx=jIt->first; const RDGeom::Point3D &jPoint=jIt->second; double dist=(iPoint-jPoint).length(); mmat->setUpperBound(iIdx,jIdx,dist); mmat->setLowerBound(iIdx,jIdx,dist); } } // std::cerr<getVal(i,j); // } // std::cerr< *confsOk; bool fourD; INT_VECT *fragMapping; std::vector< Conformer * > *confs; unsigned int fragIdx; DistGeom::BoundsMatPtr mmat; bool useRandomCoords; double boxSizeMult; bool randNegEig; unsigned int numZeroFail; double optimizerForceTol; double basinThresh; int seed; unsigned int maxIterations; DistGeom::VECT_CHIRALSET const *chiralCenters; bool enforceChirality; bool useExpTorsionAnglePrefs; bool useBasicKnowledge; std::vector > *bonds; std::vector > *angles; std::vector > *expTorsionAtoms; std::vector, std::vector > > *expTorsionAngles; std::vector > *improperAtoms; std::vector *atomNums; } EmbedArgs; void embedHelper_(int threadId, int numThreads, EmbedArgs *eargs ){ unsigned int nAtoms=eargs->mmat->numRows(); RDGeom::PointPtrVect positions; for (unsigned int i = 0; i < nAtoms; ++i) { if(eargs->fourD){ positions.push_back(new RDGeom::PointND(4)); } else { positions.push_back(new RDGeom::Point3D()); } } for (size_t ci=0; ciconfs->size(); ci++) { if(rdcast(ci%numThreads) != threadId) continue; if(!(*eargs->confsOk)[ci]){ // if one of the fragments here has already failed, there's no // sense in embedding this one continue; } bool gotCoords = _embedPoints(&positions, eargs->mmat, eargs->useRandomCoords,eargs->boxSizeMult, eargs->randNegEig, eargs->numZeroFail, eargs->optimizerForceTol, eargs->basinThresh, (ci+1)*eargs->seed, eargs->maxIterations, eargs->chiralCenters, eargs->enforceChirality, eargs->useExpTorsionAnglePrefs, eargs->useBasicKnowledge, *eargs->bonds, *eargs->angles, *eargs->expTorsionAtoms, *eargs->expTorsionAngles, *eargs->improperAtoms, *eargs->atomNums); if (gotCoords) { Conformer *conf = (*eargs->confs)[ci]; unsigned int fragAtomIdx=0; for (unsigned int i = 0; i < (*eargs->confs)[0]->getNumAtoms();++i){ if((*eargs->fragMapping)[i]==static_cast(eargs->fragIdx) ){ conf->setAtomPos(i, RDGeom::Point3D((*positions[fragAtomIdx])[0], (*positions[fragAtomIdx])[1], (*positions[fragAtomIdx])[2])); ++fragAtomIdx; } } } else { (*eargs->confsOk)[ci]=0; } } for (unsigned int i = 0; i < nAtoms; ++i) { delete positions[i]; } } } //end of namespace detail void EmbedMultipleConfs(ROMol &mol, INT_VECT &res, unsigned int numConfs, int numThreads, unsigned int maxIterations, int seed, bool clearConfs, bool useRandomCoords,double boxSizeMult, bool randNegEig, unsigned int numZeroFail, double pruneRmsThresh, const std::map *coordMap, double optimizerForceTol, bool ignoreSmoothingFailures, bool enforceChirality, bool useExpTorsionAnglePrefs, bool useBasicKnowledge, bool verbose, double basinThresh){ if(!mol.getNumAtoms()){ throw ValueErrorException("molecule has no atoms"); } INT_VECT fragMapping; std::vector molFrags=MolOps::getMolFrags(mol,true,&fragMapping); if(molFrags.size()>1 && coordMap){ BOOST_LOG(rdWarningLog)<<"Constrained conformer generation (via the coordMap argument) does not work with molecules that have multiple fragments."< confs; confs.reserve(numConfs); for(unsigned int i=0;i confsOk(numConfs); confsOk.set(); if (clearConfs) { res.clear(); mol.clearConformers(); } for(unsigned int fragIdx=0;fragIdxgetNumAtoms(); DistGeom::BoundsMatrix *mat = new DistGeom::BoundsMatrix(nAtoms); DistGeom::BoundsMatPtr mmat(mat); initBoundsMat(mmat); double tol=0.0; std::vector > expTorsionAtoms; std::vector, std::vector > > expTorsionAngles; std::vector > improperAtoms; std::vector > bonds; std::vector > angles; std::vector atomNums(nAtoms); if (useExpTorsionAnglePrefs || useBasicKnowledge) { ForceFields::CrystalFF::getExperimentalTorsions(*piece, expTorsionAtoms, expTorsionAngles, improperAtoms, useExpTorsionAnglePrefs, useBasicKnowledge, verbose); setTopolBounds(*piece, mmat, bonds, angles, true, false); for (int i = 0; i < nAtoms; ++i) { atomNums[i] = (*piece).getAtomWithIdx(i)->getAtomicNum(); } } else { setTopolBounds(*piece, mmat, true, false); } if(coordMap){ adjustBoundsMatFromCoordMap(mmat,nAtoms,coordMap); tol=0.05; } if (!DistGeom::triangleSmoothBounds(mmat,tol)) { // ok this bound matrix failed to triangle smooth - re-compute the bounds matrix // without 15 bounds and with VDW scaling initBoundsMat(mmat); setTopolBounds(*piece, mmat, false, true); if(coordMap){ adjustBoundsMatFromCoordMap(mmat,nAtoms,coordMap); } // try triangle smoothing again if (!DistGeom::triangleSmoothBounds(mmat,tol)) { // ok, we're not going to be able to smooth this, if(ignoreSmoothingFailures){ // proceed anyway with the more relaxed bounds matrix initBoundsMat(mmat); setTopolBounds(*piece, mmat, false, true); if(coordMap){ adjustBoundsMatFromCoordMap(mmat,nAtoms,coordMap); } } else { BOOST_LOG(rdWarningLog)<<"Could not triangle bounds smooth molecule."<getNumAtoms();++li){ for(unsigned int lj=li+1;ljgetNumAtoms();++lj){ std::cerr<<" ("<getLowerBound(li,lj)<<" -> "<getUpperBound(li,lj)< 0) { fourD = true; } #ifdef RDK_THREADSAFE_SSS boost::thread_group tg; #endif numThreads = getNumThreadsToUse(numThreads); detail::EmbedArgs eargs={&confsOk, fourD, &fragMapping,&confs, fragIdx, mmat, useRandomCoords,boxSizeMult, randNegEig, numZeroFail, optimizerForceTol, basinThresh, seed, maxIterations, &chiralCenters, enforceChirality, useExpTorsionAnglePrefs, useBasicKnowledge, &bonds, &angles, &expTorsionAtoms, &expTorsionAngles, &improperAtoms, &atomNums}; if(numThreads==1){ detail::embedHelper_(0,1,&eargs); } #ifdef RDK_THREADSAFE_SSS else { for(int tid=0;tid 0.0 && !_isConfFarFromRest(mol, *conf, pruneRmsThresh)) { delete conf; } else { int confId = (int)mol.addConformer(conf, true); res.push_back(confId); } } else { delete conf; } } } INT_VECT EmbedMultipleConfs(ROMol &mol, unsigned int numConfs, unsigned int maxIterations, int seed, bool clearConfs, bool useRandomCoords,double boxSizeMult, bool randNegEig, unsigned int numZeroFail, double pruneRmsThresh, const std::map *coordMap, double optimizerForceTol, bool ignoreSmoothingFailures, bool enforceChirality, bool useExpTorsionAnglePrefs, bool useBasicKnowledge, bool verbose, double basinThresh){ INT_VECT res; EmbedMultipleConfs(mol,res,numConfs,1, maxIterations,seed,clearConfs, useRandomCoords,boxSizeMult, randNegEig,numZeroFail, pruneRmsThresh, coordMap, optimizerForceTol, ignoreSmoothingFailures, enforceChirality, useExpTorsionAnglePrefs, useBasicKnowledge, verbose, basinThresh); return res; } } // end of namespace DGeomHelpers } // end of namespace RDKit