mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* update AvalonTools to version 2.0.1 * Improvements to 2D depiction and alignment/RMSD calculation - Refactored the straightenDepiction code which is now much simpler and more readable and supports a minimizeRotation parameter - added C++, Python and JS tests for the new minimizeRotation parameter - refactored tests to use CalcRMS rather than an internal implementation to compute RMS deviations - Removed duplicated code in CalcRMS() and getBestRMS() and made their APIs consistent with respect to supported parameters IMPORTANT NOTE: for backwards compatibility I set the CalcRMS() default for the new symmetrizeConjugatedTerminalGroups to false as this parameter was not originally supported. @greg: I would be very much in favor of setting this to true instead if you agree, even though it might change results for existing scripts, as I think it is a much more sensible default. - Improved documentation to clarify the difference between CalcRMS() and getBestRMS() - Added unit tests for CalcRMS() as there was none previously - Added tests for the additional CalcRMS() and getBestRMS() parameters - Added a new getBestAlignmentTransform() function - The CFFI function set_2d_coords_aligned() now returns the matching atoms similarly to the C++, Python and JS counterparts IMPORTANT NOTE: this required an API change for the additional char ** parameter used to return the match. Existing code using set_2d_coords_aligned() will fail to compile and will require a last NULL parameter to be added to compile again - Removed duplicated code between CFFI set_2d_coords_aligned() and JS generate_aligned_coords() - Added has_2d_coords() to the CFFI library - generate_aligned_coords() now supports JSON parameters and the previous versions are deprecated - set_2d_coords_aligned() and generate_aligned_coords() both support an alignOnly parameter (which defaults to false). 
If set to true, rather than re-generating a fresh 2D layout around templateMol, the existing coordinates (if any) are simply aligned to the provided templateMol. If the molecule has no coordinates, a set of 2D coordinates is generated independently of templateMol and then aligned to the provided templateMol - avoid that when acceptFailure is false set_2d_coords_aligned() and generate_aligned_coords() overwrite existing coordinates * - explicitly link testDepictor to MolAlign library * - add MolAlign dependency to testDepictor (rather than to the catch test as in the previous commit) - add a couple of tweaks * suppress compiler warnings (1st pass) * warnings: 2nd pass * warnings: 3rd pass * - alignOnly mode should also support allowRGroups * - fixed C++ build - added tests for allowRGroups+alignOnly combination * changes in response to review * added an entry to backward incompatible changes regarding set_2d_coords_aligned() Co-authored-by: Tosco, Paolo <paolo.tosco@novartis.com>
312 lines
11 KiB
C++
312 lines
11 KiB
C++
//
|
|
// Copyright (C) 2001-2018 Rational Discovery LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <RDGeneral/test.h>
|
|
#include "AlignMolecules.h"
|
|
#include "O3AAlignMolecules.h"
|
|
#include <GraphMol/FileParsers/MolSupplier.h>
|
|
#include <GraphMol/FileParsers/MolWriters.h>
|
|
#include <GraphMol/FileParsers/FileParsers.h>
|
|
#include <GraphMol/ROMol.h>
|
|
#include <GraphMol/Conformer.h>
|
|
#include <GraphMol/Substruct/SubstructMatch.h>
|
|
#include <Numerics/Vector.h>
|
|
#include <GraphMol/MolPickler.h>
|
|
#include <GraphMol/DistGeomHelpers/Embedder.h>
|
|
#include <GraphMol/SmilesParse/SmilesParse.h>
|
|
#include <GraphMol/ChemTransforms/MolFragmenter.h>
|
|
#include <GraphMol/MolTransforms/MolTransforms.h>
|
|
#include <ForceField/ForceField.h>
|
|
#include <GraphMol/ForceFieldHelpers/UFF/Builder.h>
|
|
|
|
using namespace RDKit;
|
|
|
|
void test1MolAlign() {
|
|
std::string rdbase = getenv("RDBASE");
|
|
std::string fname1 = rdbase + "/Code/GraphMol/MolAlign/test_data/1oir.mol";
|
|
ROMol *m1 = MolFileToMol(fname1);
|
|
std::string fname2 =
|
|
rdbase + "/Code/GraphMol/MolAlign/test_data/1oir_conf.mol";
|
|
ROMol *m2 = MolFileToMol(fname2);
|
|
|
|
double rmsd = MolAlign::alignMol(*m2, *m1);
|
|
TEST_ASSERT(RDKit::feq(rmsd, 0.6578) || RDKit::feq(rmsd, 1.0345));
|
|
|
|
std::string fname3 =
|
|
rdbase + "/Code/GraphMol/MolAlign/test_data/1oir_trans.mol";
|
|
ROMol *m3 = MolFileToMol(fname3);
|
|
const Conformer &conf1 = m2->getConformer(0);
|
|
const Conformer &conf2 = m3->getConformer(0);
|
|
unsigned int i, nat = m3->getNumAtoms();
|
|
for (i = 0; i < nat; i++) {
|
|
RDGeom::Point3D pt1 = conf1.getAtomPos(i);
|
|
RDGeom::Point3D pt2 = conf2.getAtomPos(i);
|
|
TEST_ASSERT(RDKit::feq(pt1.x, pt2.x, 0.001));
|
|
TEST_ASSERT(RDKit::feq(pt1.y, pt2.y, 0.001));
|
|
TEST_ASSERT(RDKit::feq(pt1.z, pt2.z, 0.001));
|
|
}
|
|
RDGeom::Transform3D trans;
|
|
rmsd = MolAlign::getAlignmentTransform(*m1, *m2, trans);
|
|
TEST_ASSERT(RDKit::feq(rmsd, 0.6578) || RDKit::feq(rmsd, 1.0345));
|
|
|
|
// specify conformations
|
|
rmsd = MolAlign::alignMol(*m1, *m2, 0, 0);
|
|
TEST_ASSERT(RDKit::feq(rmsd, 0.6578) || RDKit::feq(rmsd, 1.0345));
|
|
|
|
// provide an atom mapping
|
|
delete m1;
|
|
delete m2;
|
|
delete m3;
|
|
}
|
|
|
|
void test1GetBestRMS() {
|
|
std::string rdbase = getenv("RDBASE");
|
|
std::string fname =
|
|
rdbase + "/Code/GraphMol/MolAlign/test_data/probe_mol.sdf";
|
|
SDMolSupplier supplier(fname, true, false);
|
|
std::unique_ptr<ROMol> prb(supplier[1]);
|
|
std::unique_ptr<ROMol> ref(supplier[2]);
|
|
std::unique_ptr<ROMol> prbCopy1(new ROMol(*prb));
|
|
std::unique_ptr<ROMol> prbCopy2(new ROMol(*prb));
|
|
std::unique_ptr<ROMol> prbCopy3(new ROMol(*prb));
|
|
RDGeom::Transform3D bestTrans;
|
|
MatchVectType bestMatch;
|
|
|
|
// alignMol() would return this for the rms: 2.50561
|
|
// But the best rms is: 2.43449
|
|
double rmsdInPlace = MolAlign::CalcRMS(*prbCopy1, *ref);
|
|
TEST_ASSERT(RDKit::feq(rmsdInPlace, 2.6026));
|
|
double rmsd = MolAlign::getBestRMS(*prb, *ref);
|
|
TEST_ASSERT(RDKit::feq(rmsd, 2.43449));
|
|
double rmsdCopy = MolAlign::getBestAlignmentTransform(*prbCopy1, *ref,
|
|
bestTrans, bestMatch);
|
|
TEST_ASSERT(RDKit::feq(rmsd, rmsdCopy));
|
|
TEST_ASSERT(bestMatch.size() == ref->getNumAtoms());
|
|
|
|
SmilesParserParams params;
|
|
params.removeHs = false;
|
|
ROMOL_SPTR scaffold(SmilesToMol(
|
|
"N1C([H])([H])C([H])([H])C([H])([H])[N+]([H])([H])C([H])([H])C1([H])[H]",
|
|
params));
|
|
MatchVectType scaffoldMatch;
|
|
TEST_ASSERT(SubstructMatch(*ref, *scaffold, scaffoldMatch));
|
|
boost::dynamic_bitset<> scaffoldIndices(ref->getNumAtoms());
|
|
for (const auto &pair : scaffoldMatch) {
|
|
scaffoldIndices.set(pair.second);
|
|
}
|
|
std::vector<MatchVectType> matches;
|
|
TEST_ASSERT(SubstructMatch(*ref, *prb, matches, false));
|
|
std::vector<MatchVectType> matchesPruned(matches.size());
|
|
std::transform(matches.begin(), matches.end(), matchesPruned.begin(),
|
|
[&scaffoldIndices](const auto &match) {
|
|
MatchVectType matchPruned;
|
|
std::copy_if(match.begin(), match.end(),
|
|
std::back_inserter(matchPruned),
|
|
[&scaffoldIndices](const auto &pair) {
|
|
return scaffoldIndices.test(pair.second);
|
|
});
|
|
return matchPruned;
|
|
});
|
|
rmsdInPlace = MolAlign::CalcRMS(*prbCopy2, *ref, -1, -1, matchesPruned);
|
|
TEST_ASSERT(RDKit::feq(rmsdInPlace, 2.5672));
|
|
rmsd = MolAlign::getBestRMS(*prb, *ref, -1, -1, matchesPruned);
|
|
TEST_ASSERT(RDKit::feq(rmsd, 1.14329));
|
|
rmsdCopy = MolAlign::getBestAlignmentTransform(
|
|
*prbCopy2, *ref, bestTrans, bestMatch, -1, -1, matchesPruned);
|
|
TEST_ASSERT(RDKit::feq(rmsd, rmsdCopy));
|
|
TEST_ASSERT(bestMatch.size() == scaffoldMatch.size());
|
|
RDNumeric::DoubleVector weights(scaffoldIndices.size(), 1.0);
|
|
for (unsigned int i = 0; i < scaffoldIndices.size(); ++i) {
|
|
if (scaffoldIndices.test(i)) {
|
|
weights.setVal(i, 100.0);
|
|
}
|
|
}
|
|
rmsdInPlace =
|
|
MolAlign::CalcRMS(*prbCopy3, *ref, -1, -1, matches, 1000, true, &weights);
|
|
TEST_ASSERT(RDKit::feq(rmsdInPlace, 17.7959));
|
|
rmsd =
|
|
MolAlign::getBestRMS(*prb, *ref, -1, -1, matches, 1000, true, &weights);
|
|
TEST_ASSERT(RDKit::feq(rmsd, 10.9681));
|
|
rmsdCopy = MolAlign::getBestAlignmentTransform(*prbCopy3, *ref, bestTrans,
|
|
bestMatch, -1, -1, matches,
|
|
1000, true, &weights);
|
|
TEST_ASSERT(RDKit::feq(rmsd, rmsdCopy));
|
|
TEST_ASSERT(bestMatch.size() == ref->getNumAtoms());
|
|
}
|
|
|
|
void test1MolWithQueryAlign() {
|
|
// identical to test1MolAlign except we replace one atom with a QueryAtom
|
|
// instead
|
|
|
|
std::string rdbase = getenv("RDBASE");
|
|
std::string fname1 = rdbase + "/Code/GraphMol/MolAlign/test_data/1oir.mol";
|
|
auto *m1 = MolFileToMol(fname1);
|
|
auto *a1 = new QueryAtom(6);
|
|
std::string fname2 =
|
|
rdbase + "/Code/GraphMol/MolAlign/test_data/1oir_conf.mol";
|
|
auto *m2 = MolFileToMol(fname2);
|
|
auto *a2 = new QueryAtom(6);
|
|
|
|
// we replace the same nitrogen instead with a null
|
|
// query 28 and 19 are the "same" atoms
|
|
m1->replaceAtom(28, a1);
|
|
m2->replaceAtom(19, a2);
|
|
|
|
double rmsd = MolAlign::alignMol(*m2, *m1);
|
|
TEST_ASSERT(RDKit::feq(rmsd, 0.6578) || RDKit::feq(rmsd, 1.0345));
|
|
|
|
std::string fname3 =
|
|
rdbase + "/Code/GraphMol/MolAlign/test_data/1oir_trans.mol";
|
|
|
|
auto *m3 = MolFileToMol(fname3);
|
|
auto *a3 = new QueryAtom(5);
|
|
m3->replaceAtom(0, a3);
|
|
|
|
const Conformer &conf1 = m2->getConformer(0);
|
|
const Conformer &conf2 = m3->getConformer(0);
|
|
unsigned int i, nat = m3->getNumAtoms();
|
|
for (i = 0; i < nat; i++) {
|
|
RDGeom::Point3D pt1 = conf1.getAtomPos(i);
|
|
RDGeom::Point3D pt2 = conf2.getAtomPos(i);
|
|
TEST_ASSERT(RDKit::feq(pt1.x, pt2.x, 0.001));
|
|
TEST_ASSERT(RDKit::feq(pt1.y, pt2.y, 0.001));
|
|
TEST_ASSERT(RDKit::feq(pt1.z, pt2.z, 0.001));
|
|
}
|
|
|
|
RDGeom::Transform3D trans;
|
|
rmsd = MolAlign::getAlignmentTransform(*m1, *m2, trans);
|
|
TEST_ASSERT(RDKit::feq(rmsd, 0.6578) || RDKit::feq(rmsd, 1.0345));
|
|
|
|
// specify conformations
|
|
rmsd = MolAlign::alignMol(*m1, *m2, 0, 0);
|
|
TEST_ASSERT(RDKit::feq(rmsd, 0.6578) || RDKit::feq(rmsd, 1.0345));
|
|
|
|
// provide an atom mapping
|
|
delete m1;
|
|
delete m2;
|
|
delete m3;
|
|
delete a1;
|
|
delete a2;
|
|
delete a3;
|
|
}
|
|
|
|
void test2AtomMap() {
|
|
std::string rdbase = getenv("RDBASE");
|
|
std::string fname1 = rdbase + "/Code/GraphMol/MolAlign/test_data/1oir.mol";
|
|
ROMol *m1 = MolFileToMol(fname1);
|
|
std::string fname2 =
|
|
rdbase + "/Code/GraphMol/MolAlign/test_data/1oir_conf.mol";
|
|
ROMol *m2 = MolFileToMol(fname2);
|
|
MatchVectType atomMap;
|
|
atomMap.push_back(std::pair<int, int>(18, 27));
|
|
atomMap.push_back(std::pair<int, int>(13, 23));
|
|
atomMap.push_back(std::pair<int, int>(21, 14));
|
|
atomMap.push_back(std::pair<int, int>(24, 7));
|
|
atomMap.push_back(std::pair<int, int>(9, 19));
|
|
atomMap.push_back(std::pair<int, int>(16, 30));
|
|
double rmsd = MolAlign::alignMol(*m2, *m1, 0, 0, &atomMap);
|
|
TEST_ASSERT(RDKit::feq(rmsd, 0.8525));
|
|
delete m1;
|
|
delete m2;
|
|
}
|
|
|
|
void test3Weights() {
|
|
std::string rdbase = getenv("RDBASE");
|
|
std::string fname1 = rdbase + "/Code/GraphMol/MolAlign/test_data/1oir.mol";
|
|
ROMol *m1 = MolFileToMol(fname1);
|
|
std::string fname2 =
|
|
rdbase + "/Code/GraphMol/MolAlign/test_data/1oir_conf.mol";
|
|
ROMol *m2 = MolFileToMol(fname2);
|
|
MatchVectType atomMap;
|
|
atomMap.push_back(std::pair<int, int>(18, 27));
|
|
atomMap.push_back(std::pair<int, int>(13, 23));
|
|
atomMap.push_back(std::pair<int, int>(21, 14));
|
|
atomMap.push_back(std::pair<int, int>(24, 7));
|
|
atomMap.push_back(std::pair<int, int>(9, 19));
|
|
atomMap.push_back(std::pair<int, int>(16, 30));
|
|
|
|
RDNumeric::DoubleVector wts(6);
|
|
wts.setVal(0, 1.0);
|
|
wts.setVal(1, 1.0);
|
|
wts.setVal(2, 1.0);
|
|
wts.setVal(3, 1.0);
|
|
wts.setVal(4, 1.0);
|
|
wts.setVal(5, 2.0);
|
|
double rmsd = MolAlign::alignMol(*m2, *m1, 0, 0, &atomMap, &wts);
|
|
TEST_ASSERT(RDKit::feq(rmsd, 0.9513));
|
|
delete m1;
|
|
delete m2;
|
|
}
|
|
|
|
void testIssue241() {
|
|
std::string rdbase = getenv("RDBASE");
|
|
std::string fname1 =
|
|
rdbase + "/Code/GraphMol/MolAlign/test_data/Issue241.mol";
|
|
ROMol *m1 = MolFileToMol(fname1);
|
|
std::string res;
|
|
MolPickler::pickleMol(*m1, res);
|
|
auto *ref = new ROMol(res);
|
|
DGeomHelpers::EmbedMolecule(*ref, 30, 239 * 10);
|
|
ForceFields::ForceField *ff1 = UFF::constructForceField(*ref);
|
|
ff1->initialize();
|
|
ff1->minimize(200, 1e-8, 1e-6);
|
|
|
|
std::string pkl2;
|
|
MolPickler::pickleMol(*m1, pkl2);
|
|
auto *probe = new ROMol(pkl2);
|
|
DGeomHelpers::EmbedMolecule(*probe, 30, 239 * 10);
|
|
ForceFields::ForceField *ff2 = UFF::constructForceField(*probe);
|
|
ff2->initialize();
|
|
ff2->minimize(200, 1e-8, 1e-6);
|
|
|
|
double rmsd = MolAlign::alignMol(*ref, *probe);
|
|
|
|
delete ff1;
|
|
delete ff2;
|
|
delete m1;
|
|
delete ref;
|
|
delete probe;
|
|
|
|
TEST_ASSERT(RDKit::feq(rmsd, 0.0));
|
|
}
|
|
|
|
// Test driver: prints a header, runs every MolAlign test in a fixed order
// (each preceded by a separator line and its own banner) and prints a footer.
int main() {
  std::cout << "***********************************************************\n";
  std::cout << "Testing MolAlign\n";

#if 1
  // Pairs the banner printed before a test with the test function itself.
  struct TestCase {
    const char *banner;
    void (*run)();
  };
  const TestCase testCases[] = {
      {"\t test1MolAlign \n\n", test1MolAlign},
      {"\t test1GetBestRMS \n\n", test1GetBestRMS},
      {"\t test1MolWithQueryAlign \n\n", test1MolWithQueryAlign},
      {"\t test2AtomMap \n\n", test2AtomMap},
      {"\t test3Weights \n\n", test3Weights},
      {"\t testIssue241 \n\n", testIssue241},
  };

  for (const auto &testCase : testCases) {
    std::cout << "\t---------------------------------\n";
    std::cout << testCase.banner;
    testCase.run();
  }
#endif

  std::cout << "***********************************************************\n";
}
|