mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* update .gitignore * foundation for 3D descriptors move PBF into core * cleanup work * a bit more cleanup * move the principal moments calc to MolTransforms * cleanup * cleanups * add caching of the principal moments and values * do not include the 3D descriptors in MolDescriptors.h * the properties are computed * add PMI descriptors and tests * add tests for NPR descriptors * return 0 when the largest PMI is zero * PMI edge case tests * NPR edge case tests * PBF edge case tests * PBF edge case tests * more edge cases * add a few more 3d descriptors * add defns to docs * tests for the new descriptors * add versions to new descriptors * add 3d descriptors to python wrapper * add eigen support to the travis build * try to get non-windows builds working * remove computeCovarianceMatrix() from java wrapper * make pmi property names "private"
438 lines
13 KiB
C++
438 lines
13 KiB
C++
//
|
|
// Copyright (C) 2016 Greg Landrum
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
|
|
#ifdef _MSC_VER
|
|
// disable warnings about getenv in visual C++
|
|
#define _CRT_SECURE_NO_WARNINGS
|
|
#endif
|
|
|
|
|
|
#include <iostream>
|
|
#include <fstream>
|
|
|
|
#include <RDGeneral/BoostStartInclude.h>
|
|
#include <boost/algorithm/string.hpp>
|
|
#include <boost/algorithm/string/trim.hpp>
|
|
#include <boost/lexical_cast.hpp>
|
|
#include <RDGeneral/BoostEndInclude.h>
|
|
|
|
#include <RDGeneral/Invariant.h>
|
|
#include <RDGeneral/RDLog.h>
|
|
#include <RDGeneral/utils.h>
|
|
#include <RDGeneral/StreamOps.h>
|
|
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/SmilesParse/SmilesParse.h>
|
|
#include <GraphMol/FileParsers/FileParsers.h>
|
|
#include <GraphMol/FileParsers/MolSupplier.h>
|
|
|
|
#include <GraphMol/Descriptors/MolDescriptors3D.h>
|
|
|
|
using namespace RDKit;
|
|
using namespace RDKit::Descriptors;
|
|
|
|
// Checks whether a computed value agrees with its reference value.
//
//   inm - label (e.g. the molecule name) included in the mismatch message
//   ref - expected value
//   val - computed value
//   tol - maximum allowed absolute difference (exclusive)
//
// Returns true when |ref - val| < tol. Otherwise a "value mismatch" line is
// written to std::cerr and false is returned. The message is driven by the
// same test as the return value, so it honours a caller-supplied tol and is
// also emitted when the difference is NaN.
bool compare(const std::string &inm,double ref,double val,double tol=1e-3){
  const bool withinTol = fabs(ref-val)<tol;
  if(!withinTol){
    std::cerr<<"value mismatch: "<<inm<<" "<<ref<<" "<<val<<std::endl;
  }
  return withinTol;
}
|
|
|
|
void testPMI1(){
|
|
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
|
|
BOOST_LOG(rdErrorLog) << " Basic PMI tests." << std::endl;
|
|
|
|
std::string pathName = getenv("RDBASE");
|
|
std::string sdfName = pathName+"/Code/GraphMol/Descriptors/test_data/PBF_egfr.sdf";
|
|
|
|
RDKit::SDMolSupplier reader(sdfName,true,false);
|
|
std::string fName = pathName+"/Code/GraphMol/Descriptors/test_data/PMI_egfr.out";
|
|
std::ifstream instrm(fName.c_str());
|
|
int nDone=0;
|
|
while(!reader.atEnd()){
|
|
RDKit::ROMol *m=reader.next();
|
|
TEST_ASSERT(m);
|
|
RDKit::ROMol mcpy(*m);
|
|
std::string nm;
|
|
m->getProp("_Name",nm);
|
|
std::string inm;
|
|
instrm>>inm;
|
|
TEST_ASSERT(inm==nm);
|
|
double val;
|
|
double pmi1_m,pmi2_m,pmi3_m,pmi1_nom,pmi2_nom,pmi3_nom;
|
|
instrm>>pmi1_m;
|
|
instrm>>pmi2_m;
|
|
instrm>>pmi3_m;
|
|
instrm>>pmi1_nom;
|
|
instrm>>pmi2_nom;
|
|
instrm>>pmi3_nom;
|
|
|
|
val = RDKit::Descriptors::PMI1(*m);
|
|
TEST_ASSERT(compare(inm,pmi1_m,val));
|
|
val = RDKit::Descriptors::PMI2(*m);
|
|
TEST_ASSERT(compare(inm,pmi2_m,val));
|
|
val = RDKit::Descriptors::PMI3(*m);
|
|
TEST_ASSERT(compare(inm,pmi3_m,val));
|
|
|
|
val = RDKit::Descriptors::PMI1(*m,-1,false);
|
|
TEST_ASSERT(compare(inm,pmi1_nom,val));
|
|
val = RDKit::Descriptors::PMI2(*m,-1,false);
|
|
TEST_ASSERT(compare(inm,pmi2_nom,val));
|
|
val = RDKit::Descriptors::PMI3(*m,-1,false);
|
|
TEST_ASSERT(compare(inm,pmi3_nom,val));
|
|
|
|
// now try doing it in the reverse order to make sure caching doesn't
|
|
// screw up.
|
|
val = RDKit::Descriptors::PMI1(mcpy,-1,false);
|
|
TEST_ASSERT(compare(inm,pmi1_nom,val));
|
|
val = RDKit::Descriptors::PMI2(mcpy,-1,false);
|
|
TEST_ASSERT(compare(inm,pmi2_nom,val));
|
|
val = RDKit::Descriptors::PMI3(mcpy,-1,false);
|
|
TEST_ASSERT(compare(inm,pmi3_nom,val));
|
|
val = RDKit::Descriptors::PMI1(mcpy);
|
|
TEST_ASSERT(compare(inm,pmi1_m,val));
|
|
val = RDKit::Descriptors::PMI2(mcpy);
|
|
TEST_ASSERT(compare(inm,pmi2_m,val));
|
|
val = RDKit::Descriptors::PMI3(mcpy);
|
|
TEST_ASSERT(compare(inm,pmi3_m,val));
|
|
|
|
|
|
delete m;
|
|
++nDone;
|
|
}
|
|
BOOST_LOG(rdErrorLog) << " done" << std::endl;
|
|
}
|
|
|
|
void testPMIEdges(){
|
|
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
|
|
BOOST_LOG(rdErrorLog) << " PMI edge cases." << std::endl;
|
|
|
|
{
|
|
std::string pathName = getenv("RDBASE");
|
|
std::string sdfName = pathName+"/Code/GraphMol/Descriptors/test_data/linear.mol";
|
|
|
|
RDKit::ROMol *m=MolFileToMol(sdfName);
|
|
TEST_ASSERT(m);
|
|
double val;
|
|
|
|
val = RDKit::Descriptors::PMI1(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::PMI2(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::PMI3(*m);
|
|
TEST_ASSERT(val>=1e-4);
|
|
|
|
delete m;
|
|
}
|
|
{
|
|
std::string pathName = getenv("RDBASE");
|
|
std::string sdfName = pathName+"/Code/GraphMol/Descriptors/test_data/linear_2atom.mol";
|
|
|
|
RDKit::ROMol *m=MolFileToMol(sdfName);
|
|
TEST_ASSERT(m);
|
|
double val;
|
|
|
|
val = RDKit::Descriptors::PMI1(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::PMI2(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::PMI3(*m);
|
|
TEST_ASSERT(val>=1e-4);
|
|
|
|
delete m;
|
|
}
|
|
{
|
|
std::string pathName = getenv("RDBASE");
|
|
std::string sdfName = pathName+"/Code/GraphMol/Descriptors/test_data/planar.mol";
|
|
|
|
RDKit::ROMol *m=MolFileToMol(sdfName);
|
|
TEST_ASSERT(m);
|
|
double val;
|
|
|
|
val = RDKit::Descriptors::PMI1(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::PMI2(*m);
|
|
TEST_ASSERT(val>=1e-4);
|
|
val = RDKit::Descriptors::PMI3(*m);
|
|
TEST_ASSERT(val>=1e-4);
|
|
|
|
delete m;
|
|
}
|
|
{
|
|
std::string pathName = getenv("RDBASE");
|
|
std::string sdfName = pathName+"/Code/GraphMol/Descriptors/test_data/planar_3atom.mol";
|
|
|
|
RDKit::ROMol *m=MolFileToMol(sdfName);
|
|
TEST_ASSERT(m);
|
|
double val;
|
|
|
|
val = RDKit::Descriptors::PMI1(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::PMI2(*m);
|
|
TEST_ASSERT(val>=1e-4);
|
|
val = RDKit::Descriptors::PMI3(*m);
|
|
TEST_ASSERT(val>=1e-4);
|
|
|
|
delete m;
|
|
}
|
|
|
|
{
|
|
RDKit::RWMol m;
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addConformer(new RDKit::Conformer(m.getNumAtoms()));
|
|
double val = RDKit::Descriptors::PMI1(m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::PMI2(m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::PMI3(m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
}
|
|
|
|
BOOST_LOG(rdErrorLog) << " done" << std::endl;
|
|
}
|
|
|
|
|
|
void testNPR1(){
|
|
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
|
|
BOOST_LOG(rdErrorLog) << " Basic NPR tests." << std::endl;
|
|
|
|
std::string pathName = getenv("RDBASE");
|
|
std::string sdfName = pathName+"/Code/GraphMol/Descriptors/test_data/PBF_egfr.sdf";
|
|
RDKit::SDMolSupplier reader(sdfName,true,false);
|
|
|
|
int nDone=0;
|
|
while(!reader.atEnd()){
|
|
RDKit::ROMol *m=reader.next();
|
|
TEST_ASSERT(m);
|
|
RDKit::ROMol mcpy(*m);
|
|
std::string nm;
|
|
m->getProp("_Name",nm);
|
|
|
|
double val;
|
|
double pmi1_m,pmi2_m,pmi3_m,pmi1_nom,pmi2_nom,pmi3_nom;
|
|
pmi1_m = RDKit::Descriptors::PMI1(*m);
|
|
pmi2_m = RDKit::Descriptors::PMI2(*m);
|
|
pmi3_m = RDKit::Descriptors::PMI3(*m);
|
|
pmi1_nom = RDKit::Descriptors::PMI1(*m,-1,false);
|
|
pmi2_nom = RDKit::Descriptors::PMI2(*m,-1,false);
|
|
pmi3_nom = RDKit::Descriptors::PMI3(*m,-1,false);
|
|
|
|
val = RDKit::Descriptors::NPR1(*m);
|
|
compare(nm,pmi1_m/pmi3_m,val);
|
|
val = RDKit::Descriptors::NPR2(*m);
|
|
compare(nm,pmi2_m/pmi3_m,val);
|
|
|
|
delete m;
|
|
++nDone;
|
|
}
|
|
BOOST_LOG(rdErrorLog) << " done" << std::endl;
|
|
}
|
|
|
|
void testNPREdges(){
|
|
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
|
|
BOOST_LOG(rdErrorLog) << " NPR edge cases." << std::endl;
|
|
|
|
{
|
|
std::string pathName = getenv("RDBASE");
|
|
std::string sdfName = pathName+"/Code/GraphMol/Descriptors/test_data/linear.mol";
|
|
|
|
RDKit::ROMol *m=MolFileToMol(sdfName);
|
|
TEST_ASSERT(m);
|
|
double val;
|
|
|
|
val = RDKit::Descriptors::NPR1(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::NPR2(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
|
|
delete m;
|
|
}
|
|
{
|
|
std::string pathName = getenv("RDBASE");
|
|
std::string sdfName = pathName+"/Code/GraphMol/Descriptors/test_data/linear_2atom.mol";
|
|
|
|
RDKit::ROMol *m=MolFileToMol(sdfName);
|
|
TEST_ASSERT(m);
|
|
double val;
|
|
|
|
val = RDKit::Descriptors::NPR1(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::NPR2(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
|
|
delete m;
|
|
}
|
|
{
|
|
std::string pathName = getenv("RDBASE");
|
|
std::string sdfName = pathName+"/Code/GraphMol/Descriptors/test_data/planar.mol";
|
|
|
|
RDKit::ROMol *m=MolFileToMol(sdfName);
|
|
TEST_ASSERT(m);
|
|
double val;
|
|
|
|
val = RDKit::Descriptors::NPR1(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::NPR2(*m);
|
|
TEST_ASSERT(val>=1e-4);
|
|
|
|
delete m;
|
|
}
|
|
{
|
|
std::string pathName = getenv("RDBASE");
|
|
std::string sdfName = pathName+"/Code/GraphMol/Descriptors/test_data/planar_3atom.mol";
|
|
|
|
RDKit::ROMol *m=MolFileToMol(sdfName);
|
|
TEST_ASSERT(m);
|
|
double val;
|
|
|
|
val = RDKit::Descriptors::NPR1(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::NPR2(*m);
|
|
TEST_ASSERT(val>=1e-4);
|
|
|
|
delete m;
|
|
}
|
|
{
|
|
RDKit::RWMol m;
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addConformer(new RDKit::Conformer(m.getNumAtoms()));
|
|
double val = RDKit::Descriptors::NPR1(m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::NPR2(m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
}
|
|
|
|
BOOST_LOG(rdErrorLog) << " done" << std::endl;
|
|
}
|
|
|
|
void test3DEdges(){
|
|
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
|
|
BOOST_LOG(rdErrorLog) << " 3D descriptor edge cases." << std::endl;
|
|
|
|
{
|
|
std::string pathName = getenv("RDBASE");
|
|
std::string sdfName = pathName+"/Code/GraphMol/Descriptors/test_data/linear.mol";
|
|
|
|
RDKit::ROMol *m=MolFileToMol(sdfName);
|
|
TEST_ASSERT(m);
|
|
double val;
|
|
|
|
val = RDKit::Descriptors::radiusOfGyration(*m);
|
|
TEST_ASSERT(fabs(val)<1e-2);
|
|
val = RDKit::Descriptors::inertialShapeFactor(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::eccentricity(*m);
|
|
TEST_ASSERT(fabs(1.0-val)<1e-4);
|
|
val = RDKit::Descriptors::asphericity(*m);
|
|
TEST_ASSERT(fabs(1.0-val)<1e-4);
|
|
val = RDKit::Descriptors::spherocityIndex(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
|
|
delete m;
|
|
}
|
|
{
|
|
std::string pathName = getenv("RDBASE");
|
|
std::string sdfName = pathName+"/Code/GraphMol/Descriptors/test_data/planar.mol";
|
|
|
|
RDKit::ROMol *m=MolFileToMol(sdfName);
|
|
TEST_ASSERT(m);
|
|
double val;
|
|
|
|
val = RDKit::Descriptors::radiusOfGyration(*m);
|
|
TEST_ASSERT(fabs(val)>1e-2);
|
|
val = RDKit::Descriptors::inertialShapeFactor(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::eccentricity(*m);
|
|
TEST_ASSERT(fabs(1.0-val)<1e-4);
|
|
val = RDKit::Descriptors::asphericity(*m);
|
|
TEST_ASSERT(fabs(0.5-val)<1e-4);
|
|
val = RDKit::Descriptors::spherocityIndex(*m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
|
|
delete m;
|
|
}
|
|
{ // octahedron
|
|
RDKit::RWMol m;
|
|
m.addAtom(new RDKit::Atom(1));
|
|
m.addAtom(new RDKit::Atom(1));
|
|
m.addAtom(new RDKit::Atom(1));
|
|
m.addAtom(new RDKit::Atom(1));
|
|
m.addAtom(new RDKit::Atom(1));
|
|
m.addAtom(new RDKit::Atom(1));
|
|
m.addConformer(new RDKit::Conformer(m.getNumAtoms()));
|
|
m.getConformer().setAtomPos(0,RDGeom::Point3D(1,0,0));
|
|
m.getConformer().setAtomPos(1,RDGeom::Point3D(-1,0,0));
|
|
m.getConformer().setAtomPos(2,RDGeom::Point3D(0,1,0));
|
|
m.getConformer().setAtomPos(3,RDGeom::Point3D(0,-1,0));
|
|
m.getConformer().setAtomPos(4,RDGeom::Point3D(0,0,1));
|
|
m.getConformer().setAtomPos(5,RDGeom::Point3D(0,0,-1));
|
|
double val;
|
|
val = RDKit::Descriptors::radiusOfGyration(m);
|
|
TEST_ASSERT(fabs(val)>0.1);
|
|
val = RDKit::Descriptors::inertialShapeFactor(m);
|
|
TEST_ASSERT(fabs(val)>1);
|
|
val = RDKit::Descriptors::eccentricity(m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::asphericity(m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::spherocityIndex(m);
|
|
TEST_ASSERT(fabs(1.-val)<1e-4);
|
|
}
|
|
|
|
{
|
|
RDKit::RWMol m;
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addAtom(new RDKit::Atom(6));
|
|
m.addConformer(new RDKit::Conformer(m.getNumAtoms()));
|
|
double val;
|
|
val = RDKit::Descriptors::radiusOfGyration(m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::inertialShapeFactor(m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::eccentricity(m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::asphericity(m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
val = RDKit::Descriptors::spherocityIndex(m);
|
|
TEST_ASSERT(fabs(val)<1e-4);
|
|
}
|
|
|
|
BOOST_LOG(rdErrorLog) << " done" << std::endl;
|
|
}
|
|
|
|
|
|
|
|
//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
|
|
//
|
|
//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
|
|
// Test driver: sets up the RDKit logs, then runs the edge-case suites
// followed by the data-file-driven PMI and NPR suites.
int main() {
  RDLog::InitLogs();

  // degenerate-geometry checks
  test3DEdges();
  testPMIEdges();
  testNPREdges();

  // checks driven by the EGFR test set
  testPMI1();
  testNPR1();

  return 0;
}
|