Made changes as requested in review of PR.

Most notably, added test cases for C++ and Python. They only check that
the code runs without crashing; the results are not compared with
existing test files.
This commit is contained in:
DavidACosgrove
2016-12-17 17:46:46 +00:00
parent 698a84c94b
commit 472c77ac20
6 changed files with 300 additions and 60 deletions

View File

@@ -431,31 +431,8 @@ unsigned int compute2DCoordsMimicDistMat(
//! \brief Compute 2D coordinates where a piece of the molecule is
// constrained to have the same coordinates as a reference.
/*!
This function generates a depiction for a molecule where a piece of the
molecule is constrained to have the same coordinates as a reference.
This is useful for, for example, generating depictions of SAR data
sets so that the cores of the molecules are all oriented the same way.
ARGUMENTS:
\param mol - the molecule to be aligned, this will come back
with a single conformer.
\param reference - a molecule with the reference atoms to align to;
this should have a depiction.
\param confId - (optional) the id of the reference conformation to use
\param referencePattern - (optional) a molecule to be used to
generate the atom mapping between the molecule
and the reference.
\param acceptFailure - (optional) if True, standard depictions will be generated
for molecules that don't have a substructure match to the
reference; if false, throws a DepictException.
*/
void generateDepictionMatching2DStructure( RDKit::ROMol &mol ,
RDKit::ROMol &reference ,
const RDKit::ROMol &reference ,
int confId ,
RDKit::ROMol *referencePattern ,
bool acceptFailure ) {
@@ -469,34 +446,34 @@ void generateDepictionMatching2DStructure( RDKit::ROMol &mol ,
RDKit::MatchVectType refMatchVect;
RDKit::SubstructMatch( reference , *referencePattern , refMatchVect );
if( refMatchVect.empty() ) {
throw RDDepict::DepictException( "Reference does not map to itself." );
throw RDDepict::DepictException( "Reference pattern does not map to reference." );
}
refMatch.reserve( refMatchVect.size() );
for( size_t i = 0 , is = refMatchVect.size() ; i < is ; ++i ) {
for( size_t i = 0 ; i < refMatchVect.size() ; ++i ) {
refMatch.push_back( refMatchVect[i].second );
}
RDKit::SubstructMatch( mol , *referencePattern , matchVect );
} else {
refMatch.reserve( reference.getNumAtoms( true ) );
for( int i = 0 , is = reference.getNumAtoms( true ) ; i < is ; ++i ) {
for( unsigned int i = 0 ; i < reference.getNumAtoms( true ) ; ++i ) {
refMatch.push_back( i );
}
RDKit::SubstructMatch( mol , reference , matchVect );
}
RDGeom::INT_POINT2D_MAP coordMap;
if( RDKit::SubstructMatch( mol , reference , matchVect ) ) {
RDKit::Conformer &conf = reference.getConformer( confId );
if( matchVect.empty() ) {
if( !acceptFailure ) {
throw RDDepict::DepictException( "Substructure match with reference not found." );
}
} else {
const RDKit::Conformer &conf = reference.getConformer( confId );
for( RDKit::MatchVectType::const_iterator mv = matchVect.begin() ;
mv != matchVect.end() ; ++mv ) {
RDGeom::Point3D pt3 = conf.getAtomPos( refMatch[mv->first] );
RDGeom::Point2D pt2( pt3.x , pt3.y );
coordMap[mv->second] = pt2;
}
} else {
if( !acceptFailure ) {
throw RDDepict::DepictException( "Substructure match with reference not found." );
}
}
RDDepict::compute2DCoords( mol , &coordMap , false /* canonOrient */ ,
true /* clearConfs */ );
@@ -505,27 +482,8 @@ void generateDepictionMatching2DStructure( RDKit::ROMol &mol ,
//! \brief Generate a 2D depiction for a molecule where all or part of
// it mimics the coordinates of a 3D reference structure.
/*!
Generates a depiction for a molecule where a piece of the molecule
is constrained to have coordinates similar to those of a 3D reference
structure.
ARGUMENTS:
\param mol - the molecule to be aligned, this will come back
with a single conformer containing 2D coordinates
\param reference - a molecule with the reference atoms to align to.
By default this should be the same as mol, but with
3D coordinates
\param confId - (optional) the id of the reference conformation to use
\param refPattern - (optional) a query molecule to map a subset of
the reference onto the mol, so that only some of the
atoms are aligned.
\param acceptFailure - (optional) if true, standard depictions will be generated
for molecules that don't have a substructure match to the
reference; if false, throws a DepictException.
*/
void generateDepictionMatching3DStructure( RDKit::ROMol &mol ,
RDKit::ROMol &reference ,
const RDKit::ROMol &reference ,
int confId ,
RDKit::ROMol *referencePattern ,
bool acceptFailure ) {
@@ -553,7 +511,7 @@ void generateDepictionMatching3DStructure( RDKit::ROMol &mol ,
throw RDDepict::DepictException( "Reference pattern didn't match molecule or reference." );
}
}
for( size_t i = 0 , is = molMatchVect.size() ; i < is ; ++i ) {
for( size_t i = 0 ; i < molMatchVect.size() ; ++i ) {
mol_to_ref[molMatchVect[i].second] = refMatchVect[i].second;
}
@@ -563,7 +521,7 @@ void generateDepictionMatching3DStructure( RDKit::ROMol &mol ,
}
}
RDKit::Conformer &conf = reference.getConformer( confId );
const RDKit::Conformer &conf = reference.getConformer( confId );
// the distance matrix is a triangular representation
RDDepict::DOUBLE_SMART_PTR dmat( new double[num_ats * ( num_ats - 1 ) / 2] );
// negative distances are ignored, so initialise to -1.0 so subset by

View File

@@ -151,7 +151,7 @@ unsigned int compute2DCoordsMimicDistMat(
*/
void generateDepictionMatching2DStructure( RDKit::ROMol &mol ,
RDKit::ROMol &reference ,
const RDKit::ROMol &reference ,
int confId = -1 ,
RDKit::ROMol *referencePattern = static_cast<RDKit::ROMol *>( 0 ) ,
bool acceptFailure = false );
@@ -178,7 +178,7 @@ void generateDepictionMatching2DStructure( RDKit::ROMol &mol ,
referencePattern; if false, throws a DepictException.
*/
void generateDepictionMatching3DStructure( RDKit::ROMol &mol ,
RDKit::ROMol &reference ,
const RDKit::ROMol &reference ,
int confId = -1 ,
RDKit::ROMol *referencePattern = 0 ,
bool acceptFailure = false );

View File

@@ -107,7 +107,7 @@ unsigned int Compute2DCoordsMimicDistmat(
bool acceptFailure ) {
RDKit::ROMol *referencePattern = 0;
if( refPatt ) {
if( refPatt != python::object() ) {
referencePattern = python::extract<RDKit::ROMol *>( refPatt );
}
@@ -231,7 +231,7 @@ BOOST_PYTHON_MODULE(rdDepictor) {
RDDepict::GenerateDepictionMatching2DStructure,
(python::arg( "mol" ) , python::arg( "reference" ) ,
python::arg( "confId" ) = -1 ,
python::arg( "refPatt" ) = 0 ,
python::arg( "refPatt" ) = python::object() ,
python::arg( "acceptFailure" ) = false ),
docString.c_str() );
@@ -257,7 +257,7 @@ BOOST_PYTHON_MODULE(rdDepictor) {
RDDepict::GenerateDepictionMatching3DStructure,
(python::arg( "mol" ) , python::arg( "reference" ) ,
python::arg( "confId" ) = -1 ,
python::arg( "refPatt" ) = 0 ,
python::arg( "refPatt" ) = python::object() ,
python::arg( "acceptFailure" ) = false ),
docString.c_str() );
}

View File

@@ -216,6 +216,24 @@ class TestCase(unittest.TestCase):
self.assertAlmostEqual(conf.GetAtomPosition(0).x, -0.750, 3)
self.assertAlmostEqual(conf.GetAtomPosition(1).x, 0.750, 3)
def testConstrainedCoords(self):
  """Smoke test for the constrained-depiction wrappers.

  Exercises GenerateDepictionMatching2DStructure and
  GenerateDepictionMatching3DStructure, each with and without a reference
  pattern.  The generated coordinates are not compared with a reference
  file; the test checks that every input parses and that the calls
  complete without raising.
  """
  # 2D case: align to a template that already has a depiction.
  templ = Chem.MolFromSmiles('c1nccc2n1ccc2')
  self.assertTrue(templ is not None)
  rdDepictor.Compute2DCoords(templ)

  m1 = Chem.MolFromSmiles('c1cccc2ncn3cccc3c21')
  self.assertTrue(m1 is not None)
  rdDepictor.GenerateDepictionMatching2DStructure(m1, templ)
  # The molecule is documented to come back with a single conformer.
  self.assertEqual(m1.GetNumConformers(), 1)

  # 2D case with an explicit pattern providing the atom mapping.
  m2 = Chem.MolFromSmiles('c1cc(Cl)cc2ncn3cccc3c21')
  self.assertTrue(m2 is not None)
  rdDepictor.Compute2DCoords(m2)
  refPatt1 = Chem.MolFromSmarts('*1****2*1***2')
  self.assertTrue(refPatt1 is not None)
  rdDepictor.GenerateDepictionMatching2DStructure(m2, templ, -1, refPatt1)
  self.assertEqual(m2.GetNumConformers(), 1)

  # 3D case: depict a ligand so that it mimics its own 3D coordinates.
  fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data',
                       '1XP0_ligand.sdf')
  xp0_lig = Chem.MolFromMolFile(fileN)
  # Fail here with a clear message rather than inside the wrapper call.
  self.assertTrue(xp0_lig is not None)
  xp0_lig_2d = Chem.Mol(xp0_lig)
  rdDepictor.GenerateDepictionMatching3DStructure(xp0_lig_2d, xp0_lig)

  # 3D case restricted to the atoms matched by a query pattern.
  xp0_ref = Chem.MolFromSmarts('[#6]1~[#7][#6]~[#6]2[#6](=[#8])[#7]~[#6](c3ccccc3)[#7][#7]12')
  self.assertTrue(xp0_ref is not None)
  rdDepictor.GenerateDepictionMatching3DStructure(xp0_lig_2d, xp0_lig, -1, xp0_ref)
if __name__ == '__main__':
unittest.main()

View File

@@ -823,6 +823,46 @@ void testGitHubIssue1073() {
}
}
// Smoke test for generateDepictionMatching2DStructure() and
// generateDepictionMatching3DStructure(). The depictions are written to
// constrainedCoords.out.sdf but are not compared with a reference file; the
// test checks that all inputs load and that the calls complete.
void testConstrainedCoords() {
  // Constructing a std::string from a null pointer is undefined behaviour,
  // so assert that RDBASE is set before using it.
  const char *rdbaseEnv = getenv("RDBASE");
  TEST_ASSERT(rdbaseEnv);
  std::string rdbase = rdbaseEnv;
  std::string ofile =
      rdbase + "/Code/GraphMol/Depictor/test_data/constrainedCoords.out.sdf";
  SDWriter writer(ofile);

  // 2D case: align to a template that already has a depiction.
  std::string templ_smiles = "c1nccc2n1ccc2";
  ROMol *templ = SmilesToMol(templ_smiles);
  TEST_ASSERT(templ);
  RDDepict::compute2DCoords(*templ);

  std::string smiles = "c1cccc2ncn3cccc3c21";
  ROMol *m = SmilesToMol(smiles);
  TEST_ASSERT(m);
  RDDepict::generateDepictionMatching2DStructure(*m, *templ);
  // The molecule is documented to come back with a single conformer.
  TEST_ASSERT(m->getNumConformers() == 1);
  writer.write(*m);

  // 2D case with an explicit pattern providing the atom mapping.
  std::string smarts = "*1****2*1***2";
  ROMol *refPatt = SmartsToMol(smarts);
  TEST_ASSERT(refPatt);
  RDDepict::generateDepictionMatching2DStructure(*m, *templ, -1, refPatt);
  TEST_ASSERT(m->getNumConformers() == 1);
  writer.write(*m);

  delete templ;
  delete m;
  delete refPatt;

  // 3D case: depict a ligand so that it mimics its own 3D coordinates.
  std::string xp0_file =
      rdbase + "/Code/GraphMol/Depictor/test_data/1XP0_ligand.sdf";
  RDKit::ROMol *xp0_lig = RDKit::MolFileToMol(xp0_file);
  // Checked before the copy below dereferences it.
  TEST_ASSERT(xp0_lig);
  RDKit::ROMol *xp0_lig_2d = new RDKit::ROMol(*xp0_lig);
  TEST_ASSERT(xp0_lig_2d);
  RDDepict::compute2DCoords(*xp0_lig_2d);
  writer.write(*xp0_lig_2d);
  RDDepict::generateDepictionMatching3DStructure(*xp0_lig_2d, *xp0_lig);
  writer.write(*xp0_lig_2d);

  delete xp0_lig;
  delete xp0_lig_2d;
}
int main() {
RDLog::InitLogs();
#if 1
@@ -994,5 +1034,12 @@ int main() {
BOOST_LOG(rdInfoLog)
<< "***********************************************************\n";
BOOST_LOG(rdInfoLog)
<< "***********************************************************\n";
BOOST_LOG(rdInfoLog) << " testConstrainedCoords\n";
testConstrainedCoords();
BOOST_LOG(rdInfoLog)
<< "***********************************************************\n";
return (0);
}

View File

@@ -0,0 +1,217 @@
1XP0_VDN_A_201
RCSB PDB01141521343D
Coordinates from PDB:1XP0:A:201 Model:1 without hydrogens
34 37 0 0 0 0 999 V2000
-17.3110 28.7690 59.4250 C 0 0 0 0 0 0 0 0 0 0 0 0
-18.4240 27.7500 59.5520 C 0 0 0 0 0 0 0 0 0 0 0 0
-19.4990 28.3170 60.2890 O 0 0 0 0 0 0 0 0 0 0 0 0
-20.6790 27.6470 60.5320 C 0 0 0 0 0 0 0 0 0 0 0 0
-20.8720 26.3220 60.1320 C 0 0 0 0 0 0 0 0 0 0 0 0
-22.0880 25.7000 60.4070 C 0 0 0 0 0 0 0 0 0 0 0 0
-23.1120 26.3870 61.0750 C 0 0 0 0 0 0 0 0 0 0 0 0
-22.9030 27.7070 61.4740 C 0 0 0 0 0 0 0 0 0 0 0 0
-21.7100 28.3360 61.1660 C 0 0 0 0 0 0 0 0 0 0 0 0
-24.5130 25.6830 61.4210 S 0 0 0 0 0 0 0 0 0 0 0 0
-25.6280 26.5010 61.0510 O 0 0 0 0 0 0 0 0 0 0 0 0
-24.5590 24.4480 60.7000 O 0 0 0 0 0 0 0 0 0 0 0 0
-23.9260 24.9690 67.8530 C 0 0 0 0 0 0 0 0 0 0 0 0
-24.6520 25.3890 62.9190 N 0 0 0 0 0 0 0 0 0 0 0 0
-23.5890 24.5300 63.4650 C 0 0 0 0 0 0 0 0 0 0 0 0
-23.7010 24.3520 64.9680 C 0 0 0 0 0 0 0 0 0 0 0 0
-23.9540 25.6730 65.5070 N 0 0 0 0 0 0 0 0 0 0 0 0
-25.3030 26.1390 65.2360 C 0 0 0 0 0 0 0 0 0 0 0 0
-25.4770 26.3040 63.7200 C 0 0 0 0 0 0 0 0 0 0 0 0
-23.3800 25.9480 66.8200 C 0 0 0 0 0 0 0 0 0 0 0 0
-21.3890 29.7160 61.6140 C 0 0 0 0 0 0 0 0 0 0 0 0
-21.0380 30.7260 60.6430 N 0 0 0 0 0 0 0 0 0 0 0 0
-20.9950 32.0290 60.9210 C 0 0 0 0 0 0 0 0 0 0 0 0
-21.5360 32.3970 62.2430 C 0 0 0 0 0 0 0 0 0 0 0 0
-21.9910 31.5330 63.1240 N 0 0 0 0 0 0 0 0 0 0 0 0
-22.0380 30.1810 62.8070 N 0 0 0 0 0 0 0 0 0 0 0 0
-20.5730 32.8270 60.0980 O 0 0 0 0 0 0 0 0 0 0 0 0
-21.7100 33.6360 62.8500 C 0 0 0 0 0 0 0 0 0 0 0 0
-22.2670 33.3890 64.0670 N 0 0 0 0 0 0 0 0 0 0 0 0
-22.4610 32.0570 64.2250 C 0 0 0 0 0 0 0 0 0 0 0 0
-21.3840 35.0070 62.3250 C 0 0 0 0 0 0 0 0 0 0 0 0
-23.0180 31.3240 65.4100 C 0 0 0 0 0 0 0 0 0 0 0 0
-21.8600 30.6260 66.1240 C 0 0 0 0 0 0 0 0 0 0 0 0
-22.4100 29.8920 67.3240 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0 0 0 0
2 3 1 0 0 0 0
3 4 1 0 0 0 0
4 5 2 0 0 0 0
4 9 1 0 0 0 0
5 6 1 0 0 0 0
6 7 2 0 0 0 0
7 8 1 0 0 0 0
7 10 1 0 0 0 0
8 9 2 0 0 0 0
9 21 1 0 0 0 0
10 11 2 0 0 0 0
10 12 2 0 0 0 0
10 14 1 0 0 0 0
13 20 1 0 0 0 0
14 15 1 0 0 0 0
14 19 1 0 0 0 0
15 16 1 0 0 0 0
16 17 1 0 0 0 0
17 18 1 0 0 0 0
17 20 1 0 0 0 0
18 19 1 0 0 0 0
21 22 2 0 0 0 0
21 26 1 0 0 0 0
22 23 1 0 0 0 0
23 24 1 0 0 0 0
23 27 2 0 0 0 0
24 25 1 0 0 0 0
24 28 2 0 0 0 0
25 26 1 0 0 0 0
25 30 1 0 0 0 0
28 29 1 0 0 0 0
28 31 1 0 0 0 0
29 30 2 0 0 0 0
30 32 1 0 0 0 0
32 33 1 0 0 0 0
33 34 1 0 0 0 0
A 1
C1
A 2
C2
A 3
O3
A 4
C4
A 5
C5
A 6
C6
A 7
C7
A 8
C8
A 9
C9
A 10
S10
A 11
O11
A 12
O12
A 13
C13
A 14
N14
A 15
C15
A 16
C16
A 17
N17
A 18
C18
A 19
C19
A 20
C20
A 21
C21
A 22
N22
A 23
C23
A 24
C24
A 25
N25
A 26
N26
A 27
O27
A 28
C28
A 29
N29
A 30
C30
A 31
C31
A 32
C32
A 33
C33
A 34
C34
M END
> <InstanceId>
1XP0_VDN_A_201
> <ChemCompId>
VDN
> <PdbId>
1XP0
> <ChainId>
A
> <ResidueNumber>
201
> <InsertionCode>
> <Model>
1
> <AltIds>
> <MissingHeavyAtoms>
0
> <ObservedFormula>
C23 N6 O4 S
> <Name>
2-{2-ETHOXY-5-[(4-ETHYLPIPERAZIN-1-YL)SULFONYL]PHENYL}-5-METHYL-7-PROPYLIMIDAZO[5,1-F][1,2,4]TRIAZIN-4(1H)-ONE
> <SystematicName>
2-[2-ethoxy-5-(4-ethylpiperazin-1-yl)sulfonyl-phenyl]-5-methyl-7-propyl-1H-imidazo[5,1-f][1,2,4]triazin-4-one
> <Synonyms>
VARDENAFIL, LEVITRA
> <Type>
NON-POLYMER
> <Formula>
C23 H32 N6 O4 S
> <MolecularWeight>
488.603
> <ModifiedDate>
2011-06-04
> <Parent>
> <OneLetterCode>
> <SubcomponentList>
> <AmbiguousFlag>
N
> <InChI>
InChI=1S/C23H32N6O4S/c1-5-8-20-24-16(4)21-23(30)25-22(26-29(20)21)18-15-17(9-10-19(18)33-7-3)34(31,32)28-13-11-27(6-2)12-14-28/h9-10,15H,5-8,11-14H2,1-4H3,(H,25,26,30)
> <InChIKey>
SECKRCOLJRRGGV-UHFFFAOYSA-N
> <SMILES>
CCCc1nc(c2n1NC(=NC2=O)c3cc(ccc3OCC)S(=O)(=O)N4CCN(CC4)CC)C
$$$$