From 489dbfbc9a08507e4be6a795a936b2805ed26698 Mon Sep 17 00:00:00 2001 From: Greg Landrum Date: Sun, 29 Nov 2015 05:09:39 +0100 Subject: [PATCH] add a test for initstruchk, fix an input problem --- External/AvalonTools/test1.cpp | 334 ++++++++++++++++++--------------- rdkit/Chem/MolKey/MolKey.py | 78 ++++---- 2 files changed, 222 insertions(+), 190 deletions(-) diff --git a/External/AvalonTools/test1.cpp b/External/AvalonTools/test1.cpp index 4157000c6..e0f79004c 100755 --- a/External/AvalonTools/test1.cpp +++ b/External/AvalonTools/test1.cpp @@ -4,169 +4,172 @@ // // -// Expected test results here correspond to v1.0 of the open-source avalontoolkit +// Expected test results here correspond to v1.0 of the open-source +// avalontoolkit // - #include -#include +#include #include #include -#include +#include #include #include #include +#include #include "AvalonTools.h" #include using namespace RDKit; -void test1(){ +void test1() { BOOST_LOG(rdInfoLog) << "testing canonical smiles generation" << std::endl; { ROMol *m = static_cast(SmilesToMol("c1ccccc1")); TEST_ASSERT(m); - std::string smi=AvalonTools::getCanonSmiles(*m); - TEST_ASSERT(smi=="c1ccccc1"); + std::string smi = AvalonTools::getCanonSmiles(*m); + TEST_ASSERT(smi == "c1ccccc1"); delete m; } { ROMol *m = static_cast(SmilesToMol("c1cccnc1")); TEST_ASSERT(m); - std::string smi=AvalonTools::getCanonSmiles(*m); - TEST_ASSERT(smi=="c1ccncc1"); + std::string smi = AvalonTools::getCanonSmiles(*m); + TEST_ASSERT(smi == "c1ccncc1"); delete m; } { ROMol *m = static_cast(SmilesToMol("n1ccccc1")); TEST_ASSERT(m); - std::string smi=AvalonTools::getCanonSmiles(*m); - TEST_ASSERT(smi=="c1ccncc1"); + std::string smi = AvalonTools::getCanonSmiles(*m); + TEST_ASSERT(smi == "c1ccncc1"); delete m; } { - std::string smi=AvalonTools::getCanonSmiles("n1ccccc1",true); - TEST_ASSERT(smi=="c1ccncc1"); + std::string smi = AvalonTools::getCanonSmiles("n1ccccc1", true); + TEST_ASSERT(smi == "c1ccncc1"); } { - std::string smi=AvalonTools::getCanonSmiles("c1cccnc1",true); - TEST_ASSERT(smi=="c1ccncc1"); + std::string smi = AvalonTools::getCanonSmiles("c1cccnc1", true); + TEST_ASSERT(smi == "c1ccncc1"); } BOOST_LOG(rdInfoLog) << "done" << std::endl; } -void test2(){ +void test2() { BOOST_LOG(rdInfoLog) << "testing coordinate generation" << std::endl; #if 1 { RWMol *m = SmilesToMol("c1cccnc1"); TEST_ASSERT(m); - unsigned int confId=AvalonTools::set2DCoords(*m); - TEST_ASSERT(m->getNumConformers()==1); - TEST_ASSERT(confId==0); + unsigned int confId = AvalonTools::set2DCoords(*m); + TEST_ASSERT(m->getNumConformers() == 1); + TEST_ASSERT(confId == 0); delete m; } #endif { - std::string molb = AvalonTools::set2DCoords("c1cccnc1",true); - TEST_ASSERT(molb!=""); + std::string molb = AvalonTools::set2DCoords("c1cccnc1", true); + TEST_ASSERT(molb != ""); } BOOST_LOG(rdInfoLog) << "done" << std::endl; } -void test3(){ +void test3() { BOOST_LOG(rdInfoLog) << "testing fingerprint generation" << std::endl; { ROMol *m = static_cast(SmilesToMol("c1ccccn1")); TEST_ASSERT(m); ExplicitBitVect bv(512); - AvalonTools::getAvalonFP(*m,bv,512,false,true,0x00006FFF); + AvalonTools::getAvalonFP(*m, bv, 512, false, true, 0x00006FFF); BOOST_LOG(rdInfoLog) << "c1ccccn1 " << bv.getNumOnBits() << std::endl; - TEST_ASSERT(bv.getNumOnBits()==18); + TEST_ASSERT(bv.getNumOnBits() == 18); delete m; } { ROMol *m = static_cast(SmilesToMol("c1ccccc1")); TEST_ASSERT(m); ExplicitBitVect bv(512); - AvalonTools::getAvalonFP(*m,bv,512,false,true,0x006FFF); + AvalonTools::getAvalonFP(*m, bv, 512, false, true, 0x006FFF); BOOST_LOG(rdInfoLog) << "c1ccccn1 " << bv.getNumOnBits() << std::endl; - TEST_ASSERT(bv.getNumOnBits()==6); + TEST_ASSERT(bv.getNumOnBits() == 6); delete m; } { ROMol *m = static_cast(SmilesToMol("c1nnccc1")); TEST_ASSERT(m); ExplicitBitVect bv(512); - AvalonTools::getAvalonFP(*m,bv,512,false,true,0x006FFF); + AvalonTools::getAvalonFP(*m, bv, 512, false, true, 0x006FFF); BOOST_LOG(rdInfoLog) << "c1nnccc1 " << bv.getNumOnBits() << std::endl; - TEST_ASSERT(bv.getNumOnBits()==28); + TEST_ASSERT(bv.getNumOnBits() == 28); delete m; } { ROMol *m = static_cast(SmilesToMol("c1ncncc1")); TEST_ASSERT(m); ExplicitBitVect bv(512); - AvalonTools::getAvalonFP(*m,bv,512,false,true,0x006FFF); + AvalonTools::getAvalonFP(*m, bv, 512, false, true, 0x006FFF); BOOST_LOG(rdInfoLog) << "c1ncncc1 " << bv.getNumOnBits() << std::endl; - TEST_ASSERT(bv.getNumOnBits()==25); + TEST_ASSERT(bv.getNumOnBits() == 25); delete m; } { ExplicitBitVect bv(512); - AvalonTools::getAvalonFP("c1cccnc1",true,bv,512,false,true,0x006FFF); + AvalonTools::getAvalonFP("c1cccnc1", true, bv, 512, false, true, 0x006FFF); BOOST_LOG(rdInfoLog) << "c1cccnc1 " << bv.getNumOnBits() << std::endl; - TEST_ASSERT(bv.getNumOnBits()==18); + TEST_ASSERT(bv.getNumOnBits() == 18); } { ExplicitBitVect bv(512); - AvalonTools::getAvalonFP("c1ccccc1",true,bv,512,false,true,0x006FFF); + AvalonTools::getAvalonFP("c1ccccc1", true, bv, 512, false, true, 0x006FFF); BOOST_LOG(rdInfoLog) << "c1ccccc1 " << bv.getNumOnBits() << std::endl; - TEST_ASSERT(bv.getNumOnBits()==6); + TEST_ASSERT(bv.getNumOnBits() == 6); } { ROMol *m = static_cast(SmilesToMol("c1cccnc1")); TEST_ASSERT(m); ExplicitBitVect bv(1024); - AvalonTools::getAvalonFP(*m,bv,1024,false,true,0x006FFF); + AvalonTools::getAvalonFP(*m, bv, 1024, false, true, 0x006FFF); BOOST_LOG(rdInfoLog) << "c1cccnc1 " << bv.getNumOnBits() << std::endl; - TEST_ASSERT(bv.getNumOnBits()==19); + TEST_ASSERT(bv.getNumOnBits() == 19); delete m; } { ExplicitBitVect bv(2048); - AvalonTools::getAvalonFP("c1cocc1",true,bv,2048,false,true,0x006FFF); + AvalonTools::getAvalonFP("c1cocc1", true, bv, 2048, false, true, 0x006FFF); BOOST_LOG(rdInfoLog) << "c1cocc1 " << bv.getNumOnBits() << std::endl; - TEST_ASSERT(bv.getNumOnBits()==53); + TEST_ASSERT(bv.getNumOnBits() == 53); } { ExplicitBitVect bv(2048); - AvalonTools::getAvalonFP("C1=COC=C1",true,bv,2048,false,true,0x006FFF); + AvalonTools::getAvalonFP("C1=COC=C1", true, bv, 2048, false, true, + 0x006FFF); BOOST_LOG(rdInfoLog) << "C1=COC=C1 " << bv.getNumOnBits() << std::endl; - TEST_ASSERT(bv.getNumOnBits()==53); + TEST_ASSERT(bv.getNumOnBits() == 53); } BOOST_LOG(rdInfoLog) << "done" << std::endl; } - -void testRDK151(){ - BOOST_LOG(rdInfoLog) << "testing Jira issue RDK-151: pyAvalonTools not generating chiral smiles from molecules" << std::endl; +void testRDK151() { + BOOST_LOG(rdInfoLog) << "testing Jira issue RDK-151: pyAvalonTools not " + "generating chiral smiles from molecules" + << std::endl; { - std::string tSmi="C[C@H](F)Cl"; + std::string tSmi = "C[C@H](F)Cl"; ROMol *m = static_cast(SmilesToMol(tSmi)); TEST_ASSERT(m); - std::string smi=AvalonTools::getCanonSmiles(tSmi,true); - CHECK_INVARIANT(smi==tSmi,smi+"!="+tSmi); - smi=AvalonTools::getCanonSmiles(*m); - CHECK_INVARIANT(smi==tSmi,smi+"!="+tSmi); + std::string smi = AvalonTools::getCanonSmiles(tSmi, true); + CHECK_INVARIANT(smi == tSmi, smi + "!=" + tSmi); + smi = AvalonTools::getCanonSmiles(*m); + CHECK_INVARIANT(smi == tSmi, smi + "!=" + tSmi); delete m; } @@ -174,57 +177,60 @@ void testRDK151(){ BOOST_LOG(rdInfoLog) << "done" << std::endl; } -void testSmilesFailures(){ +void testSmilesFailures() { BOOST_LOG(rdInfoLog) << "testing handling of bad smiles strings" << std::endl; { - std::string tSmi="C1C"; - std::string smi=AvalonTools::getCanonSmiles(tSmi,true); - CHECK_INVARIANT(smi=="",smi); + std::string tSmi = "C1C"; + std::string smi = AvalonTools::getCanonSmiles(tSmi, true); + CHECK_INVARIANT(smi == "", smi); } BOOST_LOG(rdInfoLog) << "done" << std::endl; } -void testSubstructFps(){ +void testSubstructFps() { BOOST_LOG(rdInfoLog) << "testing substructure fingerprints " << std::endl; { - ExplicitBitVect bv1(512),bv2(512); - AvalonTools::getAvalonFP("c1ccccc1",true,bv1,512,true,true,AvalonTools::avalonSSSBits); - AvalonTools::getAvalonFP("c1ccccc1C(F)(F)F",true,bv2,512); - TEST_ASSERT((bv1&bv2)==bv1); - AvalonTools::getAvalonFP("c1ccccc1C(F)(F)F",true,bv1,512); - TEST_ASSERT((bv1&bv2)==bv1); - AvalonTools::getAvalonFP("c1cccc(C)c1C(F)(F)F",true,bv2,512); - TEST_ASSERT((bv1&bv2)==bv1); + ExplicitBitVect bv1(512), bv2(512); + AvalonTools::getAvalonFP("c1ccccc1", true, bv1, 512, true, true, + AvalonTools::avalonSSSBits); + AvalonTools::getAvalonFP("c1ccccc1C(F)(F)F", true, bv2, 512); + TEST_ASSERT((bv1 & bv2) == bv1); + AvalonTools::getAvalonFP("c1ccccc1C(F)(F)F", true, bv1, 512); + TEST_ASSERT((bv1 & bv2) == bv1); + AvalonTools::getAvalonFP("c1cccc(C)c1C(F)(F)F", true, bv2, 512); + TEST_ASSERT((bv1 & bv2) == bv1); } { - ExplicitBitVect bv1(512),bv2(512); - AvalonTools::getAvalonFP("c1ccccc1O",true,bv1,512,true,true,AvalonTools::avalonSSSBits); - AvalonTools::getAvalonFP("c1ccccc1OC",true,bv2,512); - TEST_ASSERT((bv1&bv2)==bv1); + ExplicitBitVect bv1(512), bv2(512); + AvalonTools::getAvalonFP("c1ccccc1O", true, bv1, 512, true, true, + AvalonTools::avalonSSSBits); + AvalonTools::getAvalonFP("c1ccccc1OC", true, bv2, 512); + TEST_ASSERT((bv1 & bv2) == bv1); } BOOST_LOG(rdInfoLog) << "done" << std::endl; } -void testStruChk(){ +void testStruChk() { BOOST_LOG(rdInfoLog) << "testing structure checking " << std::endl; { int errs = 0; - RDKit::ROMOL_SPTR m = AvalonTools::checkMol(errs, "c1ccccc1",true); - TEST_ASSERT(errs==0); - m = AvalonTools::checkMol(errs, "c1c(R)cccc1C1(CC-C(C)C1)C",true); - TEST_ASSERT(errs!=0); + RDKit::ROMOL_SPTR m = AvalonTools::checkMol(errs, "c1ccccc1", true); + TEST_ASSERT(errs == 0); + m = AvalonTools::checkMol(errs, "c1c(R)cccc1C1(CC-C(C)C1)C", true); + TEST_ASSERT(errs != 0); } { int errs = 0; std::string res; - boost::tie(res,errs)=AvalonTools::checkMolString("c1ccccc1",true); - TEST_ASSERT(errs==0); - TEST_ASSERT(res!=""); - boost::tie(res,errs)=AvalonTools::checkMolString("c1c(R)cccc1C1(CC-C(C)C1)C",true); - TEST_ASSERT(errs==1); - TEST_ASSERT(res==""); + boost::tie(res, errs) = AvalonTools::checkMolString("c1ccccc1", true); + TEST_ASSERT(errs == 0); + TEST_ASSERT(res != ""); + boost::tie(res, errs) = + AvalonTools::checkMolString("c1c(R)cccc1C1(CC-C(C)C1)C", true); + TEST_ASSERT(errs == 1); + TEST_ASSERT(res == ""); } BOOST_LOG(rdInfoLog) << "done" << std::endl; } @@ -233,123 +239,124 @@ void testBadMolfile() { BOOST_LOG(rdInfoLog) << "testing handling bad molecules " << std::endl; // some tests around dealing with bad mol blocks { - std::string molb="SNAP007157A\n\ + std::string molb = + "SNAP007157A\n\ MACCS-II3194121345\n\ \n\ 0 0 0 0 0"; - std::string smi=AvalonTools::getCanonSmiles(molb,false); - CHECK_INVARIANT(smi=="",smi); + std::string smi = AvalonTools::getCanonSmiles(molb, false); + CHECK_INVARIANT(smi == "", smi); ExplicitBitVect bv(1024); - AvalonTools::getAvalonFP(molb,false,bv,1024); - TEST_ASSERT(bv.getNumOnBits()==0); - + AvalonTools::getAvalonFP(molb, false, bv, 1024); + TEST_ASSERT(bv.getNumOnBits() == 0); + std::string oMolb; - AvalonTools::set2DCoords(molb,false); - CHECK_INVARIANT(oMolb=="",oMolb); - + AvalonTools::set2DCoords(molb, false); + CHECK_INVARIANT(oMolb == "", oMolb); } } void testSmilesSegFault() { - BOOST_LOG(rdInfoLog) << "testing a canonical smiles case that led to seg faults " << std::endl; + BOOST_LOG(rdInfoLog) + << "testing a canonical smiles case that led to seg faults " << std::endl; // some tests around dealing with bad mol blocks { - std::string inSmi(1024,'C'); - std::string smi=AvalonTools::getCanonSmiles(inSmi,true); - TEST_ASSERT(smi==inSmi); + std::string inSmi(1024, 'C'); + std::string smi = AvalonTools::getCanonSmiles(inSmi, true); + TEST_ASSERT(smi == inSmi); } { - std::string inSmi(1534,'C'); - std::string smi=AvalonTools::getCanonSmiles(inSmi,true); - TEST_ASSERT(smi==inSmi); + std::string inSmi(1534, 'C'); + std::string smi = AvalonTools::getCanonSmiles(inSmi, true); + TEST_ASSERT(smi == inSmi); } BOOST_LOG(rdInfoLog) << "done" << std::endl; } void testGithub336() { - BOOST_LOG(rdInfoLog) << "testing github issue 336: bad canonical smiles for conjugated double bonds" << std::endl; + BOOST_LOG(rdInfoLog) << "testing github issue 336: bad canonical smiles for " + "conjugated double bonds" << std::endl; // some tests around dealing with bad mol blocks { - std::string pathName=getenv("RDBASE"); + std::string pathName = getenv("RDBASE"); pathName += "/External/AvalonTools/test_data/"; - std::ifstream ins((pathName+"EZ_test.2.sdf").c_str()); - std::string mb((std::istreambuf_iterator(ins)), + std::ifstream ins((pathName + "EZ_test.2.sdf").c_str()); + std::string mb((std::istreambuf_iterator(ins)), std::istreambuf_iterator()); ROMol *m = MolBlockToMol(mb); TEST_ASSERT(m); - TEST_ASSERT(m->getNumAtoms()==17); + TEST_ASSERT(m->getNumAtoms() == 17); - std::string smi1=AvalonTools::getCanonSmiles(mb,false); - std::string smi2=AvalonTools::getCanonSmiles(*m); - std::cerr<<"smi1: "<(ins)), + std::ifstream ins((pathName + "heterocycle.mol").c_str()); + std::string mb((std::istreambuf_iterator(ins)), std::istreambuf_iterator()); - RWMol *m = MolBlockToMol(mb,false); + RWMol *m = MolBlockToMol(mb, false); TEST_ASSERT(m); - TEST_ASSERT(m->getNumAtoms()==6); + TEST_ASSERT(m->getNumAtoms() == 6); m->updatePropertyCache(); MolOps::cleanUp(*m); MolOps::setAromaticity(*m); - std::string smi1=AvalonTools::getCanonSmiles(mb,false); - std::string smi2=AvalonTools::getCanonSmiles(*m); - std::cerr<<"smi1: "<(ins)), + std::ifstream ins((pathName + "heterocycle2.mol").c_str()); + std::string mb((std::istreambuf_iterator(ins)), std::istreambuf_iterator()); - RWMol *m = MolBlockToMol(mb,false); + RWMol *m = MolBlockToMol(mb, false); TEST_ASSERT(m); - TEST_ASSERT(m->getNumAtoms()==11); + TEST_ASSERT(m->getNumAtoms() == 11); m->updatePropertyCache(); MolOps::cleanUp(*m); MolOps::setAromaticity(*m); - std::string smi1=AvalonTools::getCanonSmiles(mb,false); - std::string smi2=AvalonTools::getCanonSmiles(*m); - std::cerr<<"smi1: "< cv1(5000),cv2(5000); - AvalonTools::getAvalonCountFP("c1ccccc1",true,cv1,5000); - AvalonTools::getAvalonCountFP("c1ccccc1.c1ccccc1",true,cv2,5000); - for(unsigned int i=0;i cv1(5000), cv2(5000); + AvalonTools::getAvalonCountFP("c1ccccc1", true, cv1, 5000); + AvalonTools::getAvalonCountFP("c1ccccc1.c1ccccc1", true, cv2, 5000); + for (unsigned int i = 0; i < cv1.size(); ++i) { + if (cv1[i] && (cv2[i] != 2 * cv1[i])) { + std::cerr << " mismatch: " << i << " " << cv1[i] << " " << cv2[i] + << std::endl; } } - for(unsigned int i=0;i(SmilesToMol("c1ccccc1.c1ccccc1")); TEST_ASSERT(m2); - SparseIntVect cv1(5000),cv2(5000); - AvalonTools::getAvalonCountFP(*m1,cv1,5000); - AvalonTools::getAvalonCountFP(*m2,cv2,5000); - for(unsigned int i=0;i cv1(5000), cv2(5000); + AvalonTools::getAvalonCountFP(*m1, cv1, 5000); + AvalonTools::getAvalonCountFP(*m2, cv2, 5000); + for (unsigned int i = 0; i < cv1.size(); ++i) { + if (cv1[i] && (cv2[i] != 2 * cv1[i])) { + std::cerr << " mismatch: " << i << " " << cv1[i] << " " << cv2[i] + << std::endl; } } - for(unsigned int i=0;i0, False if 0. - Throw BadMoleculeException if there are no characters - at the required position or if they cannot be converted + ''' look at atom count position (line 4, characters 0:3) + Return True if the count is >0, False if 0. + Throw BadMoleculeException if there are no characters + at the required position or if they cannot be converted to a positive integer ''' try: @@ -143,14 +143,14 @@ def _ctab_has_atoms(ctab_lines): except IndexError: raise BadMoleculeException('Invalid molfile format') except ValueError: - raise BadMoleculeException('Expected integer') + raise BadMoleculeException('Expected integer') return rval - + def _ctab_remove_chiral_flag(ctab_lines): - ''' read the chiral flag (line 4, characters 12:15) + ''' read the chiral flag (line 4, characters 12:15) and set it to 0. Return True if it was 1, False if 0. - Throw BadMoleculeException if there are no characters + Throw BadMoleculeException if there are no characters at the required position or if they where not 0 or 1 ''' try: @@ -161,13 +161,13 @@ def _ctab_remove_chiral_flag(ctab_lines): elif a_count == 1: rval = True orig_line = ctab_lines[3] - ctab_lines[3] = orig_line[:CHIRAL_POS] + ' 0' + orig_line[CHIRAL_POS + 3:] + ctab_lines[3] = orig_line[:CHIRAL_POS] + ' 0' + orig_line[CHIRAL_POS + 3:] else: raise BadMoleculeException('Expected chiral flag 0 or 1') except IndexError: raise BadMoleculeException('Invalid molfile format') except ValueError: - raise BadMoleculeException('Expected integer, got {0}'.format(str_a_count)) + raise BadMoleculeException('Expected integer, got {0}'.format(str_a_count)) return rval @@ -183,7 +183,7 @@ def initStruchk(configDir=None,logFile=None): fd.close() logFile= fd.name struchk_init = '''-tm - -ta {0}checkfgs.trn +-ta {0}checkfgs.trn -tm -or -ca {0}checkfgs.chk @@ -191,7 +191,7 @@ def initStruchk(configDir=None,logFile=None): -cl 3 -cs -cn 999 --l {1}'''.format(configDir, logFile) +-l {1}\n'''.format(configDir, logFile) initRes=pyAvalonTools.InitializeCheckMol(struchk_init) if initRes: raise ValueError('bad result from InitializeCheckMol: '+str(initRes)) @@ -203,7 +203,7 @@ def CheckCTAB(ctab, isSmiles=True): mol_str = ctab if not mol_str: raise BadMoleculeException('Unexpected blank or NULL molecule') - else: + else: mol_str = _fix_line_ends(mol_str) mol_str = _fix_chemdraw_header(mol_str) @@ -211,22 +211,22 @@ def CheckCTAB(ctab, isSmiles=True): if mol_str and NULL_SMILES_RE.match(mol_str): rval = T_NULL_MOL else: - rval = pyAvalonTools.CheckMoleculeString(mol_str, isSmiles) + rval = pyAvalonTools.CheckMoleculeString(mol_str, isSmiles) else: # decompose the ctab into lines - # the line terminator may be \n or \r\n, or even r'\n' + # the line terminator may be \n or \r\n, or even r'\n' ctab_lines = mol_str.split('\n') if len(ctab_lines) <= 3: raise BadMoleculeException('Not enough lines in CTAB') _ctab_remove_chiral_flag(ctab_lines) if not _ctab_has_atoms(ctab_lines): rval = T_NULL_MOL - else: # reassemble the ctab lines into one string. + else: # reassemble the ctab lines into one string. mol_str = '\n'.join(ctab_lines) rval = pyAvalonTools.CheckMoleculeString(mol_str, isSmiles) return rval -InchiResult = namedtuple('InchiResult',['error','inchi','fixed_ctab']) +InchiResult = namedtuple('InchiResult',['error','inchi','fixed_ctab']) def GetInchiForCTAB(ctab): """ >>> from rdkit.Chem.MolKey import MolKey @@ -237,7 +237,7 @@ def GetInchiForCTAB(ctab): >>> res = MolKey.GetInchiForCTAB(pyAvalonTools.Generate2DCoords('c1c[nH]nc1C(Cl)Br',True)) >>> res.inchi 'InChI=1/C4H4BrClN2/c5-4(6)3-1-2-7-8-3/h1-2,4H,(H,7,8)/t4?/f/h7H' - >>> + >>> """ inchi = None ctab_str = ctab @@ -260,13 +260,13 @@ def GetInchiForCTAB(ctab): return InchiResult(strucheck_err | conversion_err, inchi, fixed_mol) def _make_racemate_inchi(inchi): - """ Normalize the stereo information (t-layer) to one selected isomer. """ + """ Normalize the stereo information (t-layer) to one selected isomer. """ # set stereo type = 3 (racemate) for consistency # reset inverted flag to m0 - not inverted new_stereo = '/m0/s3/' stereo_match = GET_STEREO_RE.match(inchi) if stereo_match: - inchi = stereo_match.group(1) + new_stereo + stereo_match.group(2) + inchi = stereo_match.group(1) + new_stereo + stereo_match.group(2) return inchi def _get_identification_string(err, ctab, inchi, stereo_category=None, extra_stereo=None): @@ -284,13 +284,13 @@ def _get_identification_string(err, ctab, inchi, stereo_category=None, extra_ste else: pieces.append('ST=' + stereo_category) if extra_stereo: - pieces.append('XTR=' + extra_stereo) + pieces.append('XTR=' + extra_stereo) key_string = '/'.join(pieces) return key_string def _get_null_mol_identification_string(extra_stereo) : key_string = str(uuid.uuid1 ()) - return key_string + return key_string def _get_bad_mol_identification_string(ctab, stereo_category, extra_stereo): pieces = [] @@ -298,7 +298,7 @@ def _get_bad_mol_identification_string(ctab, stereo_category, extra_stereo): if ctab_str: # make the ctab part of the key if available ctab_str = _fix_line_ends(ctab_str) ctab_str = _fix_chemdraw_header(ctab_str) - ctab_str = '\n'.join(ctab_str.split('\n')[3:]) + ctab_str = '\n'.join(ctab_str.split('\n')[3:]) pieces.append(ctab_str.replace('\n', r'\n')) # make a handy one-line string else: pass @@ -312,12 +312,12 @@ def _get_bad_mol_identification_string(ctab, stereo_category, extra_stereo): return key_string def _identify(err, ctab, inchi, stereo_category, extra_structure_desc=None): - """ Compute the molecule key based on the inchi string, - stereo category as well as extra structure + """ Compute the molecule key based on the inchi string, + stereo category as well as extra structure information """ key_string = _get_identification_string(err, ctab, inchi, stereo_category, extra_structure_desc) if key_string: - return "{0}|{1}".format(MOL_KEY_VERSION, + return "{0}|{1}".format(MOL_KEY_VERSION, base64.b64encode(hashlib.md5(key_string.encode('UTF-8')).digest()).decode()) #pylint: disable=E1101 else: return None @@ -382,7 +382,7 @@ def GetKeyForCTAB(ctab,stereo_info=None,stereo_comment=None,logger=None): >>> res=MolKey.GetKeyForCTAB(pyAvalonTools.Generate2DCoords('c1cccc(C(Br)Cl)c1C(F)Cl',True)) >>> res.mol_key '1|5H9R3LvclagMXHp3Clrc/g==' - >>> res.stereo_code + >>> res.stereo_code 'S_UNKN' >>> res=MolKey.GetKeyForCTAB(pyAvalonTools.Generate2DCoords('c1cccc(C(Br)Cl)c1C(F)Cl',True),stereo_info='S_REL') >>> res.mol_key @@ -415,7 +415,7 @@ def GetKeyForCTAB(ctab,stereo_info=None,stereo_comment=None,logger=None): extra_structure_desc = info_flds[1].strip() else: logger.warn('stereo code {0} not recognized. Using default value for ctab.'.format(code_fld)) - + if not (err & BAD_SET): (n_stereo, n_undef_stereo, is_meso, dummy) = InchiInfo.InchiInfo(inchi).get_sp3_stereo()['main']['non-isotopic'] if stereo_category == None or stereo_category == 'DEFAULT' : # compute if not set @@ -426,7 +426,7 @@ def GetKeyForCTAB(ctab,stereo_info=None,stereo_comment=None,logger=None): key = _identify(err, fixed_mol, inchi, stereo_category, extra_structure_desc) return MolKeyResult(key, err, inchi, fixed_mol, stereo_category, extra_structure_desc) - + #------------------------------------ #