- update AvalonTools to version 2.0.4a (#5796)

- update config files in Data to the same version as in AvalonTools
- fix a bug in AvalonTools.cpp causing the last character of the optString passed to initCheckMol() to be deleted
- add a test to confirm the bug is fixed

Co-authored-by: Tosco, Paolo <paolo.tosco@novartis.com>
This commit is contained in:
Paolo Tosco
2022-12-01 16:48:09 +01:00
committed by GitHub
parent c94a47bbd3
commit d75d3e6b1c
6 changed files with 37 additions and 11 deletions

View File

@@ -1,4 +1,4 @@
606 ! _N10_, 12.Dec.2008 incl. Sandoz R-...; C-C(=O)-D,T.
607 ! _N10_, 12.Dec.2008 incl. Sandoz R-...; C-C(=O)-D,T.
/* 00 */ "Li,Na,K,Rb,Cs,Fr,Tl+1"
/* 00 */ "Be,Mg,Ca,Sr,Ba,Ra,Sn,Pb+2"
/* 00 */ "Al,Ga,In,Tl,Sb,Bi,Fe+3"
@@ -309,6 +309,7 @@
/* 03C*/ "C(-C+1)(-C)(-C)"
/* 03 */ "C(-I+1)(-C)(-C)"
/* 03 */ "C(-N+1)(-N+1)(-C)"
/* 03 */ "@C(=C)(-O)(-O-1)"
/* 03 */ "C(=C)(-B)(-B,C,N,O,S)"
/* 03 */ "C(=C)(-C,Si,Ge,Sn,Pb)(-C,Si,Ge,Sn,Pb,O,S,Se,Te,N,P,As,F,Cl,Br,I,D,T)"
/* 03 */ "C(=C)(-O,S,Se,Te,N,P,F,Cl,Br,I)(-O,S,Se,Te,N,P,F,Cl,Br,I)"

View File

@@ -1,9 +1,9 @@
# Parameter file for STRUCHK 2.0
# for use with CORDAT
#
ta \apps\chemistry\checkfgs.trn # augmented atom transformations
ta StandardFiles/checkfgs.trn # augmented atom transformations
or # create data fields with STRUCHK results
ca \apps\chemistry\checkfgs.chk # table of allowed augmented atoms
ca StandardFiles/checkfgs.chk # table of allowed augmented atoms
cc # check for atom collisions
cl 3 # collision limit is 3% of bond length
cs # check stereo conventions

View File

@@ -79,7 +79,7 @@
/*F123*/ "C(-C)(-O-1)(-N+1)" --> "C(-C)(-O)(-N+1)" prot.
/*F203*/ "C(=C)(-N-1)(-C,N)" --> "C(=C)(-N)(-C,N)" prot.
/*F213*/ "C(=C)(-O,S-1)(-C)" --> "C(=C)(-O,S)(-C)" prot. enol C
/*F223*/ "C(=C)(-O-1)(-N,O,S)" --> "C(-C)(=O)(-N,O,S)" prot. enol Het.
/*F223*/ "!@C(=C)(-O-1)(-N,O,S)" --> "C(-C)(=O)(-N,O,S)" prot. enol Het.
/*F233*/ "C(=C)(-S-1)(-N,S)" --> "C(-C)(=S)(-N,S)" prot. enol
/*F243*/ "C(=C)(-O-1)(-N+1)" --> "C(-C)(=O)(-N+1)" prot. enol
/*F303*/ "C(=N)(-C,N,O,S-1)(-C,N,O,S)" --> "C(=N)(-C,N,O,S)(-C,N,O,S)" prot.

View File

@@ -349,7 +349,7 @@ void getAvalonCountFP(const std::string &data, bool isSmiles,
reaccsToCounts(mp, res, bitFlags, isQuery, nBits);
FreeMolecule(mp);
} else {
BOOST_LOG(rdErrorLog) << "ERROR: no fingeprint generated for molecule."
BOOST_LOG(rdErrorLog) << "ERROR: no fingerprint generated for molecule."
<< std::endl;
}
}
@@ -368,7 +368,7 @@ void getAvalonFP(const std::string &data, bool isSmiles, ExplicitBitVect &res,
reaccsToFingerprint(mp, res, bitFlags, isQuery, resetVect, nBytes);
FreeMolecule(mp);
} else {
BOOST_LOG(rdErrorLog) << "ERROR: no fingeprint generated for molecule."
BOOST_LOG(rdErrorLog) << "ERROR: no fingerprint generated for molecule."
<< std::endl;
}
}
@@ -387,7 +387,7 @@ void getAvalonFP(const std::string &data, bool isSmiles,
reaccsToFingerprint(mp, res, bitFlags, isQuery, resetVect, nBytes);
FreeMolecule(mp);
} else {
BOOST_LOG(rdErrorLog) << "ERROR: no fingeprint generated for molecule."
BOOST_LOG(rdErrorLog) << "ERROR: no fingerprint generated for molecule."
<< std::endl;
}
}
@@ -438,8 +438,8 @@ int initCheckMol(const std::string &optString) {
// n.b. always add a cr to the end for safety
auto *optBuffer = new char[optString.size() + 2];
optString.copy(optBuffer, optString.size());
optBuffer[optString.size() - 1] = '\n';
optBuffer[optString.size()] = '\0';
optBuffer[optString.size()] = '\n';
optBuffer[optString.size() + 1] = '\0';
int res = InitCheckMol(optBuffer);
delete[] optBuffer;
return res;

View File

@@ -2,7 +2,7 @@ if(NOT RDK_BUILD_AVALON_SUPPORT)
return()
endif(NOT RDK_BUILD_AVALON_SUPPORT)
set(AVALON_VERSION "2.0.2")
set(AVALON_VERSION "2.0.4a")
if(NOT DEFINED AVALONTOOLS_DIR)
set(AVALONTOOLS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ava-formake-AvalonToolkit_${AVALON_VERSION}")
set(fileToCheck "${AVALONTOOLS_DIR}/src/main/C/common/reaccsio.c")
@@ -25,7 +25,7 @@ if(needDownload)
set(AVALONTOOLS_URL "https://github.com/rohdebe1/ava-formake/archive/refs/tags/AvalonToolkit_${AVALON_VERSION}.tar.gz")
endif()
if(NOT DEFINED AVALONTOOLS_MD5SUM)
set(AVALONTOOLS_MD5SUM "fc188383a8896802e948c977b73dbe71")
set(AVALONTOOLS_MD5SUM "8e0fdedce0d93bdbc1e3c6ab3ae5c6cf")
endif()
if(NOT DEFINED AVALONTOOLS_BASE)
string(REGEX REPLACE "^.*/" "" AVALONTOOLS_BASE "${AVALONTOOLS_URL}")

View File

@@ -529,6 +529,30 @@ void testNoAtomCTAB() {
TEST_ASSERT(res.empty());
}
// Regression test for AvalonTools::initCheckMol() being given an option string
// whose last line is NOT newline-terminated.
//
// The option string is built so that its final option ("-cn 999") has no
// trailing newline; initCheckMol() must report no errors for it, and a
// subsequent checkMol() call on a large (>150 atom) SMILES must also report no
// errors.
// NOTE(review): presumably losing the final character would turn "-cn 999"
// into "-cn 99" and make the big molecule fail the check - confirm against the
// STRUCHK option documentation.
void testBigMoleculeNoNewlineInInitString() {
  BOOST_LOG(rdInfoLog) << "testing molecule with >150 atoms when "
                          "init string has no newline"
                       << std::endl;

  // getenv() returns a null pointer when the variable is not set, and
  // constructing a std::string from a null pointer is undefined behavior, so
  // fail the test explicitly instead.
  const char *rdbase = getenv("RDBASE");
  TEST_ASSERT(rdbase);
  std::string pathName = rdbase;
  pathName += "/Data/struchk/";

  // One STRUCHK option per line; the last line deliberately lacks a '\n'.
  std::stringstream struchk_init;
  struchk_init << "-ta " << pathName << "checkfgs.trn\n"
               << "-tm\n"
                  "-or\n"
                  "-ca "
               << pathName << "checkfgs.chk\n"
               << "-cc\n"
                  "-cl 3\n"
                  "-cs\n"
                  "-cn 999";
  int errs = AvalonTools::initCheckMol(struchk_init.str());
  TEST_ASSERT(!errs);

  std::string bigMol =
      "CC(C)CC(C(=O)NC(CCSC)C(=O)NC(CC(=O)N)C(=O)NC(C(C)O)C(=O)O)NC(=O)C(CC1=CNC2=CC=CC=C21)NC(=O)C(CCC(=O)N)NC(=O)C(C(C)C)NC(=O)C(CC3=CC=CC=C3)NC(=O)C(CC(=O)O)NC(=O)C(CCC(=O)N)NC(=O)C(C)NC(=O)C(CCCNC(=N)N)NC(=O)C(CCCNC(=N)N)NC(=O)C(CO)NC(=O)C(CC(=O)O)NC(=O)C(CC(C)C)NC(=O)C(CC4=CC=C(C=C4)O)NC(=O)C(CCCCN)NC(=O)C(CO)NC(=O)C(CC5=CC=C(C=C5)O)NC(=O)C(CC(=O)O)NC(=O)C(CO)NC(=O)C(C(C)O)NC(=O)C(CC6=CC=CC=C6)NC(=O)C(C(C)O)NC(=O)CNC(=O)C(CCC(=O)N)NC(=O)C(CO)NC(=O)C(CC7=CN=CN7)N";
  // errs is overwritten by checkMol(); the third argument marks the input as
  // SMILES.
  RDKit::ROMOL_SPTR m = AvalonTools::checkMol(errs, bigMol, true);
  // Close the check-mol files before asserting on the result.
  AvalonTools::closeCheckMolFiles();
  TEST_ASSERT(errs == 0);
}
int main() {
RDLog::InitLogs();
#if 1
@@ -549,6 +573,7 @@ int main() {
testGithub4075();
testGithub4330();
testNoAtomCTAB();
testBigMoleculeNoNewlineInInitString();
return 0;
}