* - Fix #8029
- avoid unnecessary rounding errors in the JSON writer
- remove a warning when compiling MinimalLib without SubstructLibrary support

* changes in response to review

* changes in response to review

---------

Co-authored-by: ptosco <paolo.tosco@novartis.com>
This commit is contained in:
Paolo Tosco
2024-11-30 07:20:20 +01:00
committed by GitHub
parent 7beee94603
commit bd8289738d
6 changed files with 286 additions and 17 deletions

View File

@@ -772,7 +772,8 @@ void test_modifications() {
free(ctab);
free(mpkl);
mpkl = get_mol("C([H])([H])([H])C([2H])([H])C([H])([H])[H]", &mpkl_size, "{\"removeHs\":false}");
mpkl = get_mol("C([H])([H])([H])C([2H])([H])C([H])([H])[H]", &mpkl_size,
"{\"removeHs\":false}");
smi = get_smiles(mpkl, mpkl_size, NULL);
assert(!strcmp(smi, "[H]C([H])([H])C([H])([2H])C([H])([H])[H]"));
free(smi);
@@ -2192,19 +2193,22 @@ void test_partial_sanitization() {
assert(!strstr(mb, "M CHG"));
free(mb);
free(mpkl);
mpkl = get_mol("c1ccccc1N(=O)=O", &mpkl_size, "{\"sanitize\":{\"SANITIZE_CLEANUP\":true}}");
mpkl = get_mol("c1ccccc1N(=O)=O", &mpkl_size,
"{\"sanitize\":{\"SANITIZE_CLEANUP\":true}}");
mb = get_molblock(mpkl, mpkl_size, "{\"kekulize\":false}");
assert(strstr(mb, " 1 2 4 0"));
assert((strstr(mb, " 7 8 1 0") && strstr(mb, " 7 9 2 0"))
|| (strstr(mb, " 7 8 2 0") && strstr(mb, " 7 9 1 0")));
assert((strstr(mb, " 7 8 1 0") && strstr(mb, " 7 9 2 0")) ||
(strstr(mb, " 7 8 2 0") && strstr(mb, " 7 9 1 0")));
assert(strstr(mb, "M CHG 2"));
free(mb);
free(mpkl);
mpkl = get_mol("c1ccccc1N(=O)=O", &mpkl_size, "{\"sanitize\":{\"SANITIZE_CLEANUP\":true,\"SANITIZE_KEKULIZE\":true}}");
mpkl = get_mol(
"c1ccccc1N(=O)=O", &mpkl_size,
"{\"sanitize\":{\"SANITIZE_CLEANUP\":true,\"SANITIZE_KEKULIZE\":true}}");
mb = get_molblock(mpkl, mpkl_size, "{\"kekulize\":false}");
assert(!strstr(mb, " 1 2 4 0"));
assert((strstr(mb, " 7 8 1 0") && strstr(mb, " 7 9 2 0"))
|| (strstr(mb, " 7 8 2 0") && strstr(mb, " 7 9 1 0")));
assert((strstr(mb, " 7 8 1 0") && strstr(mb, " 7 9 2 0")) ||
(strstr(mb, " 7 8 2 0") && strstr(mb, " 7 9 1 0")));
assert(strstr(mb, "M CHG 2"));
free(mb);
free(mpkl);
@@ -2874,6 +2878,112 @@ void test_props() {
free(mpkl);
}
/*
 * Exercises the "removeHs" JSON detail of get_mol() for three input formats.
 *
 * The fixture is a molblock with one deuterium (atom 7, flagged via M  ISO)
 * and one plain hydrogen (atom 8). The assertions below require that:
 *   - molblock and JSON input keep both atoms with no details or with
 *     removeHs=false, and lose only the plain hydrogen with removeHs=true;
 *   - SMILES input loses the plain hydrogen with no details or with
 *     removeHs=true, and keeps it with removeHs=false;
 *   - the deuterium is present in every output.
 */
void test_get_mol_remove_hs() {
  printf("--------------------------\n");
  printf(" get_mol removeHs parameter\n");
  const char *mb_in =
      "\n"
      "MJ240300 \n"
      "\n"
      "8 8 0 0 0 0 0 0 0 0999 V2000\n"
      "-1.4955 1.1152 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
      "-2.2099 0.7027 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
      "-2.2099 -0.1223 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
      "-1.4955 -0.5348 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
      "-0.7810 -0.1223 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
      "-0.7810 0.7027 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
      "-0.0666 1.1152 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n"
      "-2.9244 -0.5348 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n"
      "1 2 2 0 0 0 0\n"
      "2 3 1 0 0 0 0\n"
      "3 4 2 0 0 0 0\n"
      "4 5 1 0 0 0 0\n"
      "5 6 2 0 0 0 0\n"
      "6 1 1 0 0 0 0\n"
      "6 7 1 0 0 0 0\n"
      "3 8 1 0 0 0 0\n"
      "M ISO 1 7 2\n"
      "M END\n";
  const char *details_default = "";
  const char *details_strip_hs = "{\"removeHs\":true}";
  const char *details_keep_hs = "{\"removeHs\":false}";
  /* Atom lines searched for in the molblock output. */
  const char *deuterium_line =
      " -0.0666 1.1152 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0";
  const char *hydrogen_line =
      " -2.9244 -0.5348 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0";
  /* Details under which molblock/JSON input must keep the plain hydrogen. */
  const char *keep_hs_cases[] = {details_default, details_keep_hs};
  /* Details under which SMILES input must drop the plain hydrogen. */
  const char *strip_hs_cases[] = {details_default, details_strip_hs};
  char *pkl;
  size_t pkl_size;
  char *molblock;
  char *smiles_out;
  char *json_ref = NULL;
  char *smiles_ref = NULL;

  /* Molblock input: hydrogens are kept by default and with removeHs=false. */
  for (int k = 0; k < 2; ++k) {
    pkl = get_mol(mb_in, &pkl_size, keep_hs_cases[k]);
    assert(pkl && pkl_size);
    molblock = get_molblock(pkl, pkl_size, NULL);
    assert(molblock);
    assert(strstr(molblock, deuterium_line));
    assert(strstr(molblock, hydrogen_line));
    free(molblock);
    /* Capture the SMILES and JSON forms once; they feed the later sections. */
    if (smiles_ref == NULL) {
      smiles_ref = get_smiles(pkl, pkl_size, NULL);
      assert(smiles_ref);
    }
    if (json_ref == NULL) {
      json_ref = get_json(pkl, pkl_size, NULL);
      assert(json_ref);
    }
    free(pkl);
  }

  /* Molblock input with removeHs=true: only the plain hydrogen goes away. */
  pkl = get_mol(mb_in, &pkl_size, details_strip_hs);
  assert(pkl && pkl_size);
  molblock = get_molblock(pkl, pkl_size, NULL);
  assert(molblock);
  assert(strstr(molblock, deuterium_line));
  assert(!strstr(molblock, hydrogen_line));
  free(molblock);
  free(pkl);

  /* JSON input: same expectations as for the molblock input. */
  for (int k = 0; k < 2; ++k) {
    pkl = get_mol(json_ref, &pkl_size, keep_hs_cases[k]);
    assert(pkl && pkl_size);
    molblock = get_molblock(pkl, pkl_size, NULL);
    assert(molblock);
    assert(strstr(molblock, deuterium_line));
    assert(strstr(molblock, hydrogen_line));
    free(molblock);
    free(pkl);
  }
  pkl = get_mol(json_ref, &pkl_size, details_strip_hs);
  assert(pkl && pkl_size);
  molblock = get_molblock(pkl, pkl_size, NULL);
  assert(molblock);
  assert(strstr(molblock, deuterium_line));
  assert(!strstr(molblock, hydrogen_line));
  free(molblock);
  free(json_ref);
  free(pkl);

  /* SMILES input: plain hydrogens are dropped by default and with
     removeHs=true. */
  for (int k = 0; k < 2; ++k) {
    pkl = get_mol(smiles_ref, &pkl_size, strip_hs_cases[k]);
    assert(pkl && pkl_size);
    smiles_out = get_smiles(pkl, pkl_size, NULL);
    assert(smiles_out);
    assert(strstr(smiles_out, "[2H]"));
    assert(!strstr(smiles_out, "[H]"));
    free(smiles_out);
    free(pkl);
  }

  /* SMILES input with removeHs=false: the plain hydrogen is preserved. */
  pkl = get_mol(smiles_ref, &pkl_size, details_keep_hs);
  assert(pkl && pkl_size);
  smiles_out = get_smiles(pkl, pkl_size, NULL);
  assert(smiles_out);
  assert(strstr(smiles_out, "[2H]"));
  assert(strstr(smiles_out, "[H]"));
  free(smiles_out);
  free(smiles_ref);
  free(pkl);
}
int main() {
enable_logging();
char *vers = version();
@@ -2912,5 +3022,6 @@ int main() {
test_bw_palette();
test_custom_palette();
test_props();
test_get_mol_remove_hs();
return 0;
}

View File

@@ -103,9 +103,18 @@ static constexpr int d_defaultHeight = 200;
RWMol *mol_from_input(const std::string &input,
const std::string &details_json = "") {
auto haveMolBlock = false;
auto haveRDKitJson = false;
if (input.find("M END") != std::string::npos) {
haveMolBlock = true;
} else if (input.find("commonchem") != std::string::npos ||
input.find("rdkitjson") != std::string::npos) {
haveRDKitJson = true;
}
auto haveSmiles = (!haveMolBlock && !haveRDKitJson);
bool sanitize = true;
bool kekulize = true;
bool removeHs = true;
bool removeHs = haveSmiles;
bool mergeQueryHs = false;
bool setAromaticity = true;
bool fastFindRings = true;
@@ -144,12 +153,14 @@ RWMol *mol_from_input(const std::string &input,
LPT_OPT_GET(makeDummiesQueries);
}
try {
if (input.find("M END") != std::string::npos) {
// We set default sanitization to false
// as we want to enable partial sanitization
// if required by the user through JSON details
if (haveMolBlock) {
bool strictParsing = false;
LPT_OPT_GET(strictParsing);
res = MolBlockToMol(input, false, removeHs, strictParsing);
} else if (input.find("commonchem") != std::string::npos ||
input.find("rdkitjson") != std::string::npos) {
res = MolBlockToMol(input, false, false, strictParsing);
} else if (haveRDKitJson) {
auto ps = MolInterchange::defaultJSONParseParameters;
LPT_OPT_GET2(ps, setAromaticBonds);
LPT_OPT_GET2(ps, strictValenceCheck);
@@ -173,6 +184,10 @@ RWMol *mol_from_input(const std::string &input,
}
if (res) {
try {
if (removeHs && !haveSmiles) {
MolOps::RemoveHsParameters removeHsParams;
MolOps::removeHs(*res, removeHsParams, false);
}
if (sanitize) {
unsigned int failedOp;
if (!kekulize) {

View File

@@ -61,8 +61,10 @@ namespace rj = rapidjson;
using namespace RDKit;
namespace {
#ifdef RDK_BUILD_MINIMAL_LIB_SUBSTRUCTLIBRARY
static const char *NO_SUPPORT_FOR_PATTERN_FPS =
"This SubstructLibrary was built without support for pattern fps";
#endif
std::string mappingToJsonArray(const ROMol &mol) {
std::vector<unsigned int> atomMapping;

View File

@@ -3547,6 +3547,98 @@ function test_remove_hs_details() {
mol.delete();
}
/**
 * Checks how RDKitModule.get_mol() applies the `removeHs` detail to each
 * input format.
 *
 * The fixture molblock has one deuterium (atom 7, flagged via M  ISO) and one
 * plain hydrogen (atom 8). The assertions require that:
 *  - molblock and JSON input keep both atoms with no details or with
 *    removeHs=false, and lose only the plain hydrogen with removeHs=true;
 *  - SMILES input loses the plain hydrogen with no details or with
 *    removeHs=true, and keeps it with removeHs=false;
 *  - the deuterium is present in every output.
 */
function test_get_mol_remove_hs() {
    const mbIn = `
MJ240300
8 8 0 0 0 0 0 0 0 0999 V2000
-1.4955 1.1152 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.2099 0.7027 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.2099 -0.1223 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.4955 -0.5348 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.7810 -0.1223 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.7810 0.7027 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.0666 1.1152 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
-2.9244 -0.5348 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
1 2 2 0 0 0 0
2 3 1 0 0 0 0
3 4 2 0 0 0 0
4 5 1 0 0 0 0
5 6 2 0 0 0 0
6 1 1 0 0 0 0
6 7 1 0 0 0 0
3 8 1 0 0 0 0
M ISO 1 7 2
M END
`;
    const noDetails = {};
    const removeHsTrue = { removeHs: true };
    const removeHsFalse = { removeHs: false };
    // Atom lines searched for in the molblock output.
    const deuteriumCoords = ' -0.0666 1.1152 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0';
    const hydrogenCoords = ' -2.9244 -0.5348 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0';
    let mol;
    let mbOut;
    let smiOut;
    let jb;
    let smi;

    // Molblock input: hydrogens are kept by default and with removeHs=false.
    [noDetails, removeHsFalse].forEach((details) => {
        mol = RDKitModule.get_mol(mbIn, JSON.stringify(details));
        assert(mol);
        mbOut = mol.get_molblock();
        assert(mbOut);
        assert(mbOut.includes(deuteriumCoords));
        assert(mbOut.includes(hydrogenCoords));
        // Capture the JSON and SMILES forms once; they feed the later sections.
        if (!jb) {
            jb = mol.get_json();
            assert(jb);
        }
        if (!smi) {
            smi = mol.get_smiles();
            assert(smi);
        }
        mol.delete();
    });

    // Molblock input with removeHs=true: only the plain hydrogen goes away.
    mol = RDKitModule.get_mol(mbIn, JSON.stringify(removeHsTrue));
    assert(mol);
    mbOut = mol.get_molblock();
    assert(mbOut);
    assert(mbOut.includes(deuteriumCoords));
    assert(!mbOut.includes(hydrogenCoords));
    mol.delete();

    // JSON input: hydrogens are kept by default and with removeHs=false.
    // This must parse `jb`, not `mbIn`; otherwise the JSON path is only
    // covered for removeHs=true and this loop merely repeats the first one.
    [noDetails, removeHsFalse].forEach((details) => {
        mol = RDKitModule.get_mol(jb, JSON.stringify(details));
        assert(mol);
        mbOut = mol.get_molblock();
        assert(mbOut);
        assert(mbOut.includes(deuteriumCoords));
        assert(mbOut.includes(hydrogenCoords));
        mol.delete();
    });

    // JSON input with removeHs=true: only the plain hydrogen goes away.
    mol = RDKitModule.get_mol(jb, JSON.stringify(removeHsTrue));
    assert(mol);
    mbOut = mol.get_molblock();
    assert(mbOut);
    assert(mbOut.includes(deuteriumCoords));
    assert(!mbOut.includes(hydrogenCoords));
    mol.delete();

    // SMILES input: plain hydrogens are dropped by default and with
    // removeHs=true.
    [noDetails, removeHsTrue].forEach((details) => {
        mol = RDKitModule.get_mol(smi, JSON.stringify(details));
        assert(mol);
        smiOut = mol.get_smiles();
        assert(smiOut);
        assert(smiOut.includes('[2H]'));
        assert(!smiOut.includes('[H]'));
        mol.delete();
    });

    // SMILES input with removeHs=false: the plain hydrogen is preserved.
    mol = RDKitModule.get_mol(smi, JSON.stringify(removeHsFalse));
    assert(mol);
    smiOut = mol.get_smiles();
    assert(smiOut);
    assert(smiOut.includes('[2H]'));
    assert(smiOut.includes('[H]'));
    mol.delete();
}
initRDKitModule().then(function(instance) {
var done = {};
const waitAllTestsFinished = () => {
@@ -3640,6 +3732,7 @@ initRDKitModule().then(function(instance) {
}
test_pickle();
test_remove_hs_details();
test_get_mol_remove_hs();
waitAllTestsFinished().then(() =>
console.log("Tests finished successfully")