cartridge: expose sanitize options to mol_from_ctab (#8326)

* add sanitize and removeHs options to mol_from_ctab

* bump version to 4.7.0
add the corresponding update script
fix a bug in the 4.4.0 - 4.5.0 update script (drop the old function signatures before recreating them)

* document the new arguments

Note: argument names should be added to all cartridge functions in a future PR.

* fix a mistake

* response to review

* response to review

---------

Co-authored-by: Greg Landrum <glandrum@ethz.ch>
This commit is contained in:
Greg Landrum
2025-03-11 06:14:44 +01:00
committed by GitHub
parent fe5ccb7d47
commit 35c8c54a3a
9 changed files with 282 additions and 11 deletions

View File

@@ -272,18 +272,27 @@ extern "C" CROMol parseMolBlob(char *data, int len) {
}
extern "C" CROMol parseMolCTAB(char *data, bool keepConformer, bool warnOnFail,
bool asQuery) {
bool asQuery, bool sanitize, bool removeHs) {
RWMol *mol = nullptr;
try {
if (!asQuery) {
mol = MolBlockToMol(data);
mol = MolBlockToMol(data, sanitize, removeHs);
if (mol && !sanitize) {
mol->updatePropertyCache(false);
unsigned int failedOp;
unsigned int ops = MolOps::SANITIZE_ALL ^ MolOps::SANITIZE_PROPERTIES ^
MolOps::SANITIZE_KEKULIZE;
MolOps::sanitizeMol(*mol, failedOp, ops);
}
} else {
mol = MolBlockToMol(data, false, false);
if (mol != nullptr) {
mol->updatePropertyCache(false);
MolOps::setAromaticity(*mol);
MolOps::mergeQueryHs(*mol);
if (removeHs) {
MolOps::mergeQueryHs(*mol);
}
}
}
} catch (...) {

View File

@@ -1583,3 +1583,140 @@ M END'));
(1 row)
-- sanitization and Hs: do not sanitize, removing Hs has no effect
select mol_to_smiles(mol_from_ctab('
RDKit 2D
0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 3 2 0 0 0
M V30 BEGIN ATOM
M V30 1 C 0.000000 0.000000 0.000000 0
M V30 2 F 1.299038 0.750000 0.000000 0
M V30 3 H 2.598076 -0.000000 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 3
M V30 END BOND
M V30 END CTAB
M END
',false,false,true));
mol_to_smiles
---------------
[H]FC
(1 row)
-- sanitization and Hs: do not sanitize, do not remove Hs
select mol_to_smiles(mol_from_ctab('
RDKit 2D
0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 3 2 0 0 0
M V30 BEGIN ATOM
M V30 1 C 0.000000 0.000000 0.000000 0
M V30 2 F 1.299038 0.750000 0.000000 0
M V30 3 H 2.598076 -0.000000 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 3
M V30 END BOND
M V30 END CTAB
M END
',false,false,false));
mol_to_smiles
---------------
[H]FC
(1 row)
-- sanitization and Hs: sanitize, do not remove Hs
select mol_to_smiles(mol_from_ctab('
RDKit 2D
0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 3 2 0 0 0
M V30 BEGIN ATOM
M V30 1 C 0.000000 0.000000 0.000000 0
M V30 2 O 1.299038 0.750000 0.000000 0
M V30 3 H 2.598076 -0.000000 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 3
M V30 END BOND
M V30 END CTAB
M END
',false,true,false));
mol_to_smiles
---------------
[H]OC
(1 row)
-- sanitization and Hs: default is to sanitize and remove Hs
select mol_to_smiles(mol_from_ctab('
RDKit 2D
0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 3 2 0 0 0
M V30 BEGIN ATOM
M V30 1 C 0.000000 0.000000 0.000000 0
M V30 2 O 1.299038 0.750000 0.000000 0
M V30 3 H 2.598076 -0.000000 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 3
M V30 END BOND
M V30 END CTAB
M END
',false));
mol_to_smiles
---------------
CO
(1 row)
-- sanitization and Hs: sanitize, do not remove Hs, input coords manipulated
select mol_to_v3kctab(mol_from_ctab('
RDKit 2D
0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 3 2 0 0 0
M V30 BEGIN ATOM
M V30 1 C 0.000000 0.000000 0.000000 0
M V30 2 O 1.000000 0.500000 0.000000 0
M V30 3 H 2.598076 -0.000000 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 3
M V30 END BOND
M V30 END CTAB
M END
',false,true,false));
mol_to_v3kctab
------------------------------------------
+
RDKit 2D +
+
0 0 0 0 0 0 0 0 0 0999 V3000 +
M V30 BEGIN CTAB +
M V30 COUNTS 3 2 0 0 0 +
M V30 BEGIN ATOM +
M V30 1 C 0.000000 0.000000 0.000000 0 +
M V30 2 O 1.299038 0.750000 0.000000 0 +
M V30 3 H 2.598076 -0.000000 0.000000 0+
M V30 END ATOM +
M V30 BEGIN BOND +
M V30 1 1 1 2 +
M V30 2 1 2 3 +
M V30 END BOND +
M V30 END CTAB +
M END +
(1 row)

View File

@@ -1,4 +1,4 @@
comment = 'Cheminformatics functionality for PostgreSQL.'
default_version = '4.6.1'
default_version = '4.7.0'
module_pathname = '$libdir/rdkit'
relocatable = true

View File

@@ -126,7 +126,7 @@ char *makeMolBlob(CROMol data, int *len);
CROMol parseMolText(char *data, bool asSmarts, bool warnOnFail, bool asQuery,
bool sanitize);
CROMol parseMolCTAB(char *data, bool keepConformer, bool warnOnFail,
bool asQuery);
bool asQuery, bool sanitize, bool removeHs);
char *makeMolText(CROMol data, int *len, bool asSmarts, bool cxSmiles,
bool isomeric);
char *makeCtabText(CROMol data, int *len, bool createDepictionIfMissing,

View File

@@ -225,7 +225,7 @@ RETURNS NULL ON NULL INPUT;
-- Implicit casts: text and varchar values are converted to mol through mol_from_smiles(text).
CREATE CAST (text as mol) WITH FUNCTION mol_from_smiles(text) AS IMPLICIT;
CREATE CAST (varchar as mol) WITH FUNCTION mol_from_smiles(text) AS IMPLICIT;
CREATE OR REPLACE FUNCTION mol_from_ctab(cstring,bool default false)
CREATE OR REPLACE FUNCTION mol_from_ctab(ctab cstring,keep_conformer bool default false, sanitize bool default true, remove_hs bool default true)
RETURNS mol
PARALLEL SAFE
AS 'MODULE_PATHNAME'
@@ -300,7 +300,7 @@ PARALLEL SAFE
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT IMMUTABLE;
CREATE OR REPLACE FUNCTION mol_to_ctab(mol,bool default true, bool default false)
CREATE OR REPLACE FUNCTION mol_to_ctab(mol mol,bool default true, bool default false)
RETURNS cstring
PARALLEL SAFE
AS 'MODULE_PATHNAME'
@@ -340,7 +340,7 @@ PARALLEL SAFE
AS 'MODULE_PATHNAME', 'mol_from_smarts'
LANGUAGE C STRICT IMMUTABLE;
CREATE OR REPLACE FUNCTION qmol_from_ctab(cstring,bool default false)
CREATE OR REPLACE FUNCTION qmol_from_ctab(ctab cstring,keep_conformer bool default false, merge_hs bool default true)
RETURNS qmol
PARALLEL SAFE
AS 'MODULE_PATHNAME'

View File

@@ -112,10 +112,15 @@ PG_FUNCTION_INFO_V1(mol_from_ctab);
Datum mol_from_ctab(PG_FUNCTION_ARGS) {
char *data = PG_GETARG_CSTRING(0);
bool keepConformer = PG_GETARG_BOOL(1);
bool sanitize = PG_GETARG_BOOL(2);
bool removeHs = PG_GETARG_BOOL(3);
CROMol mol;
Mol *res;
mol = parseMolCTAB(data, keepConformer, true, false);
bool warnOnFail = true;
bool asQuery = false;
mol = parseMolCTAB(data, keepConformer, warnOnFail, asQuery, sanitize, removeHs);
if (!mol) {
PG_RETURN_NULL();
}
@@ -130,10 +135,14 @@ PG_FUNCTION_INFO_V1(qmol_from_ctab);
Datum qmol_from_ctab(PG_FUNCTION_ARGS) {
char *data = PG_GETARG_CSTRING(0);
bool keepConformer = PG_GETARG_BOOL(1);
bool removeHs = PG_GETARG_BOOL(2);
CROMol mol;
Mol *res;
mol = parseMolCTAB(data, keepConformer, true, true);
bool warnOnFail = true;
bool asQuery = true;
bool sanitize = false;
mol = parseMolCTAB(data, keepConformer, warnOnFail, asQuery, sanitize, removeHs);
if (!mol) {
PG_RETURN_NULL();
}

View File

@@ -546,4 +546,101 @@ M V30 2 2 2 3
M V30 3 1 4 5 ENDPTS=(3 1 2 3) ATTACH=ANY
M V30 END BOND
M V30 END CTAB
M END'));
M END'));
-- sanitization and Hs: do not sanitize, removing Hs has no effect
select mol_to_smiles(mol_from_ctab('
RDKit 2D
0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 3 2 0 0 0
M V30 BEGIN ATOM
M V30 1 C 0.000000 0.000000 0.000000 0
M V30 2 F 1.299038 0.750000 0.000000 0
M V30 3 H 2.598076 -0.000000 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 3
M V30 END BOND
M V30 END CTAB
M END
',false,false,true));
-- sanitization and Hs: do not sanitize, do not remove Hs
select mol_to_smiles(mol_from_ctab('
RDKit 2D
0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 3 2 0 0 0
M V30 BEGIN ATOM
M V30 1 C 0.000000 0.000000 0.000000 0
M V30 2 F 1.299038 0.750000 0.000000 0
M V30 3 H 2.598076 -0.000000 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 3
M V30 END BOND
M V30 END CTAB
M END
',false,false,false));
-- sanitization and Hs: sanitize, do not remove Hs
select mol_to_smiles(mol_from_ctab('
RDKit 2D
0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 3 2 0 0 0
M V30 BEGIN ATOM
M V30 1 C 0.000000 0.000000 0.000000 0
M V30 2 O 1.299038 0.750000 0.000000 0
M V30 3 H 2.598076 -0.000000 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 3
M V30 END BOND
M V30 END CTAB
M END
',false,true,false));
-- sanitization and Hs: default is to sanitize and remove Hs
select mol_to_smiles(mol_from_ctab('
RDKit 2D
0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 3 2 0 0 0
M V30 BEGIN ATOM
M V30 1 C 0.000000 0.000000 0.000000 0
M V30 2 O 1.299038 0.750000 0.000000 0
M V30 3 H 2.598076 -0.000000 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 3
M V30 END BOND
M V30 END CTAB
M END
',false));
-- sanitization and Hs: sanitize, do not remove Hs, input coords manipulated
select mol_to_v3kctab(mol_from_ctab('
RDKit 2D
0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 3 2 0 0 0
M V30 BEGIN ATOM
M V30 1 C 0.000000 0.000000 0.000000 0
M V30 2 O 1.000000 0.500000 0.000000 0
M V30 3 H 2.598076 -0.000000 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 3
M V30 END BOND
M V30 END CTAB
M END
',false,true,false));

View File

@@ -1,9 +1,13 @@
-- mol_to_smiles: remove the one-argument signature before defining the version
-- with the optional isomeric flag. PostgreSQL identifies functions by name and
-- argument types, so without the DROP both signatures would exist side by side.
DROP FUNCTION IF EXISTS mol_to_smiles(mol);

CREATE OR REPLACE FUNCTION mol_to_smiles(
    mol,
    isomeric bool DEFAULT true
)
RETURNS cstring
LANGUAGE C
IMMUTABLE
STRICT
PARALLEL SAFE
AS 'MODULE_PATHNAME';
DROP FUNCTION IF EXISTS mol_to_cxsmiles(mol);
CREATE OR REPLACE FUNCTION mol_to_cxsmiles(mol, isomeric bool DEFAULT true)
RETURNS cstring
PARALLEL SAFE

View File

@@ -0,0 +1,15 @@
-- mol_from_ctab: remove the two-argument signature before defining the version
-- that also accepts the sanitize and remove_hs flags. PostgreSQL identifies
-- functions by name and argument types, so without the DROP both signatures
-- would exist side by side.
DROP FUNCTION IF EXISTS mol_from_ctab(cstring, bool);

CREATE OR REPLACE FUNCTION mol_from_ctab(
    ctab cstring,
    keep_conformer bool DEFAULT false,
    sanitize bool DEFAULT true,
    remove_hs bool DEFAULT true
)
RETURNS mol
LANGUAGE C
IMMUTABLE
STRICT
PARALLEL SAFE
AS 'MODULE_PATHNAME';
-- qmol_from_ctab: remove the two-argument signature before defining the version
-- that also accepts the merge_hs flag. PostgreSQL identifies functions by name
-- and argument types, so without the DROP both signatures would exist side by
-- side.
DROP FUNCTION IF EXISTS qmol_from_ctab(cstring, bool);

CREATE OR REPLACE FUNCTION qmol_from_ctab(
    ctab cstring,
    keep_conformer bool DEFAULT false,
    merge_hs bool DEFAULT true
)
RETURNS qmol
LANGUAGE C
IMMUTABLE
STRICT
PARALLEL SAFE
AS 'MODULE_PATHNAME';