Allow converting mol blocks directly to InChI (#2262)

* basics of enabling molblock -> InChI

* first pass at python wrapper

* test that options work
This commit is contained in:
Greg Landrum
2019-02-13 15:43:02 +01:00
committed by Brian Kelley
parent cb35234f8c
commit bebf0c9002
6 changed files with 340 additions and 4 deletions

View File

@@ -44,6 +44,16 @@ namespace {
rv.auxInfoPtr);
}
// Python-facing wrapper around RDKit::MolBlockToInchi().
// Returns the tuple (inchi, returnCode, message, log, auxInfo).
boost::python::tuple MolBlockToInchi(const std::string &molblock, std::string options)
{
  RDKit::ExtraInchiReturnValues extras;
  // an empty options string is forwarded as a null pointer, not as ""
  const char *optionsArg = options.empty() ? NULL : options.c_str();
  const std::string result = RDKit::MolBlockToInchi(molblock, extras, optionsArg);
  return boost::python::make_tuple(result, extras.returnCode, extras.messagePtr,
                                   extras.logPtr, extras.auxInfoPtr);
}
boost::python::tuple InchiToMol(const std::string &inchi, bool sanitize,
bool removeHs)
{
@@ -95,6 +105,28 @@ BOOST_PYTHON_MODULE(rdinchi) {
boost::python::arg("options")=std::string()),
docString.c_str()
);
// Python docstring and binding for the mol block -> InChI conversion. The
// input is mol block text, not a ROMol, so the summary line says so.
docString = "return the InChI for a mol block.\n\
\n\
Arguments:\n\
- molblock: the mol block to use.\n\
- options: the InChI generation options.\n\
Options should be prefixed with either a - or a /\n\
Available options are explained in the InChI technical FAQ:\n\
http://www.inchi-trust.org/fileadmin/user_upload/html/inchifaq/inchi-faq.html#15.14\n\
and the User Guide:\n\
http://www.inchi-trust.org/fileadmin/user_upload/software/inchi-v1.04/InChI_UserGuide.pdf\n\
Returns:\n\
a tuple with:\n\
- the InChI\n\
- the return code from the InChI conversion\n\
- a string with any messages from the InChI conversion\n\
- a string with any log messages from the InChI conversion\n\
- a string with the InChI AuxInfo\n";
boost::python::def("MolBlockToInchi", MolBlockToInchi,
(boost::python::arg("molblock"),
boost::python::arg("options")=std::string()),
docString.c_str()
);
docString = "return the InChI key for an InChI string";
boost::python::def("InchiToInchiKey", RDKit::InchiToInchiKey,
(boost::python::arg("inchi")),

View File

@@ -1960,6 +1960,41 @@ std::string MolToInchi(const ROMol& mol, ExtraInchiReturnValues& rv,
return inchi;
}
std::string MolBlockToInchi(const std::string &molBlock, ExtraInchiReturnValues& rv,
const char* options) {
// create output
inchi_Output output;
memset((void *)&output, 0, sizeof(output));
// call DLL
std::string inchi;
{
#if RDK_TEST_MULTITHREADED
std::lock_guard<std::mutex> lock(inchiMutex);
#endif
char *_options = nullptr;
if (options) {
_options = new char[strlen(options) + 1];
fixOptionSymbol(options, _options);
options = _options;
}
int retcode = MakeINCHIFromMolfileText( molBlock.c_str(), (char *)options, &output );
// generate output
rv.returnCode = retcode;
if (output.szInChI) inchi = std::string(output.szInChI);
if (output.szMessage) rv.messagePtr = std::string(output.szMessage);
if (output.szLog) rv.logPtr = std::string(output.szLog);
if (output.szAuxInfo) rv.auxInfoPtr = std::string(output.szAuxInfo);
// clean up
FreeINCHI(&output);
delete [] _options;
}
return inchi;
}
std::string InchiToInchiKey(const std::string& inchi) {
char inchiKey[29];
char xtra1[65], xtra2[65];

View File

@@ -67,6 +67,21 @@ namespace RDKit {
*/
RDKIT_RDINCHILIB_EXPORT std::string MolToInchi(const ROMol& mol, ExtraInchiReturnValues& rv,
const char *options=NULL);
/*! Get the InChI string for a given mol block
* \param mol The input mol block
* \param rv An ExtraInchiReturnValues struct instance that is used to receive
* extra return values such as InChI Auxiliary Information and error messages
* from InChI API.
* \param options A null-terminated character string of space-delimited
* InChI options that is passed to InChI API as is (except that / is naively
* converted to - on non-Windows platforms and - is converted to / on Windows)
* Available options are explained in the InChI technical FAQ:
* http://www.inchi-trust.org/fileadmin/user_upload/html/inchifaq/inchi-faq.html#15.14
* and the User Guide:
* http://www.inchi-trust.org/fileadmin/user_upload/software/inchi-v1.04/InChI_UserGuide.pdf
* \return The InChI string produced by the InChI API for the mol block
*/
RDKIT_RDINCHILIB_EXPORT std::string MolBlockToInchi(const std::string & mol, ExtraInchiReturnValues& rv,
const char *options=NULL);
/*! Get the InChI Key for an input InChI string
* \param inchi The input InChI string, which can be standard or not.
*/

View File

@@ -1,4 +1,3 @@
# $Id$
#
# Copyright (c) 2011, Novartis Institutes for BioMedical Research Inc.
# All rights reserved.
@@ -123,6 +122,37 @@ def MolToInchiAndAuxInfo(mol, options="", logLevel=None, treatWarningAsError=Fal
raise InchiReadWriteError(inchi, aux, message)
return inchi, aux
def MolBlockToInchiAndAuxInfo(molblock, options="", logLevel=None, treatWarningAsError=False):
  """Returns the InChI string and InChI auxInfo for a mol block

  Arguments:
  molblock -- the mol block text to convert
  options -- the InChI generation options, passed on to rdinchi.MolBlockToInchi

  Keyword arguments:
  logLevel -- the log level used for logging the message from the InChI API
  when the conversion succeeds (return code 0). Set to None to disable that
  logging completely. Non-zero return codes are always reported through the
  module logger: as a warning for return code 1, as an error otherwise.
  treatWarningAsError -- set to True to raise an exception whenever the InChI
  API returns a non-zero code. The resultant InChI string and AuxInfo string
  as well as the message are encoded in the exception.

  Returns:
  a tuple of the InChI string and the auxInfo string returned by the InChI
  API, in that order, for the input mol block

  Raises:
  ValueError -- if logLevel is not None and not a supported log level
  InchiReadWriteError -- if treatWarningAsError is set and the return code is
  non-zero
  """
  inchi, retcode, message, _logs, aux = rdinchi.MolBlockToInchi(molblock, options)
  if logLevel is not None:
    if logLevel not in logLevelToLogFunctionLookup:
      # %r instead of %d: a non-integer level (e.g. a string) must produce the
      # intended ValueError, not a TypeError from the format operation
      raise ValueError("Unsupported log level: %r" % (logLevel, ))
    log = logLevelToLogFunctionLookup[logLevel]
    if retcode == 0:
      log(message)
  if retcode != 0:
    if retcode == 1:
      logger.warning(message)
    else:
      logger.error(message)
  if treatWarningAsError and retcode != 0:
    raise InchiReadWriteError(inchi, aux, message)
  return inchi, aux
def MolToInchi(mol, options="", logLevel=None, treatWarningAsError=False):
"""Returns the standard InChI string for a molecule
@@ -152,6 +182,33 @@ def MolToInchi(mol, options="", logLevel=None, treatWarningAsError=False):
raise InchiReadWriteError(inchi, message)
return inchi
def MolBlockToInchi(molblock, options="", logLevel=None, treatWarningAsError=False):
  """Returns the InChI string for a mol block

  AuxInfo generation is switched off by adding the AuxNone option unless the
  caller's options already mention it; the work is then delegated to
  MolBlockToInchiAndAuxInfo.

  Keyword arguments:
  logLevel -- forwarded unchanged to MolBlockToInchiAndAuxInfo
  treatWarningAsError -- forwarded unchanged to MolBlockToInchiAndAuxInfo. If
  that call raises InchiReadWriteError, a new InchiReadWriteError carrying only
  the InChI string and the message is raised in its place.

  Returns:
  the InChI string returned by the InChI API for the input mol block
  """
  if 'AuxNone' not in options:
    # a separating space is only needed when other options are present
    options = (options + " /AuxNone") if options else "/AuxNone"
  try:
    inchi, _aux = MolBlockToInchiAndAuxInfo(molblock, options, logLevel=logLevel,
                                            treatWarningAsError=treatWarningAsError)
  except InchiReadWriteError as err:
    # re-raise without the AuxInfo element
    inchi, _aux, message = err.args
    raise InchiReadWriteError(inchi, message)
  return inchi
def InchiToInchiKey(inchi):
"""Return the InChI key for the given InChI string. Return None on error"""
@@ -173,5 +230,5 @@ def MolToInchiKey(mol, options=""):
__all__ = ['MolToInchiAndAuxInfo', 'MolToInchi', 'MolFromInchi', 'InchiReadWriteError',
__all__ = ['MolToInchiAndAuxInfo', 'MolToInchi', 'MolBlockToInchiAndAuxInfo', 'MolBlockToInchi', 'MolFromInchi', 'InchiReadWriteError',
'InchiToInchiKey', 'MolToInchiKey', 'INCHI_AVAILABLE']

View File

@@ -566,6 +566,151 @@ void testGithubIssue1572() {
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
// Checks MolBlockToInchi() against fixed mol block fixtures: a plain V2000
// block, a block with an M  CHG line (also checking the returned message),
// and one block converted both without options and with "/FixedH".
// The mol block text inside the raw string literals is test data and must
// not be reformatted.
void testMolBlockToInchi() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdInfoLog)
<< "testing mol block to InChI"
<< std::endl;
// case 1: six-atom ring containing one oxygen and one double bond, default options
{
std::string molb = R"MOL(
Mrv1824 02111920092D
6 6 0 0 0 0 999 V2000
-5.5134 3.5259 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-6.2279 3.1134 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-6.2279 2.2884 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-5.5134 1.8759 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.7989 2.2884 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.7989 3.1134 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0 0 0 0
3 4 1 0 0 0 0
4 5 1 0 0 0 0
5 6 1 0 0 0 0
1 6 1 0 0 0 0
2 3 2 0 0 0 0
M END
)MOL";
ExtraInchiReturnValues tmp;
std::string inchi = MolBlockToInchi(molb, tmp);
TEST_ASSERT(inchi=="InChI=1S/C5H8O/c1-2-4-6-5-3-1/h1-2H,3-5H2");
}
// case 2: block carrying formal charges; both the InChI and the message
// returned through ExtraInchiReturnValues are checked
{
std::string molb=R"MOL(BDBM163075
RDKit 2D
27 30 0 0 0 0 0 0 0 0999 V2000
1.6146 -5.5162 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.9260 -4.0489 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7.0484 1.6535 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-7.2594 1.8470 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
6.4379 3.0237 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
6.1670 0.4398 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-6.6489 0.4769 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-6.3781 3.0608 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.9796 -3.6396 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.9460 3.1800 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.6752 0.5961 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-5.1571 0.3205 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.8863 2.9045 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.0946 -2.6362 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.4878 -3.7959 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.5729 2.1226 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.7839 1.3780 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.0647 1.9663 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.2758 1.5343 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.0711 -1.5187 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.6916 0.9089 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.6816 -0.1486 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.4207 -1.6751 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.1734 0.0078 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
0.1997 1.0652 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
2.3020 -0.4613 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
0.8110 -3.0456 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0
2 27 1 0
3 5 2 0
3 6 1 0
4 7 2 0
4 8 1 0
5 10 1 0
6 11 2 0
7 12 1 0
8 13 2 0
9 14 2 0
9 15 1 0
10 18 2 0
11 18 1 0
12 19 2 0
13 19 1 0
14 20 1 0
15 27 1 0
16 18 1 0
16 21 1 0
17 19 1 0
17 24 1 0
20 22 2 0
20 23 1 0
21 25 2 0
21 26 1 0
22 24 1 0
22 25 1 0
23 26 2 0
23 27 1 0
M CHG 2 15 -1 27 1
M END
)MOL";
ExtraInchiReturnValues tmp;
std::string inchi = MolBlockToInchi(molb, tmp);
TEST_ASSERT(inchi=="InChI=1S/C23H23N3O/c1-2-27-15-9-14-20-22(24-17-19-12-7-4-8-13-19)25-21(26-23(20)27)16-18-10-5-3-6-11-18/h3-15H,2,16-17H2,1H3,(H,24,25,26)");
TEST_ASSERT(tmp.messagePtr == "Charges were rearranged; Accepted unusual valence(s): O(4)");
}
// case 3: the same block converted with and without an explicit option
// string, to confirm that options reach the InChI API
{
std::string molb=R"MOL(
Mrv1824 02121905282D
10 11 0 0 0 0 999 V2000
-4.6875 -1.1393 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-5.4020 -1.5518 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-5.4020 -2.3768 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.6875 -2.7893 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.9730 -2.3768 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.9730 -1.5518 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.2586 -2.7893 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-2.5441 -1.5518 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.5441 -2.3768 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-1.9608 -0.9684 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 2 0 0 0 0
2 3 1 0 0 0 0
3 4 2 0 0 0 0
4 5 1 0 0 0 0
5 6 2 0 0 0 0
1 6 1 0 0 0 0
7 9 1 0 0 0 0
6 8 1 0 0 0 0
7 5 1 0 0 0 0
8 10 1 0 0 0 0
8 9 2 0 0 0 0
M END
)MOL";
// default options: standard InChI ("InChI=1S/...")
{
ExtraInchiReturnValues tmp;
std::string inchi = MolBlockToInchi(molb, tmp);
TEST_ASSERT(inchi=="InChI=1S/C8H8N2/c1-6-7-4-2-3-5-8(7)10-9-6/h2-5H,1H3,(H,9,10)");
}
// with /FixedH the expected result is "InChI=1/..." and ends with an
// additional /f/h10H layer
{
ExtraInchiReturnValues tmp;
std::string inchi = MolBlockToInchi(molb, tmp, "/FixedH");
TEST_ASSERT(inchi=="InChI=1/C8H8N2/c1-6-7-4-2-3-5-8(7)10-9-6/h2-5H,1H3,(H,9,10)/f/h10H");
}
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
//
//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
@@ -582,6 +727,7 @@ int main() {
testGithubIssue437();
testGithubIssue562();
testGithubIssue614();
#endif
testGithubIssue1572();
#endif
testMolBlockToInchi();
}

View File

@@ -46,7 +46,7 @@ from rdkit.Chem import MolFromMolBlock, MolToMolBlock
from rdkit.Chem import INCHI_AVAILABLE
if INCHI_AVAILABLE:
from rdkit.Chem import InchiReadWriteError
from rdkit.Chem import MolToInchi, MolFromInchi, InchiToInchiKey, MolToInchiKey
from rdkit.Chem import MolToInchi, MolBlockToInchi, MolFromInchi, InchiToInchiKey, MolToInchiKey
COLOR_RED = '\033[31m'
COLOR_GREEN = '\033[32m'
@@ -273,6 +273,57 @@ class TestCase(unittest.TestCase):
k2 = MolToInchiKey(m)
self.assertEqual(k1, k2)
# Checks the Python-level MolBlockToInchi(): first with default options, then
# with an explicit option string. The mol block text inside the triple-quoted
# strings is test data and must not be reformatted.
def test5MolBlockToInchi(self):
# six-atom ring containing one oxygen and one double bond
mb = """
Mrv1824 02111920092D
6 6 0 0 0 0 999 V2000
-5.5134 3.5259 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-6.2279 3.1134 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-6.2279 2.2884 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-5.5134 1.8759 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.7989 2.2884 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.7989 3.1134 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0 0 0 0
3 4 1 0 0 0 0
4 5 1 0 0 0 0
5 6 1 0 0 0 0
1 6 1 0 0 0 0
2 3 2 0 0 0 0
M END"""
# default options: a standard InChI ("InChI=1S/...") is expected
inchi = MolBlockToInchi(mb)
self.assertEqual(inchi,"InChI=1S/C5H8O/c1-2-4-6-5-3-1/h1-2H,3-5H2")
# make sure that options work
mb2 = """
Mrv1824 02121905282D
10 11 0 0 0 0 999 V2000
-4.6875 -1.1393 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-5.4020 -1.5518 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-5.4020 -2.3768 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.6875 -2.7893 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.9730 -2.3768 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.9730 -1.5518 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.2586 -2.7893 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-2.5441 -1.5518 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.5441 -2.3768 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-1.9608 -0.9684 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 2 0 0 0 0
2 3 1 0 0 0 0
3 4 2 0 0 0 0
4 5 1 0 0 0 0
5 6 2 0 0 0 0
1 6 1 0 0 0 0
7 9 1 0 0 0 0
6 8 1 0 0 0 0
7 5 1 0 0 0 0
8 10 1 0 0 0 0
8 9 2 0 0 0 0
M END"""
# with /FixedH the expected InChI is "InChI=1/..." and ends with an
# additional /f/h10H layer
inchi2 = MolBlockToInchi(mb2,options="/FixedH")
self.assertEqual(inchi2,"InChI=1/C8H8N2/c1-6-7-4-2-3-5-8(7)10-9-6/h2-5H,1H3,(H,9,10)/f/h10H")
if __name__ == '__main__': # pragma: nocover
# only run the test if InChI is available