// $Id$ // // Copyright (C) 2007,2008 Greg Landrum // // @@ All Rights Reserved @@ // #include SQLITE_EXTENSION_INIT1 #include #include #include #include #include #include #include #include #include #include #include std::string stringFromTextArg(sqlite3_value *arg){ const unsigned char *text=sqlite3_value_text(arg); int nBytes=sqlite3_value_bytes(arg); std::string res((const char *)text,nBytes); return res; } std::string stringFromBlobArg(sqlite3_value *arg){ const void *blob=sqlite3_value_blob(arg); int nBytes=sqlite3_value_bytes(arg); std::string res((const char *)blob,nBytes); return res; } RDKit::ROMol *molFromBlobArg(sqlite3_value *arg){ std::string pkl=stringFromBlobArg(arg); RDKit::ROMol *m; try{ m = new RDKit::ROMol(pkl); } catch (RDKit::MolPicklerException &){ m=0; } return m; } ExplicitBitVect *ebvFromBlobArg(sqlite3_value *arg){ std::string pkl=stringFromBlobArg(arg); ExplicitBitVect *ebv; try{ ebv = new ExplicitBitVect(pkl); } catch (ValueErrorException &){ ebv=0; } return ebv; } template RDKit::SparseIntVect *sivFromBlobArg(sqlite3_value *arg){ std::string pkl=stringFromBlobArg(arg); RDKit::SparseIntVect *siv; try{ siv = new RDKit::SparseIntVect(pkl); } catch (ValueErrorException &){ siv=0; } return siv; } /* --------------------------------- Benchmarking results. Database: 65385 pubchem compounds Simple access: select count(*) from molecules where length(molpkl)>40; 0.3s depickle : select count(*) from molecules where rdk_molNumAtoms(molpkl)>40; 11.3s substruct1 : select count(*) from molecules where rdk_molHasSubstruct(molpkl,'c1ncncn1'); 18.0s substruct2 : select count(*) from molecules where rdk_molHasSubstruct(molpkl,'[#6;r10]'); 15.8 3 Oct 2007: depickle : select count(*) from molecules where rdk_molNumAtoms(molpkl)>40; 9.4s mw : select count(*) from molecules where rdk_molAMW(molpkl)<200; 9.7s --------------------------------- */ static void numAtomsFunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ RDKit::ROMol *m=molFromBlobArg(argv[0]); if(m){ int res=m->getNumAtoms(); delete m; sqlite3_result_int(context, res); } else { std::string errorMsg="BLOB could not be converted into a molecule"; sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length()); } } static void molWtFunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ RDKit::ROMol *m=molFromBlobArg(argv[0]); if(m){ double res=RDKit::Descriptors::CalcAMW(*m); delete m; sqlite3_result_double(context, res); } else { std::string errorMsg="BLOB could not be converted into a molecule"; sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length()); } } static void molLogPFunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ RDKit::ROMol *m=molFromBlobArg(argv[0]); if(m){ double res,tmp; RDKit::Descriptors::CalcCrippenDescriptors(*m,res,tmp); delete m; sqlite3_result_double(context, res); } else { std::string errorMsg="BLOB could not be converted into a molecule"; sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length()); } } static void smilesToBlob( sqlite3_context *context, int argc, sqlite3_value **argv ){ std::string smiles=stringFromTextArg(argv[0]); RDKit::ROMol *m=0; try{ m=RDKit::SmilesToMol(smiles); } catch(RDKit::MolSanitizeException &){ m=0; } if(m){ std::string text; RDKit::MolPickler::pickleMol(*m,text); delete m; sqlite3_result_blob(context, text.c_str(), text.length(), SQLITE_TRANSIENT ); } else { std::string errorMsg="SMILES could not be converted into a molecule"; sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length()); } } static void molHasSubstruct( sqlite3_context *context, int argc, sqlite3_value **argv ){ RDKit::ROMol *m=molFromBlobArg(argv[0]); if(!m){ std::string errorMsg="BLOB (argument 1) could not be converted into a molecule"; sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length()); return; } std::string smarts=stringFromTextArg(argv[1]); std::map &molMap= *static_cast *>(sqlite3_user_data(context)); RDKit::ROMol *patt=0; if(molMap.find(smarts)!=molMap.end()){ patt=boost::any_cast(molMap[smarts]).get(); } else { patt=static_cast(RDKit::SmartsToMol(smarts)); molMap[smarts]=boost::any(RDKit::ROMOL_SPTR(patt)); } if(!patt){ std::string errorMsg="SMARTS (argument 2) could not be converted into a molecule"; sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length()); return; } RDKit::MatchVectType match; int res=RDKit::SubstructMatch(*m,*patt,match,true,false,true); delete m; sqlite3_result_int(context, res); } static void molSubstructCount( sqlite3_context *context, int argc, sqlite3_value **argv ){ RDKit::ROMol *m=molFromBlobArg(argv[0]); if(!m){ std::string errorMsg="BLOB (argument 1) could not be converted into a molecule"; sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length()); return; } std::string smarts=stringFromTextArg(argv[1]); std::map &molMap= *static_cast *>(sqlite3_user_data(context)); RDKit::ROMol *patt=0; if(molMap.find(smarts)!=molMap.end()){ patt=boost::any_cast(molMap[smarts]).get(); } else { patt=static_cast(RDKit::SmartsToMol(smarts)); molMap[smarts]=boost::any(RDKit::ROMOL_SPTR(patt)); } if(!patt){ std::string errorMsg="SMARTS (argument 2) could not be converted into a molecule"; sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length()); return; } std::vector matches; int res=RDKit::SubstructMatch(*m,*patt,matches,true,true,false); delete m; sqlite3_result_int(context, res); } static void blobToRDKitFingerprint( sqlite3_context *context, int argc, sqlite3_value **argv ){ RDKit::ROMol *m=molFromBlobArg(argv[0]); if(!m){ std::string errorMsg="BLOB (argument 1) could not be converted into a molecule"; sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length()); return; } ExplicitBitVect *fp=RDKit::DaylightFingerprintMol(*m,1,7,2048,4,true,0.3,128); std::string text=fp->ToString(); delete fp; delete m; sqlite3_result_text(context, text.c_str(), text.length(), SQLITE_TRANSIENT ); } static void blobToAtomPairFingerprint( sqlite3_context *context, int argc, sqlite3_value **argv ){ RDKit::ROMol *m=molFromBlobArg(argv[0]); if(!m){ std::string errorMsg="BLOB (argument 1) could not be converted into a molecule"; sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length()); return; } RDKit::SparseIntVect *fp=RDKit::Descriptors::AtomPairs::getAtomPairFingerprint(*m); std::string text=fp->toString(); delete fp; delete m; sqlite3_result_text(context, text.c_str(), text.length(), SQLITE_TRANSIENT ); } static void bvTanimotoSim( sqlite3_context *context, int argc, sqlite3_value **argv ){ ExplicitBitVect *bv1=ebvFromBlobArg(argv[0]); if(!bv1){ std::string errorMsg="BLOB (argument 1) could not be converted into a bit vector"; sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length()); return; } ExplicitBitVect *bv2=ebvFromBlobArg(argv[1]); if(!bv2){ delete bv1; std::string errorMsg="BLOB (argument 2) could not be converted into a bit vector"; sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length()); return; } double res=SimilarityWrapper(*bv1,*bv2,TanimotoSimilarity); delete bv1; delete bv2; sqlite3_result_double(context, res); } static void sivDiceSim( sqlite3_context *context, int argc, sqlite3_value **argv ){ RDKit::SparseIntVect *v1=sivFromBlobArg(argv[0]); if(!v1){ std::string errorMsg="BLOB (argument 1) could not be converted into an int vector"; sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length()); return; } RDKit::SparseIntVect *v2=sivFromBlobArg(argv[1]); if(!v2){ delete v1; std::string errorMsg="BLOB (argument 2) could not be converted into a bit vector"; sqlite3_result_error(context,errorMsg.c_str(),errorMsg.length()); return; } double res= RDKit::DiceSimilarity(*v1,*v2); delete v1; delete v2; sqlite3_result_double(context, res); } /* SQLite invokes this routine once when it loads the extension. ** Create new functions, collating sequences, and virtual table ** modules here. This is usually the only exported symbol in ** the shared library. */ extern "C" int sqlite3_extension_init( sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi ){ SQLITE_EXTENSION_INIT2(pApi); std::map *molMap=new std::map(); sqlite3_create_function(db, "rdk_molNumAtoms", 1, SQLITE_ANY, 0, numAtomsFunc, 0, 0); sqlite3_create_function(db, "rdk_molAMW", 1, SQLITE_ANY, 0, molWtFunc, 0, 0); sqlite3_create_function(db, "rdk_smilesToBlob", 1, SQLITE_ANY, 0, smilesToBlob, 0, 0); sqlite3_create_function(db, "rdk_molToRDKitFP", 1, SQLITE_ANY, 0, blobToRDKitFingerprint, 0, 0); sqlite3_create_function(db, "rdk_bvTanimotoSim", 2, SQLITE_ANY, 0, bvTanimotoSim, 0, 0); sqlite3_create_function(db, "rdk_molToAtomPairFP", 1, SQLITE_ANY, 0, blobToAtomPairFingerprint, 0, 0); sqlite3_create_function(db, "rdk_sivDiceSim", 2, SQLITE_ANY, 0, sivDiceSim, 0, 0); sqlite3_create_function(db, "rdk_molHasSubstruct", 2, SQLITE_ANY, static_cast(molMap), molHasSubstruct, 0, 0); sqlite3_create_function(db, "rdk_molSubstructCount", 2, SQLITE_ANY, static_cast(molMap), molSubstructCount, 0, 0); sqlite3_create_function(db, "rdk_molLogP", 1, SQLITE_ANY, 0, molLogPFunc, 0, 0); return 0; }