cdxml parser (#5273)

This commit is contained in:
Brian Kelley
2022-09-27 23:49:27 -04:00
committed by GitHub
parent 75416ba090
commit d1985caaa7
56 changed files with 90113 additions and 43 deletions

View File

@@ -42,6 +42,10 @@
#include <GraphMol/ChemReactions/ReactionPickler.h>
%}
// Hide the native RDKit CDXML reaction readers from SWIG. The string and file
// variants are re-exposed by the hand-written static wrappers of the same names
// later in this interface file, which return vectors of std::shared_ptr.
// NOTE(review): no replacement wrapper for the stream variant is visible in
// this file section - confirm that leaving it unwrapped is intentional.
%ignore RDKit::CDXMLToChemicalReactions; //(const std::string &);
%ignore RDKit::CDXMLFileToChemicalReactions; //(const std::string &);
%ignore RDKit::CDXMLDataStreamToChemicalReactions; //(std::istream &);
%include <GraphMol/ChemReactions/Reaction.h>
%include <GraphMol/ChemReactions/ReactionParser.h>
%ignore RDKit::ChemicalReaction::validate(unsigned int &,unsigned int &,bool);
@@ -51,6 +55,7 @@
%ignore RDKit::isMoleculeProductOfReaction(const ChemicalReaction &r,const ROMol &,
unsigned int &);
%newobject ReactionFromSmarts;
%newobject ReactionFromRxnBlock;
%newobject ReactionFromRxnFile;
@@ -140,4 +145,24 @@ static RDKit::ChemicalReaction *RxnFromBinary(std::vector<int> pkl){
return res;
};
// Wrapper around RDKit::CDXMLToChemicalReactions that hands the parsed
// reactions back as shared pointers.
//
//   block     - string passed to the RDKit parser as its first argument
//   sanitize  - forwarded unchanged to the RDKit parser (default false)
//   removeHs  - forwarded unchanged to the RDKit parser (default false)
//
// Returns one std::shared_ptr per element produced by the parser, in the same
// order. Ownership of every reaction moves from the parser result into the
// returned vector.
static std::vector<std::shared_ptr<ChemicalReaction>> CDXMLToChemicalReactions(
    const std::string &block, bool sanitize=false, bool removeHs=false) {
  auto reactions = RDKit::CDXMLToChemicalReactions(block, sanitize, removeHs);
  std::vector<std::shared_ptr<RDKit::ChemicalReaction>> result;
  // Reserve before releasing any pointer: without it, a failed reallocation
  // inside emplace_back would throw after release() and leak the reaction.
  // With the capacity in place only the shared_ptr constructor can throw, and
  // that constructor deletes the pointer it was given.
  result.reserve(reactions.size());
  for (auto &rxn : reactions) {
    result.emplace_back(rxn.release());
  }
  return result;
}
// Wrapper around RDKit::CDXMLFileToChemicalReactions that hands the parsed
// reactions back as shared pointers.
//
//   filename  - string passed to the RDKit file parser as its first argument
//   sanitize  - forwarded unchanged to the RDKit parser (default false)
//   removeHs  - forwarded unchanged to the RDKit parser (default false)
//
// Returns one std::shared_ptr per element produced by the parser, in the same
// order. Ownership of every reaction moves from the parser result into the
// returned vector.
static std::vector<std::shared_ptr<ChemicalReaction>> CDXMLFileToChemicalReactions(
    const std::string &filename, bool sanitize=false, bool removeHs=false) {
  auto reactions = RDKit::CDXMLFileToChemicalReactions(filename, sanitize, removeHs);
  std::vector<std::shared_ptr<RDKit::ChemicalReaction>> result;
  // Reserve before releasing any pointer: without it, a failed reallocation
  // inside emplace_back would throw after release() and leak the reaction.
  // With the capacity in place only the shared_ptr constructor can throw, and
  // that constructor deletes the pointer it was given.
  result.reserve(reactions.size());
  for (auto &rxn : reactions) {
    result.emplace_back(rxn.release());
  }
  return result;
}
}

View File

@@ -41,6 +41,8 @@
#include <GraphMol/FileParsers/MolFileStereochem.h>
%}
// Instantiate a SWIG wrapper class named RWMol_Vect for vectors of shared
// RWMol pointers.
// NOTE(review): presumably this is the wrapped type for the
// std::vector<RDKit::RWMOL_SPTR> returned by MolsFromCDXML / MolsFromCDXMLFile;
// confirm that RWMOL_SPTR is boost::shared_ptr<RDKit::RWMol>.
%template(RWMol_Vect) std::vector< boost::shared_ptr<RDKit::RWMol> >;
// ignore the methods that allow the molecule to take ownership of atoms/Bonds
// (instead of copying them). This just leads to memory problems with Java
%ignore RDKit::RWMol::addAtom(Atom *atom,bool updateLabel,bool takeOwnership);
@@ -128,6 +130,27 @@ static RDKit::RWMOL_SPTR MolFromHELM(std::string text,
return RDKit::RWMOL_SPTR(mol);
}
static std::vector<RDKit::RWMOL_SPTR> MolsFromCDXML(std::string text,
bool sanitize=true){
auto res = RDKit::CDXMLToMols(text, sanitize);
std::vector<RDKit::RWMOL_SPTR> mols;
for(auto &mol: res) {
mols.emplace_back(mol.release());
}
return mols;
}
static std::vector<RDKit::RWMOL_SPTR> MolsFromCDXMLFile(std::string text,
bool sanitize=true){
auto res = RDKit::CDXMLFileToMols(text, sanitize);
std::vector<RDKit::RWMOL_SPTR> mols;
for(auto &mol: res) {
mols.emplace_back(mol.release());
}
return mols;
}
/* Methods from MolFileStereoChem.h */
void DetectAtomStereoChemistry(const RDKit::Conformer *conf) {

View File

@@ -33,7 +33,7 @@
package org.RDKit;
import static org.junit.Assert.*;
import java.io.*;
import java.io.File;
import java.util.ArrayList;
@@ -327,6 +327,26 @@ public class WrapperTests extends GraphMolTest {
uInt_Vect.delete();
e.delete();
}
/**
 * Reads the beta-cypermethrin CDXML test file through
 * RWMol.MolsFromCDXMLFile and checks that exactly one molecule comes back
 * with the expected SMILES.
 *
 * The file is located relative to the RDBASE environment variable; the test
 * fails immediately when that variable is not set.
 */
@Test
public void cdxmlReader() {
    String rdpath = System.getenv("RDBASE");
    if (rdpath == null)
        org.junit.Assert.fail("No definition for RDBASE");
    File base = new File(rdpath);
    File testFile = new File(base, "Code" + File.separator + "GraphMol"
            + File.separator + "test_data" + File.separator +
            "CDXML" + File.separator + "beta-cypermethrin.cdxml");
    String fn = testFile.getAbsolutePath();
    RWMol_Vect prods = RWMol.MolsFromCDXMLFile(fn);
    // JUnit's contract is assertEquals(expected, actual); passing them in that
    // order keeps the "expected ... but was ..." failure message truthful.
    assertEquals(1, prods.size());
    // Exactly one molecule is guaranteed by the assertion above, so check it
    // directly instead of looping and special-casing index 0.
    assertEquals("CC1(C)[C@H](C=C(Cl)Cl)[C@H]1C(=O)O[C@@H](C#N)c1cccc(Oc2ccccc2)c1",
            prods.get(0).MolToSmiles(true));
}
public static void main(String args[]) {
org.junit.runner.JUnitCore.main("org.RDKit.WrapperTests");