add support for the new sequence readers to the wrappers (#1181) (#1221)

* Fixes #1181
needs more testing

* improved docs and a couple tests

* add the new sequence support to the java wrappers too
This commit is contained in:
Greg Landrum
2016-12-21 04:42:17 +01:00
parent 60d163c8d0
commit 01829cc4e7
7 changed files with 216 additions and 48 deletions

View File

@@ -33,7 +33,18 @@ RWMol *SequenceToMol(const std::string &seq, bool sanitize, bool lowerD);
* \param seq - the string to be processed
* \param sanitize - toggles sanitization and stereochemistry perception of
*the molecule
* \param flavor - 0 & 1 Protein, 2, 3, 4 & 5 RNA, 6, 7, 8 & 9 DNA
* \param flavor -
* 0 Protein, L amino acids (default)
* 1 Protein, D amino acids
* 2 RNA, no cap
* 3 RNA, 5' cap
* 4 RNA, 3' cap
* 5 RNA, both caps
* 6 DNA, no cap
* 7 DNA, 5' cap
* 8 DNA, 3' cap
* 9 DNA, both caps
*
*/
RWMol *SequenceToMol(const char *seq, bool sanitize = true, int flavor = 0);
//! \overload
@@ -59,13 +70,22 @@ RWMol *FASTAToMol(const std::string &seq, bool sanitize, bool lowerD);
* \param seq - the string to be processed
* \param sanitize - toggles sanitization and stereochemistry perception of
*the molecule
* \param flavor - 0 & 1 protein, 2, 3, 4, & 5 RNA, 6, 7, 8 & 9 DNA
* \param flavor -
* 0 Protein, L amino acids (default)
* 1 Protein, D amino acids
* 2 RNA, no cap
* 3 RNA, 5' cap
* 4 RNA, 3' cap
* 5 RNA, both caps
* 6 DNA, no cap
* 7 DNA, 5' cap
* 8 DNA, 3' cap
* 9 DNA, both caps
*
*/
RWMol *FASTAToMol(const char *seq, bool sanitize = true, int flavor = 0);
//! \overload
RWMol *FASTAToMol(const std::string &seq, bool sanitize = true,
int flavor = 0);
RWMol *FASTAToMol(const std::string &seq, bool sanitize = true, int flavor = 0);
// \brief construct a molecule from a HELM string (currently only supports
// peptides)

View File

@@ -204,20 +204,20 @@ ROMol *MolFromPDBBlock(python::object molBlock, bool sanitize, bool removeHs,
return static_cast<ROMol *>(newM);
}
// Python-facing wrapper that builds a molecule from a sequence string.
//
// \param seq      - Python object; converted with pyObjectToString() before
//                   being handed to SequenceToMol()
// \param sanitize - forwarded unchanged to SequenceToMol()
// \param flavor   - forwarded unchanged to SequenceToMol()
// \return the result of SequenceToMol() cast to ROMol *, or a null pointer if
//         an exception was thrown inside the try block.
//
// NOTE(review): this span is a diff rendering, so both the pre-change
// (lowerD) and post-change (flavor) variants of the signature and of the
// SequenceToMol() call appear below.
ROMol *MolFromSequence(python::object seq, bool sanitize, bool lowerD) {
ROMol *MolFromSequence(python::object seq, bool sanitize, int flavor) {
RWMol *newM = 0;
try {
newM = SequenceToMol(pyObjectToString(seq), sanitize, lowerD);
newM = SequenceToMol(pyObjectToString(seq), sanitize, flavor);
} catch (RDKit::FileParseException &e) {
// Parse failures are reported on the warning log; newM stays null.
BOOST_LOG(rdWarningLog) << e.message() << std::endl;
} catch (...) {
// Every other exception is swallowed silently; the caller only sees null.
}
return static_cast<ROMol *>(newM);
}
ROMol *MolFromFASTA(python::object seq, bool sanitize, bool lowerD) {
ROMol *MolFromFASTA(python::object seq, bool sanitize, int flavor) {
RWMol *newM = 0;
try {
newM = FASTAToMol(pyObjectToString(seq), sanitize, lowerD);
newM = FASTAToMol(pyObjectToString(seq), sanitize, flavor);
} catch (RDKit::FileParseException &e) {
BOOST_LOG(rdWarningLog) << e.message() << std::endl;
} catch (...) {
@@ -898,8 +898,17 @@ BOOST_PYTHON_MODULE(rdmolfiles) {
- sanitize: (optional) toggles sanitization of the molecule.\n\
Defaults to True.\n\
\n\
- lowerD: (optional)\n\
Defaults to false.\n\
- flavor: (optional)\n\
0 Protein, L amino acids (default)\n\
1 Protein, D amino acids\n\
2 RNA, no cap\n\
3 RNA, 5' cap\n\
4 RNA, 3' cap\n\
5 RNA, both caps\n\
6 DNA, no cap\n\
7 DNA, 5' cap\n\
8 DNA, 3' cap\n\
9 DNA, both caps\n\
\n\
RETURNS:\n\
\n\
@@ -907,7 +916,7 @@ BOOST_PYTHON_MODULE(rdmolfiles) {
\n";
python::def("MolFromSequence", RDKit::MolFromSequence,
(python::arg("text"), python::arg("sanitize") = true,
python::arg("lowerD") = false),
python::arg("flavor") = 0),
docString.c_str(),
python::return_value_policy<python::manage_new_object>());
docString =
@@ -934,16 +943,24 @@ BOOST_PYTHON_MODULE(rdmolfiles) {
- sanitize: (optional) toggles sanitization of the molecule.\n\
Defaults to True.\n\
\n\
- lowerD: (optional)\n\
Defaults to false.\n\
\n\
- flavor: (optional)\n\
0 Protein, L amino acids (default)\n\
1 Protein, D amino acids\n\
2 RNA, no cap\n\
3 RNA, 5' cap\n\
4 RNA, 3' cap\n\
5 RNA, both caps\n\
6 DNA, no cap\n\
7 DNA, 5' cap\n\
8 DNA, 3' cap\n\
9 DNA, both caps\n\
RETURNS:\n\
\n\
a Mol object, None on failure.\n\
\n";
python::def("MolFromFASTA", RDKit::MolFromFASTA,
(python::arg("text"), python::arg("sanitize") = true,
python::arg("lowerD") = false),
python::arg("flavor") = 0),
docString.c_str(),
python::return_value_policy<python::manage_new_object>());
docString =

View File

@@ -3220,6 +3220,18 @@ CAS<~>
self.assertEqual(Chem.MolToFASTA(m), fasta)
self.assertEqual(Chem.MolToSmiles(m, isomericSmiles=True), smi)
seq = "CGCGAATTACCGCG"
m = Chem.MolFromSequence(seq,flavor=6) # DNA
self.assertEqual(Chem.MolToSequence(m),'CGCGAATTACCGCG')
self.assertEqual(Chem.MolToHELM(m),'RNA1{[dR](C)P.[dR](G)P.[dR](C)P.[dR](G)P.[dR](A)P.[dR](A)P.[dR](T)P.[dR](T)P.[dR](A)P.[dR](C)P.[dR](C)P.[dR](G)P.[dR](C)P.[dR](G)}$$$$')
seq = "CGCGAAUUACCGCG"
m = Chem.MolFromSequence(seq,flavor=2) # RNA
self.assertEqual(Chem.MolToSequence(m),'CGCGAAUUACCGCG')
self.assertEqual(Chem.MolToHELM(m),'RNA1{R(C)P.R(G)P.R(C)P.R(G)P.R(A)P.R(A)P.R(U)P.R(U)P.R(A)P.R(C)P.R(C)P.R(G)P.R(C)P.R(G)}$$$$')
m = Chem.MolFromSequence(seq,flavor=3) # RNA - 5' cap
self.assertEqual(Chem.MolToSequence(m),'CGCGAAUUACCGCG')
self.assertEqual(Chem.MolToHELM(m),'RNA1{P.R(C)P.R(G)P.R(C)P.R(G)P.R(A)P.R(A)P.R(U)P.R(U)P.R(A)P.R(C)P.R(C)P.R(G)P.R(C)P.R(G)}$$$$')
def testResMolSupplier(self):
mol = Chem.MolFromSmiles('CC')
resMolSuppl = Chem.ResonanceMolSupplier(mol)

View File

@@ -1,21 +1,21 @@
/*
/*
* $Id$
*
* Copyright (c) 2010, Novartis Institutes for BioMedical Research Inc.
* All rights reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* met:
*
* * Redistributions of source code must retain the above copyright
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
* * Neither the name of Novartis Institutes for BioMedical Research Inc.
* nor the names of its contributors may be used to endorse or promote
* * Neither the name of Novartis Institutes for BioMedical Research Inc.
* nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
@@ -42,6 +42,7 @@
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/FileParsers/SequenceWriters.h>
#include <GraphMol/Bond.h>
#include <GraphMol/FileParsers/MolFileStereochem.h>
#include <GraphMol/Descriptors/Crippen.h>
@@ -66,7 +67,7 @@
%template(ROMol_Vect_Vect) std::vector< std::vector< boost::shared_ptr<RDKit::ROMol> > >;
%template(Atom_Vect) std::vector<RDKit::Atom*>;
// These prevent duplicate definitions in Java code
// These prevent duplicate definitions in Java code
%ignore RDKit::ROMol::getAtomDegree(const Atom *) const;
%ignore RDKit::ROMol::setAtomBookmark(Atom *,int);
%ignore RDKit::ROMol::clearAtomBookmark(const int, const Atom *);
@@ -90,7 +91,7 @@
/*
* Special handling for Conformer objects which should not be GCed until the molecule is destroyed
* We want to modify the behavior of the Conformer coming into the addConformer method without
* We want to modify the behavior of the Conformer coming into the addConformer method without
* impacting Conformer objects that are arguments to other methods. Therefore we define a pattern
* that will trigger special handling of the Conformer input (the addConf method matches this pattern).
* Then add the necessary Java code to modify the Conformer object to no longer be the owner of the
@@ -155,6 +156,15 @@
/* Forwards to RDKit::MolToPDBFile, passing the extended object (*$self)
 * together with the file name, conformer id and flavor unchanged.
 * Defaults: confId=-1, flavor=0. */
void MolToPDBFile(std::string fName,int confId=-1,unsigned int flavor=0) {
RDKit::MolToPDBFile(*($self), fName, confId, flavor);
}
/* Returns the string produced by RDKit::MolToSequence for the extended
 * object (*$self). */
std::string MolToSequence() {
return RDKit::MolToSequence(*($self));
}
/* Returns the string produced by RDKit::MolToFASTA for the extended
 * object (*$self). */
std::string MolToFASTA() {
return RDKit::MolToFASTA(*($self));
}
/* Returns the string produced by RDKit::MolToHELM for the extended
 * object (*$self). */
std::string MolToHELM() {
return RDKit::MolToHELM(*($self));
}
bool hasSubstructMatch(RDKit::ROMol &query,bool useChirality=false){
RDKit::MatchVectType mv;
@@ -212,7 +222,7 @@
/* Methods from ConjugHybrid.cpp */
void setConjugation() {
RDKit::MolOps::setConjugation(*($self));
}
}
void setHybridization() {
RDKit::MolOps::setHybridization(*($self));
@@ -228,9 +238,9 @@
bool permuteDeg4Nodes=false) {
return RDDepict::compute2DCoords(*($self),
coordMap,
canonOrient,
clearConfs,
nFlipsPerSample,
canonOrient,
clearConfs,
nFlipsPerSample,
nSamples,
sampleSeed,
permuteDeg4Nodes);
@@ -254,7 +264,7 @@
}
}
unsigned int compute2DCoordsMimicDistMat(const RDDepict::DOUBLE_SMART_PTR *dmat=0,
bool canonOrient=true,
bool clearConfs=true,
@@ -363,21 +373,21 @@
}
/* From GraphMol/MolAlign/AlignMolecules */
double alignMol(const RDKit::ROMol &refMol,
double alignMol(const RDKit::ROMol &refMol,
int prbCid=-1, int refCid=-1,
const std::vector<std::pair<int,int> > *atomMap=0,
const RDNumeric::DoubleVector *weights=0,
const std::vector<std::pair<int,int> > *atomMap=0,
const RDNumeric::DoubleVector *weights=0,
bool reflect=false, unsigned int maxIters=50) {
return RDKit::MolAlign::alignMol(*($self), refMol, prbCid, refCid, atomMap, weights, reflect, maxIters);
}
void alignMolConformers(ROMol &mol, const std::vector<unsigned int> *atomIds=0,
const std::vector<unsigned int> *confIds=0,
const RDNumeric::DoubleVector *weights=0,
const RDNumeric::DoubleVector *weights=0,
bool reflect=false, unsigned int maxIters=50) {
RDKit::MolAlign::alignMolConformers(*($self), atomIds, confIds, weights, reflect, maxIters);
}
/* From GraphMol/MolAlign/AlignMolecules */
double getAlignmentTransform(const RDKit::ROMol &refMol,
RDGeom::Transform3D &trans, int prbCid = -1,
@@ -388,13 +398,13 @@
}
/* From GraphMol/MolAlign/AlignMolecules */
std::pair<double,double> O3AAlignMol(RDKit::ROMol &refMol,
std::pair<double,double> O3AAlignMol(RDKit::ROMol &refMol,
int prbCid=-1, int refCid=-1,
bool reflect=false, unsigned int maxIters=50,
unsigned int accuracy=0) {
RDKit::MMFF::MMFFMolProperties prbMP(*($self));
RDKit::MMFF::MMFFMolProperties refMP(refMol);
RDKit::MolAlign::O3A o3a(*($self), refMol, &prbMP, &refMP, RDKit::MolAlign::O3A::MMFF94,
prbCid, refCid,
reflect,maxIters,accuracy);

View File

@@ -1,21 +1,21 @@
/*
/*
* $Id$
*
* Copyright (c) 2010, Novartis Institutes for BioMedical Research Inc.
* All rights reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* met:
*
* * Redistributions of source code must retain the above copyright
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
* * Neither the name of Novartis Institutes for BioMedical Research Inc.
* nor the names of its contributors may be used to endorse or promote
* * Neither the name of Novartis Institutes for BioMedical Research Inc.
* nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
@@ -36,6 +36,7 @@
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/FileParsers/SequenceParsers.h>
#include <GraphMol/Bond.h>
#include <GraphMol/FileParsers/MolFileStereochem.h>
%}
@@ -54,7 +55,7 @@
%include <GraphMol/FileParsers/FileParsers.h>
%include <GraphMol/RWMol.h>
%extend RDKit::RWMol {
%extend RDKit::RWMol {
static RDKit::RWMOL_SPTR MolFromSmiles(std::string smi,int debugParse=0,bool sanitize=1,
std::map<std::string,std::string> *replacements=0){
return RDKit::RWMOL_SPTR(RDKit::SmilesToMol(smi, debugParse, sanitize,replacements));
@@ -109,8 +110,26 @@ static RDKit::RWMOL_SPTR MolFromPDBFile(std::string fName,
mol=RDKit::PDBFileToMol(fName,sanitize,removeHs,flavor);
return RDKit::RWMOL_SPTR(mol);
}
/* Builds a molecule from a sequence string.
 * text, sanitize and flavor are passed through unchanged to
 * RDKit::SequenceToMol; the pointer it returns is wrapped in an RWMOL_SPTR. */
static RDKit::RWMOL_SPTR MolFromSequence(std::string text,
                                         bool sanitize=true,int flavor=0){
  return RDKit::RWMOL_SPTR(RDKit::SequenceToMol(text,sanitize,flavor));
}
/* Builds a molecule from FASTA text.
 * text, sanitize and flavor are passed through unchanged to
 * RDKit::FASTAToMol; the pointer it returns is wrapped in an RWMOL_SPTR. */
static RDKit::RWMOL_SPTR MolFromFASTA(std::string text,
                                      bool sanitize=true,int flavor=0){
  return RDKit::RWMOL_SPTR(RDKit::FASTAToMol(text,sanitize,flavor));
}
/* Builds a molecule from a HELM string.
 * text and sanitize are passed through unchanged to RDKit::HELMToMol; the
 * pointer it returns is wrapped in an RWMOL_SPTR. */
static RDKit::RWMOL_SPTR MolFromHELM(std::string text,
                                     bool sanitize=true){
  return RDKit::RWMOL_SPTR(RDKit::HELMToMol(text,sanitize));
}
/* Methods from MolFileStereoChem.h */
void DetectAtomStereoChemistry(const RDKit::Conformer *conf) {
RDKit::DetectAtomStereoChemistry(*($self), conf);

View File

@@ -317,6 +317,10 @@ ADD_TEST(JavaPDBTests
java -Djava.library.path=${CMAKE_CURRENT_SOURCE_DIR}
-cp "${JUNIT_JAR}${PATH_SEP}${CMAKE_JAVA_TEST_OUTDIR}${PATH_SEP}${CMAKE_CURRENT_SOURCE_DIR}/org.RDKit.jar"
org.RDKit.PDBTests)
# Registers the JavaSequenceTests test: launches org.RDKit.SequenceTests with
# the JUnit jar, the Java test output directory and org.RDKit.jar on the
# classpath, and java.library.path pointing at the current source directory.
ADD_TEST(JavaSequenceTests
java -Djava.library.path=${CMAKE_CURRENT_SOURCE_DIR}
-cp "${JUNIT_JAR}${PATH_SEP}${CMAKE_JAVA_TEST_OUTDIR}${PATH_SEP}${CMAKE_CURRENT_SOURCE_DIR}/org.RDKit.jar"
org.RDKit.SequenceTests)
ADD_TEST(JavaAlignTests
java -Djava.library.path=${CMAKE_CURRENT_SOURCE_DIR}

View File

@@ -0,0 +1,86 @@
/*
*
* Copyright (c) 2016, Greg Landrum
*
* @@ All Rights Reserved @@
* This file is part of the RDKit.
* The contents are covered by the terms of the BSD license
* which is included in the file license.txt, found at the root
* of the RDKit source tree.
*/
package org.RDKit;
import static org.junit.Assert.*;
import java.io.File;
import org.junit.Test;
public class SequenceTests extends GraphMolTest {
@Test
public void testSequence1() {
ROMol m = RWMol.MolFromSequence("CYIQNCPLG");
AtomMonomerInfo mi=new AtomMonomerInfo(m.getAtomWithIdx(0).getMonomerInfo());
assert(mi instanceof AtomPDBResidueInfo);
String seq = new String(m.MolToSequence());
assertEquals(seq,"CYIQNCPLG");
String fasta = new String(m.MolToFASTA());
assertEquals(fasta,">\nCYIQNCPLG\n");
String helm = new String(m.MolToHELM());
assertEquals(helm,"PEPTIDE1{C.Y.I.Q.N.C.P.L.G}$$$$");
}
@Test
public void testSequence2() {
ROMol m = RWMol.MolFromFASTA(">\nCYIQNCPLG\n");
AtomMonomerInfo mi=new AtomMonomerInfo(m.getAtomWithIdx(0).getMonomerInfo());
assert(mi instanceof AtomPDBResidueInfo);
String seq = new String(m.MolToSequence());
assertEquals(seq,"CYIQNCPLG");
String fasta = new String(m.MolToFASTA());
assertEquals(fasta,">\nCYIQNCPLG\n");
String helm = new String(m.MolToHELM());
assertEquals(helm,"PEPTIDE1{C.Y.I.Q.N.C.P.L.G}$$$$");
}
@Test
public void testSequence3() {
ROMol m = RWMol.MolFromHELM("PEPTIDE1{C.Y.I.Q.N.C.P.L.G}$$$$\n");
AtomMonomerInfo mi=new AtomMonomerInfo(m.getAtomWithIdx(0).getMonomerInfo());
assert(mi instanceof AtomPDBResidueInfo);
String seq = new String(m.MolToSequence());
assertEquals(seq,"CYIQNCPLG");
String fasta = new String(m.MolToFASTA());
assertEquals(fasta,">\nCYIQNCPLG\n");
String helm = new String(m.MolToHELM());
assertEquals(helm,"PEPTIDE1{C.Y.I.Q.N.C.P.L.G}$$$$");
}
@Test
public void testSequence4() {
ROMol m = RWMol.MolFromSequence("CGCGAATTACCGCG",false,6);
AtomMonomerInfo mi=new AtomMonomerInfo(m.getAtomWithIdx(0).getMonomerInfo());
assert(mi instanceof AtomPDBResidueInfo);
String seq = new String(m.MolToSequence());
assertEquals(seq,"CGCGAATTACCGCG");
String fasta = new String(m.MolToFASTA());
assertEquals(fasta,">\nCGCGAATTACCGCG\n");
String helm = new String(m.MolToHELM());
assertEquals(helm,"RNA1{[dR](C)P.[dR](G)P.[dR](C)P.[dR](G)P.[dR](A)P.[dR](A)P.[dR](T)P.[dR](T)P.[dR](A)P.[dR](C)P.[dR](C)P.[dR](G)P.[dR](C)P.[dR](G)}$$$$");
}
public void testSequence5() {
ROMol m = RWMol.MolFromSequence("CGCGAAUUACCGCG",false,2);
AtomMonomerInfo mi=new AtomMonomerInfo(m.getAtomWithIdx(0).getMonomerInfo());
assert(mi instanceof AtomPDBResidueInfo);
String seq = new String(m.MolToSequence());
assertEquals(seq,"CGCGAAUUACCGCG");
String fasta = new String(m.MolToFASTA());
assertEquals(fasta,">\nCGCGAAUUACCGCG\n");
String helm = new String(m.MolToHELM());
assertEquals(helm,"RNA1{R(C)P.R(G)P.R(C)P.R(G)P.R(A)P.R(A)P.R(U)P.R(U)P.R(A)P.R(C)P.R(C)P.R(G)P.R(C)P.R(G)}$$$$");
}
public static void main(String args[]) {
org.junit.runner.JUnitCore.main("org.RDKit.SequenceTests");
}
}