mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-05 22:04:27 +08:00
1324 lines
37 KiB
C++
1324 lines
37 KiB
C++
//
|
|
// Copyright (C) 2015 Greg Landrum and NextMove Software
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <string.h>
|
|
|
|
#include <string>
|
|
#include <vector>
|
|
#include <map>
|
|
|
|
#include <GraphMol/GraphMol.h>
|
|
#include <GraphMol/MolOps.h>
|
|
#include <GraphMol/MonomerInfo.h>
|
|
|
|
|
|
namespace RDKit {
|
|
|
|
|
|
// Creates one atom for the PDB atom name `name`, adds it to `mol` and tags it
// with a copy of `info` whose atom name is set to `name`.
//
// The element is deduced from the first two characters of `name`:
//   " C" -> 6, " N" -> 7, " O" -> 8, " S" -> 16, "SE" -> 34,
// and anything else becomes atomic number 0.
//
// Side effect: the serial number stored in `info` is incremented, so the next
// atom created from the same `info` receives the following serial number.
// Returns the newly created atom.
static Atom *CreateAAAtom(RWMol *mol, const char *name,
                          AtomPDBResidueInfo &info)
{
  unsigned int atomicNum = 0;
  if (name[0]==' ') {
    switch (name[1]) {
    case 'C': atomicNum = 6;  break;
    case 'N': atomicNum = 7;  break;
    case 'O': atomicNum = 8;  break;
    case 'S': atomicNum = 16; break;
    }
  } else if (name[0]=='S' && name[1]=='E') {
    atomicNum = 34;
  }

  Atom *result = new Atom(atomicNum);
  // NOTE(review): the two `true` flags presumably request a label update and
  // hand ownership of `result` to `mol` -- confirm against RWMol::addAtom.
  mol->addAtom(result,true,true);

  // Each atom gets its own copy of the residue record with its own name.
  AtomPDBResidueInfo *resInfo = (AtomPDBResidueInfo*)info.copy();
  resInfo->setName(name);
  result->setMonomerInfo(resInfo);

  // Advance the running serial number for the next atom.
  const unsigned int currentSerial = info.getSerialNumber();
  info.setSerialNumber(currentSerial+1);
  return result;
}
|
|
|
|
|
|
// Adds a bond between `beg` and `end` to `mol`.
// An `order` of 2 produces a double bond; every other value produces a
// single bond.
static void CreateAABond(RWMol *mol, Atom *beg, Atom *end,
                         unsigned int order)
{
  Bond *link = new Bond(order == 2 ? Bond::DOUBLE : Bond::SINGLE);
  link->setOwningMol(mol);
  link->setBeginAtom(beg);
  link->setEndAtom(end);
  // NOTE(review): the `true` flag presumably transfers ownership of `link`
  // to `mol` -- confirm against RWMol::addBond.
  mol->addBond(link,true);
}
|
|
|
|
|
|
// Builds the N, CA, C, O and CB atoms shared by most amino acids, in that
// creation order, and bonds them (N-CA, CA-C, C=O, CA-CB).
//
// Outputs:
//   r1 - the backbone nitrogen
//   r2 - the backbone carbonyl carbon
//   cb - the beta carbon, to which the caller attaches the side chain
//
// ldstereo selects the chiral tag set on CA: > 0 sets CHI_TETRAHEDRAL_CCW
// (L), < 0 sets CHI_TETRAHEDRAL_CW (D), and 0 leaves the tag untouched.
//
// Atom names are padded to the four-column PDB atom-name field so that they
// are consistent with the other names used in this file (" CA ", " CB ", ...).
static void CreateAABackbone(RWMol *mol, Atom *&r1, Atom *&r2, Atom *&cb,
                             AtomPDBResidueInfo &info, int ldstereo)
{
  r1 = CreateAAAtom(mol," N  ",info);
  Atom *ca = CreateAAAtom(mol," CA ",info);
  r2 = CreateAAAtom(mol," C  ",info);
  Atom *o = CreateAAAtom(mol," O  ",info);
  cb = CreateAAAtom(mol," CB ",info);
  CreateAABond(mol,r1,ca,1);
  CreateAABond(mol,ca,r2,1);
  CreateAABond(mol,r2,o,2);
  CreateAABond(mol,ca,cb,1);

  if (ldstereo > 0) // L-stereo
    ca->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW);
  else if (ldstereo < 0) // D-stereo
    ca->setChiralTag(Atom::CHI_TETRAHEDRAL_CW);
}
|
|
|
|
|
|
// aa is a three letter PDB residue code
|
|
static void CreateAminoAcid(RWMol *mol, const char *aa,
|
|
Atom *&r1, Atom *&r2, Atom *&r3,
|
|
AtomPDBResidueInfo &info)
|
|
{
|
|
Atom *atom[10];
|
|
|
|
r1 = (Atom*)0;
|
|
r2 = (Atom*)0;
|
|
r3 = (Atom*)0;
|
|
|
|
int resno = info.getResidueNumber();
|
|
info.setResidueNumber(resno+1);
|
|
info.setIsHeteroAtom(false);
|
|
info.setResidueName(aa);
|
|
|
|
// Standard amino acids before non-standard, in PDB code alphabetical order
|
|
switch (aa[0]) {
|
|
case 'A':
|
|
if (!strcmp(aa,"ALA")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
} else
|
|
if (!strcmp(aa,"ARG")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD ",info);
|
|
atom[3] = CreateAAAtom(mol," NE ",info);
|
|
atom[4] = CreateAAAtom(mol," CZ ",info);
|
|
atom[5] = CreateAAAtom(mol," NH1",info);
|
|
atom[6] = CreateAAAtom(mol," NH2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],1);
|
|
CreateAABond(mol,atom[3],atom[4],1);
|
|
CreateAABond(mol,atom[4],atom[5],2);
|
|
CreateAABond(mol,atom[4],atom[6],1);
|
|
} else
|
|
if (!strcmp(aa,"ASP")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," OD1",info);
|
|
atom[3] = CreateAAAtom(mol," OD2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],2);
|
|
CreateAABond(mol,atom[1],atom[3],1);
|
|
} else
|
|
if (!strcmp(aa,"ASN")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," OD1",info);
|
|
atom[3] = CreateAAAtom(mol," ND2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],2);
|
|
CreateAABond(mol,atom[1],atom[3],1);
|
|
} else
|
|
if (!strcmp(aa,"ABA")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
} else
|
|
if (!strcmp(aa,"ACE")) {
|
|
info.setIsHeteroAtom(true);
|
|
r2 = CreateAAAtom(mol," C ",info);
|
|
atom[0] = CreateAAAtom(mol," O ",info);
|
|
atom[1] = CreateAAAtom(mol," CH3",info);
|
|
CreateAABond(mol,r2,atom[1],1);
|
|
CreateAABond(mol,r2,atom[0],2);
|
|
}
|
|
break;
|
|
|
|
case 'C':
|
|
if (!strcmp(aa,"CYS")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
r3 = CreateAAAtom(mol," SG ",info);
|
|
CreateAABond(mol,atom[0],r3,1);
|
|
}
|
|
break;
|
|
|
|
case 'D':
|
|
if (!strcmp(aa,"DAL")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
} else
|
|
if (!strcmp(aa,"DAR")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD ",info);
|
|
atom[3] = CreateAAAtom(mol," NE ",info);
|
|
atom[4] = CreateAAAtom(mol," CZ ",info);
|
|
atom[5] = CreateAAAtom(mol," NH1",info);
|
|
atom[6] = CreateAAAtom(mol," NH2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],1);
|
|
CreateAABond(mol,atom[3],atom[4],1);
|
|
CreateAABond(mol,atom[4],atom[5],2);
|
|
CreateAABond(mol,atom[4],atom[6],1);
|
|
} else
|
|
if (!strcmp(aa,"DAS")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," OD1",info);
|
|
atom[3] = CreateAAAtom(mol," OD2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],2);
|
|
CreateAABond(mol,atom[1],atom[3],1);
|
|
} else
|
|
if (!strcmp(aa,"DBB")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
} else
|
|
if (!strcmp(aa,"DCY")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
r3 = CreateAAAtom(mol," SG ",info);
|
|
CreateAABond(mol,atom[0],r3,1);
|
|
} else
|
|
if (!strcmp(aa,"DGL")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD ",info);
|
|
atom[3] = CreateAAAtom(mol," OE1",info);
|
|
atom[4] = CreateAAAtom(mol," OE2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],2);
|
|
CreateAABond(mol,atom[2],atom[4],1);
|
|
} else
|
|
if (!strcmp(aa,"DGN")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD ",info);
|
|
atom[3] = CreateAAAtom(mol," OE1",info);
|
|
atom[4] = CreateAAAtom(mol," NE2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],2);
|
|
CreateAABond(mol,atom[2],atom[4],1);
|
|
} else
|
|
if (!strcmp(aa,"DHI")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," ND1",info);
|
|
atom[3] = CreateAAAtom(mol," CD2",info);
|
|
atom[4] = CreateAAAtom(mol," CE1",info);
|
|
atom[5] = CreateAAAtom(mol," NE2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[4],2);
|
|
CreateAABond(mol,atom[4],atom[5],1);
|
|
CreateAABond(mol,atom[5],atom[3],1);
|
|
CreateAABond(mol,atom[3],atom[1],2);
|
|
} else
|
|
if (!strcmp(aa,"DIL")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," CG1",info);
|
|
atom[2] = CreateAAAtom(mol," CG2",info);
|
|
atom[3] = CreateAAAtom(mol," CD1",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[0],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],1);
|
|
atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW);
|
|
} else
|
|
if (!strcmp(aa,"DLE")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD1",info);
|
|
atom[3] = CreateAAAtom(mol," CD2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[1],atom[3],1);
|
|
} else
|
|
if (!strcmp(aa,"DLY")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD ",info);
|
|
atom[3] = CreateAAAtom(mol," CE ",info);
|
|
atom[4] = CreateAAAtom(mol," NZ ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],1);
|
|
CreateAABond(mol,atom[3],atom[4],1);
|
|
} else
|
|
if (!strcmp(aa,"DPN")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD1",info);
|
|
atom[3] = CreateAAAtom(mol," CD2",info);
|
|
atom[4] = CreateAAAtom(mol," CE1",info);
|
|
atom[5] = CreateAAAtom(mol," CE2",info);
|
|
atom[6] = CreateAAAtom(mol," CZ ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],2);
|
|
CreateAABond(mol,atom[2],atom[4],1);
|
|
CreateAABond(mol,atom[4],atom[6],2);
|
|
CreateAABond(mol,atom[6],atom[5],1);
|
|
CreateAABond(mol,atom[5],atom[3],2);
|
|
CreateAABond(mol,atom[3],atom[1],1);
|
|
} else
|
|
if (!strcmp(aa,"DPR")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],r1,1);
|
|
} else
|
|
if (!strcmp(aa,"DSG")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," OD1",info);
|
|
atom[3] = CreateAAAtom(mol," ND2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],2);
|
|
CreateAABond(mol,atom[1],atom[3],1);
|
|
} else
|
|
if (!strcmp(aa,"DTH")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," OG1",info);
|
|
atom[2] = CreateAAAtom(mol," CG2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[0],atom[2],1);
|
|
atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CW);
|
|
} else
|
|
if (!strcmp(aa,"DTR")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD1",info);
|
|
atom[3] = CreateAAAtom(mol," CD2",info);
|
|
atom[4] = CreateAAAtom(mol," NE1",info);
|
|
atom[5] = CreateAAAtom(mol," CE2",info);
|
|
atom[6] = CreateAAAtom(mol," CE3",info);
|
|
atom[7] = CreateAAAtom(mol," CZ2",info);
|
|
atom[8] = CreateAAAtom(mol," CZ3",info);
|
|
atom[9] = CreateAAAtom(mol," CH2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],2);
|
|
CreateAABond(mol,atom[1],atom[3],1);
|
|
CreateAABond(mol,atom[2],atom[4],1);
|
|
CreateAABond(mol,atom[3],atom[5],2);
|
|
CreateAABond(mol,atom[3],atom[6],1);
|
|
CreateAABond(mol,atom[4],atom[5],1);
|
|
CreateAABond(mol,atom[5],atom[7],1);
|
|
CreateAABond(mol,atom[6],atom[8],2);
|
|
CreateAABond(mol,atom[7],atom[9],2);
|
|
CreateAABond(mol,atom[8],atom[9],1);
|
|
} else
|
|
if (!strcmp(aa,"DVA")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," CG1",info);
|
|
atom[2] = CreateAAAtom(mol," CG2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[0],atom[2],1);
|
|
}
|
|
break;
|
|
|
|
case 'G':
|
|
if (!strcmp(aa,"GLN")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD ",info);
|
|
atom[3] = CreateAAAtom(mol," OE1",info);
|
|
atom[4] = CreateAAAtom(mol," NE2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],2);
|
|
CreateAABond(mol,atom[2],atom[4],1);
|
|
} else
|
|
if (!strcmp(aa,"GLU")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD ",info);
|
|
atom[3] = CreateAAAtom(mol," OE1",info);
|
|
atom[4] = CreateAAAtom(mol," OE2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],2);
|
|
CreateAABond(mol,atom[2],atom[4],1);
|
|
} else
|
|
if (!strcmp(aa,"GLY")) {
|
|
r1 = CreateAAAtom(mol," N ",info);
|
|
atom[0] = CreateAAAtom(mol," CA ",info);
|
|
r2 = CreateAAAtom(mol," C ",info);
|
|
atom[1] = CreateAAAtom(mol," O ",info);
|
|
CreateAABond(mol,r1,atom[0],1);
|
|
CreateAABond(mol,atom[0],r2,1);
|
|
CreateAABond(mol,r2,atom[1],2);
|
|
}
|
|
break;
|
|
|
|
case 'H':
|
|
if (!strcmp(aa,"HIS")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," ND1",info);
|
|
atom[3] = CreateAAAtom(mol," CD2",info);
|
|
atom[4] = CreateAAAtom(mol," CE1",info);
|
|
atom[5] = CreateAAAtom(mol," NE2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[4],2);
|
|
CreateAABond(mol,atom[4],atom[5],1);
|
|
CreateAABond(mol,atom[5],atom[3],1);
|
|
CreateAABond(mol,atom[3],atom[1],2);
|
|
}
|
|
break;
|
|
|
|
case 'I':
|
|
if (!strcmp(aa,"ILE")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG1",info);
|
|
atom[2] = CreateAAAtom(mol," CG2",info);
|
|
atom[3] = CreateAAAtom(mol," CD1",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[0],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],1);
|
|
atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CW);
|
|
}
|
|
break;
|
|
|
|
case 'L':
|
|
if (!strcmp(aa,"LEU")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD1",info);
|
|
atom[3] = CreateAAAtom(mol," CD2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[1],atom[3],1);
|
|
} else
|
|
if (!strcmp(aa,"LYS")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD ",info);
|
|
atom[3] = CreateAAAtom(mol," CE ",info);
|
|
atom[4] = CreateAAAtom(mol," NZ ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],1);
|
|
CreateAABond(mol,atom[3],atom[4],1);
|
|
}
|
|
break;
|
|
|
|
case 'M':
|
|
if (!strcmp(aa,"MET")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," SD ",info);
|
|
atom[3] = CreateAAAtom(mol," CE ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],1);
|
|
} else
|
|
if (!strcmp(aa,"MED")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,-1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," SD ",info);
|
|
atom[3] = CreateAAAtom(mol," CE ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],1);
|
|
} else
|
|
if (!strcmp(aa,"MSE")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol,"SE ",info);
|
|
atom[3] = CreateAAAtom(mol," CE ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],1);
|
|
}
|
|
break;
|
|
|
|
case 'N':
|
|
if (!strcmp(aa,"NLE")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD ",info);
|
|
atom[3] = CreateAAAtom(mol," CE ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],1);
|
|
} else
|
|
if (!strcmp(aa,"NVA")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
}
|
|
break;
|
|
|
|
case 'O':
|
|
if (!strcmp(aa,"ORN")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD ",info);
|
|
atom[3] = CreateAAAtom(mol," NE ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],atom[3],1);
|
|
}
|
|
break;
|
|
|
|
case 'P':
|
|
if (!strcmp(aa,"PHE")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD1",info);
|
|
atom[3] = CreateAAAtom(mol," CD2",info);
|
|
atom[4] = CreateAAAtom(mol," CE1",info);
|
|
atom[5] = CreateAAAtom(mol," CE2",info);
|
|
atom[6] = CreateAAAtom(mol," CZ ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],2);
|
|
CreateAABond(mol,atom[2],atom[4],1);
|
|
CreateAABond(mol,atom[4],atom[6],2);
|
|
CreateAABond(mol,atom[6],atom[5],1);
|
|
CreateAABond(mol,atom[5],atom[3],2);
|
|
CreateAABond(mol,atom[3],atom[1],1);
|
|
} else
|
|
if (!strcmp(aa,"PRO")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],r1,1);
|
|
} else
|
|
if (!strcmp(aa,"PCA")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD ",info);
|
|
atom[3] = CreateAAAtom(mol," OE ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],1);
|
|
CreateAABond(mol,atom[2],r1,1);
|
|
CreateAABond(mol,atom[2],atom[3],2);
|
|
}
|
|
break;
|
|
|
|
case 'S':
|
|
if (!strcmp(aa,"SER")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," OG ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
} else
|
|
if (!strcmp(aa,"SAR")) {
|
|
info.setIsHeteroAtom(true);
|
|
r1 = CreateAAAtom(mol," N ",info);
|
|
atom[0] = CreateAAAtom(mol," CA ",info);
|
|
r2 = CreateAAAtom(mol," C ",info);
|
|
atom[1] = CreateAAAtom(mol," O ",info);
|
|
atom[2] = CreateAAAtom(mol," CN ",info);
|
|
CreateAABond(mol,r1,atom[0],1);
|
|
CreateAABond(mol,atom[0],r2,1);
|
|
CreateAABond(mol,r2,atom[1],2);
|
|
CreateAABond(mol,r1,atom[2],1);
|
|
}
|
|
break;
|
|
|
|
case 'T':
|
|
if (!strcmp(aa,"THR")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," OG1",info);
|
|
atom[2] = CreateAAAtom(mol," CG2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[0],atom[2],1);
|
|
atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW);
|
|
} else
|
|
if (!strcmp(aa,"TRP")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD1",info);
|
|
atom[3] = CreateAAAtom(mol," CD2",info);
|
|
atom[4] = CreateAAAtom(mol," NE1",info);
|
|
atom[5] = CreateAAAtom(mol," CE2",info);
|
|
atom[6] = CreateAAAtom(mol," CE3",info);
|
|
atom[7] = CreateAAAtom(mol," CZ2",info);
|
|
atom[8] = CreateAAAtom(mol," CZ3",info);
|
|
atom[9] = CreateAAAtom(mol," CH2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],2);
|
|
CreateAABond(mol,atom[1],atom[3],1);
|
|
CreateAABond(mol,atom[2],atom[4],1);
|
|
CreateAABond(mol,atom[3],atom[5],2);
|
|
CreateAABond(mol,atom[3],atom[6],1);
|
|
CreateAABond(mol,atom[4],atom[5],1);
|
|
CreateAABond(mol,atom[5],atom[7],1);
|
|
CreateAABond(mol,atom[6],atom[8],2);
|
|
CreateAABond(mol,atom[7],atom[9],2);
|
|
CreateAABond(mol,atom[8],atom[9],1);
|
|
} else
|
|
if (!strcmp(aa,"TYR")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG ",info);
|
|
atom[2] = CreateAAAtom(mol," CD1",info);
|
|
atom[3] = CreateAAAtom(mol," CD2",info);
|
|
atom[4] = CreateAAAtom(mol," CE1",info);
|
|
atom[5] = CreateAAAtom(mol," CE2",info);
|
|
atom[6] = CreateAAAtom(mol," CZ ",info);
|
|
atom[7] = CreateAAAtom(mol," OH ",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[1],atom[2],2);
|
|
CreateAABond(mol,atom[2],atom[4],1);
|
|
CreateAABond(mol,atom[4],atom[6],2);
|
|
CreateAABond(mol,atom[6],atom[5],1);
|
|
CreateAABond(mol,atom[5],atom[3],2);
|
|
CreateAABond(mol,atom[3],atom[1],1);
|
|
CreateAABond(mol,atom[6],atom[7],1);
|
|
}
|
|
break;
|
|
|
|
case 'V':
|
|
if (!strcmp(aa,"VAL")) {
|
|
CreateAABackbone(mol,r1,r2,atom[0],info,1);
|
|
atom[1] = CreateAAAtom(mol," CG1",info);
|
|
atom[2] = CreateAAAtom(mol," CG2",info);
|
|
CreateAABond(mol,atom[0],atom[1],1);
|
|
CreateAABond(mol,atom[0],atom[2],1);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
// Maps a one-letter sequence code to the three-letter PDB residue code that
// is handed to CreateAminoAcid.
// Upper-case letters always map to the L residue. Lower-case letters map to
// the D residue when `lowerD` is true and to the L residue otherwise ('g'
// is always "GLY"). Returns null for any other character.
static const char *SequenceCodeToResidue(char ch, bool lowerD)
{
  switch (ch) {
  case 'A': return "ALA";
  case 'C': return "CYS";
  case 'D': return "ASP";
  case 'E': return "GLU";
  case 'F': return "PHE";
  case 'G':
  case 'g': return "GLY";
  case 'H': return "HIS";
  case 'I': return "ILE";
  case 'K': return "LYS";
  case 'L': return "LEU";
  case 'M': return "MET";
  case 'N': return "ASN";
  case 'P': return "PRO";
  case 'Q': return "GLN";
  case 'R': return "ARG";
  case 'S': return "SER";
  case 'T': return "THR";
  case 'V': return "VAL";
  case 'W': return "TRP";
  case 'Y': return "TYR";

  case 'a': return lowerD ? "DAL" : "ALA";
  case 'c': return lowerD ? "DCY" : "CYS";
  case 'd': return lowerD ? "DAS" : "ASP";
  case 'e': return lowerD ? "DGL" : "GLU";
  case 'f': return lowerD ? "DPN" : "PHE";
  case 'h': return lowerD ? "DHI" : "HIS";
  case 'i': return lowerD ? "DIL" : "ILE";
  case 'k': return lowerD ? "DLY" : "LYS";
  case 'l': return lowerD ? "DLE" : "LEU";
  case 'm': return lowerD ? "MED" : "MET";
  case 'n': return lowerD ? "DSG" : "ASN";
  case 'p': return lowerD ? "DPR" : "PRO";
  case 'q': return lowerD ? "DGN" : "GLN";
  case 'r': return lowerD ? "DAR" : "ARG";
  case 's': return lowerD ? "DSN" : "SER";
  case 't': return lowerD ? "DTH" : "THR";
  case 'v': return lowerD ? "DVA" : "VAL";
  case 'w': return lowerD ? "DTR" : "TRP";
  case 'y': return lowerD ? "DTY" : "TYR";
  }
  return (const char*)0;
}

// Builds a molecule from a one-letter peptide sequence.
//
//   seq      - null-terminated sequence; null yields a null result
//   sanitize - when true, MolOps::sanitizeMol is run on the result
//   lowerD   - when true, lower-case letters denote D amino acids
//
// Spaces, tabs, newlines, carriage returns and '-' are skipped. A '.' ends
// the current chain: the pending C-terminus is capped with " OXT" and the
// next residue starts a new, unconnected chain. Any other unrecognised
// character aborts parsing and returns null. The end of the string caps the
// last chain with " OXT".
//
// Returns a newly allocated RWMol that the caller owns, or null on failure.
// Exceptions raised by sanitization propagate after the molecule is freed.
//
// Fixes relative to the previous version:
//   * ' ' and '\t' used to reset the chain without adding an " OXT" cap,
//     silently splitting the peptide; they are now skipped like newlines.
//   * the lower-case codes d, e, k, l, m, n, q and r are accepted, matching
//     the D residues CreateAminoAcid already knows.
//   * the molecule is no longer leaked when sanitization throws.
RWMol *SequenceToMol(const char *seq, bool sanitize, bool lowerD)
{
  if (!seq)
    return (RWMol*)0;

  // Carbonyl carbon of the previous residue, waiting for the next N (or OXT).
  Atom *prev = (Atom*)0;
  AtomPDBResidueInfo info;
  info.setSerialNumber(1);
  info.setAltLoc(" ");
  info.setResidueNumber(0);
  info.setInsertionCode(" ");
  info.setChainId("A");

  RWMol *mol = new RWMol();
  for (; *seq; seq++) {
    const char ch = *seq;

    // Layout characters carry no meaning and must not disturb `prev`.
    if (ch==' ' || ch=='\t' || ch=='\n' || ch=='\r' || ch=='-')
      continue;

    // Chain separator: cap the open C-terminus and start afresh.
    if (ch=='.') {
      if (prev) {
        Atom *oxt = CreateAAAtom(mol," OXT",info);
        CreateAABond(mol,prev,oxt,1);
        prev = (Atom*)0;
      }
      continue;
    }

    const char *aa = SequenceCodeToResidue(ch,lowerD);
    if (!aa) {
      delete mol;
      return (RWMol*)0;
    }

    Atom *r1 = (Atom*)0;
    Atom *r2 = (Atom*)0;
    Atom *r3 = (Atom*)0;
    CreateAminoAcid(mol,aa,r1,r2,r3,info);

    // Peptide bond from the previous carbonyl carbon to this nitrogen.
    if (prev && r1)
      CreateAABond(mol,prev,r1,1);
    prev = r2;
  }

  if (prev) {
    Atom *oxt = CreateAAAtom(mol," OXT",info);
    CreateAABond(mol,prev,oxt,1);
  }

  if (sanitize) {
    try {
      MolOps::sanitizeMol(*mol);
    } catch (...) {
      // The caller never receives the pointer, so release it here.
      delete mol;
      throw;
    }
  }
  return mol;
}
|
|
|
|
|
|
// std::string convenience overload: forwards the string's character data to
// the const char* overload with the same `sanitize` and `lowerD` options.
RWMol *SequenceToMol(const std::string &seq, bool sanitize, bool lowerD)
{
  const char *text = seq.c_str();
  return SequenceToMol(text,sanitize,lowerD);
}
|
|
|
|
|
|
// Builds a molecule from FASTA text.
//
// If the text starts with '>', everything up to (not including) the first
// '\n' or '\r' is taken as the title; the rest is parsed by SequenceToMol
// with the same `sanitize` and `lowerD` options. A non-empty title is stored
// on the molecule as common_properties::_Name.
//
// Returns null when `seq` is null or when SequenceToMol rejects the sequence.
// The previous version dereferenced the null result in the latter case
// whenever a title was present.
RWMol *FASTAToMol(const char *seq, bool sanitize, bool lowerD)
{
  if (!seq)
    return (RWMol*)0;

  std::string title;
  if (seq[0]=='>') {
    seq++;
    while (*seq && *seq!='\n' && *seq!='\r')
      title += *seq++;
  }

  RWMol *mol = SequenceToMol(seq,sanitize,lowerD);
  // SequenceToMol returns null for an invalid sequence; only name a real mol.
  if (mol && !title.empty())
    mol->setProp(common_properties::_Name,title);
  return mol;
}
|
|
|
|
|
|
// std::string convenience overload: forwards the string's character data to
// the const char* overload with the same `sanitize` and `lowerD` options.
RWMol *FASTAToMol(const std::string &seq, bool sanitize, bool lowerD)
{
  const char *text = seq.c_str();
  return FASTAToMol(text,sanitize,lowerD);
}
|
|
|
|
|
|
struct HELMMonomer {
|
|
Atom *r1;
|
|
Atom *r2;
|
|
Atom *r3;
|
|
Atom *oxt;
|
|
|
|
HELMMonomer() : r1(0), r2(0), r3(0), oxt(0) {}
|
|
HELMMonomer(Atom *x, Atom *y, Atom *z) : r1(x), r2(y), r3(z), oxt(0) {}
|
|
};
|
|
|
|
|
|
// Translates an upper-case one-letter amino acid code into its three-letter
// PDB residue code. Returns null for every other character, including lower
// case letters and the null character.
static const char *GetHELMOneLetterCode(char ch)
{
  // letters[i] corresponds to codes[i].
  static const char letters[] = "ACDEFGHIKLMNPQRSTVWY";
  static const char *const codes[] = {
    "ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS", "LEU",
    "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL", "TRP", "TYR"
  };

  // strchr would match the terminator for '\0', so reject it up front.
  if (ch == '\0')
    return (const char*)0;

  const char *hit = strchr(letters,ch);
  if (!hit)
    return (const char*)0;
  return codes[hit-letters];
}
|
|
|
|
|
|
// Returns true when `ch` may appear inside a bracketed HELM monomer id,
// i.e. when it is an ASCII letter (either case) or an ASCII digit.
static bool IsHELMMonomerIDChar(char ch)
{
  const bool isUpper = (ch>='A' && ch<='Z');
  const bool isLower = (ch>='a' && ch<='z');
  const bool isDigit = (ch>='0' && ch<='9');
  return isUpper || isLower || isDigit;
}
|
|
|
|
|
|
// Translates a HELM peptide monomer id (the text between '[' and ']', or a
// bare one-letter code) into the three-letter PDB residue code used by
// CreateAminoAcid. The whole null-terminated string must match exactly;
// returns null when the id is not known.
//
// Fixes relative to the previous version:
//   * "E" and "F" returned "L-Glu" / "L-Phe", which are not residue codes
//     and produced empty residues; they now return "GLU" / "PHE".
//   * the 'D' case fell through into the 'E' case because of a missing
//     break; the table form has no fall-through to get wrong.
static const char *LookupHELMPeptideMonomer(const char *ptr)
{
  struct Entry {
    const char *helm;  // HELM monomer id
    const char *pdb;   // PDB residue code
  };
  static const Entry table[] = {
    { "A",   "ALA" }, { "Abu", "ABA" },
    { "C",   "CYS" },
    { "D",   "ASP" },
    { "E",   "GLU" },
    { "F",   "PHE" },
    { "G",   "GLY" }, { "Glp", "PCA" },
    { "H",   "HIS" },
    { "I",   "ILE" },
    { "K",   "LYS" },
    { "L",   "LEU" },
    { "M",   "MET" },
    // NOTE(review): "NAL" has no branch in this file's CreateAminoAcid, so
    // "Nal" currently yields a residue with no atoms -- confirm intent.
    { "N",   "ASN" }, { "Nal", "NAL" }, { "Nle", "NLE" }, { "Nva", "NVA" },
    { "Orn", "ORN" },
    { "P",   "PRO" },
    { "Q",   "GLN" },
    { "R",   "ARG" },
    { "S",   "SER" }, { "Sar", "SAR" },
    { "T",   "THR" },
    { "V",   "VAL" },
    { "W",   "TRP" },
    { "Y",   "TYR" },
    // D amino acids: 'd' followed by the one-letter code.
    { "dA",  "DAL" }, { "dC",  "DCY" }, { "dD",  "DAS" }, { "dE",  "DGL" },
    { "dF",  "DPN" }, { "dH",  "DHI" }, { "dI",  "DIL" }, { "dK",  "DLY" },
    { "dL",  "DLE" }, { "dM",  "MED" }, { "dN",  "DSG" }, { "dP",  "DPR" },
    { "dQ",  "DGN" }, { "dR",  "DAR" }, { "dS",  "DSN" }, { "dT",  "DTH" },
    { "dV",  "DVA" }, { "dW",  "DTR" }, { "dY",  "DTY" },
    // NOTE(review): HELM "seC" conventionally names selenocysteine while
    // PDB "MSE" is selenomethionine; mapping kept as found -- confirm.
    { "seC", "MSE" }
  };
  const size_t count = sizeof(table)/sizeof(table[0]);

  for (size_t i = 0; i < count; ++i) {
    if (!strcmp(ptr,table[i].helm))
      return table[i].pdb;
  }
  return (const char*)0;
}
|
|
|
|
|
|
// Parses the monomer list of one HELM PEPTIDE polymer and builds its atoms
// in `mol`.
//
//   ptr   - points just past the opening '{' of the polymer
//   chain - chain id stored on every atom created
//   vseq  - cleared, then filled with one HELMMonomer per monomer parsed
//           (including an "[ac]" cap and an "[am]" terminator if present)
//
// Grammar handled: an optional leading "[ac]." acetyl cap, then monomers
// separated by '.', each either a single upper-case letter or a bracketed
// id, ending either with ".[am]}" (amide cap) or with '}' (" OXT" is added).
//
// Returns a pointer to the closing '}' on success, or null on a syntax
// error, an unknown monomer, or a terminator that has no open C-terminus.
// An empty list ("}") returns immediately with `vseq` empty.
//
// Fixes relative to the previous version:
//   * a residue code that CreateAminoAcid does not build (it leaves both
//     r1 and r2 null) is now a parse failure instead of a silently empty
//     monomer that breaks the chain.
//   * the amide nitrogen uses the four-column PDB atom name " N  ".
static const char *ParseHELMPeptide(RWMol *mol,
                                    const char *ptr,
                                    const char *chain,
                                    std::vector<HELMMonomer> &vseq)
{
  unsigned int len = 0;   // mirrors vseq.size()
  HELMMonomer curr;

  vseq.clear();
  if (ptr[0]=='}')
    return ptr;

  AtomPDBResidueInfo info;
  info.setSerialNumber(1);
  info.setAltLoc(" ");
  info.setResidueNumber(0);
  info.setInsertionCode(" ");
  info.setChainId(chain);

  // Optional N-terminal acetyl cap.
  if (ptr[0]=='[' && ptr[1]=='a' && ptr[2]=='c' && ptr[3]==']') {
    if (ptr[4] != '.')
      return (const char*)0;
    // CreateAminoAcid increments the residue number before building, so the
    // cap is numbered -1; the counter is then reset so that the first real
    // residue is numbered 1.
    info.setResidueNumber(-2);
    CreateAminoAcid(mol,"ACE",curr.r1,curr.r2,curr.r3,info);
    vseq.push_back(curr);
    info.setResidueNumber(0);
    ptr += 5;
    len = 1;
  }

  for (;;) {
    const char *name = (const char*)0;
    if (*ptr=='[') {
      // Bracketed id: collect the id characters up to the closing ']'.
      std::string tmp;
      ptr++;
      while (IsHELMMonomerIDChar(*ptr))
        tmp += *ptr++;
      if (*ptr != ']')
        return (const char*)0;
      name = LookupHELMPeptideMonomer(tmp.c_str());
    } else {
      name = GetHELMOneLetterCode(*ptr);
    }
    if (!name)
      return (const char*)0;
    ptr++;   // step over the one-letter code or the closing ']'

    CreateAminoAcid(mol,name,curr.r1,curr.r2,curr.r3,info);
    // Every residue CreateAminoAcid knows sets r1 and/or r2; both null
    // means the code was not recognised and nothing was built.
    if (!curr.r1 && !curr.r2)
      return (const char*)0;

    // Peptide bond from the previous monomer's C to this monomer's N; the
    // previous r2 is cleared because that attachment point is now used.
    if (len && vseq[len-1].r2 && curr.r1) {
      CreateAABond(mol,vseq[len-1].r2,curr.r1,1);
      vseq[len-1].r2 = 0;
    }
    vseq.push_back(curr);
    len++;

    if (*ptr=='.') {
      // ".[am]}" terminates the polymer with a C-terminal amide.
      if (ptr[1]=='[' && ptr[2]=='a' && ptr[3]=='m' &&
          ptr[4]==']' && ptr[5]=='}') {
        if (!vseq[len-1].r2)
          return (const char*)0;
        int resno = info.getResidueNumber();
        info.setResidueNumber(resno+1);
        info.setIsHeteroAtom(true);
        info.setResidueName("NH2");
        Atom *n = CreateAAAtom(mol," N  ",info);
        CreateAABond(mol,vseq[len-1].r2,n,1);
        vseq[len-1].r2 = (Atom*)0;
        vseq.push_back(HELMMonomer());
        len++;
        return ptr+5;   // the closing '}'
      }
      ptr++;
    } else if (*ptr=='}') {
      // Plain end of polymer: cap the open C-terminus with OXT.
      if (!vseq[len-1].r2)
        return (const char*)0;
      Atom *oxt = CreateAAAtom(mol," OXT",info);
      CreateAABond(mol,vseq[len-1].r2,oxt,1);
      vseq[len-1].oxt = oxt;
      return ptr;
    } else {
      return (const char*)0;
    }
  }
}
|
|
|
|
|
|
// Matches a polymer identifier of the form "PEPTIDE<n>", where <n> is a
// decimal number whose first digit is 1-9.  Returns a pointer to the first
// character after the identifier, or a null pointer if ptr does not start
// with such an identifier.
static const char *MatchHELMPeptideID(const char *ptr)
{
  if (strncmp(ptr,"PEPTIDE",7) != 0)
    return (const char*)0;
  if (ptr[7]<'1' || ptr[7]>'9')
    return (const char*)0;
  ptr += 8;
  while (*ptr>='0' && *ptr<='9')
    ptr++;
  return ptr;
}


// Parses one connection end point of the form "<residue>:R<group>", where
// <residue> is a decimal number starting with 1-9 and <group> is a single
// digit 1-9.  On success stores both values and returns a pointer to the
// first character after the end point; otherwise returns a null pointer and
// leaves res/rgroup untouched.
static const char *ParseHELMAttachmentPoint(const char *ptr,
                                            unsigned int &res,
                                            unsigned int &rgroup)
{
  if (*ptr<'1' || *ptr>'9')
    return (const char*)0;

  const unsigned int limit = ~0U;
  unsigned int value = 0;
  while (*ptr>='0' && *ptr<='9') {
    unsigned int digit = (unsigned int)(*ptr-'0');
    // Reject indices that do not fit in an unsigned int instead of letting
    // them wrap around to a small (and possibly valid) residue number.
    if (value > (limit-digit)/10)
      return (const char*)0;
    value = 10*value + digit;
    ptr++;
  }

  if (ptr[0]!=':' || ptr[1]!='R' || ptr[2]<'1' || ptr[2]>'9')
    return (const char*)0;

  res = value;
  rgroup = (unsigned int)(ptr[2]-'0');
  return ptr+3;
}


// Parses a HELM string consisting of peptide polymers and, optionally, a
// connection section, adding the resulting atoms and bonds to mol.
//
//   mol  molecule that receives the atoms and bonds
//   ptr  NUL-terminated HELM text, e.g.
//        "PEPTIDE1{A.C}|PEPTIDE2{G}$PEPTIDE1,PEPTIDE2,2:R3-1:R3$$$"
//
// Accepted layout:
//   * one or more "PEPTIDE<n>{...}" polymers separated by '|';
//   * then either end of string, or '$' followed by "$$$" (no connections),
//     or '$' followed by '|'-separated connections
//     "<id1>,<id2>,<res1>:R<a>-<res2>:R<b>" and a closing "$$$".
// Supported connection kinds are R3-R3, R1-R2 and R2-R1; an R2 end point
// must still carry its OXT atom, which is removed before bonding.
//
// Returns true on success and false on any syntax or consistency error; in
// the latter case mol may have been partially populated.
static bool ParseHELM(RWMol *mol, const char *ptr)
{
  typedef std::map<std::string,std::vector<HELMMonomer> > SequenceMap;
  SequenceMap seqs;

  char chain[2];
  chain[0] = 'A';
  chain[1] = '\0';

  // ---- Polymer section -------------------------------------------------
  for (;;) {
    const char *orig = ptr;
    ptr = MatchHELMPeptideID(orig);
    if (!ptr || *ptr != '{')
      return false;

    std::string id(orig,ptr-orig);
    // The chain letter is derived from the first digit of the polymer
    // number only: PEPTIDE1 -> 'A', PEPTIDE2 -> 'B', ...
    chain[0] = 'A' + (orig[7]-'1');
    ptr = ParseHELMPeptide(mol,ptr+1,chain,seqs[id]);
    if (!ptr || *ptr != '}')
      return false;
    ptr++;

    if (*ptr == '$')
      break;
    if (*ptr == '\0')
      return true;
    if (*ptr != '|')
      return false;
    ptr++;
  }
  ptr++;  // skip the '$' that ends the polymer section

  // No connections: the remaining three '$' follow immediately.
  if (ptr[0]=='$' && ptr[1]=='$' && ptr[2]=='$')
    return true;

  // ---- Connection section ----------------------------------------------
  for (;;) {
    const char *orig = ptr;
    ptr = MatchHELMPeptideID(orig);
    if (!ptr || *ptr != ',')
      return false;
    std::string id1(orig,ptr-orig);
    ptr++;

    orig = ptr;
    ptr = MatchHELMPeptideID(orig);
    if (!ptr || *ptr != ',')
      return false;
    std::string id2(orig,ptr-orig);
    ptr++;

    // Both residue numbers are guaranteed to be >= 1 by the parser.
    unsigned int res1 = 0;
    unsigned int res1r = 0;
    ptr = ParseHELMAttachmentPoint(ptr,res1,res1r);
    if (!ptr || *ptr != '-')
      return false;
    ptr++;

    unsigned int res2 = 0;
    unsigned int res2r = 0;
    ptr = ParseHELMAttachmentPoint(ptr,res2,res2r);
    if (!ptr)
      return false;

    // Both polymers must have been declared and the (1-based) residue
    // numbers must lie inside them.
    SequenceMap::iterator it1 = seqs.find(id1);
    SequenceMap::iterator it2 = seqs.find(id2);
    if (it1==seqs.end() || it2==seqs.end())
      return false;
    if (res1 > it1->second.size() || res2 > it2->second.size())
      return false;

    // m1 and m2 may refer to the same monomer.
    HELMMonomer &m1 = it1->second[res1-1];
    HELMMonomer &m2 = it2->second[res2-1];

    if (res1r == 3 && res2r == 3) {
      Atom *src = m1.r3;
      Atom *dst = m2.r3;
      if (!src || !dst || src == dst)
        return false;
      CreateAABond(mol,src,dst,1);
      m1.r3 = (Atom*)0;
      m2.r3 = (Atom*)0;
    } else if (res1r == 1 && res2r == 2) {
      Atom *src = m1.r1;
      Atom *dst = m2.r2;
      Atom *oxt = m2.oxt;
      if (!src || !dst || !oxt || src == dst)
        return false;
      mol->removeAtom(oxt);
      CreateAABond(mol,src,dst,1);
      m1.r1 = (Atom*)0;
      m2.r2 = (Atom*)0;
      // The OXT atom no longer exists; do not keep a stale pointer to it.
      m2.oxt = (Atom*)0;
    } else if (res1r == 2 && res2r == 1) {
      Atom *src = m2.r1;
      Atom *dst = m1.r2;
      Atom *oxt = m1.oxt;
      if (!src || !dst || !oxt || src == dst)
        return false;
      mol->removeAtom(oxt);
      CreateAABond(mol,dst,src,1);
      m1.r2 = (Atom*)0;
      m2.r1 = (Atom*)0;
      // The OXT atom no longer exists; do not keep a stale pointer to it.
      m1.oxt = (Atom*)0;
    } else return false;

    if (*ptr == '$')
      break;
    if (*ptr != '|')
      return false;
    ptr++;
  }
  ptr++;  // skip the '$' that ends the connection section

  // Two more '$' must follow.
  return ptr[0]=='$' && ptr[1]=='$';
}
|
|
|
|
|
|
// Builds a molecule from a HELM string.
//
//   helm      NUL-terminated HELM text; a null pointer is treated as
//             unparsable input
//   sanitize  when true, MolOps::sanitizeMol() is run on the parsed molecule
//
// Returns a molecule allocated with new (an empty one for the input
// "$$$$"), or a null pointer if the text cannot be parsed.  The function
// keeps no reference to the result, so the caller is expected to delete it.
// If parsing or sanitization throws, the partially built molecule is
// released and the exception is propagated unchanged.
RWMol *HELMToMol(const char *helm, bool sanitize)
{
  if (!helm)
    return (RWMol*)0;

  // "$$$$" is the HELM string with no polymers at all.
  if (helm[0]=='$' && helm[1]=='$' && helm[2]=='$' && helm[3]=='$')
    return new RWMol();

  RWMol *mol = new RWMol();
  try {
    if (ParseHELM(mol,helm)) {
      if (sanitize)
        MolOps::sanitizeMol(*mol);
      return mol;
    }
  } catch (...) {
    // Do not leak the molecule when parsing or sanitization fails by
    // exception.
    delete mol;
    throw;
  }
  delete mol;
  return (RWMol*)0;
}
|
|
|
|
|
|
// Convenience overload: forwards the contents of a std::string to the
// const char* version of HELMToMol() and returns its result unchanged.
RWMol *HELMToMol(const std::string &helm, bool sanitize)
{
  const char *text = helm.c_str();
  return HELMToMol(text,sanitize);
}
|
|
|
|
} // namespace RDKit
|
|
|