Files
rdkit/Code/GraphMol/FileParsers/SequenceParsers.cpp
2015-09-10 05:58:08 +02:00

1324 lines
37 KiB
C++

//
// Copyright (C) 2015 Greg Landrum and NextMove Software
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <string.h>
#include <string>
#include <vector>
#include <map>
#include <GraphMol/GraphMol.h>
#include <GraphMol/MolOps.h>
#include <GraphMol/MonomerInfo.h>
namespace RDKit {
// Creates one atom named `name`, adds it to `mol` and tags it with a copy of
// `info` whose atom name is set to `name`.
//
// The element is derived from the first two characters of the PDB-style atom
// name: a leading blank followed by C, N, O or S gives carbon, nitrogen,
// oxygen or sulfur; a name starting with "SE" gives selenium; anything else
// becomes a dummy atom (atomic number 0).
//
// Side effect: the serial number stored in `info` is advanced by one so the
// next atom created from the same `info` receives the following serial.
static Atom *CreateAAAtom(RWMol *mol, const char *name,
                          AtomPDBResidueInfo &info)
{
  unsigned int atomicNum = 0;
  if (name[0] == ' ') {
    switch (name[1]) {
    case 'C': atomicNum = 6;  break;
    case 'N': atomicNum = 7;  break;
    case 'O': atomicNum = 8;  break;
    case 'S': atomicNum = 16; break;
    default:  break;
    }
  } else if (name[0] == 'S' && name[1] == 'E') {
    atomicNum = 34;
  }

  Atom *created = new Atom(atomicNum);
  mol->addAtom(created, true, true);

  // Each atom gets its own residue-info record carrying its atom name.
  AtomPDBResidueInfo *record =
      static_cast<AtomPDBResidueInfo*>(info.copy());
  record->setName(name);
  created->setMonomerInfo(record);

  info.setSerialNumber(info.getSerialNumber() + 1);
  return created;
}
// Adds a bond from `beg` to `end` to `mol`. An `order` of 2 produces a double
// bond; every other value produces a single bond.
static void CreateAABond(RWMol *mol, Atom *beg, Atom *end,
                         unsigned int order)
{
  Bond *link = new Bond(order == 2 ? Bond::DOUBLE : Bond::SINGLE);
  link->setOwningMol(mol);
  link->setBeginAtom(beg);
  link->setEndAtom(end);
  mol->addBond(link, true);
}
// Builds the N, CA, C, O and CB atoms of an amino acid residue together with
// the N-CA, CA-C, C=O and CA-CB bonds.
//
// Outputs: r1 receives the backbone nitrogen, r2 the carbonyl carbon and cb
// the beta carbon. ldstereo selects the chiral tag placed on CA: positive for
// the L form, negative for the D form, zero to leave CA untagged.
static void CreateAABackbone(RWMol *mol, Atom *&r1, Atom *&r2, Atom *&cb,
                             AtomPDBResidueInfo &info, int ldstereo)
{
  // Atom creation order fixes atom indices and serial numbers; keep it.
  r1 = CreateAAAtom(mol," N ",info);
  Atom *alphaC = CreateAAAtom(mol," CA ",info);
  r2 = CreateAAAtom(mol," C ",info);
  Atom *carbonylO = CreateAAAtom(mol," O ",info);
  cb = CreateAAAtom(mol," CB ",info);

  // The order in which bonds are added to CA defines the neighbour order
  // that the chiral tag below is interpreted against.
  CreateAABond(mol, r1, alphaC, 1);
  CreateAABond(mol, alphaC, r2, 1);
  CreateAABond(mol, r2, carbonylO, 2);
  CreateAABond(mol, alphaC, cb, 1);

  if (ldstereo != 0) {
    alphaC->setChiralTag(ldstereo > 0 ? Atom::CHI_TETRAHEDRAL_CCW   // L
                                      : Atom::CHI_TETRAHEDRAL_CW);  // D
  }
}
// Appends the atoms and bonds of a single residue to `mol`.
//
// aa is a three letter PDB residue code (e.g. "ALA", "DAL", "MSE").
//
// Outputs:
//   r1 - backbone nitrogen (left null for ACE, which has none)
//   r2 - backbone carbonyl carbon
//   r3 - side-chain sulfur of CYS/DCY, null for every other residue
//
// `info` is updated in place: its residue number is advanced by one, its
// residue name is set to `aa`, its hetero flag is set for non-standard
// residues (including all D-amino acids) and cleared otherwise, and its
// serial number advances once per atom created.
//
// An unrecognised code creates no atoms and leaves r1, r2 and r3 null; the
// residue number and name in `info` are still updated in that case, so
// callers should test r2 to detect an unsupported residue.
static void CreateAminoAcid(RWMol *mol, const char *aa,
                            Atom *&r1, Atom *&r2, Atom *&r3,
                            AtomPDBResidueInfo &info)
{
  Atom *atom[10];
  r1 = (Atom*)0;
  r2 = (Atom*)0;
  r3 = (Atom*)0;
  int resno = info.getResidueNumber();
  info.setResidueNumber(resno+1);
  info.setIsHeteroAtom(false);
  info.setResidueName(aa);
  // Standard amino acids before non-standard, in PDB code alphabetical order
  switch (aa[0]) {
  case 'A':
    if (!strcmp(aa,"ALA")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
    } else if (!strcmp(aa,"ARG")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD ",info);
      atom[3] = CreateAAAtom(mol," NE ",info);
      atom[4] = CreateAAAtom(mol," CZ ",info);
      atom[5] = CreateAAAtom(mol," NH1",info);
      atom[6] = CreateAAAtom(mol," NH2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[3],1);
      CreateAABond(mol,atom[3],atom[4],1);
      CreateAABond(mol,atom[4],atom[5],2);
      CreateAABond(mol,atom[4],atom[6],1);
    } else if (!strcmp(aa,"ASP")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," OD1",info);
      atom[3] = CreateAAAtom(mol," OD2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],2);
      CreateAABond(mol,atom[1],atom[3],1);
    } else if (!strcmp(aa,"ASN")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," OD1",info);
      atom[3] = CreateAAAtom(mol," ND2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],2);
      CreateAABond(mol,atom[1],atom[3],1);
    } else if (!strcmp(aa,"ABA")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      CreateAABond(mol,atom[0],atom[1],1);
    } else if (!strcmp(aa,"ACE")) {
      // Acetyl cap: no nitrogen, so only r2 is populated.
      info.setIsHeteroAtom(true);
      r2 = CreateAAAtom(mol," C ",info);
      atom[0] = CreateAAAtom(mol," O ",info);
      atom[1] = CreateAAAtom(mol," CH3",info);
      CreateAABond(mol,r2,atom[1],1);
      CreateAABond(mol,r2,atom[0],2);
    }
    break;
  case 'C':
    if (!strcmp(aa,"CYS")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      r3 = CreateAAAtom(mol," SG ",info);
      CreateAABond(mol,atom[0],r3,1);
    }
    break;
  case 'D':
    // D-amino acids are non-standard residues, so each one is flagged as a
    // hetero atom group and built with the D (negative) backbone stereo.
    if (!strcmp(aa,"DAL")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
    } else if (!strcmp(aa,"DAR")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD ",info);
      atom[3] = CreateAAAtom(mol," NE ",info);
      atom[4] = CreateAAAtom(mol," CZ ",info);
      atom[5] = CreateAAAtom(mol," NH1",info);
      atom[6] = CreateAAAtom(mol," NH2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[3],1);
      CreateAABond(mol,atom[3],atom[4],1);
      CreateAABond(mol,atom[4],atom[5],2);
      CreateAABond(mol,atom[4],atom[6],1);
    } else if (!strcmp(aa,"DAS")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," OD1",info);
      atom[3] = CreateAAAtom(mol," OD2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],2);
      CreateAABond(mol,atom[1],atom[3],1);
    } else if (!strcmp(aa,"DBB")) {
      // D-alpha-aminobutyric acid: the D counterpart of ABA.
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      CreateAABond(mol,atom[0],atom[1],1);
    } else if (!strcmp(aa,"DCY")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      r3 = CreateAAAtom(mol," SG ",info);
      CreateAABond(mol,atom[0],r3,1);
    } else if (!strcmp(aa,"DGL")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD ",info);
      atom[3] = CreateAAAtom(mol," OE1",info);
      atom[4] = CreateAAAtom(mol," OE2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[3],2);
      CreateAABond(mol,atom[2],atom[4],1);
    } else if (!strcmp(aa,"DGN")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD ",info);
      atom[3] = CreateAAAtom(mol," OE1",info);
      atom[4] = CreateAAAtom(mol," NE2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[3],2);
      CreateAABond(mol,atom[2],atom[4],1);
    } else if (!strcmp(aa,"DHI")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," ND1",info);
      atom[3] = CreateAAAtom(mol," CD2",info);
      atom[4] = CreateAAAtom(mol," CE1",info);
      atom[5] = CreateAAAtom(mol," NE2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[4],2);
      CreateAABond(mol,atom[4],atom[5],1);
      CreateAABond(mol,atom[5],atom[3],1);
      CreateAABond(mol,atom[3],atom[1],2);
    } else if (!strcmp(aa,"DIL")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG1",info);
      atom[2] = CreateAAAtom(mol," CG2",info);
      atom[3] = CreateAAAtom(mol," CD1",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[0],atom[2],1);
      // CD1 hangs off CG1 (the ethyl branch); CG2 is the methyl branch.
      CreateAABond(mol,atom[1],atom[3],1);
      // CB neighbour order is CA, CG1, CG2: mirror image of the ILE tag.
      atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CW);
    } else if (!strcmp(aa,"DLE")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD1",info);
      atom[3] = CreateAAAtom(mol," CD2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[1],atom[3],1);
    } else if (!strcmp(aa,"DLY")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD ",info);
      atom[3] = CreateAAAtom(mol," CE ",info);
      atom[4] = CreateAAAtom(mol," NZ ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[3],1);
      CreateAABond(mol,atom[3],atom[4],1);
    } else if (!strcmp(aa,"DPN")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD1",info);
      atom[3] = CreateAAAtom(mol," CD2",info);
      atom[4] = CreateAAAtom(mol," CE1",info);
      atom[5] = CreateAAAtom(mol," CE2",info);
      atom[6] = CreateAAAtom(mol," CZ ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],2);
      CreateAABond(mol,atom[2],atom[4],1);
      CreateAABond(mol,atom[4],atom[6],2);
      CreateAABond(mol,atom[6],atom[5],1);
      CreateAABond(mol,atom[5],atom[3],2);
      CreateAABond(mol,atom[3],atom[1],1);
    } else if (!strcmp(aa,"DPR")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],r1,1);
    } else if (!strcmp(aa,"DSG")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," OD1",info);
      atom[3] = CreateAAAtom(mol," ND2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],2);
      CreateAABond(mol,atom[1],atom[3],1);
    } else if (!strcmp(aa,"DSN")) {
      // D-serine: same side chain as SER.
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," OG ",info);
      CreateAABond(mol,atom[0],atom[1],1);
    } else if (!strcmp(aa,"DTH")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," OG1",info);
      atom[2] = CreateAAAtom(mol," CG2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[0],atom[2],1);
      atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CW);
    } else if (!strcmp(aa,"DTR")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD1",info);
      atom[3] = CreateAAAtom(mol," CD2",info);
      atom[4] = CreateAAAtom(mol," NE1",info);
      atom[5] = CreateAAAtom(mol," CE2",info);
      atom[6] = CreateAAAtom(mol," CE3",info);
      atom[7] = CreateAAAtom(mol," CZ2",info);
      atom[8] = CreateAAAtom(mol," CZ3",info);
      atom[9] = CreateAAAtom(mol," CH2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],2);
      CreateAABond(mol,atom[1],atom[3],1);
      CreateAABond(mol,atom[2],atom[4],1);
      CreateAABond(mol,atom[3],atom[5],2);
      CreateAABond(mol,atom[3],atom[6],1);
      CreateAABond(mol,atom[4],atom[5],1);
      CreateAABond(mol,atom[5],atom[7],1);
      CreateAABond(mol,atom[6],atom[8],2);
      CreateAABond(mol,atom[7],atom[9],2);
      CreateAABond(mol,atom[8],atom[9],1);
    } else if (!strcmp(aa,"DTY")) {
      // D-tyrosine: same side chain as TYR.
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD1",info);
      atom[3] = CreateAAAtom(mol," CD2",info);
      atom[4] = CreateAAAtom(mol," CE1",info);
      atom[5] = CreateAAAtom(mol," CE2",info);
      atom[6] = CreateAAAtom(mol," CZ ",info);
      atom[7] = CreateAAAtom(mol," OH ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],2);
      CreateAABond(mol,atom[2],atom[4],1);
      CreateAABond(mol,atom[4],atom[6],2);
      CreateAABond(mol,atom[6],atom[5],1);
      CreateAABond(mol,atom[5],atom[3],2);
      CreateAABond(mol,atom[3],atom[1],1);
      CreateAABond(mol,atom[6],atom[7],1);
    } else if (!strcmp(aa,"DVA")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG1",info);
      atom[2] = CreateAAAtom(mol," CG2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[0],atom[2],1);
    }
    break;
  case 'G':
    if (!strcmp(aa,"GLN")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD ",info);
      atom[3] = CreateAAAtom(mol," OE1",info);
      atom[4] = CreateAAAtom(mol," NE2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[3],2);
      CreateAABond(mol,atom[2],atom[4],1);
    } else if (!strcmp(aa,"GLU")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD ",info);
      atom[3] = CreateAAAtom(mol," OE1",info);
      atom[4] = CreateAAAtom(mol," OE2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[3],2);
      CreateAABond(mol,atom[2],atom[4],1);
    } else if (!strcmp(aa,"GLY")) {
      // Glycine has no CB and no chiral tag, so the backbone is built by hand.
      r1 = CreateAAAtom(mol," N ",info);
      atom[0] = CreateAAAtom(mol," CA ",info);
      r2 = CreateAAAtom(mol," C ",info);
      atom[1] = CreateAAAtom(mol," O ",info);
      CreateAABond(mol,r1,atom[0],1);
      CreateAABond(mol,atom[0],r2,1);
      CreateAABond(mol,r2,atom[1],2);
    }
    break;
  case 'H':
    if (!strcmp(aa,"HIS")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," ND1",info);
      atom[3] = CreateAAAtom(mol," CD2",info);
      atom[4] = CreateAAAtom(mol," CE1",info);
      atom[5] = CreateAAAtom(mol," NE2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[4],2);
      CreateAABond(mol,atom[4],atom[5],1);
      CreateAABond(mol,atom[5],atom[3],1);
      CreateAABond(mol,atom[3],atom[1],2);
    }
    break;
  case 'I':
    if (!strcmp(aa,"ILE")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG1",info);
      atom[2] = CreateAAAtom(mol," CG2",info);
      atom[3] = CreateAAAtom(mol," CD1",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[0],atom[2],1);
      // CD1 hangs off CG1 (the ethyl branch); CG2 is the methyl branch.
      CreateAABond(mol,atom[1],atom[3],1);
      // CB neighbour order is CA, CG1 (ethyl), CG2 (methyl); with that
      // order the (3S) centre of L-isoleucine is counter-clockwise.
      atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW);
    }
    break;
  case 'L':
    if (!strcmp(aa,"LEU")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD1",info);
      atom[3] = CreateAAAtom(mol," CD2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[1],atom[3],1);
    } else if (!strcmp(aa,"LYS")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD ",info);
      atom[3] = CreateAAAtom(mol," CE ",info);
      atom[4] = CreateAAAtom(mol," NZ ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[3],1);
      CreateAABond(mol,atom[3],atom[4],1);
    }
    break;
  case 'M':
    if (!strcmp(aa,"MET")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," SD ",info);
      atom[3] = CreateAAAtom(mol," CE ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[3],1);
    } else if (!strcmp(aa,"MED")) {
      // D-methionine.
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,-1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," SD ",info);
      atom[3] = CreateAAAtom(mol," CE ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[3],1);
    } else if (!strcmp(aa,"MSE")) {
      // Selenomethionine: the "SE" atom name is what makes CreateAAAtom
      // pick selenium.
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol,"SE ",info);
      atom[3] = CreateAAAtom(mol," CE ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[3],1);
    }
    break;
  case 'N':
    if (!strcmp(aa,"NLE")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD ",info);
      atom[3] = CreateAAAtom(mol," CE ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[3],1);
    } else if (!strcmp(aa,"NVA")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
    }
    break;
  case 'O':
    if (!strcmp(aa,"ORN")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD ",info);
      atom[3] = CreateAAAtom(mol," NE ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],atom[3],1);
    }
    break;
  case 'P':
    if (!strcmp(aa,"PHE")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD1",info);
      atom[3] = CreateAAAtom(mol," CD2",info);
      atom[4] = CreateAAAtom(mol," CE1",info);
      atom[5] = CreateAAAtom(mol," CE2",info);
      atom[6] = CreateAAAtom(mol," CZ ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],2);
      CreateAABond(mol,atom[2],atom[4],1);
      CreateAABond(mol,atom[4],atom[6],2);
      CreateAABond(mol,atom[6],atom[5],1);
      CreateAABond(mol,atom[5],atom[3],2);
      CreateAABond(mol,atom[3],atom[1],1);
    } else if (!strcmp(aa,"PRO")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      // Close the pyrrolidine ring back onto the backbone nitrogen.
      CreateAABond(mol,atom[2],r1,1);
    } else if (!strcmp(aa,"PCA")) {
      info.setIsHeteroAtom(true);
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD ",info);
      atom[3] = CreateAAAtom(mol," OE ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],1);
      CreateAABond(mol,atom[2],r1,1);
      CreateAABond(mol,atom[2],atom[3],2);
    }
    break;
  case 'S':
    if (!strcmp(aa,"SER")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," OG ",info);
      CreateAABond(mol,atom[0],atom[1],1);
    } else if (!strcmp(aa,"SAR")) {
      // Sarcosine (N-methylglycine): no CB, methyl on the nitrogen.
      info.setIsHeteroAtom(true);
      r1 = CreateAAAtom(mol," N ",info);
      atom[0] = CreateAAAtom(mol," CA ",info);
      r2 = CreateAAAtom(mol," C ",info);
      atom[1] = CreateAAAtom(mol," O ",info);
      atom[2] = CreateAAAtom(mol," CN ",info);
      CreateAABond(mol,r1,atom[0],1);
      CreateAABond(mol,atom[0],r2,1);
      CreateAABond(mol,r2,atom[1],2);
      CreateAABond(mol,r1,atom[2],1);
    }
    break;
  case 'T':
    if (!strcmp(aa,"THR")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," OG1",info);
      atom[2] = CreateAAAtom(mol," CG2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[0],atom[2],1);
      atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW);
    } else if (!strcmp(aa,"TRP")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD1",info);
      atom[3] = CreateAAAtom(mol," CD2",info);
      atom[4] = CreateAAAtom(mol," NE1",info);
      atom[5] = CreateAAAtom(mol," CE2",info);
      atom[6] = CreateAAAtom(mol," CE3",info);
      atom[7] = CreateAAAtom(mol," CZ2",info);
      atom[8] = CreateAAAtom(mol," CZ3",info);
      atom[9] = CreateAAAtom(mol," CH2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],2);
      CreateAABond(mol,atom[1],atom[3],1);
      CreateAABond(mol,atom[2],atom[4],1);
      CreateAABond(mol,atom[3],atom[5],2);
      CreateAABond(mol,atom[3],atom[6],1);
      CreateAABond(mol,atom[4],atom[5],1);
      CreateAABond(mol,atom[5],atom[7],1);
      CreateAABond(mol,atom[6],atom[8],2);
      CreateAABond(mol,atom[7],atom[9],2);
      CreateAABond(mol,atom[8],atom[9],1);
    } else if (!strcmp(aa,"TYR")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG ",info);
      atom[2] = CreateAAAtom(mol," CD1",info);
      atom[3] = CreateAAAtom(mol," CD2",info);
      atom[4] = CreateAAAtom(mol," CE1",info);
      atom[5] = CreateAAAtom(mol," CE2",info);
      atom[6] = CreateAAAtom(mol," CZ ",info);
      atom[7] = CreateAAAtom(mol," OH ",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[1],atom[2],2);
      CreateAABond(mol,atom[2],atom[4],1);
      CreateAABond(mol,atom[4],atom[6],2);
      CreateAABond(mol,atom[6],atom[5],1);
      CreateAABond(mol,atom[5],atom[3],2);
      CreateAABond(mol,atom[3],atom[1],1);
      CreateAABond(mol,atom[6],atom[7],1);
    }
    break;
  case 'V':
    if (!strcmp(aa,"VAL")) {
      CreateAABackbone(mol,r1,r2,atom[0],info,1);
      atom[1] = CreateAAAtom(mol," CG1",info);
      atom[2] = CreateAAAtom(mol," CG2",info);
      CreateAABond(mol,atom[0],atom[1],1);
      CreateAABond(mol,atom[0],atom[2],1);
    }
    break;
  }
}
// Builds a peptide molecule from a string of one-letter amino acid codes.
//
// Parameters:
//   seq      - NUL-terminated sequence. Upper-case letters are L-amino acids.
//              Lower-case letters are the corresponding D-amino acids when
//              lowerD is true and L-amino acids otherwise ('g' is always
//              glycine). Blanks, tabs, line breaks and '-' are ignored. '.'
//              ends the current chain (capping it with OXT) and starts a new,
//              unconnected one.
//   sanitize - when true the finished molecule is passed to
//              MolOps::sanitizeMol.
//   lowerD   - see seq.
//
// Returns a newly allocated molecule owned by the caller, or a null pointer
// when seq is null, contains an unrecognised character, or names a residue
// that CreateAminoAcid does not build. If sanitization throws, the molecule
// is released before the exception propagates.
RWMol *SequenceToMol(const char *seq, bool sanitize, bool lowerD)
{
  if (!seq)
    return (RWMol*)0;
  Atom *prev = (Atom*)0;  // carbonyl carbon of the previous residue, if any
  AtomPDBResidueInfo info;
  info.setSerialNumber(1);
  info.setAltLoc(" ");
  info.setResidueNumber(0);
  info.setInsertionCode(" ");
  info.setChainId("A");
  RWMol *mol = new RWMol();
  for (; *seq; seq++) {
    const char *aa = (const char*)0;
    switch (*seq) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '-':
      // Layout characters carry no residue and must not break the chain.
      continue;
    case '.':
      if (prev) {
        Atom *oxt = CreateAAAtom(mol," OXT",info);
        CreateAABond(mol,prev,oxt,1);
        prev = (Atom*)0;
      }
      continue;
    case 'A': aa = "ALA"; break;
    case 'C': aa = "CYS"; break;
    case 'D': aa = "ASP"; break;
    case 'E': aa = "GLU"; break;
    case 'F': aa = "PHE"; break;
    case 'G':
    case 'g': aa = "GLY"; break;
    case 'H': aa = "HIS"; break;
    case 'I': aa = "ILE"; break;
    case 'K': aa = "LYS"; break;
    case 'L': aa = "LEU"; break;
    case 'M': aa = "MET"; break;
    case 'N': aa = "ASN"; break;
    case 'P': aa = "PRO"; break;
    case 'Q': aa = "GLN"; break;
    case 'R': aa = "ARG"; break;
    case 'S': aa = "SER"; break;
    case 'T': aa = "THR"; break;
    case 'V': aa = "VAL"; break;
    case 'W': aa = "TRP"; break;
    case 'Y': aa = "TYR"; break;
    case 'a': aa = lowerD ? "DAL" : "ALA"; break;
    case 'c': aa = lowerD ? "DCY" : "CYS"; break;
    case 'd': aa = lowerD ? "DAS" : "ASP"; break;
    case 'e': aa = lowerD ? "DGL" : "GLU"; break;
    case 'f': aa = lowerD ? "DPN" : "PHE"; break;
    case 'h': aa = lowerD ? "DHI" : "HIS"; break;
    case 'i': aa = lowerD ? "DIL" : "ILE"; break;
    case 'k': aa = lowerD ? "DLY" : "LYS"; break;
    case 'l': aa = lowerD ? "DLE" : "LEU"; break;
    case 'm': aa = lowerD ? "MED" : "MET"; break;
    case 'n': aa = lowerD ? "DSG" : "ASN"; break;
    case 'p': aa = lowerD ? "DPR" : "PRO"; break;
    case 'q': aa = lowerD ? "DGN" : "GLN"; break;
    case 'r': aa = lowerD ? "DAR" : "ARG"; break;
    case 's': aa = lowerD ? "DSN" : "SER"; break;
    case 't': aa = lowerD ? "DTH" : "THR"; break;
    case 'v': aa = lowerD ? "DVA" : "VAL"; break;
    case 'w': aa = lowerD ? "DTR" : "TRP"; break;
    case 'y': aa = lowerD ? "DTY" : "TYR"; break;
    default:
      delete mol;
      return (RWMol*)0;
    }

    Atom *r1 = (Atom*)0;
    Atom *r2 = (Atom*)0;
    Atom *r3 = (Atom*)0;
    CreateAminoAcid(mol,aa,r1,r2,r3,info);
    if (!r2) {
      // CreateAminoAcid built nothing for this code; silently dropping the
      // residue would yield a wrong, uncapped chain, so report failure.
      delete mol;
      return (RWMol*)0;
    }
    if (prev && r1)
      CreateAABond(mol,prev,r1,1);  // peptide bond C(prev)-N(this)
    prev = r2;
  }
  if (prev) {
    Atom *oxt = CreateAAAtom(mol," OXT",info);
    CreateAABond(mol,prev,oxt,1);
  }
  if (sanitize) {
    try {
      MolOps::sanitizeMol(*mol);
    } catch (...) {
      delete mol;  // do not leak the molecule when sanitization fails
      throw;
    }
  }
  return mol;
}
// std::string convenience overload: forwards to the C-string SequenceToMol
// with the same sanitize and lowerD settings.
RWMol *SequenceToMol(const std::string &seq, bool sanitize, bool lowerD)
{
  const char *text = seq.c_str();
  return SequenceToMol(text, sanitize, lowerD);
}
// Builds a peptide molecule from a single FASTA record.
//
// If the text starts with '>', everything up to the first line break is taken
// as the title and stored in the molecule's _Name property; the remainder is
// parsed by SequenceToMol with the given sanitize and lowerD settings.
//
// Returns a newly allocated molecule owned by the caller, or a null pointer
// when seq is null or SequenceToMol rejects the sequence.
RWMol *FASTAToMol(const char *seq, bool sanitize, bool lowerD)
{
  if (!seq)
    return (RWMol*)0;
  std::string title;
  if (seq[0]=='>') {
    seq++;
    while (*seq && *seq!='\n' && *seq!='\r')
      title += *seq++;
  }
  RWMol *mol = SequenceToMol(seq,sanitize,lowerD);
  // SequenceToMol returns null for an invalid sequence; only name a molecule
  // that actually exists.
  if (mol && !title.empty())
    mol->setProp(common_properties::_Name,title);
  return mol;
}
// std::string convenience overload: forwards to the C-string FASTAToMol with
// the same sanitize and lowerD settings.
RWMol *FASTAToMol(const std::string &seq, bool sanitize, bool lowerD)
{
  const char *text = seq.c_str();
  return FASTAToMol(text, sanitize, lowerD);
}
// Attachment points of one monomer recorded while parsing a HELM peptide.
// A pointer is reset to null once that attachment point has been bonded.
struct HELMMonomer {
  Atom *r1;   // R1 attachment atom (receives r1 from CreateAminoAcid)
  Atom *r2;   // R2 attachment atom (receives r2 from CreateAminoAcid)
  Atom *r3;   // R3 attachment atom (receives r3 from CreateAminoAcid)
  Atom *oxt;  // terminal OXT added when the chain was closed, else null

  // Empty monomer: no attachment points.
  HELMMonomer()
    : r1(0), r2(0), r3(0), oxt(0)
  {}

  // Monomer with the three given attachment atoms and no OXT.
  HELMMonomer(Atom *first, Atom *second, Atom *third)
    : r1(first), r2(second), r3(third), oxt(0)
  {}
};
// Maps an upper-case one-letter amino acid code to its three letter PDB
// residue code. Returns a null pointer for any other character.
static const char *GetHELMOneLetterCode(char ch)
{
  static const char letters[20] = {
    'A','C','D','E','F','G','H','I','K','L',
    'M','N','P','Q','R','S','T','V','W','Y'
  };
  static const char *const codes[20] = {
    "ALA","CYS","ASP","GLU","PHE","GLY","HIS","ILE","LYS","LEU",
    "MET","ASN","PRO","GLN","ARG","SER","THR","VAL","TRP","TYR"
  };
  for (unsigned int idx = 0; idx < 20; idx++) {
    if (letters[idx] == ch)
      return codes[idx];
  }
  return (const char*)0;
}
// True when ch is an ASCII letter or digit, i.e. a character accepted inside
// a bracketed HELM monomer identifier.
static bool IsHELMMonomerIDChar(char ch)
{
  const bool upper = (ch >= 'A' && ch <= 'Z');
  const bool lower = (ch >= 'a' && ch <= 'z');
  const bool digit = (ch >= '0' && ch <= '9');
  return upper || lower || digit;
}
// Maps a HELM peptide monomer identifier (the text between '[' and ']') to
// the three letter PDB residue code used by the residue builder.
//
// ptr must be a NUL-terminated string. The whole string has to match an
// identifier exactly (case sensitive). Returns a null pointer when the
// identifier is not known.
static const char *LookupHELMPeptideMonomer(const char *ptr)
{
  static const struct {
    const char *helm;  // HELM monomer identifier
    const char *pdb;   // PDB residue code
  } table[] = {
    // L-amino acids and other non-prefixed monomers
    { "A",   "ALA" }, { "Abu", "ABA" },
    { "C",   "CYS" },
    { "D",   "ASP" },
    { "E",   "GLU" },
    { "F",   "PHE" },
    { "G",   "GLY" }, { "Glp", "PCA" },
    { "H",   "HIS" },
    { "I",   "ILE" },
    { "K",   "LYS" },
    { "L",   "LEU" },
    { "M",   "MET" },
    { "N",   "ASN" }, { "Nal", "NAL" }, { "Nle", "NLE" }, { "Nva", "NVA" },
    { "Orn", "ORN" },
    { "P",   "PRO" },
    { "Q",   "GLN" },
    { "R",   "ARG" },
    { "S",   "SER" }, { "Sar", "SAR" },
    { "T",   "THR" },
    { "V",   "VAL" },
    { "W",   "TRP" },
    { "Y",   "TYR" },
    // D-amino acids ("d" prefix); glycine is achiral so there is no "dG"
    { "dA",  "DAL" }, { "dC",  "DCY" }, { "dD",  "DAS" }, { "dE",  "DGL" },
    { "dF",  "DPN" }, { "dH",  "DHI" }, { "dI",  "DIL" }, { "dK",  "DLY" },
    { "dL",  "DLE" }, { "dM",  "MED" }, { "dN",  "DSG" }, { "dP",  "DPR" },
    { "dQ",  "DGN" }, { "dR",  "DAR" }, { "dS",  "DSN" }, { "dT",  "DTH" },
    { "dV",  "DVA" }, { "dW",  "DTR" }, { "dY",  "DTY" },
    // NOTE(review): "seC" is mapped to MSE (selenomethionine) as before;
    // confirm this is the intended residue for this identifier.
    { "seC", "MSE" }
  };
  const unsigned int count = sizeof(table)/sizeof(table[0]);
  for (unsigned int idx = 0; idx < count; idx++) {
    if (!strcmp(ptr, table[idx].helm))
      return table[idx].pdb;
  }
  return (const char*)0;
}
// Parses the monomer list of one HELM peptide (the text after '{') and adds
// the corresponding residues to `mol`.
//
// Parameters:
//   mol   - molecule receiving the atoms and bonds.
//   ptr   - first character after the opening '{'.
//   chain - chain identifier stored on every atom of this peptide.
//   vseq  - cleared, then filled with one HELMMonomer per parsed monomer so
//           that later connection records can find the attachment atoms.
//
// Accepted syntax: an optional leading "[ac]." acetyl cap, then monomers
// separated by '.', each either a one-letter code or a bracketed identifier,
// optionally ending in ".[am]" (C-terminal amide) before the closing '}'.
// Without "[am]" the chain is capped with an OXT atom.
//
// Returns a pointer to the closing '}' on success, or a null pointer on a
// syntax error, an unknown monomer, or a monomer the residue builder does
// not support.
static const char *ParseHELMPeptide(RWMol *mol,
                                    const char *ptr,
                                    const char *chain,
                                    std::vector<HELMMonomer> &vseq)
{
  unsigned int len = 0;  // number of entries in vseq
  HELMMonomer curr;
  vseq.clear();
  if (ptr[0]=='}')
    return ptr;
  AtomPDBResidueInfo info;
  info.setSerialNumber(1);
  info.setAltLoc(" ");
  info.setResidueNumber(0);
  info.setInsertionCode(" ");
  info.setChainId(chain);
  if (ptr[0]=='[' && ptr[1]=='a' && ptr[2]=='c' && ptr[3]==']') {
    if (ptr[4] != '.')
      return (const char*)0;
    // CreateAminoAcid increments the residue number, so the cap is
    // numbered -1; numbering then restarts so the first residue is 1.
    info.setResidueNumber(-2);
    CreateAminoAcid(mol,"ACE",curr.r1,curr.r2,curr.r3,info);
    vseq.push_back(curr);
    info.setResidueNumber(0);
    ptr += 5;
    len = 1;
  }
  for (;;) {
    const char *name = 0;
    if (*ptr=='[') {
      std::string tmp;
      ptr++;
      while (IsHELMMonomerIDChar(*ptr))
        tmp += *ptr++;
      if (*ptr != ']')
        return (const char*)0;
      name = LookupHELMPeptideMonomer(tmp.c_str());
    } else name = GetHELMOneLetterCode(*ptr);
    if (!name) return (const char*)0;
    ptr++;  // step over the one-letter code or the closing ']'
    CreateAminoAcid(mol,name,curr.r1,curr.r2,curr.r3,info);
    if (!curr.r2) {
      // The residue builder created nothing for this code. Accepting it
      // would leave a gap in the chain, so reject the input instead.
      return (const char*)0;
    }
    if (len && vseq[len-1].r2 && curr.r1) {
      CreateAABond(mol,vseq[len-1].r2,curr.r1,1);
      vseq[len-1].r2 = 0;  // attachment point is now used
    }
    vseq.push_back(curr);
    len++;
    if (*ptr=='.') {
      if (ptr[1]=='[' && ptr[2]=='a' && ptr[3]=='m' &&
          ptr[4]==']' && ptr[5]=='}') {
        if (!vseq[len-1].r2)
          return (const char*)0;
        // C-terminal amide: a lone nitrogen in its own "NH2" residue.
        int resno = info.getResidueNumber();
        info.setResidueNumber(resno+1);
        info.setIsHeteroAtom(true);
        info.setResidueName("NH2");
        Atom *n = CreateAAAtom(mol," N ",info);
        CreateAABond(mol,vseq[len-1].r2,n,1);
        vseq[len-1].r2 = (Atom*)0;
        vseq.push_back(HELMMonomer());
        len++;
        return ptr+5;  // points at the closing '}'
      }
      ptr++;
    } else if (*ptr=='}') {
      if (!vseq[len-1].r2)
        return (const char*)0;
      Atom *oxt = CreateAAAtom(mol," OXT",info);
      CreateAABond(mol,vseq[len-1].r2,oxt,1);
      // Remembered so a later R2 connection can remove the cap again.
      vseq[len-1].oxt = oxt;
      return ptr;
    } else return (const char*)0;
  }
}
// Advances ptr over a polymer identifier of the form "PEPTIDE" followed by a
// number whose first digit is 1-9. Returns false (leaving ptr untouched)
// when the text at ptr does not start with such an identifier.
static bool SkipHELMPeptideId(const char *&ptr)
{
  if (strncmp(ptr,"PEPTIDE",7) != 0)
    return false;
  if (ptr[7] < '1' || ptr[7] > '9')
    return false;
  ptr += 8;
  while (*ptr>='0' && *ptr<='9')
    ptr++;
  return true;
}

// Parses "<residue number>:R<digit>" at ptr. On success stores the residue
// number and the R-group digit, advances ptr past them and returns true.
static bool ParseHELMAttachment(const char *&ptr,
                                unsigned int &resno,
                                unsigned int &rgroup)
{
  if (*ptr < '1' || *ptr > '9')
    return false;
  resno = (*ptr++)-'0';
  while (*ptr>='0' && *ptr<='9')
    resno = 10*resno + ((*ptr++)-'0');
  if (ptr[0] != ':' || ptr[1] != 'R' || ptr[2] < '1' || ptr[2] > '9')
    return false;
  rgroup = ptr[2]-'0';
  ptr += 3;
  return true;
}

// Parses a HELM string consisting of PEPTIDE polymers and, optionally, a
// section of connections between them, building everything into `mol`.
//
// Layout handled: polymers separated by '|', then '$', then either "$$$"
// or a '|'-separated list of connections
// "PEPTIDEn,PEPTIDEm,res:Rx-res:Ry" followed by "$$$". Supported
// connection kinds are R3-R3, R1-R2 and R2-R1; an R2 connection replaces
// the OXT cap of that residue.
//
// Returns true on success and false on any syntax or consistency error.
// Atoms already added to `mol` are not rolled back on failure.
static bool ParseHELM(RWMol *mol, const char *ptr)
{
  std::map<std::string,std::vector<HELMMonomer> > seqs;
  char chain[2] = { 'A', '\0' };

  // Section 1: the polymers.
  for (;;) {
    const char *start = ptr;
    if (!SkipHELMPeptideId(ptr))
      return false;
    if (*ptr != '{')
      return false;
    const std::string id(start,ptr-start);
    // Chain letter comes from the first digit of the polymer number.
    chain[0] = 'A' + (start[7]-'1');
    ptr = ParseHELMPeptide(mol,ptr+1,chain,seqs[id]);
    if (!ptr || *ptr != '}')
      return false;
    ptr++;

    if (*ptr == '$')
      break;
    if (*ptr == '\0')
      return true;
    if (*ptr != '|')
      return false;
    ptr++;
  }
  ptr++;
  if (ptr[0]=='$' && ptr[1]=='$' && ptr[2]=='$')
    return true;

  // Section 2: the connections.
  for (;;) {
    const char *start = ptr;
    if (!SkipHELMPeptideId(ptr))
      return false;
    if (*ptr != ',')
      return false;
    const std::string id1(start,ptr-start);
    ptr++;

    start = ptr;
    if (!SkipHELMPeptideId(ptr))
      return false;
    if (*ptr != ',')
      return false;
    const std::string id2(start,ptr-start);
    ptr++;

    unsigned int res1;
    unsigned int res1r;
    if (!ParseHELMAttachment(ptr,res1,res1r))
      return false;
    if (*ptr != '-')
      return false;
    ptr++;
    unsigned int res2;
    unsigned int res2r;
    if (!ParseHELMAttachment(ptr,res2,res2r))
      return false;

    if (res1<1 || res2<1)
      return false;
    std::map<std::string,std::vector<HELMMonomer> >::iterator it1 =
        seqs.find(id1);
    std::map<std::string,std::vector<HELMMonomer> >::iterator it2 =
        seqs.find(id2);
    if (it1 == seqs.end() || it2 == seqs.end())
      return false;
    std::vector<HELMMonomer> &vseq1 = it1->second;
    std::vector<HELMMonomer> &vseq2 = it2->second;
    if (res1 > (unsigned int)vseq1.size())
      return false;
    if (res2 > (unsigned int)vseq2.size())
      return false;

    HELMMonomer &first = vseq1[res1-1];
    HELMMonomer &second = vseq2[res2-1];
    if (res1r == 3 && res2r == 3) {
      Atom *src = first.r3;
      Atom *dst = second.r3;
      if (!(src && dst && src != dst))
        return false;
      CreateAABond(mol,src,dst,1);
      first.r3 = (Atom*)0;
      second.r3 = (Atom*)0;
    } else if (res1r == 1 && res2r == 2) {
      Atom *src = first.r1;
      Atom *dst = second.r2;
      Atom *oxt = second.oxt;
      if (!(src && dst && oxt && src != dst))
        return false;
      mol->removeAtom(oxt);
      CreateAABond(mol,src,dst,1);
      first.r1 = (Atom*)0;
      second.r2 = (Atom*)0;
    } else if (res1r == 2 && res2r == 1) {
      Atom *src = second.r1;
      Atom *dst = first.r2;
      Atom *oxt = first.oxt;
      if (!(src && dst && oxt && src != dst))
        return false;
      mol->removeAtom(oxt);
      CreateAABond(mol,dst,src,1);
      first.r2 = (Atom*)0;
      second.r1 = (Atom*)0;
    } else {
      return false;
    }

    if (*ptr == '$')
      break;
    if (*ptr != '|')
      return false;
    ptr++;
  }
  ptr++;
  return ptr[0]=='$' && ptr[1]=='$';
}
// Builds a molecule from a HELM string containing PEPTIDE polymers.
//
// Parameters:
//   helm     - NUL-terminated HELM text. A string starting with "$$$$"
//              yields an empty molecule.
//   sanitize - when true a successfully parsed molecule is passed to
//              MolOps::sanitizeMol.
//
// Returns a newly allocated molecule owned by the caller, or a null pointer
// when helm is null or cannot be parsed. If sanitization throws, the
// molecule is released before the exception propagates.
RWMol *HELMToMol(const char *helm, bool sanitize)
{
  // Same null-input contract as SequenceToMol and FASTAToMol.
  if (!helm)
    return (RWMol*)0;
  RWMol *mol = new RWMol();
  const char *ptr = helm;
  if (ptr[0]=='$' && ptr[1]=='$' && ptr[2]=='$' && ptr[3]=='$')
    return mol;
  if (ParseHELM(mol,ptr)) {
    if (sanitize) {
      try {
        MolOps::sanitizeMol(*mol);
      } catch (...) {
        delete mol;  // do not leak the molecule when sanitization fails
        throw;
      }
    }
    return mol;
  }
  delete mol;
  return (RWMol*)0;
}
// std::string convenience overload: forwards to the C-string HELMToMol with
// the same sanitize setting.
RWMol *HELMToMol(const std::string &helm, bool sanitize)
{
  const char *text = helm.c_str();
  return HELMToMol(text, sanitize);
}
} // namespace RDKit