mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-05 22:04:27 +08:00
1189 lines
39 KiB
C++
1189 lines
39 KiB
C++
//
|
|
// Copyright (C) 2015 Greg Landrum and NextMove Software
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <string.h>
|
|
|
|
#include <string>
|
|
#include <vector>
|
|
#include <map>
|
|
|
|
#include <GraphMol/GraphMol.h>
|
|
#include <GraphMol/MolOps.h>
|
|
#include <GraphMol/MonomerInfo.h>
|
|
|
|
namespace RDKit {
|
|
|
|
// Creates one atom, adds it to `mol`, and attaches a copy of `info` (with its
// atom name set to `name`) as the atom's monomer info.
//
// The element is chosen from the first two characters of `name`:
//   " C" -> 6, " N" -> 7, " O" -> 8, " S" -> 16, "SE" -> 34, anything else -> 0.
//
// Side effect: the serial number stored in `info` is advanced by one AFTER the
// copy is taken, so the new atom carries the serial number `info` had on entry.
//
// Returns the newly created atom.
static Atom *CreateAAAtom(RWMol *mol, const char *name,
                          AtomPDBResidueInfo &info) {
  unsigned int atomicNum = 0;
  if (name[0] == ' ') {
    switch (name[1]) {
      case 'C':
        atomicNum = 6;
        break;
      case 'N':
        atomicNum = 7;
        break;
      case 'O':
        atomicNum = 8;
        break;
      case 'S':
        atomicNum = 16;
        break;
      default:
        break;
    }
  } else if (name[0] == 'S' && name[1] == 'E') {
    atomicNum = 34;
  }

  Atom *created = new Atom(atomicNum);
  // NOTE(review): the two `true` flags are presumably updateLabel and
  // takeOwnership - confirm against RWMol::addAtom.
  mol->addAtom(created, true, true);

  AtomPDBResidueInfo *residue = (AtomPDBResidueInfo *)info.copy();
  residue->setName(name);
  created->setMonomerInfo(residue);

  const unsigned int currentSerial = info.getSerialNumber();
  info.setSerialNumber(currentSerial + 1);
  return created;
}
|
|
|
|
// Creates a bond between `beg` and `end` and adds it to `mol`.
// `order == 2` produces a double bond; every other value produces a single
// bond.
static void CreateAABond(RWMol *mol, Atom *beg, Atom *end, unsigned int order) {
  Bond *link = new Bond(order == 2 ? Bond::DOUBLE : Bond::SINGLE);
  link->setOwningMol(mol);
  link->setBeginAtom(beg);
  link->setEndAtom(end);
  mol->addBond(link, true);
}
|
|
|
|
// Builds the N, CA, C, O and CB atoms of an amino acid residue and the bonds
// N-CA, CA-C, C=O and CA-CB.
//
// Outputs:
//   r1 - the backbone nitrogen
//   r2 - the backbone carbonyl carbon
//   cb - the beta carbon (the side chain is attached here by the caller)
//
// ldstereo > 0 marks the alpha carbon CHI_TETRAHEDRAL_CCW (L residue),
// ldstereo < 0 marks it CHI_TETRAHEDRAL_CW (D residue), and 0 leaves the
// chiral tag untouched.
//
// The atom and bond creation order is significant: it determines the serial
// numbers handed out through `info` and the neighbour order around CA.
static void CreateAABackbone(RWMol *mol, Atom *&r1, Atom *&r2, Atom *&cb,
                             AtomPDBResidueInfo &info, int ldstereo) {
  r1 = CreateAAAtom(mol, " N ", info);
  Atom *alphaC = CreateAAAtom(mol, " CA ", info);
  r2 = CreateAAAtom(mol, " C ", info);
  Atom *carbonylO = CreateAAAtom(mol, " O ", info);
  cb = CreateAAAtom(mol, " CB ", info);

  CreateAABond(mol, r1, alphaC, 1);
  CreateAABond(mol, alphaC, r2, 1);
  CreateAABond(mol, r2, carbonylO, 2);
  CreateAABond(mol, alphaC, cb, 1);

  if (ldstereo != 0) {
    alphaC->setChiralTag(ldstereo > 0 ? Atom::CHI_TETRAHEDRAL_CCW   // L-stereo
                                      : Atom::CHI_TETRAHEDRAL_CW);  // D-stereo
  }
}
|
|
|
|
// aa is a three letter PDB residue code
|
|
static void CreateAminoAcid(RWMol *mol, const char *aa, Atom *&r1, Atom *&r2,
|
|
Atom *&r3, AtomPDBResidueInfo &info) {
|
|
Atom *atom[10];
|
|
|
|
r1 = (Atom *)0;
|
|
r2 = (Atom *)0;
|
|
r3 = (Atom *)0;
|
|
|
|
int resno = info.getResidueNumber();
|
|
info.setResidueNumber(resno + 1);
|
|
info.setIsHeteroAtom(false);
|
|
info.setResidueName(aa);
|
|
|
|
// Standard amino acids before non-standard, in PDB code alphabetical order
|
|
switch (aa[0]) {
|
|
case 'A':
|
|
if (!strcmp(aa, "ALA")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
} else if (!strcmp(aa, "ARG")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD ", info);
|
|
atom[3] = CreateAAAtom(mol, " NE ", info);
|
|
atom[4] = CreateAAAtom(mol, " CZ ", info);
|
|
atom[5] = CreateAAAtom(mol, " NH1", info);
|
|
atom[6] = CreateAAAtom(mol, " NH2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 1);
|
|
CreateAABond(mol, atom[3], atom[4], 1);
|
|
CreateAABond(mol, atom[4], atom[5], 2);
|
|
CreateAABond(mol, atom[4], atom[6], 1);
|
|
} else if (!strcmp(aa, "ASP")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " OD1", info);
|
|
atom[3] = CreateAAAtom(mol, " OD2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 2);
|
|
CreateAABond(mol, atom[1], atom[3], 1);
|
|
} else if (!strcmp(aa, "ASN")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " OD1", info);
|
|
atom[3] = CreateAAAtom(mol, " ND2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 2);
|
|
CreateAABond(mol, atom[1], atom[3], 1);
|
|
} else if (!strcmp(aa, "ABA")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
} else if (!strcmp(aa, "ACE")) {
|
|
info.setIsHeteroAtom(true);
|
|
r2 = CreateAAAtom(mol, " C ", info);
|
|
atom[0] = CreateAAAtom(mol, " O ", info);
|
|
atom[1] = CreateAAAtom(mol, " CH3", info);
|
|
CreateAABond(mol, r2, atom[1], 1);
|
|
CreateAABond(mol, r2, atom[0], 2);
|
|
}
|
|
break;
|
|
|
|
case 'C':
|
|
if (!strcmp(aa, "CYS")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
r3 = CreateAAAtom(mol, " SG ", info);
|
|
CreateAABond(mol, atom[0], r3, 1);
|
|
}
|
|
break;
|
|
|
|
case 'D':
|
|
if (!strcmp(aa, "DAL")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
} else if (!strcmp(aa, "DAR")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD ", info);
|
|
atom[3] = CreateAAAtom(mol, " NE ", info);
|
|
atom[4] = CreateAAAtom(mol, " CZ ", info);
|
|
atom[5] = CreateAAAtom(mol, " NH1", info);
|
|
atom[6] = CreateAAAtom(mol, " NH2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 1);
|
|
CreateAABond(mol, atom[3], atom[4], 1);
|
|
CreateAABond(mol, atom[4], atom[5], 2);
|
|
CreateAABond(mol, atom[4], atom[6], 1);
|
|
} else if (!strcmp(aa, "DAS")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " OD1", info);
|
|
atom[3] = CreateAAAtom(mol, " OD2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 2);
|
|
CreateAABond(mol, atom[1], atom[3], 1);
|
|
} else if (!strcmp(aa, "DBB")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
} else if (!strcmp(aa, "DCY")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
r3 = CreateAAAtom(mol, " SG ", info);
|
|
CreateAABond(mol, atom[0], r3, 1);
|
|
} else if (!strcmp(aa, "DGL")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD ", info);
|
|
atom[3] = CreateAAAtom(mol, " OE1", info);
|
|
atom[4] = CreateAAAtom(mol, " OE2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 2);
|
|
CreateAABond(mol, atom[2], atom[4], 1);
|
|
} else if (!strcmp(aa, "DGN")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD ", info);
|
|
atom[3] = CreateAAAtom(mol, " OE1", info);
|
|
atom[4] = CreateAAAtom(mol, " NE2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 2);
|
|
CreateAABond(mol, atom[2], atom[4], 1);
|
|
} else if (!strcmp(aa, "DHI")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " ND1", info);
|
|
atom[3] = CreateAAAtom(mol, " CD2", info);
|
|
atom[4] = CreateAAAtom(mol, " CE1", info);
|
|
atom[5] = CreateAAAtom(mol, " NE2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[4], 2);
|
|
CreateAABond(mol, atom[4], atom[5], 1);
|
|
CreateAABond(mol, atom[5], atom[3], 1);
|
|
CreateAABond(mol, atom[3], atom[1], 2);
|
|
} else if (!strcmp(aa, "DIL")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " CG1", info);
|
|
atom[2] = CreateAAAtom(mol, " CG2", info);
|
|
atom[3] = CreateAAAtom(mol, " CD1", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[0], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 1);
|
|
atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW);
|
|
} else if (!strcmp(aa, "DLE")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD1", info);
|
|
atom[3] = CreateAAAtom(mol, " CD2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[1], atom[3], 1);
|
|
} else if (!strcmp(aa, "DLY")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD ", info);
|
|
atom[3] = CreateAAAtom(mol, " CE ", info);
|
|
atom[4] = CreateAAAtom(mol, " NZ ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 1);
|
|
CreateAABond(mol, atom[3], atom[4], 1);
|
|
} else if (!strcmp(aa, "DPN")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD1", info);
|
|
atom[3] = CreateAAAtom(mol, " CD2", info);
|
|
atom[4] = CreateAAAtom(mol, " CE1", info);
|
|
atom[5] = CreateAAAtom(mol, " CE2", info);
|
|
atom[6] = CreateAAAtom(mol, " CZ ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 2);
|
|
CreateAABond(mol, atom[2], atom[4], 1);
|
|
CreateAABond(mol, atom[4], atom[6], 2);
|
|
CreateAABond(mol, atom[6], atom[5], 1);
|
|
CreateAABond(mol, atom[5], atom[3], 2);
|
|
CreateAABond(mol, atom[3], atom[1], 1);
|
|
} else if (!strcmp(aa, "DPR")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], r1, 1);
|
|
} else if (!strcmp(aa, "DSG")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " OD1", info);
|
|
atom[3] = CreateAAAtom(mol, " ND2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 2);
|
|
CreateAABond(mol, atom[1], atom[3], 1);
|
|
} else if (!strcmp(aa, "DTH")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " OG1", info);
|
|
atom[2] = CreateAAAtom(mol, " CG2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[0], atom[2], 1);
|
|
atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CW);
|
|
} else if (!strcmp(aa, "DTR")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD1", info);
|
|
atom[3] = CreateAAAtom(mol, " CD2", info);
|
|
atom[4] = CreateAAAtom(mol, " NE1", info);
|
|
atom[5] = CreateAAAtom(mol, " CE2", info);
|
|
atom[6] = CreateAAAtom(mol, " CE3", info);
|
|
atom[7] = CreateAAAtom(mol, " CZ2", info);
|
|
atom[8] = CreateAAAtom(mol, " CZ3", info);
|
|
atom[9] = CreateAAAtom(mol, " CH2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 2);
|
|
CreateAABond(mol, atom[1], atom[3], 1);
|
|
CreateAABond(mol, atom[2], atom[4], 1);
|
|
CreateAABond(mol, atom[3], atom[5], 2);
|
|
CreateAABond(mol, atom[3], atom[6], 1);
|
|
CreateAABond(mol, atom[4], atom[5], 1);
|
|
CreateAABond(mol, atom[5], atom[7], 1);
|
|
CreateAABond(mol, atom[6], atom[8], 2);
|
|
CreateAABond(mol, atom[7], atom[9], 2);
|
|
CreateAABond(mol, atom[8], atom[9], 1);
|
|
} else if (!strcmp(aa, "DVA")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " CG1", info);
|
|
atom[2] = CreateAAAtom(mol, " CG2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[0], atom[2], 1);
|
|
}
|
|
break;
|
|
|
|
case 'G':
|
|
if (!strcmp(aa, "GLN")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD ", info);
|
|
atom[3] = CreateAAAtom(mol, " OE1", info);
|
|
atom[4] = CreateAAAtom(mol, " NE2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 2);
|
|
CreateAABond(mol, atom[2], atom[4], 1);
|
|
} else if (!strcmp(aa, "GLU")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD ", info);
|
|
atom[3] = CreateAAAtom(mol, " OE1", info);
|
|
atom[4] = CreateAAAtom(mol, " OE2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 2);
|
|
CreateAABond(mol, atom[2], atom[4], 1);
|
|
} else if (!strcmp(aa, "GLY")) {
|
|
r1 = CreateAAAtom(mol, " N ", info);
|
|
atom[0] = CreateAAAtom(mol, " CA ", info);
|
|
r2 = CreateAAAtom(mol, " C ", info);
|
|
atom[1] = CreateAAAtom(mol, " O ", info);
|
|
CreateAABond(mol, r1, atom[0], 1);
|
|
CreateAABond(mol, atom[0], r2, 1);
|
|
CreateAABond(mol, r2, atom[1], 2);
|
|
}
|
|
break;
|
|
|
|
case 'H':
|
|
if (!strcmp(aa, "HIS")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " ND1", info);
|
|
atom[3] = CreateAAAtom(mol, " CD2", info);
|
|
atom[4] = CreateAAAtom(mol, " CE1", info);
|
|
atom[5] = CreateAAAtom(mol, " NE2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[4], 2);
|
|
CreateAABond(mol, atom[4], atom[5], 1);
|
|
CreateAABond(mol, atom[5], atom[3], 1);
|
|
CreateAABond(mol, atom[3], atom[1], 2);
|
|
}
|
|
break;
|
|
|
|
case 'I':
|
|
if (!strcmp(aa, "ILE")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG1", info);
|
|
atom[2] = CreateAAAtom(mol, " CG2", info);
|
|
atom[3] = CreateAAAtom(mol, " CD1", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[0], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 1);
|
|
atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CW);
|
|
}
|
|
break;
|
|
|
|
case 'L':
|
|
if (!strcmp(aa, "LEU")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD1", info);
|
|
atom[3] = CreateAAAtom(mol, " CD2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[1], atom[3], 1);
|
|
} else if (!strcmp(aa, "LYS")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD ", info);
|
|
atom[3] = CreateAAAtom(mol, " CE ", info);
|
|
atom[4] = CreateAAAtom(mol, " NZ ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 1);
|
|
CreateAABond(mol, atom[3], atom[4], 1);
|
|
}
|
|
break;
|
|
|
|
case 'M':
|
|
if (!strcmp(aa, "MET")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " SD ", info);
|
|
atom[3] = CreateAAAtom(mol, " CE ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 1);
|
|
} else if (!strcmp(aa, "MED")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, -1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " SD ", info);
|
|
atom[3] = CreateAAAtom(mol, " CE ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 1);
|
|
} else if (!strcmp(aa, "MSE")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, "SE ", info);
|
|
atom[3] = CreateAAAtom(mol, " CE ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 1);
|
|
}
|
|
break;
|
|
|
|
case 'N':
|
|
if (!strcmp(aa, "NLE")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD ", info);
|
|
atom[3] = CreateAAAtom(mol, " CE ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 1);
|
|
} else if (!strcmp(aa, "NVA")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
}
|
|
break;
|
|
|
|
case 'O':
|
|
if (!strcmp(aa, "ORN")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD ", info);
|
|
atom[3] = CreateAAAtom(mol, " NE ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], atom[3], 1);
|
|
}
|
|
break;
|
|
|
|
case 'P':
|
|
if (!strcmp(aa, "PHE")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD1", info);
|
|
atom[3] = CreateAAAtom(mol, " CD2", info);
|
|
atom[4] = CreateAAAtom(mol, " CE1", info);
|
|
atom[5] = CreateAAAtom(mol, " CE2", info);
|
|
atom[6] = CreateAAAtom(mol, " CZ ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 2);
|
|
CreateAABond(mol, atom[2], atom[4], 1);
|
|
CreateAABond(mol, atom[4], atom[6], 2);
|
|
CreateAABond(mol, atom[6], atom[5], 1);
|
|
CreateAABond(mol, atom[5], atom[3], 2);
|
|
CreateAABond(mol, atom[3], atom[1], 1);
|
|
} else if (!strcmp(aa, "PRO")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], r1, 1);
|
|
} else if (!strcmp(aa, "PCA")) {
|
|
info.setIsHeteroAtom(true);
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD ", info);
|
|
atom[3] = CreateAAAtom(mol, " OE ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 1);
|
|
CreateAABond(mol, atom[2], r1, 1);
|
|
CreateAABond(mol, atom[2], atom[3], 2);
|
|
}
|
|
break;
|
|
|
|
case 'S':
|
|
if (!strcmp(aa, "SER")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " OG ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
} else if (!strcmp(aa, "SAR")) {
|
|
info.setIsHeteroAtom(true);
|
|
r1 = CreateAAAtom(mol, " N ", info);
|
|
atom[0] = CreateAAAtom(mol, " CA ", info);
|
|
r2 = CreateAAAtom(mol, " C ", info);
|
|
atom[1] = CreateAAAtom(mol, " O ", info);
|
|
atom[2] = CreateAAAtom(mol, " CN ", info);
|
|
CreateAABond(mol, r1, atom[0], 1);
|
|
CreateAABond(mol, atom[0], r2, 1);
|
|
CreateAABond(mol, r2, atom[1], 2);
|
|
CreateAABond(mol, r1, atom[2], 1);
|
|
}
|
|
break;
|
|
|
|
case 'T':
|
|
if (!strcmp(aa, "THR")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " OG1", info);
|
|
atom[2] = CreateAAAtom(mol, " CG2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[0], atom[2], 1);
|
|
atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW);
|
|
} else if (!strcmp(aa, "TRP")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD1", info);
|
|
atom[3] = CreateAAAtom(mol, " CD2", info);
|
|
atom[4] = CreateAAAtom(mol, " NE1", info);
|
|
atom[5] = CreateAAAtom(mol, " CE2", info);
|
|
atom[6] = CreateAAAtom(mol, " CE3", info);
|
|
atom[7] = CreateAAAtom(mol, " CZ2", info);
|
|
atom[8] = CreateAAAtom(mol, " CZ3", info);
|
|
atom[9] = CreateAAAtom(mol, " CH2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 2);
|
|
CreateAABond(mol, atom[1], atom[3], 1);
|
|
CreateAABond(mol, atom[2], atom[4], 1);
|
|
CreateAABond(mol, atom[3], atom[5], 2);
|
|
CreateAABond(mol, atom[3], atom[6], 1);
|
|
CreateAABond(mol, atom[4], atom[5], 1);
|
|
CreateAABond(mol, atom[5], atom[7], 1);
|
|
CreateAABond(mol, atom[6], atom[8], 2);
|
|
CreateAABond(mol, atom[7], atom[9], 2);
|
|
CreateAABond(mol, atom[8], atom[9], 1);
|
|
} else if (!strcmp(aa, "TYR")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG ", info);
|
|
atom[2] = CreateAAAtom(mol, " CD1", info);
|
|
atom[3] = CreateAAAtom(mol, " CD2", info);
|
|
atom[4] = CreateAAAtom(mol, " CE1", info);
|
|
atom[5] = CreateAAAtom(mol, " CE2", info);
|
|
atom[6] = CreateAAAtom(mol, " CZ ", info);
|
|
atom[7] = CreateAAAtom(mol, " OH ", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[1], atom[2], 2);
|
|
CreateAABond(mol, atom[2], atom[4], 1);
|
|
CreateAABond(mol, atom[4], atom[6], 2);
|
|
CreateAABond(mol, atom[6], atom[5], 1);
|
|
CreateAABond(mol, atom[5], atom[3], 2);
|
|
CreateAABond(mol, atom[3], atom[1], 1);
|
|
CreateAABond(mol, atom[6], atom[7], 1);
|
|
}
|
|
break;
|
|
|
|
case 'V':
|
|
if (!strcmp(aa, "VAL")) {
|
|
CreateAABackbone(mol, r1, r2, atom[0], info, 1);
|
|
atom[1] = CreateAAAtom(mol, " CG1", info);
|
|
atom[2] = CreateAAAtom(mol, " CG2", info);
|
|
CreateAABond(mol, atom[0], atom[1], 1);
|
|
CreateAABond(mol, atom[0], atom[2], 1);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Builds a peptide molecule from a string of one letter amino acid codes.
//
// Parameters:
//   seq      - NUL terminated sequence; a null pointer yields a null result
//   sanitize - when true MolOps::sanitizeMol is run on the finished molecule
//   lowerD   - when true lower case letters produce the D form of the residue,
//              otherwise they are treated like their upper case counterpart
//
// Recognised characters:
//   upper case one letter codes  - the 20 standard L amino acids
//   lower case one letter codes  - see `lowerD` ('g' is always GLY)
//   space, tab, CR, LF, '-'      - ignored
//   '.'                          - terminates the current chain (adds OXT) and
//                                  starts a new, unconnected one
//
// Returns a newly allocated molecule that the caller must delete, or null when
// `seq` is null or contains any other character.  All residues are placed in
// chain "A" with residue numbers starting at 1.
RWMol *SequenceToMol(const char *seq, bool sanitize, bool lowerD) {
  if (!seq) return (RWMol *)0;

  // Carbonyl carbon of the previous residue, or null at the start of a chain.
  Atom *prev = (Atom *)0;
  AtomPDBResidueInfo info;
  info.setSerialNumber(1);
  info.setAltLoc(" ");
  info.setResidueNumber(0);
  info.setInsertionCode(" ");
  info.setChainId("A");

  RWMol *mol = new RWMol();
  for (; *seq; ++seq) {
    const char *resname = (const char *)0;

    switch (*seq) {
      // Whitespace and gap characters never affect the chain.  Space and tab
      // used to reset `prev`, which silently cut the peptide in two.
      case ' ':
      case '\t':
      case '\n':
      case '\r':
      case '-':
        continue;

      case '.':
        if (prev) {
          Atom *oxt = CreateAAAtom(mol, " OXT", info);
          CreateAABond(mol, prev, oxt, 1);
          prev = (Atom *)0;
        }
        continue;

      case 'A':
        resname = "ALA";
        break;
      case 'C':
        resname = "CYS";
        break;
      case 'D':
        resname = "ASP";
        break;
      case 'E':
        resname = "GLU";
        break;
      case 'F':
        resname = "PHE";
        break;
      case 'G':
      case 'g':  // glycine is achiral, so there is no D form
        resname = "GLY";
        break;
      case 'H':
        resname = "HIS";
        break;
      case 'I':
        resname = "ILE";
        break;
      case 'K':
        resname = "LYS";
        break;
      case 'L':
        resname = "LEU";
        break;
      case 'M':
        resname = "MET";
        break;
      case 'N':
        resname = "ASN";
        break;
      case 'P':
        resname = "PRO";
        break;
      case 'Q':
        resname = "GLN";
        break;
      case 'R':
        resname = "ARG";
        break;
      case 'S':
        resname = "SER";
        break;
      case 'T':
        resname = "THR";
        break;
      case 'V':
        resname = "VAL";
        break;
      case 'W':
        resname = "TRP";
        break;
      case 'Y':
        resname = "TYR";
        break;

      case 'a':
        resname = lowerD ? "DAL" : "ALA";
        break;
      case 'c':
        resname = lowerD ? "DCY" : "CYS";
        break;
      case 'd':
        resname = lowerD ? "DAS" : "ASP";
        break;
      case 'e':
        resname = lowerD ? "DGL" : "GLU";
        break;
      case 'f':
        resname = lowerD ? "DPN" : "PHE";
        break;
      case 'h':
        resname = lowerD ? "DHI" : "HIS";
        break;
      case 'i':
        resname = lowerD ? "DIL" : "ILE";
        break;
      case 'k':
        resname = lowerD ? "DLY" : "LYS";
        break;
      case 'l':
        resname = lowerD ? "DLE" : "LEU";
        break;
      case 'm':
        resname = lowerD ? "MED" : "MET";
        break;
      case 'n':
        resname = lowerD ? "DSG" : "ASN";
        break;
      case 'p':
        resname = lowerD ? "DPR" : "PRO";
        break;
      case 'q':
        resname = lowerD ? "DGN" : "GLN";
        break;
      case 'r':
        resname = lowerD ? "DAR" : "ARG";
        break;
      case 's':
        resname = lowerD ? "DSN" : "SER";
        break;
      case 't':
        resname = lowerD ? "DTH" : "THR";
        break;
      case 'v':
        resname = lowerD ? "DVA" : "VAL";
        break;
      case 'w':
        resname = lowerD ? "DTR" : "TRP";
        break;
      case 'y':
        resname = lowerD ? "DTY" : "TYR";
        break;

      default:
        // Unknown character: give up and release the partial molecule.
        delete mol;
        return (RWMol *)0;
    }

    Atom *r1 = (Atom *)0;
    Atom *r2 = (Atom *)0;
    Atom *r3 = (Atom *)0;
    CreateAminoAcid(mol, resname, r1, r2, r3, info);

    // Peptide bond from the previous carbonyl carbon to this nitrogen.
    if (prev && r1) CreateAABond(mol, prev, r1, 1);
    prev = r2;
  }

  // Cap the final chain with its terminal oxygen.
  if (prev) {
    Atom *oxt = CreateAAAtom(mol, " OXT", info);
    CreateAABond(mol, prev, oxt, 1);
  }

  if (sanitize) {
    try {
      MolOps::sanitizeMol(*mol);
    } catch (...) {
      // Do not leak the molecule when sanitization fails.
      delete mol;
      throw;
    }
  }
  return mol;
}
|
|
|
|
// std::string convenience overload: forwards the string's character data to
// the `const char *` version of SequenceToMol and returns its result.
RWMol *SequenceToMol(const std::string &seq, bool sanitize, bool lowerD) {
  const char *text = seq.c_str();
  return SequenceToMol(text, sanitize, lowerD);
}
|
|
|
|
// Builds a peptide molecule from a FASTA record.
//
// When the text starts with '>' everything up to (not including) the first
// CR/LF is taken as the title and stored in the molecule's
// common_properties::_Name property; the remainder is handed to SequenceToMol.
//
// Returns a newly allocated molecule that the caller must delete, or null when
// `seq` is null or SequenceToMol rejects the sequence.
RWMol *FASTAToMol(const char *seq, bool sanitize, bool lowerD) {
  if (!seq) return (RWMol *)0;

  std::string title;
  if (seq[0] == '>') {
    seq++;
    while (*seq && *seq != '\n' && *seq != '\r') title += *seq++;
  }

  RWMol *mol = SequenceToMol(seq, sanitize, lowerD);
  // SequenceToMol returns null for invalid input; only name a real molecule.
  if (mol && !title.empty()) mol->setProp(common_properties::_Name, title);
  return mol;
}
|
|
|
|
// std::string convenience overload: forwards the string's character data to
// the `const char *` version of FASTAToMol and returns its result.
RWMol *FASTAToMol(const std::string &seq, bool sanitize, bool lowerD) {
  const char *text = seq.c_str();
  return FASTAToMol(text, sanitize, lowerD);
}
|
|
|
|
struct HELMMonomer {
|
|
Atom *r1;
|
|
Atom *r2;
|
|
Atom *r3;
|
|
Atom *oxt;
|
|
|
|
HELMMonomer() : r1(0), r2(0), r3(0), oxt(0) {}
|
|
HELMMonomer(Atom *x, Atom *y, Atom *z) : r1(x), r2(y), r3(z), oxt(0) {}
|
|
};
|
|
|
|
// Maps an upper case one letter amino acid code to its three letter PDB
// residue name.  Returns null for every other character (including lower case
// letters and NUL).
static const char *GetHELMOneLetterCode(char ch) {
  // Parallel tables: kLetters[i] corresponds to kResidues[i].
  static const char kLetters[] = "ACDEFGHIKLMNPQRSTVWY";
  static const char *const kResidues[] = {
      "ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS", "LEU",
      "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL", "TRP", "TYR"};

  // strchr would match the terminating NUL of kLetters, so reject it first.
  if (ch == '\0') return (const char *)0;

  const char *hit = strchr(kLetters, ch);
  if (!hit) return (const char *)0;
  return kResidues[hit - kLetters];
}
|
|
|
|
// True when `ch` may appear inside a bracketed HELM monomer identifier:
// an ASCII letter (either case) or a decimal digit.
static bool IsHELMMonomerIDChar(char ch) {
  const bool upper = (ch >= 'A' && ch <= 'Z');
  const bool lower = (ch >= 'a' && ch <= 'z');
  const bool digit = (ch >= '0' && ch <= '9');
  return upper || lower || digit;
}
|
|
|
|
// Translates a HELM peptide monomer identifier (the text between '[' and ']',
// NUL terminated) into the three letter PDB residue name understood by
// CreateAminoAcid.
//
// Supported identifiers:
//   single upper case letters for the 20 standard amino acids,
//   "Abu", "Glp", "Nal", "Nle", "Nva", "Orn", "Sar", "seC",
//   and "d" + one letter code for the D amino acids (e.g. "dA" -> "DAL").
//
// Returns null when the identifier is not recognised.  The match is exact:
// trailing characters make an identifier unknown.
//
// NOTE(review): "NAL" is returned for "Nal", but the CreateAminoAcid code
// visible in this file has no builder for it.
static const char *LookupHELMPeptideMonomer(const char *ptr) {
  switch (ptr[0]) {
    case 'A':
      if (ptr[1] == '\0') return "ALA";
      if (ptr[1] == 'b' && ptr[2] == 'u' && ptr[3] == '\0') return "ABA";
      break;
    case 'C':
      if (ptr[1] == '\0') return "CYS";
      break;
    case 'D':
      if (ptr[1] == '\0') return "ASP";
      break;
    case 'E':
      if (ptr[1] == '\0') return "GLU";
      break;
    case 'F':
      if (ptr[1] == '\0') return "PHE";
      break;
    case 'G':
      if (ptr[1] == '\0') return "GLY";
      if (ptr[1] == 'l' && ptr[2] == 'p' && ptr[3] == '\0') return "PCA";
      break;
    case 'H':
      if (ptr[1] == '\0') return "HIS";
      break;
    case 'I':
      if (ptr[1] == '\0') return "ILE";
      break;
    case 'K':
      if (ptr[1] == '\0') return "LYS";
      break;
    case 'L':
      if (ptr[1] == '\0') return "LEU";
      break;
    case 'M':
      if (ptr[1] == '\0') return "MET";
      break;
    case 'N':
      if (ptr[1] == '\0') return "ASN";
      if (ptr[1] == 'a' && ptr[2] == 'l' && ptr[3] == '\0') return "NAL";
      if (ptr[1] == 'l' && ptr[2] == 'e' && ptr[3] == '\0') return "NLE";
      if (ptr[1] == 'v' && ptr[2] == 'a' && ptr[3] == '\0') return "NVA";
      break;
    case 'O':
      if (ptr[1] == 'r' && ptr[2] == 'n' && ptr[3] == '\0') return "ORN";
      break;
    case 'P':
      if (ptr[1] == '\0') return "PRO";
      break;
    case 'Q':
      if (ptr[1] == '\0') return "GLN";
      break;
    case 'R':
      if (ptr[1] == '\0') return "ARG";
      break;
    case 'S':
      if (ptr[1] == '\0') return "SER";
      if (ptr[1] == 'a' && ptr[2] == 'r' && ptr[3] == '\0') return "SAR";
      break;
    case 'T':
      if (ptr[1] == '\0') return "THR";
      break;
    case 'V':
      if (ptr[1] == '\0') return "VAL";
      break;
    case 'W':
      if (ptr[1] == '\0') return "TRP";
      break;
    case 'Y':
      if (ptr[1] == '\0') return "TYR";
      break;
    case 'd':
      // D amino acids: 'd' followed by the one letter code.
      switch (ptr[1]) {
        case 'A':
          if (ptr[2] == '\0') return "DAL";
          break;
        case 'C':
          if (ptr[2] == '\0') return "DCY";
          break;
        case 'D':
          if (ptr[2] == '\0') return "DAS";
          break;
        case 'E':
          if (ptr[2] == '\0') return "DGL";
          break;
        case 'F':
          if (ptr[2] == '\0') return "DPN";
          break;
        case 'H':
          if (ptr[2] == '\0') return "DHI";
          break;
        case 'I':
          if (ptr[2] == '\0') return "DIL";
          break;
        case 'K':
          if (ptr[2] == '\0') return "DLY";
          break;
        case 'L':
          if (ptr[2] == '\0') return "DLE";
          break;
        case 'M':
          if (ptr[2] == '\0') return "MED";
          break;
        case 'N':
          if (ptr[2] == '\0') return "DSG";
          break;
        case 'P':
          if (ptr[2] == '\0') return "DPR";
          break;
        case 'Q':
          if (ptr[2] == '\0') return "DGN";
          break;
        case 'R':
          if (ptr[2] == '\0') return "DAR";
          break;
        case 'S':
          if (ptr[2] == '\0') return "DSN";
          break;
        case 'T':
          if (ptr[2] == '\0') return "DTH";
          break;
        case 'V':
          if (ptr[2] == '\0') return "DVA";
          break;
        case 'W':
          if (ptr[2] == '\0') return "DTR";
          break;
        case 'Y':
          if (ptr[2] == '\0') return "DTY";
          break;
      }
      break;
    case 's':
      if (ptr[1] == 'e' && ptr[2] == 'C' && ptr[3] == '\0') return "MSE";
      break;
  }
  return (const char *)0;
}
|
|
|
|
// Parses the body of one HELM peptide polymer (the text after '{') and adds
// its residues to `mol`.
//
// Parameters:
//   mol   - molecule being assembled
//   ptr   - first character after the opening '{'
//   chain - chain identifier stored on every created atom
//   vseq  - out: one entry per parsed monomer (cleared on entry); r2 of an
//           entry is reset to null once its carbonyl carbon has been bonded to
//           the next monomer or to a terminal amide
//
// Grammar handled: an optional leading "[ac]." acetyl cap, then monomers
// separated by '.', each either a single letter or a bracketed identifier,
// optionally ending in ".[am]" for a C-terminal amide.
//
// Returns a pointer to the closing '}' on success and null on any syntax
// error, unknown monomer, or when the last monomer has no free carbonyl carbon
// to terminate.
static const char *ParseHELMPeptide(RWMol *mol, const char *ptr,
                                    const char *chain,
                                    std::vector<HELMMonomer> &vseq) {
  vseq.clear();
  if (*ptr == '}') return ptr;  // empty polymer

  AtomPDBResidueInfo info;
  info.setSerialNumber(1);
  info.setAltLoc(" ");
  info.setResidueNumber(0);
  info.setInsertionCode(" ");
  info.setChainId(chain);

  HELMMonomer monomer;

  // Optional N-terminal acetyl cap, which must be followed by '.'.
  if (strncmp(ptr, "[ac]", 4) == 0) {
    if (ptr[4] != '.') return (const char *)0;
    // CreateAminoAcid increments the residue number before using it.
    info.setResidueNumber(-2);
    CreateAminoAcid(mol, "ACE", monomer.r1, monomer.r2, monomer.r3, info);
    vseq.push_back(monomer);
    info.setResidueNumber(0);
    ptr += 5;
  }

  while (true) {
    // Resolve the next monomer to a PDB residue name.
    const char *resname = (const char *)0;
    if (*ptr == '[') {
      std::string id;
      for (++ptr; IsHELMMonomerIDChar(*ptr); ++ptr) id += *ptr;
      if (*ptr != ']') return (const char *)0;
      resname = LookupHELMPeptideMonomer(id.c_str());
    } else {
      resname = GetHELMOneLetterCode(*ptr);
    }
    if (!resname) return (const char *)0;
    ++ptr;

    CreateAminoAcid(mol, resname, monomer.r1, monomer.r2, monomer.r3, info);

    // Peptide bond from the previous monomer, consuming its carbonyl carbon.
    if (!vseq.empty() && vseq.back().r2 && monomer.r1) {
      CreateAABond(mol, vseq.back().r2, monomer.r1, 1);
      vseq.back().r2 = (Atom *)0;
    }
    vseq.push_back(monomer);

    if (*ptr == '}') {
      // End of the polymer: cap the last monomer with its terminal oxygen.
      if (!vseq.back().r2) return (const char *)0;
      Atom *oxt = CreateAAAtom(mol, " OXT", info);
      CreateAABond(mol, vseq.back().r2, oxt, 1);
      vseq.back().oxt = oxt;
      return ptr;
    }

    if (*ptr != '.') return (const char *)0;

    // ".[am]}" : C-terminal amide, stored as its own "NH2" hetero residue.
    if (strncmp(ptr + 1, "[am]}", 5) == 0) {
      if (!vseq.back().r2) return (const char *)0;
      const int resno = info.getResidueNumber();
      info.setResidueNumber(resno + 1);
      info.setIsHeteroAtom(true);
      info.setResidueName("NH2");
      Atom *amideN = CreateAAAtom(mol, " N ", info);
      CreateAABond(mol, vseq.back().r2, amideN, 1);
      vseq.back().r2 = (Atom *)0;
      // Placeholder entry so the amide occupies a monomer position.
      vseq.push_back(HELMMonomer());
      return ptr + 5;  // points at the closing '}'
    }

    ++ptr;  // skip the '.' separator and parse the next monomer
  }
}
|
|
|
|
// Steps over a polymer identifier of the form "PEPTIDE", a digit 1-9 and
// any further digits.  Returns the position after the identifier, or a null
// pointer if `ptr` does not start with one.
static const char *SkipHELMPeptideId(const char *ptr) {
  if (strncmp(ptr, "PEPTIDE", 7) != 0) return (const char *)0;
  if (ptr[7] < '1' || ptr[7] > '9') return (const char *)0;
  ptr += 8;
  while (*ptr >= '0' && *ptr <= '9') ptr++;
  return ptr;
}

// Reads one end of a connection, "<residue>:R<digit>", advancing `ptr` past
// it.  The residue number must start with a digit 1-9; the R-group digit
// must be 1-9.  Returns false (outputs unspecified) on malformed text.
static bool ReadHELMConnectionEnd(const char *&ptr, unsigned int &resno,
                                  unsigned int &rgroup) {
  if (*ptr < '1' || *ptr > '9') return false;
  resno = 0;
  do {
    resno = 10 * resno + ((*ptr++) - '0');
  } while (*ptr >= '0' && *ptr <= '9');

  if (ptr[0] != ':' || ptr[1] != 'R' || ptr[2] < '1' || ptr[2] > '9')
    return false;
  rgroup = ptr[2] - '0';
  ptr += 3;
  return true;
}

// Parses a HELM string consisting of PEPTIDE polymers and, optionally, a
// connection section, building the corresponding atoms and bonds in `mol`.
//
// Accepted layout:
//   PEPTIDE<n>{...}|PEPTIDE<m>{...}  polymers, '|'-separated; the string may
//                                    end right after the last polymer
//   $                                end of the polymer section
//   id,id,res:Rn-res:Rn|...          connections, '|'-separated, or nothing
//   $$$                              remaining (empty) sections
//
// Supported connections are R3-R3, R1-R2 and R2-R1.  Returns true on
// success and false on any syntax or consistency error; `mol` may already
// have been modified when false is returned.
static bool ParseHELM(RWMol *mol, const char *ptr) {
  typedef std::vector<HELMMonomer> MonomerSeq;
  std::map<std::string, MonomerSeq> seqs;
  char chain[2] = {'A', '\0'};

  // ---- polymer section ----
  for (;;) {
    const char *idStart = ptr;
    ptr = SkipHELMPeptideId(ptr);
    if (!ptr || *ptr != '{') return false;

    std::string id(idStart, ptr - idStart);
    // The chain letter is derived from the first digit of the polymer id.
    chain[0] = 'A' + (idStart[7] - '1');
    ptr = ParseHELMPeptide(mol, ptr + 1, chain, seqs[id]);
    if (!ptr || *ptr != '}') return false;
    ptr++;

    if (*ptr == '$') break;
    if (*ptr == '\0') return true;  // polymers only, no further sections
    if (*ptr != '|') return false;
    ptr++;
  }
  ptr++;  // step over the '$' that closed the polymer section

  // No connections: the rest must be the three remaining '$'.
  if (ptr[0] == '$' && ptr[1] == '$' && ptr[2] == '$') return true;

  // ---- connection section ----
  for (;;) {
    const char *idStart = ptr;
    ptr = SkipHELMPeptideId(ptr);
    if (!ptr || *ptr != ',') return false;
    std::string id1(idStart, ptr - idStart);
    ptr++;

    idStart = ptr;
    ptr = SkipHELMPeptideId(ptr);
    if (!ptr || *ptr != ',') return false;
    std::string id2(idStart, ptr - idStart);
    ptr++;

    unsigned int res1;
    unsigned int res1r;
    unsigned int res2;
    unsigned int res2r;
    if (!ReadHELMConnectionEnd(ptr, res1, res1r)) return false;
    if (*ptr != '-') return false;
    ptr++;
    if (!ReadHELMConnectionEnd(ptr, res2, res2r)) return false;

    // printf("%s:%u:R%u - %s:%u:R%u\n",id1.c_str(),res1,res1r,
    //        id2.c_str(),res2,res2r);

    if (res1 < 1 || res2 < 1) return false;

    // Both polymers must have been declared, and the 1-based residue
    // numbers must fall inside them.
    std::map<std::string, MonomerSeq>::iterator pos1 = seqs.find(id1);
    std::map<std::string, MonomerSeq>::iterator pos2 = seqs.find(id2);
    if (pos1 == seqs.end() || pos2 == seqs.end()) return false;
    MonomerSeq &seq1 = pos1->second;
    MonomerSeq &seq2 = pos2->second;
    if (res1 > (unsigned int)seq1.size()) return false;
    if (res2 > (unsigned int)seq2.size()) return false;

    // Note: both references may denote the same monomer.
    HELMMonomer &mono1 = seq1[res1 - 1];
    HELMMonomer &mono2 = seq2[res2 - 1];

    if (res1r == 3 && res2r == 3) {
      // Side chain to side chain.
      if (!mono1.r3 || !mono2.r3 || mono1.r3 == mono2.r3) return false;
      CreateAABond(mol, mono1.r3, mono2.r3, 1);
      mono1.r3 = (Atom *)0;
      mono2.r3 = (Atom *)0;
    } else if (res1r == 1 && res2r == 2) {
      // R1 of the first monomer to R2 of the second: the OXT that
      // terminated the second monomer's chain is removed first.
      if (!mono1.r1 || !mono2.r2 || !mono2.oxt || mono1.r1 == mono2.r2)
        return false;
      mol->removeAtom(mono2.oxt);
      CreateAABond(mol, mono1.r1, mono2.r2, 1);
      mono1.r1 = (Atom *)0;
      mono2.r2 = (Atom *)0;
    } else if (res1r == 2 && res2r == 1) {
      // R2 of the first monomer to R1 of the second: the OXT that
      // terminated the first monomer's chain is removed first.
      if (!mono2.r1 || !mono1.r2 || !mono1.oxt || mono2.r1 == mono1.r2)
        return false;
      mol->removeAtom(mono1.oxt);
      CreateAABond(mol, mono1.r2, mono2.r1, 1);
      mono1.r2 = (Atom *)0;
      mono2.r1 = (Atom *)0;
    } else {
      return false;
    }

    if (*ptr == '$') break;
    if (*ptr != '|') return false;
    ptr++;
  }
  ptr++;  // step over the '$' that closed the connection section

  return ptr[0] == '$' && ptr[1] == '$';
}
|
|
|
|
// Builds a molecule from a HELM string.
//
//   helm     - NUL-terminated HELM text; a null pointer is treated as a
//              parse failure
//   sanitize - when true, MolOps::sanitizeMol() is run on the parsed
//              molecule before it is returned
//
// Returns a newly allocated RWMol that the caller owns (an empty molecule
// for the input "$$$$"), or a null pointer if the string cannot be parsed.
// Any exception raised while parsing or sanitizing is propagated to the
// caller after the partially built molecule has been released.
RWMol *HELMToMol(const char *helm, bool sanitize) {
  // Reject a null input before allocating anything.
  if (!helm) return (RWMol *)0;

  RWMol *mol = new RWMol();

  const char *ptr = helm;
  // "$$$$" is the empty HELM string: hand back the empty molecule as-is.
  if (ptr[0] == '$' && ptr[1] == '$' && ptr[2] == '$' && ptr[3] == '$')
    return mol;

  try {
    if (ParseHELM(mol, ptr)) {
      if (sanitize) MolOps::sanitizeMol(*mol);
      return mol;
    }
  } catch (...) {
    // Without this the molecule would leak when parsing or sanitization
    // throws, since nothing else owns it yet.
    delete mol;
    throw;
  }
  delete mol;
  return (RWMol *)0;
}
|
|
|
|
// Convenience overload: builds a molecule from a HELM string held in a
// std::string by forwarding its character data to the const char* overload.
// The result and the meaning of `sanitize` are those of that overload.
RWMol *HELMToMol(const std::string &helm, bool sanitize) {
  const char *text = helm.c_str();
  return HELMToMol(text, sanitize);
}
|
|
|
|
} // namespace RDKit
|