// // Copyright (C) 2015,2016 Greg Landrum and NextMove Software // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include "SequenceParsers.h" #include #include #include namespace RDKit { static Atom *CreateAAAtom(RWMol *mol, const char *name, AtomPDBResidueInfo &info) { Atom *atom = (Atom *)nullptr; if (name[0] == ' ' && name[1] == 'C') { atom = new Atom(6); } else if (name[0] == ' ' && name[1] == 'N') { atom = new Atom(7); } else if (name[0] == ' ' && name[1] == 'O') { atom = new Atom(8); } else if (name[0] == ' ' && name[1] == 'P') { atom = new Atom(15); } else if (name[0] == ' ' && name[1] == 'S') { atom = new Atom(16); } else if (name[0] == 'S' && name[1] == 'E') { atom = new Atom(34); } else atom = new Atom(0); mol->addAtom(atom, true, true); AtomPDBResidueInfo *copy = (AtomPDBResidueInfo *)info.copy(); copy->setName(name); atom->setMonomerInfo(copy); unsigned int serno = info.getSerialNumber(); info.setSerialNumber(serno + 1); return atom; } static void CreateAABond(RWMol *mol, Atom *beg, Atom *end, unsigned int order) { Bond *bond; if (order == 2) bond = new Bond(Bond::DOUBLE); else bond = new Bond(Bond::SINGLE); bond->setOwningMol(mol); bond->setBeginAtom(beg); bond->setEndAtom(end); mol->addBond(bond, true); } static void CreateAABackbone(RWMol *mol, Atom *&r1, Atom *&r2, Atom *&cb, AtomPDBResidueInfo &info, int ldstereo) { r1 = CreateAAAtom(mol, " N ", info); Atom *ca = CreateAAAtom(mol, " CA ", info); r2 = CreateAAAtom(mol, " C ", info); Atom *o = CreateAAAtom(mol, " O ", info); cb = CreateAAAtom(mol, " CB ", info); CreateAABond(mol, r1, ca, 1); CreateAABond(mol, ca, r2, 1); CreateAABond(mol, r2, o, 2); CreateAABond(mol, ca, cb, 1); if (ldstereo > 0) // L-stereo ca->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW); else if (ldstereo < 0) // D-stereo ca->setChiralTag(Atom::CHI_TETRAHEDRAL_CW); } // aa is a three letter PDB residue code static void CreateAminoAcid(RWMol *mol, const char *aa, Atom *&r1, Atom *&r2, Atom *&r3, AtomPDBResidueInfo &info) { Atom *atom[10]; r1 = (Atom *)nullptr; r2 = (Atom *)nullptr; r3 = (Atom *)nullptr; int resno = info.getResidueNumber(); info.setResidueNumber(resno + 1); info.setIsHeteroAtom(false); info.setResidueName(aa); // Standard amino acids before non-standard, in PDB code alphabetical order switch (aa[0]) { case 'A': if (!strcmp(aa, "ALA")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); } else if (!strcmp(aa, "ARG")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD ", info); atom[3] = CreateAAAtom(mol, " NE ", info); atom[4] = CreateAAAtom(mol, " CZ ", info); atom[5] = CreateAAAtom(mol, " NH1", info); atom[6] = CreateAAAtom(mol, " NH2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 1); CreateAABond(mol, atom[3], atom[4], 1); CreateAABond(mol, atom[4], atom[5], 2); CreateAABond(mol, atom[4], atom[6], 1); } else if (!strcmp(aa, "ASP")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " OD1", info); atom[3] = CreateAAAtom(mol, " OD2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 2); CreateAABond(mol, atom[1], atom[3], 1); } else if (!strcmp(aa, "ASN")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " OD1", info); atom[3] = CreateAAAtom(mol, " ND2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 2); CreateAABond(mol, atom[1], atom[3], 1); } else if (!strcmp(aa, "ABA")) { info.setIsHeteroAtom(true); CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); CreateAABond(mol, atom[0], atom[1], 1); } else if (!strcmp(aa, "ACE")) { info.setIsHeteroAtom(true); r2 = CreateAAAtom(mol, " C ", info); atom[0] = CreateAAAtom(mol, " O ", info); atom[1] = CreateAAAtom(mol, " CH3", info); CreateAABond(mol, r2, atom[1], 1); CreateAABond(mol, r2, atom[0], 2); } break; case 'C': if (!strcmp(aa, "CYS")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); r3 = CreateAAAtom(mol, " SG ", info); CreateAABond(mol, atom[0], r3, 1); } break; case 'D': if (!strcmp(aa, "DAL")) { info.setIsHeteroAtom(true); CreateAABackbone(mol, r1, r2, atom[0], info, -1); } else if (!strcmp(aa, "DAR")) { CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD ", info); atom[3] = CreateAAAtom(mol, " NE ", info); atom[4] = CreateAAAtom(mol, " CZ ", info); atom[5] = CreateAAAtom(mol, " NH1", info); atom[6] = CreateAAAtom(mol, " NH2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 1); CreateAABond(mol, atom[3], atom[4], 1); CreateAABond(mol, atom[4], atom[5], 2); CreateAABond(mol, atom[4], atom[6], 1); } else if (!strcmp(aa, "DAS")) { CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " OD1", info); atom[3] = CreateAAAtom(mol, " OD2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 2); CreateAABond(mol, atom[1], atom[3], 1); } else if (!strcmp(aa, "DBB")) { info.setIsHeteroAtom(true); CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); CreateAABond(mol, atom[0], atom[1], 1); } else if (!strcmp(aa, "DCY")) { info.setIsHeteroAtom(true); CreateAABackbone(mol, r1, r2, atom[0], info, -1); r3 = CreateAAAtom(mol, " SG ", info); CreateAABond(mol, atom[0], r3, 1); } else if (!strcmp(aa, "DGL")) { CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD ", info); atom[3] = CreateAAAtom(mol, " OE1", info); atom[4] = CreateAAAtom(mol, " OE2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 2); CreateAABond(mol, atom[2], atom[4], 1); } else if (!strcmp(aa, "DGN")) { CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD ", info); atom[3] = CreateAAAtom(mol, " OE1", info); atom[4] = CreateAAAtom(mol, " NE2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 2); CreateAABond(mol, atom[2], atom[4], 1); } else if (!strcmp(aa, "DHI")) { info.setIsHeteroAtom(true); CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " ND1", info); atom[3] = CreateAAAtom(mol, " CD2", info); atom[4] = CreateAAAtom(mol, " CE1", info); atom[5] = CreateAAAtom(mol, " NE2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[4], 2); CreateAABond(mol, atom[4], atom[5], 1); CreateAABond(mol, atom[5], atom[3], 1); CreateAABond(mol, atom[3], atom[1], 2); } else if (!strcmp(aa, "DIL")) { CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " CG1", info); atom[2] = CreateAAAtom(mol, " CG2", info); atom[3] = CreateAAAtom(mol, " CD1", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[0], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 1); atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW); } else if (!strcmp(aa, "DLE")) { CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD1", info); atom[3] = CreateAAAtom(mol, " CD2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[1], atom[3], 1); } else if (!strcmp(aa, "DLY")) { CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD ", info); atom[3] = CreateAAAtom(mol, " CE ", info); atom[4] = CreateAAAtom(mol, " NZ ", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 1); CreateAABond(mol, atom[3], atom[4], 1); } else if (!strcmp(aa, "DPN")) { info.setIsHeteroAtom(true); CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD1", info); atom[3] = CreateAAAtom(mol, " CD2", info); atom[4] = CreateAAAtom(mol, " CE1", info); atom[5] = CreateAAAtom(mol, " CE2", info); atom[6] = CreateAAAtom(mol, " CZ ", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 2); CreateAABond(mol, atom[2], atom[4], 1); CreateAABond(mol, atom[4], atom[6], 2); CreateAABond(mol, atom[6], atom[5], 1); CreateAABond(mol, atom[5], atom[3], 2); CreateAABond(mol, atom[3], atom[1], 1); } else if (!strcmp(aa, "DPR")) { CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD ", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], r1, 1); } else if (!strcmp(aa, "DSG")) { CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " OD1", info); atom[3] = CreateAAAtom(mol, " ND2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 2); CreateAABond(mol, atom[1], atom[3], 1); } else if (!strcmp(aa, "DTH")) { CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " OG1", info); atom[2] = CreateAAAtom(mol, " CG2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[0], atom[2], 1); atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CW); } else if (!strcmp(aa, "DTR")) { CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD1", info); atom[3] = CreateAAAtom(mol, " CD2", info); atom[4] = CreateAAAtom(mol, " NE1", info); atom[5] = CreateAAAtom(mol, " CE2", info); atom[6] = CreateAAAtom(mol, " CE3", info); atom[7] = CreateAAAtom(mol, " CZ2", info); atom[8] = CreateAAAtom(mol, " CZ3", info); atom[9] = CreateAAAtom(mol, " CH2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 2); CreateAABond(mol, atom[1], atom[3], 1); CreateAABond(mol, atom[2], atom[4], 1); CreateAABond(mol, atom[3], atom[5], 2); CreateAABond(mol, atom[3], atom[6], 1); CreateAABond(mol, atom[4], atom[5], 1); CreateAABond(mol, atom[5], atom[7], 1); CreateAABond(mol, atom[6], atom[8], 2); CreateAABond(mol, atom[7], atom[9], 2); CreateAABond(mol, atom[8], atom[9], 1); } else if (!strcmp(aa, "DVA")) { info.setIsHeteroAtom(true); CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " CG1", info); atom[2] = CreateAAAtom(mol, " CG2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[0], atom[2], 1); } break; case 'G': if (!strcmp(aa, "GLN")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD ", info); atom[3] = CreateAAAtom(mol, " OE1", info); atom[4] = CreateAAAtom(mol, " NE2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 2); CreateAABond(mol, atom[2], atom[4], 1); } else if (!strcmp(aa, "GLU")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD ", info); atom[3] = CreateAAAtom(mol, " OE1", info); atom[4] = CreateAAAtom(mol, " OE2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 2); CreateAABond(mol, atom[2], atom[4], 1); } else if (!strcmp(aa, "GLY")) { r1 = CreateAAAtom(mol, " N ", info); atom[0] = CreateAAAtom(mol, " CA ", info); r2 = CreateAAAtom(mol, " C ", info); atom[1] = CreateAAAtom(mol, " O ", info); CreateAABond(mol, r1, atom[0], 1); CreateAABond(mol, atom[0], r2, 1); CreateAABond(mol, r2, atom[1], 2); } break; case 'H': if (!strcmp(aa, "HIS")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " ND1", info); atom[3] = CreateAAAtom(mol, " CD2", info); atom[4] = CreateAAAtom(mol, " CE1", info); atom[5] = CreateAAAtom(mol, " NE2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[4], 2); CreateAABond(mol, atom[4], atom[5], 1); CreateAABond(mol, atom[5], atom[3], 1); CreateAABond(mol, atom[3], atom[1], 2); } break; case 'I': if (!strcmp(aa, "ILE")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG1", info); atom[2] = CreateAAAtom(mol, " CG2", info); atom[3] = CreateAAAtom(mol, " CD1", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[0], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 1); atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CW); } break; case 'L': if (!strcmp(aa, "LEU")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD1", info); atom[3] = CreateAAAtom(mol, " CD2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[1], atom[3], 1); } else if (!strcmp(aa, "LYS")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD ", info); atom[3] = CreateAAAtom(mol, " CE ", info); atom[4] = CreateAAAtom(mol, " NZ ", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 1); CreateAABond(mol, atom[3], atom[4], 1); } break; case 'M': if (!strcmp(aa, "MET")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " SD ", info); atom[3] = CreateAAAtom(mol, " CE ", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 1); } else if (!strcmp(aa, "MED")) { info.setIsHeteroAtom(true); CreateAABackbone(mol, r1, r2, atom[0], info, -1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " SD ", info); atom[3] = CreateAAAtom(mol, " CE ", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 1); } else if (!strcmp(aa, "MSE")) { info.setIsHeteroAtom(true); CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, "SE ", info); atom[3] = CreateAAAtom(mol, " CE ", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 1); } break; case 'N': if (!strcmp(aa, "NLE")) { info.setIsHeteroAtom(true); CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD ", info); atom[3] = CreateAAAtom(mol, " CE ", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 1); } else if (!strcmp(aa, "NVA")) { info.setIsHeteroAtom(true); CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD ", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); } break; case 'O': if (!strcmp(aa, "ORN")) { info.setIsHeteroAtom(true); CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD ", info); atom[3] = CreateAAAtom(mol, " NE ", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], atom[3], 1); } break; case 'P': if (!strcmp(aa, "PHE")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD1", info); atom[3] = CreateAAAtom(mol, " CD2", info); atom[4] = CreateAAAtom(mol, " CE1", info); atom[5] = CreateAAAtom(mol, " CE2", info); atom[6] = CreateAAAtom(mol, " CZ ", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 2); CreateAABond(mol, atom[2], atom[4], 1); CreateAABond(mol, atom[4], atom[6], 2); CreateAABond(mol, atom[6], atom[5], 1); CreateAABond(mol, atom[5], atom[3], 2); CreateAABond(mol, atom[3], atom[1], 1); } else if (!strcmp(aa, "PRO")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD ", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], r1, 1); } else if (!strcmp(aa, "PCA")) { info.setIsHeteroAtom(true); CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD ", info); atom[3] = CreateAAAtom(mol, " OE ", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 1); CreateAABond(mol, atom[2], r1, 1); CreateAABond(mol, atom[2], atom[3], 2); } break; case 'S': if (!strcmp(aa, "SER")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " OG ", info); CreateAABond(mol, atom[0], atom[1], 1); } else if (!strcmp(aa, "SAR")) { info.setIsHeteroAtom(true); r1 = CreateAAAtom(mol, " N ", info); atom[0] = CreateAAAtom(mol, " CA ", info); r2 = CreateAAAtom(mol, " C ", info); atom[1] = CreateAAAtom(mol, " O ", info); atom[2] = CreateAAAtom(mol, " CN ", info); CreateAABond(mol, r1, atom[0], 1); CreateAABond(mol, atom[0], r2, 1); CreateAABond(mol, r2, atom[1], 2); CreateAABond(mol, r1, atom[2], 1); } break; case 'T': if (!strcmp(aa, "THR")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " OG1", info); atom[2] = CreateAAAtom(mol, " CG2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[0], atom[2], 1); atom[0]->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW); } else if (!strcmp(aa, "TRP")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD1", info); atom[3] = CreateAAAtom(mol, " CD2", info); atom[4] = CreateAAAtom(mol, " NE1", info); atom[5] = CreateAAAtom(mol, " CE2", info); atom[6] = CreateAAAtom(mol, " CE3", info); atom[7] = CreateAAAtom(mol, " CZ2", info); atom[8] = CreateAAAtom(mol, " CZ3", info); atom[9] = CreateAAAtom(mol, " CH2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 2); CreateAABond(mol, atom[1], atom[3], 1); CreateAABond(mol, atom[2], atom[4], 1); CreateAABond(mol, atom[3], atom[5], 2); CreateAABond(mol, atom[3], atom[6], 1); CreateAABond(mol, atom[4], atom[5], 1); CreateAABond(mol, atom[5], atom[7], 1); CreateAABond(mol, atom[6], atom[8], 2); CreateAABond(mol, atom[7], atom[9], 2); CreateAABond(mol, atom[8], atom[9], 1); } else if (!strcmp(aa, "TYR")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG ", info); atom[2] = CreateAAAtom(mol, " CD1", info); atom[3] = CreateAAAtom(mol, " CD2", info); atom[4] = CreateAAAtom(mol, " CE1", info); atom[5] = CreateAAAtom(mol, " CE2", info); atom[6] = CreateAAAtom(mol, " CZ ", info); atom[7] = CreateAAAtom(mol, " OH ", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 2); CreateAABond(mol, atom[2], atom[4], 1); CreateAABond(mol, atom[4], atom[6], 2); CreateAABond(mol, atom[6], atom[5], 1); CreateAABond(mol, atom[5], atom[3], 2); CreateAABond(mol, atom[3], atom[1], 1); CreateAABond(mol, atom[6], atom[7], 1); } break; case 'V': if (!strcmp(aa, "VAL")) { CreateAABackbone(mol, r1, r2, atom[0], info, 1); atom[1] = CreateAAAtom(mol, " CG1", info); atom[2] = CreateAAAtom(mol, " CG2", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[0], atom[2], 1); } break; } } static RWMol *AASequenceToMol(const char *seq, bool lowerD) { AtomPDBResidueInfo info; Atom *prev = (Atom *)nullptr; char chain[2]; chain[0] = 'A'; chain[1] = '\0'; info.setSerialNumber(1); info.setAltLoc(" "); info.setResidueNumber(0); info.setInsertionCode(" "); info.setChainId(chain); auto *mol = new RWMol(); while (*seq) { Atom *r1 = (Atom *)nullptr; Atom *r2 = (Atom *)nullptr; Atom *r3 = (Atom *)nullptr; switch (*seq) { case '\n': case '\r': case '-': seq++; continue; case ' ': case '\t': case '\0': break; case '.': if (prev) { Atom *oxt = CreateAAAtom(mol, " OXT", info); CreateAABond(mol, prev, oxt, 1); if (chain[0] < 'Z') { chain[0]++; info.setChainId(chain); } info.setResidueNumber(0); prev = (Atom *)nullptr; } seq++; continue; default: delete mol; return (RWMol *)nullptr; case 'A': CreateAminoAcid(mol, "ALA", r1, r2, r3, info); break; case 'C': CreateAminoAcid(mol, "CYS", r1, r2, r3, info); break; case 'D': CreateAminoAcid(mol, "ASP", r1, r2, r3, info); break; case 'E': CreateAminoAcid(mol, "GLU", r1, r2, r3, info); break; case 'F': CreateAminoAcid(mol, "PHE", r1, r2, r3, info); break; case 'G': case 'g': CreateAminoAcid(mol, "GLY", r1, r2, r3, info); break; case 'H': CreateAminoAcid(mol, "HIS", r1, r2, r3, info); break; case 'I': CreateAminoAcid(mol, "ILE", r1, r2, r3, info); break; case 'K': CreateAminoAcid(mol, "LYS", r1, r2, r3, info); break; case 'L': CreateAminoAcid(mol, "LEU", r1, r2, r3, info); break; case 'M': CreateAminoAcid(mol, "MET", r1, r2, r3, info); break; case 'N': CreateAminoAcid(mol, "ASN", r1, r2, r3, info); break; case 'P': CreateAminoAcid(mol, "PRO", r1, r2, r3, info); break; case 'Q': CreateAminoAcid(mol, "GLN", r1, r2, r3, info); break; case 'R': CreateAminoAcid(mol, "ARG", r1, r2, r3, info); break; case 'S': CreateAminoAcid(mol, "SER", r1, r2, r3, info); break; case 'T': CreateAminoAcid(mol, "THR", r1, r2, r3, info); break; case 'V': CreateAminoAcid(mol, "VAL", r1, r2, r3, info); break; case 'W': CreateAminoAcid(mol, "TRP", r1, r2, r3, info); break; case 'Y': CreateAminoAcid(mol, "TYR", r1, r2, r3, info); break; case 'a': CreateAminoAcid(mol, lowerD ? "DAL" : "ALA", r1, r2, r3, info); break; case 'c': CreateAminoAcid(mol, lowerD ? "DCY" : "CYS", r1, r2, r3, info); break; case 'd': CreateAminoAcid(mol, lowerD ? "DAS" : "ASP", r1, r2, r3, info); break; case 'e': CreateAminoAcid(mol, lowerD ? "DGL" : "GLU", r1, r2, r3, info); break; case 'f': CreateAminoAcid(mol, lowerD ? "DPN" : "PHE", r1, r2, r3, info); break; case 'h': CreateAminoAcid(mol, lowerD ? "DHI" : "HIS", r1, r2, r3, info); break; case 'i': CreateAminoAcid(mol, lowerD ? "DIL" : "ILE", r1, r2, r3, info); break; case 'k': CreateAminoAcid(mol, lowerD ? "DLY" : "LYS", r1, r2, r3, info); break; case 'l': CreateAminoAcid(mol, lowerD ? "DLE" : "LEU", r1, r2, r3, info); break; case 'm': CreateAminoAcid(mol, lowerD ? "MED" : "MET", r1, r2, r3, info); break; case 'n': CreateAminoAcid(mol, lowerD ? "DSG" : "ASN", r1, r2, r3, info); break; case 'p': CreateAminoAcid(mol, lowerD ? "DPR" : "PRO", r1, r2, r3, info); break; case 'q': CreateAminoAcid(mol, lowerD ? "DGN" : "GLN", r1, r2, r3, info); break; case 'r': CreateAminoAcid(mol, lowerD ? "DAR" : "ARG", r1, r2, r3, info); break; case 's': CreateAminoAcid(mol, lowerD ? "DSN" : "SER", r1, r2, r3, info); break; case 't': CreateAminoAcid(mol, lowerD ? "DTH" : "THR", r1, r2, r3, info); break; case 'v': CreateAminoAcid(mol, lowerD ? "DVA" : "VAL", r1, r2, r3, info); break; case 'w': CreateAminoAcid(mol, lowerD ? "DTR" : "TRP", r1, r2, r3, info); break; case 'y': CreateAminoAcid(mol, lowerD ? "DTY" : "TYR", r1, r2, r3, info); break; } if (prev && r1) CreateAABond(mol, prev, r1, 1); prev = r2; seq++; } if (prev) { Atom *oxt = CreateAAAtom(mol, " OXT", info); CreateAABond(mol, prev, oxt, 1); } return mol; } static void CreateNucleicAcid(RWMol *mol, const char *na, Atom *&r1, Atom *&r2, AtomPDBResidueInfo &info, bool PCap5) { Atom *atom[32]; r1 = (Atom *)nullptr; r2 = (Atom *)nullptr; int resno = info.getResidueNumber(); info.setResidueNumber(resno + 1); info.setIsHeteroAtom(false); info.setResidueName(na); if (resno) { atom[1] = CreateAAAtom(mol, " P ", info); atom[2] = CreateAAAtom(mol, " OP1", info); atom[3] = CreateAAAtom(mol, " OP2", info); atom[4] = CreateAAAtom(mol, " O5'", info); CreateAABond(mol, atom[1], atom[2], 2); CreateAABond(mol, atom[1], atom[3], 1); CreateAABond(mol, atom[1], atom[4], 1); r1 = atom[1]; } else if (PCap5) { atom[0] = CreateAAAtom(mol, " OP3", info); atom[1] = CreateAAAtom(mol, " P ", info); atom[2] = CreateAAAtom(mol, " OP1", info); atom[3] = CreateAAAtom(mol, " OP2", info); atom[4] = CreateAAAtom(mol, " O5'", info); CreateAABond(mol, atom[0], atom[1], 1); CreateAABond(mol, atom[1], atom[2], 2); CreateAABond(mol, atom[1], atom[3], 1); CreateAABond(mol, atom[1], atom[4], 1); r1 = atom[0]; } else { atom[4] = CreateAAAtom(mol, " O5'", info); r1 = atom[4]; } atom[5] = CreateAAAtom(mol, " C5'", info); atom[6] = CreateAAAtom(mol, " C4'", info); atom[7] = CreateAAAtom(mol, " O4'", info); atom[8] = CreateAAAtom(mol, " C3'", info); atom[9] = CreateAAAtom(mol, " O3'", info); atom[10] = CreateAAAtom(mol, " C2'", info); CreateAABond(mol, atom[4], atom[5], 1); CreateAABond(mol, atom[5], atom[6], 1); CreateAABond(mol, atom[6], atom[7], 1); CreateAABond(mol, atom[6], atom[8], 1); CreateAABond(mol, atom[8], atom[9], 1); CreateAABond(mol, atom[8], atom[10], 1); atom[6]->setChiralTag(Atom::CHI_TETRAHEDRAL_CW); atom[8]->setChiralTag(Atom::CHI_TETRAHEDRAL_CW); if (na[1]==' ') { atom[11] = CreateAAAtom(mol, " O2'", info); atom[12] = CreateAAAtom(mol, " C1'", info); CreateAABond(mol, atom[10], atom[11], 1); CreateAABond(mol, atom[7], atom[12], 1); CreateAABond(mol, atom[10], atom[12], 1); atom[10]->setChiralTag(Atom::CHI_TETRAHEDRAL_CW); } else { atom[12] = CreateAAAtom(mol, " C1'", info); CreateAABond(mol, atom[7], atom[12], 1); CreateAABond(mol, atom[10], atom[12], 1); } r2 = atom[9]; switch (na[2]) { case 'A': atom[13] = CreateAAAtom(mol, " N9 ", info); atom[14] = CreateAAAtom(mol, " C8 ", info); atom[15] = CreateAAAtom(mol, " N7 ", info); atom[16] = CreateAAAtom(mol, " C5 ", info); atom[17] = CreateAAAtom(mol, " C6 ", info); atom[18] = CreateAAAtom(mol, " N6 ", info); atom[19] = CreateAAAtom(mol, " N1 ", info); atom[20] = CreateAAAtom(mol, " C2 ", info); atom[21] = CreateAAAtom(mol, " N3 ", info); atom[22] = CreateAAAtom(mol, " C4 ", info); CreateAABond(mol, atom[12], atom[13], 1); CreateAABond(mol, atom[13], atom[14], 1); CreateAABond(mol, atom[13], atom[22], 1); CreateAABond(mol, atom[14], atom[15], 2); CreateAABond(mol, atom[15], atom[16], 1); CreateAABond(mol, atom[16], atom[17], 1); CreateAABond(mol, atom[16], atom[22], 2); CreateAABond(mol, atom[17], atom[18], 1); CreateAABond(mol, atom[17], atom[19], 2); CreateAABond(mol, atom[19], atom[20], 1); CreateAABond(mol, atom[20], atom[21], 2); CreateAABond(mol, atom[21], atom[22], 1); atom[12]->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW); break; case 'C': atom[13] = CreateAAAtom(mol, " N1 ", info); atom[14] = CreateAAAtom(mol, " C2 ", info); atom[15] = CreateAAAtom(mol, " O2 ", info); atom[16] = CreateAAAtom(mol, " N3 ", info); atom[17] = CreateAAAtom(mol, " C4 ", info); atom[18] = CreateAAAtom(mol, " N4 ", info); atom[19] = CreateAAAtom(mol, " C5 ", info); atom[20] = CreateAAAtom(mol, " C6 ", info); CreateAABond(mol, atom[12], atom[13], 1); CreateAABond(mol, atom[13], atom[14], 1); CreateAABond(mol, atom[13], atom[20], 1); CreateAABond(mol, atom[14], atom[15], 2); CreateAABond(mol, atom[14], atom[16], 1); CreateAABond(mol, atom[16], atom[17], 2); CreateAABond(mol, atom[17], atom[18], 1); CreateAABond(mol, atom[17], atom[19], 1); CreateAABond(mol, atom[19], atom[20], 2); atom[12]->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW); break; case 'G': atom[13] = CreateAAAtom(mol, " N9 ", info); atom[14] = CreateAAAtom(mol, " C8 ", info); atom[15] = CreateAAAtom(mol, " N7 ", info); atom[16] = CreateAAAtom(mol, " C5 ", info); atom[17] = CreateAAAtom(mol, " C6 ", info); atom[18] = CreateAAAtom(mol, " O6 ", info); atom[19] = CreateAAAtom(mol, " N1 ", info); atom[20] = CreateAAAtom(mol, " C2 ", info); atom[21] = CreateAAAtom(mol, " N2 ", info); atom[22] = CreateAAAtom(mol, " N3 ", info); atom[23] = CreateAAAtom(mol, " C4 ", info); CreateAABond(mol, atom[12], atom[13], 1); CreateAABond(mol, atom[13], atom[14], 1); CreateAABond(mol, atom[13], atom[23], 1); CreateAABond(mol, atom[14], atom[15], 2); CreateAABond(mol, atom[15], atom[16], 1); CreateAABond(mol, atom[16], atom[17], 1); CreateAABond(mol, atom[16], atom[23], 2); CreateAABond(mol, atom[17], atom[18], 2); CreateAABond(mol, atom[17], atom[19], 1); CreateAABond(mol, atom[19], atom[20], 1); CreateAABond(mol, atom[20], atom[21], 1); CreateAABond(mol, atom[20], atom[22], 2); CreateAABond(mol, atom[22], atom[23], 1); atom[12]->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW); break; case 'T': atom[13] = CreateAAAtom(mol, " N1 ", info); atom[14] = CreateAAAtom(mol, " C2 ", info); atom[15] = CreateAAAtom(mol, " O2 ", info); atom[16] = CreateAAAtom(mol, " N3 ", info); atom[17] = CreateAAAtom(mol, " C4 ", info); atom[18] = CreateAAAtom(mol, " O4 ", info); atom[19] = CreateAAAtom(mol, " C5 ", info); atom[20] = CreateAAAtom(mol, " C7 ", info); atom[21] = CreateAAAtom(mol, " C6 ", info); CreateAABond(mol, atom[12], atom[13], 1); CreateAABond(mol, atom[13], atom[14], 1); CreateAABond(mol, atom[13], atom[21], 1); CreateAABond(mol, atom[14], atom[15], 2); CreateAABond(mol, atom[14], atom[16], 1); CreateAABond(mol, atom[16], atom[17], 1); CreateAABond(mol, atom[17], atom[18], 2); CreateAABond(mol, atom[17], atom[19], 1); CreateAABond(mol, atom[19], atom[20], 1); CreateAABond(mol, atom[19], atom[21], 2); atom[12]->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW); break; case 'U': atom[13] = CreateAAAtom(mol, " N1 ", info); atom[14] = CreateAAAtom(mol, " C2 ", info); atom[15] = CreateAAAtom(mol, " O2 ", info); atom[16] = CreateAAAtom(mol, " N3 ", info); atom[17] = CreateAAAtom(mol, " C4 ", info); atom[18] = CreateAAAtom(mol, " O4 ", info); atom[19] = CreateAAAtom(mol, " C5 ", info); atom[20] = CreateAAAtom(mol, " C6 ", info); CreateAABond(mol, atom[12], atom[13], 1); CreateAABond(mol, atom[13], atom[14], 1); CreateAABond(mol, atom[13], atom[20], 1); CreateAABond(mol, atom[14], atom[15], 2); CreateAABond(mol, atom[14], atom[16], 1); CreateAABond(mol, atom[16], atom[17], 1); CreateAABond(mol, atom[17], atom[18], 2); CreateAABond(mol, atom[17], atom[19], 1); CreateAABond(mol, atom[19], atom[20], 2); atom[12]->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW); break; } } static void CreatePCap3(RWMol *mol, Atom *prev, AtomPDBResidueInfo &info) { Atom *atom[4]; int resno = info.getResidueNumber(); info.setResidueNumber(resno + 1); info.setIsHeteroAtom(true); info.setResidueName("2PO"); atom[0] = CreateAAAtom(mol, " P ", info); atom[1] = CreateAAAtom(mol, " OP1", info); atom[2] = CreateAAAtom(mol, " OP2", info); atom[3] = CreateAAAtom(mol, " OP3", info); CreateAABond(mol, prev, atom[0], 1); CreateAABond(mol, atom[0], atom[1], 2); CreateAABond(mol, atom[0], atom[2], 1); CreateAABond(mol, atom[0], atom[3], 1); } static RWMol *NASequenceToMol(const char *seq, bool Dna, bool PCap5, bool PCap3) { char chain[2]; chain[0] = 'A'; chain[1] = '\0'; Atom *prev = (Atom *)nullptr; AtomPDBResidueInfo info; info.setSerialNumber(1); info.setAltLoc(" "); info.setResidueNumber(0); info.setInsertionCode(" "); info.setChainId(chain); auto *mol = new RWMol(); while (*seq) { Atom *r1 = (Atom *)nullptr; Atom *r2 = (Atom *)nullptr; switch (*seq) { case '\n': case '\r': case '-': seq++; continue; case ' ': case '\t': case '\0': break; case '.': if (prev) { if (PCap3) CreatePCap3(mol, prev, info); if (chain[0] < 'Z') { chain[0]++; info.setChainId(chain); } info.setResidueNumber(0); } prev = (Atom *)nullptr; seq++; continue; default: delete mol; return (RWMol *)nullptr; case 'A': case 'a': CreateNucleicAcid(mol, Dna ? " DA" : " A", r1, r2, info, PCap5); break; case 'C': case 'c': CreateNucleicAcid(mol, Dna ? " DC" : " C", r1, r2, info, PCap5); break; case 'G': case 'g': CreateNucleicAcid(mol, Dna ? " DG" : " G", r1, r2, info, PCap5); break; case 'T': case 't': CreateNucleicAcid(mol, Dna ? " DT" : " T", r1, r2, info, PCap5); break; case 'U': case 'u': CreateNucleicAcid(mol, Dna ? " DU" : " U", r1, r2, info, PCap5); break; } if (prev && r1) CreateAABond(mol, prev, r1, 1); prev = r2; seq++; } if (prev && PCap3) CreatePCap3(mol, prev, info); return mol; } RWMol *SequenceToMol(const char *seq, bool sanitize, int flavor) { if (!seq) return (RWMol *)nullptr; RWMol *mol; switch (flavor) { /* Protein */ case 0: mol = AASequenceToMol(seq,false); break; case 1: mol = AASequenceToMol(seq,true); break; /* RNA */ case 2: mol = NASequenceToMol(seq,false,false,false); break; case 3: mol = NASequenceToMol(seq,false,true,false); break; case 4: mol = NASequenceToMol(seq,false,false,true); break; case 5: mol = NASequenceToMol(seq,false,true,true); break; /* DNA */ case 6: mol = NASequenceToMol(seq,true,false,false); break; case 7: mol = NASequenceToMol(seq,true,true,false); break; case 8: mol = NASequenceToMol(seq,true,false,true); break; case 9: mol = NASequenceToMol(seq,true,true,true); break; default: return (RWMol *)nullptr; } if (sanitize && mol) MolOps::sanitizeMol(*mol); return mol; } RWMol *SequenceToMol(const std::string &seq, bool sanitize, int flavor) { return SequenceToMol(seq.c_str(), sanitize, flavor); } RWMol *SequenceToMol(const char *seq, bool sanitize, bool lowerD) { return SequenceToMol(seq, sanitize, lowerD ? 1 : 0); } RWMol *SequenceToMol(const std::string &seq, bool sanitize, bool lowerD) { return SequenceToMol(seq.c_str(), sanitize, lowerD ? 1 : 0); } RWMol *FASTAToMol(const char *seq, bool sanitize, int flavor) { if (!seq) return (RWMol *)nullptr; std::string title; if (seq[0] == '>') { seq++; while (*seq && *seq != '\n' && *seq != '\r') title += *seq++; } RWMol *mol = SequenceToMol(seq, sanitize, flavor); if (!title.empty()) mol->setProp(common_properties::_Name, title); return mol; } RWMol *FASTAToMol(const std::string &seq, bool sanitize, int flavor) { return FASTAToMol(seq.c_str(), sanitize, flavor); } RWMol *FASTAToMol(const char *seq, bool sanitize, bool lowerD) { return FASTAToMol(seq, sanitize, lowerD ? 1 : 0); } RWMol *FASTAToMol(const std::string &seq, bool sanitize, bool lowerD) { return FASTAToMol(seq.c_str(), sanitize, lowerD ? 1 : 0); } struct HELMMonomer { Atom *r1; Atom *r2; Atom *r3; Atom *oxt; HELMMonomer() : r1(nullptr), r2(nullptr), r3(nullptr), oxt(nullptr) {} HELMMonomer(Atom *x, Atom *y, Atom *z) : r1(x), r2(y), r3(z), oxt(nullptr) {} }; static const char *GetHELMOneLetterCode(char ch) { switch (ch) { case 'A': return "ALA"; case 'C': return "CYS"; case 'D': return "ASP"; case 'E': return "GLU"; case 'F': return "PHE"; case 'G': return "GLY"; case 'H': return "HIS"; case 'I': return "ILE"; case 'K': return "LYS"; case 'L': return "LEU"; case 'M': return "MET"; case 'N': return "ASN"; case 'P': return "PRO"; case 'Q': return "GLN"; case 'R': return "ARG"; case 'S': return "SER"; case 'T': return "THR"; case 'V': return "VAL"; case 'W': return "TRP"; case 'Y': return "TYR"; } return (char *)nullptr; } static bool IsHELMMonomerIDChar(char ch) { if (ch >= 'A' && ch <= 'Z') return true; if (ch >= 'a' && ch <= 'z') return true; if (ch >= '0' && ch <= '9') return true; return false; } static const char *LookupHELMPeptideMonomer(const char *ptr) { switch (ptr[0]) { case 'A': if (ptr[1] == '\0') return "ALA"; if (ptr[1] == 'b' && ptr[2] == 'u' && ptr[3] == '\0') return "ABA"; break; case 'C': if (ptr[1] == '\0') return "CYS"; break; case 'D': if (ptr[1] == '\0') return "ASP"; break; case 'E': if (ptr[1] == '\0') return "GLU"; break; case 'F': if (ptr[1] == '\0') return "PHE"; break; case 'G': if (ptr[1] == '\0') return "GLY"; if (ptr[1] == 'l' && ptr[2] == 'p' && ptr[3] == '\0') return "PCA"; break; case 'H': if (ptr[1] == '\0') return "HIS"; break; case 'I': if (ptr[1] == '\0') return "ILE"; break; case 'K': if (ptr[1] == '\0') return "LYS"; break; case 'L': if (ptr[1] == '\0') return "LEU"; break; case 'M': if (ptr[1] == '\0') return "MET"; break; case 'N': if (ptr[1] == '\0') return "ASN"; if (ptr[1] == 'a' && ptr[2] == 'l' && ptr[3] == '\0') return "NAL"; if (ptr[1] == 'l' && ptr[2] == 'e' && ptr[3] == '\0') return "NLE"; if (ptr[1] == 'v' && ptr[2] == 'a' && ptr[3] == '\0') return "NVA"; break; case 'O': if (ptr[1] == 'r' && ptr[2] == 'n' && ptr[3] == '\0') return "ORN"; break; case 'P': if (ptr[1] == '\0') return "PRO"; break; case 'Q': if (ptr[1] == '\0') return "GLN"; break; case 'R': if (ptr[1] == '\0') return "ARG"; break; case 'S': if (ptr[1] == '\0') return "SER"; if (ptr[1] == 'a' && ptr[2] == 'r' && ptr[3] == '\0') return "SAR"; break; case 'T': if (ptr[1] == '\0') return "THR"; break; case 'V': if (ptr[1] == '\0') return "VAL"; break; case 'W': if (ptr[1] == '\0') return "TRP"; break; case 'Y': if (ptr[1] == '\0') return "TYR"; break; case 'd': switch (ptr[1]) { case 'A': if (ptr[2] == '\0') return "DAL"; break; case 'C': if (ptr[2] == '\0') return "DCY"; break; case 'D': if (ptr[2] == '\0') return "DAS"; break; case 'E': if (ptr[2] == '\0') return "DGL"; break; case 'F': if (ptr[2] == '\0') return "DPN"; break; case 'H': if (ptr[2] == '\0') return "DHI"; break; case 'I': if (ptr[2] == '\0') return "DIL"; break; case 'K': if (ptr[2] == '\0') return "DLY"; break; case 'L': if (ptr[2] == '\0') return "DLE"; break; case 'M': if (ptr[2] == '\0') return "MED"; break; case 'N': if (ptr[2] == '\0') return "DSG"; break; case 'P': if (ptr[2] == '\0') return "DPR"; break; case 'Q': if (ptr[2] == '\0') return "DGN"; break; case 'R': if (ptr[2] == '\0') return "DAR"; break; case 'S': if (ptr[2] == '\0') return "DSN"; break; case 'T': if (ptr[2] == '\0') return "DTH"; break; case 'V': if (ptr[2] == '\0') return "DVA"; break; case 'W': if (ptr[2] == '\0') return "DTR"; break; case 'Y': if (ptr[2] == '\0') return "DTY"; break; } break; case 's': if (ptr[1] == 'e' && ptr[2] == 'C' && ptr[3] == '\0') return "MSE"; break; } return (const char *)nullptr; } static const char *ParseHELMPeptide(RWMol *mol, const char *ptr, const char *chain, std::vector &vseq) { unsigned int len = 0; HELMMonomer curr; vseq.clear(); if (ptr[0] == '}') return ptr; AtomPDBResidueInfo info; info.setSerialNumber(1); info.setAltLoc(" "); info.setResidueNumber(0); info.setInsertionCode(" "); info.setChainId(chain); if (ptr[0] == '[' && ptr[1] == 'a' && ptr[2] == 'c' && ptr[3] == ']') { if (ptr[4] != '.') return (const char *)nullptr; info.setResidueNumber(-2); CreateAminoAcid(mol, "ACE", curr.r1, curr.r2, curr.r3, info); vseq.push_back(curr); info.setResidueNumber(0); ptr += 5; len = 1; } for (;;) { const char *name = nullptr; if (*ptr == '[') { std::string tmp; ptr++; while (IsHELMMonomerIDChar(*ptr)) tmp += *ptr++; if (*ptr != ']') return (char *)nullptr; name = LookupHELMPeptideMonomer(tmp.c_str()); } else name = GetHELMOneLetterCode(*ptr); if (!name) return (const char *)nullptr; ptr++; CreateAminoAcid(mol, name, curr.r1, curr.r2, curr.r3, info); if (len && vseq[len - 1].r2 && curr.r1) { CreateAABond(mol, vseq[len - 1].r2, curr.r1, 1); vseq[len - 1].r2 = nullptr; } vseq.push_back(curr); len++; if (*ptr == '.') { if (ptr[1] == '[' && ptr[2] == 'a' && ptr[3] == 'm' && ptr[4] == ']' && ptr[5] == '}') { if (!vseq[len - 1].r2) return (const char *)nullptr; int resno = info.getResidueNumber(); info.setResidueNumber(resno + 1); info.setIsHeteroAtom(true); info.setResidueName("NH2"); Atom *n = CreateAAAtom(mol, " N ", info); CreateAABond(mol, vseq[len - 1].r2, n, 1); vseq[len - 1].r2 = (Atom *)nullptr; vseq.push_back(HELMMonomer()); len++; return ptr + 5; } ptr++; } else if (*ptr == '}') { if (!vseq[len - 1].r2) return (const char *)nullptr; Atom *oxt = CreateAAAtom(mol, " OXT", info); CreateAABond(mol, vseq[len - 1].r2, oxt, 1); vseq[len - 1].oxt = oxt; return ptr; } else return (const char *)nullptr; } } static const char *ParseHELMNucleic(RWMol *mol, const char *ptr, const char *chain) { if (ptr[0] == '}') return ptr; bool PCap5 = false; Atom *prev = nullptr; Atom *r1 = nullptr; Atom *r2 = nullptr; AtomPDBResidueInfo info; info.setSerialNumber(1); info.setAltLoc(" "); info.setResidueNumber(0); info.setInsertionCode(" "); info.setChainId(chain); if (*ptr=='P') { PCap5 = true; ptr++; if (*ptr=='.') ptr++; } for (;;) { const char *name = nullptr; if (*ptr == 'R' && ptr[1]=='(') { if (ptr[2]=='A') { if (ptr[3]==')') { name = " A"; ptr += 4; } } else if (ptr[2]=='C') { if (ptr[3]==')') { name = " C"; ptr += 4; } } else if (ptr[2]=='G') { if (ptr[3]==')') { name = " G"; ptr += 4; } } else if (ptr[2]=='T') { if (ptr[3]==')') { name = " T"; ptr += 4; } } else if (ptr[2]=='U') { if (ptr[3]==')') { name = " U"; ptr += 4; } } } else if (*ptr=='[' && ptr[1]=='d' && ptr[2]=='R' && ptr[3]==']' && ptr[4]=='(') { if (ptr[5]=='A') { if (ptr[6]==')') { name = " DA"; ptr += 7; } } else if (ptr[5]=='C') { if (ptr[6]==')') { name = " DC"; ptr += 7; } } else if (ptr[5]=='G') { if (ptr[6]==')') { name = " DG"; ptr += 7; } } else if (ptr[5]=='T') { if (ptr[6]==')') { name = " DT"; ptr += 7; } } else if (ptr[5]=='U') { if (ptr[6]==')') { name = " DU"; ptr += 7; } } } if (!name) return (const char *)nullptr; CreateNucleicAcid(mol,name,r1,r2,info,PCap5); if (prev && r1) CreateAABond(mol, prev, r1, 1); prev = r2; if (*ptr == '}') return ptr; if (*ptr == '.') ptr++; if (*ptr != 'P') return (const char *)nullptr; ptr++; if (*ptr == '}') { CreatePCap3(mol,prev,info); return ptr; } PCap5 = true; if (*ptr == '.') ptr++; } } static bool ParseHELM(RWMol *mol, const char *ptr) { std::map > seqs; const char *orig; char chain[2]; chain[0] = 'A'; chain[1] = '\0'; for (;;) { orig = ptr; if (ptr[0] == 'P' && ptr[1] == 'E' && ptr[2] == 'P' && ptr[3] == 'T' && ptr[4] == 'I' && ptr[5] == 'D' && ptr[6] == 'E' && ptr[7] >= '1' && ptr[7] <= '9') { ptr += 8; while (*ptr >= '0' && *ptr <= '9') ptr++; if (*ptr != '{') return false; std::string id(orig, ptr - orig); chain[0] = 'A' + (orig[7] - '1'); ptr = ParseHELMPeptide(mol, ptr + 1, chain, seqs[id]); if (!ptr || *ptr != '}') return false; ptr++; } else if (ptr[0]=='R' && ptr[1]=='N' && ptr[2]=='A' && ptr[3]>='1' && ptr[3]<='9') { ptr += 4; while (*ptr >= '0' && *ptr <= '9') ptr++; if (*ptr != '{') return false; chain[0] = 'A' + (orig[3] - '1'); ptr = ParseHELMNucleic(mol, ptr + 1, chain); if (!ptr || *ptr != '}') return false; ptr++; } else return false; if (*ptr == '$') break; if (*ptr == '\0') return true; if (*ptr != '|') return false; ptr++; } ptr++; if (ptr[0] == '$' && ptr[1] == '$' && ptr[2] == '$') return true; for (;;) { orig = ptr; if (ptr[0] == 'P' && ptr[1] == 'E' && ptr[2] == 'P' && ptr[3] == 'T' && ptr[4] == 'I' && ptr[5] == 'D' && ptr[6] == 'E' && ptr[7] >= '1' && ptr[7] <= '9') { ptr += 8; } else return false; while (*ptr >= '0' && *ptr <= '9') ptr++; if (*ptr != ',') return false; std::string id1(orig, ptr - orig); ptr++; orig = ptr; if (ptr[0] == 'P' && ptr[1] == 'E' && ptr[2] == 'P' && ptr[3] == 'T' && ptr[4] == 'I' && ptr[5] == 'D' && ptr[6] == 'E' && ptr[7] >= '1' && ptr[7] <= '9') { ptr += 8; } else return false; while (*ptr >= '0' && *ptr <= '9') ptr++; if (*ptr != ',') return false; std::string id2(orig, ptr - orig); ptr++; unsigned int res1; unsigned int res2; unsigned int res1r; unsigned int res2r; if (*ptr >= '1' && *ptr <= '9') { res1 = (*ptr++) - '0'; while (*ptr >= '0' && *ptr <= '9') res1 = 10 * res1 + ((*ptr++) - '0'); } else return false; if (ptr[0] == ':' && ptr[1] == 'R' && ptr[2] >= '1' && ptr[2] <= '9') { res1r = ptr[2] - '0'; ptr += 3; } else return false; if (*ptr != '-') return false; ptr++; if (*ptr >= '1' && *ptr <= '9') { res2 = (*ptr++) - '0'; while (*ptr >= '0' && *ptr <= '9') res2 = 10 * res2 + ((*ptr++) - '0'); } else return false; if (ptr[0] == ':' && ptr[1] == 'R' && ptr[2] >= '1' && ptr[2] <= '9') { res2r = ptr[2] - '0'; ptr += 3; } else return false; // printf("%s:%u:R%u - %s:%u:R%u\n",id1.c_str(),res1,res1r, // id2.c_str(),res2,res2r); if (res1 < 1 || res2 < 1) return false; if (seqs.find(id1) == seqs.end() || seqs.find(id2) == seqs.end()) return false; std::vector *vseq1 = &seqs[id1]; if (res1 > (unsigned int)vseq1->size()) return false; std::vector *vseq2 = &seqs[id2]; if (res2 > (unsigned int)vseq2->size()) return false; if (res1r == 3 && res2r == 3) { Atom *src = (*vseq1)[res1 - 1].r3; Atom *dst = (*vseq2)[res2 - 1].r3; if (src && dst && src != dst) { CreateAABond(mol, src, dst, 1); (*vseq1)[res1 - 1].r3 = (Atom *)nullptr; (*vseq2)[res2 - 1].r3 = (Atom *)nullptr; } else return false; } else if (res1r == 1 && res2r == 2) { Atom *src = (*vseq1)[res1 - 1].r1; Atom *dst = (*vseq2)[res2 - 1].r2; Atom *oxt = (*vseq2)[res2 - 1].oxt; if (src && dst && oxt && src != dst) { mol->removeAtom(oxt); CreateAABond(mol, src, dst, 1); (*vseq1)[res1 - 1].r1 = (Atom *)nullptr; (*vseq2)[res2 - 1].r2 = (Atom *)nullptr; } else return false; } else if (res1r == 2 && res2r == 1) { Atom *src = (*vseq2)[res2 - 1].r1; Atom *dst = (*vseq1)[res1 - 1].r2; Atom *oxt = (*vseq1)[res1 - 1].oxt; if (src && dst && oxt && src != dst) { mol->removeAtom(oxt); CreateAABond(mol, dst, src, 1); (*vseq1)[res1 - 1].r2 = (Atom *)nullptr; (*vseq2)[res2 - 1].r1 = (Atom *)nullptr; } else return false; } else return false; if (*ptr == '$') break; if (*ptr != '|') return false; ptr++; } ptr++; return ptr[0] == '$' && ptr[1] == '$'; } RWMol *HELMToMol(const char *helm, bool sanitize) { auto *mol = new RWMol(); const char *ptr = helm; if (ptr[0] == '$' && ptr[1] == '$' && ptr[2] == '$' && ptr[3] == '$') return mol; if (ParseHELM(mol, ptr)) { if (sanitize) MolOps::sanitizeMol(*mol); return mol; } delete mol; return (RWMol *)nullptr; } RWMol *HELMToMol(const std::string &helm, bool sanitize) { return HELMToMol(helm.c_str(), sanitize); } } // namespace RDKit