Files
rdkit/Code/GraphMol/Canon.h
Greg Landrum 270f7b76e5 Partial support for reading CXSMILES (#1237)
* add a SmilesParserParams object to prepare for this

* add a SmilesParserParams object to prepare for this

* add tests for the SmilesParserParams

* support name parsing, should it be the default?

* rename CXNSmiles to CXSmiles;
add a spirit parser for CXSmiles coordinates that is at least syntactically correct

* abandon boost::spirit for now; crude atom token parser

* support params in smiles parser (not tested, may not build)

* can read coords and atom labels along with mol names; crude, but works

* read coordinate bonds

* remove some compiler warnings with VS2015

* remove a bunch of compiler warnings on windows

* remove more warnings on windows

* remove more warnings on windows

* backup commit: first pass at parsing query features

* radical spec parsing

* handle attachment points using atom mapping

* switch to a named property for atom labels

* fix handling of the "A" atom query

* add functions to construct A and Q queries (needs more work)

* fix a problem created while cleaning up warnings earlier

* add some additional convenience functions for making generic atoms.
Still need M and to recognize these while writing CXSMILES

* add M queries; update some tests

* fix a linux compile problem

* get the cxsmiles stuff working in python; basic testing

* support "M" in CXSMILES
2017-01-31 13:50:36 -05:00

123 lines
3.7 KiB
C++

//
// Copyright (C) 2004-2006 Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#ifndef _RD_CANON_H_
#define _RD_CANON_H_
#include <RDGeneral/BoostStartInclude.h>
#include <boost/tuple/tuple.hpp>
#include <boost/dynamic_bitset.hpp>
#include <RDGeneral/BoostEndInclude.h>
namespace RDKit {
class ROMol;
class Atom;
class Bond;
namespace Canon {
// Integer constants for the canonical traversal code.
// NOTE(review): none of these is referenced inside this header; the names
// suggest size limits, but confirm how they are used against the traversal
// implementation before relying on that.
const int MAX_NATOMS = 5000; //!< used in the canonical traversal code
const int MAX_CYCLES = 1000; //!< used in the canonical traversal code
const int MAX_BONDTYPE = 32; //!< used in the canonical traversal code
//! per-atom visitation state used in traversals of the molecule
enum AtomColors {
  WHITE_NODE = 0,  //!< not visited
  GREY_NODE = 1,   //!< visited, but not finished
  BLACK_NODE = 2   //!< visited and finished
};
//! tags identifying the kind of each entry in the molecular stack
enum MolStackTypes {
  MOL_STACK_ATOM = 0,          //!< an Atom
  MOL_STACK_BOND = 1,          //!< a Bond
  MOL_STACK_RING = 2,          //!< a ring closure
  MOL_STACK_BRANCH_OPEN = 3,   //!< beginning of a branch
  MOL_STACK_BRANCH_CLOSE = 4   //!< end of a branch
};
//! pointer payload of a molecular stack entry
/*!
  Holds either an Atom pointer or a Bond pointer; both members share the
  same storage.
*/
union MolStackUnion {
  Atom *atom;  //!< the Atom pointer
  Bond *bond;  //!< the Bond pointer
};
//! these are the actual elements in the molecular stack
class MolStackElem {
public:
//! construct an Atom node
explicit MolStackElem(Atom *at) {
type = MOL_STACK_ATOM;
obj.atom = at;
};
//! construct a bond node
/*!
\param bond pointer to the Bond being added
\param idx index of the Atom traversed before this Bond
(beginAtom in the canonical traversal order)
*/
explicit MolStackElem(Bond *bond, int idx) {
type = MOL_STACK_BOND;
obj.bond = bond;
number = idx;
};
//! construct for a ring closure
explicit MolStackElem(int idx) {
type = MOL_STACK_RING;
number = idx;
};
//! construct for a branch opening or closing
explicit MolStackElem(const char *chr, int idx) {
switch (chr[0]) {
case '(':
type = MOL_STACK_BRANCH_OPEN;
break;
case ')':
type = MOL_STACK_BRANCH_CLOSE;
break;
default:
break;
}
number = idx;
}
MolStackTypes type; //!< stores the type of node
MolStackUnion obj; //!< holds our pointer (if appropriate)
int number; //!< stores our number (relevant for bonds and ring closures)
};
//! the molecular stack: a sequence of MolStackElem entries
// NOTE(review): this header does not include <vector> itself; it appears to
// rely on one of the boost headers above pulling it in - confirm.
typedef std::vector<MolStackElem> MolStack;
//! used to represent possible branches from an atom
// NOTE(review): the meaning of the two int fields is not documented in this
// header - confirm against the canonicalizeFragment implementation.
typedef boost::tuple<int, int, Bond *> PossibleType;
//! constructs the canonical traversal order for a molecular fragment
/*!
\param mol the ROMol we're working on
\param atomIdx the index of the atom to start the traversal from
\param colors the traversal status of each atom in \c mol
\param ranks the assigned rank of each atom in \c mol
\param molStack the current traversal stack (used to return the results)
\param bondsInPlay optional; may be null (the default).
NOTE(review): presumably flags which bonds take part in the traversal -
confirm against the implementation
\param bondSymbols optional; may be null (the default).
NOTE(review): presumably per-bond symbol strings - confirm against the
implementation
\param doIsomericSmiles defaults to false.
NOTE(review): presumably toggles handling of stereochemistry information -
confirm against the implementation
<b>Notes</b>
- \c mol will, in general, be modified by this operation as bond directions
and the like are changed to fit the canonical traversal order
- NOTE(review): this header uses std::string without including <string>
itself; it appears to rely on a transitive include - confirm
*/
void canonicalizeFragment(ROMol &mol, int atomIdx,
std::vector<AtomColors> &colors,
const std::vector<unsigned int> &ranks,
MolStack &molStack,
const boost::dynamic_bitset<> *bondsInPlay = 0,
const std::vector<std::string> *bondSymbols = 0,
bool doIsomericSmiles = false);
} // end of namespace Canon
} // end of namespace RDKit
#endif