mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* reverted all changes that did not really belong to this PR * I had forgotten ChemReactions_static * changes in response to review --------- Co-authored-by: Tosco, Paolo <paolo.tosco@novartis.com>
944 lines
35 KiB
C++
944 lines
35 KiB
C++
//
|
|
// Copyright (C) 2014 Novartis Institutes for BioMedical Research
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <list>
|
|
#include <algorithm>
|
|
#include <cmath>
|
|
|
|
#include <RDGeneral/BoostStartInclude.h>
|
|
#include <boost/property_tree/ptree.hpp>
|
|
#include <boost/property_tree/json_parser.hpp>
|
|
#include <RDGeneral/BoostEndInclude.h>
|
|
|
|
#include <iostream>
|
|
#include <sstream>
|
|
#include "SubstructMatchCustom.h"
|
|
#include "MaximumCommonSubgraph.h"
|
|
#include <GraphMol/QueryOps.h>
|
|
|
|
namespace RDKit {
|
|
|
|
namespace {
|
|
struct cmpCStr {
|
|
bool operator()(const char *a, const char *b) const {
|
|
return std::strcmp(a, b) < 0;
|
|
}
|
|
};
|
|
} // namespace
|
|
|
|
void MCSParameters::setMCSAtomTyperFromEnum(AtomComparator atomComp) {
|
|
switch (atomComp) {
|
|
case AtomCompareAny:
|
|
AtomTyper = MCSAtomCompareAny;
|
|
break;
|
|
case AtomCompareElements:
|
|
AtomTyper = MCSAtomCompareElements;
|
|
break;
|
|
case AtomCompareIsotopes:
|
|
AtomTyper = MCSAtomCompareIsotopes;
|
|
break;
|
|
case AtomCompareAnyHeavyAtom:
|
|
AtomTyper = MCSAtomCompareAnyHeavyAtom;
|
|
break;
|
|
default:
|
|
throw ValueErrorException("Unknown AtomComparator");
|
|
}
|
|
}
|
|
|
|
void MCSParameters::setMCSAtomTyperFromConstChar(const char* atomComp) {
|
|
PRECONDITION(atomComp, "atomComp must not be NULL");
|
|
static const std::map<const char*, AtomComparator, cmpCStr> atomCompStringToEnum = {
|
|
{"Any", AtomCompareAny},
|
|
{"Elements", AtomCompareElements},
|
|
{"Isotopes", AtomCompareIsotopes},
|
|
{"AnyHeavy", AtomCompareAnyHeavyAtom}};
|
|
try {
|
|
setMCSAtomTyperFromEnum(atomCompStringToEnum.at(atomComp));
|
|
} catch (const std::out_of_range&) {
|
|
// we accept "def" as a no-op
|
|
}
|
|
}
|
|
|
|
void MCSParameters::setMCSBondTyperFromEnum(BondComparator bondComp) {
|
|
switch (bondComp) {
|
|
case BondCompareAny:
|
|
BondTyper = MCSBondCompareAny;
|
|
break;
|
|
case BondCompareOrder:
|
|
BondTyper = MCSBondCompareOrder;
|
|
break;
|
|
case BondCompareOrderExact:
|
|
BondTyper = MCSBondCompareOrderExact;
|
|
break;
|
|
default:
|
|
throw ValueErrorException("Unknown BondComparator");
|
|
}
|
|
}
|
|
|
|
void MCSParameters::setMCSBondTyperFromConstChar(const char* bondComp) {
|
|
PRECONDITION(bondComp, "bondComp must not be NULL");
|
|
static const std::map<const char*, BondComparator, cmpCStr> bondCompStringToEnum = {
|
|
{"Any", BondCompareAny},
|
|
{"Order", BondCompareOrder},
|
|
{"OrderExact", BondCompareOrderExact}};
|
|
try {
|
|
setMCSBondTyperFromEnum(bondCompStringToEnum.at(bondComp));
|
|
} catch (const std::out_of_range&) {
|
|
// we accept "def" as a no-op
|
|
}
|
|
}
|
|
|
|
void parseMCSParametersJSON(const char* json, MCSParameters* params) {
|
|
if (params && json && 0 != strlen(json)) {
|
|
std::istringstream ss;
|
|
ss.str(json);
|
|
boost::property_tree::ptree pt;
|
|
boost::property_tree::read_json(ss, pt);
|
|
|
|
RDKit::MCSParameters& p = *params;
|
|
p.MaximizeBonds = pt.get<bool>("MaximizeBonds", p.MaximizeBonds);
|
|
p.Threshold = pt.get<double>("Threshold", p.Threshold);
|
|
p.Timeout = pt.get<unsigned>("Timeout", p.Timeout);
|
|
p.AtomCompareParameters.MatchValences =
|
|
pt.get<bool>("MatchValences", p.AtomCompareParameters.MatchValences);
|
|
p.AtomCompareParameters.MatchChiralTag =
|
|
pt.get<bool>("MatchChiralTag", p.AtomCompareParameters.MatchChiralTag);
|
|
p.AtomCompareParameters.MatchFormalCharge = pt.get<bool>(
|
|
"MatchFormalCharge", p.AtomCompareParameters.MatchFormalCharge);
|
|
p.AtomCompareParameters.RingMatchesRingOnly = pt.get<bool>(
|
|
"RingMatchesRingOnly", p.AtomCompareParameters.RingMatchesRingOnly);
|
|
p.AtomCompareParameters.MaxDistance =
|
|
pt.get<double>("MaxDistance", p.AtomCompareParameters.MaxDistance);
|
|
p.BondCompareParameters.RingMatchesRingOnly = pt.get<bool>(
|
|
"RingMatchesRingOnly", p.BondCompareParameters.RingMatchesRingOnly);
|
|
p.AtomCompareParameters.RingMatchesRingOnly = pt.get<bool>(
|
|
"AtomRingMatchesRingOnly", p.AtomCompareParameters.RingMatchesRingOnly);
|
|
p.BondCompareParameters.RingMatchesRingOnly = pt.get<bool>(
|
|
"BondRingMatchesRingOnly", p.BondCompareParameters.RingMatchesRingOnly);
|
|
p.BondCompareParameters.CompleteRingsOnly = pt.get<bool>(
|
|
"CompleteRingsOnly", p.BondCompareParameters.CompleteRingsOnly);
|
|
p.AtomCompareParameters.CompleteRingsOnly = pt.get<bool>(
|
|
"AtomCompleteRingsOnly", p.AtomCompareParameters.CompleteRingsOnly);
|
|
p.BondCompareParameters.CompleteRingsOnly = pt.get<bool>(
|
|
"BondCompleteRingsOnly", p.BondCompareParameters.CompleteRingsOnly);
|
|
p.BondCompareParameters.MatchFusedRings = pt.get<bool>(
|
|
"MatchFusedRings", p.BondCompareParameters.MatchFusedRings);
|
|
p.BondCompareParameters.MatchFusedRingsStrict = pt.get<bool>(
|
|
"MatchFusedRingsStrict", p.BondCompareParameters.MatchFusedRingsStrict);
|
|
p.BondCompareParameters.MatchStereo =
|
|
pt.get<bool>("MatchStereo", p.BondCompareParameters.MatchStereo);
|
|
|
|
p.setMCSAtomTyperFromConstChar(
|
|
pt.get<std::string>("AtomCompare", "def").c_str());
|
|
p.setMCSBondTyperFromConstChar(
|
|
pt.get<std::string>("BondCompare", "def").c_str());
|
|
|
|
p.InitialSeed = pt.get<std::string>("InitialSeed", "");
|
|
}
|
|
}
|
|
|
|
MCSResult findMCS(const std::vector<ROMOL_SPTR>& mols,
|
|
const MCSParameters* params) {
|
|
MCSParameters p;
|
|
if (nullptr == params) {
|
|
params = &p;
|
|
}
|
|
RDKit::FMCS::MaximumCommonSubgraph fmcs(params);
|
|
return fmcs.find(mols);
|
|
}
|
|
|
|
MCSResult findMCS_P(const std::vector<ROMOL_SPTR>& mols,
|
|
const char* params_json) {
|
|
MCSParameters p;
|
|
parseMCSParametersJSON(params_json, &p);
|
|
return findMCS(mols, &p);
|
|
}
|
|
|
|
MCSResult findMCS(const std::vector<ROMOL_SPTR>& mols, bool maximizeBonds,
|
|
double threshold, unsigned timeout, bool verbose,
|
|
bool matchValences, bool ringMatchesRingOnly,
|
|
bool completeRingsOnly, bool matchChiralTag,
|
|
AtomComparator atomComp, BondComparator bondComp) {
|
|
return findMCS(mols, maximizeBonds, threshold, timeout, verbose,
|
|
matchValences, ringMatchesRingOnly, completeRingsOnly,
|
|
matchChiralTag, atomComp, bondComp, IgnoreRingFusion);
|
|
}
|
|
|
|
MCSResult findMCS(const std::vector<ROMOL_SPTR>& mols, bool maximizeBonds,
|
|
double threshold, unsigned timeout, bool verbose,
|
|
bool matchValences, bool ringMatchesRingOnly,
|
|
bool completeRingsOnly, bool matchChiralTag,
|
|
AtomComparator atomComp, BondComparator bondComp,
|
|
RingComparator ringComp) {
|
|
auto* ps = new MCSParameters();
|
|
ps->MaximizeBonds = maximizeBonds;
|
|
ps->Threshold = threshold;
|
|
ps->Timeout = timeout;
|
|
ps->Verbose = verbose;
|
|
ps->setMCSAtomTyperFromEnum(atomComp);
|
|
ps->AtomCompareParameters.MatchValences = matchValences;
|
|
ps->AtomCompareParameters.MatchChiralTag = matchChiralTag;
|
|
ps->AtomCompareParameters.RingMatchesRingOnly = ringMatchesRingOnly;
|
|
ps->setMCSBondTyperFromEnum(bondComp);
|
|
ps->BondCompareParameters.RingMatchesRingOnly = ringMatchesRingOnly;
|
|
ps->BondCompareParameters.CompleteRingsOnly = completeRingsOnly;
|
|
ps->BondCompareParameters.MatchFusedRings = (ringComp != IgnoreRingFusion);
|
|
ps->BondCompareParameters.MatchFusedRingsStrict =
|
|
(ringComp == StrictRingFusion);
|
|
MCSResult res = findMCS(mols, ps);
|
|
delete ps;
|
|
return res;
|
|
}
|
|
|
|
bool MCSProgressCallbackTimeout(const MCSProgressData&,
|
|
const MCSParameters& params, void* userData) {
|
|
PRECONDITION(userData, "userData must not be NULL");
|
|
auto* t0 = (unsigned long long*)userData;
|
|
unsigned long long t = nanoClock();
|
|
return t - *t0 <= params.Timeout * 1000000ULL;
|
|
}
|
|
|
|
// PREDEFINED FUNCTORS:
|
|
|
|
//=== ATOM COMPARE ========================================================
|
|
bool checkAtomRingMatch(const MCSAtomCompareParameters& p, const ROMol& mol1,
|
|
unsigned int atom1, const ROMol& mol2,
|
|
unsigned int atom2) {
|
|
if (p.RingMatchesRingOnly) {
|
|
bool atom1inRing = queryIsAtomInRing(mol1.getAtomWithIdx(atom1));
|
|
bool atom2inRing = queryIsAtomInRing(mol2.getAtomWithIdx(atom2));
|
|
return atom1inRing == atom2inRing;
|
|
} else {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
bool checkAtomCharge(const MCSAtomCompareParameters&, const ROMol& mol1,
|
|
unsigned int atom1, const ROMol& mol2,
|
|
unsigned int atom2) {
|
|
const Atom& a1 = *mol1.getAtomWithIdx(atom1);
|
|
const Atom& a2 = *mol2.getAtomWithIdx(atom2);
|
|
return a1.getFormalCharge() == a2.getFormalCharge();
|
|
}
|
|
|
|
bool checkAtomChirality(const MCSAtomCompareParameters&, const ROMol& mol1,
|
|
unsigned int atom1, const ROMol& mol2,
|
|
unsigned int atom2) {
|
|
const Atom& a1 = *mol1.getAtomWithIdx(atom1);
|
|
const Atom& a2 = *mol2.getAtomWithIdx(atom2);
|
|
Atom::ChiralType ac1 = a1.getChiralTag();
|
|
Atom::ChiralType ac2 = a2.getChiralTag();
|
|
if (ac1 == Atom::CHI_TETRAHEDRAL_CW || ac1 == Atom::CHI_TETRAHEDRAL_CCW) {
|
|
return (ac2 == Atom::CHI_TETRAHEDRAL_CW ||
|
|
ac2 == Atom::CHI_TETRAHEDRAL_CCW);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool checkAtomDistance(const MCSAtomCompareParameters& p, const ROMol& mol1,
|
|
unsigned int atom1, const ROMol& mol2,
|
|
unsigned int atom2) {
|
|
const Conformer& ci1 = mol1.getConformer(-1);
|
|
const Conformer& ci2 = mol2.getConformer(-1);
|
|
const RDGeom::Point3D& pos1 = ci1.getAtomPos(atom1);
|
|
const RDGeom::Point3D& pos2 = ci2.getAtomPos(atom2);
|
|
bool withinRange = (pos1 - pos2).length() <= p.MaxDistance;
|
|
return withinRange;
|
|
}
|
|
|
|
bool MCSAtomCompareAny(const MCSAtomCompareParameters& p, const ROMol& mol1,
|
|
unsigned int atom1, const ROMol& mol2,
|
|
unsigned int atom2, void*) {
|
|
if (p.MatchChiralTag && !checkAtomChirality(p, mol1, atom1, mol2, atom2)) {
|
|
return false;
|
|
}
|
|
if (p.MatchFormalCharge && !checkAtomCharge(p, mol1, atom1, mol2, atom2)) {
|
|
return false;
|
|
}
|
|
if (p.MaxDistance > 0 && !checkAtomDistance(p, mol1, atom1, mol2, atom2)) {
|
|
return false;
|
|
}
|
|
if (p.RingMatchesRingOnly) {
|
|
return checkAtomRingMatch(p, mol1, atom1, mol2, atom2);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool MCSAtomCompareElements(const MCSAtomCompareParameters& p,
|
|
const ROMol& mol1, unsigned int atom1,
|
|
const ROMol& mol2, unsigned int atom2, void*) {
|
|
const Atom& a1 = *mol1.getAtomWithIdx(atom1);
|
|
const Atom& a2 = *mol2.getAtomWithIdx(atom2);
|
|
if (a1.getAtomicNum() != a2.getAtomicNum()) {
|
|
return false;
|
|
}
|
|
if (p.MatchValences && a1.getTotalValence() != a2.getTotalValence()) {
|
|
return false;
|
|
}
|
|
if (p.MatchChiralTag && !checkAtomChirality(p, mol1, atom1, mol2, atom2)) {
|
|
return false;
|
|
}
|
|
if (p.MatchFormalCharge && !checkAtomCharge(p, mol1, atom1, mol2, atom2)) {
|
|
return false;
|
|
}
|
|
if (p.MaxDistance > 0 && !checkAtomDistance(p, mol1, atom1, mol2, atom2)) {
|
|
return false;
|
|
}
|
|
if (p.RingMatchesRingOnly) {
|
|
return checkAtomRingMatch(p, mol1, atom1, mol2, atom2);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool MCSAtomCompareIsotopes(const MCSAtomCompareParameters& p,
|
|
const ROMol& mol1, unsigned int atom1,
|
|
const ROMol& mol2, unsigned int atom2, void*) {
|
|
// ignore everything except isotope information:
|
|
// if( ! MCSAtomCompareElements (p, mol1, atom1, mol2, atom2, ud))
|
|
// return false;
|
|
const Atom& a1 = *mol1.getAtomWithIdx(atom1);
|
|
const Atom& a2 = *mol2.getAtomWithIdx(atom2);
|
|
if (a1.getIsotope() != a2.getIsotope()) {
|
|
return false;
|
|
}
|
|
if (p.MatchChiralTag && !checkAtomChirality(p, mol1, atom1, mol2, atom2)) {
|
|
return false;
|
|
}
|
|
if (p.MatchFormalCharge && !checkAtomCharge(p, mol1, atom1, mol2, atom2)) {
|
|
return false;
|
|
}
|
|
if (p.MaxDistance > 0 && !checkAtomDistance(p, mol1, atom1, mol2, atom2)) {
|
|
return false;
|
|
}
|
|
if (p.RingMatchesRingOnly) {
|
|
return checkAtomRingMatch(p, mol1, atom1, mol2, atom2);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool MCSAtomCompareAnyHeavyAtom(const MCSAtomCompareParameters& p,
|
|
const ROMol& mol1, unsigned int atom1,
|
|
const ROMol& mol2, unsigned int atom2, void*) {
|
|
const Atom& a1 = *mol1.getAtomWithIdx(atom1);
|
|
const Atom& a2 = *mol2.getAtomWithIdx(atom2);
|
|
// Any atom, including H, matches another atom of the same type, according to
|
|
// the other flags
|
|
if (a1.getAtomicNum() == a2.getAtomicNum() ||
|
|
(a1.getAtomicNum() > 1 && a2.getAtomicNum() > 1)) {
|
|
return MCSAtomCompareAny(p, mol1, atom1, mol2, atom2, nullptr);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
//=== BOND COMPARE ========================================================
|
|
|
|
class BondMatchOrderMatrix {
|
|
bool MatchMatrix[Bond::ZERO + 1][Bond::ZERO + 1];
|
|
|
|
public:
|
|
BondMatchOrderMatrix(bool ignoreAromatization) {
|
|
memset(MatchMatrix, 0, sizeof(MatchMatrix));
|
|
for (size_t i = 0; i <= Bond::ZERO;
|
|
i++) { // fill cells of the same and unspecified type
|
|
MatchMatrix[i][i] = true;
|
|
MatchMatrix[Bond::UNSPECIFIED][i] = MatchMatrix[i][Bond::UNSPECIFIED] =
|
|
true;
|
|
MatchMatrix[Bond::ZERO][i] = MatchMatrix[i][Bond::ZERO] = true;
|
|
}
|
|
if (ignoreAromatization) {
|
|
MatchMatrix[Bond::SINGLE][Bond::AROMATIC] =
|
|
MatchMatrix[Bond::AROMATIC][Bond::SINGLE] = true;
|
|
MatchMatrix[Bond::SINGLE][Bond::ONEANDAHALF] =
|
|
MatchMatrix[Bond::ONEANDAHALF][Bond::SINGLE] = true;
|
|
MatchMatrix[Bond::DOUBLE][Bond::TWOANDAHALF] =
|
|
MatchMatrix[Bond::TWOANDAHALF][Bond::DOUBLE] = true;
|
|
MatchMatrix[Bond::TRIPLE][Bond::THREEANDAHALF] =
|
|
MatchMatrix[Bond::THREEANDAHALF][Bond::TRIPLE] = true;
|
|
MatchMatrix[Bond::QUADRUPLE][Bond::FOURANDAHALF] =
|
|
MatchMatrix[Bond::FOURANDAHALF][Bond::QUADRUPLE] = true;
|
|
MatchMatrix[Bond::QUINTUPLE][Bond::FIVEANDAHALF] =
|
|
MatchMatrix[Bond::FIVEANDAHALF][Bond::QUINTUPLE] = true;
|
|
}
|
|
}
|
|
inline bool isEqual(unsigned i, unsigned j) const {
|
|
return MatchMatrix[i][j];
|
|
}
|
|
};
|
|
|
|
bool checkBondStereo(const MCSBondCompareParameters&, const ROMol& mol1,
|
|
unsigned int bond1, const ROMol& mol2,
|
|
unsigned int bond2) {
|
|
const Bond* b1 = mol1.getBondWithIdx(bond1);
|
|
const Bond* b2 = mol2.getBondWithIdx(bond2);
|
|
Bond::BondStereo bs1 = b1->getStereo();
|
|
Bond::BondStereo bs2 = b2->getStereo();
|
|
if (b1->getBondType() == Bond::DOUBLE && b2->getBondType() == Bond::DOUBLE) {
|
|
if (bs1 > Bond::STEREOANY && !(bs2 > Bond::STEREOANY)) {
|
|
return false;
|
|
} else {
|
|
return bs1 == bs2;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool checkBondRingMatch(const MCSBondCompareParameters&, const ROMol&,
|
|
unsigned int bond1, const ROMol& mol2,
|
|
unsigned int bond2, void* v_ringMatchMatrixSet) {
|
|
if (!v_ringMatchMatrixSet) {
|
|
throw "v_ringMatchMatrixSet is NULL"; // never
|
|
}
|
|
auto* ringMatchMatrixSet =
|
|
static_cast<FMCS::RingMatchTableSet*>(v_ringMatchMatrixSet);
|
|
|
|
const std::vector<size_t>& ringsIdx1 =
|
|
ringMatchMatrixSet->getQueryBondRings(bond1); // indices of rings
|
|
const std::vector<size_t>& ringsIdx2 =
|
|
ringMatchMatrixSet->getTargetBondRings(&mol2, bond2); // indices of rings
|
|
bool bond1inRing = !ringsIdx1.empty();
|
|
bool bond2inRing = !ringsIdx2.empty();
|
|
|
|
// bond are both either in a ring or not
|
|
return (bond1inRing == bond2inRing);
|
|
}
|
|
|
|
bool MCSBondCompareAny(const MCSBondCompareParameters& p, const ROMol& mol1,
|
|
unsigned int bond1, const ROMol& mol2,
|
|
unsigned int bond2, void* ud) {
|
|
if (p.MatchStereo && !checkBondStereo(p, mol1, bond1, mol2, bond2)) {
|
|
return false;
|
|
}
|
|
if (p.RingMatchesRingOnly) {
|
|
return checkBondRingMatch(p, mol1, bond1, mol2, bond2, ud);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool MCSBondCompareOrder(const MCSBondCompareParameters& p, const ROMol& mol1,
|
|
unsigned int bond1, const ROMol& mol2,
|
|
unsigned int bond2, void* ud) {
|
|
static const BondMatchOrderMatrix match(true); // ignore Aromatization
|
|
const Bond* b1 = mol1.getBondWithIdx(bond1);
|
|
const Bond* b2 = mol2.getBondWithIdx(bond2);
|
|
Bond::BondType t1 = b1->getBondType();
|
|
Bond::BondType t2 = b2->getBondType();
|
|
if (match.isEqual(t1, t2)) {
|
|
if (p.MatchStereo && !checkBondStereo(p, mol1, bond1, mol2, bond2)) {
|
|
return false;
|
|
}
|
|
if (p.RingMatchesRingOnly) {
|
|
return checkBondRingMatch(p, mol1, bond1, mol2, bond2, ud);
|
|
}
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool MCSBondCompareOrderExact(const MCSBondCompareParameters& p,
|
|
const ROMol& mol1, unsigned int bond1,
|
|
const ROMol& mol2, unsigned int bond2, void* ud) {
|
|
static const BondMatchOrderMatrix match(false); // AROMATIC != SINGLE
|
|
const Bond* b1 = mol1.getBondWithIdx(bond1);
|
|
const Bond* b2 = mol2.getBondWithIdx(bond2);
|
|
Bond::BondType t1 = b1->getBondType();
|
|
Bond::BondType t2 = b2->getBondType();
|
|
if (match.isEqual(t1, t2)) {
|
|
if (p.MatchStereo && !checkBondStereo(p, mol1, bond1, mol2, bond2)) {
|
|
return false;
|
|
}
|
|
if (p.RingMatchesRingOnly) {
|
|
return checkBondRingMatch(p, mol1, bond1, mol2, bond2, ud);
|
|
}
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
//=== RING COMPARE ========================================================
|
|
inline bool ringFusionCheck(const std::uint32_t c1[], const std::uint32_t c2[],
|
|
const ROMol& mol1, const FMCS::Graph& query,
|
|
const ROMol& mol2, const FMCS::Graph& target,
|
|
const MCSParameters* p) {
|
|
PRECONDITION(p, "p must not be NULL");
|
|
const RingInfo* ri2 = mol2.getRingInfo();
|
|
const VECT_INT_VECT& br2 = ri2->bondRings();
|
|
std::vector<size_t> nonFusedBonds(br2.size(), 0);
|
|
std::vector<size_t> numMcsBondRings(br2.size(), 0);
|
|
RDKit::FMCS::Graph::BOND_ITER_PAIR bpIter = boost::edges(query);
|
|
size_t i = 0;
|
|
// numMcsBondRings stores the number of bonds which
|
|
// are part of the MCS for each ring
|
|
for (auto it = bpIter.first; it != bpIter.second; ++it) {
|
|
const Bond* b =
|
|
mol2.getBondBetweenAtoms(target[c2[boost::source(*it, query)]],
|
|
target[c2[boost::target(*it, query)]]);
|
|
if (!b) {
|
|
continue;
|
|
}
|
|
unsigned int bi = b->getIdx();
|
|
if (!ri2->numBondRings(bi)) {
|
|
continue;
|
|
}
|
|
for (i = 0; i < br2.size(); ++i) {
|
|
if (std::find(br2[i].begin(), br2[i].end(), bi) != br2[i].end()) {
|
|
++numMcsBondRings[i];
|
|
}
|
|
}
|
|
}
|
|
// nonFusedBonds stores the number of non-fused bonds
|
|
// (i.e., which belong to a single ring) for each ring
|
|
for (i = 0; i < br2.size(); ++i) {
|
|
for (auto bi : br2[i]) {
|
|
if (ri2->numBondRings(bi) == 1) {
|
|
++nonFusedBonds[i];
|
|
}
|
|
}
|
|
}
|
|
/* if a ring has at least one bond which is part of the MCS,
|
|
we need to check how many bonds are actually part of the MCS:
|
|
if they are greater than the number of fused bonds but lower
|
|
than the number of non-fused bonds, then the ring is not
|
|
complete and we should return true straightaway
|
|
We need to check that the number of bonds in a ring
|
|
is greater than the number of fused bonds because that
|
|
guarantees that this ring is actually involved in the
|
|
MCS and not just adjacent to another ring which is
|
|
indeed part of the MCS. */
|
|
bool missingFusedBond = false;
|
|
for (i = 0; i < br2.size(); ++i) {
|
|
if (numMcsBondRings[i] &&
|
|
numMcsBondRings[i] > (br2[i].size() - nonFusedBonds[i]) &&
|
|
numMcsBondRings[i] < nonFusedBonds[i]) {
|
|
if (p->BondCompareParameters.CompleteRingsOnly) {
|
|
return true;
|
|
} else {
|
|
continue;
|
|
}
|
|
}
|
|
if (numMcsBondRings[i] == nonFusedBonds[i] &&
|
|
numMcsBondRings[i] < br2[i].size()) {
|
|
missingFusedBond = true;
|
|
}
|
|
}
|
|
/* If we found that the MCS is missing a fused bond, we may need to
|
|
check against the smaller molecule as well.
|
|
Consider this case: MCS between
|
|
2-methylbicyclo[4.3.0]nonane and 1-methylbicyclo[3.1.0]hexane
|
|
|
|
\__
|
|
/ \_ \___
|
|
\__/ \ / \/
|
|
\ / \ /
|
|
C C
|
|
H2 H2
|
|
|
|
In permissive mode, we are happy for methylcyclohexane to be the MCS.
|
|
in strict mode, we don't want methylcyclohexane to be the MCS.
|
|
|
|
\__
|
|
/ \
|
|
\__/
|
|
|
|
When methylcyclohexane is checked against 2-methylbicyclo[4.3.0]nonane
|
|
there is no missing fused bond. This is OK for permissive mode.
|
|
In strict mode, we also need to check against 1-methylbicyclo[3.1.0]hexane,
|
|
where there is indeed a missing fused bond. */
|
|
bool missingFusedBond2 = false;
|
|
if (missingFusedBond ^ p->BondCompareParameters.MatchFusedRingsStrict) {
|
|
// if we are in permissive mode we allow one of the molecules to miss
|
|
// fused bonds, but not both. In strict mode we allow neither.
|
|
if (!p->BondCompareParameters.MatchFusedRingsStrict) {
|
|
missingFusedBond = false;
|
|
}
|
|
const RingInfo* ri1 = mol1.getRingInfo();
|
|
const VECT_INT_VECT& br1 = ri1->bondRings();
|
|
std::set<unsigned int> mcsRingBondIdxSet;
|
|
// put all MCS bond indices which belong to one or more rings in a set
|
|
for (auto it = bpIter.first; it != bpIter.second; ++it) {
|
|
const Bond* b =
|
|
mol1.getBondBetweenAtoms(query[c1[boost::source(*it, query)]],
|
|
query[c1[boost::target(*it, query)]]);
|
|
if (b && ri1->numBondRings(b->getIdx())) {
|
|
mcsRingBondIdxSet.insert(b->getIdx());
|
|
}
|
|
}
|
|
for (i = 0; i < br1.size(); ++i) {
|
|
size_t mcsBonds = 0;
|
|
size_t mcsFusedBonds = 0;
|
|
size_t nonFusedBonds = 0;
|
|
// for each ring, check how many bonds belong to MCS (mcsBonds),
|
|
// how many bonds in the MCS belong to multiple rings (mcsFusedBonds).
|
|
// and how many bonds in each ring belong to a single ring (nonFusedBonds)
|
|
for (auto bi : br1[i]) {
|
|
bool isFused = (ri1->numBondRings(bi) > 1);
|
|
if (!isFused) {
|
|
++nonFusedBonds;
|
|
}
|
|
if (std::find(mcsRingBondIdxSet.begin(), mcsRingBondIdxSet.end(), bi) !=
|
|
mcsRingBondIdxSet.end()) {
|
|
++mcsBonds;
|
|
if (isFused) {
|
|
++mcsFusedBonds;
|
|
}
|
|
}
|
|
}
|
|
if (!p->BondCompareParameters.CompleteRingsOnly &&
|
|
mcsBonds > mcsFusedBonds &&
|
|
mcsBonds - mcsFusedBonds < nonFusedBonds) {
|
|
continue;
|
|
}
|
|
// if the ring is part of the MCS, and we found more bonds than the fused
|
|
// ones (i.e., the ring is not simply adjacent to an MCS ring) but less
|
|
// than the bonds which are part of this ring, then some fused bond is
|
|
// missing.
|
|
if (mcsBonds && mcsBonds > mcsFusedBonds && mcsBonds < br1[i].size()) {
|
|
missingFusedBond2 = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
// if both missingFusedBond and missingFusedBond2 are false, return true
|
|
return (!missingFusedBond && !missingFusedBond2);
|
|
}
|
|
|
|
bool FinalMatchCheckFunction(const std::uint32_t c1[], const std::uint32_t c2[],
|
|
const ROMol& mol1, const FMCS::Graph& query,
|
|
const ROMol& mol2, const FMCS::Graph& target,
|
|
const MCSParameters* p) {
|
|
PRECONDITION(p, "p must not be NULL");
|
|
if ((p->BondCompareParameters.MatchFusedRings ||
|
|
p->BondCompareParameters.MatchFusedRingsStrict) &&
|
|
!ringFusionCheck(c1, c2, mol1, query, mol2, target, p)) {
|
|
return false;
|
|
}
|
|
if (p->AtomCompareParameters.MatchChiralTag &&
|
|
!FinalChiralityCheckFunction(c1, c2, mol1, query, mol2, target, p)) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool FinalChiralityCheckFunction(const std::uint32_t c1[],
|
|
const std::uint32_t c2[], const ROMol& mol1,
|
|
const FMCS::Graph& query, const ROMol& mol2,
|
|
const FMCS::Graph& target,
|
|
const MCSParameters* /*unused*/) {
|
|
const unsigned int qna = boost::num_vertices(query); // getNumAtoms()
|
|
// check chiral atoms only:
|
|
for (unsigned int i = 0; i < qna; ++i) {
|
|
const Atom& a1 = *mol1.getAtomWithIdx(query[c1[i]]);
|
|
Atom::ChiralType ac1 = a1.getChiralTag();
|
|
|
|
const Atom& a2 = *mol2.getAtomWithIdx(target[c2[i]]);
|
|
Atom::ChiralType ac2 = a2.getChiralTag();
|
|
|
|
///*------------------ OLD Code :
|
|
// ???: non chiral query atoms ARE ALLOWED TO MATCH to Chiral target atoms
|
|
// (see test for issue 481)
|
|
if (a1.getDegree() < 3 || //#688: doesn't deal with "explicit" Hs properly
|
|
!(ac1 == Atom::CHI_TETRAHEDRAL_CW ||
|
|
ac1 == Atom::CHI_TETRAHEDRAL_CCW)) {
|
|
continue; // skip non chiral center QUERY atoms
|
|
}
|
|
if (!(ac2 == Atom::CHI_TETRAHEDRAL_CW ||
|
|
ac2 == Atom::CHI_TETRAHEDRAL_CCW)) {
|
|
return false;
|
|
}
|
|
//--------------------
|
|
/* More accurate check:
|
|
|
|
if( !(ac1 == Atom::CHI_TETRAHEDRAL_CW || ac1 ==
|
|
Atom::CHI_TETRAHEDRAL_CCW)
|
|
&& !(ac2 == Atom::CHI_TETRAHEDRAL_CW || ac2 ==
|
|
Atom::CHI_TETRAHEDRAL_CCW))
|
|
continue; // skip check if both atoms are non chiral center
|
|
|
|
if(!( (ac1 == Atom::CHI_TETRAHEDRAL_CW || ac1 ==
|
|
Atom::CHI_TETRAHEDRAL_CCW)
|
|
&& (ac2 == Atom::CHI_TETRAHEDRAL_CW || ac2 ==
|
|
Atom::CHI_TETRAHEDRAL_CCW)))//ac2 != ac1)
|
|
return false; // both atoms must be chiral or not without a
|
|
query priority
|
|
*/
|
|
const unsigned a1Degree =
|
|
boost::out_degree(c1[i], query); // a1.getDegree();
|
|
// number of all connected atoms in a seed
|
|
if (a1Degree > a2.getDegree()) { //#688 was != . // FIX issue 631
|
|
// printf("atoms Degree (%u, %u) %u [%u], %u\n", query[c1[i]],
|
|
// target[c2[i]], a1Degree, a1.getDegree(), a2.getDegree());
|
|
if (1 == a1Degree && a1.getDegree() == a2.getDegree()) {
|
|
continue; // continue to grow the seed
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
INT_LIST qOrder;
|
|
for (unsigned int j = 0; j < qna && qOrder.size() != a1Degree; ++j) {
|
|
const Bond* qB = mol1.getBondBetweenAtoms(query[c1[i]], query[c1[j]]);
|
|
if (qB) {
|
|
qOrder.push_back(qB->getIdx());
|
|
}
|
|
}
|
|
|
|
//#688
|
|
INT_LIST qmoOrder;
|
|
{
|
|
ROMol::OEDGE_ITER dbeg, dend;
|
|
boost::tie(dbeg, dend) = mol1.getAtomBonds(&a1);
|
|
for (; dbeg != dend; dbeg++) {
|
|
int dbidx = mol1[*dbeg]->getIdx();
|
|
if (std::find(qOrder.begin(), qOrder.end(), dbidx) != qOrder.end()) {
|
|
qmoOrder.push_back(dbidx);
|
|
}
|
|
// else
|
|
// qmoOrder.push_back(-1);
|
|
}
|
|
}
|
|
int qPermCount = // was: a1.getPerturbationOrder(qOrder);
|
|
static_cast<int>(countSwapsToInterconvert(qmoOrder, qOrder));
|
|
|
|
INT_LIST mOrder;
|
|
for (unsigned int j = 0; j < qna && mOrder.size() != a2.getDegree(); ++j) {
|
|
const Bond* mB = mol2.getBondBetweenAtoms(target[c2[i]], target[c2[j]]);
|
|
if (mB) {
|
|
mOrder.push_back(mB->getIdx());
|
|
}
|
|
}
|
|
|
|
//#688
|
|
while (mOrder.size() < a2.getDegree()) {
|
|
mOrder.push_back(-1);
|
|
}
|
|
INT_LIST moOrder;
|
|
ROMol::OEDGE_ITER dbeg, dend;
|
|
boost::tie(dbeg, dend) = mol2.getAtomBonds(&a2);
|
|
for (; dbeg != dend; dbeg++) {
|
|
int dbidx = mol2[*dbeg]->getIdx();
|
|
if (std::find(mOrder.begin(), mOrder.end(), dbidx) != mOrder.end()) {
|
|
moOrder.push_back(dbidx);
|
|
} else {
|
|
moOrder.push_back(-1);
|
|
}
|
|
}
|
|
|
|
int mPermCount = // was: a2.getPerturbationOrder(mOrder);
|
|
static_cast<int>(countSwapsToInterconvert(moOrder, mOrder));
|
|
//----
|
|
|
|
if ((qPermCount % 2 == mPermCount % 2 &&
|
|
a1.getChiralTag() != a2.getChiralTag()) ||
|
|
(qPermCount % 2 != mPermCount % 2 &&
|
|
a1.getChiralTag() == a2.getChiralTag())) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// check double bonds ONLY (why ???)
|
|
const unsigned int qnb = boost::num_edges(query);
|
|
std::map<unsigned int, unsigned int> qMap;
|
|
for (unsigned int j = 0; j < qna; ++j) {
|
|
qMap[query[c1[j]]] = j;
|
|
}
|
|
RDKit::FMCS::Graph::BOND_ITER_PAIR bpIter = boost::edges(query);
|
|
RDKit::FMCS::Graph::EDGE_ITER bIter = bpIter.first;
|
|
for (unsigned int i = 0; i < qnb; i++, ++bIter) {
|
|
const Bond* qBnd = mol1.getBondWithIdx(query[*bIter]);
|
|
if (qBnd->getBondType() != Bond::DOUBLE ||
|
|
qBnd->getStereo() <= Bond::STEREOANY) {
|
|
continue;
|
|
}
|
|
// don't think this can actually happen, but check to be sure:
|
|
if (qBnd->getStereoAtoms().size() != 2) { // MUST check it in the seed, not
|
|
// in full query molecule, but
|
|
// never happens !!!
|
|
continue;
|
|
}
|
|
|
|
const Bond* mBnd =
|
|
mol2.getBondBetweenAtoms(target[c2[qMap[qBnd->getBeginAtomIdx()]]],
|
|
target[c2[qMap[qBnd->getEndAtomIdx()]]]);
|
|
CHECK_INVARIANT(mBnd, "Matching bond not found");
|
|
if (mBnd->getBondType() != Bond::DOUBLE ||
|
|
mBnd->getStereo() <= Bond::STEREOANY) {
|
|
continue;
|
|
}
|
|
// don't think this can actually happen, but check to be sure:
|
|
if (mBnd->getStereoAtoms().size() != 2) {
|
|
continue;
|
|
}
|
|
|
|
unsigned int end1Matches = 0;
|
|
unsigned int end2Matches = 0;
|
|
if (target[c2[qMap[qBnd->getBeginAtomIdx()]]] ==
|
|
rdcast<unsigned int>(mBnd->getBeginAtomIdx())) {
|
|
// query Begin == mol Begin
|
|
if (target[c2[qMap[qBnd->getStereoAtoms()[0]]]] ==
|
|
rdcast<unsigned int>(mBnd->getStereoAtoms()[0])) {
|
|
end1Matches = 1;
|
|
}
|
|
if (target[c2[qMap[qBnd->getStereoAtoms()[1]]]] ==
|
|
rdcast<unsigned int>(mBnd->getStereoAtoms()[1])) {
|
|
end2Matches = 1;
|
|
}
|
|
} else {
|
|
// query End == mol Begin
|
|
if (target[c2[qMap[qBnd->getStereoAtoms()[0]]]] ==
|
|
rdcast<unsigned int>(mBnd->getStereoAtoms()[1])) {
|
|
end1Matches = 1;
|
|
}
|
|
if (target[c2[qMap[qBnd->getStereoAtoms()[1]]]] ==
|
|
rdcast<unsigned int>(mBnd->getStereoAtoms()[0])) {
|
|
end2Matches = 1;
|
|
}
|
|
}
|
|
// std::cerr<<" bnd: "<<qBnd->getIdx()<<":"<<qBnd->getStereo()<<" -
|
|
// "<<mBnd->getIdx()<<":"<<mBnd->getStereo()<<" -- "<<end1Matches<<"
|
|
// "<<end2Matches<<std::endl;
|
|
if (mBnd->getStereo() == qBnd->getStereo() &&
|
|
(end1Matches + end2Matches) == 1) {
|
|
return false;
|
|
}
|
|
if (mBnd->getStereo() != qBnd->getStereo() &&
|
|
(end1Matches + end2Matches) != 1) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool FinalChiralityCheckFunction_1(const short unsigned c1[],
|
|
const short unsigned c2[], const ROMol& mol1,
|
|
const FMCS::Graph& query, const ROMol& mol2,
|
|
const FMCS::Graph& target,
|
|
const MCSParameters*) {
|
|
const unsigned int qna = boost::num_vertices(query); // getNumAtoms()
|
|
// check chiral atoms:
|
|
for (unsigned int i = 0; i < qna; ++i) {
|
|
const Atom& a1 = *mol1.getAtomWithIdx(query[c1[i]]);
|
|
Atom::ChiralType ac1 = a1.getChiralTag();
|
|
if (!(ac1 == Atom::CHI_TETRAHEDRAL_CW ||
|
|
ac1 == Atom::CHI_TETRAHEDRAL_CCW)) {
|
|
continue; // skip non chiral center query atoms
|
|
}
|
|
const Atom& a2 = *mol2.getAtomWithIdx(target[c2[i]]);
|
|
Atom::ChiralType ac2 = a2.getChiralTag();
|
|
if (!(ac2 == Atom::CHI_TETRAHEDRAL_CW ||
|
|
ac2 == Atom::CHI_TETRAHEDRAL_CCW)) {
|
|
continue; // skip non chiral center TARGET atoms even if query atom is
|
|
}
|
|
// chiral
|
|
//// return false;
|
|
// both atoms are chiral:
|
|
const unsigned a1Degree =
|
|
boost::out_degree(c1[i], query); // a1.getDegree();
|
|
if (a1Degree != a2.getDegree()) { // number of all connected atoms in seed
|
|
return false; // ???
|
|
}
|
|
INT_LIST qOrder;
|
|
for (unsigned int j = 0; j < qna && qOrder.size() != a1Degree; ++j) {
|
|
const Bond* qB = mol1.getBondBetweenAtoms(query[c1[i]], query[c1[j]]);
|
|
if (qB) {
|
|
qOrder.push_back(qB->getIdx());
|
|
}
|
|
}
|
|
|
|
int qPermCount = a1.getPerturbationOrder(qOrder);
|
|
INT_LIST mOrder;
|
|
for (unsigned int j = 0; j < qna && mOrder.size() != a2.getDegree(); ++j) {
|
|
const Bond* mB = mol2.getBondBetweenAtoms(target[c2[i]], target[c2[j]]);
|
|
if (mB) {
|
|
mOrder.push_back(mB->getIdx());
|
|
}
|
|
}
|
|
int mPermCount = a2.getPerturbationOrder(mOrder);
|
|
|
|
if ((qPermCount % 2 == mPermCount % 2 &&
|
|
a1.getChiralTag() != a2.getChiralTag()) ||
|
|
(qPermCount % 2 != mPermCount % 2 &&
|
|
a1.getChiralTag() == a2.getChiralTag())) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// check double bonds ONLY (why ???)
|
|
const unsigned int qnb = boost::num_edges(query);
|
|
std::map<unsigned int, unsigned int> qMap;
|
|
for (unsigned int j = 0; j < qna; ++j) {
|
|
qMap[query[c1[j]]] = j;
|
|
}
|
|
RDKit::FMCS::Graph::BOND_ITER_PAIR bpIter = boost::edges(query);
|
|
RDKit::FMCS::Graph::EDGE_ITER bIter = bpIter.first;
|
|
for (unsigned int i = 0; i < qnb; i++, ++bIter) {
|
|
const Bond* qBnd = mol1.getBondWithIdx(query[*bIter]);
|
|
if (qBnd->getBondType() != Bond::DOUBLE ||
|
|
qBnd->getStereo() <= Bond::STEREOANY) {
|
|
continue;
|
|
}
|
|
// don't think this can actually happen, but check to be sure:
|
|
if (qBnd->getStereoAtoms().size() != 2) { // MUST check it in the seed, not
|
|
// in full query molecule, but
|
|
// never happens !!!
|
|
continue;
|
|
}
|
|
|
|
const Bond* mBnd =
|
|
mol2.getBondBetweenAtoms(target[c2[qMap[qBnd->getBeginAtomIdx()]]],
|
|
target[c2[qMap[qBnd->getEndAtomIdx()]]]);
|
|
CHECK_INVARIANT(mBnd, "Matching bond not found");
|
|
if (mBnd->getBondType() != Bond::DOUBLE ||
|
|
mBnd->getStereo() <= Bond::STEREOANY) {
|
|
continue;
|
|
}
|
|
// don't think this can actually happen, but check to be sure:
|
|
if (mBnd->getStereoAtoms().size() != 2) {
|
|
continue;
|
|
}
|
|
|
|
unsigned int end1Matches = 0;
|
|
unsigned int end2Matches = 0;
|
|
if (target[c2[qMap[qBnd->getBeginAtomIdx()]]] == mBnd->getBeginAtomIdx()) {
|
|
// query Begin == mol Begin
|
|
if (target[c2[qMap[qBnd->getStereoAtoms()[0]]]] ==
|
|
rdcast<unsigned int>(mBnd->getStereoAtoms()[0])) {
|
|
end1Matches = 1;
|
|
}
|
|
if (target[c2[qMap[qBnd->getStereoAtoms()[1]]]] ==
|
|
rdcast<unsigned int>(mBnd->getStereoAtoms()[1])) {
|
|
end2Matches = 1;
|
|
}
|
|
} else {
|
|
// query End == mol Begin
|
|
if (target[c2[qMap[qBnd->getStereoAtoms()[0]]]] ==
|
|
rdcast<unsigned int>(mBnd->getStereoAtoms()[1])) {
|
|
end1Matches = 1;
|
|
}
|
|
if (target[c2[qMap[qBnd->getStereoAtoms()[1]]]] ==
|
|
rdcast<unsigned int>(mBnd->getStereoAtoms()[0])) {
|
|
end2Matches = 1;
|
|
}
|
|
}
|
|
// std::cerr<<" bnd: "<<qBnd->getIdx()<<":"<<qBnd->getStereo()<<" -
|
|
// "<<mBnd->getIdx()<<":"<<mBnd->getStereo()<<" -- "<<end1Matches<<"
|
|
// "<<end2Matches<<std::endl;
|
|
if (mBnd->getStereo() == qBnd->getStereo() &&
|
|
(end1Matches + end2Matches) == 1) {
|
|
return false;
|
|
}
|
|
if (mBnd->getStereo() != qBnd->getStereo() &&
|
|
(end1Matches + end2Matches) != 1) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
} // namespace RDKit
|