mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
- Switched from dynamic to static allocation for an instance of `MCSParameters` - Switched to using `auto` where possible - Added a few `CHECK_INVARIANT` where appropriate before dereferencing pointers - Moved some inline comments to the previous line to improve readability - Added an early check for `CompleteRingsOnly` in `checkBondRingMatch()` to improve computational efficiency - Removed `RingMatchTableSet` entirely as 1) it is unnecessary since its functionality is already provided by `RingInfo` 2) it abused the `userData` pointer. This allows cleaning up and simplifying the code, particularly the Python wrappers which had a significant amount of added complexity to support it - Removed all the code that was deprecated several releases ago - Reimplemented ringFusionCheck() from scratch to address several bug reports; also switched from std::set to boost::dynamic_bitset for better efficiency - Replaced boost::tie with boost::make_iterator_range - Modernized `for` loops where possible - Removed entirely the QueryRings structure as its functionality is already available in RingInfo - Removed entirely the _DFS() function since the same algorithm can be implemented in a simpler and more efficient way using RingInfo (from 2m28.441s to 2m9.859s for the same task) - Replaced std::vector<bool> with boost::dynamic_bitset - Replaced C-style casts with C++ casts - Replaced some size_t with unsigned int - Refactored checkIfRingsAreClosed() such that checkNoLoneRingAtoms() is not needed anymore - Added a test for slow runtimes with CompleteRingsOnly - Setting Timeout to 0 means no timeout, as it should be - Removed unused `steps` variable from `MaximumCommonSubgraph::growSeeds` - Storing both Atom and Bond pointers and their indices on Seed and MCS data structures is time-consuming and a potential source of inconsistencies; storing pointers is sufficient - Promoted `MaximumCommonSubgraph::match` from `private` to `public` - `NewBonds` was declared `mutable`, but 
`Seed::fillNewBonds()` was incorrectly declared as `non-const`, which caused the need for an ugly (and unnecessary) `const_cast`. I have now removed the `const_cast` and correctly declared functions that alter `NewBonds` as `const`, since `NewBonds` is explicitly `mutable` - Removed some useless random scoping that was peppering the MCS code - Removed a significant amount of duplicate code from the Python wrappers by inheriting from a base `PyMCSWrapper` class - Fixed #6082 - Fixed #5510 - Fixed #5457 - Fixed #5440 - Fixed #5411 - Fixed #3965 - Fixed #6578 Co-authored-by: ptosco <paolo.tosco@novartis.com>
134 lines
3.9 KiB
C++
134 lines
3.9 KiB
C++
//
|
|
// Copyright (C) 2014 Novartis Institutes for BioMedical Research
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <RDGeneral/export.h>
|
|
#pragma once
|
|
#include <cstdio>
|
|
#include <cstring>
|
|
#include <cstddef>
|
|
#include <ctime>
|
|
#include <iostream>
|
|
#ifdef _MSC_VER
|
|
#define _CRT_SECURE_NO_WARNINGS
|
|
#define NOMINMAX
|
|
#include <Winsock2.h> // for timeval
|
|
#ifdef _DEBUG // check memory leaks
|
|
#include <crtdbg.h>
|
|
#define _CRTDBG_MAP_ALLOC
|
|
#ifndef new
|
|
#define new new (_NORMAL_BLOCK, __FILE__, __LINE__)
|
|
#endif
|
|
#endif
|
|
#else
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
#include <sys/time.h>
|
|
#ifndef _WIN32
|
|
#include <sys/resource.h>
|
|
#endif
|
|
#endif
|
|
|
|
// SELECT ALGORITHM OPTIONS by commenting out some lines to exclude additional
// or experimental optimisations:
|
|
|
|
#define SEED_GROW_DEEP // fast and usually many times faster (but it can depend
// on the molecules)
|
|
// #define EXCLUDE_WRONG_COMPOSITION // fast but with a little effect, because
|
|
// amount of external bonds usually is very small.
|
|
// Exclude mismatched bonds combinations during seed growing (2^N-1 stage)
|
|
|
|
#define FAST_SUBSTRUCT_CACHE // based on a hash of Morgan code
|
|
#define DUP_SUBSTRUCT_CACHE // based on the list of query atoms and bonds. For
// rings, where seeds growing in both directions
// produce the same ring.
|
|
|
|
#define FAST_INCREMENTAL_MATCH // fast and sometimes very useful. Requires
// PRECOMPUTED_TABLES_MATCH.
// Match checking based on the previous match result, without finding a new
// matched substructure location in the target
|
|
|
|
#define VERBOSE_STATISTICS_ON
|
|
|
|
#ifdef _MSC_VER
|
|
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
|
|
// Time-zone record filled in by the gettimeofday() shim that this header
// provides when building with MSVC.
struct timezone {
  // offset from Greenwich, in minutes, counted westwards
  int tz_minuteswest;
  // kind of daylight-saving-time correction
  int tz_dsttime;
};
|
|
|
|
static inline int gettimeofday(struct timeval *tv, struct timezone *tz) {
|
|
FILETIME ft;
|
|
unsigned __int64 tmpres = 0;
|
|
static int tzflag;
|
|
|
|
if (nullptr != tv) {
|
|
GetSystemTimeAsFileTime(&ft);
|
|
|
|
tmpres |= ft.dwHighDateTime;
|
|
tmpres <<= 32;
|
|
tmpres |= ft.dwLowDateTime;
|
|
|
|
// converting file time to unix epoch
|
|
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
tmpres /= 10; // convert into microseconds
|
|
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
}
|
|
|
|
if (nullptr != tz) {
|
|
if (!tzflag) {
|
|
_tzset();
|
|
tzflag++;
|
|
}
|
|
tz->tz_minuteswest = _timezone / 60;
|
|
tz->tz_dsttime = _daylight;
|
|
}
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
// Reads the current time via gettimeofday() and folds it into one integer.
// NOTE: despite the name, the returned value is in microseconds.
static inline unsigned long long nanoClock(void) {
  struct timeval now;
  gettimeofday(&now, static_cast<struct timezone *>(nullptr));

  const unsigned long long secondsAsMicros = now.tv_sec * 1000000ULL;
  return secondsAsMicros + now.tv_usec;
}
|
|
|
|
namespace RDKit {
|
|
namespace FMCS {
|
|
|
|
#ifdef VERBOSE_STATISTICS_ON
|
|
|
|
// Compute statistics of really very, very fast calls.
// This slightly decreases overall performance, but might be of interest for
// investigation purposes (only)
|
|
// #define VERBOSE_STATISTICS_FASTCALLS_ON
|
|
|
|
struct ExecStatistics {
|
|
unsigned int TotalSteps{0}, MCSFoundStep{0};
|
|
unsigned long long MCSFoundTime;
|
|
unsigned int InitialSeed{0}, MismatchedInitialSeed{0};
|
|
unsigned int Seed{0}, RemainingSizeRejected{0};
|
|
unsigned int SeedCheck{0}, IndividualBondExcluded{0};
|
|
unsigned int MatchCall{0}, MatchCallTrue{0};
|
|
unsigned int FastMatchCall{0}, FastMatchCallTrue{0}, SlowMatchCallTrue{0};
|
|
unsigned int ExactMatchCall{0}, ExactMatchCallTrue{0}; // hash cache
|
|
unsigned int FindHashInCache{0}, HashKeyFoundInCache{0};
|
|
unsigned int AtomCompareCalls{0}, BondCompareCalls{0};
|
|
unsigned int AtomFunctorCalls{0}, BondFunctorCalls{0};
|
|
unsigned int WrongCompositionRejected{0}, WrongCompositionDetected{0};
|
|
unsigned int DupCacheFound{0}, DupCacheFoundMatch{0};
|
|
|
|
ExecStatistics() : MCSFoundTime(nanoClock()) {}
|
|
};
|
|
#endif
|
|
} // namespace FMCS
|
|
} // namespace RDKit
|