mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* unused vars in bison parser cleanup * initialization order in TopologicalTorsionGenerator * unused params in SLN bison * sln flex unused params * throwing destructor in TDTWriter * signed comparison in substructmethods * unused input param in smiles/smarts bison * unused ms param in sln bison * signed comparison in FingerprintGenerator * store return of fscanf in StructCheckerOptions * unreferenced var in catch * uninitialized value in FileParserUtils * avoid override overload warning in MolDraw2DSVG * non-final overrides in Validate.h * unused static var in Avalon * unused vars in catch blocks * make AvalonTools avalonSimilarityBits & avalonSSSBits const int * assert fscanf result in StructCheckerOptions
702 lines
22 KiB
C++
702 lines
22 KiB
C++
//
|
|
// Copyright (C) 2016 Novartis Institutes for BioMedical Research
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <ctype.h>
|
|
#include <memory.h>
|
|
#include <errno.h>
|
|
#include <iostream>
|
|
#include <sstream>
|
|
#include <fstream>
|
|
#include <boost/property_tree/json_parser.hpp>
|
|
#include "../../RDGeneral/StreamOps.h"
|
|
#include "../FileParsers/FileParsers.h"
|
|
#include "../FileParsers/MolSupplier.h" //SDF
|
|
#include "../SmilesParse/SmilesParse.h"
|
|
#include "StructCheckerOptions.h"
|
|
|
|
#include "AugmentedAtomData.cpp"
|
|
|
|
namespace RDKit {
|
|
namespace StructureCheck {
|
|
|
|
bool parseOptionsJSON(const std::string &json, StructCheckerOptions &op) {
|
|
if (json.empty()) return false;
|
|
try {
|
|
std::istringstream ss;
|
|
ss.str(json);
|
|
boost::property_tree::ptree pt;
|
|
boost::property_tree::read_json(ss, pt);
|
|
op = StructCheckerOptions(); // reset to default values
|
|
op.AcidityLimit = pt.get<double>("AcidityLimit", op.AcidityLimit);
|
|
op.RemoveMinorFragments =
|
|
pt.get<bool>("RemoveMinorFragments", op.RemoveMinorFragments);
|
|
op.DesiredCharge = pt.get<int>("DesiredCharge", op.DesiredCharge);
|
|
op.CheckCollisions = pt.get<bool>("CheckCollisions", op.CheckCollisions);
|
|
op.CollisionLimitPercent =
|
|
pt.get<int>("CollisionLimitPercent", op.CollisionLimitPercent);
|
|
op.MaxMolSize = pt.get<int>("MaxMolSize", op.MaxMolSize);
|
|
op.ConvertSText = pt.get<bool>("ConvertSText", op.ConvertSText);
|
|
op.SqueezeIdentifiers =
|
|
pt.get<bool>("SqueezeIdentifiers", op.SqueezeIdentifiers);
|
|
op.StripZeros = pt.get<bool>("StripZeros", op.StripZeros);
|
|
op.CheckStereo = pt.get<bool>("CheckStereo", op.CheckStereo);
|
|
op.ConvertAtomTexts = pt.get<bool>("ConvertAtomTexts", op.ConvertAtomTexts);
|
|
op.GroupsToSGroups = pt.get<bool>("GroupsToSGroups", op.GroupsToSGroups);
|
|
op.Verbose = pt.get<bool>("Verbose", op.Verbose);
|
|
} catch (boost::property_tree::json_parser_error &ex) {
|
|
BOOST_LOG(rdErrorLog) << "JSON StructureCheck Options:" << ex.message()
|
|
<< "\n";
|
|
return false;
|
|
} catch (std::exception &ex) {
|
|
BOOST_LOG(rdErrorLog) << "JSON StructureCheck Options:" << ex.what()
|
|
<< "\n";
|
|
return false;
|
|
} catch (...) {
|
|
BOOST_LOG(rdErrorLog)
|
|
<< "JSON StructureCheck Options: Unknown exception.\n";
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool loadOptionsFromFiles(
|
|
StructCheckerOptions &op, const std::string &augmentedAtomTranslationsFile,
|
|
const std::string &patternFile, // file with clean patterns
|
|
const std::string &rotatePatternFile, // file with rotate patterns
|
|
const std::string &stereoPatternFile, // file with stereo patterns
|
|
const std::string &tautomerFile) {
|
|
bool res = true;
|
|
if (!augmentedAtomTranslationsFile.empty())
|
|
res &= op.loadAugmentedAtomTranslations(augmentedAtomTranslationsFile);
|
|
if (!patternFile.empty()) res &= op.loadPatterns(patternFile);
|
|
if (!rotatePatternFile.empty())
|
|
res &= op.loadRotatePatterns(rotatePatternFile);
|
|
if (!stereoPatternFile.empty())
|
|
res &= op.loadStereoPatterns(stereoPatternFile);
|
|
if (!tautomerFile.empty()) res &= op.loadTautomerData(tautomerFile);
|
|
return res;
|
|
}
|
|
|
|
//=====================================================================
|
|
// File parsers helper functions:
|
|
|
|
static const char
|
|
*bond_to_string[] = // ordered in according with BondType values
|
|
{"?", "-", "=", "#", "~", "-=", "-~", "=~", "*"};
|
|
|
|
// used in unit test
|
|
bool StringToAugmentedAtom(const char *str, AugmentedAtom &aa) {
|
|
/*
|
|
* The syntax of a augmented atom string is as follows:
|
|
*
|
|
* <AAString> ::= ['@'|'!@'] <Atom Symbol> [<Charge>]
|
|
* {'(' <Bond Symbol> <Atom Symbol> [<Charge>] ')'}.
|
|
* Bond symbols and charge descriptors are defined in the symbol tables
|
|
* 'bond_to_string' and 'charge_to_string'.
|
|
* '@' means central atom is in ring, '!@' means central atom is not in ring,
|
|
*omitting means any topography could match.
|
|
*/
|
|
|
|
size_t i;
|
|
aa.ShortName = str;
|
|
|
|
if (str[0] == '@') {
|
|
aa.Topology = RING;
|
|
str++;
|
|
} else if (str[0] == '!' && str[1] == '@') {
|
|
aa.Topology = CHAIN;
|
|
str++;
|
|
str++;
|
|
}
|
|
aa.Ligands.clear(); // initially no ligands
|
|
|
|
// fetch atom symbol
|
|
if (!isalpha(*str)) { // atom symbol is required
|
|
BOOST_LOG(rdErrorLog) << "atom symbol is required '" << str << "'\n";
|
|
return false;
|
|
}
|
|
i = 0;
|
|
while (isalnum(str[i]) || str[i] == ',' || str[i] == '#') i++;
|
|
aa.AtomSymbol = std::string(str, i);
|
|
str += i;
|
|
|
|
// charge definition
|
|
if (str[0] == '+' && str[1] == '-') { // "+-" - ANY_CHARGE
|
|
aa.Charge = ANY_CHARGE;
|
|
str += 2;
|
|
} else if (*str == '-' || *str == '+') {
|
|
int sign = (*str == '-') ? (-1) : (1);
|
|
str++;
|
|
if (*str >= '1' && *str <= '7') {
|
|
aa.Charge = sign * (*str - '0');
|
|
str++;
|
|
} else { // no match
|
|
BOOST_LOG(rdErrorLog) << "syntax error '" << str - 1 << "'\n";
|
|
return false;
|
|
}
|
|
} else
|
|
aa.Charge = 0; // NONE; default
|
|
|
|
// radical definition
|
|
if (str == strpbrk(str, "|.:") &&
|
|
!isdigit(str[1])) { // don't confuse with degree specification
|
|
switch (*str) {
|
|
case '|':
|
|
aa.Radical = SINGLET; // RadicalType::SINGLET;
|
|
str++;
|
|
break;
|
|
case '.':
|
|
aa.Radical = DOUBLET;
|
|
str++;
|
|
break;
|
|
case ':':
|
|
aa.Radical = TRIPLET;
|
|
str++;
|
|
break;
|
|
default:
|
|
return false; // never
|
|
};
|
|
} else
|
|
aa.Radical = RT_NONE;
|
|
|
|
// read ligand descriptions
|
|
while (*str == '(') {
|
|
aa.Ligands.push_back(Ligand());
|
|
str++; // skip '('
|
|
// read bond type
|
|
bool found = false;
|
|
for (i = 0; i < sizeof(bond_to_string) / sizeof(*bond_to_string); i++) {
|
|
if (0 == memcmp(str, bond_to_string[i], isalpha(str[1]) ? 1 : 2)) {
|
|
found = true;
|
|
aa.Ligands.back().BondType = (AABondType)(i);
|
|
str += strlen(bond_to_string[i]);
|
|
break;
|
|
}
|
|
}
|
|
if (!found) {
|
|
BOOST_LOG(rdErrorLog) << "syntax error '" << str
|
|
<< "'\nthere must be a bond type symbol.\n";
|
|
return false;
|
|
}
|
|
if (!isalpha(*str)) { /* there must be an atom symbol */
|
|
BOOST_LOG(rdErrorLog) << "syntax error '" << str
|
|
<< "'\nthere must be an atom symbol.\n";
|
|
return false;
|
|
}
|
|
|
|
i = 0; /* fetch atom symbol */
|
|
while (isalnum(str[i]) || str[i] == ',' || str[i] == '#') i++;
|
|
aa.Ligands.back().AtomSymbol = std::string(str, i);
|
|
str += i;
|
|
|
|
// charge definition
|
|
if (str[0] == '+' && str[1] == '-') { // "+-" - ANY_CHARGE
|
|
aa.Ligands.back().Charge = ANY_CHARGE;
|
|
str += 2;
|
|
} else if (*str == '-' || *str == '+') {
|
|
int sign = (*str == '-') ? (-1) : (1);
|
|
str++;
|
|
if (*str >= '1' && *str <= '7') {
|
|
aa.Ligands.back().Charge = sign * (*str - '0');
|
|
str++;
|
|
} else { // no match
|
|
BOOST_LOG(rdErrorLog) << "syntax error '" << str - 1 << "'\n";
|
|
return false;
|
|
}
|
|
} else
|
|
aa.Ligands.back().Charge = 0; // NONE; default
|
|
|
|
// radical definition
|
|
if (str == strpbrk(str, "|.:") &&
|
|
!isdigit(str[1])) { // don't confuse with degree specification
|
|
switch (*str) {
|
|
case '|':
|
|
aa.Ligands.back().Radical = SINGLET;
|
|
str++;
|
|
break;
|
|
case '.':
|
|
aa.Ligands.back().Radical = DOUBLET;
|
|
str++;
|
|
break;
|
|
case ':':
|
|
aa.Ligands.back().Radical = TRIPLET;
|
|
str++;
|
|
break;
|
|
default:
|
|
return false; // never
|
|
};
|
|
} else
|
|
aa.Ligands.back().Radical = RT_NONE;
|
|
|
|
// substitution count descriptor
|
|
if (str[0] == ':' && isdigit(str[1])) {
|
|
aa.Ligands.back().SubstitutionCount = str[1] - '0';
|
|
str += 2;
|
|
} else
|
|
aa.Ligands.back().SubstitutionCount = 0;
|
|
|
|
if (*str == ')') // check for close ')'
|
|
str++;
|
|
else {
|
|
BOOST_LOG(rdErrorLog) << "unclosed ( '" << str << "'\n";
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (*str != '\0') { // there are some extra symbols after scan (inside quoted
|
|
// AA string)
|
|
BOOST_LOG(rdErrorLog) << "unexpected symbols after scan '" << str << "'\n";
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
static bool ReadAugmentedAtoms(const std::string &path,
|
|
std::vector<AugmentedAtom> &atoms) {
|
|
/*
|
|
* '*.chk' and '*.aci' files loader
|
|
* Only the portion of the strings between the two '"' characters is used,
|
|
* other characters are regarded as comments.
|
|
*/
|
|
FILE *fp = fopen(path.c_str(), "r");
|
|
if (!fp) {
|
|
BOOST_LOG(rdErrorLog) << "could not open file '" << path << "'\n"
|
|
<< strerror(errno) << "\n";
|
|
return false;
|
|
}
|
|
unsigned n = 0;
|
|
|
|
char str[1024];
|
|
while (fgets(str, sizeof(str), fp)) { // comment block in '*.aci' file
|
|
if (*str != '#') break;
|
|
}
|
|
sscanf(str, "%d", &n);
|
|
|
|
atoms.clear();
|
|
atoms.resize(n);
|
|
/* read: "! _N10_" - unused version string // '*.chk' file only
|
|
char buffer[80];
|
|
char *cp;
|
|
int vers_len;
|
|
fgets(buffer, 80, fp);
|
|
cp = strchr(buffer, '_');
|
|
if (cp && strchr(cp + 1, '_'))
|
|
{
|
|
vers_len = (int)(strchr(cp + 1, '_') - cp) - 1;
|
|
strncpy(aa_check_version, cp + 1, vers_len);
|
|
aa_check_version[vers_len] = '\0';
|
|
}
|
|
|
|
if (log_file)
|
|
fprintf(log_file, "augmented atom check version = %s\n",
|
|
aa_check_version);
|
|
*/
|
|
for (unsigned i = 0; i < n && !feof(fp); i++) {
|
|
str[0] = '\0';
|
|
if (fgets(str, sizeof(str), fp)) {
|
|
char *s = str;
|
|
while (*s >= ' ' && *s != '"') s++;
|
|
if (*s == '"') {
|
|
s++;
|
|
size_t len = 0;
|
|
while (s[len] >= ' ' && s[len] != '"') len++;
|
|
s[len] = '\0'; // == atom_string
|
|
if (!StringToAugmentedAtom(s, atoms[i])) {
|
|
BOOST_LOG(rdErrorLog) << "unsuccessful AA parsing of " << i + 1 << " "
|
|
<< str << "\n";
|
|
fclose(fp);
|
|
return false;
|
|
}
|
|
} else { // incorrect text line
|
|
BOOST_LOG(rdErrorLog) << "unsuccessful AA parsing of " << i + 1 << " "
|
|
<< str << "\n";
|
|
fclose(fp);
|
|
return false;
|
|
}
|
|
} else if (ferror(fp)) {
|
|
BOOST_LOG(rdErrorLog) << "unsuccessful AA parsing of " << i + 1 << " "
|
|
<< str << "\n";
|
|
fclose(fp);
|
|
return false;
|
|
}
|
|
}
|
|
fclose(fp);
|
|
return true;
|
|
}
|
|
|
|
static bool ReadAAPairs(
|
|
const std::string &path,
|
|
std::vector<std::pair<AugmentedAtom, AugmentedAtom>> &trans_pairs) {
|
|
/*
|
|
* '*.trn' file loader
|
|
* Reads file and constructs an array of pairs of augmented atom
|
|
* descriptions. The first one being the search pattern and the second
|
|
* one the target pattern.
|
|
* The function expects the number of augmented atom description pairs
|
|
* on the first line and the strings corresponding to the data structures
|
|
* on the n following lines.
|
|
* Only the portion of the strings between the two '"' characters is used,
|
|
* other characters are regarded as comments.
|
|
*/
|
|
FILE *fp = fopen(path.c_str(), "r");
|
|
if (!fp) {
|
|
BOOST_LOG(rdErrorLog) << "could not open file '" << path << "'\n"
|
|
<< strerror(errno) << "\n";
|
|
return false;
|
|
}
|
|
unsigned n = 0;
|
|
int num_scan = fscanf(fp, "%d", &n);
|
|
TEST_ASSERT(num_scan == 1);
|
|
|
|
char buffer[80];
|
|
|
|
// Reads the version from the first line in the file
|
|
if (fgets(buffer, sizeof(buffer), fp)) {
|
|
/*
|
|
int vers_len;
|
|
char *cp;
|
|
cp = strchr(buffer, '_');
|
|
if (cp && strchr(cp + 1, '_'))
|
|
{
|
|
vers_len = (int)(strchr(cp + 1, '_') - cp) - 1;
|
|
strncpy(aa_trans_version, cp + 1, vers_len);
|
|
aa_trans_version[vers_len] = '\0';
|
|
}
|
|
if (log_file)
|
|
fprintf(log_file, "augmented atom transformation version =
|
|
%s\n", aa_trans_version);
|
|
*/
|
|
}
|
|
trans_pairs.clear();
|
|
trans_pairs.resize(n);
|
|
|
|
for (unsigned i = 0; i < n; i++) {
|
|
char str[1024];
|
|
str[0] = '\0';
|
|
if (fgets(str, sizeof(str), fp)) {
|
|
char *s = str;
|
|
while (*s >= ' ' && *s != '"') s++;
|
|
if (*s == '"') {
|
|
s++;
|
|
size_t len = 0;
|
|
while (s[len] >= ' ' && s[len] != '"') len++;
|
|
s[len] = '\0'; // == atom_string
|
|
|
|
if (!StringToAugmentedAtom(s, trans_pairs[i].first)) {
|
|
BOOST_LOG(rdErrorLog) << "unsuccessful translation of " << str
|
|
<< "\n";
|
|
fclose(fp);
|
|
return false;
|
|
}
|
|
|
|
s += len;
|
|
s++;
|
|
// second atom quoted string
|
|
while (*s >= ' ' && *s != '"') s++;
|
|
if (*s == '"') {
|
|
s++;
|
|
len = 0;
|
|
while (s[len] >= ' ' && s[len] != '"') len++;
|
|
s[len] = '\0'; // == atom_string
|
|
|
|
if (!StringToAugmentedAtom(s, trans_pairs[i].second)) {
|
|
BOOST_LOG(rdErrorLog) << "unsuccessful translation of " << str
|
|
<< "\n";
|
|
fclose(fp);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
fclose(fp);
|
|
if (trans_pairs.size() != n) {
|
|
BOOST_LOG(rdErrorLog) << "syntax error in translation file " << path
|
|
<< " pair " << trans_pairs.size() + 1 << "\n";
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
//=====================================================================
|
|
static void loadDefaultAugmentedAtoms(StructCheckerOptions &struchkOpts) {
|
|
std::vector<AugmentedAtom> good;
|
|
good.reserve(sizeof(DefaultGoodAtoms) / sizeof(*DefaultGoodAtoms));
|
|
|
|
for (const auto &DefaultGoodAtom : DefaultGoodAtoms) {
|
|
good.push_back(AugmentedAtom());
|
|
if (!StringToAugmentedAtom(DefaultGoodAtom.str, good.back())) {
|
|
throw "INTERNAL ERROR in default data";
|
|
}
|
|
}
|
|
|
|
std::vector<AugmentedAtom> acidic;
|
|
acidic.reserve(sizeof(DefaultAcidicAtoms) / sizeof(*DefaultAcidicAtoms));
|
|
|
|
for (const auto &DefaultAcidicAtom : DefaultAcidicAtoms) {
|
|
acidic.push_back(AugmentedAtom());
|
|
if (!StringToAugmentedAtom(DefaultAcidicAtom.str, acidic.back())) {
|
|
throw "INTERNAL ERROR in default data";
|
|
}
|
|
}
|
|
|
|
std::vector<std::pair<AugmentedAtom, AugmentedAtom>> trans_pairs;
|
|
trans_pairs.resize(sizeof(DefaultAugmentedAtomTransforms) /
|
|
sizeof(*DefaultAugmentedAtomTransforms));
|
|
for (size_t i = 0; i < sizeof(DefaultAugmentedAtomTransforms) /
|
|
sizeof(*DefaultAugmentedAtomTransforms);
|
|
++i) {
|
|
if (!StringToAugmentedAtom(DefaultAugmentedAtomTransforms[i].from,
|
|
trans_pairs[i].first)) {
|
|
throw "INTERNAL Error in default augmented atom transforms";
|
|
}
|
|
if (!StringToAugmentedAtom(DefaultAugmentedAtomTransforms[i].to,
|
|
trans_pairs[i].second)) {
|
|
throw "INTERNAL Error in default augmented atom transforms";
|
|
}
|
|
}
|
|
|
|
struchkOpts.setGoodAugmentedAtoms(good);
|
|
struchkOpts.setAcidicAugmentedAtoms(acidic);
|
|
struchkOpts.setAugmentedAtomTranslations(trans_pairs);
|
|
}
|
|
//=====================================================================
|
|
|
|
bool StructCheckerOptions::loadAugmentedAtomTranslations(
|
|
const std::string &path) {
|
|
AugmentedAtomPairs.clear();
|
|
if (path.empty()) return false;
|
|
return ReadAAPairs(path, AugmentedAtomPairs);
|
|
}
|
|
|
|
void StructCheckerOptions::setAugmentedAtomTranslations(
|
|
const std::vector<std::pair<AugmentedAtom, AugmentedAtom>> &aaPairs) {
|
|
AugmentedAtomPairs = aaPairs;
|
|
}
|
|
|
|
bool StructCheckerOptions::loadAcidicAugmentedAtoms(const std::string &path) {
|
|
AcidicAtoms.clear();
|
|
if (path.empty()) return false;
|
|
return ReadAugmentedAtoms(path, AcidicAtoms);
|
|
}
|
|
|
|
void StructCheckerOptions::setAcidicAugmentedAtoms(
|
|
const std::vector<AugmentedAtom> &atoms) {
|
|
AcidicAtoms = atoms;
|
|
}
|
|
|
|
bool StructCheckerOptions::loadGoodAugmentedAtoms(const std::string &path) {
|
|
GoodAtoms.clear();
|
|
if (path.empty()) return false;
|
|
return ReadAugmentedAtoms(path, GoodAtoms);
|
|
}
|
|
void StructCheckerOptions::setGoodAugmentedAtoms(
|
|
const std::vector<AugmentedAtom> &atoms) {
|
|
GoodAtoms = atoms;
|
|
}
|
|
|
|
static bool loadSDF(const std::string &path, std::vector<ROMOL_SPTR> &mols) {
|
|
mols.clear();
|
|
try {
|
|
SDMolSupplier suppler(path);
|
|
while (!suppler.atEnd()) {
|
|
ROMol *m = suppler.next();
|
|
if (m) {
|
|
// ?? MakeHydrogensImplicit()
|
|
mols.push_back(ROMOL_SPTR(m));
|
|
}
|
|
}
|
|
} catch (std::exception &ex) {
|
|
BOOST_LOG(rdErrorLog) << ex.what(); // log error
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool StructCheckerOptions::loadPatterns(const std::string &path) {
|
|
Patterns.clear();
|
|
if (path.empty()) return false;
|
|
return loadSDF(path, Patterns);
|
|
}
|
|
|
|
void StructCheckerOptions::setPatterns(const std::vector<ROMOL_SPTR> &p) {
|
|
Patterns = p;
|
|
}
|
|
|
|
bool StructCheckerOptions::loadRotatePatterns(const std::string &path) {
|
|
RotatePatterns.clear();
|
|
if (path.empty()) return false;
|
|
return loadSDF(path, RotatePatterns);
|
|
}
|
|
|
|
void StructCheckerOptions::setRotatePatterns(const std::vector<ROMOL_SPTR> &p) {
|
|
RotatePatterns = p;
|
|
}
|
|
|
|
bool StructCheckerOptions::loadStereoPatterns(const std::string &path) {
|
|
StereoPatterns.clear();
|
|
if (path.empty()) return false;
|
|
return loadSDF(path, StereoPatterns);
|
|
}
|
|
|
|
void StructCheckerOptions::setStereoPatterns(const std::vector<ROMOL_SPTR> &p) {
|
|
StereoPatterns = p;
|
|
}
|
|
|
|
bool StructCheckerOptions::loadTautomerData(const std::string &path) {
|
|
ToTautomer.clear();
|
|
FromTautomer.clear();
|
|
if (path.empty()) return false;
|
|
try {
|
|
std::string ext = path.substr(path.find_last_of(".") + 1);
|
|
if (ext == "sdf" || ext == "SDF") {
|
|
SDMolSupplier suppler(path);
|
|
while (!suppler.atEnd()) {
|
|
ROMol *m1 = suppler.next();
|
|
if (suppler.atEnd()) break;
|
|
ROMol *m2 = suppler.next();
|
|
if (m1 && m2) {
|
|
// ?? MakeHydrogensImplicit()
|
|
FromTautomer.push_back(ROMOL_SPTR(m1));
|
|
ToTautomer.push_back(ROMOL_SPTR(m2));
|
|
}
|
|
}
|
|
} else if (ext == "rdf" ||
|
|
ext == "RDF") { // load RD-File, see CTfile format specification
|
|
std::ifstream in(path.c_str());
|
|
if (!in) {
|
|
BOOST_LOG(rdErrorLog) << "could not open file '" << path << "'\n"
|
|
<< strerror(errno) << "\n";
|
|
return false;
|
|
}
|
|
std::string str;
|
|
unsigned line = 0;
|
|
do {
|
|
str = getLine(in);
|
|
line++;
|
|
if (str.length() >= 4 && 0 == strcmp(str.c_str(), "$RXN")) {
|
|
while (!in.eof()) {
|
|
str = getLine(in);
|
|
line++;
|
|
if (str.length() >= 4 && 0 == strcmp(str.c_str(), "$MOL")) {
|
|
RWMol *molFrom = MolDataStreamToMol(in, line);
|
|
if (!molFrom) {
|
|
BOOST_LOG(rdErrorLog) << "RD-file corrupted. " << path
|
|
<< " line " << line << "\n"; // log error
|
|
return false;
|
|
}
|
|
while (molFrom && !in.eof()) {
|
|
str = getLine(in);
|
|
line++;
|
|
if (str.length() >= 4 && 0 == strcmp(str.c_str(), "$MOL")) {
|
|
RWMol *molTo = MolDataStreamToMol(in, line);
|
|
if (!molTo) {
|
|
BOOST_LOG(rdErrorLog) << "RD-file corrupted. " << path
|
|
<< " line " << line
|
|
<< "\n"; // log error
|
|
return false;
|
|
}
|
|
// ?? MakeHydrogensImplicit()
|
|
FromTautomer.push_back(ROMOL_SPTR(molFrom));
|
|
ToTautomer.push_back(ROMOL_SPTR(molTo));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} while (!in.eof());
|
|
} else {
|
|
BOOST_LOG(rdErrorLog) << "unsupported file type. " << path
|
|
<< "\n"; // log error
|
|
return false;
|
|
}
|
|
} catch (std::exception &ex) {
|
|
BOOST_LOG(rdErrorLog) << ex.what() << "\n"; // log error
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void StructCheckerOptions::setTautomerData(const std::vector<ROMOL_SPTR> &from,
|
|
const std::vector<ROMOL_SPTR> &to) {
|
|
FromTautomer = from;
|
|
ToTautomer = to;
|
|
}
|
|
|
|
static void parseSMARTS(std::vector<ROMOL_SPTR> &mols,
|
|
const std::vector<std::string> &smarts) {
|
|
mols.clear();
|
|
for (const auto &patt : smarts) {
|
|
ROMol *m = SmartsToMol(patt);
|
|
if (m) mols.push_back(ROMOL_SPTR(m));
|
|
}
|
|
}
|
|
|
|
void StructCheckerOptions::parsePatterns(
|
|
const std::vector<std::string> &smarts) {
|
|
parseSMARTS(Patterns, smarts);
|
|
}
|
|
|
|
void StructCheckerOptions::parseRotatePatterns(
|
|
const std::vector<std::string> &smarts) {
|
|
parseSMARTS(RotatePatterns, smarts);
|
|
}
|
|
|
|
void StructCheckerOptions::parseStereoPatterns(
|
|
const std::vector<std::string> &smarts) {
|
|
parseSMARTS(StereoPatterns, smarts);
|
|
}
|
|
|
|
void StructCheckerOptions::parseTautomerData(
|
|
const std::vector<std::string> &smartsFrom,
|
|
const std::vector<std::string> &smartsTo) {
|
|
parseSMARTS(FromTautomer, smartsFrom);
|
|
parseSMARTS(ToTautomer, smartsTo);
|
|
}
|
|
//--------------------------------------------------------------------------
|
|
|
|
bool loadChargeDataTables(const std::string& path) {
|
|
// TODO: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
RDUNUSED_PARAM(path);
|
|
/*
|
|
double Elneg0; // elneg_table[0].value;
|
|
std::map<unsigned, double> ElnegTable; // AtomicNumber -> eleng
|
|
std::vector<inc_entry_t> AtomAcidity; //atom_acidity_table[]
|
|
std::vector<inc_entry_t> charge_inc_table; // std::map AtomicNumber ->
|
|
...
|
|
double Alpha, Beta;
|
|
std::vector<path_entry_t> alpha_path_table, beta_path_table;
|
|
*/
|
|
return true;
|
|
}
|
|
|
|
StructCheckerOptions::StructCheckerOptions()
|
|
: AcidityLimit(0.0),
|
|
RemoveMinorFragments(false),
|
|
DesiredCharge(0),
|
|
CheckCollisions(false),
|
|
CollisionLimitPercent(15),
|
|
MaxMolSize(255),
|
|
ConvertSText(false),
|
|
SqueezeIdentifiers(false),
|
|
StripZeros(false),
|
|
CheckStereo(false),
|
|
ConvertAtomTexts(false),
|
|
GroupsToSGroups(false),
|
|
Verbose(false),
|
|
Elneg0(0.0) // elneg_table[0].value;
|
|
,
|
|
Alpha(0.0),
|
|
Beta(0.0) {
|
|
loadDefaultAugmentedAtoms(*this);
|
|
}
|
|
|
|
} // namespace StructureCheck
|
|
} // namespace RDKit
|