Files
rdkit/Code/GraphMol/FileParsers/MolSGroupParsing.h
tadhurst-cdd ca41fa5bfd Add SCSR parsing to RDKit (#8147)
* Parsing SCSR

* add scsrol to mol

* removed bad include file

* loosen distGeom test slightly

* add wrap test for SCSRMol

* Add test for scsr in python

* tests added for scsr and strict parsing removed

* remove extra stuff

* More fully specified use of SCSRMol for PR CI build

* Added flags for SCSR expansion to not include any leaving groups

* Added MolFromScsrParams to Wrap for python

* added SCSRMol destructor

* Added two tests for RNA macromols, and fixed a bug they revealed

* Added new tests and expected files

* changes as per PR review

* SCSR Changes for leaving groups

* fixed testScsr.py

* hydrogen bond treatment

* in SCSR expand, allow Hbond to be automatically detected

* changes as per code review

* Adding new test file

* changes for SCSR constructors, destructors for CI build

* fixed python for SCSR hydrogen bond modes, and added tests

* Added new test files

* fixed edge case for SCSR

* fix checksum for inchi

* consistent capitalization of SCSR throughout

* switch to enum class

* make things shorter

* simplify

* get rid of the ATTCHORD class

* New section for SCSR in RDKit_book

* added section to RDKit_Book

* SCSRMol is no longer exposed in Python

* fix leak in MolFromSCSRFile()
light refactoring

* expose MolFromSCSRFile() to python
make the MolFromSCSR functions work with default args
a bit more testing

* removed C++ access to SCSRMol

* CXSmiles now outputs hbonds, fix to template matching, and a few other things

* Addl fix for bad aromaticity in Hbond rings

* Test files needed

* Test files needed

* try to fix a CI build errors

* CI error fix

* Added missing test file

* CMake version - for CI build

* remove full file comparison from macromol test file

* accidental change to debug restored to release

* Code review changes

* As per PR review

---------

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
2025-05-14 13:37:59 +02:00

163 lines
7.0 KiB
C++

//
// Copyright (C) 2002-2018 Greg Landrum and T5 Informatics GmbH
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <RDGeneral/export.h>
#pragma once
#include <GraphMol/SubstanceGroup.h>
#include <RDGeneral/FileParseException.h>
#include <sstream>
namespace RDKit {
namespace SGroupParsing {
// Lookup tables shared by the SGroup parsing helpers declared below. Both are
// keyed by an int index (presumably the SGroup index read from the file --
// TODO confirm against the callers).
using IDX_TO_SGROUP_MAP = std::map<int, SubstanceGroup>;
using IDX_TO_STR_VECT_MAP = std::map<int, STR_VECT>;
/* ------------------ V2000 Utils ------------------ */
// Integer-field readers for SGroup lines (declared here, defined elsewhere).
// Both overloads return the value as an unsigned int. `pos` is taken by
// non-const reference, so the callee is able to move the caller's cursor
// within `text` (presumably past the field just read -- TODO confirm in the
// implementation). `line` is presumably the input line number used in
// diagnostics -- confirm.
// NOTE(review): the exact meaning of `isFieldCounter` (default: false) is not
// visible from this header.
unsigned int ParseSGroupIntField(const std::string &text, unsigned int line,
unsigned int &pos,
bool isFieldCounter = false);
// Overload that additionally takes an `ok` flag by reference and a
// `strictParsing` switch; presumably failures are reported through `ok`
// rather than by throwing when strictParsing is false -- TODO confirm.
unsigned int ParseSGroupIntField(bool &ok, bool strictParsing,
const std::string &text, unsigned int line,
unsigned int &pos,
bool isFieldCounter = false);
// Floating-point counterparts of the integer-field readers (declared here,
// defined elsewhere). Both overloads return a double. `pos` is taken by
// non-const reference, so the callee is able to move the caller's cursor
// within `text` (presumably past the field just read -- TODO confirm).
// `line` is presumably the input line number used in diagnostics -- confirm.
double ParseSGroupDoubleField(const std::string &text, unsigned int line,
unsigned int &pos);
// Overload that additionally takes an `ok` flag by reference and a
// `strictParsing` switch; presumably failures are reported through `ok`
// rather than by throwing when strictParsing is false -- TODO confirm.
double ParseSGroupDoubleField(bool &ok, bool strictParsing,
const std::string &text, unsigned int line,
unsigned int &pos);
// Returns a pointer to a SubstanceGroup for the index `sgIdx`, given the
// index-to-SGroup map (declared here, defined elsewhere).
// NOTE(review): what happens when `sgIdx` is not present in the map (null
// return vs. exception) is decided in the implementation -- confirm before
// dereferencing the result unconditionally. `line` is presumably only used
// for diagnostics.
SubstanceGroup *FindSgIdx(IDX_TO_SGROUP_MAP &sGroupMap, int sgIdx,
unsigned int line);
//! Reports a parsing problem according to the strictness setting.
/*!
  \param strictParsing  when true, an exception of type \c Exc is thrown
  \param msg            the text used both for the exception and for the
                        warning

  When \c strictParsing is false nothing is thrown; the message is written
  to \c rdWarningLog instead. \c Exc defaults to FileParseException and must
  be constructible from the message string.
*/
template <class Exc = FileParseException>
void SGroupWarnOrThrow(bool strictParsing, const std::string &msg) {
  // lenient mode: log the message and carry on
  if (!strictParsing) {
    BOOST_LOG(rdWarningLog) << msg << std::endl;
    return;
  }
  // strict mode: surface the problem to the caller
  throw Exc(msg);
}
// Per-line-type parsers for V2000 SGroup property lines (declarations only).
// The three-letter tag in a function name (STY, SDI, ...) presumably matches
// the "M  XXX" line type it handles -- see the CTfile format specification.
// Each takes the SGroup map by non-const reference and the molecule by
// pointer, so both can be modified by the callee; `strictParsing` defaults to
// true.
// NOTE(review): how problems are reported in non-strict mode is decided in
// the implementation, which is not visible here.
void ParseSGroupV2000STYLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing = true);
// NOTE(review): no line tag in this name; presumably shared by several line
// types whose payload is a list of indices -- confirm in the implementation.
void ParseSGroupV2000VectorDataLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing = true);
void ParseSGroupV2000SDILine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing = true);
// Parsers for the SST and SMT line types (per their names). Same parameter
// list as the sibling V2000 line parsers, with one difference: `line` is
// taken by NON-CONST REFERENCE here, whereas the siblings take it by value.
// NOTE(review): whether these callees actually update the caller's line
// counter, or the reference is an inconsistency, cannot be told from this
// header -- confirm in the implementation.
void ParseSGroupV2000SSTLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int &line,
bool strictParsing = true);
void ParseSGroupV2000SMTLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int &line,
bool strictParsing = true);
// Parsers for the SLB, SCN, SDS, SBV, SDT and SDD line types (per their
// names; the tags presumably correspond to the "M  XXX" SGroup property lines
// of the CTfile format -- confirm). All six share one signature: the SGroup
// map by non-const reference, the molecule by pointer, the line text, the
// line number by value, and `strictParsing` defaulting to true.
void ParseSGroupV2000SLBLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing = true);
void ParseSGroupV2000SCNLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing = true);
void ParseSGroupV2000SDSLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing = true);
void ParseSGroupV2000SBVLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing = true);
void ParseSGroupV2000SDTLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing = true);
void ParseSGroupV2000SDDLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing = true);
// Parser for the SCD/SED line types (per its name). In addition to the SGroup
// map it receives `dataFieldsMap` and three pieces of caller-owned state by
// non-const reference: `counter`, `lastDataSGroup` and `currentDataField`.
// Presumably these carry a partially assembled data field across consecutive
// lines -- TODO confirm in the implementation.
// Note that `strictParsing` has NO default here, unlike the sibling parsers.
void ParseSGroupV2000SCDSEDLine(IDX_TO_SGROUP_MAP &sGroupMap,
IDX_TO_STR_VECT_MAP &dataFieldsMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing, unsigned int &counter,
unsigned int &lastDataSGroup,
std::ostringstream &currentDataField);
// Parsers for the SPL and SNC line types (per their names; presumably the
// corresponding "M  XXX" SGroup property lines of the CTfile format --
// confirm). Same signature as the other V2000 line parsers: SGroup map by
// non-const reference, molecule by pointer, line text, line number by value,
// and `strictParsing` defaulting to true.
void ParseSGroupV2000SPLLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing = true);
void ParseSGroupV2000SNCLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing = true);
//! Parser for the SAP line type.
//! If the SAP line is malformed and has no lvIdx and no id,
//! lvIdx is set to mol->getNumAtoms() and id is set to " ".
//! The user is then responsible for replacing lvIdx with the correct
//! index: if d_bonds.size() == 1 and one of the bond atom indices
//! is aIdx, the other one can be safely assigned to lvIdx.
void ParseSGroupV2000SAPLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing = true);
// Parsers for the SCL and SBT line types (per their names; presumably the
// corresponding "M  XXX" SGroup property lines of the CTfile format --
// confirm). Same signature as the other V2000 line parsers: SGroup map by
// non-const reference, molecule by pointer, line text, line number by value,
// and `strictParsing` defaulting to true.
void ParseSGroupV2000SCLLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing = true);
void ParseSGroupV2000SBTLine(IDX_TO_SGROUP_MAP &sGroupMap, RWMol *mol,
const std::string &text, unsigned int line,
bool strictParsing = true);
/* ------------------ V3000 Utils ------------------ */
// Stream-based array parser: returns a std::vector<T> read from `stream`.
// Only the declaration lives in this header; the definition is elsewhere.
// `maxV` defaults to -1 and `strictParsing` defaults to false.
// NOTE(review): presumably -1 means "no upper bound on the values" -- the
// meaning of `maxV` is not visible here, confirm in the implementation.
template <class T>
RDKIT_FILEPARSERS_EXPORT std::vector<T> ParseV3000Array(
std::stringstream &stream, int maxV = -1, bool strictParsing = false);
// Only when building with MSVC and RDKIT_DYN_LINK defined: name the int and
// unsigned int instantiations explicitly with the export macro (presumably
// so that these symbols are exported from the DLL -- confirm).
#if defined(_MSC_VER) && defined(RDKIT_DYN_LINK)
template RDKIT_FILEPARSERS_EXPORT std::vector<int> ParseV3000Array(
std::stringstream &, int, bool);
template RDKIT_FILEPARSERS_EXPORT std::vector<unsigned int> ParseV3000Array(
std::stringstream &, int, bool);
#endif
//! Convenience overload of ParseV3000Array() that reads from a string.
/*!
  The text in \c s is wrapped in a std::stringstream and handed, together
  with \c maxV and \c strictParsing, to the stream-based ParseV3000Array<T>()
  overload; whatever that overload returns is passed back unchanged.
*/
template <class T>
std::vector<T> ParseV3000Array(const std::string &s, int maxV = -1,
                               bool strictParsing = false) {
  std::stringstream buffer{s};
  auto values = ParseV3000Array<T>(buffer, maxV, strictParsing);
  return values;
}
// V3000 label parsers (declarations only; definitions are elsewhere). The
// first two receive the molecule by pointer, the SGroup being filled in by
// non-const reference, and the remaining input as a std::stringstream;
// `strictParsing` defaults to true.
// `line` is presumably the input line number used in diagnostics -- confirm.
void ParseV3000CStateLabel(RWMol *mol, SubstanceGroup &sgroup,
std::stringstream &stream, unsigned int line,
bool strictParsing = true);
// NOTE(review): unlike ParseV3000CStateLabel, this one takes no line number.
void ParseV3000SAPLabel(RWMol *mol, SubstanceGroup &sgroup,
std::stringstream &stream, bool strictParsing = true);
// Reads from `stream` and returns the result as a std::string.
// NOTE(review): quoting/escaping rules are handled in the implementation and
// are not visible here.
std::string ParseV3000StringPropLabel(std::stringstream &stream);
// Parses the SGroups block of a V3000 record from `inStream` into `mol`.
// Returns the last line read in the SGroups block.
// `line` is taken by non-const reference, so the caller's line counter can be
// updated as input is consumed (presumably once per line read -- confirm).
// `nSgroups` is presumably the number of SGroups announced for the block --
// confirm against the caller. `strictParsing` has no default here.
std::string ParseV3000SGroupsBlock(std::istream *inStream, unsigned int &line,
unsigned int nSgroups, RWMol *mol,
bool strictParsing);
} // namespace SGroupParsing
} // namespace RDKit