mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Add a custom CXSMILES feature to indicate Zero Order Bonds (#8454)
* implement the ZOB CXSMILES feature * restore release notes
This commit is contained in:
committed by
GitHub
parent
3ec88d643a
commit
15e0f784b7
@@ -509,6 +509,42 @@ bool parse_coordinate_bonds(Iterator &first, Iterator last, RDKit::RWMol &mol,
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Iterator>
|
||||
bool parse_zero_bonds(Iterator &first, Iterator last, RDKit::RWMol &mol,
|
||||
unsigned int, unsigned int startBondIdx) {
|
||||
// these look like: C1CCCCC~CCCC1 |Z:5|
|
||||
if (first >= last || *first != 'Z') {
|
||||
return false;
|
||||
}
|
||||
++first;
|
||||
if (first >= last || *first != ':') {
|
||||
return false;
|
||||
}
|
||||
++first;
|
||||
|
||||
while (first < last && *first >= '0' && *first <= '9') {
|
||||
unsigned int bondIdx;
|
||||
if (!read_int(first, last, bondIdx)) {
|
||||
return false;
|
||||
}
|
||||
if (VALID_BNDIDX(bondIdx)) {
|
||||
auto bond = get_bond_with_smiles_idx(mol, bondIdx - startBondIdx);
|
||||
|
||||
if (!bond) {
|
||||
BOOST_LOG(rdWarningLog)
|
||||
<< "bond " << bondIdx
|
||||
<< " not found, cannot mark as zero order bond." << std::endl;
|
||||
return false;
|
||||
}
|
||||
bond->setBondType(Bond::ZERO);
|
||||
}
|
||||
if (first < last && *first == ',') {
|
||||
++first;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Iterator>
|
||||
bool parse_unsaturation(Iterator &first, Iterator last, RDKit::RWMol &mol,
|
||||
unsigned int startAtomIdx) {
|
||||
@@ -1407,6 +1443,10 @@ bool parse_it(Iterator &first, Iterator last, RDKit::RWMol &mol,
|
||||
startAtomIdx, startBondIdx)) {
|
||||
return false;
|
||||
}
|
||||
} else if (*first == 'Z') {
|
||||
if (!parse_zero_bonds(first, last, mol, startAtomIdx, startBondIdx)) {
|
||||
return false;
|
||||
}
|
||||
} else if (*first == '^') {
|
||||
if (!parse_radicals(first, last, mol, startAtomIdx)) {
|
||||
return false;
|
||||
@@ -2238,6 +2278,26 @@ std::string get_coordbonds_block(const ROMol &mol,
|
||||
return res;
|
||||
}
|
||||
|
||||
std::string get_zerobonds_block(const ROMol &mol,
|
||||
const std::vector<unsigned int> &,
|
||||
const std::vector<unsigned int> &bondOrder) {
|
||||
std::string res = "";
|
||||
for (unsigned int i = 0; i < bondOrder.size(); ++i) {
|
||||
auto idx = bondOrder[i];
|
||||
const auto bond = mol.getBondWithIdx(idx);
|
||||
if (bond->getBondType() != Bond::BondType::ZERO) {
|
||||
continue;
|
||||
}
|
||||
if (!res.empty()) {
|
||||
res += ",";
|
||||
} else {
|
||||
res = "Z:";
|
||||
}
|
||||
res += boost::str(boost::format("%d") % i);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
std::string get_ringbond_cistrans_block(
|
||||
const ROMol &mol, const std::vector<unsigned int> &atomOrder,
|
||||
const std::vector<unsigned int> &bondOrder) {
|
||||
@@ -2533,6 +2593,11 @@ std::string getCXExtensions(const ROMol &mol, std::uint32_t flags) {
|
||||
appendToCXExtension(block, res);
|
||||
}
|
||||
|
||||
if (flags & SmilesWrite::CXSmilesFields::CX_ZERO_BONDS) {
|
||||
const auto block = get_zerobonds_block(mol, atomOrder, bondOrder);
|
||||
appendToCXExtension(block, res);
|
||||
}
|
||||
|
||||
if (flags & SmilesWrite::CXSmilesFields::CX_LINKNODES) {
|
||||
const auto linknodeblock = get_linknodes_block(mol, atomOrder);
|
||||
appendToCXExtension(linknodeblock, res);
|
||||
|
||||
@@ -54,7 +54,7 @@ struct RDKIT_SMILESPARSE_EXPORT SmilesWriteParams {
|
||||
|
||||
namespace SmilesWrite {
|
||||
|
||||
BETTER_ENUM(CXSmilesFields, uint32_t,
|
||||
BETTER_ENUM(CXSmilesFields, uint32_t, // clang-format off
|
||||
CX_NONE = 0,
|
||||
CX_ATOM_LABELS = 1 << 0,
|
||||
CX_MOLFILE_VALUES = 1 << 1,
|
||||
@@ -68,6 +68,7 @@ BETTER_ENUM(CXSmilesFields, uint32_t,
|
||||
CX_BOND_CFG = 1 << 9,
|
||||
CX_BOND_ATROPISOMER = 1 << 10,
|
||||
CX_COORDINATE_BONDS = 1 << 11,
|
||||
CX_ZERO_BONDS = 1 << 12,
|
||||
CX_ALL = 0x7fffffff,
|
||||
CX_ALL_BUT_COORDS = CX_ALL ^ CX_COORDS
|
||||
);
|
||||
|
||||
@@ -3093,4 +3093,37 @@ TEST_CASE("atoms bound to metals should always have Hs specified") {
|
||||
CHECK(osmi == expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Round-trip checks for the custom "Z:" (zero order bond) CXSMILES block.
TEST_CASE("ZOB cx smiles extension", "[smiles][cxsmiles]") {
  SECTION("basics") {
    // build CC and turn its only bond into a zero order bond
    auto twoCarbons = "CC"_smiles;
    REQUIRE(twoCarbons);
    twoCarbons->getBondWithIdx(0)->setBondType(Bond::ZERO);

    // writing has to produce the "~" bond plus the Z block ...
    const auto cxsmi = MolToCXSmiles(*twoCarbons);
    REQUIRE(cxsmi == "C~C |Z:0|");

    // ... and reading that back has to restore the bond type
    auto reparsed = RDKit::v2::SmilesParse::MolFromSmiles(cxsmi);
    REQUIRE(reparsed);
    CHECK(reparsed->getBondWithIdx(0)->getBondType() == Bond::ZERO);
  }
  SECTION("Reverse") {
    constexpr const char *smi = "FB1(F)N2CCCC/C2=N/C2=[NH+]~1CCC=C2 |Z:12|";

    auto params = v2::SmilesParse::SmilesParserParams();
    params.sanitize = false;
    auto mol = v2::SmilesParse::MolFromSmiles(smi, params);
    REQUIRE(mol);

    // index 12 in the Z block corresponds to bond 15 of the parsed molecule,
    // which runs from a nitrogen to the boron
    auto zeroBond = mol->getBondWithIdx(15);
    CHECK(zeroBond->getBondType() == Bond::BondType::ZERO);
    CHECK(zeroBond->getBeginAtom()->getAtomicNum() == 7);
    CHECK(zeroBond->getEndAtom()->getAtomicNum() == 5);

    // writing the molecule out again must reproduce the input exactly
    REQUIRE(MolToCXSmiles(*mol) == smi);
  }
}
|
||||
@@ -156,8 +156,8 @@ ROMol *MolFromMolBlock(python::object imolBlock, bool sanitize, bool removeHs,
|
||||
return static_cast<ROMol *>(newM);
|
||||
}
|
||||
|
||||
ROMol *MolFromMolFile(const std::string &molFilename, bool sanitize, bool removeHs,
|
||||
bool strictParsing) {
|
||||
ROMol *MolFromMolFile(const std::string &molFilename, bool sanitize,
|
||||
bool removeHs, bool strictParsing) {
|
||||
RWMol *newM = nullptr;
|
||||
try {
|
||||
newM = MolFileToMol(molFilename, sanitize, removeHs, strictParsing);
|
||||
@@ -171,7 +171,8 @@ ROMol *MolFromMolFile(const std::string &molFilename, bool sanitize, bool remove
|
||||
return static_cast<ROMol *>(newM);
|
||||
}
|
||||
|
||||
ROMol *MolFromMrvFile(const std::string &molFilename, bool sanitize, bool removeHs) {
|
||||
ROMol *MolFromMrvFile(const std::string &molFilename, bool sanitize,
|
||||
bool removeHs) {
|
||||
RWMol *newM = nullptr;
|
||||
try {
|
||||
newM = MrvFileToMol(molFilename, sanitize, removeHs);
|
||||
@@ -604,7 +605,8 @@ python::object addMetadataToPNGStringHelper(python::dict pymetadata,
|
||||
return retval;
|
||||
}
|
||||
|
||||
python::object MolsFromPNGFile(const std::string &filename, const std::string &tag,
|
||||
python::object MolsFromPNGFile(const std::string &filename,
|
||||
const std::string &tag,
|
||||
python::object pyParams) {
|
||||
SmilesParserParams params;
|
||||
if (pyParams) {
|
||||
@@ -1729,6 +1731,7 @@ BOOST_PYTHON_MODULE(rdmolfiles) {
|
||||
RDKit::SmilesWrite::CXSmilesFields::CX_BOND_ATROPISOMER)
|
||||
.value("CX_COORDINATE_BONDS",
|
||||
RDKit::SmilesWrite::CXSmilesFields::CX_COORDINATE_BONDS)
|
||||
.value("CX_ZERO_BONDS", RDKit::SmilesWrite::CXSmilesFields::CX_ZERO_BONDS)
|
||||
.value("CX_ALL", RDKit::SmilesWrite::CXSmilesFields::CX_ALL)
|
||||
.value("CX_ALL_BUT_COORDS",
|
||||
RDKit::SmilesWrite::CXSmilesFields::CX_ALL_BUT_COORDS);
|
||||
|
||||
@@ -251,6 +251,8 @@ The features which are parsed include:
|
||||
``Q_e``, ``QH_p``, ``AH_P``, ``X_p``, ``XH_p``, ``M_p``, ``MH_p``, ``*``)
|
||||
- atomic properties ``atomprop``
|
||||
- coordinate/dative bonds ``C`` (these are translated into dative bonds)
|
||||
- hydrogen bonds ``H``
|
||||
- zero order bonds ``Z`` (custom extension, same syntax as c/t/ctu below)
|
||||
- radicals ``^``
|
||||
- enhanced stereo (these are converted into ``StereoGroups``)
|
||||
- linknodes ``LN``
|
||||
@@ -292,8 +294,9 @@ The features which are written by :py:func:`rdkit.Chem.rdmolfiles.MolToCXSmiles`
|
||||
>>> m.GetAtomWithIdx(1).SetProp('p2','A1')
|
||||
>>> m.GetAtomWithIdx(0).SetProp('atomLabel','O1')
|
||||
>>> m.GetAtomWithIdx(1).SetProp('atomLabel','C2')
|
||||
>>> m.GetBondWithIdx(0).SetBondType(Chem.BondType.ZERO)
|
||||
>>> Chem.MolToCXSmiles(m)
|
||||
'CO |$C2;O1$,atomProp:0.p1.5:0.p2.A1:1.p1.2|'
|
||||
'C~O |$C2;O1$,atomProp:0.p1.5:0.p2.A1:1.p1.2,Z:0|'
|
||||
|
||||
Reading molecule names
|
||||
----------------------
|
||||
|
||||
Reference in New Issue
Block a user