diff --git a/Code/GraphMol/FileParsers/cdxml_parser_catch.cpp b/Code/GraphMol/FileParsers/cdxml_parser_catch.cpp index d9d7dabd0..fcccaf291 100644 --- a/Code/GraphMol/FileParsers/cdxml_parser_catch.cpp +++ b/Code/GraphMol/FileParsers/cdxml_parser_catch.cpp @@ -1201,6 +1201,7 @@ TEST_CASE("Github #7501 - dative bonds") { CDXMLParserParams params; auto mols = MolsFromCDXMLFile(fname, params); CHECK(MolToSmiles(*mols[0]) == - "CC(C)->[Os]12<-CCCN->1CC=N->2"); // All datives to the Oxygen + "C[CH2](C)->[Os]12<-[CH3]CC[NH]->1CC=[NH]->2"); // All datives to the + // Osmium } } diff --git a/Code/GraphMol/FileParsers/test1.cpp b/Code/GraphMol/FileParsers/test1.cpp index f8f1f6bda..b355a67fd 100644 --- a/Code/GraphMol/FileParsers/test1.cpp +++ b/Code/GraphMol/FileParsers/test1.cpp @@ -4897,7 +4897,7 @@ void testMolFileDativeBonds() { TEST_ASSERT(m->getBondWithIdx(4)->getBondType() == Bond::DATIVE); std::string smiles = MolToSmiles(*m); - TEST_ASSERT(smiles == "CCC(=O)O->[Cu]"); + TEST_ASSERT(smiles == "CCC(=O)[OH]->[Cu]"); delete m; } @@ -4911,7 +4911,7 @@ void testMolFileDativeBonds() { TEST_ASSERT(m->getBondWithIdx(9)->getBondType() == Bond::DATIVE); std::string smiles = MolToSmiles(*m); - TEST_ASSERT(smiles == "CCC(=O)O->[Cu]<-OC(O)CC"); + TEST_ASSERT(smiles == "CCC(=O)[OH]->[Cu]<-[OH]C(O)CC"); delete m; } @@ -4924,7 +4924,7 @@ void testMolFileDativeBonds() { TEST_ASSERT(m->getBondWithIdx(4)->getBondType() == Bond::DATIVE); std::string smiles = MolToSmiles(*m); - TEST_ASSERT(smiles == "CC(C)->[Mg](Cl)Cl"); + TEST_ASSERT(smiles == "C[CH2](C)->[Mg]([Cl])[Cl]"); delete m; } diff --git a/Code/GraphMol/MolOps.cpp b/Code/GraphMol/MolOps.cpp index 79bdd7629..60d162531 100644 --- a/Code/GraphMol/MolOps.cpp +++ b/Code/GraphMol/MolOps.cpp @@ -185,13 +185,8 @@ void halogenCleanup(RWMol &mol, Atom *atom) { } } -bool isMetal(const Atom *atom) { - static const std::unique_ptr q(makeMAtomQuery()); - return q->Match(atom); -} - bool isHypervalentNonMetal(Atom *atom) { - if (isMetal(atom)) { + if (QueryOps::isMetal(*atom)) { return false; } atom->updatePropertyCache(false); @@ -261,7 +256,7 @@ void metalBondCleanup(RWMol &mol, Atom *atom, // see if there are any metals bonded to it by a single bond for (auto bond : mol.atomBonds(atom)) { if (bond->getBondType() == Bond::BondType::SINGLE && - isMetal(bond->getOtherAtom(atom))) { + QueryOps::isMetal(*bond->getOtherAtom(atom))) { metals.push_back(bond->getOtherAtom(atom)); } } @@ -317,7 +312,7 @@ void cleanUpOrganometallics(RWMol &mol) { // see if there are any metals bonded to it by a single bond for (auto bond : mol.atomBonds(atom)) { if (bond->getBondType() == Bond::BondType::SINGLE && - isMetal(bond->getOtherAtom(atom))) { + QueryOps::isMetal(*bond->getOtherAtom(atom))) { needsFixing = true; break; } diff --git a/Code/GraphMol/MolStandardize/Wrap/testMolStandardize.py b/Code/GraphMol/MolStandardize/Wrap/testMolStandardize.py index 597d88421..f3e4b6b4d 100644 --- a/Code/GraphMol/MolStandardize/Wrap/testMolStandardize.py +++ b/Code/GraphMol/MolStandardize/Wrap/testMolStandardize.py @@ -82,9 +82,9 @@ class TestCase(unittest.TestCase): Chem.MolFromSmarts( "[Li,K,Rb,Cs,Fr,Be,Mg,Ca,Sr,Ba,Ra,Sc,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Al,Ga,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,In,Sn,Hf,Ta,W,Re,Os,Ir,Pt,Au,Hg,Tl,Pb,Bi]~[N,O,F]" )) - mol2 = Chem.MolFromSmiles("CCC(=O)O[Na]") + mol2 = Chem.MolFromSmiles("CCC(=O)[O][Na]") nm2 = md.Disconnect(mol2) - self.assertEqual(Chem.MolToSmiles(nm2), "CCC(=O)O[Na]") + self.assertEqual(Chem.MolToSmiles(nm2), "CCC(=O)[O][Na]") # Split with organometallics disconnector, two ways. rufile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolStandardize', 'test_data', @@ -230,14 +230,14 @@ class TestCase(unittest.TestCase): lfrag_params = rdMolStandardize.LargestFragmentChooser(lfParams) mol4 = Chem.MolFromSmiles(smi4) lfrag4 = lfrag_params.choose(mol4) - self.assertEqual(Chem.MolToSmiles(lfrag4), "O=[Pb]=O") + self.assertEqual(Chem.MolToSmiles(lfrag4), "[O]=[Pb]=[O]") lfParams = rdMolStandardize.CleanupParameters() lfParams.largestFragmentChooserUseAtomCount = False lfrag_params = rdMolStandardize.LargestFragmentChooser(lfParams) mol4 = Chem.MolFromSmiles(smi4) lfrag4 = lfrag_params.choose(mol4) - self.assertEqual(Chem.MolToSmiles(lfrag4), "O=[Pb]=O") + self.assertEqual(Chem.MolToSmiles(lfrag4), "[O]=[Pb]=[O]") lfParams = rdMolStandardize.CleanupParameters() lfParams.largestFragmentChooserCountHeavyAtomsOnly = True diff --git a/Code/GraphMol/MolStandardize/test1.cpp b/Code/GraphMol/MolStandardize/test1.cpp index 6ae03a04d..3e6597e38 100644 --- a/Code/GraphMol/MolStandardize/test1.cpp +++ b/Code/GraphMol/MolStandardize/test1.cpp @@ -60,7 +60,7 @@ void testCleanup() { { RWMOL_SPTR m = "C[Hg]C"_smiles; RWMOL_SPTR res(MolStandardize::cleanup(*m, params)); - TEST_ASSERT(MolToSmiles(*res) == "C[Hg]C") + TEST_ASSERT(MolToSmiles(*res) == "[CH3][Hg][CH3]") } BOOST_LOG(rdDebugLog) << "Finished" << std::endl; } @@ -143,7 +143,7 @@ void testMetalDisconnector() { RWMOL_SPTR m("c1ccccc1[Mg]Br"_smiles); TEST_ASSERT(m); md.disconnect(*m); - TEST_ASSERT(MolToSmiles(*m) == "Br[Mg]c1ccccc1"); + TEST_ASSERT(MolToSmiles(*m) == "[Br][Mg][c]1ccccc1"); } { @@ -157,7 +157,7 @@ void testMetalDisconnector() { RWMOL_SPTR m("Br[Mg]c1ccccc1CCC(=O)O[Na]"_smiles); TEST_ASSERT(m); md.disconnect(*m); - TEST_ASSERT(MolToSmiles(*m) == "O=C([O-])CCc1ccccc1[Mg]Br.[Na+]"); + TEST_ASSERT(MolToSmiles(*m) == "O=C([O-])CCc1cccc[c]1[Mg][Br].[Na+]"); } // test input own dp_metal_non, dp_metal_nof @@ -172,7 +172,7 @@ void testMetalDisconnector() { ROMOL_SPTR m("CCC(=O)O[Na]"_smiles); TEST_ASSERT(m); ROMOL_SPTR nm(md2.disconnect(*m)); - TEST_ASSERT(MolToSmiles(*nm) == "CCC(=O)O[Na]"); // not disconnected + TEST_ASSERT(MolToSmiles(*nm) == "CCC(=O)[O][Na]"); // not disconnected } // test that metals are not assigned excess positive charge diff --git a/Code/GraphMol/MolStandardize/testFragment.cpp b/Code/GraphMol/MolStandardize/testFragment.cpp index 9e2854e6d..e40a2ce73 100644 --- a/Code/GraphMol/MolStandardize/testFragment.cpp +++ b/Code/GraphMol/MolStandardize/testFragment.cpp @@ -185,7 +185,7 @@ void test_largest_fragment() { smi9 = "CC[Hg]SC1=C(C=CC=C1)C(=O)[O][Na]"; std::shared_ptr m9(SmilesToMol(smi9)); std::shared_ptr res9(MolStandardize::fragmentParent(*m9, params)); - TEST_ASSERT(MolToSmiles(*res9) == "CC[Hg]Sc1ccccc1C(=O)[O-]"); + TEST_ASSERT(MolToSmiles(*res9) == "C[CH2][Hg][S]c1ccccc1C(=O)[O-]"); // Covalent bond with metal. smi10 = "[Ag]OC(=O)O[Ag]"; @@ -247,9 +247,9 @@ void test_largest_fragment() { LargestFragmentChooser lfrag_params(lfParams); std::shared_ptr m12(SmilesToMol(smi12)); std::shared_ptr lfrag7(lfrag_params.choose(*m12)); - TEST_ASSERT(MolToSmiles(*lfrag7) == "O=[Pb]=O"); + TEST_ASSERT(MolToSmiles(*lfrag7) == "[O]=[Pb]=[O]"); lfrag_params.chooseInPlace(*m12); - TEST_ASSERT(MolToSmiles(*m12) == "O=[Pb]=O"); + TEST_ASSERT(MolToSmiles(*m12) == "[O]=[Pb]=[O]"); } { CleanupParameters lfParams; @@ -257,9 +257,9 @@ void test_largest_fragment() { LargestFragmentChooser lfrag_params(lfParams); std::shared_ptr m12(SmilesToMol(smi12)); std::shared_ptr lfrag7(lfrag_params.choose(*m12)); - TEST_ASSERT(MolToSmiles(*lfrag7) == "O=[Pb]=O"); + TEST_ASSERT(MolToSmiles(*lfrag7) == "[O]=[Pb]=[O]"); lfrag_params.chooseInPlace(*m12); - TEST_ASSERT(MolToSmiles(*m12) == "O=[Pb]=O"); + TEST_ASSERT(MolToSmiles(*m12) == "[O]=[Pb]=[O]"); } { CleanupParameters lfParams; diff --git a/Code/GraphMol/QueryOps.cpp b/Code/GraphMol/QueryOps.cpp index 364b33be7..e1527d2c5 100644 --- a/Code/GraphMol/QueryOps.cpp +++ b/Code/GraphMol/QueryOps.cpp @@ -1171,5 +1171,10 @@ void finalizeQueryFromDescription( } } +bool isMetal(const Atom &atom) { + static const std::unique_ptr q(makeMAtomQuery()); + return q->Match(&atom); +} + } // namespace QueryOps }; // namespace RDKit diff --git a/Code/GraphMol/QueryOps.h b/Code/GraphMol/QueryOps.h index e1785c71e..e3559c234 100644 --- a/Code/GraphMol/QueryOps.h +++ b/Code/GraphMol/QueryOps.h @@ -1158,6 +1158,7 @@ inline bool hasComplexBondTypeQuery(const Bond &bond) { return hasComplexBondTypeQuery(*bond.getQuery()); } +RDKIT_GRAPHMOL_EXPORT bool isMetal(const Atom &atom); } // namespace QueryOps } // namespace RDKit #endif diff --git a/Code/GraphMol/SmilesParse/SmilesWrite.cpp b/Code/GraphMol/SmilesParse/SmilesWrite.cpp index 67f179009..32ca8cde2 100644 --- a/Code/GraphMol/SmilesParse/SmilesWrite.cpp +++ b/Code/GraphMol/SmilesParse/SmilesWrite.cpp @@ -1,5 +1,5 @@ // -// Copyright (C) 2002-2021 Greg Landrum and other RDKit contributors +// Copyright (C) 2002-2025 Greg Landrum and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -88,6 +89,65 @@ std::string getAtomChiralityInfo(const Atom *atom) { } return atString; } + +// ----- +// figure out if we need to put a bracket around the atom, +// the conditions for this are: +// - not in the organic subset +// - formal charge specified +// - chirality present and writing isomeric smiles +// - non-default isotope and writing isomeric smiles +// - atom-map information present +// - the atom has a nonstandard valence +// - bonded to a metal +bool atomNeedsBracket(const Atom *atom, const std::string &atString, + const SmilesWriteParams ¶ms) { + PRECONDITION(atom, "null atom"); + auto num = atom->getAtomicNum(); + if (!inOrganicSubset(num)) { + return true; + } + + if (atom->getFormalCharge()) { + return true; + } + if (params.doIsomericSmiles && (atom->getIsotope() || !atString.empty())) { + return true; + } + if (atom->hasProp(common_properties::molAtomMapNumber)) { + return true; + } + + const INT_VECT &defaultVs = PeriodicTable::getTable()->getValenceList(num); + int totalValence = atom->getTotalValence(); + bool nonStandard = false; + if (atom->getNumRadicalElectrons()) { + nonStandard = true; + } else if ((num == 7 || num == 15) && atom->getIsAromatic() && + atom->getNumExplicitHs()) { + // another type of "nonstandard" valence is an aromatic N or P with + // explicit Hs indicated: + nonStandard = true; + } else { + nonStandard = (totalValence != defaultVs.front() && atom->getTotalNumHs()); + } + if (nonStandard) { + return true; + } + + // check for bonds to a metal + if (atom->hasOwningMol()) { + for (const auto bond : atom->getOwningMol().atomBonds(atom)) { + auto oatom = bond->getOtherAtom(atom); + if (QueryOps::isMetal(*oatom)) { + return true; + } + } + } + + return false; +} + } // namespace std::string GetAtomSmiles(const Atom *atom, const SmilesWriteParams ¶ms) { @@ -97,7 +157,6 @@ std::string GetAtomSmiles(const Atom *atom, const SmilesWriteParams ¶ms) { int num = atom->getAtomicNum(); int isotope = atom->getIsotope(); - bool needsBracket = false; std::string symb; bool hasCustomSymbol = atom->getPropIfPresent(common_properties::smilesSymbol, symb); @@ -113,41 +172,9 @@ std::string GetAtomSmiles(const Atom *atom, const SmilesWriteParams ¶ms) { atString = getAtomChiralityInfo(atom); } } - if (!params.allHsExplicit && inOrganicSubset(num)) { - // it's a member of the organic subset - - // ----- - // figure out if we need to put a bracket around the atom, - // the conditions for this are: - // - formal charge specified - // - the atom has a nonstandard valence - // - chirality present and writing isomeric smiles - // - non-default isotope and writing isomeric smiles - // - atom-map information present - const INT_VECT &defaultVs = PeriodicTable::getTable()->getValenceList(num); - int totalValence = atom->getTotalValence(); - bool nonStandard = false; - - if (hasCustomSymbol || atom->getNumRadicalElectrons()) { - nonStandard = true; - } else if ((num == 7 || num == 15) && atom->getIsAromatic() && - atom->getNumExplicitHs()) { - // another type of "nonstandard" valence is an aromatic N or P with - // explicit Hs indicated: - nonStandard = true; - } else { - nonStandard = - (totalValence != defaultVs.front() && atom->getTotalNumHs()); - } - - if (fc || nonStandard || - atom->hasProp(common_properties::molAtomMapNumber)) { - needsBracket = true; - } else if (params.doIsomericSmiles && (isotope || atString != "")) { - needsBracket = true; - } - } else { - needsBracket = true; + bool needsBracket = true; + if (!hasCustomSymbol && !params.allHsExplicit) { + needsBracket = atomNeedsBracket(atom, atString, params); } if (needsBracket) { res += "["; diff --git a/Code/GraphMol/SmilesParse/catch_tests.cpp b/Code/GraphMol/SmilesParse/catch_tests.cpp index 9cb6ca82f..d9b441e26 100644 --- a/Code/GraphMol/SmilesParse/catch_tests.cpp +++ b/Code/GraphMol/SmilesParse/catch_tests.cpp @@ -213,7 +213,7 @@ TEST_CASE("github #2257: writing cxsmiles", "[smiles][cxsmiles]") { CHECK(mol->getAtomWithIdx(3)->getNumRadicalElectrons() == 1); auto smi = MolToCXSmiles(*mol); - CHECK(smi == "[O]N([O])[Fe] |^1:0,2|"); + CHECK(smi == "[O][N]([O])[Fe] |^1:0,2|"); } SECTION("radicals2") { auto mol = "[CH]C[CH2] |^1:2,^2:0|"_smiles; @@ -794,13 +794,13 @@ TEST_CASE("github #3774: MolToSmarts inverts direction of dative bond", auto m = "N->[Cu+]"_smiles; REQUIRE(m); CHECK(MolToSmarts(*m) == "[#7]->[Cu+]"); - CHECK(MolToSmiles(*m) == "N->[Cu+]"); + CHECK(MolToSmiles(*m) == "[NH3]->[Cu+]"); } { auto m = "N<-[Cu+]"_smiles; REQUIRE(m); CHECK(MolToSmarts(*m) == "[#7]<-[Cu+]"); - CHECK(MolToSmiles(*m) == "N<-[Cu+]"); + CHECK(MolToSmiles(*m) == "[NH2]<-[Cu+]"); } } SECTION("from smarts") { @@ -2495,7 +2495,8 @@ TEST_CASE("Dative bond in cxsmiles double double def", "[bug][cxsmiles]") { std::string smilesOut = MolToSmiles(*smilesMol, ps); - CHECK(smilesOut == "CC(/C=C/C1CCCC1)=O->[Fe]1<-N2=C(CC3=N->1CCC3)CCC2"); + CHECK(smilesOut == + "CC(/C=C/C1CCCC1)=[O]->[Fe]1<-[N]2=C(CC3=[N]->1CCC3)CCC2"); } } } @@ -2912,11 +2913,11 @@ TEST_CASE("Github #7372: SMILES output option to disable dative bonds") { auto m = "[NH3]->[Fe]-[NH2]"_smiles; REQUIRE(m); auto smi = MolToSmiles(*m); - CHECK(smi == "N[Fe]<-N"); + CHECK(smi == "[NH2][Fe]<-[NH3]"); SmilesWriteParams ps; ps.includeDativeBonds = false; auto newSmi = MolToSmiles(*m, ps); - CHECK(newSmi == "N[Fe][NH3]"); + CHECK(newSmi == "[NH2][Fe][NH3]"); // ensure that representation round trips: auto m2 = v2::SmilesParse::MolFromSmiles(newSmi); REQUIRE(m2); @@ -3061,3 +3062,19 @@ TEST_CASE("trimethylcyclohexane") { CHECK(smiOut == "C[C@H]1C[C@H](C)C[C@H](C)C1 |o1:1,o2:3,o3:6|"); } } + +TEST_CASE("atoms bound to metals should always have Hs specified") { + SECTION("basics") { + std::vector> smileses = { + {"Cl[Pt](F)([NH2])[OH]", "[NH2][Pt]([OH])([F])[Cl]"}, + {"Cl[Pt](F)(<-[NH3])[OH]", "[NH3]->[Pt]([OH])([F])[Cl]"}, + }; + for (const auto &[smi, expected] : smileses) { + auto m = v2::SmilesParse::MolFromSmiles(smi); + REQUIRE(m); + auto osmi = MolToSmiles(*m); + INFO(smi); + CHECK(osmi == expected); + } + } +} \ No newline at end of file diff --git a/Code/GraphMol/SmilesParse/cxsmiles_test.cpp b/Code/GraphMol/SmilesParse/cxsmiles_test.cpp index 6e5955653..e802890dd 100644 --- a/Code/GraphMol/SmilesParse/cxsmiles_test.cpp +++ b/Code/GraphMol/SmilesParse/cxsmiles_test.cpp @@ -1496,14 +1496,14 @@ TEST_CASE("Github #7372: SMILES output option to disable dative bonds") { auto m = "[NH3]->[Fe]-[NH2]"_smiles; REQUIRE(m); auto smi = MolToCXSmiles(*m); - CHECK(smi == "N[Fe][NH3] |C:2.1|"); + CHECK(smi == "[NH2][Fe][NH3] |C:2.1|"); // disable the dative bond output SmilesWriteParams ps; smi = MolToCXSmiles(*m, ps, SmilesWrite::CXSmilesFields::CX_ALL_BUT_COORDS ^ SmilesWrite::CXSmilesFields::CX_COORDINATE_BONDS); - CHECK(smi == "N[Fe][NH3]"); + CHECK(smi == "[NH2][Fe][NH3]"); } SECTION("basics, SMARTS output") { auto m = "[NH3]->[Fe]-[NH2]"_smiles; diff --git a/Code/GraphMol/SmilesParse/nontetrahedral_tests.cpp b/Code/GraphMol/SmilesParse/nontetrahedral_tests.cpp index 149880114..546291421 100644 --- a/Code/GraphMol/SmilesParse/nontetrahedral_tests.cpp +++ b/Code/GraphMol/SmilesParse/nontetrahedral_tests.cpp @@ -96,9 +96,9 @@ TEST_CASE("non-canonical non-tetrahedral output") { SECTION("no reordering") { // clang-format off std::vector data = { - "C[Pt@SP1](F)(O)Cl", "C[Pt@SP2](F)(O)Cl", - "C[Pt@TB1](F)(O)(N)Cl", "C[Pt@TB2](F)(O)(N)Cl", - "C[Pt@OH1](F)(O)(N)(Br)Cl", "C[Pt@OH2](F)(O)(N)(Br)Cl", + "[CH3][Pt@SP1]([F])([OH])[Cl]", "[CH3][Pt@SP2]([F])([OH])[Cl]", + "[CH3][Pt@TB1]([F])([OH])([NH2])[Cl]", "[CH3][Pt@TB2]([F])([OH])([NH2])[Cl]", + "[CH3][Pt@OH1]([F])([OH])([NH2])([Br])[Cl]", "[CH3][Pt@OH2]([F])([OH])([NH2])([Br])[Cl]", }; // clang-format on for (const auto &smi : data) { @@ -114,10 +114,10 @@ TEST_CASE("non-canonical non-tetrahedral output") { SECTION("reordering") { // clang-format off std::vector> data = { - {"F[Pt@SP1](C)(O)Cl","C[Pt@SP3](O)(F)Cl",}, - {"F[Pt@SP2](C)(O)Cl","C[Pt@SP1](O)(F)Cl"}, + {"F[Pt@SP1](C)(O)Cl","[CH3][Pt@SP3]([OH])([F])[Cl]",}, + {"F[Pt@SP2](C)(O)Cl","[CH3][Pt@SP1]([OH])([F])[Cl]"}, {"S[As@TB1](F)(Cl)(Br)N","N[As@TB6](F)(S)(Cl)Br"}, - {"C[Pt@OH1](F)(O)(N)(Br)Cl","C[Pt@OH16](N)(O)(F)(Cl)Br"}, + {"C[Pt@OH1](F)(O)(N)(Br)Cl","[CH3][Pt@OH16]([NH2])([OH])([F])([Cl])[Br]"}, }; // clang-format on for (const auto &pr : data) { @@ -257,8 +257,9 @@ TEST_CASE("hasNonTetrahedralStereo") { TEST_CASE("zero permutation is in SMILES") { Chirality::setAllowNontetrahedralChirality(true); - std::vector smis = {"CC[Pt@SP](C)(O)F", "C[Pt@TB](N)(F)(Cl)Br", - "C[Pt@OH](N)(F)(Cl)(Br)I"}; + std::vector smis = {"C[CH2][Pt@SP]([CH3])([OH])[F]", + "[CH3][Pt@TB]([NH2])([F])([Cl])[Br]", + "[CH3][Pt@OH]([NH2])([F])([Cl])([Br])[I]"}; for (auto smi : smis) { std::unique_ptr m{SmilesToMol(smi)}; REQUIRE(m); @@ -292,6 +293,6 @@ TEST_CASE("do not read from/write to SMILES when disabled") { m->getAtomWithIdx(2)->setChiralTag(Atom::ChiralType::CHI_SQUAREPLANAR); m->getAtomWithIdx(2)->setProp(common_properties::_chiralPermutation, 1); auto smi = MolToSmiles(*m); - CHECK(smi == "CC[Pt](C)(O)F"); + CHECK(smi == "C[CH2][Pt]([CH3])([OH])[F]"); } } \ No newline at end of file diff --git a/Code/GraphMol/SmilesParse/test.cpp b/Code/GraphMol/SmilesParse/test.cpp index eb1bf78a2..d669208c1 100644 --- a/Code/GraphMol/SmilesParse/test.cpp +++ b/Code/GraphMol/SmilesParse/test.cpp @@ -1671,9 +1671,9 @@ void testIsotopes() { RWMol *mol = SmilesToMol(smi); TEST_ASSERT(mol); smi = MolToSmiles(*mol, false); - TEST_ASSERT(smi == "CC[U]"); + TEST_ASSERT(smi == "C[CH2][U]"); smi = MolToSmiles(*mol, true); - TEST_ASSERT(smi == "CC[U]"); + TEST_ASSERT(smi == "C[CH2][U]"); delete mol; } { @@ -1681,9 +1681,9 @@ void testIsotopes() { RWMol *mol = SmilesToMol(smi); TEST_ASSERT(mol); smi = MolToSmiles(*mol, false); - TEST_ASSERT(smi == "CC[U]"); + TEST_ASSERT(smi == "C[CH2][U]"); smi = MolToSmiles(*mol, true); - TEST_ASSERT(smi == "CC[238U]"); + TEST_ASSERT(smi == "C[CH2][238U]"); delete mol; } { @@ -3842,7 +3842,7 @@ void testDativeBonds() { std::string out_smiles = MolToSmiles(*m, true); delete m; - TEST_ASSERT(out_smiles == smiles); + TEST_ASSERT(out_smiles == "CCC(=O)[OH]->[Cu]"); } { std::string smiles = "CCC(=O)O->[Cu]<-OC(O)CC"; @@ -3859,7 +3859,7 @@ void testDativeBonds() { std::string out_smiles = MolToSmiles(*m, true); delete m; - TEST_ASSERT(out_smiles == smiles); + TEST_ASSERT(out_smiles == "CCC(=O)[OH]->[Cu]<-[OH]C(O)CC"); } BOOST_LOG(rdInfoLog) << "done" << std::endl; } @@ -4333,7 +4333,6 @@ int main(int argc, char *argv[]) { (void)argc; (void)argv; RDLog::InitLogs(); -// boost::logging::enable_logs("rdApp.debug"); testPass(); testFail(); diff --git a/Code/GraphMol/Wrap/rough_test.py b/Code/GraphMol/Wrap/rough_test.py index 62b93e1e3..fef4fa92f 100644 --- a/Code/GraphMol/Wrap/rough_test.py +++ b/Code/GraphMol/Wrap/rough_test.py @@ -8055,7 +8055,7 @@ M END femol = Chem.MolFromMolFile(fefile) newfemol = Chem.rdmolops.HapticBondsToDative(femol) self.assertEqual(Chem.MolToSmiles(newfemol), - 'c12->[Fe+2]3456789(<-c1c->3[cH-]->4c->52)<-c1c->6c->7[cH-]->8c->91') + '[cH]12->[Fe+2]3456789(<-[cH]1[cH]->3[cH-]->4[cH]->52)<-[cH]1[cH]->6[cH]->7[cH-]->8[cH]->91') def test_DativeBondsToHaptic(self): fefile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolStandardize', 'test_data', diff --git a/Code/GraphMol/catch_canon.cpp b/Code/GraphMol/catch_canon.cpp index adb93516a..452900c80 100644 --- a/Code/GraphMol/catch_canon.cpp +++ b/Code/GraphMol/catch_canon.cpp @@ -1033,7 +1033,7 @@ M END auto smiles = MolToSmiles(*m); CHECK( smiles == - R"SMI(CC[C@@]1(C)/C2=C(C)/C3=N4->[CoH2+]56N2[C@H]([C@@H]1C)[C@]1(C)N->5=C(/C(C)=C2N->6=C(/C=C4/C(C)(C)[C@@H]3C)[C@@H](C)C\2(C)C)[C@@H](C)C1(C)C)SMI"); + R"SMI(CC[C@@]1(C)/C2=C(C)/C3=[N]4->[CoH2+]56[N]2[C@H]([C@@H]1C)[C@]1(C)[N]->5=C(/C(C)=C2[N]->6=C(/C=C4/C(C)(C)[C@@H]3C)[C@@H](C)C\2(C)C)[C@@H](C)C1(C)C)SMI"); } TEST_CASE("chiral presence and ranking") { diff --git a/Code/GraphMol/catch_graphmol.cpp b/Code/GraphMol/catch_graphmol.cpp index 21eee1756..a5883a227 100644 --- a/Code/GraphMol/catch_graphmol.cpp +++ b/Code/GraphMol/catch_graphmol.cpp @@ -2962,11 +2962,11 @@ TEST_CASE("molecules with single bond to metal atom use dative instead") { // Counting from 1, 4th mol is one that gave other problems during testing. std::vector> test_vals{ {"CC1=C(CCC(O)=O)C2=[N]3C1=Cc1c(C)c(C=C)c4C=C5C(C)=C(C=C)C6=[N]5[Fe]3(n14)n1c(=C6)c(C)c(CCC(O)=O)c1=C2", - "C=CC1=C(C)C2=Cc3c(C=C)c(C)c4n3[Fe]35<-N2=C1C=c1c(C)c(CCC(=O)O)c(n13)=CC1=N->5C(=C4)C(C)=C1CCC(=O)O"}, + "C=CC1=C(C)C2=Cc3c(C=C)c(C)c4[n]3[Fe]35<-[N]2=C1C=c1c(C)c(CCC(=O)O)c([n]13)=CC1=[N]->5C(=C4)C(C)=C1CCC(=O)O"}, {"CC1=C(CCC([O-])=O)C2=[N+]3C1=Cc1c(C)c(C=C)c4C=C5C(C)=C(C=C)C6=[N+]5[Fe--]3(n14)n1c(=C6)c(C)c(CCC([O-])=O)c1=C2", - "C=CC1=C(C)C2=Cc3c(C=C)c(C)c4n3[Fe-2]35n6c(c(C)c(CCC(=O)[O-])c6=CC6=[N+]3C(=C4)C(C)=C6CCC(=O)[O-])=CC1=[N+]25"}, + "C=CC1=C(C)C2=Cc3c(C=C)c(C)c4[n]3[Fe-2]35[n]6c(c(C)c(CCC(=O)[O-])c6=CC6=[N+]3C(=C4)C(C)=C6CCC(=O)[O-])=CC1=[N+]25"}, {"CC1=C(CCC(O)=O)C2=[N+]3C1=Cc1c(C)c(C=C)c4C=C5C(C)=C(C=C)C6=[N+]5[Fe--]3(n14)n1c(=C6)c(C)c(CCC(O)=O)c1=C2", - "C=CC1=C(C)C2=Cc3c(C=C)c(C)c4n3[Fe-2]35n6c(c(C)c(CCC(=O)O)c6=CC6=[N+]3C(=C4)C(C)=C6CCC(=O)O)=CC1=[N+]25"}, + "C=CC1=C(C)C2=Cc3c(C=C)c(C)c4[n]3[Fe-2]35[n]6c(c(C)c(CCC(=O)O)c6=CC6=[N+]3C(=C4)C(C)=C6CCC(=O)O)=CC1=[N+]25"}, {"CCC1=[O+][Cu]2([O+]=C(CC)C1)[O+]=C(CC)CC(CC)=[O+]2", "CCC1=[O+][Cu]2([O+]=C(CC)C1)[O+]=C(CC)CC(CC)=[O+]2"}}; for (size_t i = 0; i < test_vals.size(); ++i) { @@ -2987,8 +2987,10 @@ TEST_CASE("molecules with single bond to metal atom use dative instead") { TEST_CASE( "cleanUpOrganometallics should produce canonical output. cf PR6292") { std::vector> test_vals{ - {"F[Pd](Cl)(Cl1)Cl[Pd]1(Cl)Cl", "F[Pd]1(Cl)<-Cl[Pd](Cl)(Cl)<-Cl1"}, - {"F[Pt]1(F)[35Cl][Pt]([Cl]1)(F)Br", "F[Pt]1(Br)<-Cl[Pt](F)(F)<-[35Cl]1"}, + {"F[Pd](Cl)(Cl1)Cl[Pd]1(Cl)Cl", + "[F][Pd]1([Cl])<-[Cl][Pd]([Cl])([Cl])<-[Cl]1"}, + {"F[Pt]1(F)[35Cl][Pt]([Cl]1)(F)Br", + "[F][Pt]1([Br])<-[Cl][Pt]([F])([F])<-[35Cl]1"}, }; for (size_t j = 0; j < test_vals.size(); ++j) { diff --git a/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/Chemv2Tests.java b/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/Chemv2Tests.java index d254bc188..e9e9c9130 100644 --- a/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/Chemv2Tests.java +++ b/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/Chemv2Tests.java @@ -696,12 +696,12 @@ public class Chemv2Tests extends GraphMolTest { RDKFuncs.setAllowNontetrahedralChirality(true); m = RWMol.MolFromMolBlock(ctab); assertTrue(m != null); - assertEquals(m.MolToSmiles(), "F[Pt@SP3](F)(Cl)Cl"); + assertEquals(m.MolToSmiles(), "[F][Pt@SP3]([F])([Cl])[Cl]"); m.delete(); RDKFuncs.setAllowNontetrahedralChirality(false); m = RWMol.MolFromMolBlock(ctab); assertTrue(m != null); - assertEquals(m.MolToSmiles(), "F[Pt](F)(Cl)Cl"); + assertEquals(m.MolToSmiles(), "[F][Pt]([F])([Cl])[Cl]"); m.delete(); RDKFuncs.setAllowNontetrahedralChirality(allowNonTetrahedralChirality); } finally { diff --git a/Code/MinimalLib/cffi_test.c b/Code/MinimalLib/cffi_test.c index 992aad0a0..44933354d 100644 --- a/Code/MinimalLib/cffi_test.c +++ b/Code/MinimalLib/cffi_test.c @@ -1278,7 +1278,7 @@ void test_standardize() { size_t mpkl_size; mpkl = get_mol("[Pt]CCN(=O)=O", &mpkl_size, "{\"sanitize\":false}"); char *smi = get_smiles(mpkl, mpkl_size, ""); - assert(!strcmp(smi, "O=N(=O)CC[Pt]")); + assert(!strcmp(smi, "O=N(=O)C[CH2][Pt]")); free(smi); assert(cleanup(&mpkl, &mpkl_size, "") > 0); smi = get_smiles(mpkl, mpkl_size, ""); @@ -2026,7 +2026,7 @@ M END\n"; assert(mpkl); assert(mpkl_size > 0); char *smiles = get_smiles(mpkl, mpkl_size, NULL); - assert(!strcmp(smiles, "F[Pt@SP3](F)(Cl)Cl")); + assert(!strcmp(smiles, "[F][Pt@SP3]([F])([Cl])[Cl]")); free(smiles); free(mpkl); allow_non_tetrahedral_chirality(0); @@ -2034,7 +2034,7 @@ M END\n"; assert(mpkl); assert(mpkl_size > 0); smiles = get_smiles(mpkl, mpkl_size, NULL); - assert(!strcmp(smiles, "F[Pt](F)(Cl)Cl")); + assert(!strcmp(smiles, "[F][Pt]([F])([Cl])[Cl]")); free(smiles); free(mpkl); allow_non_tetrahedral_chirality(orig_setting); diff --git a/Code/MinimalLib/tests/tests.js b/Code/MinimalLib/tests/tests.js index 1f778fe85..664c3875b 100644 --- a/Code/MinimalLib/tests/tests.js +++ b/Code/MinimalLib/tests/tests.js @@ -1284,11 +1284,11 @@ M END origSetting = RDKitModule.allow_non_tetrahedral_chirality(true); var mol = RDKitModule.get_mol(ctab); assert(mol !== null); - assert.equal(mol.get_smiles(), "F[Pt@SP3](F)(Cl)Cl"); + assert.equal(mol.get_smiles(), "[F][Pt@SP3]([F])([Cl])[Cl]"); RDKitModule.allow_non_tetrahedral_chirality(false); var mol = RDKitModule.get_mol(ctab); assert(mol !== null); - assert.equal(mol.get_smiles(), "F[Pt](F)(Cl)Cl"); + assert.equal(mol.get_smiles(), "[F][Pt]([F])([Cl])[Cl]"); } finally { RDKitModule.allow_non_tetrahedral_chirality(origSetting); } diff --git a/Docs/Book/Cookbook.rst b/Docs/Book/Cookbook.rst index 20be2efea..8bf1f7944 100644 --- a/Docs/Book/Cookbook.rst +++ b/Docs/Book/Cookbook.rst @@ -2316,7 +2316,7 @@ Organometallics with Dative Bonds .. testoutput:: - CN(C)(C)->[Pt] + C[N](C)(C)->[Pt] Enumerate SMILES diff --git a/Docs/Book/RDKit_Book.rst b/Docs/Book/RDKit_Book.rst index 91c6efd5e..00b4744a9 100644 --- a/Docs/Book/RDKit_Book.rst +++ b/Docs/Book/RDKit_Book.rst @@ -199,7 +199,7 @@ Here's an example of a bipy-copper complex: >>> bipycu.GetBondBetweenAtoms(4,12).GetBondType() rdkit.Chem.rdchem.BondType.DATIVE >>> Chem.MolToSmiles(bipycu) - 'Cl[Cu]1(Cl)<-n2ccccc2-c2ccccn->12' + '[Cl][Cu]1([Cl])<-[n]2ccccc2-c2cccc[n]->12' Dative bonds have the special characteristic that they don't affect the valence on the start atom, but do affect the end atom. So in this case, the N atoms involved in the dative bond have the valence of 3 that we expect from bipy, diff --git a/External/YAeHMOP/CMakeLists.txt b/External/YAeHMOP/CMakeLists.txt index c8daef9a5..846d4adb9 100644 --- a/External/YAeHMOP/CMakeLists.txt +++ b/External/YAeHMOP/CMakeLists.txt @@ -9,8 +9,8 @@ if(NOT DEFINED YAEHMOP_DIR) endif() if(NOT EXISTS "${YAEHMOP_DIR}/tightbind/bind.h") - set(RELEASE_NO "2024.03.1") - set(MD5 "ebbddca4f79ab71544cb1fef9a7eca8a") + set(RELEASE_NO "2025.03.1") + set(MD5 "93b7c8d9419b4899b44699cba3e23404") downloadAndCheckMD5("https://github.com/greglandrum/yaehmop/archive/refs/tags/v${RELEASE_NO}.tar.gz" "${CMAKE_CURRENT_SOURCE_DIR}/yaehmop-${RELEASE_NO}.tar.gz" ${MD5}) execute_process(COMMAND ${CMAKE_COMMAND} -E tar zxf diff --git a/ReleaseNotes.md b/ReleaseNotes.md index d1d8b0e34..3f67716db 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -49,6 +49,11 @@ YOUNG-JAME, thomp-j, esiaero, bbu-imdea, bzoracler - The colors of annotations on atoms and bonds are now controlled by the drawing options `atomNoteColour` and `bondNoteColour` instead of the general `annotationColour`. +- When writing SMILES, organic subset atoms which are bonded to "metals" will + always be written in square brackets, i.e. with their H count explicit. Here + the definition of "metal" is any atom matching an "M" query (the corresponding + SMARTS is `[!#0!#1!#2!#5!#6!#7!#8!#9!#10!#14!#15!#16!#17!#18!#33!#34!#35!#36!#52!#53!#54!#85!#86]`) + ## New Features and Enhancements: - NumAmideBonds descriptor missing