diff --git a/Code/GraphMol/ChemReactions/catch_tests.cpp b/Code/GraphMol/ChemReactions/catch_tests.cpp index 864ed6b13..f67e96bf9 100644 --- a/Code/GraphMol/ChemReactions/catch_tests.cpp +++ b/Code/GraphMol/ChemReactions/catch_tests.cpp @@ -1220,7 +1220,7 @@ TEST_CASE("CXSMILES for reactions", "[cxsmiles]") { REQUIRE(rxn); std::string expected_cxsmarts = - "[#6:6]-[#8:5]-[#6:3](-*)-[#8:2]-*>>[#6:6]-[#7:5]-[#6:3](-*)-[#8:2]-* |$;;;star_e;;star_e;;;;star_e;;star_e$,SgD:1,0:foo:bar::::,,SgD:7,6:foo:baz::::,,,Sg:n:4,2,1,0::ht:::,,Sg:n:10,8,7,6::ht:::,SgH:3:1.1|"; + "[#6:6]-[#8:5]-[#6:3](-*)-[#8:2]-*>>[#6:6]-[#7:5]-[#6:3](-*)-[#8:2]-* |$;;;star_e;;star_e;;;;star_e;;star_e$,SgD:1,0:foo:bar::::,SgD:7,6:foo:baz::::,Sg:n:4,2,1,0::ht:::,Sg:n:10,8,7,6::ht:::,SgH:3:1.1|"; SmilesWriteParams params; auto flags = RDKit::SmilesWrite::CX_ALL ^ RDKit::SmilesWrite::CX_ATOM_PROPS; std::string output_cxsmarts = diff --git a/Code/GraphMol/MarvinParse/test_data/StarAtom.sdf.expected.cxsmi b/Code/GraphMol/MarvinParse/test_data/StarAtom.sdf.expected.cxsmi index 704b1b100..8ccfdd375 100644 --- a/Code/GraphMol/MarvinParse/test_data/StarAtom.sdf.expected.cxsmi +++ b/Code/GraphMol/MarvinParse/test_data/StarAtom.sdf.expected.cxsmi @@ -1 +1 @@ -*C(=O)C(C)NC(=O)C(C)NC(=O)C(C)N[H] |(-9.10047,-0.90832,;-5.97455,-0.07112,;-5.97455,1.46869,;-4.64081,-0.84112,;-4.64081,-2.38112,;-3.30727,-0.07112,;-5.97455,-0.07112,;-5.97455,1.46869,;-4.64081,-0.84112,;-4.64081,-2.38112,;-3.30727,-0.07112,;0.097533,0.84112,;0.097533,2.38112,;1.43127,0.07112,;1.43127,-1.46869,;5.31655,1.11216,;9.10047,1.11216,),,,,Sg:n:11,13,12,14:1:ht:::| \ No newline at end of file +*C(=O)C(C)NC(=O)C(C)NC(=O)C(C)N[H] |(-9.10047,-0.90832,;-5.97455,-0.07112,;-5.97455,1.46869,;-4.64081,-0.84112,;-4.64081,-2.38112,;-3.30727,-0.07112,;-5.97455,-0.07112,;-5.97455,1.46869,;-4.64081,-0.84112,;-4.64081,-2.38112,;-3.30727,-0.07112,;0.097533,0.84112,;0.097533,2.38112,;1.43127,0.07112,;1.43127,-1.46869,;5.31655,1.11216,;9.10047,1.11216,),Sg:n:11,13,12,14:1:ht:::| \ No newline at end of file diff --git a/Code/GraphMol/MarvinParse/test_data/nonProprietary.mol.expected.cxsmi b/Code/GraphMol/MarvinParse/test_data/nonProprietary.mol.expected.cxsmi index 1e9820a38..876e545fa 100644 --- a/Code/GraphMol/MarvinParse/test_data/nonProprietary.mol.expected.cxsmi +++ b/Code/GraphMol/MarvinParse/test_data/nonProprietary.mol.expected.cxsmi @@ -1 +1 @@ -[H]NC(C)C(=O)NC(C)C(=O)NC(C)C(N)=O |(2.7453,0.5608,;0.7182,0.5608,;-1.3632,0.0031,;-1.3632,-0.8218,;-2.0777,0.4156,;-2.0777,1.2406,;-3.9017,-0.0731,;-4.6161,-0.4856,;-4.6161,-1.3106,;-5.3306,-0.0731,;-5.3306,0.7518,;-3.9017,-0.0731,;-4.6161,-0.4856,;-4.6161,-1.3106,;-5.3306,-0.0731,;-7.0052,-0.5216,;-5.3306,0.7518,),,,,Sg:n:4,2,5,3:1:ht:::| \ No newline at end of file +[H]NC(C)C(=O)NC(C)C(=O)NC(C)C(N)=O |(2.7453,0.5608,;0.7182,0.5608,;-1.3632,0.0031,;-1.3632,-0.8218,;-2.0777,0.4156,;-2.0777,1.2406,;-3.9017,-0.0731,;-4.6161,-0.4856,;-4.6161,-1.3106,;-5.3306,-0.0731,;-5.3306,0.7518,;-3.9017,-0.0731,;-4.6161,-0.4856,;-4.6161,-1.3106,;-5.3306,-0.0731,;-7.0052,-0.5216,;-5.3306,0.7518,),Sg:n:4,2,5,3:1:ht:::| \ No newline at end of file diff --git a/Code/GraphMol/SmilesParse/CXSmilesOps.cpp b/Code/GraphMol/SmilesParse/CXSmilesOps.cpp index 93b6ad9e0..9a03ddfd8 100644 --- a/Code/GraphMol/SmilesParse/CXSmilesOps.cpp +++ b/Code/GraphMol/SmilesParse/CXSmilesOps.cpp @@ -1768,7 +1768,7 @@ std::string get_sgroup_polymer_block( if (sg.getPropIfPresent("TYPE", typ) && reverseTypemap.find(typ) != reverseTypemap.end()) { sg.setProp("_cxsmilesOutputIndex", sgroupOutputIndex); - sgroupOutputIndex++; + ++sgroupOutputIndex; res << "Sg:"; std::string subtype; @@ -1823,8 +1823,8 @@ std::string get_sgroup_polymer_block( res.seekp(-1, res.cur); } res << ":"; + res << ","; // only add a comma if we wrote something } - res << ","; } std::string resStr = res.str(); @@ -1856,7 +1856,7 @@ std::string get_sgroup_data_block(const ROMol &mol, for (const auto &sg : sgs) { if (sg.hasProp("TYPE") && sg.getProp("TYPE") == "DAT") { sg.setProp("_cxsmilesOutputIndex", sgroupOutputIndex); - sgroupOutputIndex++; + ++sgroupOutputIndex; res << "SgD:"; // we don't attempt to canonicalize the atom order because the user @@ -1894,8 +1894,8 @@ std::string get_sgroup_data_block(const ROMol &mol, } res << ":"; // FIX: do something about the coordinates + res << ","; // only add a comma if we wrote something } - res << ","; } std::string resStr = res.str(); @@ -2113,7 +2113,7 @@ std::string get_bond_config_block( if (!Atropisomers::getAtropisomerAtomsAndBonds( bondNbr, atomAndBondVecs, mol)) { throw ValueErrorException("Internal error - should not occur"); - // should not happend + // should not happen } else { unsigned int swaps = 0; diff --git a/Code/GraphMol/SmilesParse/catch_tests.cpp b/Code/GraphMol/SmilesParse/catch_tests.cpp index e27487dad..460fe9020 100644 --- a/Code/GraphMol/SmilesParse/catch_tests.cpp +++ b/Code/GraphMol/SmilesParse/catch_tests.cpp @@ -1205,7 +1205,7 @@ TEST_CASE("polymer SGroups") { auto smi = MolToCXSmiles(*mol); CHECK(smi == - "*NCCO* |$star_e;;;;;star_e$,,,Sg:n:1,2::ht:::,Sg:any:3,4::hh:::|"); + "*NCCO* |$star_e;;;;;star_e$,Sg:n:1,2::ht:::,Sg:any:3,4::hh:::|"); } { // multiple s groups + data @@ -1245,7 +1245,7 @@ TEST_CASE("polymer SGroups") { auto smi = MolToCXSmiles(*mol); CHECK(smi == "*OCCNCC " - "|$star_e;;;;;;$,SgD:5:atomdata:val::::,,,,Sg:n:4,3::ht:::,Sg:any:" + "|$star_e;;;;;;$,SgD:5:atomdata:val::::,Sg:n:4,3::ht:::,Sg:any:" "2,1::hh:::|"); } } @@ -1270,7 +1270,7 @@ TEST_CASE("SGroup hierarchy") { CHECK(!sgs[1].hasProp("PARENT")); CHECK(MolToCXSmiles(*mol) == "*CNC(C*)O* " - "|$star_e;;;;;star_e;;star_e$,,,Sg:any:2,1::ht:::,Sg:any:4,3,2,1,0,6:" + "|$star_e;;;;;star_e;;star_e$,Sg:any:2,1::ht:::,Sg:any:4,3,2,1,0,6:" ":ht:::,SgH:1:0|"); } SECTION("nested") { @@ -1291,7 +1291,7 @@ TEST_CASE("SGroup hierarchy") { MolToCXSmiles(*mol) == "*CNC(CC(*)C*)O* |$star_e;;;;;;star_e;;star_e;;star_e$,SgD:4:internal " "data:val::::,SgD:7:atom " - "value:value2::::,,,,,,Sg:n:7::ht:::,Sg:n:2::ht:::,Sg:any:5,7,8,4,3,2," + "value:value2::::,Sg:n:7::ht:::,Sg:n:2::ht:::,Sg:any:5,7,8,4,3,2," "1,0,9::ht:::,SgH:2:1,4:0.2.3|"); } } @@ -1358,9 +1358,8 @@ TEST_CASE("Github #4320: Support toggling components of CXSMILES output") { "*CNC(CC(*)C*)O* " "|$star_e;;;;;;star_e;;star_e;;star_e$,SgD:4:internal " "data:val::::,SgD:7:atom " - "value:value2::::,,,,,,Sg:n:7::ht:::,Sg:n:2::ht:::,Sg:any:5,7,8,4," - "3,2," - "1,0,9::ht:::,SgH:2:1,4:0.2.3|"); + "value:value2::::,Sg:n:7::ht:::,Sg:n:2::ht:::,Sg:any:5,7,8,4," + "3,2,1,0,9::ht:::,SgH:2:1,4:0.2.3|"); CHECK(std::unique_ptr(SmilesToMol(cxsmi))); } { @@ -1374,11 +1373,10 @@ TEST_CASE("Github #4320: Support toggling components of CXSMILES output") { MolToCXSmiles(*mol, ps, SmilesWrite::CXSmilesFields::CX_ALL ^ SmilesWrite::CXSmilesFields::CX_ATOM_LABELS); - CHECK( - cxsmi == - "*CNC(CC(*)C*)O* |SgD:4:internal data:val::::,SgD:7:atom " - "value:value2::::,,,,,,Sg:n:7::ht:::,Sg:n:2::ht:::,Sg:any:5,7,8,4,3," - "2,1,0,9::ht:::,SgH:2:1,4:0.2.3|"); + CHECK(cxsmi == + "*CNC(CC(*)C*)O* |SgD:4:internal data:val::::,SgD:7:atom " + "value:value2::::,Sg:n:7::ht:::,Sg:n:2::ht:::,Sg:any:5,7,8,4,3," + "2,1,0,9::ht:::,SgH:2:1,4:0.2.3|"); CHECK(std::unique_ptr(SmilesToMol(cxsmi))); } { @@ -1387,7 +1385,7 @@ TEST_CASE("Github #4320: Support toggling components of CXSMILES output") { SmilesWrite::CXSmilesFields::CX_SGROUPS); CHECK(cxsmi == "*CNC(CC(*)C*)O* " - "|$star_e;;;;;;star_e;;star_e;;star_e$,,,Sg:n:7::ht:::,Sg:n:2::ht::" + "|$star_e;;;;;;star_e;;star_e;;star_e$,Sg:n:7::ht:::,Sg:n:2::ht::" ":,Sg:any:5,7,8,4,3,2,1,0,9::ht:::,SgH:2:0.1|"); CHECK(std::unique_ptr(SmilesToMol(cxsmi))); } @@ -1398,7 +1396,7 @@ TEST_CASE("Github #4320: Support toggling components of CXSMILES output") { CHECK(cxsmi == "*CNC(CC(*)C*)O* " "|$star_e;;;;;;star_e;;star_e;;star_e$,SgD:4:internal " - "data:val::::,SgD:7:atom value:value2::::,,,|"); + "data:val::::,SgD:7:atom value:value2::::|"); CHECK(std::unique_ptr(SmilesToMol(cxsmi))); } } @@ -3342,4 +3340,39 @@ TEST_CASE("chiral class must be nonzero") { common_properties::_chiralPermutation) == 1); } } -} \ No newline at end of file +} + +TEST_CASE("Non-encodeable SGroups", "[CX][CXSmiles]") { + constexpr const char *molblock = R"CTAB( + RDKit 2D + + 0 0 0 0 0 0 0 0 0 0999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 2 1 2 0 0 +M V30 BEGIN ATOM +M V30 1 C -1.242424 -0.515152 0.000000 0 +M V30 2 C 0.257576 -0.515152 0.000000 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 1 2 +M V30 END BOND +M V30 BEGIN SGROUP +M V30 1 SUP 1 ATOMS=(1 1) +M V30 2 SUP 2 ATOMS=(1 2) +M V30 END SGROUP +M V30 END CTAB +M END +$$$$)CTAB"; + + auto m = v2::FileParsers::MolFromMolBlock(molblock); + REQUIRE(m); + + // Write to smiles to populate atom and bond output order properties + MolToSmiles(*m); + + CHECK(SmilesWrite::getCXExtensions(*m) == + "|(-1.24242,-0.515152,;0.257576,-0.515152,)|"); + + CHECK(SmilesWrite::getCXExtensions( + *m, RDKit::SmilesWrite::CXSmilesFields::CX_ALL_BUT_COORDS) == ""); +} diff --git a/Code/GraphMol/catch_chirality.cpp b/Code/GraphMol/catch_chirality.cpp index 6f298a0e7..726e6c49c 100644 --- a/Code/GraphMol/catch_chirality.cpp +++ b/Code/GraphMol/catch_chirality.cpp @@ -3604,20 +3604,17 @@ $$$$ TEST_CASE("ValidateStereo", "[accurateCIP]") { SECTION("validateStereoUniqOldCanon1") { testStereoValidationFromMol(validateStereoUniq1, - "*/C=C/C(=O)C1=C(*)N(*)C(=O)NC1* |,,,|", true, - true); + "*/C=C/C(=O)C1=C(*)N(*)C(=O)NC1*", true, true); } SECTION("validateStereoUniqNewCanon1") { testStereoValidationFromMol(validateStereoUniq1, - "*/C=C/C(=O)C1=C(*)N(*)C(=O)NC1* |,,,|", false, - true); + "*/C=C/C(=O)C1=C(*)N(*)C(=O)NC1*", false, true); } SECTION("validateStereoUniqNewNoCanon1") { - testStereoValidationFromMol(validateStereoUniq1, - "N1C(=O)N(*)C(*)=C(C(/C=C/*)=O)C1* |,,,|", - false, false); + testStereoValidationFromMol( + validateStereoUniq1, "N1C(=O)N(*)C(*)=C(C(/C=C/*)=O)C1*", false, false); } SECTION("SprioChiralLostOldNoCanon") {