Add getSGroupDataLabels() to MolDraw2D_detail namespace (#9189)

Adds a new function MolDraw2D_detail::getSGroupDataLabels() that returns
the text and molecule-coordinate positions of DAT SGroup labels, using
the same placement logic as the drawing code. This allows external
renderers to display SGroup labels consistently with RDKit's placement.

Refactors DrawMol::extractSGroupData() to call getSGroupDataLabels()
internally, eliminating the duplicate FIELDDISP parsing and position
computation logic.

Closes #7829

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Chris Von Bargen
2026-04-15 22:56:00 -04:00
committed by GitHub
parent d4e8aa9fed
commit d8f4afb558
4 changed files with 229 additions and 88 deletions

View File

@@ -602,95 +602,21 @@ void DrawMol::extractSGroupData() {
if (!includeAnnotations_) {
return;
}
const auto &sgs = getSubstanceGroups(*drawMol_);
if (sgs.empty()) {
return;
}
// details of this transformation are in extractAtomCoords
double rot = drawOptions_.rotate * M_PI / 180.0;
RDGeom::Transform2D tform;
tform.SetTransform(Point2D(0.0, 0.0), rot);
for (const auto &sg : sgs) {
std::string typ;
if (sg.getPropIfPresent("TYPE", typ) && typ == "DAT") {
std::string text;
// it seems like we should be rendering FIELDNAME, but
// Marvin Sketch, Biovia Draw, and ChemDraw don't do it
// if (sg.getPropIfPresent("FIELDNAME", text)) {
// text += "=";
// };
if (sg.hasProp("DATAFIELDS")) {
STR_VECT dfs = sg.getProp<STR_VECT>("DATAFIELDS");
for (const auto &df : dfs) {
text += df + "|";
}
text.pop_back();
}
if (text.empty()) {
continue;
}
int atomIdx = -1;
if (!sg.getAtoms().empty()) {
atomIdx = sg.getAtoms()[0];
};
bool located = false;
std::string fieldDisp;
Point2D origLoc(0.0, 0.0);
if (sg.getPropIfPresent("FIELDDISP", fieldDisp)) {
double xp = FileParserUtils::stripSpacesAndCast<double>(
fieldDisp.substr(0, 10));
double yp = FileParserUtils::stripSpacesAndCast<double>(
fieldDisp.substr(10, 10));
// we always invert y for the molecule coords
origLoc = Point2D{xp, -yp};
if (fieldDisp[25] == 'R') {
if (atomIdx < 0) {
// we will warn about this below
text = "";
} else if (fabs(xp) > 1e-3 || fabs(yp) > 1e-3) {
// opposite sign for y
origLoc.x += drawMol_->getConformer().getAtomPos(atomIdx).x;
origLoc.y -= drawMol_->getConformer().getAtomPos(atomIdx).y;
located = true;
}
} else {
if (drawMol_->hasProp("_centroidx")) {
Point2D centroid;
drawMol_->getProp("_centroidx", centroid.x);
drawMol_->getProp("_centroidy", centroid.y);
// opposite sign for y
origLoc.x += centroid.x;
origLoc.y -= centroid.y;
}
located = true;
}
tform.TransformPoint(origLoc);
}
if (!text.empty()) {
// looks like everybody renders these left justified
DrawAnnotation *annot = new DrawAnnotation(
text, TextAlignType::START, "note",
drawOptions_.annotationFontScale, Point2D(0.0, 0.0),
drawOptions_.annotationColour, textDrawer_);
if (!located) {
if (atomIdx >= 0 && !text.empty()) {
calcAnnotationPosition(drawMol_->getAtomWithIdx(atomIdx), *annot);
}
} else {
annot->pos_ = origLoc;
}
annotations_.emplace_back(annot);
} else {
BOOST_LOG(rdWarningLog)
<< "FIELDDISP info not found for DAT SGroup which isn't "
"associated with an atom. SGroup will not be rendered."
<< std::endl;
}
// it seems like we should be rendering FIELDNAME, but
// Marvin Sketch, Biovia Draw, and ChemDraw don't do it
for (const auto &lbl :
MolDraw2D_detail::getSGroupDataLabels(*drawMol_, drawOptions_.rotate)) {
// looks like everybody renders these left justified
DrawAnnotation *annot =
new DrawAnnotation(lbl.text, TextAlignType::START, "note",
drawOptions_.annotationFontScale, Point2D(0.0, 0.0),
drawOptions_.annotationColour, textDrawer_);
if (lbl.positioned) {
annot->pos_ = lbl.pos;
} else {
calcAnnotationPosition(drawMol_->getAtomWithIdx(lbl.atomIdx), *annot);
}
annotations_.emplace_back(annot);
}
}

View File

@@ -11,6 +11,10 @@
#include <GraphMol/MolDraw2D/MolDraw2DDetails.h>
#include <GraphMol/MolDraw2D/StringRect.h>
#include <GraphMol/Chirality.h>
#include <GraphMol/FileParsers/FileParserUtils.h>
#include <GraphMol/SubstanceGroup.h>
#include <Geometry/Transform2D.h>
#include <RDGeneral/types.h>
#include <cmath>
#ifndef M_PI
@@ -426,5 +430,99 @@ void adjustLineEndForEllipse(const Point2D &centre, double xradius,
}
}
// ****************************************************************************
// Collects the label text and position of every DAT SGroup on `mol`.
//
// For each substance group whose TYPE property is "DAT", the DATAFIELDS
// entries are joined with '|' to form the label text; groups that yield no
// text are ignored. When the group has a FIELDDISP property:
//   - characters [0,10) and [10,20) are parsed as x and y (y is negated),
//   - an 'R' at index 25 makes x/y an offset from the group's first atom; an
//     offset smaller than 1e-3 in both x and y counts as "not positioned",
//   - otherwise x/y are absolute and are shifted by the molecule's
//     _centroidx/_centroidy properties when _centroidx is set,
//   - the resulting point is passed through the Transform2D built from
//     `rotate` (given in degrees).
// Labels without a usable FIELDDISP position fall back to the conformer
// position of the group's first atom (not transformed) and are reported with
// positioned == false. Groups that cannot be anchored at all are skipped with
// a warning.
//
// Returns an empty vector if `mol` has no conformers or no substance groups.
std::vector<SGroupDataLabel> getSGroupDataLabels(const ROMol &mol,
                                                 double rotate) {
  std::vector<SGroupDataLabel> result;
  if (!mol.getNumConformers()) {
    return result;
  }
  const auto &sgs = getSubstanceGroups(mol);
  if (sgs.empty()) {
    return result;
  }
  double rot = rotate * M_PI / 180.0;
  RDGeom::Transform2D tform;
  tform.SetTransform(Point2D(0.0, 0.0), rot);
  const auto &conf = mol.getConformer();
  for (const auto &sg : sgs) {
    std::string typ;
    if (!sg.getPropIfPresent("TYPE", typ) || typ != "DAT") {
      continue;
    }
    std::string text;
    if (sg.hasProp("DATAFIELDS")) {
      STR_VECT dfs = sg.getProp<STR_VECT>("DATAFIELDS");
      for (const auto &df : dfs) {
        text += df + "|";
      }
      // Drop the trailing separator. An empty DATAFIELDS list leaves text
      // empty, and pop_back() on an empty string is undefined behaviour.
      if (!text.empty()) {
        text.pop_back();
      }
    }
    if (text.empty()) {
      continue;
    }
    int atomIdx = -1;
    if (!sg.getAtoms().empty()) {
      atomIdx = sg.getAtoms()[0];
    }
    bool located = false;
    std::string fieldDisp;
    Point2D pos(0.0, 0.0);
    if (sg.getPropIfPresent("FIELDDISP", fieldDisp)) {
      double xp =
          FileParserUtils::stripSpacesAndCast<double>(fieldDisp.substr(0, 10));
      double yp =
          FileParserUtils::stripSpacesAndCast<double>(fieldDisp.substr(10, 10));
      // we always invert y for the molecule coords
      pos = Point2D{xp, -yp};
      // Only look at the placement flag if the string is long enough to have
      // one; a shorter FIELDDISP is handled as an absolute position.
      const bool isRelative = fieldDisp.size() > 25 && fieldDisp[25] == 'R';
      if (isRelative) {
        if (atomIdx < 0) {
          // no atom to anchor the relative position to: warn (as the drawing
          // code did before this logic was factored out) and skip
          BOOST_LOG(rdWarningLog)
              << "Relative FIELDDISP found for DAT SGroup which isn't "
                 "associated with an atom. SGroup will not be included."
              << std::endl;
          continue;
        } else if (fabs(xp) > 1e-3 || fabs(yp) > 1e-3) {
          // opposite sign for y
          pos.x += conf.getAtomPos(atomIdx).x;
          pos.y -= conf.getAtomPos(atomIdx).y;
          located = true;
        }
      } else {
        // Absolute position - check for centroid offset set by drawing pipeline
        if (mol.hasProp("_centroidx")) {
          Point2D centroid;
          mol.getProp("_centroidx", centroid.x);
          mol.getProp("_centroidy", centroid.y);
          // opposite sign for y
          pos.x += centroid.x;
          pos.y -= centroid.y;
        }
        located = true;
      }
      tform.TransformPoint(pos);
    }
    if (!located) {
      if (atomIdx >= 0) {
        // fallback: report the first atom's conformer position as stored;
        // callers can use atomIdx to compute their own placement
        const auto &p = conf.getAtomPos(atomIdx);
        pos = Point2D{p.x, p.y};
      } else {
        BOOST_LOG(rdWarningLog)
            << "FIELDDISP info not found for DAT SGroup which isn't "
               "associated with an atom. SGroup will not be included."
            << std::endl;
        continue;
      }
    }
    result.push_back({text, pos, located, atomIdx});
  }
  return result;
}
} // namespace MolDraw2D_detail
} // namespace RDKit

View File

@@ -162,6 +162,25 @@ RDKIT_MOLDRAW2D_EXPORT void adjustLineEndForEllipse(const Point2D &centre,
double yradius, Point2D p1,
Point2D &p2);
//! Holds the text and position of a DAT SGroup label.
/*!
  The bool and int members carry default initializers so that a
  default-constructed label has well-defined contents; the struct can still
  be brace-initialized as {text, pos, positioned, atomIdx}.
*/
struct SGroupDataLabel {
  std::string text;  ///< label text
  Point2D pos;       ///< position in molecule coordinates
  bool positioned = false;  ///< true if pos came from FIELDDISP; false if pos
                            ///< is the associated atom's conformer position
                            ///< (fallback)
  int atomIdx = -1;  ///< index of the associated atom (-1 if none)
};
//! Returns the text and positions of DAT SGroup labels for a molecule,
//! using the same placement logic as the drawing code.
/*!
  Only substance groups whose TYPE property is "DAT" are considered. The
  label text is the group's DATAFIELDS entries joined with '|'; groups that
  yield no text are left out.

  If a group has a FIELDDISP property, the position is read from it (y is
  negated) and is either absolute or, when flagged 'R', an offset from the
  group's first atom. Otherwise, or when a relative offset is effectively
  zero, the label is reported with positioned == false and pos set to the
  first atom's conformer position. Groups that have neither a usable
  FIELDDISP position nor an associated atom are skipped.

  \param mol the molecule; an empty vector is returned if it has no
             conformers or no substance groups
  \param rotate optional rotation angle in degrees (default 0.0); applied to
                positions read from FIELDDISP, not to the atom-position
                fallback
  \return a vector of SGroupDataLabel objects, one per rendered DAT SGroup
*/
RDKIT_MOLDRAW2D_EXPORT std::vector<SGroupDataLabel> getSGroupDataLabels(
const ROMol &mol, double rotate = 0.0);
} // namespace MolDraw2D_detail
} // namespace RDKit

View File

@@ -2368,6 +2368,104 @@ M END)CTAB"_ctab;
}
}
// Exercises MolDraw2D_detail::getSGroupDataLabels() on two benzoic-acid
// CTABs: one DAT SGroup with an absolute FIELDDISP and one with no FIELDDISP.
TEST_CASE("getSGroupDataLabels", "[extras]") {
  const double tol = 0.001;
  SECTION("ABS position") {
    // the FIELDDISP here carries an absolute ('A') position
    auto mol = R"CTAB(
Mrv2014 12072015352D
0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 9 9 1 0 0
M V30 BEGIN ATOM
M V30 1 C -6.5833 4.3317 0 0
M V30 2 C -7.917 3.5617 0 0
M V30 3 C -7.917 2.0216 0 0
M V30 4 C -6.5833 1.2516 0 0
M V30 5 C -5.2497 2.0216 0 0
M V30 6 C -5.2497 3.5617 0 0
M V30 7 C -3.916 4.3317 0 0
M V30 8 O -3.916 5.8717 0 0
M V30 9 O -2.5823 3.5617 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 2 2 3
M V30 3 1 3 4
M V30 4 2 4 5
M V30 5 1 5 6
M V30 6 2 1 6
M V30 7 1 6 7
M V30 8 2 7 8
M V30 9 1 7 9
M V30 END BOND
M V30 BEGIN SGROUP
M V30 1 DAT 0 ATOMS=(1 9) FIELDNAME=pKa -
M V30 FIELDDISP=" -2.2073 2.3950 DAU ALL 0 0" -
M V30 MRV_FIELDDISP=0 FIELDDATA=4.2
M V30 END SGROUP
M V30 END CTAB
M END
)CTAB"_ctab;
    REQUIRE(mol);
    auto labels = MolDraw2D_detail::getSGroupDataLabels(*mol);
    REQUIRE(labels.size() == 1);
    const auto &label = labels.front();
    CHECK(label.text == "4.2");
    CHECK(label.positioned);
    CHECK(label.atomIdx == 8);
    // expected absolute position is (-2.2073, -2.3950): the y read from
    // FIELDDISP is negated in molecule coords
    CHECK_THAT(label.pos.x, Catch::Matchers::WithinAbs(-2.2073, tol));
    CHECK_THAT(label.pos.y, Catch::Matchers::WithinAbs(-2.3950, tol));
  }
  SECTION("no FIELDDISP falls back to atom position") {
    auto mol = R"CTAB(
Mrv2014 12072015352D
0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 9 9 1 0 0
M V30 BEGIN ATOM
M V30 1 C -6.5833 4.3317 0 0
M V30 2 C -7.917 3.5617 0 0
M V30 3 C -7.917 2.0216 0 0
M V30 4 C -6.5833 1.2516 0 0
M V30 5 C -5.2497 2.0216 0 0
M V30 6 C -5.2497 3.5617 0 0
M V30 7 C -3.916 4.3317 0 0
M V30 8 O -3.916 5.8717 0 0
M V30 9 O -2.5823 3.5617 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 2 2 3
M V30 3 1 3 4
M V30 4 2 4 5
M V30 5 1 5 6
M V30 6 2 1 6
M V30 7 1 6 7
M V30 8 2 7 8
M V30 9 1 7 9
M V30 END BOND
M V30 BEGIN SGROUP
M V30 1 DAT 0 ATOMS=(5 2 4 5 3 1) FIELDNAME="Lambda Max" FIELDINFO=nm -
M V30 FIELDDATA="2222"
M V30 END SGROUP
M V30 END CTAB
M END
)CTAB"_ctab;
    REQUIRE(mol);
    auto labels = MolDraw2D_detail::getSGroupDataLabels(*mol);
    REQUIRE(labels.size() == 1);
    const auto &label = labels.front();
    CHECK(label.text == "2222");
    // without FIELDDISP the label is flagged as not positioned and pos is the
    // conformer position of the first atom in the ATOMS list
    CHECK(!label.positioned);
    // the ATOMS list starts with atom 2, i.e. index 1
    CHECK(label.atomIdx == 1);
    // atom 2 (idx 1) sits at (-7.917, 3.5617)
    CHECK_THAT(label.pos.x, Catch::Matchers::WithinAbs(-7.917, tol));
    CHECK_THAT(label.pos.y, Catch::Matchers::WithinAbs(3.5617, tol));
  }
}
TEST_CASE("position variation bonds", "[extras]") {
SECTION("simple") {
auto m = R"CTAB(