Extended fix for #9101 (#9255)

* fix extended boundary issue (3 mols)

* clang pass

* no change. retrigger CI for failed java test

There is a failing Java test that appears to fail by chance rather than because of these changes, since it depends on an RNG. This commit changes nothing; it only retriggers the CI pipeline to confirm that.

* no change. retrigger the CI (yet again)

* raw strings and removed garbage collector
This commit is contained in:
Raul Sofia
2026-05-06 05:10:37 +01:00
committed by GitHub
parent 3836049ab2
commit 372fbad131
4 changed files with 77 additions and 8 deletions

View File

@@ -398,11 +398,9 @@ void SDMolSupplier::buildIndexTo(unsigned int targetIdx) {
posHold = posHold + std::streamoff(1);
needEOL = false;
}
this->checkForEnd();
} else {
this->peekCheckForEnd(nlPos, bufEnd,
posHold); // the optimized peek version
}
this->peekCheckForEnd(nlPos, bufEnd,
posHold); // the optimized peek version
if (!this->df_end) {
d_molpos.push_back(posHold);
++d_last;

View File

@@ -2719,3 +2719,68 @@ TEST_CASE("github9101 - $$$$ at buffer end") {
REQUIRE(mol);
delete mol;
}
// Follow-up to the "github9101 - $$$$ at buffer end" test case.
// Two molecules were not enough to reliably expose the problem: the second
// record introduced the drift, but the drift only took effect on the record
// after it (the third). This test reproduces the same edge case with three
// records.
TEST_CASE("chunk boundary stream drift with 3+ molecules") {
  const std::string firstRecord = R"CTAB(mol1
RDKit 3D
1 0 0 0 0 0 0 0 0 0999 V2000
0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
M END
> <comment>
)CTAB";
  const std::string firstRecordTail = R"CTAB(
$$$$
)CTAB";
  const std::string secondRecord = R"CTAB(mol2
RDKit 3D
1 0 0 0 0 0 0 0 0 0999 V2000
0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
M END
$$$$
)CTAB";
  const std::string thirdRecord = R"CTAB(mol3
RDKit 3D
1 0 0 0 0 0 0 0 0 0999 V2000
0.0000 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
M END
$$$$
)CTAB";

  // The first separator has to end exactly on the 65536 byte boundary (the
  // later ones could as well, but that is not needed).
  const size_t boundary = 65536;
  // The tail contributes one leading newline plus the four '$' characters
  // before the boundary.
  const size_t separatorSpan = 5;
  const size_t fillerLength = boundary - firstRecord.size() - separatorSpan;

  // Assemble: first record, filler inside its data field, separator, then the
  // two remaining records.
  std::string data = firstRecord;
  data.append(fillerLength, 'x');
  data += firstRecordTail;
  data += secondRecord;
  data += thirdRecord;

  // Sanity-check the layout before handing it to the supplier.
  const size_t separatorPos = data.find("$$$$");
  REQUIRE(separatorPos != std::string::npos);
  REQUIRE(separatorPos + 4 == boundary);

  SDMolSupplier supplier;
  supplier.setData(data);
  // Asking for the length triggers indexing, which mangles the recorded
  // positions if the issue is present.
  CHECK(supplier.length() == 3);

  // Every record must come back under its own name, in order.
  const char *expectedNames[] = {"mol1", "mol2", "mol3"};
  for (unsigned int idx = 0; idx < 3; ++idx) {
    auto *entry = supplier[idx];
    REQUIRE(entry);
    CHECK(entry->getProp<std::string>("_Name") == expectedNames[idx]);
    delete entry;
  }
}