From e51844e7b18033a25736998f1dad596f99975ff8 Mon Sep 17 00:00:00 2001 From: Clay Moore <164809477+mooreneural@users.noreply.github.com> Date: Thu, 28 May 2026 04:14:59 -0700 Subject: [PATCH] Fix BFGS gradient-convergence denominator for negative energies (#9298) In Code/Numerics/Optimizer/BFGSOpt.h, the gradient-convergence check computed double term = std::max(funcVal * gradScale, 1.0); ... test /= term; if (test < gradTol) return 0; When funcVal (the current energy) is negative, funcVal * gradScale is negative and std::max clamps the denominator to 1.0. The convergence test therefore divides the gradient norm by 1 instead of by the intended |E| * gradScale, which over-tightens the criterion by a factor of |funcVal * gradScale| whenever |funcVal * gradScale| > 1. Negative energies are a normal mid-minimization state for force fields that include stabilizing terms (MMFF94, UFF with charges, AMBER-style potentials), so this affects realistic workloads: extra BFGS iterations or, occasionally, hitting MAXITS and returning the "too many iterations" status when convergence would otherwise have been reached. The fix is to use |funcVal| in the denominator, matching the pattern used three lines below ('std::max(fabs(pos[i]), 1.0)') and matching the intended interpretation as a magnitude. A new test case 'testBFGSOptimizationNegativeEnergy' in testOptimizer.cpp minimizes a 2D quadratic whose value is always negative along the convergence path and verifies the optimizer reaches the analytic minimum. git blame attributes the original line to commit e08e0d16d (Nov 2015), when the optimizer was restructured; the surrounding code does use absolute values, so this reads as an oversight rather than an intentional choice. --- Code/Numerics/Optimizer/BFGSOpt.h | 7 ++++- Code/Numerics/Optimizer/testOptimizer.cpp | 35 +++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/Code/Numerics/Optimizer/BFGSOpt.h b/Code/Numerics/Optimizer/BFGSOpt.h index 9a5869452..1f9176bcf 100644 --- a/Code/Numerics/Optimizer/BFGSOpt.h +++ b/Code/Numerics/Optimizer/BFGSOpt.h @@ -252,7 +252,12 @@ int minimize(unsigned int dim, double *pos, double gradTol, // is the gradient converged? test = 0.0; - double term = std::max(funcVal * gradScale, 1.0); + // Use |funcVal| so that negative energies (which arise routinely + // mid-minimization in force fields containing stabilizing + // electrostatic or dispersion terms) do not drive + // funcVal * gradScale below zero and clamp the denominator to 1.0, + // which would artificially tighten the gradient convergence test. + double term = std::max(fabs(funcVal) * gradScale, 1.0); for (unsigned int i = 0; i < dim; i++) { double temp = fabs(grad[i]) * std::max(fabs(pos[i]), 1.0); test = std::max(test, temp); diff --git a/Code/Numerics/Optimizer/testOptimizer.cpp b/Code/Numerics/Optimizer/testOptimizer.cpp index 8233e0a28..b357979b2 100644 --- a/Code/Numerics/Optimizer/testOptimizer.cpp +++ b/Code/Numerics/Optimizer/testOptimizer.cpp @@ -63,6 +63,24 @@ double grad2(double *v, double *grad) { return 1.0; } +// A quadratic whose value is always negative on the convergence path: +// f(x, y) = (x - 3)^2 + (y + 1)^2 - 100 +// The minimum is f = -100 at (3, -1). Useful for exercising the +// gradient-convergence denominator when the energy is negative. +double circ_neg(double *v) { + double dx = v[0] - 3.0; + double dy = v[1] + 1.0; + return dx * dx + dy * dy - 100.0; +} + +double circ_neg_grad(double *v, double *grad) { + double dx = v[0] - 3.0; + double dy = v[1] + 1.0; + grad[0] = 2 * dx; + grad[1] = 2 * dy; + return 1.0; +} + TEST_CASE("testLinearSearch") { int dim = 2; double oLoc[2], oVal; @@ -158,3 +176,20 @@ TEST_CASE("testBFGSOptimization") { REQUIRE_THAT(oLoc[0], Catch::Matchers::WithinAbs(1.0, 1e-4)); REQUIRE_THAT(oLoc[1], Catch::Matchers::WithinAbs(0.0, 1e-4)); } + +// Regression test for the gradient-convergence denominator: when the +// energy is negative the denominator used to be +// std::max(funcVal * gradScale, 1.0), which clamps to 1.0 for any +// funcVal < 0 and so over-tightens the test. With the fix +// (std::max(fabs(funcVal) * gradScale, 1.0)) this case must still +// converge to the analytic minimum. +TEST_CASE("testBFGSOptimizationNegativeEnergy") { + unsigned int dim = 2; + double oLoc[2] = {0.0, 0.0}; + double nVal = 0.0; + unsigned int nIters = 0; + BFGSOpt::minimize(dim, oLoc, 1e-4, nIters, nVal, circ_neg, circ_neg_grad); + REQUIRE_THAT(nVal, Catch::Matchers::WithinAbs(-100.0, 1e-4)); + REQUIRE_THAT(oLoc[0], Catch::Matchers::WithinAbs(3.0, 1e-3)); + REQUIRE_THAT(oLoc[1], Catch::Matchers::WithinAbs(-1.0, 1e-3)); +}