From e51844e7b18033a25736998f1dad596f99975ff8 Mon Sep 17 00:00:00 2001
From: Clay Moore <164809477+mooreneural@users.noreply.github.com>
Date: Thu, 28 May 2026 04:14:59 -0700
Subject: [PATCH] Fix BFGS gradient-convergence denominator for negative
 energies (#9298)

In Code/Numerics/Optimizer/BFGSOpt.h, the gradient-convergence check
computed

    double term = std::max(funcVal * gradScale, 1.0);
    ...
    test /= term;
    if (test < gradTol) return 0;

When funcVal (the current energy) is negative, funcVal * gradScale is
negative and std::max clamps the denominator to 1.0. The convergence
test therefore divides the gradient norm by 1 instead of by the
intended |E| * gradScale, which over-tightens the criterion by a factor
of |funcVal * gradScale| whenever |funcVal * gradScale| > 1.

Negative energies are a normal mid-minimization state for force fields
that include stabilizing terms (MMFF94, UFF with charges, AMBER-style
potentials), so this affects realistic workloads: extra BFGS iterations
or, occasionally, hitting MAXITS and returning the "too many iterations"
status when convergence would otherwise have been reached.

The fix is to use |funcVal| in the denominator, matching the pattern
used three lines below ('std::max(fabs(pos[i]), 1.0)') and matching the
intended interpretation as a magnitude.

A new test case 'testBFGSOptimizationNegativeEnergy' in
testOptimizer.cpp minimizes a 2D quadratic whose value is always
negative along the convergence path and verifies the optimizer reaches
the analytic minimum.

git blame attributes the original line to commit e08e0d16d (Nov 2015),
when the optimizer was restructured; the surrounding code does use
absolute values, so this reads as an oversight rather than an
intentional choice.
---
 Code/Numerics/Optimizer/BFGSOpt.h         |  7 ++++-
 Code/Numerics/Optimizer/testOptimizer.cpp | 35 +++++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/Code/Numerics/Optimizer/BFGSOpt.h b/Code/Numerics/Optimizer/BFGSOpt.h
index 9a5869452..1f9176bcf 100644
--- a/Code/Numerics/Optimizer/BFGSOpt.h
+++ b/Code/Numerics/Optimizer/BFGSOpt.h
@@ -252,7 +252,12 @@ int minimize(unsigned int dim, double *pos, double gradTol,
 
     // is the gradient converged?
     test = 0.0;
-    double term = std::max(funcVal * gradScale, 1.0);
+    // Use |funcVal| so that negative energies (which arise routinely
+    // mid-minimization in force fields containing stabilizing
+    // electrostatic or dispersion terms) do not drive
+    // funcVal * gradScale below zero and clamp the denominator to 1.0,
+    // which would artificially tighten the gradient convergence test.
+    double term = std::max(fabs(funcVal) * gradScale, 1.0);
     for (unsigned int i = 0; i < dim; i++) {
       double temp = fabs(grad[i]) * std::max(fabs(pos[i]), 1.0);
       test = std::max(test, temp);
diff --git a/Code/Numerics/Optimizer/testOptimizer.cpp b/Code/Numerics/Optimizer/testOptimizer.cpp
index 8233e0a28..b357979b2 100644
--- a/Code/Numerics/Optimizer/testOptimizer.cpp
+++ b/Code/Numerics/Optimizer/testOptimizer.cpp
@@ -63,6 +63,24 @@ double grad2(double *v, double *grad) {
   return 1.0;
 }
 
+// A quadratic whose value is always negative on the convergence path:
+//   f(x, y) = (x - 3)^2 + (y + 1)^2 - 100
+// The minimum is f = -100 at (3, -1). Useful for exercising the
+// gradient-convergence denominator when the energy is negative.
+double circ_neg(double *v) {
+  double dx = v[0] - 3.0;
+  double dy = v[1] + 1.0;
+  return dx * dx + dy * dy - 100.0;
+}
+
+double circ_neg_grad(double *v, double *grad) {
+  double dx = v[0] - 3.0;
+  double dy = v[1] + 1.0;
+  grad[0] = 2 * dx;
+  grad[1] = 2 * dy;
+  return 1.0;
+}
+
 TEST_CASE("testLinearSearch") {
   int dim = 2;
   double oLoc[2], oVal;
@@ -158,3 +176,20 @@ TEST_CASE("testBFGSOptimization") {
   REQUIRE_THAT(oLoc[0], Catch::Matchers::WithinAbs(1.0, 1e-4));
   REQUIRE_THAT(oLoc[1], Catch::Matchers::WithinAbs(0.0, 1e-4));
 }
+
+// Regression test for the gradient-convergence denominator: when the
+// energy is negative the denominator used to be
+// std::max(funcVal * gradScale, 1.0), which clamps to 1.0 for any
+// funcVal < 0 and so over-tightens the test. With the fix
+// (std::max(fabs(funcVal) * gradScale, 1.0)) this case must still
+// converge to the analytic minimum.
+TEST_CASE("testBFGSOptimizationNegativeEnergy") {
+  unsigned int dim = 2;
+  double oLoc[2] = {0.0, 0.0};
+  double nVal = 0.0;
+  unsigned int nIters = 0;
+  BFGSOpt::minimize(dim, oLoc, 1e-4, nIters, nVal, circ_neg, circ_neg_grad);
+  REQUIRE_THAT(nVal, Catch::Matchers::WithinAbs(-100.0, 1e-4));
+  REQUIRE_THAT(oLoc[0], Catch::Matchers::WithinAbs(3.0, 1e-3));
+  REQUIRE_THAT(oLoc[1], Catch::Matchers::WithinAbs(-1.0, 1e-3));
+}