Fix SAS point classification stats for explicit-site datasets and improve cluster logging

- PocketRescorer: fall back to explicit site residue atoms for point
  labeling when no ligand atoms are available, fixing the zero positives
  reported in binary classification stats for site-based eval-predict
- SLinkClustererV2: log cluster count and sizes instead of full contents
This commit is contained in:
rdk
2026-03-04 03:55:17 +01:00
parent c9ad8f71ff
commit 53500dd129
2 changed files with 6 additions and 1 deletion

View File

@@ -74,7 +74,7 @@ public class SLinkClustererV2<E> implements Clusterer<E> {
List<List<E>> result = new ArrayList<>(clusterMap.values());
log.info("clusters: {}", result);
log.info("clusters ({}): sizes {}", result.size(), result.stream().map(List::size).toList());
log.info("clusters together: {} / {}", result.stream().mapToInt(List::size).sum(), elements.size());
return result;

View File

@@ -26,6 +26,11 @@ abstract class PocketRescorer implements Parametrized {
this.ligandedProtein = liganatedProtein
if (liganatedProtein != null) {
ligandAtoms = liganatedProtein.allRelevantLigandAtoms
// Fallback: use explicit site residue atoms for point labeling
if ((ligandAtoms == null || ligandAtoms.empty) && !liganatedProtein.sites.isEmpty()) {
List<Atoms> siteAtomsList = liganatedProtein.sites.collect { it.atoms }
ligandAtoms = Atoms.union(siteAtomsList)
}
}
}