From ae2bb80d0267edd6518311b91a20d877572d304f Mon Sep 17 00:00:00 2001
From: Nathaniel Corley <ncorley@uw.edu>
Date: Fri, 7 Nov 2025 16:08:06 -0800
Subject: [PATCH] Fix/apptainer (#629)

* feat: enable jupyter notebooks; cleanup

* fix: apptainer

* fix: more apptainer shenanigans
---
 .ipd/apptainer/rf3-dev.def                    | 30 ++++++--
 .ipd/apptainer/rf3-dev.sif                    |  2 +-
 .ipd/apptainer/rf3-full.def                   |  3 +
 lib/atomworks                                 |  2 +-
 .../configs/experiment/pretrained/rf3.yaml    |  2 +-
 models/rf3/configs/inference_engine/base.yaml |  2 +
 models/rf3/configs/inference_engine/rf3.yaml  |  3 +-
 models/rf3/pyproject.toml                     |  4 +-
 .../callbacks/dump_validation_structures.py   |  9 +++
 models/rf3/src/rf3/cli.py                     |  2 +-
 models/rf3/src/rf3/inference.py               |  1 +
 models/rf3/src/rf3/inference_engines/rf3.py   | 68 ++++++++++++++++---
 models/rf3/src/rf3/utils/io.py                |  4 +-
 pyproject.toml                                |  1 +
 src/modelhub/__init__.py                      | 18 +++--
 src/modelhub/trainers/fabric.py               | 13 +++-
 16 files changed, 136 insertions(+), 28 deletions(-)

diff --git a/.ipd/apptainer/rf3-dev.def b/.ipd/apptainer/rf3-dev.def
index 381f488..b739785 100644
--- a/.ipd/apptainer/rf3-dev.def
+++ b/.ipd/apptainer/rf3-dev.def
@@ -27,6 +27,7 @@ IncludeCmd: yes
     /etc/hosts
     pyproject.toml /opt/core_pyproject.toml
     models/rf3/pyproject.toml /opt/rf3_pyproject.toml
+    lib/atomworks/pyproject.toml /opt/atomworks_pyproject.toml
 
 %post
     ## GENERAL SETUP
@@ -38,8 +39,23 @@ IncludeCmd: yes
     ln -s /projects /mnt/projects
     ln -s /net /mnt/net
 
+    # Refresh the apt package index and install build tools, git, and X11 client libraries
+    apt-get update && apt-get install -y \
+        build-essential \
+        git \
+        libxrender1 \
+        libxrender-dev \
+        libx11-6 \
+        libx11-dev \
+        libxext6 \
+        libxext-dev \
+        && rm -rf /var/lib/apt/lists/*
+
     ## PYTHON DEPENDENCY INSTALLATION
 
+    # Upgrade pip
+    python -m pip install --upgrade pip
+
     # Install uv for fast dependency resolution
     pip install uv
 
@@ -54,12 +70,18 @@ IncludeCmd: yes
     uv pip compile /opt/pyproject.toml --output-file /opt/rf3_requirements.txt --all-extras
     rm /opt/pyproject.toml
 
+    # (AtomWorks)
+    mv /opt/atomworks_pyproject.toml /opt/pyproject.toml
+    uv pip compile /opt/pyproject.toml --output-file /opt/atomworks_requirements.txt --all-extras
+    rm /opt/pyproject.toml
+
     # Merge and dedupe requirements, excluding packages we don't want
     # (atomworks is mounted from host; torch/numpy/nvidia-* provided by base image)
-    # (pynvml/packaging/pandas/markdown-it-py from NGC container to avoid conflicts)
-    cat /opt/core_requirements.txt /opt/rf3_requirements.txt | \
-        grep -vE "^(atomworks|torch(|vision|audio)|numpy|nvidia-.*|pynvml|packaging|pandas|markdown-it-py)==" | \
-        awk '!seen[$0]++' > /opt/combined_requirements.txt
+    # (pynvml/packaging/pandas/markdown-it-py/triton from NGC container to avoid conflicts)
+    # Deduplicate by package name (keeping first occurrence) to handle version conflicts
+    cat /opt/core_requirements.txt /opt/rf3_requirements.txt /opt/atomworks_requirements.txt | \
+        grep -vE "^(atomworks|torch(|vision|audio)|numpy|nvidia-.*|pynvml|packaging|pandas|markdown-it-py|triton)==" | \
+        awk -F'==' '!seen[$1]++' > /opt/combined_requirements.txt
 
     # Print combined requirements for debugging
     echo "=== Combined requirements to install ==="
diff --git a/.ipd/apptainer/rf3-dev.sif b/.ipd/apptainer/rf3-dev.sif
index 2e74ecc..ee8c233 120000
--- a/.ipd/apptainer/rf3-dev.sif
+++ b/.ipd/apptainer/rf3-dev.sif
@@ -1 +1 @@
-/net/software/containers/versions/modelhub/rf3-dev_2025_10_08.sif
\ No newline at end of file
+/net/software/containers/versions/modelhub/rf3-dev_2025_11_07.sif
\ No newline at end of file
diff --git a/.ipd/apptainer/rf3-full.def b/.ipd/apptainer/rf3-full.def
index e6441b1..46f1f3c 100644
--- a/.ipd/apptainer/rf3-full.def
+++ b/.ipd/apptainer/rf3-full.def
@@ -30,6 +30,9 @@ IncludeCmd: yes
         --exclude='outputs' \
         --exclude='logs' \
         --exclude='*.sif' \
+        --exclude='distillation' \
+        --exclude='benchmarks' \
+        --exclude='**/slurm_logs' \
         ./ ${APPTAINER_ROOTFS}/opt/modelhub/
 
     echo "Repository copied successfully."
diff --git a/lib/atomworks b/lib/atomworks
index 11b5d0d..4d45b10 160000
--- a/lib/atomworks
+++ b/lib/atomworks
@@ -1 +1 @@
-Subproject commit 11b5d0d76285b837f843bc2cd60867164637b0d8
+Subproject commit 4d45b107e3d78c87f6c37e59fcfda78f44e949de
diff --git a/models/rf3/configs/experiment/pretrained/rf3.yaml b/models/rf3/configs/experiment/pretrained/rf3.yaml
index df334f6..4e80cc9 100644
--- a/models/rf3/configs/experiment/pretrained/rf3.yaml
+++ b/models/rf3/configs/experiment/pretrained/rf3.yaml
@@ -14,7 +14,7 @@ defaults:
 
 ckpt_config:
   _target_: modelhub.utils.weights.CheckpointConfig
-  path: /net/software/containers/versions/modelhub_inference/ckpts/rf3-w-conf-run10-ep903-remapped.ckpt
+  path: /net/software/containers/versions/modelhub_inference/ckpts/rf3-w-conf-run10-ep922-remapped.ckpt
   reset_optimizer: true
 
 model:
diff --git a/models/rf3/configs/inference_engine/base.yaml b/models/rf3/configs/inference_engine/base.yaml
index 628d1ac..797d9b6 100644
--- a/models/rf3/configs/inference_engine/base.yaml
+++ b/models/rf3/configs/inference_engine/base.yaml
@@ -7,6 +7,7 @@ defaults:
 ckpt_path: ???
 num_nodes: 1
 devices_per_node: 1
+compress_outputs: true
 
 # Parameters for RF3InferenceEngine.run()
 inputs: ???
@@ -19,3 +20,4 @@ sharding_pattern: null
 skip_existing: false
 template_selection: null
 ground_truth_conformer_selection: null
+cyclic_chains: []
diff --git a/models/rf3/configs/inference_engine/rf3.yaml b/models/rf3/configs/inference_engine/rf3.yaml
index 2f2aa03..999e630 100644
--- a/models/rf3/configs/inference_engine/rf3.yaml
+++ b/models/rf3/configs/inference_engine/rf3.yaml
@@ -6,7 +6,7 @@ defaults:
 
 _target_: rf3.inference_engines.rf3.RF3InferenceEngine
 
-ckpt_path: /projects/ml/modelhub/apptainer/rf3-w-conf-run10-ep903-remapped.ckpt
+ckpt_path: /net/software/containers/versions/modelhub_inference/ckpts/rf3-w-conf-run10-ep922-remapped.ckpt
 
 # Transform arguments
 n_recycles: 10
@@ -21,7 +21,6 @@ early_stopping_plddt_threshold: 0.5
 seed: null
 print_config: true
 raise_if_missing_msa_for_protein_of_length_n: null
-cyclic_chains: []
 
 # Metrics
 metrics_cfg:
diff --git a/models/rf3/pyproject.toml b/models/rf3/pyproject.toml
index 9bc44f8..458c6c6 100644
--- a/models/rf3/pyproject.toml
+++ b/models/rf3/pyproject.toml
@@ -35,7 +35,8 @@ dependencies = [
     "cuequivariance_ops_torch_cu12>=0.6.1; sys_platform == 'linux'",
     "cuequivariance_torch>=0.6.1; sys_platform == 'linux'",
     # ... dataloading
-    "atomworks==1.0.2",
+    # (Commenting out for development; we should re-add before release)
+    # "atomworks==1.0.2",
 ]
 
 [project.scripts]
@@ -51,6 +52,7 @@ build-backend = "hatchling.build"
 
 [tool.hatch.version]
 source = "vcs"
+fallback-version = "0.0.0"
 
 [tool.hatch.version.raw-options]
 root = "../.."
diff --git a/models/rf3/src/rf3/callbacks/dump_validation_structures.py b/models/rf3/src/rf3/callbacks/dump_validation_structures.py
index b488072..8b33efe 100644
--- a/models/rf3/src/rf3/callbacks/dump_validation_structures.py
+++ b/models/rf3/src/rf3/callbacks/dump_validation_structures.py
@@ -21,6 +21,7 @@ class DumpValidationStructuresCallback(BaseCallback):
         dump_predictions: bool = False,
         one_model_per_file: bool = False,
         dump_trajectories: bool = False,
+        compress_outputs: bool = True,
     ):
         """
         Args:
@@ -28,12 +29,14 @@ class DumpValidationStructuresCallback(BaseCallback):
             one_model_per_file: If True, write each structure within a diffusion batch to its own CIF files. If False,
                 include each structure within a diffusion batch as a separate model within one CIF file.
             dump_trajectories: Whether to dump denoising trajectories after validation batches.
+            compress_outputs: Whether to gzip output files. Defaults to ``True``.
         """
         super().__init__()
         self.save_dir = Path(save_dir)
         self.dump_predictions = dump_predictions
         self.dump_trajectories = dump_trajectories
         self.one_model_per_file = one_model_per_file
+        self.compress_outputs = compress_outputs
 
     def on_validation_batch_end(
         self,
@@ -68,6 +71,9 @@ class DumpValidationStructuresCallback(BaseCallback):
 
             return path / f"{identifier}{extra}"
 
+        # Determine file type based on compression setting
+        file_type = "cif.gz" if self.compress_outputs else "cif"
+
         if self.dump_predictions:
             atom_array_stack = build_stack_from_atom_array_and_batched_coords(
                 network_output["X_L"], example["atom_array"]
@@ -76,6 +82,7 @@ class DumpValidationStructuresCallback(BaseCallback):
                 atom_arrays=atom_array_stack,
                 base_path=_build_path_from_example_id("predictions"),
                 one_model_per_file=self.one_model_per_file,
+                file_type=file_type,
             )
 
         if self.dump_trajectories:
@@ -83,9 +90,11 @@ class DumpValidationStructuresCallback(BaseCallback):
                 trajectory_list=network_output["X_denoised_L_traj"],
                 atom_array=example["atom_array"],
                 base_path=_build_path_from_example_id("trajectories", "_denoised"),
+                file_type=file_type,
             )
             dump_trajectories(
                 trajectory_list=network_output["X_noisy_L_traj"],
                 atom_array=example["atom_array"],
                 base_path=_build_path_from_example_id("trajectories", "_noisy"),
+                file_type=file_type,
             )
diff --git a/models/rf3/src/rf3/cli.py b/models/rf3/src/rf3/cli.py
index df585ea..4041a1d 100644
--- a/models/rf3/src/rf3/cli.py
+++ b/models/rf3/src/rf3/cli.py
@@ -3,7 +3,7 @@ from pathlib import Path
 import typer
 from hydra import compose, initialize_config_dir
 
-app = typer.Typer()
+app = typer.Typer(pretty_exceptions_enable=False)
 
 
 @app.command(
diff --git a/models/rf3/src/rf3/inference.py b/models/rf3/src/rf3/inference.py
index 0911f2e..af5f1d9 100755
--- a/models/rf3/src/rf3/inference.py
+++ b/models/rf3/src/rf3/inference.py
@@ -45,6 +45,7 @@ def run_inference(cfg: DictConfig) -> None:
         "ground_truth_conformer_selection": cfg.get(
             "ground_truth_conformer_selection", None
         ),
+        "cyclic_chains": cfg.get("cyclic_chains", []),
     }
 
     # Create init config with only __init__ params
diff --git a/models/rf3/src/rf3/inference_engines/rf3.py b/models/rf3/src/rf3/inference_engines/rf3.py
index bc4abe7..eb9af8b 100644
--- a/models/rf3/src/rf3/inference_engines/rf3.py
+++ b/models/rf3/src/rf3/inference_engines/rf3.py
@@ -11,10 +11,12 @@ from atomworks.ml.preprocessing.msa.finding import (
     get_msa_dirs_from_env,
 )
 from atomworks.ml.samplers import LoadBalancedDistributedSampler
+from biotite.structure import AtomArray
 from lightning.fabric import seed_everything
 from omegaconf import OmegaConf
 from torch.utils.data import DataLoader
 
+from modelhub.metrics.metric import MetricManager
 from modelhub.utils.ddp import RankedLogger, set_accelerator_based_on_availability
 from modelhub.utils.logging import print_config_tree
 from rf3.model.RF3 import ShouldEarlyStopFn
@@ -33,7 +35,6 @@ from rf3.utils.predicted_error import (
     compile_af3_confidence_outputs,
     get_mean_atomwise_plddt,
 )
-from modelhub.metrics.metric import MetricManager
 
 logging.basicConfig(
     level=logging.INFO,
@@ -94,7 +95,8 @@ class RF3InferenceEngine:
         metrics_cfg: dict | OmegaConf | MetricManager | None = None,
         num_nodes: int = 1,
         devices_per_node: int = 1,
-        cyclic_chains: list[str] = [],
+        # Output control
+        compress_outputs: bool = True,
         # Debug
         print_config: bool = False,
         raise_if_missing_msa_for_protein_of_length_n: int | None = None,
@@ -118,6 +120,7 @@ class RF3InferenceEngine:
               Defaults to ``None``.
           num_nodes: Number of nodes for distributed inference. Defaults to ``1``.
           devices_per_node: Number of devices per node. Defaults to ``1``.
+          compress_outputs: Whether to gzip output files. Defaults to ``True``.
           print_config: Whether to print config trees. Defaults to ``False``.
           raise_if_missing_msa_for_protein_of_length_n: Debug flag for MSA checking. Defaults to ``None``.
         """
@@ -187,10 +190,9 @@ class RF3InferenceEngine:
             "p_give_polymer_ref_conf": 0.0,
             "p_give_non_polymer_ref_conf": 0.0,
             "p_dropout_ref_conf": 0.0,
+            "use_element_for_atom_names_of_atomized_tokens": True,
         }
 
-        self.cyclic_chains = cyclic_chains
-
         self.print_config = print_config
 
         # Set random seed (only if seed is not None)
@@ -220,6 +222,7 @@ class RF3InferenceEngine:
 
         self.ckpt_path = ckpt_path
         self.early_stopping_plddt_threshold = early_stopping_plddt_threshold
+        self.compress_outputs = compress_outputs
 
         # Setup model
         ranked_logger.info("Setting up model...")
@@ -269,7 +272,14 @@ class RF3InferenceEngine:
 
     def run(
         self,
-        inputs: InferenceInput | list[InferenceInput] | PathLike | list[PathLike],
+        inputs: (
+            InferenceInput
+            | list[InferenceInput]
+            | AtomArray
+            | list[AtomArray]
+            | PathLike
+            | list[PathLike]
+        ),
         # Output control
         out_dir: PathLike | None = None,
         dump_predictions: bool = True,
@@ -281,22 +291,24 @@ class RF3InferenceEngine:
         # Selection overrides (applied to all input types)
         template_selection: list[str] | str | None = None,
         ground_truth_conformer_selection: list[str] | str | None = None,
+        cyclic_chains: list[str] = [],
     ) -> dict[str, dict] | None:
         """Run inference on inputs.
 
         Requires a pre-initialized inference engine.
 
         Args:
-          inputs: Single/list of InferenceInput objects, or file paths, or directory.
+          inputs: Single/list of InferenceInput objects, AtomArray objects, file paths, or directory.
           out_dir: Output directory. If None, returns results as an AtomArray and dictionaries of metrics. Defaults to ``None``.
           dump_predictions: Whether to save predicted structures. Defaults to ``True``.
           dump_trajectories: Whether to save diffusion trajectories. Defaults to ``False``.
           one_model_per_file: Save each model in separate file. Defaults to ``False``.
           annotate_b_factor_with_plddt: Write pLDDT to B-factor column. Defaults to ``False``.
           sharding_pattern: Sharding pattern for output organization. Defaults to ``None``.
-          skip_existing: Skip inputs with existing outputs. Defaults to ``False``.
+          skip_existing: Skip inputs with existing outputs. Requires ``out_dir`` to be set. If ``True`` when ``out_dir=None``, a warning is logged and skipping is disabled. Defaults to ``False``.
           template_selection: Template selection override. Defaults to ``None``.
           ground_truth_conformer_selection: Conformer selection override. Defaults to ``None``.
+          cyclic_chains: List of chain IDs to cyclize. Defaults to ``[]``.
 
         Returns:
           If ``out_dir`` is None: Dict mapping example_id to results dict.
@@ -307,6 +319,21 @@ class RF3InferenceEngine:
         if out_dir:
             out_dir.mkdir(parents=True, exist_ok=True)
             ranked_logger.info(f"Outputs will be written to {out_dir.resolve()}.")
+        if not out_dir:
+            ranked_logger.warning(
+                "out_dir is None - results will be returned in memory! If you want to save to disk, please provide an out_dir."
+            )
+
+        # Validate skip_existing configuration
+        if skip_existing and out_dir is None:
+            ranked_logger.warning(
+                "skip_existing=True requires out_dir to be set. "
+                "Disabling skip_existing for in-memory inference mode."
+            )
+            skip_existing = False
+
+        # Determine file type based on compression setting
+        file_type = "cif.gz" if self.compress_outputs else "cif"
 
         # Convert inputs to InferenceInput objects
         if isinstance(inputs, InferenceInput):
@@ -315,6 +342,26 @@ class RF3InferenceEngine:
             isinstance(i, InferenceInput) for i in inputs
         ):
             inference_inputs = inputs
+        elif isinstance(inputs, AtomArray):
+            # Single AtomArray - convert to InferenceInput
+            inference_inputs = [
+                InferenceInput.from_atom_array(
+                    inputs,
+                    template_selection=template_selection,
+                    ground_truth_conformer_selection=ground_truth_conformer_selection,
+                )
+            ]
+        elif isinstance(inputs, list) and all(isinstance(i, AtomArray) for i in inputs):
+            # List of AtomArrays - convert each to InferenceInput
+            inference_inputs = [
+                InferenceInput.from_atom_array(
+                    arr,
+                    example_id=f"inference_{i}",
+                    template_selection=template_selection,
+                    ground_truth_conformer_selection=ground_truth_conformer_selection,
+                )
+                for i, arr in enumerate(inputs)
+            ]
         elif isinstance(inputs, (str, Path)) or (
             isinstance(inputs, list) and isinstance(inputs[0], (str, Path))
         ):
@@ -329,9 +376,9 @@ class RF3InferenceEngine:
             raise ValueError(f"Unsupported inputs type: {type(inputs)}")
 
         # Flag chains for cyclization if specified
-        if self.cyclic_chains:
+        if cyclic_chains:
             for input_spec in inference_inputs:
-                input_spec.cyclic_chains = self.cyclic_chains
+                input_spec.cyclic_chains = cyclic_chains
 
         # make InferenceInputDataset
         inference_dataset = InferenceInputDataset(inference_inputs)
@@ -495,6 +542,7 @@ class RF3InferenceEngine:
                         atom_arrays=atom_array_list or atom_array_stack,
                         base_path=example_out_dir / input_spec.example_id,
                         one_model_per_file=one_model_per_file,
+                        file_type=file_type,
                     )
 
                 if dump_trajectories:
@@ -502,11 +550,13 @@ class RF3InferenceEngine:
                         trajectory_list=network_output["X_denoised_L_traj"],
                         atom_array=pipeline_output["atom_array"],
                         base_path=example_out_dir / "denoised",
+                        file_type=file_type,
                     )
                     dump_trajectories(
                         trajectory_list=network_output["X_noisy_L_traj"],
                         atom_array=pipeline_output["atom_array"],
                         base_path=example_out_dir / "noisy",
+                        file_type=file_type,
                     )
 
                 ranked_logger.info(
diff --git a/models/rf3/src/rf3/utils/io.py b/models/rf3/src/rf3/utils/io.py
index 8026813..6fb6949 100644
--- a/models/rf3/src/rf3/utils/io.py
+++ b/models/rf3/src/rf3/utils/io.py
@@ -143,6 +143,7 @@ def dump_trajectories(
     atom_array: AtomArray,
     base_path: Path,
     align_structures: bool = True,
+    file_type: str = "cif.gz",
 ) -> None:
     """Write denoising trajectories to CIF files.
 
@@ -153,6 +154,7 @@ def dump_trajectories(
         base_path (Path): Base path where the output files will be saved.
         align_structures (bool): Flag to determine if the structures should be aligned on the final prediction.
             If False, each step may have a different alignment.
+        file_type (str): File type for output (e.g., "cif", "cif.gz", "pdb"). Defaults to ``"cif.gz"``.
     """
     n_steps = len(trajectory_list)
 
@@ -192,5 +194,5 @@ def dump_trajectories(
 
         path = f"{base_path}_model_{i}"
         to_cif_file(
-            atom_array_stack, path, file_type="cif.gz", include_entity_poly=False
+            atom_array_stack, path, file_type=file_type, include_entity_poly=False
         )
diff --git a/pyproject.toml b/pyproject.toml
index 95b9850..5ce0a87 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,6 +65,7 @@ build-backend = "hatchling.build"
 
 [tool.hatch.version]
 source = "vcs"
+fallback-version = "0.0.0"
 
 [tool.hatch.build.hooks.vcs]
 version-file = "src/modelhub/version.py"
diff --git a/src/modelhub/__init__.py b/src/modelhub/__init__.py
index fe8ddf1..e0d7b10 100644
--- a/src/modelhub/__init__.py
+++ b/src/modelhub/__init__.py
@@ -32,13 +32,19 @@ SHOULD_USE_CUEQUIVARIANCE = False
 
 try:
     if torch.cuda.is_available():
-        import cuequivariance_torch as cuet  # noqa: I001, F401
+        if _env.bool("DISABLE_CUEQUIVARIANCE", default=False):
+            logger.info("cuEquivariance usage disabled via DISABLE_CUEQUIVARIANCE")
+        else:
+            import cuequivariance_torch as cuet  # noqa: I001, F401
 
-        SHOULD_USE_CUEQUIVARIANCE = True
-        os.environ["CUEQ_DISABLE_AOT_TUNING"] = _env.str(
-            "CUEQ_DISABLE_AOT_TUNING", default="1"
-        )
-        os.environ["CUEQ_DEFAULT_CONFIG"] = _env.str("CUEQ_DEFAULT_CONFIG", default="1")
+            SHOULD_USE_CUEQUIVARIANCE = True
+            os.environ["CUEQ_DISABLE_AOT_TUNING"] = _env.str(
+                "CUEQ_DISABLE_AOT_TUNING", default="1"
+            )
+            os.environ["CUEQ_DEFAULT_CONFIG"] = _env.str(
+                "CUEQ_DEFAULT_CONFIG", default="1"
+            )
+            logger.info("cuEquivariance is available and will be used.")
 
 except ImportError:
     logger.debug("cuEquivariance unavailable: import failed")
diff --git a/src/modelhub/trainers/fabric.py b/src/modelhub/trainers/fabric.py
index 37d053d..2fafa47 100755
--- a/src/modelhub/trainers/fabric.py
+++ b/src/modelhub/trainers/fabric.py
@@ -40,6 +40,15 @@ from modelhub.utils.weights import (
 ranked_logger = RankedLogger(__name__, rank_zero_only=True)
 
 
+def is_interactive_environment() -> bool:
+    """Return True when IPython is importable and reports an active shell."""
+    try:
+        from IPython import get_ipython
+    except ImportError:
+        return False  # IPython is not installed
+    return get_ipython() is not None
+
+
 class FabricTrainer(ABC):
     def __init__(
         self,
@@ -110,11 +119,13 @@ class FabricTrainer(ABC):
             (4) Efficient Gradient Accumulation (https://lightning.ai/docs/fabric/2.4.0/advanced/gradient_accumulation.html)
         """
         # DDP strategy requires a manual timeout higher than the default
-        if strategy == "ddp":
+        if strategy == "ddp" and not is_interactive_environment():
             strategy = DDPStrategy(
                 timeout=timedelta(seconds=nccl_timeout),
                 find_unused_parameters=find_unused_parameters,
             )
+        elif strategy == "ddp":  # interactive (IPython) session: defer to Fabric
+            strategy = "auto"  # type: ignore
 
         # See (1) for initialization arguments for Fabric()
         self.fabric = L.Fabric(