From ea707d931636faf60865a803f80ff1d85002b446 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson
Date: Thu, 12 Feb 2026 19:43:27 -0500
Subject: [PATCH 1/4] Add --deps-only flag to separate dependency fetching
 from source builds

This allows CI to fetch and build dependencies (FFTW, HDF5, etc.) on
login nodes with internet access, then build MFC source code on compute
nodes that may have no network connectivity.

Key changes:
- New `--deps-only` CLI flag for `./mfc.sh build`
- Already-configured dependencies are skipped entirely during regular
  builds, so the source build step needs no network access
- All clusters (Phoenix, Frontier, Frontier AMD) now follow the same
  pattern: deps on login node, source build + test on compute node
  (sketched below)
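
Roughly, the intended two-phase flow (illustrated with the GPU/OpenACC
flags the CI scripts pass; other build_opts combinations behave the same):

    # Login node (has internet): fetch and build third-party deps only
    ./mfc.sh build --deps-only -j 8 --gpu acc

    # Compute node (possibly offline): build MFC itself; already-configured
    # deps are skipped entirely, so no network access is needed
    ./mfc.sh build -j 8 --gpu acc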
Co-Authored-By: Claude Opus 4.6
---
 .github/workflows/bench.yml             | 17 +++++-----
 .github/workflows/frontier/bench.sh     | 38 ++++++++++++++++++++---
 .github/workflows/frontier/build.sh     | 41 +++----------------------
 .github/workflows/frontier/test.sh      | 22 +++++++++++++
 .github/workflows/frontier_amd/bench.sh | 38 ++++++++++++++++++++---
 .github/workflows/frontier_amd/build.sh | 41 +++----------------------
 .github/workflows/frontier_amd/test.sh  | 22 +++++++++++++
 .github/workflows/phoenix/build.sh      | 20 ++++++++++++
 .github/workflows/test.yml              | 11 +++----
 toolchain/mfc/build.py                  | 26 ++++++++++++++++
 toolchain/mfc/cli/commands.py           |  7 +++++
 11 files changed, 186 insertions(+), 97 deletions(-)
 create mode 100644 .github/workflows/phoenix/build.sh

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 6279f5f578..ac2f026dab 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -91,7 +91,7 @@ jobs:
           flag: p
           device: cpu
           interface: none
-          build_script: ""
+          build_script: "bash .github/workflows/phoenix/build.sh cpu none"
         - cluster: phoenix
           name: Georgia Tech | Phoenix (NVHPC)
           group: phoenix
@@ -99,7 +99,7 @@
           flag: p
           device: gpu
           interface: acc
-          build_script: ""
+          build_script: "bash .github/workflows/phoenix/build.sh gpu acc"
         - cluster: phoenix
           name: Georgia Tech | Phoenix (NVHPC)
           group: phoenix
@@ -107,7 +107,7 @@
           flag: p
           device: gpu
           interface: omp
-          build_script: ""
+          build_script: "bash .github/workflows/phoenix/build.sh gpu omp"
         - cluster: frontier
           name: Oak Ridge | Frontier (CCE)
           group: phoenix
@@ -115,7 +115,7 @@
           flag: f
           device: gpu
           interface: acc
-          build_script: "bash .github/workflows/frontier/build.sh gpu acc bench"
+          build_script: "bash .github/workflows/frontier/build.sh gpu acc"
         - cluster: frontier
           name: Oak Ridge | Frontier (CCE)
           group: phoenix
@@ -123,7 +123,7 @@
           flag: f
           device: gpu
           interface: omp
-          build_script: "bash .github/workflows/frontier/build.sh gpu omp bench"
+          build_script: "bash .github/workflows/frontier/build.sh gpu omp"
         - cluster: frontier_amd
           name: Oak Ridge | Frontier (AMD)
           group: phoenix
@@ -131,7 +131,7 @@
           flag: famd
           device: gpu
           interface: omp
-          build_script: "bash .github/workflows/frontier_amd/build.sh gpu omp bench"
+          build_script: "bash .github/workflows/frontier_amd/build.sh gpu omp"
     runs-on:
       group: ${{ matrix.group }}
       labels: ${{ matrix.labels }}
@@ -153,9 +153,8 @@
           ref: master
           path: master
 
-      - name: Setup & Build
-        if: matrix.build_script != ''
-        run: |
+      - name: Fetch Dependencies
+        run: |
           (cd pr && ${{ matrix.build_script }}) &
           (cd master && ${{ matrix.build_script }}) &
           wait %1 && wait %2
diff --git a/.github/workflows/frontier/bench.sh b/.github/workflows/frontier/bench.sh
index 35b4c5950e..13bbbddc2c 100644
--- a/.github/workflows/frontier/bench.sh
+++ b/.github/workflows/frontier/bench.sh
@@ -1,20 +1,50 @@
 #!/bin/bash
 
 n_ranks=12
+build_opts=""
 device_opts=""
 if [ "$job_device" = "gpu" ]; then
     gpus=$(rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ' ')
     n_ranks=$(echo "$gpus" | wc -w)                                    # number of GPUs on node
     gpu_ids=$(echo "$gpus" | tr ' ' '\n' | tr '\n' ' ' | sed 's/ $//') # GPU IDs from rocm-smi
-    device_opts+="--gpu"
+    build_opts+="--gpu"
     if [ "$job_interface" = "acc" ]; then
-        device_opts+=" acc"
+        build_opts+=" acc"
     elif [ "$job_interface" = "omp" ]; then
-        device_opts+=" mp"
+        build_opts+=" mp"
     fi
-    device_opts+=" -g $gpu_ids"
+    device_opts="$build_opts -g $gpu_ids"
 fi
 
+# Build case-optimized binaries on compute node (deps already fetched on login node)
+max_attempts=3
+attempt=1
+while [ $attempt -le $max_attempts ]; do
+    echo "Build attempt $attempt of $max_attempts..."
+    build_cmd_ok=true
+    for dir in benchmarks/*/; do
+        if ! ./mfc.sh run -v "$dir/case.py" --case-optimization -j 8 --dry-run $build_opts; then
+            build_cmd_ok=false
+            break
+        fi
+    done
+
+    if [ "$build_cmd_ok" = true ]; then
+        echo "Build succeeded on attempt $attempt."
+        break
+    fi
+
+    if [ $attempt -lt $max_attempts ]; then
+        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
+        ./mfc.sh clean
+        sleep 30
+    else
+        echo "Build failed after $max_attempts attempts."
+        exit 1
+    fi
+    attempt=$((attempt + 1))
+done
+
 if [ "$job_device" = "gpu" ]; then
     ./mfc.sh bench --mem 12 -j $n_ranks -o "$job_slug.yaml" -- -c frontier $device_opts -n $n_ranks
 else
diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh
index 18cddc96ca..84b67020ca 100644
--- a/.github/workflows/frontier/build.sh
+++ b/.github/workflows/frontier/build.sh
@@ -1,11 +1,13 @@
 #!/bin/bash
 
+# Fetch dependencies on login node (internet access).
+# Source code is built on compute nodes via test.sh / bench.sh.
+
 # Ignore SIGHUP to survive login node session drops
 trap '' HUP
 
 job_device=$1
 job_interface=$2
-run_bench=$3
 build_opts=""
 if [ "$job_device" = "gpu" ]; then
     build_opts+="--gpu"
@@ -18,39 +20,4 @@ fi
 
 . ./mfc.sh load -c f -m g
 
-max_attempts=3
-attempt=1
-while [ $attempt -le $max_attempts ]; do
-    echo "Build attempt $attempt of $max_attempts..."
-    if [ "$run_bench" == "bench" ]; then
-        build_cmd_ok=true
-        for dir in benchmarks/*/; do
-            dirname=$(basename "$dir")
-            if ! ./mfc.sh run -v "$dir/case.py" --case-optimization -j 8 --dry-run $build_opts; then
-                build_cmd_ok=false
-                break
-            fi
-        done
-    else
-        if ./mfc.sh test -v -a --dry-run --rdma-mpi -j 8 $build_opts; then
-            build_cmd_ok=true
-        else
-            build_cmd_ok=false
-        fi
-    fi
-
-    if [ "$build_cmd_ok" = true ]; then
-        echo "Build succeeded on attempt $attempt."
-        exit 0
-    fi
-
-    if [ $attempt -lt $max_attempts ]; then
-        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
-        ./mfc.sh clean
-        sleep 30
-    fi
-    attempt=$((attempt + 1))
-done
-
-echo "Build failed after $max_attempts attempts."
-exit 1
+./mfc.sh build --deps-only -j 8 $build_opts
diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh
index 17fbbaf8e5..ec790944c0 100644
--- a/.github/workflows/frontier/test.sh
+++ b/.github/workflows/frontier/test.sh
@@ -13,6 +13,28 @@ if [ "$job_device" = "gpu" ]; then
     fi
 fi
 
+# Build source code on compute node (deps already fetched on login node)
+max_attempts=3
+attempt=1
+while [ $attempt -le $max_attempts ]; do
+    echo "Build attempt $attempt of $max_attempts..."
+    if ./mfc.sh test -v -a --dry-run --rdma-mpi -j 8 $device_opts; then
+        echo "Build succeeded on attempt $attempt."
+        break
+    fi
+
+    if [ $attempt -lt $max_attempts ]; then
+        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
+        ./mfc.sh clean
+        sleep 30
+    else
+        echo "Build failed after $max_attempts attempts."
+        exit 1
+    fi
+    attempt=$((attempt + 1))
+done
+
+# Run tests
 if [ "$job_device" = "gpu" ]; then
     ./mfc.sh test -v -a --rdma-mpi --max-attempts 3 -j $ngpus $device_opts -- -c frontier
 else
diff --git a/.github/workflows/frontier_amd/bench.sh b/.github/workflows/frontier_amd/bench.sh
index 6e01687e79..fd263dd800 100644
--- a/.github/workflows/frontier_amd/bench.sh
+++ b/.github/workflows/frontier_amd/bench.sh
@@ -1,20 +1,50 @@
 #!/bin/bash
 
 n_ranks=12
+build_opts=""
 device_opts=""
 if [ "$job_device" = "gpu" ]; then
     gpus=$(rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ' ')
     n_ranks=$(echo "$gpus" | wc -w)                                    # number of GPUs on node
     gpu_ids=$(echo "$gpus" | tr ' ' '\n' | tr '\n' ' ' | sed 's/ $//') # GPU IDs from rocm-smi
-    device_opts+="--gpu"
+    build_opts+="--gpu"
     if [ "$job_interface" = "acc" ]; then
-        device_opts+=" acc"
+        build_opts+=" acc"
     elif [ "$job_interface" = "omp" ]; then
-        device_opts+=" mp"
+        build_opts+=" mp"
     fi
-    device_opts+=" -g $gpu_ids"
+    device_opts="$build_opts -g $gpu_ids"
 fi
 
+# Build case-optimized binaries on compute node (deps already fetched on login node)
+max_attempts=3
+attempt=1
+while [ $attempt -le $max_attempts ]; do
+    echo "Build attempt $attempt of $max_attempts..."
+    build_cmd_ok=true
+    for dir in benchmarks/*/; do
+        if ! ./mfc.sh run -v "$dir/case.py" --case-optimization -j 8 --dry-run $build_opts; then
+            build_cmd_ok=false
+            break
+        fi
+    done
+
+    if [ "$build_cmd_ok" = true ]; then
+        echo "Build succeeded on attempt $attempt."
+        break
+    fi
+
+    if [ $attempt -lt $max_attempts ]; then
+        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
+        ./mfc.sh clean
+        sleep 30
+    else
+        echo "Build failed after $max_attempts attempts."
+        exit 1
+    fi
+    attempt=$((attempt + 1))
+done
+
 if [ "$job_device" = "gpu" ]; then
     ./mfc.sh bench --mem 12 -j $n_ranks -o "$job_slug.yaml" -- -c frontier_amd $device_opts -n $n_ranks
 else
diff --git a/.github/workflows/frontier_amd/build.sh b/.github/workflows/frontier_amd/build.sh
index 56c47d8ff4..1b120ae6f7 100644
--- a/.github/workflows/frontier_amd/build.sh
+++ b/.github/workflows/frontier_amd/build.sh
@@ -1,11 +1,13 @@
 #!/bin/bash
 
+# Fetch dependencies on login node (internet access).
+# Source code is built on compute nodes via test.sh / bench.sh.
+
 # Ignore SIGHUP to survive login node session drops
 trap '' HUP
 
 job_device=$1
 job_interface=$2
-run_bench=$3
 build_opts=""
 if [ "$job_device" = "gpu" ]; then
     build_opts+="--gpu"
@@ -18,39 +20,4 @@ fi
 
 . ./mfc.sh load -c famd -m g
 
-max_attempts=3
-attempt=1
-while [ $attempt -le $max_attempts ]; do
-    echo "Build attempt $attempt of $max_attempts..."
-    if [ "$run_bench" == "bench" ]; then
-        build_cmd_ok=true
-        for dir in benchmarks/*/; do
-            dirname=$(basename "$dir")
-            if ! ./mfc.sh run -v "$dir/case.py" --case-optimization -j 8 --dry-run $build_opts; then
-                build_cmd_ok=false
-                break
-            fi
-        done
-    else
-        if ./mfc.sh test -v -a --dry-run -j 8 $build_opts; then
-            build_cmd_ok=true
-        else
-            build_cmd_ok=false
-        fi
-    fi
-
-    if [ "$build_cmd_ok" = true ]; then
-        echo "Build succeeded on attempt $attempt."
-        exit 0
-    fi
-
-    if [ $attempt -lt $max_attempts ]; then
-        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
-        ./mfc.sh clean
-        sleep 30
-    fi
-    attempt=$((attempt + 1))
-done
-
-echo "Build failed after $max_attempts attempts."
-exit 1
+./mfc.sh build --deps-only -j 8 $build_opts
diff --git a/.github/workflows/frontier_amd/test.sh b/.github/workflows/frontier_amd/test.sh
index ff65aa2b0e..654072a754 100644
--- a/.github/workflows/frontier_amd/test.sh
+++ b/.github/workflows/frontier_amd/test.sh
@@ -13,6 +13,28 @@ if [ "$job_device" = "gpu" ]; then
     fi
 fi
 
+# Build source code on compute node (deps already fetched on login node)
+max_attempts=3
+attempt=1
+while [ $attempt -le $max_attempts ]; do
+    echo "Build attempt $attempt of $max_attempts..."
+    if ./mfc.sh test -v -a --dry-run -j 8 $device_opts; then
+        echo "Build succeeded on attempt $attempt."
+        break
+    fi
+
+    if [ $attempt -lt $max_attempts ]; then
+        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
+        ./mfc.sh clean
+        sleep 30
+    else
+        echo "Build failed after $max_attempts attempts."
+        exit 1
+    fi
+    attempt=$((attempt + 1))
+done
+
+# Run tests
 if [ "$job_device" = "gpu" ]; then
     ./mfc.sh test -v -a --max-attempts 3 -j $ngpus $device_opts -- -c frontier_amd
 else
diff --git a/.github/workflows/phoenix/build.sh b/.github/workflows/phoenix/build.sh
new file mode 100644
index 0000000000..75bf6fc8a8
--- /dev/null
+++ b/.github/workflows/phoenix/build.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Fetch dependencies on login node (internet access).
+# Source code is built on compute nodes via test.sh / bench.sh.
+
+job_device=$1
+job_interface=$2
+build_opts=""
+if [ "$job_device" = "gpu" ]; then
+    build_opts+="--gpu"
+    if [ "$job_interface" = "acc" ]; then
+        build_opts+=" acc"
+    elif [ "$job_interface" = "omp" ]; then
+        build_opts+=" mp"
+    fi
+fi
+
+. ./mfc.sh load -c p -m $job_device
+
+./mfc.sh build --deps-only -j $(nproc) $build_opts
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 0be51076ec..d79e61fecd 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -149,7 +149,7 @@ jobs:
     strategy:
       matrix:
         include:
-          # Phoenix (GT) — build+test combined in SLURM job
+          # Phoenix (GT) — deps on login node, build+test in SLURM job
          - runner: 'gt'
            cluster: 'phoenix'
            cluster_name: 'Georgia Tech | Phoenix'
@@ -165,7 +165,7 @@
            cluster_name: 'Georgia Tech | Phoenix'
            device: 'cpu'
            interface: 'none'
-          # Frontier (ORNL) — build on login node, test via SLURM
+          # Frontier (ORNL) — deps on login node, build+test via SLURM
          - runner: 'frontier'
            cluster: 'frontier'
            cluster_name: 'Oak Ridge | Frontier'
@@ -181,7 +181,7 @@
            cluster_name: 'Oak Ridge | Frontier'
            device: 'cpu'
            interface: 'none'
-          # Frontier AMD — build on login node, test via SLURM
+          # Frontier AMD — deps on login node, build+test via SLURM
          - runner: 'frontier'
            cluster: 'frontier_amd'
            cluster_name: 'Oak Ridge | Frontier (AMD)'
@@ -203,11 +203,10 @@
      - name: Clone
        uses: actions/checkout@v4
 
-      - name: Build
-        if: matrix.cluster != 'phoenix'
+      - name: Fetch Dependencies
        run: bash .github/workflows/${{ matrix.cluster }}/build.sh ${{ matrix.device }} ${{ matrix.interface }}
 
-      - name: Test
+      - name: Build & Test
        run: bash .github/workflows/${{ matrix.cluster }}/submit.sh .github/workflows/${{ matrix.cluster }}/test.sh ${{ matrix.device }} ${{ matrix.interface }}
 
      - name: Print Logs
diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py
index 6430f7ad35..e55ba8c8f1 100644
--- a/toolchain/mfc/build.py
+++ b/toolchain/mfc/build.py
@@ -566,6 +566,12 @@ def __build_target(target: typing.Union[MFCTarget, str], case: input.MFCInputFil
     history.add(target.name)
 
+    # Dependencies are pinned to fixed versions. If already configured
+    # (built & installed by a prior --deps-only step), skip entirely
+    # to avoid re-entering the superbuild (which may access the network).
+    if target.isDependency and target.is_configured(case):
+        return
+
     for dep in target.requires.compute():
         # If we have already built and installed this target,
         # do not do so again. This can be inferred by whether
@@ -611,6 +617,26 @@ def build(targets = None, case: input.MFCInputFile = None, history: typing.Set[s
     case = case or input.load(ARG("input"), ARG("--"), {})
     case.validate_params()
 
+    if ARG("deps_only", False):
+        all_deps = set()
+        for target in targets:
+            target = get_target(target)
+            for dep in target.requires.compute():
+                all_deps.add(dep)
+
+        if len(history) == 0:
+            cons.print(f"[bold]Fetch Dependencies | {format_list_to_string([d.name for d in all_deps], 'magenta', 'None')}[/bold]")
+            cons.print(no_indent=True)
+
+        if not all_deps:
+            cons.print("[yellow]No dependencies to build for the requested targets.[/yellow]")
+            return
+
+        for dep in all_deps:
+            __build_target(dep, case, history)
+
+        return
+
     if len(history) == 0:
         cons.print(__generate_header(case, targets))
         cons.print(no_indent=True)
diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index 8ad8c4bd07..bb9fddb76d 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -154,6 +154,13 @@
             default=False,
             dest="case_optimization",
         ),
+        Argument(
+            name="deps-only",
+            help="Only fetch and build dependencies, do not build MFC targets.",
+            action=ArgAction.STORE_TRUE,
+            default=False,
+            dest="deps_only",
+        ),
     ],
     examples=[
         Example("./mfc.sh build", "Build all default targets (CPU)"),

From 2474b4640d138b81c8dfd8b6dc5bc6b11f4ec1d7 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson
Date: Thu, 12 Feb 2026 19:56:56 -0500
Subject: [PATCH 2/4] Address review feedback: sort deps, fix --deps-only
 -t <target>, add --no-gpu consistency

- Sort dependency list for deterministic build order and log output
- Include dependency targets themselves in --deps-only (fixes
  `--deps-only -t fftw` doing nothing; see the example below)
- Remove redundant get_target() call (targets already resolved)
- Add trap '' HUP to phoenix/build.sh for SSH resilience
- Add --no-gpu to CPU dry-run builds in frontier test scripts for
  consistency
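
A quick way to exercise the -t fix (before this change the command below
did nothing, since only each requested target's own dependencies were
collected, and a dependency target like fftw contributes none):

    ./mfc.sh build --deps-only -t fftw -j 8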
Co-Authored-By: Claude Opus 4.6
---
 .github/workflows/frontier/test.sh     |  2 ++
 .github/workflows/frontier_amd/test.sh |  2 ++
 .github/workflows/phoenix/build.sh     |  3 +++
 toolchain/mfc/build.py                 | 11 +++++++----
 4 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh
index ec790944c0..5ba2d18c1a 100644
--- a/.github/workflows/frontier/test.sh
+++ b/.github/workflows/frontier/test.sh
@@ -11,6 +11,8 @@ if [ "$job_device" = "gpu" ]; then
     elif [ "$job_interface" = "omp" ]; then
         device_opts+=" mp"
     fi
+else
+    device_opts+=" --no-gpu"
 fi
 
 # Build source code on compute node (deps already fetched on login node)
diff --git a/.github/workflows/frontier_amd/test.sh b/.github/workflows/frontier_amd/test.sh
index 654072a754..a3f9bd21fa 100644
--- a/.github/workflows/frontier_amd/test.sh
+++ b/.github/workflows/frontier_amd/test.sh
@@ -11,6 +11,8 @@ if [ "$job_device" = "gpu" ]; then
     elif [ "$job_interface" = "omp" ]; then
         device_opts+=" mp"
     fi
+else
+    device_opts+=" --no-gpu"
 fi
 
 # Build source code on compute node (deps already fetched on login node)
diff --git a/.github/workflows/phoenix/build.sh b/.github/workflows/phoenix/build.sh
index 75bf6fc8a8..8a850fca6f 100644
--- a/.github/workflows/phoenix/build.sh
+++ b/.github/workflows/phoenix/build.sh
@@ -3,6 +3,9 @@
 # Fetch dependencies on login node (internet access).
 # Source code is built on compute nodes via test.sh / bench.sh.
 
+# Ignore SIGHUP to survive login node session drops
+trap '' HUP
+
 job_device=$1
 job_interface=$2
 build_opts=""
diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py
index e55ba8c8f1..8ed5e693b0 100644
--- a/toolchain/mfc/build.py
+++ b/toolchain/mfc/build.py
@@ -620,19 +620,22 @@ def build(targets = None, case: input.MFCInputFile = None, history: typing.Set[s
     if ARG("deps_only", False):
         all_deps = set()
         for target in targets:
-            target = get_target(target)
+            if target.isDependency:
+                all_deps.add(target)
             for dep in target.requires.compute():
                 all_deps.add(dep)
 
+        sorted_deps = sorted(all_deps, key=lambda t: t.name)
+
         if len(history) == 0:
-            cons.print(f"[bold]Fetch Dependencies | {format_list_to_string([d.name for d in all_deps], 'magenta', 'None')}[/bold]")
+            cons.print(f"[bold]Fetch Dependencies | {format_list_to_string([d.name for d in sorted_deps], 'magenta', 'None')}[/bold]")
             cons.print(no_indent=True)
 
-        if not all_deps:
+        if not sorted_deps:
             cons.print("[yellow]No dependencies to build for the requested targets.[/yellow]")
             return
 
-        for dep in all_deps:
+        for dep in sorted_deps:
             __build_target(dep, case, history)
 
         return

From 7179e09f58b7ea2f9e01b3b54a33e8869cf7128e Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson
Date: Thu, 12 Feb 2026 20:34:08 -0500
Subject: [PATCH 3/4] Fix retry clean wiping pre-fetched deps; harden dep
 skip check

- Clean only source targets (not deps) in compute-node retry loops, so
  pre-fetched dependencies survive build failures on offline nodes (see
  the example below)
- Also check install dir exists (not just CMakeCache.txt) before
  skipping a dependency, guarding against configure-ok-but-build-failed
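
The compute-node retry now runs a targeted clean, e.g.:

    # third-party deps (FFTW, HDF5, ...) fetched by --deps-only stay installed
    ./mfc.sh clean -t pre_process simulation post_process syscheck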
Co-Authored-By: Claude Opus 4.6
---
 .github/workflows/frontier/bench.sh     | 4 ++--
 .github/workflows/frontier/test.sh      | 4 ++--
 .github/workflows/frontier_amd/bench.sh | 4 ++--
 .github/workflows/frontier_amd/test.sh  | 4 ++--
 toolchain/mfc/build.py                  | 2 +-
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/frontier/bench.sh b/.github/workflows/frontier/bench.sh
index 13bbbddc2c..1aafa19e76 100644
--- a/.github/workflows/frontier/bench.sh
+++ b/.github/workflows/frontier/bench.sh
@@ -35,8 +35,8 @@ while [ $attempt -le $max_attempts ]; do
     fi
 
     if [ $attempt -lt $max_attempts ]; then
-        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
-        ./mfc.sh clean
+        echo "Build failed on attempt $attempt. Cleaning source targets and retrying in 30s..."
+        ./mfc.sh clean -t pre_process simulation post_process syscheck
         sleep 30
     else
         echo "Build failed after $max_attempts attempts."
diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh
index 5ba2d18c1a..3240ced6b8 100644
--- a/.github/workflows/frontier/test.sh
+++ b/.github/workflows/frontier/test.sh
@@ -26,8 +26,8 @@ while [ $attempt -le $max_attempts ]; do
     fi
 
     if [ $attempt -lt $max_attempts ]; then
-        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
-        ./mfc.sh clean
+        echo "Build failed on attempt $attempt. Cleaning source targets and retrying in 30s..."
+        ./mfc.sh clean -t pre_process simulation post_process syscheck
         sleep 30
     else
         echo "Build failed after $max_attempts attempts."
diff --git a/.github/workflows/frontier_amd/bench.sh b/.github/workflows/frontier_amd/bench.sh
index fd263dd800..173251c679 100644
--- a/.github/workflows/frontier_amd/bench.sh
+++ b/.github/workflows/frontier_amd/bench.sh
@@ -35,8 +35,8 @@ while [ $attempt -le $max_attempts ]; do
     fi
 
     if [ $attempt -lt $max_attempts ]; then
-        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
-        ./mfc.sh clean
+        echo "Build failed on attempt $attempt. Cleaning source targets and retrying in 30s..."
+        ./mfc.sh clean -t pre_process simulation post_process syscheck
         sleep 30
     else
         echo "Build failed after $max_attempts attempts."
diff --git a/.github/workflows/frontier_amd/test.sh b/.github/workflows/frontier_amd/test.sh
index a3f9bd21fa..afc944b8d3 100644
--- a/.github/workflows/frontier_amd/test.sh
+++ b/.github/workflows/frontier_amd/test.sh
@@ -26,8 +26,8 @@ while [ $attempt -le $max_attempts ]; do
     fi
 
     if [ $attempt -lt $max_attempts ]; then
-        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
-        ./mfc.sh clean
+        echo "Build failed on attempt $attempt. Cleaning source targets and retrying in 30s..."
+        ./mfc.sh clean -t pre_process simulation post_process syscheck
         sleep 30
     else
         echo "Build failed after $max_attempts attempts."
diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py
index 8ed5e693b0..eca3a4f2c0 100644
--- a/toolchain/mfc/build.py
+++ b/toolchain/mfc/build.py
@@ -569,7 +569,7 @@ def __build_target(target: typing.Union[MFCTarget, str], case: input.MFCInputFil
     # Dependencies are pinned to fixed versions. If already configured
     # (built & installed by a prior --deps-only step), skip entirely
     # to avoid re-entering the superbuild (which may access the network).
-    if target.isDependency and target.is_configured(case):
+    if target.isDependency and target.is_configured(case) and os.path.isdir(target.get_install_dirpath(case)):
         return
 
     for dep in target.requires.compute():

From e71b8cc3a25c65933a9cb2e51669202257a8aff3 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson
Date: Fri, 13 Feb 2026 09:20:23 -0500
Subject: [PATCH 4/4] Fix Phoenix test/bench scripts: preserve deps on retry,
 add --no-gpu for CPU

Same fixes previously applied to Frontier scripts but missed for
Phoenix:
- Use targeted clean (-t) to avoid wiping pre-fetched deps on retry
- Add --no-gpu for CPU test jobs (final invocation shown below)
Co-Authored-By: Claude Opus 4.6
---
 .github/workflows/phoenix/bench.sh | 4 ++--
 .github/workflows/phoenix/test.sh  | 7 +++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh
index 447bd710d9..b3e26178b9 100644
--- a/.github/workflows/phoenix/bench.sh
+++ b/.github/workflows/phoenix/bench.sh
@@ -48,8 +48,8 @@ while [ $attempt -le $max_attempts ]; do
     fi
 
     if [ $attempt -lt $max_attempts ]; then
-        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
-        ./mfc.sh clean
+        echo "Build failed on attempt $attempt. Cleaning source targets and retrying in 30s..."
+        ./mfc.sh clean -t pre_process simulation post_process syscheck
         sleep 30
     else
         echo "Build failed after $max_attempts attempts."
diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh
index 3920d96ed7..1706713df3 100644
--- a/.github/workflows/phoenix/test.sh
+++ b/.github/workflows/phoenix/test.sh
@@ -10,6 +10,7 @@ if [ "$job_device" = "gpu" ]; then
     fi
 fi
 
+# Build source code on compute node (deps already fetched on login node)
 max_attempts=3
 attempt=1
 while [ $attempt -le $max_attempts ]; do
@@ -20,8 +21,8 @@ while [ $attempt -le $max_attempts ]; do
     fi
 
     if [ $attempt -lt $max_attempts ]; then
-        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
-        ./mfc.sh clean
+        echo "Build failed on attempt $attempt. Cleaning source targets and retrying in 30s..."
+        ./mfc.sh clean -t pre_process simulation post_process syscheck
         sleep 30
     else
         echo "Build failed after $max_attempts attempts."
@@ -37,6 +38,8 @@ if [ "$job_device" = "gpu" ]; then
     gpu_ids=$(seq -s ' ' 0 $(($gpu_count-1)))  # 0,1,2,...,gpu_count-1
     device_opts="-g $gpu_ids"
     n_test_threads=`expr $gpu_count \* 2`
+else
+    device_opts="--no-gpu"
 fi
 
 ./mfc.sh test -v --max-attempts 3 -a -j $n_test_threads $device_opts -- -c phoenix