From 5a6a2a4fff3df6ea22c1095807f7ba3ab0dbec9e Mon Sep 17 00:00:00 2001 From: Sydney Lister Date: Thu, 5 Mar 2026 17:24:53 -0500 Subject: [PATCH 1/3] [Evaluation] Recover partial red team results when Foundry execution raises When orchestrator.execute() raises (e.g., ConnectTimeout on 1 of 50 objectives), attempt to recover partial results from the orchestrator before falling back to the empty-result error path. Previously, any single objective failure caused the entire risk category's results to be discarded (data_file set to empty string, 0 results returned). Now, completed objectives are processed through the normal FoundryResultProcessor pipeline and included in the final output. The error is demoted from ERROR to WARNING when partial results are available, since it is not a total failure. The original full-failure path is preserved when get_attack_results() returns empty. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../red_team/_foundry/_execution_manager.py | 37 +++++++++++++------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_execution_manager.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_execution_manager.py index 2f6655930e93..c2f05e8432e4 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_execution_manager.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_execution_manager.py @@ -162,19 +162,32 @@ async def execute_attacks( include_baseline=include_baseline, ) except Exception as e: - self.logger.error(f"Error executing attacks for {risk_value}: {e}") - # Use "Foundry" as fallback strategy name to match expected structure - if "Foundry" not in red_team_info: - red_team_info["Foundry"] = {} - red_team_info["Foundry"][risk_value] = { - "data_file": "", - "status": "failed", - "error": str(e), - "asr": 0.0, - } - continue + # Attempt to recover partial results before giving up + partial_results = [] + try: + partial_results = orchestrator.get_attack_results() + except Exception: + pass + + if partial_results: + self.logger.warning( + f"Partial failure executing attacks for {risk_value}: {e}. " + f"Recovered {len(partial_results)} partial results." + ) + else: + self.logger.error(f"Error executing attacks for {risk_value}: {e}") + # No results recoverable — use empty fallback + if "Foundry" not in red_team_info: + red_team_info["Foundry"] = {} + red_team_info["Foundry"][risk_value] = { + "data_file": "", + "status": "failed", + "error": str(e), + "asr": 0.0, + } + continue - # Process results + # Process results (handles both full success and partial recovery) result_processor = FoundryResultProcessor( scenario=orchestrator, dataset_config=dataset_config, From 7af9843af25065bd99e4d461b49cda65864b3465 Mon Sep 17 00:00:00 2001 From: Sydney Lister Date: Mon, 9 Mar 2026 11:50:27 -0400 Subject: [PATCH 2/3] Address review comments: add debug logging, structured partial_failure info Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../red_team/_foundry/_execution_manager.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_execution_manager.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_execution_manager.py index c2f05e8432e4..5fb34a69d043 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_execution_manager.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_execution_manager.py @@ -162,18 +162,33 @@ async def execute_attacks( include_baseline=include_baseline, ) except Exception as e: - # Attempt to recover partial results before giving up + # Attempt to recover partial results before giving up. + # partial_results is used only as a truthiness check here; + # FoundryResultProcessor re-retrieves results via orchestrator.get_attack_results(). partial_results = [] try: partial_results = orchestrator.get_attack_results() except Exception: - pass + self.logger.debug( + "Failed to recover partial results for %s", risk_value, exc_info=True + ) if partial_results: self.logger.warning( f"Partial failure executing attacks for {risk_value}: {e}. " f"Recovered {len(partial_results)} partial results." ) + # Record partial failure in structured output so callers + # relying on red_team_info can observe it. + if "Foundry" not in red_team_info: + red_team_info["Foundry"] = {} + red_team_info["Foundry"][risk_value] = { + "data_file": "", + "status": "partial_failure", + "error": str(e), + "partial_failure": True, + "asr": 0.0, + } else: self.logger.error(f"Error executing attacks for {risk_value}: {e}") # No results recoverable — use empty fallback From 2cbb53641ac2000f08fb6d2dc2880711a601dcdb Mon Sep 17 00:00:00 2001 From: Sydney Lister Date: Mon, 9 Mar 2026 15:15:30 -0400 Subject: [PATCH 3/3] Apply black formatting Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../ai/evaluation/red_team/_foundry/_execution_manager.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_execution_manager.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_execution_manager.py index 5fb34a69d043..61c01c56b352 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_execution_manager.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_execution_manager.py @@ -169,9 +169,7 @@ async def execute_attacks( try: partial_results = orchestrator.get_attack_results() except Exception: - self.logger.debug( - "Failed to recover partial results for %s", risk_value, exc_info=True - ) + self.logger.debug("Failed to recover partial results for %s", risk_value, exc_info=True) if partial_results: self.logger.warning(