From 8329e079e9dd683b83df812a17655e446af25f1b Mon Sep 17 00:00:00 2001
From: Richard Abrich <richard.abrich@gmail.com>
Date: Fri, 6 Mar 2026 16:12:16 -0500
Subject: [PATCH 1/2] feat: add Win32 API foreground check as alternative to
 a11y-based detection

Add _check_foreground_win32() method that uses GetForegroundWindow() +
GetWindowText() via PowerShell P/Invoke for fast, reliable foreground
window title checking. This replaces the slow a11y-based check as the
default, while keeping a11y available via the focus_check_method config.

- New config field: focus_check_method (win32, a11y, or both)
- New CLI flag: --focus-check-method for run and live subcommands
- Detection of known-bad foreground states (Document Recovery, Start Center)
- Dispatch method routes to win32, a11y, or both (win32 first, a11y fallback)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 openadapt_evals/adapters/waa/live.py | 120 ++++++++++++++++++++++++++-
 openadapt_evals/benchmarks/cli.py    |   9 ++
 2 files changed, 128 insertions(+), 1 deletion(-)

diff --git a/openadapt_evals/adapters/waa/live.py b/openadapt_evals/adapters/waa/live.py
index a20e499..d901620 100644
--- a/openadapt_evals/adapters/waa/live.py
+++ b/openadapt_evals/adapters/waa/live.py
@@ -384,6 +384,7 @@ class WAALiveConfig:
     waa_image_version: str | None = None
     strict_setup_readiness: bool = False
     setup_readiness_retries: int = 3
+    focus_check_method: str = "win32"  # "win32", "a11y", or "both"
 
 
 class WAALiveAdapter(BenchmarkAdapter):
@@ -1997,7 +1998,7 @@ def _try_activate_patterns(
                     )
                     if resp.status_code == 200:
                         time.sleep(0.5)
-                        if self._check_foreground_matches(patterns, requests_module):
+                        if self._check_foreground_dispatch(patterns, requests_module):
                             logger.info(
                                 "Post-setup focus: activated '%s' on attempt %d",
                                 pattern,
@@ -2022,6 +2023,123 @@ def _try_activate_patterns(
                 time.sleep(delay)
         return False
 
+    # Lowercase substrings of foreground titles that mean the app is not ready.
+    _BAD_FOREGROUND_TITLES = [
+        "document recovery",
+        "libreoffice start center",
+    ]
+
+    def _check_foreground_win32(self, patterns: list[str]) -> bool:
+        """Check the foreground window title via the Win32 API.
+
+        Runs a PowerShell script (through ``self.run_powershell``) that
+        P/Invokes ``GetForegroundWindow()`` and ``GetWindowText()``, then
+        tests the title against ``patterns`` (case-insensitive substring).
+        The title is stored in ``self._last_foreground_title``; it is reset
+        to ``""`` first so a failed call never leaves a stale value behind.
+
+        Args:
+            patterns: Window title substrings to match (case-insensitive).
+
+        Returns:
+            True if the title contains any pattern. False if the title is
+            empty, contains a ``_BAD_FOREGROUND_TITLES`` entry, matches no
+            pattern, or the PowerShell call raises.
+        """
+        script = r"""
+Add-Type -TypeDefinition @"
+using System;
+using System.Runtime.InteropServices;
+using System.Text;
+public static class FgWin {
+  [DllImport("user32.dll")] public static extern IntPtr GetForegroundWindow();
+  [DllImport("user32.dll", CharSet=CharSet.Unicode)]
+  public static extern int GetWindowText(IntPtr hWnd, StringBuilder text, int count);
+  public static string GetTitle() {
+    var sb = new StringBuilder(512);
+    GetWindowText(GetForegroundWindow(), sb, sb.Capacity);
+    return sb.ToString();
+  }
+}
+"@
+[FgWin]::GetTitle()
+"""
+        # Reset first so a failed call cannot leave a stale title behind.
+        self._last_foreground_title = ""
+        try:
+            output = self.run_powershell(script).strip()
+            # Take the last non-empty line (PowerShell may emit warnings first).
+            stripped = (ln.strip() for ln in reversed(output.splitlines()))
+            title = next((ln for ln in stripped if ln), "")
+            self._last_foreground_title = title
+
+            if not title:
+                logger.warning(
+                    "Win32 foreground check: window title is empty/blank"
+                )
+                return False
+            # Detect known-bad foreground states.
+            title_lower = title.lower()
+            for bad in self._BAD_FOREGROUND_TITLES:
+                if bad in title_lower:
+                    logger.warning(
+                        "Win32 foreground check: detected known-bad title '%s'",
+                        title[:120],
+                    )
+                    return False
+
+            for pattern in patterns:
+                if pattern.lower() in title_lower:
+                    logger.debug(
+                        "Win32 foreground check: matched '%s' in '%s'",
+                        pattern,
+                        title[:100],
+                    )
+                    return True
+            logger.debug(
+                "Win32 foreground check: no pattern matched in '%s'",
+                title[:100],
+            )
+        except Exception as e:
+            logger.debug("Win32 foreground check failed: %s", e)
+        return False
+
+    def _check_foreground_dispatch(
+        self,
+        patterns: list[str],
+        requests_module: Any,
+    ) -> bool:
+        """Dispatch foreground check based on ``config.focus_check_method``.
+
+        Args:
+            patterns: Window title substrings to match (case-insensitive).
+            requests_module: The ``requests`` module (needed for a11y fallback).
+
+        Returns:
+            True if the foreground window matches any pattern. With "both",
+            a known-bad Win32 title is final: a11y is not consulted.
+        """
+        method = self.config.focus_check_method
+        if method == "a11y":
+            return self._check_foreground_matches(patterns, requests_module)
+        if method not in ("win32", "both"):
+            logger.warning(
+                "Unknown focus_check_method '%s'; defaulting to win32",
+                method,
+            )
+        # Clear any stale title so the known-bad veto below only ever sees
+        # what this Win32 call recorded.
+        self._last_foreground_title = ""
+        if self._check_foreground_win32(patterns):
+            return True
+        if method != "both":
+            return False
+        seen = self._last_foreground_title.lower()
+        if any(bad in seen for bad in self._BAD_FOREGROUND_TITLES):
+            return False
+        logger.debug("Win32 foreground check negative; falling back to a11y")
+        return self._check_foreground_matches(patterns, requests_module)
+
     def _check_foreground_matches(
         self,
         patterns: list[str],
diff --git a/openadapt_evals/benchmarks/cli.py b/openadapt_evals/benchmarks/cli.py
index 5c0178a..d0dc994 100644
--- a/openadapt_evals/benchmarks/cli.py
+++ b/openadapt_evals/benchmarks/cli.py
@@ -345,6 +345,7 @@ def cmd_run(args: argparse.Namespace) -> int:
         clean_desktop=getattr(args, "clean_desktop", False),
         force_tray_icons=getattr(args, "force_tray_icons", False),
         waa_image_version=getattr(args, "waa_image_version", None),
+        focus_check_method=getattr(args, "focus_check_method", "win32"),
     )
     adapter = WAALiveAdapter(config)
 
@@ -551,6 +552,7 @@ def cmd_live(args: argparse.Namespace) -> int:
         clean_desktop=getattr(args, "clean_desktop", False),
         force_tray_icons=getattr(args, "force_tray_icons", False),
         waa_image_version=getattr(args, "waa_image_version", None),
+        focus_check_method=getattr(args, "focus_check_method", "win32"),
     )
     adapter = WAALiveAdapter(config)
 
@@ -961,6 +963,7 @@ def patch_evaluate_endpoint() -> bool:
                 clean_desktop=getattr(args, "clean_desktop", False),
                 force_tray_icons=getattr(args, "force_tray_icons", False),
                 waa_image_version=getattr(args, "waa_image_version", None),
+                focus_check_method=getattr(args, "focus_check_method", "win32"),
             )
         )
 
@@ -2426,6 +2429,9 @@ def main() -> int:
                            help="Max times to override premature 'done' (default: 3)")
     run_parser.add_argument("--done-gate-threshold", type=float, default=1.0,
                            help="Minimum score to accept 'done' (default: 1.0)")
+    run_parser.add_argument("--focus-check-method", type=str, default="win32",
+                           choices=["win32", "a11y", "both"],
+                           help="Method for foreground window check: win32 (fast, default), a11y, or both")
 
     # Live evaluation (full control)
     live_parser = subparsers.add_parser("live", help="Run live evaluation against WAA server (full control)")
@@ -2460,6 +2466,9 @@ def main() -> int:
                             help="Max times to override premature 'done' (default: 3)")
     live_parser.add_argument("--done-gate-threshold", type=float, default=1.0,
                             help="Minimum score to accept 'done' (default: 1.0)")
+    live_parser.add_argument("--focus-check-method", type=str, default="win32",
+                            choices=["win32", "a11y", "both"],
+                            help="Method for foreground window check: win32 (fast, default), a11y, or both")
 
     # Probe server
     probe_parser = subparsers.add_parser("probe", help="Check if WAA server is reachable")

From db0a6015f5382a59c6f2412c98d9169f559d7b8d Mon Sep 17 00:00:00 2001
From: Richard Abrich <richard.abrich@gmail.com>
Date: Fri, 6 Mar 2026 16:46:41 -0500
Subject: [PATCH 2/2] test: update setup handler tests to mock win32 foreground
 check

The focus check default changed from a11y to win32, so tests need to
mock run_powershell instead of requests.get for the /accessibility
endpoint.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 tests/test_setup_handlers.py | 51 +++++++++++++++---------------------
 1 file changed, 21 insertions(+), 30 deletions(-)

diff --git a/tests/test_setup_handlers.py b/tests/test_setup_handlers.py
index a47c7f1..b387cb5 100644
--- a/tests/test_setup_handlers.py
+++ b/tests/test_setup_handlers.py
@@ -597,9 +597,9 @@ def test_calls_activate_window_with_patterns(self):
         adapter = self._make_adapter()
 
         # Simulate activate_window success (/setup) and foreground check success
-        # via accessibility endpoint (/accessibility).
+        # via win32 API (default focus_check_method).
         setup_calls = []
-        a11y_calls = []
+        ps_calls = []
 
         def _fake_post(url, **kwargs):
             setup_calls.append(url)
@@ -609,30 +609,27 @@ def _fake_post(url, **kwargs):
             resp.text = '{"results": [{"type": "activate_window", "status": "ok"}]}'
             return resp
 
-        def _fake_get(url, **kwargs):
-            a11y_calls.append(url)
-            resp = MagicMock()
-            resp.status_code = 200
-            resp.json.return_value = {"AT": {"name": "LibreOffice Calc - data.xlsx"}}
-            return resp
+        def _fake_powershell(script, **kwargs):
+            ps_calls.append(script)
+            return "LibreOffice Calc - data.xlsx"
 
         with patch("requests.post", side_effect=_fake_post), \
-             patch("requests.get", side_effect=_fake_get), \
+             patch.object(adapter, "run_powershell", side_effect=_fake_powershell), \
              patch("time.sleep"):
             adapter._ensure_app_focused({
                 "related_apps": ["libreoffice_calc"],
             })
 
-        # Should have called activate_window at least once and check at least once
+        # Should have called activate_window at least once and win32 check at least once
         assert len(setup_calls) >= 1
-        assert len(a11y_calls) >= 1
+        assert len(ps_calls) >= 1
 
     def test_retries_on_foreground_mismatch(self):
         """Retries when foreground check does not match expected pattern."""
         adapter = self._make_adapter()
 
         setup_calls = []
-        a11y_calls = []
+        ps_calls = []
 
         def _fake_post(url, **kwargs):
             setup_calls.append(url)
@@ -642,24 +639,21 @@ def _fake_post(url, **kwargs):
             resp.text = '{"results": [{"type": "activate_window", "status": "ok"}]}'
             return resp
 
-        def _fake_get(url, **kwargs):
-            a11y_calls.append(url)
-            resp = MagicMock()
-            resp.status_code = 200
+        def _fake_powershell(script, **kwargs):
+            ps_calls.append(script)
             # Always report desktop as foreground (mismatch).
-            resp.json.return_value = {"AT": {"name": "Desktop"}}
-            return resp
+            return "Desktop"
 
         with patch("requests.post", side_effect=_fake_post), \
-             patch("requests.get", side_effect=_fake_get), \
+             patch.object(adapter, "run_powershell", side_effect=_fake_powershell), \
              patch("time.sleep"):
             adapter._ensure_app_focused({
                 "related_apps": ["notepad"],
             })
 
-        # Should have tried multiple setup activations and foreground checks.
+        # Should have tried multiple setup activations and win32 foreground checks.
         assert len(setup_calls) >= 3  # At least 3 retry attempts
-        assert len(a11y_calls) >= 3  # At least 3 foreground checks
+        assert len(ps_calls) >= 3  # At least 3 foreground checks
 
     def test_succeeds_on_second_attempt(self):
         """If first attempt fails but second succeeds, returns after second."""
@@ -674,20 +668,17 @@ def _fake_post(url, **kwargs):
             resp.text = '{"results": []}'
             return resp
 
-        def _fake_get(url, **kwargs):
+        def _fake_powershell(script, **kwargs):
             attempt_count[0] += 1
-            resp = MagicMock()
-            resp.status_code = 200
             if attempt_count[0] <= 2:
-                # First attempt: wrong window
-                resp.json.return_value = {"AT": {"name": "Desktop"}}
+                # First attempts: wrong window
+                return "Desktop"
             else:
-                # Second attempt: correct window
-                resp.json.return_value = {"AT": {"name": "LibreOffice Calc"}}
-            return resp
+                # Later attempt: correct window
+                return "LibreOffice Calc"
 
         with patch("requests.post", side_effect=_fake_post), \
-             patch("requests.get", side_effect=_fake_get), \
+             patch.object(adapter, "run_powershell", side_effect=_fake_powershell), \
              patch("time.sleep"):
             adapter._ensure_app_focused({
                 "related_apps": ["libreoffice_calc"],