From 8329e079e9dd683b83df812a17655e446af25f1b Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Fri, 6 Mar 2026 16:12:16 -0500 Subject: [PATCH 1/2] feat: add Win32 API foreground check as alternative to a11y-based detection Add _check_foreground_win32() method that uses GetForegroundWindow() + GetWindowText() via PowerShell P/Invoke for fast, reliable foreground window title checking. This replaces the slow a11y-based check as the default, while keeping a11y available via the focus_check_method config. - New config field: focus_check_method (win32, a11y, or both) - New CLI flag: --focus-check-method for run and live subcommands - Detection of known-bad foreground states (Document Recovery, Start Center) - Dispatch method routes to win32, a11y, or both (win32 first, a11y fallback) Co-Authored-By: Claude Opus 4.6 --- openadapt_evals/adapters/waa/live.py | 120 ++++++++++++++++++++++++++- openadapt_evals/benchmarks/cli.py | 9 ++ 2 files changed, 128 insertions(+), 1 deletion(-) diff --git a/openadapt_evals/adapters/waa/live.py b/openadapt_evals/adapters/waa/live.py index a20e499..d901620 100644 --- a/openadapt_evals/adapters/waa/live.py +++ b/openadapt_evals/adapters/waa/live.py @@ -384,6 +384,7 @@ class WAALiveConfig: waa_image_version: str | None = None strict_setup_readiness: bool = False setup_readiness_retries: int = 3 + focus_check_method: str = "win32" # "win32", "a11y", or "both" class WAALiveAdapter(BenchmarkAdapter): @@ -1997,7 +1998,7 @@ def _try_activate_patterns( ) if resp.status_code == 200: time.sleep(0.5) - if self._check_foreground_matches(patterns, requests_module): + if self._check_foreground_dispatch(patterns, requests_module): logger.info( "Post-setup focus: activated '%s' on attempt %d", pattern, @@ -2022,6 +2023,123 @@ def _try_activate_patterns( time.sleep(delay) return False + # Known-bad foreground window titles that indicate the app is not ready. 
+ _BAD_FOREGROUND_TITLES = [ + "document recovery", + "libreoffice start center", + ] + + def _check_foreground_win32(self, patterns: list[str]) -> bool: + """Check foreground window title using Win32 API (fast, reliable). + + Runs a minimal PowerShell script that calls ``GetForegroundWindow()`` + and ``GetWindowText()`` via P/Invoke to retrieve the current foreground + window title, then checks whether it contains any of the expected + patterns (case-insensitive). + + Args: + patterns: Window title substrings to match (case-insensitive). + + Returns: + True if the foreground window title contains any of the patterns. + """ + script = r""" +Add-Type -TypeDefinition @" +using System; +using System.Runtime.InteropServices; +using System.Text; +public static class FgWin { + [DllImport("user32.dll")] public static extern IntPtr GetForegroundWindow(); + [DllImport("user32.dll", CharSet=CharSet.Unicode)] + public static extern int GetWindowText(IntPtr hWnd, StringBuilder text, int count); + public static string GetTitle() { + var sb = new StringBuilder(512); + GetWindowText(GetForegroundWindow(), sb, sb.Capacity); + return sb.ToString(); + } +} +"@ +[FgWin]::GetTitle() +""" + try: + output = self.run_powershell(script).strip() + # Take the last non-empty line (PowerShell may emit warnings before). + title = "" + for line in reversed(output.splitlines()): + line = line.strip() + if line: + title = line + break + + self._last_foreground_title = title + + # Detect known-bad foreground states. 
+ title_lower = title.lower() + if not title: + logger.warning( + "Win32 foreground check: window title is empty/blank" + ) + return False + for bad in self._BAD_FOREGROUND_TITLES: + if bad in title_lower: + logger.warning( + "Win32 foreground check: detected known-bad title '%s'", + title[:120], + ) + return False + + for pattern in patterns: + if pattern.lower() in title_lower: + logger.debug( + "Win32 foreground check: matched '%s' in '%s'", + pattern, + title[:100], + ) + return True + logger.debug( + "Win32 foreground check: no pattern matched in '%s'", + title[:100], + ) + except Exception as e: + logger.debug("Win32 foreground check failed: %s", e) + return False + + def _check_foreground_dispatch( + self, + patterns: list[str], + requests_module: Any, + ) -> bool: + """Dispatch foreground check based on configured method. + + Args: + patterns: Window title substrings to match (case-insensitive). + requests_module: The ``requests`` module (needed for a11y fallback). + + Returns: + True if the foreground window matches any pattern. + """ + method = self.config.focus_check_method + + if method == "win32": + return self._check_foreground_win32(patterns) + elif method == "a11y": + return self._check_foreground_matches(patterns, requests_module) + elif method == "both": + # Try fast Win32 first; fall back to a11y on any negative result (no match, blank/known-bad title, or error).
+ result = self._check_foreground_win32(patterns) + if result: + return True + logger.debug( + "Win32 foreground check negative; falling back to a11y" + ) + return self._check_foreground_matches(patterns, requests_module) + else: + logger.warning( + "Unknown focus_check_method '%s'; defaulting to win32", + method, + ) + return self._check_foreground_win32(patterns) + def _check_foreground_matches( self, patterns: list[str], diff --git a/openadapt_evals/benchmarks/cli.py b/openadapt_evals/benchmarks/cli.py index 5c0178a..d0dc994 100644 --- a/openadapt_evals/benchmarks/cli.py +++ b/openadapt_evals/benchmarks/cli.py @@ -345,6 +345,7 @@ def cmd_run(args: argparse.Namespace) -> int: clean_desktop=getattr(args, "clean_desktop", False), force_tray_icons=getattr(args, "force_tray_icons", False), waa_image_version=getattr(args, "waa_image_version", None), + focus_check_method=getattr(args, "focus_check_method", "win32"), ) adapter = WAALiveAdapter(config) @@ -551,6 +552,7 @@ def cmd_live(args: argparse.Namespace) -> int: clean_desktop=getattr(args, "clean_desktop", False), force_tray_icons=getattr(args, "force_tray_icons", False), waa_image_version=getattr(args, "waa_image_version", None), + focus_check_method=getattr(args, "focus_check_method", "win32"), ) adapter = WAALiveAdapter(config) @@ -961,6 +963,7 @@ def patch_evaluate_endpoint() -> bool: clean_desktop=getattr(args, "clean_desktop", False), force_tray_icons=getattr(args, "force_tray_icons", False), waa_image_version=getattr(args, "waa_image_version", None), + focus_check_method=getattr(args, "focus_check_method", "win32"), ) ) @@ -2426,6 +2429,9 @@ def main() -> int: help="Max times to override premature 'done' (default: 3)") run_parser.add_argument("--done-gate-threshold", type=float, default=1.0, help="Minimum score to accept 'done' (default: 1.0)") + run_parser.add_argument("--focus-check-method", type=str, default="win32", + choices=["win32", "a11y", "both"], + help="Method for foreground window check: win32 
(fast, default), a11y, or both") # Live evaluation (full control) live_parser = subparsers.add_parser("live", help="Run live evaluation against WAA server (full control)") @@ -2460,6 +2466,9 @@ def main() -> int: help="Max times to override premature 'done' (default: 3)") live_parser.add_argument("--done-gate-threshold", type=float, default=1.0, help="Minimum score to accept 'done' (default: 1.0)") + live_parser.add_argument("--focus-check-method", type=str, default="win32", + choices=["win32", "a11y", "both"], + help="Method for foreground window check: win32 (fast, default), a11y, or both") # Probe server probe_parser = subparsers.add_parser("probe", help="Check if WAA server is reachable") From db0a6015f5382a59c6f2412c98d9169f559d7b8d Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Fri, 6 Mar 2026 16:46:41 -0500 Subject: [PATCH 2/2] test: update setup handler tests to mock win32 foreground check The focus check default changed from a11y to win32, so tests need to mock run_powershell instead of requests.get for the /accessibility endpoint. Co-Authored-By: Claude Opus 4.6 --- tests/test_setup_handlers.py | 51 +++++++++++++++--------------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/tests/test_setup_handlers.py b/tests/test_setup_handlers.py index a47c7f1..b387cb5 100644 --- a/tests/test_setup_handlers.py +++ b/tests/test_setup_handlers.py @@ -597,9 +597,9 @@ def test_calls_activate_window_with_patterns(self): adapter = self._make_adapter() # Simulate activate_window success (/setup) and foreground check success - # via accessibility endpoint (/accessibility). + # via win32 API (default focus_check_method). 
setup_calls = [] - a11y_calls = [] + ps_calls = [] def _fake_post(url, **kwargs): setup_calls.append(url) @@ -609,30 +609,27 @@ def _fake_post(url, **kwargs): resp.text = '{"results": [{"type": "activate_window", "status": "ok"}]}' return resp - def _fake_get(url, **kwargs): - a11y_calls.append(url) - resp = MagicMock() - resp.status_code = 200 - resp.json.return_value = {"AT": {"name": "LibreOffice Calc - data.xlsx"}} - return resp + def _fake_powershell(script, **kwargs): + ps_calls.append(script) + return "LibreOffice Calc - data.xlsx" with patch("requests.post", side_effect=_fake_post), \ - patch("requests.get", side_effect=_fake_get), \ + patch.object(adapter, "run_powershell", side_effect=_fake_powershell), \ patch("time.sleep"): adapter._ensure_app_focused({ "related_apps": ["libreoffice_calc"], }) - # Should have called activate_window at least once and check at least once + # Should have called activate_window at least once and win32 check at least once assert len(setup_calls) >= 1 - assert len(a11y_calls) >= 1 + assert len(ps_calls) >= 1 def test_retries_on_foreground_mismatch(self): """Retries when foreground check does not match expected pattern.""" adapter = self._make_adapter() setup_calls = [] - a11y_calls = [] + ps_calls = [] def _fake_post(url, **kwargs): setup_calls.append(url) @@ -642,24 +639,21 @@ def _fake_post(url, **kwargs): resp.text = '{"results": [{"type": "activate_window", "status": "ok"}]}' return resp - def _fake_get(url, **kwargs): - a11y_calls.append(url) - resp = MagicMock() - resp.status_code = 200 + def _fake_powershell(script, **kwargs): + ps_calls.append(script) # Always report desktop as foreground (mismatch). 
- resp.json.return_value = {"AT": {"name": "Desktop"}} - return resp + return "Desktop" with patch("requests.post", side_effect=_fake_post), \ - patch("requests.get", side_effect=_fake_get), \ + patch.object(adapter, "run_powershell", side_effect=_fake_powershell), \ patch("time.sleep"): adapter._ensure_app_focused({ "related_apps": ["notepad"], }) - # Should have tried multiple setup activations and foreground checks. + # Should have tried multiple setup activations and win32 foreground checks. assert len(setup_calls) >= 3 # At least 3 retry attempts - assert len(a11y_calls) >= 3 # At least 3 foreground checks + assert len(ps_calls) >= 3 # At least 3 foreground checks def test_succeeds_on_second_attempt(self): """If first attempt fails but second succeeds, returns after second.""" @@ -674,20 +668,17 @@ def _fake_post(url, **kwargs): resp.text = '{"results": []}' return resp - def _fake_get(url, **kwargs): + def _fake_powershell(script, **kwargs): attempt_count[0] += 1 - resp = MagicMock() - resp.status_code = 200 if attempt_count[0] <= 2: - # First attempt: wrong window - resp.json.return_value = {"AT": {"name": "Desktop"}} + # First attempts: wrong window + return "Desktop" else: - # Second attempt: correct window - resp.json.return_value = {"AT": {"name": "LibreOffice Calc"}} - return resp + # Later attempt: correct window + return "LibreOffice Calc" with patch("requests.post", side_effect=_fake_post), \ - patch("requests.get", side_effect=_fake_get), \ + patch.object(adapter, "run_powershell", side_effect=_fake_powershell), \ patch("time.sleep"): adapter._ensure_app_focused({ "related_apps": ["libreoffice_calc"],