From 2b93ad84bf4abaa77da915d4bf6ad39a5656f640 Mon Sep 17 00:00:00 2001
From: Daniel Weindl <daniel.weindl@uni-bonn.de>
Date: Mon, 17 Nov 2025 22:12:38 +0100
Subject: [PATCH] Fix import performance regression and update time limits

Unfortunately, https://github.com/AMICI-dev/AMICI/pull/3030 doubled the import
time for our performance test model, but the check there was too lax to catch it.

Sometimes, `smart_subs_dict` is applied to individual expressions,
sometimes to complete matrices. If it's only applied to individual expressions
or small matrices, avoid the overhead of flattening the substitutions first.

Update the time limits for the performance tests to catch such regressions earlier.

Fixes #3048.
---
 python/sdist/amici/importers/utils.py | 82 ++++++++++++++++++++-------
 python/tests/test_misc.py             | 11 +++-
 tests/performance/reference.yml       |  8 +--
 3 files changed, 73 insertions(+), 28 deletions(-)

diff --git a/python/sdist/amici/importers/utils.py b/python/sdist/amici/importers/utils.py
index 2ad7e32322..ff05260d81 100644
--- a/python/sdist/amici/importers/utils.py
+++ b/python/sdist/amici/importers/utils.py
@@ -400,6 +400,7 @@ def smart_subs_dict(
     subs: SymbolDef,
     field: str | None = None,
     reverse: bool = True,
+    flatten_first: bool | None = None,
 ) -> sp.Expr:
     """
     Substitutes expressions completely flattening them out. Requires
@@ -418,6 +419,11 @@ def smart_subs_dict(
         Whether ordering in subs should be reversed. Note that substitution
         requires the reverse order of what is required for evaluation.
 
+    :param flatten_first:
+        Choice of algorithm: Flatten the substitution expressions first, then
+        substitute them simultaneously into `sym` (``True``), or substitute
+        them one by one into `sym` (``False``).
+
     :return:
         Substituted symbolic expression
     """
@@ -426,27 +432,61 @@ def smart_subs_dict(
     else:
         s = [(eid, expr[field]) for eid, expr in subs.items()]
 
-    if not reverse:
-        # counter-intuitive, but we need to reverse the order for reverse=False
-        s.reverse()
-
-    with sp.evaluate(False):
-        # The new expressions may themselves contain symbols to be substituted.
-        #  We flatten them out first, so that the substitutions in `sym` can be
-        #  performed simultaneously, which is usually more efficient than
-        #  repeatedly substituting into `sym`.
-        # TODO(performance): This could probably be made more efficient by
-        #   combining with toposort used to order `subs` in the first place.
-        #   Some substitutions could be combined, and some terms not present in
-        #   `sym` could be skipped.
-        for i in range(len(s) - 1):
-            for j in range(i + 1, len(s)):
-                if s[j][1].has(s[i][0]):
-                    s[j] = s[j][0], s[j][1].xreplace({s[i][0]: s[i][1]})
-
-        s = dict(s)
-        sym = sym.xreplace(s)
-    return sym
+    # We have the choice to flatten the replacement expressions first or to
+    #  substitute them one by one into `sym`. Flattening first is usually
+    #  more efficient if `sym` is large (e.g., a matrix with many elements)
+    #  and `subs` is cascading (i.e., substitutions depend on other
+    #  substitutions). Otherwise, substituting one by one is usually more
+    #  efficient, because flattening scales quadratically with the number of
+    #  substitutions.
+    #  The exact threshold is somewhat arbitrary and may need to be
+    #  adjusted in the future.
+    if flatten_first is None:
+        flatten_first = (
+            isinstance(sym, sp.MatrixBase) and sym.rows * sym.cols > 20
+        )
+
+    if flatten_first:
+        if not reverse:
+            # counter-intuitive, but on this branch, we need to reverse the
+            #  order for `reverse=False`
+            s.reverse()
+
+        with sp.evaluate(False):
+            # The new expressions may themselves contain symbols to be
+            #  substituted. We flatten them out first, so that the
+            #  substitutions in `sym` can be performed simultaneously,
+            #  which can be more efficient than repeatedly substituting into
+            #  `sym`.
+            # TODO(performance): This could probably be made more efficient by
+            #  combining with toposort used to order `subs` in the first
+            #  place.
+            #  Some substitutions could be combined, and some terms not
+            #  present in `sym` could be skipped.
+            #  Furthermore, this would provide information on recursion depth,
+            #  which might help decide which strategy is more efficient.
+            #  For flat hierarchies, substituting one by one is most likely
+            #  more efficient.
+            for i in range(len(s) - 1):
+                for j in range(i + 1, len(s)):
+                    if s[j][1].has(s[i][0]):
+                        s[j] = s[j][0], s[j][1].xreplace({s[i][0]: s[i][1]})
+
+            s = dict(s)
+            sym = sym.xreplace(s)
+        return sym
+
+    else:
+        if reverse:
+            s.reverse()
+
+        with sp.evaluate(False):
+            for old, new in s:
+                # note that substitution may change free symbols,
+                #  so we have to do this recursively
+                if sym.has(old):
+                    sym = sym.xreplace({old: new})
+        return sym
 
 
 def smart_subs(element: sp.Expr, old: sp.Symbol, new: sp.Expr) -> sp.Expr:
diff --git a/python/tests/test_misc.py b/python/tests/test_misc.py
index 9ca0b82bfa..83aa6a5d4b 100644
--- a/python/tests/test_misc.py
+++ b/python/tests/test_misc.py
@@ -84,7 +84,8 @@ def test_cmake_compilation(sbml_example_presimulation_module):
 
 
 @skip_on_valgrind
-def test_smart_subs_dict():
+@pytest.mark.parametrize("flatten_first", [True, False])
+def test_smart_subs_dict(flatten_first):
     expr_str = "c + d"
     subs_dict = {
         "c": "a + b",
@@ -98,8 +99,12 @@ def test_smart_subs_dict():
     expected_default = sp.sympify(expected_default_str)
     expected_reverse = sp.sympify(expected_reverse_str)
 
-    result_default = smart_subs_dict(expr_sym, subs_sym)
-    result_reverse = smart_subs_dict(expr_sym, subs_sym, reverse=False)
+    result_default = smart_subs_dict(
+        expr_sym, subs_sym, flatten_first=flatten_first
+    )
+    result_reverse = smart_subs_dict(
+        expr_sym, subs_sym, reverse=False, flatten_first=flatten_first
+    )
 
     assert sp.simplify(result_default - expected_default).is_zero
     assert sp.simplify(result_reverse - expected_reverse).is_zero
diff --git a/tests/performance/reference.yml b/tests/performance/reference.yml
index f72387ae5e..0f2a090615 100644
--- a/tests/performance/reference.yml
+++ b/tests/performance/reference.yml
@@ -1,11 +1,11 @@
 # Reference wall times (seconds) with some buffer
 create_sdist: 16
 install_sdist: 150
-petab_import: 2100
-install_model: 120
+petab_import: 720
+install_model: 60
 install_model_O0: 40
-install_model_O1: 90
-install_model_O2: 120
+install_model_O1: 45
+install_model_O2: 60
 forward_simulation: 2
 forward_sensitivities: 2
 adjoint_sensitivities: 2.5