From 87f3873795cb17f8aacbf71b42825f7f653142e4 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Thu, 27 Mar 2025 14:47:09 -0400 Subject: [PATCH 01/39] Add extra check for window size --- stumpy/core.py | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index a7758c2fd..24e3b192e 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -554,11 +554,12 @@ def get_max_window_size(n): return max_m -def check_window_size(m, max_size=None): +def check_window_size(m, max_size=None, excl_zone=None, last_start_index=None): """ Check the window size and ensure that it is greater than or equal to 3 and, if `max_size` is provided, ensure that the window size is less than or equal to the - `max_size` + `max_size`. Furthermore, if `excl_zone` is provided, then it will also check if the + window size is too large and could lead to meaningless results. Parameters ---------- @@ -568,6 +569,13 @@ def check_window_size(m, max_size=None): max_size : int, default None The maximum window size allowed + excl_zone : int, default None + The exclusion zone. If provided, then the `last_start_index` must also be + provided. + + last_start_index : int, default None + The start index of last subsequence. + Returns ------- None @@ -589,6 +597,37 @@ def check_window_size(m, max_size=None): if max_size is not None and m > max_size: raise ValueError(f"The window size must be less than or equal to {max_size}") + if excl_zone is not None: + if last_start_index is None: + raise ValueError( + "last_start_index must be provided when excl_zone is not None" + ) + + # Check if subsequneces have non-trivial neighbours + + # Case 1: + # There is at least one subsequence with non-trivial neighbour + # i.e. For AN `i`, there exists at least one `j` such that |i - j| > excl_zone + # In this case, we just need to consider the two subsequences that are furthest + # apart from each other. + # In other words: |0 - last_start_index| > excl_zone + cond_1 = last_start_index <= excl_zone + + # Case 2: + # Check if each single subsequence has at least one non-trivial neighbor + # i. e. For ANY `i`, there exists at least one `j` such that |i - j| > excl_zone + # In this case, we need to consider the subseuqence whose furthest neighbour is + # the shortest compared to other subsequences. + # In other words: |0 - ceil(last_start_index / 2)| > excl_zone + cond_2 = math.ceil(last_start_index / 2) <= excl_zone + + if cond_1 or cond_2: + msg = ( + f"The window size, 'm = {m}', may be too large and could lead to " + + "meaningless results. Consider reducing 'm' where necessary" + ) + warnings.warn(msg) + @njit(fastmath=config.STUMPY_FASTMATH_TRUE) def _sliding_dot_product(Q, T): From f8d6df5d4709d4a68eb656bb2160323c33011c6a Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Thu, 27 Mar 2025 19:28:47 -0400 Subject: [PATCH 02/39] update module to include extra check for self join --- stumpy/stump.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 18409c6e1..3fb3e7758 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -711,20 +711,25 @@ def stump( "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`" ) - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) - ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) - n_A = T_A.shape[0] n_B = T_B.shape[0] l = n_A - m + 1 - excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - + ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) if ignore_trivial: + excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64) else: + excl_zone = None diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64) + core.check_window_size( + m, + max_size=min(T_A.shape[0], T_B.shape[0]), + excl_zone=excl_zone, + last_start_index=l - 1, + ) + P, PL, PR, I, IL, IR = _stump( T_A, T_B, From f174ed66ab3c8c0259fb148916f9afe9293bbdac Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Thu, 27 Mar 2025 21:46:13 -0400 Subject: [PATCH 03/39] add tests for warning --- tests/test_core.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 8d0721979..5776ea933 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -192,6 +192,36 @@ def test_check_max_window_size(): core.check_window_size(m, max_size=3) +def test_check_window_size_excl_zone_case1(): + # To ensure warning is raised if there is no subsequence + # with non-trivial neighbor + T = np.random.rand(64) + m = 60 + last_start_index = len(T) - m + + excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + + with pytest.warns(UserWarning): + core.check_window_size( + m, max_size=len(T), excl_zone=excl_zone, last_start_index=last_start_index + ) + + +def test_check_window_size_excl_zone_case2(): + # To ensure warning is raised if there is at least one subsequence + # that has no non-trivial neighbor + T = np.random.rand(64) + m = 48 + last_start_index = len(T) - m + + excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + + with pytest.warns(UserWarning): + core.check_window_size( + m, max_size=len(T), excl_zone=excl_zone, last_start_index=last_start_index + ) + + @pytest.mark.parametrize("Q, T", test_data) def test_njit_sliding_dot_product(Q, T): ref_mp = naive_rolling_window_dot_product(Q, T) From f6113393ae976192620ff87dfd0c7ee10914bd80 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Thu, 27 Mar 2025 21:53:13 -0400 Subject: [PATCH 04/39] revise comment --- stumpy/core.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 24e3b192e..fb1b22305 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -610,18 +610,18 @@ def check_window_size(m, max_size=None, excl_zone=None, last_start_index=None): # i.e. For AN `i`, there exists at least one `j` such that |i - j| > excl_zone # In this case, we just need to consider the two subsequences that are furthest # apart from each other. - # In other words: |0 - last_start_index| > excl_zone - cond_1 = last_start_index <= excl_zone + # In other words: |last_start_index - 0| > excl_zone + cond_1 = (last_start_index - 0) > excl_zone # Case 2: # Check if each single subsequence has at least one non-trivial neighbor # i. e. For ANY `i`, there exists at least one `j` such that |i - j| > excl_zone # In this case, we need to consider the subseuqence whose furthest neighbour is # the shortest compared to other subsequences. - # In other words: |0 - ceil(last_start_index / 2)| > excl_zone - cond_2 = math.ceil(last_start_index / 2) <= excl_zone + # In other words: |ceil(last_start_index / 2) - 0| > excl_zone + cond_2 = (math.ceil(last_start_index / 2) - 0) > excl_zone - if cond_1 or cond_2: + if not cond_1 or not cond_2: msg = ( f"The window size, 'm = {m}', may be too large and could lead to " + "meaningless results. Consider reducing 'm' where necessary" From c2073ef76b6b815e0eecf2f3e185d2fd4849cbdd Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Thu, 27 Mar 2025 21:55:36 -0400 Subject: [PATCH 05/39] ignore coverage --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index fb1b22305..788a617c9 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -598,7 +598,7 @@ def check_window_size(m, max_size=None, excl_zone=None, last_start_index=None): raise ValueError(f"The window size must be less than or equal to {max_size}") if excl_zone is not None: - if last_start_index is None: + if last_start_index is None: # pragma: no cover raise ValueError( "last_start_index must be provided when excl_zone is not None" ) From 6c691551d5210f80f43d98050c32ac805088feca Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 28 Mar 2025 11:33:27 -0400 Subject: [PATCH 06/39] minor improvement in docstring --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 788a617c9..4248bc8d9 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -570,11 +570,11 @@ def check_window_size(m, max_size=None, excl_zone=None, last_start_index=None): The maximum window size allowed excl_zone : int, default None - The exclusion zone. If provided, then the `last_start_index` must also be + Size of the exclusion zone. If provided, then the `last_start_index` must also be provided. last_start_index : int, default None - The start index of last subsequence. + Start index of the last subsequence Returns ------- From e63860cb388dda3c01e7e64bc66d603e0d7a829e Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 28 Mar 2025 11:34:35 -0400 Subject: [PATCH 07/39] fix flake8 --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 4248bc8d9..9133e5145 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -570,8 +570,8 @@ def check_window_size(m, max_size=None, excl_zone=None, last_start_index=None): The maximum window size allowed excl_zone : int, default None - Size of the exclusion zone. If provided, then the `last_start_index` must also be - provided. + Size of the exclusion zone. If provided, then the `last_start_index` + must also be provided. last_start_index : int, default None Start index of the last subsequence From 91f767ed27f4d803b13eff2301eb2afe8d75becb Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 29 Mar 2025 12:26:17 -0400 Subject: [PATCH 08/39] Revised test function using expected signature --- tests/test_core.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 5776ea933..3be9ff21f 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -197,13 +197,10 @@ def test_check_window_size_excl_zone_case1(): # with non-trivial neighbor T = np.random.rand(64) m = 60 - last_start_index = len(T) - m - - excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) with pytest.warns(UserWarning): core.check_window_size( - m, max_size=len(T), excl_zone=excl_zone, last_start_index=last_start_index + m, max_size=len(T), n=len(T) ) @@ -212,13 +209,10 @@ def test_check_window_size_excl_zone_case2(): # that has no non-trivial neighbor T = np.random.rand(64) m = 48 - last_start_index = len(T) - m - - excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) with pytest.warns(UserWarning): core.check_window_size( - m, max_size=len(T), excl_zone=excl_zone, last_start_index=last_start_index + m, max_size=len(T), n=len(T) ) From ab411aa82bc9d41724dd9abea7b304178f48108e Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 29 Mar 2025 12:27:02 -0400 Subject: [PATCH 09/39] fixed format --- tests/test_core.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 3be9ff21f..2b0d05ec7 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -199,9 +199,7 @@ def test_check_window_size_excl_zone_case1(): m = 60 with pytest.warns(UserWarning): - core.check_window_size( - m, max_size=len(T), n=len(T) - ) + core.check_window_size(m, max_size=len(T), n=len(T)) def test_check_window_size_excl_zone_case2(): @@ -211,9 +209,7 @@ def test_check_window_size_excl_zone_case2(): m = 48 with pytest.warns(UserWarning): - core.check_window_size( - m, max_size=len(T), n=len(T) - ) + core.check_window_size(m, max_size=len(T), n=len(T)) @pytest.mark.parametrize("Q, T", test_data) From b7494d920f353190e65ba833e03448f85e15c42f Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 29 Mar 2025 23:42:46 -0400 Subject: [PATCH 10/39] Revise function to pass test --- stumpy/core.py | 60 +++++++++++++++++++++++-------------------------- stumpy/stump.py | 16 +++++-------- 2 files changed, 33 insertions(+), 43 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 9133e5145..2e7c464a6 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -554,7 +554,7 @@ def get_max_window_size(n): return max_m -def check_window_size(m, max_size=None, excl_zone=None, last_start_index=None): +def check_window_size(m, max_size=None, n=None): """ Check the window size and ensure that it is greater than or equal to 3 and, if `max_size` is provided, ensure that the window size is less than or equal to the @@ -569,12 +569,8 @@ def check_window_size(m, max_size=None, excl_zone=None, last_start_index=None): max_size : int, default None The maximum window size allowed - excl_zone : int, default None - Size of the exclusion zone. If provided, then the `last_start_index` - must also be provided. - - last_start_index : int, default None - Start index of the last subsequence + n : int, default None + The length of the time series. Returns ------- @@ -597,31 +593,31 @@ def check_window_size(m, max_size=None, excl_zone=None, last_start_index=None): if max_size is not None and m > max_size: raise ValueError(f"The window size must be less than or equal to {max_size}") - if excl_zone is not None: - if last_start_index is None: # pragma: no cover - raise ValueError( - "last_start_index must be provided when excl_zone is not None" - ) - - # Check if subsequneces have non-trivial neighbours - - # Case 1: - # There is at least one subsequence with non-trivial neighbour - # i.e. For AN `i`, there exists at least one `j` such that |i - j| > excl_zone - # In this case, we just need to consider the two subsequences that are furthest - # apart from each other. - # In other words: |last_start_index - 0| > excl_zone - cond_1 = (last_start_index - 0) > excl_zone - - # Case 2: - # Check if each single subsequence has at least one non-trivial neighbor - # i. e. For ANY `i`, there exists at least one `j` such that |i - j| > excl_zone - # In this case, we need to consider the subseuqence whose furthest neighbour is - # the shortest compared to other subsequences. - # In other words: |ceil(last_start_index / 2) - 0| > excl_zone - cond_2 = (math.ceil(last_start_index / 2) - 0) > excl_zone - - if not cond_1 or not cond_2: + if n is not None: + # The following code raises warning if there is at least one subsequence + # with no non-trivial neighbor. The following logic does not check if + # a subsequence has a non-finite value. + + # Logic: For each subsequnece `S_i = T[i : i + m]`, its neighbor `S_j` + # is non-trivial if |i - j| > excl_zone. Let's denote `S_jmax` as + # the neighbor that is furthest away from `S_i` (index-wise). So: + # |i - jmax| >= |i - j| + # Therefore, if `S_i` has at least one non-trivial neighbor, then `S_jmax` is + # definitely a non-trivial neighbor. Because: + # |i - jmax| >= |i - j| > excl_zone + # To ensure ALL subsequences have at least one non-trivial neighbor, we can just + # check the subsequence `S_i` that has the minimum |i - jmax|. Let's denote `d` + # as that minimum value. So, if d > excl_zone, then: + # For any `i` and its corresponding `jmax`, we have: + # |i - jmax| >= d > excl_zone + + # The minimum |i - jmax| is achieved when `S_i` is the middle ubsequence, + # i.e. i == int(ceil((n - m) / 2)), and its corresponding jmax is 0. Hence, + # we just need to make sure the following inequity is satisfied: + # |int(ceil((n - m) / 2)) - 0| > excl_zone` + + excl_zone = int(math.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + if (int(math.ceil((n - m) / 2)) - 0) <= excl_zone: msg = ( f"The window size, 'm = {m}', may be too large and could lead to " + "meaningless results. Consider reducing 'm' where necessary" diff --git a/stumpy/stump.py b/stumpy/stump.py index 3fb3e7758..5facdaa27 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -716,20 +716,14 @@ def stump( l = n_A - m + 1 ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) - if ignore_trivial: - excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + if ignore_trivial: # self-join + core.check_window_size(m, max_size=n_A, n=n_A) diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64) - else: - excl_zone = None + else: # AB-join + core.check_window_size(m, max_size=min(n_A, n_B)) diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64) - core.check_window_size( - m, - max_size=min(T_A.shape[0], T_B.shape[0]), - excl_zone=excl_zone, - last_start_index=l - 1, - ) - P, PL, PR, I, IL, IR = _stump( T_A, T_B, From 3c87e0dd233303714f935c66903ad77a7a626343 Mon Sep 17 00:00:00 2001 From: Nima Sarajpoor Date: Sun, 30 Mar 2025 00:10:11 -0400 Subject: [PATCH 11/39] Update stumpy/core.py --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 2e7c464a6..d88c42275 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -611,7 +611,7 @@ def check_window_size(m, max_size=None, n=None): # For any `i` and its corresponding `jmax`, we have: # |i - jmax| >= d > excl_zone - # The minimum |i - jmax| is achieved when `S_i` is the middle ubsequence, + # The minimum |i - jmax| is achieved when `S_i` is the middle subsequence, # i.e. i == int(ceil((n - m) / 2)), and its corresponding jmax is 0. Hence, # we just need to make sure the following inequity is satisfied: # |int(ceil((n - m) / 2)) - 0| > excl_zone` From 252d52bc601e106fe374aedc26ca33b9f2a59b36 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sun, 30 Mar 2025 10:20:47 -0400 Subject: [PATCH 12/39] improve comments --- stumpy/core.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index d88c42275..9b7f3ea62 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -611,9 +611,12 @@ def check_window_size(m, max_size=None, n=None): # For any `i` and its corresponding `jmax`, we have: # |i - jmax| >= d > excl_zone - # The minimum |i - jmax| is achieved when `S_i` is the middle subsequence, - # i.e. i == int(ceil((n - m) / 2)), and its corresponding jmax is 0. Hence, - # we just need to make sure the following inequity is satisfied: + # Hence, as long as the `S_i` that corresponds to `d` has one non-trivial + # neighbour, any other subsequence has one non-trivial neighbour as well. + + # The minimum |i - jmax| is achieved when `S_i` is the middle ubsequence, + # i.e. i == int(ceil((n - m) / 2)). Its corresponding `jmax` is 0. Hence, + # we just need to make sure the following inequality is satisfied: # |int(ceil((n - m) / 2)) - 0| > excl_zone` excl_zone = int(math.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) From 8e5d9afa92d071b3aaa4c148c018b49feaebc711 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sun, 30 Mar 2025 15:19:59 -0400 Subject: [PATCH 13/39] improve readability of function --- stumpy/core.py | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 9b7f3ea62..67f7374d7 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -594,33 +594,20 @@ def check_window_size(m, max_size=None, n=None): raise ValueError(f"The window size must be less than or equal to {max_size}") if n is not None: - # The following code raises warning if there is at least one subsequence - # with no non-trivial neighbor. The following logic does not check if - # a subsequence has a non-finite value. - - # Logic: For each subsequnece `S_i = T[i : i + m]`, its neighbor `S_j` - # is non-trivial if |i - j| > excl_zone. Let's denote `S_jmax` as - # the neighbor that is furthest away from `S_i` (index-wise). So: - # |i - jmax| >= |i - j| - # Therefore, if `S_i` has at least one non-trivial neighbor, then `S_jmax` is - # definitely a non-trivial neighbor. Because: - # |i - jmax| >= |i - j| > excl_zone - # To ensure ALL subsequences have at least one non-trivial neighbor, we can just - # check the subsequence `S_i` that has the minimum |i - jmax|. Let's denote `d` - # as that minimum value. So, if d > excl_zone, then: - # For any `i` and its corresponding `jmax`, we have: - # |i - jmax| >= d > excl_zone - - # Hence, as long as the `S_i` that corresponds to `d` has one non-trivial - # neighbour, any other subsequence has one non-trivial neighbour as well. - - # The minimum |i - jmax| is achieved when `S_i` is the middle ubsequence, - # i.e. i == int(ceil((n - m) / 2)). Its corresponding `jmax` is 0. Hence, - # we just need to make sure the following inequality is satisfied: - # |int(ceil((n - m) / 2)) - 0| > excl_zone` + # Raise warning if there is at least one subsequence with no + # non-trivial neighbour in a self-join case excl_zone = int(math.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - if (int(math.ceil((n - m) / 2)) - 0) <= excl_zone: + + l = n - m + 1 + indices = np.arange(l) + + # Compute the maximum index-wise gap between each subsequence + # and its neighbours. For any subsequence: + # The leftmost neighbor is at index `0` + # The rightmost neighbor is at index `l-1` + max_gaps = np.maximum(indices - 0, (l - 1) - indices) + if np.any(max_gaps <= excl_zone): msg = ( f"The window size, 'm = {m}', may be too large and could lead to " + "meaningless results. Consider reducing 'm' where necessary" From 113b5c576542262b687842abef4ffdce7d9ee0d0 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sun, 30 Mar 2025 15:23:58 -0400 Subject: [PATCH 14/39] minor improvement in the description of param --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 67f7374d7..66544c9fe 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -570,7 +570,7 @@ def check_window_size(m, max_size=None, n=None): The maximum window size allowed n : int, default None - The length of the time series. + The length of the time series in a self-join case Returns ------- From 82caebba7994052bd5ddc0108aff887622e1d6d6 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sun, 30 Mar 2025 18:55:24 -0400 Subject: [PATCH 15/39] remove redundant test function --- tests/test_core.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 2b0d05ec7..7396086bf 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -192,22 +192,15 @@ def test_check_max_window_size(): core.check_window_size(m, max_size=3) -def test_check_window_size_excl_zone_case1(): - # To ensure warning is raised if there is no subsequence - # with non-trivial neighbor - T = np.random.rand(64) - m = 60 - - with pytest.warns(UserWarning): - core.check_window_size(m, max_size=len(T), n=len(T)) - - -def test_check_window_size_excl_zone_case2(): +def test_check_window_size_excl_zone(): # To ensure warning is raised if there is at least one subsequence # that has no non-trivial neighbor - T = np.random.rand(64) - m = 48 + T = np.random.rand(100) + m = 70 + # For m==70, the `excl_zone` become ceil(m / 4) = 18. For len(T)==100, + # the subsequence that starts at index 15 has no non-trivial neighbor. + # The expectation is that `core.check_window_size` raises warning. with pytest.warns(UserWarning): core.check_window_size(m, max_size=len(T), n=len(T)) From f29732fbbbaf6bb5147763a03a40064744c4a669 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Mon, 31 Mar 2025 14:44:34 -0400 Subject: [PATCH 16/39] Revise logic and the comment --- stumpy/core.py | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 66544c9fe..a80c06f5d 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -570,7 +570,8 @@ def check_window_size(m, max_size=None, n=None): The maximum window size allowed n : int, default None - The length of the time series in a self-join case + The length of the time series in the case of a self-join. + `n` should be set to `None` in the case of an `AB-join`. Returns ------- @@ -595,19 +596,37 @@ def check_window_size(m, max_size=None, n=None): if n is not None: # Raise warning if there is at least one subsequence with no - # non-trivial neighbour in a self-join case + # non-trivial neighbour in the case of a self-join. + + # For any time series `T`, an "eligible nearest neighbor" subsequence for + # the central-most subsequence must be located outside the `excl_zone`. + # The central-most subsequence will ALWAYS have the smallest gap + # to its furthest "eligible nearest neighbor" among all other subsequences. + # Therefore, we only need to check whether the `excl_zone` eliminates all + # "nearest neighbors" for the central-most subsequence in `T`. + # In fact, we just need to verify whether the `excl_zone` eliminates + # the "nearest neighbor" that is furthest away (index-wise) from + # the central-most subsequence. If it does not, this implies that + # all other subsequences in `T` will have at least one or more + # eligible nearest neighbors outside their respective `excl_zone excl_zone = int(math.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) l = n - m + 1 - indices = np.arange(l) - - # Compute the maximum index-wise gap between each subsequence - # and its neighbours. For any subsequence: - # The leftmost neighbor is at index `0` - # The rightmost neighbor is at index `l-1` - max_gaps = np.maximum(indices - 0, (l - 1) - indices) - if np.any(max_gaps <= excl_zone): + max_gap = l // 2 + # The index-wise gap between central-most subsequence + # and its furthest neighbor: + + # If `l` is odd (`l == 2k+1`): + # The central subsequence is at index `k`, with furthest neighbors at `0` + # and `2k`, both `k == l // 2` indices away. + + # If `l` is even (`l == 2k`): + # The central subsequences are at `k-1` and `k`. The furthest neighbor is + # at `2k-1` for `k-1`, and `0` for `k`. In both cases, the subsequence + # and its furthest neighbor are `k == l // 2` indices away. + + if max_gap <= excl_zone: msg = ( f"The window size, 'm = {m}', may be too large and could lead to " + "meaningless results. Consider reducing 'm' where necessary" From 6f308a3c01847363973860522b5521ba76fe9b88 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Mon, 31 Mar 2025 15:04:46 -0400 Subject: [PATCH 17/39] improving comments --- stumpy/core.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index a80c06f5d..e76571b29 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -599,23 +599,19 @@ def check_window_size(m, max_size=None, n=None): # non-trivial neighbour in the case of a self-join. # For any time series `T`, an "eligible nearest neighbor" subsequence for - # the central-most subsequence must be located outside the `excl_zone`. - # The central-most subsequence will ALWAYS have the smallest gap - # to its furthest "eligible nearest neighbor" among all other subsequences. - # Therefore, we only need to check whether the `excl_zone` eliminates all - # "nearest neighbors" for the central-most subsequence in `T`. - # In fact, we just need to verify whether the `excl_zone` eliminates - # the "nearest neighbor" that is furthest away (index-wise) from - # the central-most subsequence. If it does not, this implies that - # all other subsequences in `T` will have at least one or more - # eligible nearest neighbors outside their respective `excl_zone + # the central-most subsequence must be located outside the `excl_zone`, + # and the central-most subsequence will ALWAYS have the smallest gap + # to its furthest neighbour. Therefore, we only need to check whether + # the `excl_zone` eliminates all "neighbors" for the central-most subsequence + # in `T`. In fact, we just need to verify whether the `excl_zone` eliminates + # the "neighbor" that is furthest away (index-wise) from the central-most + # subsequence. If it does not, this implies that all subsequences in `T` + # will have at least one or more "eligible nearest neighbors" outside + # their respective `excl_zone` excl_zone = int(math.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) l = n - m + 1 - max_gap = l // 2 - # The index-wise gap between central-most subsequence - # and its furthest neighbor: # If `l` is odd (`l == 2k+1`): # The central subsequence is at index `k`, with furthest neighbors at `0` @@ -625,6 +621,7 @@ def check_window_size(m, max_size=None, n=None): # The central subsequences are at `k-1` and `k`. The furthest neighbor is # at `2k-1` for `k-1`, and `0` for `k`. In both cases, the subsequence # and its furthest neighbor are `k == l // 2` indices away. + max_gap = l // 2 if max_gap <= excl_zone: msg = ( From 77b878b974709e6013068f0d5b9641cb3b9ddfb2 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Mon, 31 Mar 2025 15:23:40 -0400 Subject: [PATCH 18/39] minor change --- stumpy/stump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 5facdaa27..2b68fb56a 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -718,7 +718,7 @@ def stump( ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) if ignore_trivial: # self-join - core.check_window_size(m, max_size=n_A, n=n_A) + core.check_window_size(m, max_size=min(n_A, n_B), n=n_A) diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64) else: # AB-join core.check_window_size(m, max_size=min(n_A, n_B)) From 2c687168867683ebfe1c066eb3e6c15d6018a7d3 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 1 Apr 2025 16:14:18 -0400 Subject: [PATCH 19/39] minor change in comment --- stumpy/core.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index e76571b29..6858aea5d 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -615,14 +615,13 @@ def check_window_size(m, max_size=None, n=None): # If `l` is odd (`l == 2k+1`): # The central subsequence is at index `k`, with furthest neighbors at `0` - # and `2k`, both `k == l // 2` indices away. + # and `2k`, both `k == l // 2` indices away from the central-most subsequence. # If `l` is even (`l == 2k`): # The central subsequences are at `k-1` and `k`. The furthest neighbor is # at `2k-1` for `k-1`, and `0` for `k`. In both cases, the subsequence # and its furthest neighbor are `k == l // 2` indices away. max_gap = l // 2 - if max_gap <= excl_zone: msg = ( f"The window size, 'm = {m}', may be too large and could lead to " From 643b4b0ecdb993ec9bec387f792dc16855fd63fa Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 1 Apr 2025 16:18:54 -0400 Subject: [PATCH 20/39] minor change in comment --- tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index 7396086bf..3b92c8136 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -198,7 +198,7 @@ def test_check_window_size_excl_zone(): T = np.random.rand(100) m = 70 - # For m==70, the `excl_zone` become ceil(m / 4) = 18. For len(T)==100, + # For `m==70`, the `excl_zone` is ceil(m / 4) = 18. For `len(T)==100`, # the subsequence that starts at index 15 has no non-trivial neighbor. # The expectation is that `core.check_window_size` raises warning. with pytest.warns(UserWarning): From a15b7577decdebe368f15c0748b87e4b31deca4b Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 1 Apr 2025 16:20:00 -0400 Subject: [PATCH 21/39] update aamp for checking window size --- stumpy/aamp.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/stumpy/aamp.py b/stumpy/aamp.py index 1e4879bcc..6d6664537 100644 --- a/stumpy/aamp.py +++ b/stumpy/aamp.py @@ -407,17 +407,17 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): if T_B.ndim != 1: # pragma: no cover raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ") - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) - ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) - n_A = T_A.shape[0] n_B = T_B.shape[0] l = n_A - m + 1 + ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - if ignore_trivial: + if ignore_trivial: # self-join + core.check_window_size(m, max_size=min(n_A, n_B), n=n_A) diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64) - else: + else: # AB-join + core.check_window_size(m, max_size=min(n_A, n_B)) diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64) P, PL, PR, I, IL, IR = _aamp( From 316bf07c7e43521bd0461e8c15121da9c09dded5 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 1 Apr 2025 21:39:42 -0400 Subject: [PATCH 22/39] improve docstring and comments --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 6858aea5d..e92d413b8 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -558,8 +558,8 @@ def check_window_size(m, max_size=None, n=None): """ Check the window size and ensure that it is greater than or equal to 3 and, if `max_size` is provided, ensure that the window size is less than or equal to the - `max_size`. Furthermore, if `excl_zone` is provided, then it will also check if the - window size is too large and could lead to meaningless results. + `max_size`. Furthermore, if `n` is provided, then it checks if there is + at least one subsequence with no non-trivial neighbour in a self-join. Parameters ---------- From 9f718161198f37ef4fa90415cee4e1376b24c35d Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 1 Apr 2025 21:43:07 -0400 Subject: [PATCH 23/39] improve docstring --- stumpy/core.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index e92d413b8..135df3c62 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -600,14 +600,14 @@ def check_window_size(m, max_size=None, n=None): # For any time series `T`, an "eligible nearest neighbor" subsequence for # the central-most subsequence must be located outside the `excl_zone`, - # and the central-most subsequence will ALWAYS have the smallest gap - # to its furthest neighbour. Therefore, we only need to check whether - # the `excl_zone` eliminates all "neighbors" for the central-most subsequence - # in `T`. In fact, we just need to verify whether the `excl_zone` eliminates - # the "neighbor" that is furthest away (index-wise) from the central-most - # subsequence. If it does not, this implies that all subsequences in `T` - # will have at least one or more "eligible nearest neighbors" outside - # their respective `excl_zone` + # and the central-most subsequence will ALWAYS have the smallest index-wise + # distance to its furthest neighbour amongs all other subsequences. Therefore, + # we only need to check whether the `excl_zone` eliminates all "neighbors" for + # the central-most subsequence in `T`. In fact, we just need to verify whether + # the `excl_zone` eliminates the "neighbor" that is furthest away (index-wise) + # from the central-most subsequence. If it does not, this implies that all + # subsequences in `T` will have at least one "eligible nearest neighbors" + # outside their respective `excl_zone` excl_zone = int(math.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) From 445a6cbd4c14cb07f30d3cb0a850868b89358fcb Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 1 Apr 2025 21:44:19 -0400 Subject: [PATCH 24/39] use smaller input to make test function more understandable --- tests/test_core.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 3b92c8136..17059d5dc 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -195,12 +195,14 @@ def test_check_max_window_size(): def test_check_window_size_excl_zone(): # To ensure warning is raised if there is at least one subsequence # that has no non-trivial neighbor - T = np.random.rand(100) - m = 70 - - # For `m==70`, the `excl_zone` is ceil(m / 4) = 18. For `len(T)==100`, - # the subsequence that starts at index 15 has no non-trivial neighbor. - # The expectation is that `core.check_window_size` raises warning. + T = np.random.rand(10) + m = 7 + + # For `len(T) == 10` and `m == 7`, the `excl_zone` is ceil(m / 4) = 2. + # In this case, there are `10 - 7 + 1 = 4` subsequences of length 7, + # starting at indices 0, 1, 2, and 3. For a subsequence that starts at + # index 1, there are no non-trivial neighbors. So, a warning should be + # raised. with pytest.warns(UserWarning): core.check_window_size(m, max_size=len(T), n=len(T)) From 7751792edbe1cfab7425c9a251b39fd648095860 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 1 Apr 2025 22:34:20 -0400 Subject: [PATCH 25/39] updated stumped and aamped --- stumpy/aamped.py | 6 +++--- stumpy/stumped.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/stumpy/aamped.py b/stumpy/aamped.py index 21132f281..14e6d237e 100644 --- a/stumpy/aamped.py +++ b/stumpy/aamped.py @@ -386,17 +386,17 @@ def aamped(client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): if T_B.ndim != 1: # pragma: no cover raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ") - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) - ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) - n_A = T_A.shape[0] n_B = T_B.shape[0] + ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) if ignore_trivial: + core.check_window_size(m, max_size=min(n_A, n_B), n=n_A) diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64) else: + core.check_window_size(m, max_size=min(n_A, n_B)) diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64) _aamped = core._client_to_func(client) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index e5abc75c1..0cfa331b2 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -618,17 +618,17 @@ def stumped( "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`" ) - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) - ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) - n_A = T_A.shape[0] n_B = T_B.shape[0] + ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) if ignore_trivial: + core.check_window_size(m, max_size=min(n_A, n_B), n=n_A) diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64) else: + core.check_window_size(m, max_size=min(n_A, n_B)) diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64) _stumped = core._client_to_func(client) From 8bff40b87decde748d8376c6a3b6d835640b63ed Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 1 Apr 2025 22:47:53 -0400 Subject: [PATCH 26/39] updated maamp and maamped modules --- stumpy/maamp.py | 8 ++++---- stumpy/maamped.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/stumpy/maamp.py b/stumpy/maamp.py index dad6748c3..c4f75b897 100644 --- a/stumpy/maamp.py +++ b/stumpy/maamp.py @@ -140,7 +140,7 @@ def maamp_subspace( returned. """ T = core._preprocess(T) - core.check_window_size(m, max_size=T.shape[-1]) + core.check_window_size(m, max_size=T.shape[-1], n=T.shape[0]) subseqs, _ = core.preprocess_non_normalized(T[:, subseq_idx : subseq_idx + m], m) neighbors, _ = core.preprocess_non_normalized(T[:, nn_idx : nn_idx + m], m) @@ -269,7 +269,7 @@ def maamp_mdl( A list of numpy.ndarrays that contains the `k`th-dimensional subspaces """ T = core._preprocess(T) - core.check_window_size(m, max_size=T.shape[-1]) + core.check_window_size(m, max_size=T.shape[-1], n=T.shape[-1]) if discretize_func is None: T_isfinite = np.isfinite(T) @@ -441,7 +441,7 @@ def maamp_multi_distance_profile(query_idx, T, m, include=None, discords=False, err = f"T is {T.ndim}-dimensional and must be at least 1-dimensional" raise ValueError(f"{err}") - core.check_window_size(m, max_size=T.shape[1]) + core.check_window_size(m, max_size=T.shape[1], n=T.shape[1]) if include is not None: # pragma: no cover include = core._preprocess_include(include) @@ -933,7 +933,7 @@ def maamp(T, m, include=None, discords=False, p=2.0): err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional" raise ValueError(f"{err}") - core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1])) + core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1]) if include is not None: include = core._preprocess_include(include) diff --git a/stumpy/maamped.py b/stumpy/maamped.py index 0665e3e51..70bdf7e66 100644 --- a/stumpy/maamped.py +++ b/stumpy/maamped.py @@ -389,7 +389,7 @@ def maamped(client, T, m, include=None, discords=False, p=2.0): err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional" raise ValueError(f"{err}") - core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1])) + core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1]) if include is not None: include = core._preprocess_include(include) From f0cbfae8d1cf24db6c7a2f57a5d0c9a06039f299 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 2 Apr 2025 00:16:49 -0400 Subject: [PATCH 27/39] update different modules to consider the change in core.check_window_size --- stumpy/gpu_aamp.py | 7 ++++++- stumpy/gpu_stump.py | 7 ++++++- stumpy/mstump.py | 8 +++++--- stumpy/mstumped.py | 4 +++- stumpy/scraamp.py | 7 ++++++- stumpy/scrump.py | 7 ++++++- stumpy/stamp.py | 6 ++++-- stumpy/stomp.py | 7 ++++++- 8 files changed, 42 insertions(+), 11 deletions(-) diff --git a/stumpy/gpu_aamp.py b/stumpy/gpu_aamp.py index a4708a5fc..fc0c74068 100644 --- a/stumpy/gpu_aamp.py +++ b/stumpy/gpu_aamp.py @@ -536,8 +536,13 @@ def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0, k=1): "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`" ) - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) + if ignore_trivial: # self-join + core.check_window_size( + m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0] + ) + else: # AB-join + core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) n = T_B.shape[0] w = T_A.shape[0] - m + 1 diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 98e7ebd78..16166ffb2 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -666,8 +666,13 @@ def gpu_stump( "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`" ) - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) + if ignore_trivial: # self-join + core.check_window_size( + m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0] + ) + else: # AB-join + core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) n = T_B.shape[0] w = T_A.shape[0] - m + 1 diff --git a/stumpy/mstump.py b/stumpy/mstump.py index c4b7ed2c9..ab35929dc 100644 --- a/stumpy/mstump.py +++ b/stumpy/mstump.py @@ -217,7 +217,7 @@ def subspace( array([0, 1]) """ T = core._preprocess(T) - core.check_window_size(m, max_size=T.shape[-1]) + core.check_window_size(m, max_size=T.shape[-1], n=T.shape[-1]) T_subseq_isconstant = core.process_isconstant(T, m, T_subseq_isconstant) if discretize_func is None: @@ -409,7 +409,7 @@ def mdl( (array([ 80. , 111.509775]), [array([1]), array([0, 1])]) """ T = core._preprocess(T) - core.check_window_size(m, max_size=T.shape[-1]) + core.check_window_size(m, max_size=T.shape[-1], n=T.shape[-1]) T_subseq_isconstant = core.process_isconstant(T, m, T_subseq_isconstant) if discretize_func is None: @@ -1228,7 +1228,9 @@ def mstump( err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional" raise ValueError(f"{err}") - core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1])) + # mstump currently supports self-join. Therefore, the argument `n` should be + # passed to the function `core.check_window_size`. + core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1]) if include is not None: include = core._preprocess_include(include) diff --git a/stumpy/mstumped.py b/stumpy/mstumped.py index aabb0b6ca..29a9f4bc2 100644 --- a/stumpy/mstumped.py +++ b/stumpy/mstumped.py @@ -505,7 +505,9 @@ def mstumped( err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional" raise ValueError(f"{err}") - core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1])) + # mstump currently supports self-join. Therefore, the argument `n` should be + # passed to the function `core.check_window_size`. + core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1]) if include is not None: include = core._preprocess_include(include) diff --git a/stumpy/scraamp.py b/stumpy/scraamp.py index 56d83f6b6..7d8e9bd24 100644 --- a/stumpy/scraamp.py +++ b/stumpy/scraamp.py @@ -646,10 +646,15 @@ def __init__( "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`" ) - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) self._ignore_trivial = core.check_ignore_trivial( self._T_A, self._T_B, self._ignore_trivial ) + if self._ignore_trivial: # self-join + core.check_window_size( + m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0] + ) + else: # AB-join + core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) self._n_A = self._T_A.shape[0] self._n_B = self._T_B.shape[0] diff --git a/stumpy/scrump.py b/stumpy/scrump.py index dd5617480..5e0d212ab 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -905,10 +905,15 @@ def __init__( "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`" ) - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) self._ignore_trivial = core.check_ignore_trivial( self._T_A, self._T_B, self._ignore_trivial ) + if self._ignore_trivial: + core.check_window_size( + m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0] + ) + else: + core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) self._n_A = self._T_A.shape[0] self._n_B = self._T_B.shape[0] diff --git a/stumpy/stamp.py b/stumpy/stamp.py index 855db1f26..5726803c7 100644 --- a/stumpy/stamp.py +++ b/stumpy/stamp.py @@ -208,13 +208,14 @@ def stamp( if T_B.ndim != 1: # pragma: no cover raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ") - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) - subseq_T_A = core.rolling_window(T_A, m) excl_zone = int(np.ceil(m / 2)) # Add exclusionary zone if ignore_trivial: + core.check_window_size( + m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0] + ) out = [ _mass_PI( subseq, @@ -229,6 +230,7 @@ def stamp( for i, subseq in enumerate(subseq_T_A) ] else: + core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) out = [ _mass_PI( subseq, diff --git a/stumpy/stomp.py b/stumpy/stomp.py index 608ce7770..251d0fdc9 100644 --- a/stumpy/stomp.py +++ b/stumpy/stomp.py @@ -81,8 +81,13 @@ def _stomp(T_A, m, T_B=None, ignore_trivial=True): if T_B.ndim != 1: # pragma: no cover raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ") - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) + if ignore_trivial: # self-join + core.check_window_size( + m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0] + ) + else: # AB-join + core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) n = T_A.shape[0] l = n - m + 1 From aa61b2438bab5d1681ab7ec09cef6bd182b2d240 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 2 Apr 2025 00:25:51 -0400 Subject: [PATCH 28/39] minor fix --- stumpy/maamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/maamp.py b/stumpy/maamp.py index c4f75b897..6eb5a823d 100644 --- a/stumpy/maamp.py +++ b/stumpy/maamp.py @@ -140,7 +140,7 @@ def maamp_subspace( returned. """ T = core._preprocess(T) - core.check_window_size(m, max_size=T.shape[-1], n=T.shape[0]) + core.check_window_size(m, max_size=T.shape[-1], n=T.shape[-1]) subseqs, _ = core.preprocess_non_normalized(T[:, subseq_idx : subseq_idx + m], m) neighbors, _ = core.preprocess_non_normalized(T[:, nn_idx : nn_idx + m], m) From 9349e2a450d96ffec0d99201157425dd72cc04b3 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 2 Apr 2025 16:56:15 -0400 Subject: [PATCH 29/39] improve comments --- stumpy/core.py | 49 ++++++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 135df3c62..b7d684d0d 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -595,34 +595,45 @@ def check_window_size(m, max_size=None, n=None): raise ValueError(f"The window size must be less than or equal to {max_size}") if n is not None: - # Raise warning if there is at least one subsequence with no - # non-trivial neighbour in the case of a self-join. + # Raise warning if there is at least one subsequence with no eligible + # (non-trivial) neighbor in the case of a self-join. # For any time series `T`, an "eligible nearest neighbor" subsequence for # the central-most subsequence must be located outside the `excl_zone`, - # and the central-most subsequence will ALWAYS have the smallest index-wise - # distance to its furthest neighbour amongs all other subsequences. Therefore, - # we only need to check whether the `excl_zone` eliminates all "neighbors" for - # the central-most subsequence in `T`. In fact, we just need to verify whether - # the `excl_zone` eliminates the "neighbor" that is furthest away (index-wise) + # and the central-most subsequence will ALWAYS have the smallest relative + # (index-wise) distance to its farthest neighbor amongst all other subsequences. + # Therefore, we only need to check whether the `excl_zone` eliminates all "neighbors" + # for the central-most subsequence in `T`. In fact, we just need to verify whether + # the `excl_zone` eliminates the "neighbor" that is farthest away (index-wise) # from the central-most subsequence. If it does not, this implies that all # subsequences in `T` will have at least one "eligible nearest neighbors" # outside their respective `excl_zone` excl_zone = int(math.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - l = n - m + 1 - - # If `l` is odd (`l == 2k+1`): - # The central subsequence is at index `k`, with furthest neighbors at `0` - # and `2k`, both `k == l // 2` indices away from the central-most subsequence. - - # If `l` is even (`l == 2k`): - # The central subsequences are at `k-1` and `k`. The furthest neighbor is - # at `2k-1` for `k-1`, and `0` for `k`. In both cases, the subsequence - # and its furthest neighbor are `k == l // 2` indices away. - max_gap = l // 2 - if max_gap <= excl_zone: + l = n - m + 1 + # The start index of subsequences are: 0, 1, ..., l-1 + + # If `l` is odd (hence, `l == 2c+1`): + # The central subsequence is located at index `c == l // 2`, + # with two farthest neighbors, one located at index `0`(to the left of `c`) + # and the other located at index `l - 1 == 2c` (to the right of `c`). + # Both indices `0` and `2c` are exactly `c == l // 2` index positions away + # from the central subsequence located at index `c`. + + # If `l` is even (hence, `l == 2c`): + # There are two central-most subsequences located at index locations + # `c` and `c-1`. For subsequence at `c`, its farthest neighbor will be + # located at index `0` (to the left of `c`) and, for `c-1`, its farthest + # neighbor is located at index `l - 1 == 2c - 1` (to the right of `c-1`). + # In both cases, each central subsequence and its farthest neighbor are + # `c == l // 2` indices away. + + # Therefore, regardless if `l` is even or odd, for the central + # subsequence for any time series, the index location of its + # farthest neighbor will always be `l // 2` index positions away. + diff_to_farthest_idx = l // 2 + if diff_to_farthest_idx <= excl_zone: msg = ( f"The window size, 'm = {m}', may be too large and could lead to " + "meaningless results. Consider reducing 'm' where necessary" From 54cd2fadddfc6789e9466729541249410258f950 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 2 Apr 2025 16:59:09 -0400 Subject: [PATCH 30/39] improve comments --- stumpy/core.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index b7d684d0d..44748bd30 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -559,7 +559,7 @@ def check_window_size(m, max_size=None, n=None): Check the window size and ensure that it is greater than or equal to 3 and, if `max_size` is provided, ensure that the window size is less than or equal to the `max_size`. Furthermore, if `n` is provided, then it checks if there is - at least one subsequence with no non-trivial neighbour in a self-join. + at least one subsequence with no non-trivial neighbor in a self-join. Parameters ---------- @@ -600,37 +600,37 @@ def check_window_size(m, max_size=None, n=None): # For any time series `T`, an "eligible nearest neighbor" subsequence for # the central-most subsequence must be located outside the `excl_zone`, - # and the central-most subsequence will ALWAYS have the smallest relative - # (index-wise) distance to its farthest neighbor amongst all other subsequences. - # Therefore, we only need to check whether the `excl_zone` eliminates all "neighbors" - # for the central-most subsequence in `T`. In fact, we just need to verify whether - # the `excl_zone` eliminates the "neighbor" that is farthest away (index-wise) - # from the central-most subsequence. If it does not, this implies that all - # subsequences in `T` will have at least one "eligible nearest neighbors" - # outside their respective `excl_zone` + # and the central-most subsequence will ALWAYS have the smallest relative + # (index-wise) distance to its farthest neighbor amongst all other subsequences. + # Therefore, we only need to check whether the `excl_zone` eliminates all + # "neighbors" for the central-most subsequence in `T`. In fact, we just need to + # verify whether the `excl_zone` eliminates the "neighbor" that is farthest + # away (index-wise) from the central-most subsequence. If it does not, this + # implies that all subsequences in `T` will have at least one "eligible nearest + # neighbors" outside their respective `excl_zone` excl_zone = int(math.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - l = n - m + 1 + l = n - m + 1 # The start index of subsequences are: 0, 1, ..., l-1 # If `l` is odd (hence, `l == 2c+1`): - # The central subsequence is located at index `c == l // 2`, + # The central subsequence is located at index `c == l // 2`, # with two farthest neighbors, one located at index `0`(to the left of `c`) - # and the other located at index `l - 1 == 2c` (to the right of `c`). + # and the other located at index `l - 1 == 2c` (to the right of `c`). # Both indices `0` and `2c` are exactly `c == l // 2` index positions away # from the central subsequence located at index `c`. # If `l` is even (hence, `l == 2c`): - # There are two central-most subsequences located at index locations + # There are two central-most subsequences located at index locations # `c` and `c-1`. For subsequence at `c`, its farthest neighbor will be # located at index `0` (to the left of `c`) and, for `c-1`, its farthest # neighbor is located at index `l - 1 == 2c - 1` (to the right of `c-1`). - # In both cases, each central subsequence and its farthest neighbor are + # In both cases, each central subsequence and its farthest neighbor are # `c == l // 2` indices away. # Therefore, regardless if `l` is even or odd, for the central - # subsequence for any time series, the index location of its + # subsequence for any time series, the index location of its # farthest neighbor will always be `l // 2` index positions away. diff_to_farthest_idx = l // 2 if diff_to_farthest_idx <= excl_zone: From 917fcc43723cac3e06cb27d60ff298f3fb756b6c Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 2 Apr 2025 17:09:39 -0400 Subject: [PATCH 31/39] improved the explanations --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 44748bd30..44722b5c0 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -614,14 +614,14 @@ def check_window_size(m, max_size=None, n=None): l = n - m + 1 # The start index of subsequences are: 0, 1, ..., l-1 - # If `l` is odd (hence, `l == 2c+1`): + # If `l` is odd (`l` can be expressed as `l == 2c+1`): # The central subsequence is located at index `c == l // 2`, # with two farthest neighbors, one located at index `0`(to the left of `c`) # and the other located at index `l - 1 == 2c` (to the right of `c`). # Both indices `0` and `2c` are exactly `c == l // 2` index positions away # from the central subsequence located at index `c`. - # If `l` is even (hence, `l == 2c`): + # If `l` is even (`l` can be expressed as `l == 2c`): # There are two central-most subsequences located at index locations # `c` and `c-1`. For subsequence at `c`, its farthest neighbor will be # located at index `0` (to the left of `c`) and, for `c-1`, its farthest From be4d6bbc1ee8834f68bc1030394d66cdbfa33b4b Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 2 Apr 2025 17:15:25 -0400 Subject: [PATCH 32/39] minor change in the description of function --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 44722b5c0..8182e190d 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -558,8 +558,8 @@ def check_window_size(m, max_size=None, n=None): """ Check the window size and ensure that it is greater than or equal to 3 and, if `max_size` is provided, ensure that the window size is less than or equal to the - `max_size`. Furthermore, if `n` is provided, then it checks if there is - at least one subsequence with no non-trivial neighbor in a self-join. + `max_size`. Furthermore, if `n` is provided, then it checks whether all + subsequences have at least one non-trivial neighbor in a self-join. Parameters ---------- From 97e6f2b5692bca6603981ad19f1381971b309d4e Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 4 Apr 2025 23:38:43 -0400 Subject: [PATCH 33/39] improve the clarity of the logic --- stumpy/core.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 8182e190d..12357340e 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -614,20 +614,27 @@ def check_window_size(m, max_size=None, n=None): l = n - m + 1 # The start index of subsequences are: 0, 1, ..., l-1 - # If `l` is odd (`l` can be expressed as `l == 2c+1`): - # The central subsequence is located at index `c == l // 2`, - # with two farthest neighbors, one located at index `0`(to the left of `c`) - # and the other located at index `l - 1 == 2c` (to the right of `c`). - # Both indices `0` and `2c` are exactly `c == l // 2` index positions away - # from the central subsequence located at index `c`. - - # If `l` is even (`l` can be expressed as `l == 2c`): - # There are two central-most subsequences located at index locations - # `c` and `c-1`. For subsequence at `c`, its farthest neighbor will be - # located at index `0` (to the left of `c`) and, for `c-1`, its farthest - # neighbor is located at index `l - 1 == 2c - 1` (to the right of `c-1`). - # In both cases, each central subsequence and its farthest neighbor are - # `c == l // 2` indices away. + # If `l` is odd + # Suppose `l == 5`. So, the start index of the subsequences + # are: 0, 1, 2, 3, 4 + # The central subsequence is located at index position c=2, with two + # farthest neighbors, one located at index 0, and the other is located + # at index 4. In both cases, the relative (index-wise) distance is 2, + # which is simply `5 // 2`. In general, it can be shown that the + # (index-wise) distance from the central subsequence to its farthest + # neighbor is `l // 2`. + + # If `l` is even + # Suppose `l == 6`. So, the start index of the subsequences + # are: 0, 1, 2, 3, 4, 5 + # There are two central-most subsequences, located at the index + # positions c=2 and c=3. For the central-most subsequence at index + # position c=2, its farthest neighbor will be located at index 5 (to the + # right of c=2) and, for the central-most subsequence at index position + # c=3, its farthest neighbor will be located at index 0 (to the left of + # c=3). In both cases, each central subsequence and its farthest neighbor + # are 3 indices away. In general, it can be shown that the (index-wise) + # distance from the central subsequence to its farthest neighbor is `l // 2`. # Therefore, regardless if `l` is even or odd, for the central # subsequence for any time series, the index location of its From 32a15f3a3d8cceb59537be301909b2af48347675 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 4 Apr 2025 23:39:56 -0400 Subject: [PATCH 34/39] improve comment --- stumpy/mstump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/mstump.py b/stumpy/mstump.py index ab35929dc..bc1e0a1ae 100644 --- a/stumpy/mstump.py +++ b/stumpy/mstump.py @@ -1228,8 +1228,8 @@ def mstump( err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional" raise ValueError(f"{err}") - # mstump currently supports self-join. Therefore, the argument `n` should be - # passed to the function `core.check_window_size`. + # mstump currently supports self-join. Therefore, the argument `n=T_A.shape[1]` + # should be passed to the function `core.check_window_size`. core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1]) if include is not None: From 90d3901a2b36d9dc2841308ad8631590e5fcc12c Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 4 Apr 2025 23:41:52 -0400 Subject: [PATCH 35/39] improve description of function --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 12357340e..d4dac3004 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -558,8 +558,8 @@ def check_window_size(m, max_size=None, n=None): """ Check the window size and ensure that it is greater than or equal to 3 and, if `max_size` is provided, ensure that the window size is less than or equal to the - `max_size`. Furthermore, if `n` is provided, then it checks whether all - subsequences have at least one non-trivial neighbor in a self-join. + `max_size`. Furthermore, if `n` is provided, then a self-join is assumed and it + checks whether all subsequences have at least one non-trivial neighbor. Parameters ---------- From 17e2db9d8b38b4c69dd5136215ce3cd7bcfc7e6f Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 4 Apr 2025 23:43:18 -0400 Subject: [PATCH 36/39] minor change --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index d4dac3004..096b6e9db 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -607,7 +607,7 @@ def check_window_size(m, max_size=None, n=None): # verify whether the `excl_zone` eliminates the "neighbor" that is farthest # away (index-wise) from the central-most subsequence. If it does not, this # implies that all subsequences in `T` will have at least one "eligible nearest - # neighbors" outside their respective `excl_zone` + # neighbor" that is located outside of their respective excl_zone. excl_zone = int(math.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) From 0a25af11ffbe10206859ff0a813e3cdf69f6e6bc Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 5 Apr 2025 00:16:49 -0400 Subject: [PATCH 37/39] improve readability and consistency --- stumpy/aampi.py | 2 +- stumpy/core.py | 6 +++--- stumpy/maamp.py | 4 ++-- stumpy/mstump.py | 4 ++-- stumpy/stumpi.py | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/stumpy/aampi.py b/stumpy/aampi.py index e5c2ee8a1..7674243e5 100644 --- a/stumpy/aampi.py +++ b/stumpy/aampi.py @@ -111,7 +111,7 @@ def __init__(self, T, m, egress=True, p=2.0, k=1, mp=None): computed internally using `stumpy.aamp`. """ self._T = core._preprocess(T) - core.check_window_size(m, max_size=self._T.shape[-1]) + core.check_window_size(m, max_size=self._T.shape[0]) self._m = m self._n = self._T.shape[0] self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) diff --git a/stumpy/core.py b/stumpy/core.py index 096b6e9db..09a390c25 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1412,7 +1412,7 @@ def mass_absolute(Q, T, T_subseq_isfinite=None, p=2.0, query_idx=None): raise ValueError(f"`Q` is {Q.ndim}-dimensional and must be 1-dimensional. ") Q_isfinite = np.isfinite(Q) - check_window_size(m, max_size=Q.shape[-1]) + check_window_size(m, max_size=Q.shape[0]) if query_idx is not None: # pragma: no cover query_idx = int(query_idx) @@ -1759,7 +1759,7 @@ def mass( raise ValueError(f"Q is {Q.ndim}-dimensional and must be 1-dimensional. ") Q_isfinite = np.isfinite(Q) - check_window_size(m, max_size=Q.shape[-1]) + check_window_size(m, max_size=Q.shape[0]) if query_idx is not None: query_idx = int(query_idx) @@ -1984,7 +1984,7 @@ def mass_distance_matrix( T_subseq_isconstant=T_subseq_isconstant, ) - check_window_size(m, max_size=min(Q.shape[-1], T.shape[-1])) + check_window_size(m, max_size=min(Q.shape[0], T.shape[0])) return _mass_distance_matrix( Q, diff --git a/stumpy/maamp.py b/stumpy/maamp.py index 6eb5a823d..c30deab6f 100644 --- a/stumpy/maamp.py +++ b/stumpy/maamp.py @@ -140,7 +140,7 @@ def maamp_subspace( returned. """ T = core._preprocess(T) - core.check_window_size(m, max_size=T.shape[-1], n=T.shape[-1]) + core.check_window_size(m, max_size=T.shape[1], n=T.shape[1]) subseqs, _ = core.preprocess_non_normalized(T[:, subseq_idx : subseq_idx + m], m) neighbors, _ = core.preprocess_non_normalized(T[:, nn_idx : nn_idx + m], m) @@ -269,7 +269,7 @@ def maamp_mdl( A list of numpy.ndarrays that contains the `k`th-dimensional subspaces """ T = core._preprocess(T) - core.check_window_size(m, max_size=T.shape[-1], n=T.shape[-1]) + core.check_window_size(m, max_size=T.shape[1], n=T.shape[1]) if discretize_func is None: T_isfinite = np.isfinite(T) diff --git a/stumpy/mstump.py b/stumpy/mstump.py index bc1e0a1ae..89411b981 100644 --- a/stumpy/mstump.py +++ b/stumpy/mstump.py @@ -217,7 +217,7 @@ def subspace( array([0, 1]) """ T = core._preprocess(T) - core.check_window_size(m, max_size=T.shape[-1], n=T.shape[-1]) + core.check_window_size(m, max_size=T.shape[1], n=T.shape[1]) T_subseq_isconstant = core.process_isconstant(T, m, T_subseq_isconstant) if discretize_func is None: @@ -409,7 +409,7 @@ def mdl( (array([ 80. , 111.509775]), [array([1]), array([0, 1])]) """ T = core._preprocess(T) - core.check_window_size(m, max_size=T.shape[-1], n=T.shape[-1]) + core.check_window_size(m, max_size=T.shape[1], n=T.shape[1]) T_subseq_isconstant = core.process_isconstant(T, m, T_subseq_isconstant) if discretize_func is None: diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index feb8cb2af..bf6d40661 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -179,7 +179,7 @@ def __init__( array. """ self._T = core._preprocess(T) - core.check_window_size(m, max_size=self._T.shape[-1]) + core.check_window_size(m, max_size=self._T.shape[0]) self._m = m self._k = k From e29cdca03155a3cbdcdd4e76c8d7457ac0df79f9 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 5 Apr 2025 00:21:43 -0400 Subject: [PATCH 38/39] minor change --- stumpy/core.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 09a390c25..d260254fa 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -632,9 +632,10 @@ def check_window_size(m, max_size=None, n=None): # position c=2, its farthest neighbor will be located at index 5 (to the # right of c=2) and, for the central-most subsequence at index position # c=3, its farthest neighbor will be located at index 0 (to the left of - # c=3). In both cases, each central subsequence and its farthest neighbor - # are 3 indices away. In general, it can be shown that the (index-wise) - # distance from the central subsequence to its farthest neighbor is `l // 2`. + # c=3). In both cases, the relative (index-wise) distance is 3, + # which is simply `6 // 2`. In general, it can be shown that the + # (index-wise) distance from the central-most subsequence to its + # farthest neighbor is `l // 2`. # Therefore, regardless if `l` is even or odd, for the central # subsequence for any time series, the index location of its From 949db7eb1fe7479c819568e61cb83182729d634c Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 5 Apr 2025 11:59:32 -0400 Subject: [PATCH 39/39] minor changes --- stumpy/core.py | 8 ++++---- stumpy/mstump.py | 4 ++-- stumpy/mstumped.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index d260254fa..ea9b93afd 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -557,9 +557,9 @@ def get_max_window_size(n): def check_window_size(m, max_size=None, n=None): """ Check the window size and ensure that it is greater than or equal to 3 and, if - `max_size` is provided, ensure that the window size is less than or equal to the - `max_size`. Furthermore, if `n` is provided, then a self-join is assumed and it - checks whether all subsequences have at least one non-trivial neighbor. + ``max_size`` is provided, ensure that the window size is less than or equal to + the ``max_size``. Furthermore, if ``n`` is provided, then a self-join is assumed + and it checks whether all subsequences have at least one non-trivial neighbor. Parameters ---------- @@ -571,7 +571,7 @@ def check_window_size(m, max_size=None, n=None): n : int, default None The length of the time series in the case of a self-join. - `n` should be set to `None` in the case of an `AB-join`. + ``n`` should not be supplied (or set to ``None``) in the case of an AB-join. Returns ------- diff --git a/stumpy/mstump.py b/stumpy/mstump.py index 89411b981..6ea97edb7 100644 --- a/stumpy/mstump.py +++ b/stumpy/mstump.py @@ -1228,8 +1228,8 @@ def mstump( err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional" raise ValueError(f"{err}") - # mstump currently supports self-join. Therefore, the argument `n=T_A.shape[1]` - # should be passed to the function `core.check_window_size`. + # mstump currently only supports self-join. Therefore, the argument `n=T_A.shape[1]` + # must be passed to the function `core.check_window_size`. core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1]) if include is not None: diff --git a/stumpy/mstumped.py b/stumpy/mstumped.py index 29a9f4bc2..d8ce4c3a5 100644 --- a/stumpy/mstumped.py +++ b/stumpy/mstumped.py @@ -505,8 +505,8 @@ def mstumped( err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional" raise ValueError(f"{err}") - # mstump currently supports self-join. Therefore, the argument `n` should be - # passed to the function `core.check_window_size`. + # mstump currently only supports self-join. Therefore, the argument `n=T_A.shape[1]` + # must be passed to the function `core.check_window_size`. core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1]) if include is not None: