Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,8 @@ jobs:
fail-fast: false
matrix:
# We test only the minimum and the maximum supported versions of python
python-version: ["3.8", "3.11"]
python-version: ["3.9", "3.12"]
pandas-version: ["1.4", "2.1"]
exclude:
- python-version: "3.8"
pandas-version: "2.1"

steps:
- uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ description of the framework and the attack algorithms can be found in the paper

## Setup and installation

`Anonymeter` requires Python 3.8.x, 3.9.x or 3.10.x installed. The simplest way to install `Anonymeter` is from `PyPi`. Simply run
`Anonymeter` supports Python 3.9 through 3.12. The simplest way to install `Anonymeter` is from `PyPI`. Simply run

```
pip install anonymeter
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ authors = [
]
description = "Measure singling out, linkability, and inference risk for synthetic data."
readme = "README.md"
requires-python = "<3.12, >3.7" # limited by Numba support
requires-python = "<3.13, >=3.9"
license = {file = "LICENSE.md"}
classifiers = [
"Programming Language :: Python :: 3",
Expand All @@ -23,10 +23,10 @@ classifiers = [

dependencies = [
"scikit-learn~=1.2",
"numpy >=1.22, <1.27", # limited by Numba support
"numpy >=1.22, <1.27", # capped to support pandas ~1.4
"pandas>=1.4",
"joblib~=1.2",
"numba~=0.58",
"numba~=0.59",
"polars>=1.8",
]

Expand Down
6 changes: 3 additions & 3 deletions src/anonymeter/evaluators/inference_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
"""Privacy evaluator that measures the inference risk."""

from typing import List, Optional
from typing import Optional

import numpy as np
import numpy.typing as npt
Expand All @@ -17,7 +17,7 @@ def _run_attack(
target: pd.DataFrame,
syn: pd.DataFrame,
n_attacks: int,
aux_cols: List[str],
aux_cols: list[str],
secret: str,
n_jobs: int,
naive: bool,
Expand Down Expand Up @@ -149,7 +149,7 @@ def __init__(
self,
ori: pd.DataFrame,
syn: pd.DataFrame,
aux_cols: List[str],
aux_cols: list[str],
secret: str,
regression: Optional[bool] = None,
n_attacks: int = 500,
Expand Down
12 changes: 6 additions & 6 deletions src/anonymeter/evaluators/linkability_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
"""Privacy evaluator that measures the linkability risk."""
import logging
from typing import Dict, List, Optional, Set, Tuple, cast
from typing import Optional, cast

import numpy as np
import numpy.typing as npt
Expand Down Expand Up @@ -37,7 +37,7 @@ def __init__(self, idx_0: npt.NDArray, idx_1: npt.NDArray):
self._idx_0 = idx_0
self._idx_1 = idx_1

def find_links(self, n_neighbors: int) -> Dict[int, Set[int]]:
def find_links(self, n_neighbors: int) -> dict[int, set[int]]:
"""Return synthetic records that link originals in the split datasets.

Parameters
Expand Down Expand Up @@ -86,9 +86,9 @@ def count_links(self, n_neighbors: int) -> int:
return _count_links(links)


def _count_links(links: Dict[int, Set[int]]) -> int:
def _count_links(links: dict[int, set[int]]) -> int:
"""Count links."""
linkable: Set[int] = set()
linkable: set[int] = set()

for ori_idx in links:
linkable = linkable | {ori_idx}
Expand Down Expand Up @@ -127,7 +127,7 @@ def _linkability_attack(
ori: pd.DataFrame,
syn: pd.DataFrame,
n_attacks: int,
aux_cols: Tuple[List[str], List[str]],
aux_cols: tuple[list[str], list[str]],
n_neighbors: int,
n_jobs: int,
) -> LinkabilityIndexes:
Expand Down Expand Up @@ -185,7 +185,7 @@ def __init__(
self,
ori: pd.DataFrame,
syn: pd.DataFrame,
aux_cols: Tuple[List[str], List[str]],
aux_cols: tuple[list[str], list[str]],
n_attacks: Optional[int] = 500,
n_neighbors: int = 1,
control: Optional[pd.DataFrame] = None,
Expand Down
47 changes: 24 additions & 23 deletions src/anonymeter/evaluators/singling_out_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@

import logging
import operator
from collections.abc import Sequence
from functools import reduce
from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, Union, cast
from typing import Any, Callable, Optional, Union, cast

import numpy as np
import numpy.typing as npt
Expand All @@ -26,7 +27,7 @@ def _escape_quotes(string: str) -> str:
def _query_from_record(
record: dict,
dtypes: dict, # map col -> pl.DataType
columns: List[str],
columns: list[str],
medians: dict, # map col -> median value
rng: np.random.Generator,
) -> pl.Expr:
Expand Down Expand Up @@ -92,9 +93,9 @@ def _random_operator(


def _random_query(
unique_values: Dict[str, List[Any]],
cols: List[str],
column_types: Dict[str, str],
unique_values: dict[str, list[Any]],
cols: list[str],
column_types: dict[str, str],
rng: np.random.Generator,
) -> pl.Expr:
exprs = []
Expand Down Expand Up @@ -140,7 +141,7 @@ def _random_queries(
n_queries: int,
n_cols: int,
rng: np.random.Generator,
) -> List[pl.Expr]:
) -> list[pl.Expr]:
unique_values = {col: df[col].unique().to_list() for col in df.columns}
column_types = {
col: _convert_polars_dtype(df[col].dtype)
Expand Down Expand Up @@ -212,8 +213,8 @@ def singling_out_probability_integral(


def _measure_queries_success(
df: pl.DataFrame, queries: List[pl.Expr], n_repeat: int, n_meas: int
) -> Tuple[npt.NDArray, npt.NDArray]:
df: pl.DataFrame, queries: list[pl.Expr], n_repeat: int, n_meas: int
) -> tuple[npt.NDArray, npt.NDArray]:
sizes, successes = [], []
min_rows = min(1000, len(df))

Expand Down Expand Up @@ -247,7 +248,7 @@ def _fit_model(sizes: npt.NDArray, successes: npt.NDArray) -> Callable:
return lambda x: _model(x, *popt)


def fit_correction_term(df: pl.DataFrame, queries: List[pl.Expr]) -> Callable:
def fit_correction_term(df: pl.DataFrame, queries: list[pl.Expr]) -> Callable:
"""Fit correction for different size of the control dataset.

Parameters
Expand Down Expand Up @@ -280,11 +281,11 @@ class UniqueSinglingOutQueries:
"""

def __init__(self, max_size: Optional[int] = None):
self._set: Set[str] = set()
self._list: List[pl.Expr] = []
self._set: set[str] = set()
self._list: list[pl.Expr] = []
self._max_size: Optional[int] = max_size

def check_and_extend(self, queries: List[pl.Expr], df: pl.DataFrame):
def check_and_extend(self, queries: list[pl.Expr], df: pl.DataFrame):
"""Add singling-out queries to the collection.

Only queries that are not already in this collection can be added.
Expand Down Expand Up @@ -317,14 +318,14 @@ def __len__(self):
return len(self._list)

@property
def queries(self) -> List[pl.Expr]:
def queries(self) -> list[pl.Expr]:
"""Queries that are present in the collection."""
return self._list


def univariate_singling_out_queries(
df: pl.DataFrame, n_queries: int, rng: np.random.Generator
) -> List[pl.Expr]:
) -> list[pl.Expr]:
"""Generate singling out queries from rare attributes.

Parameters
Expand Down Expand Up @@ -388,7 +389,7 @@ def multivariate_singling_out_queries(
max_attempts: Optional[int],
rng: np.random.Generator,
batch_size: int = 1000,
) -> List[pl.Expr]:
) -> list[pl.Expr]:
"""Generate singling out queries from a combination of attributes.

Parameters
Expand Down Expand Up @@ -478,8 +479,8 @@ def multivariate_singling_out_queries(


def _evaluate_queries(
df: pl.DataFrame, queries: List[pl.Expr]
) -> Tuple[int, ...]:
df: pl.DataFrame, queries: list[pl.Expr]
) -> tuple[int, ...]:
if len(queries) == 0:
return ()

Expand All @@ -494,8 +495,8 @@ def _evaluate_queries(


def _evaluate_queries_and_return_successful(
df: pl.DataFrame, queries: List[pl.Expr]
) -> List[pl.Expr]:
df: pl.DataFrame, queries: list[pl.Expr]
) -> list[pl.Expr]:
counts = _evaluate_queries(df=df, queries=queries)

counts_np = np.array(counts, dtype=float)
Expand All @@ -517,7 +518,7 @@ def _generate_singling_out_queries(
n_cols: int,
max_attempts: Optional[int],
rng: np.random.Generator,
) -> List[pl.Expr]:
) -> list[pl.Expr]:
if mode == "univariate":
queries = univariate_singling_out_queries(
df=df, n_queries=n_attacks, rng=rng
Expand Down Expand Up @@ -615,12 +616,12 @@ def __init__(
control = pl.DataFrame(control)
self._control = control.unique(maintain_order=True)
self._max_attempts = max_attempts
self._queries: List[pl.Expr] = []
self._random_queries: List[pl.Expr] = []
self._queries: list[pl.Expr] = []
self._random_queries: list[pl.Expr] = []
self._evaluated = False
self._rng = np.random.default_rng() if seed is None else np.random.default_rng(seed)

def queries(self, baseline: bool = False) -> List[pl.Expr]:
def queries(self, baseline: bool = False) -> list[pl.Expr]:
"""Successful singling out queries.

Parameters
Expand Down
8 changes: 4 additions & 4 deletions src/anonymeter/neighbors/mixed_types_kneighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"""Nearest neighbor search for mixed type data."""
import logging
from math import fabs, isnan
from typing import Dict, List, Optional, Tuple, Union
from typing import Optional, Union

import numpy as np
import numpy.typing as npt
Expand Down Expand Up @@ -76,7 +76,7 @@ def gower_distance(r0: npt.NDArray, r1: npt.NDArray, cat_cols_index: int) -> flo
@jit(nopython=True, nogil=True)
def _nearest_neighbors(
queries: npt.NDArray, candidates: npt.NDArray, cat_cols_index: int, n_neighbors: int
) -> Tuple[npt.NDArray[np.int64], npt.NDArray[np.float64]]:
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.float64]]:
r"""For every element of ``queries``, find its nearest neighbors in ``candidates``.

Parameters
Expand Down Expand Up @@ -147,7 +147,7 @@ def __init__(self, n_neighbors: int = 5, n_jobs: int = -2):
self._n_neighbors = n_neighbors
self._n_jobs = n_jobs

def fit(self, candidates: pd.DataFrame, ctypes: Optional[Dict[str, List[str]]] = None):
def fit(self, candidates: pd.DataFrame, ctypes: Optional[dict[str, list[str]]] = None):
"""Prepare for nearest neighbor search.

Parameters
Expand All @@ -167,7 +167,7 @@ def fit(self, candidates: pd.DataFrame, ctypes: Optional[Dict[str, List[str]]] =

def kneighbors(
self, queries: pd.DataFrame, n_neighbors: Optional[int] = None, return_distance: bool = False
) -> Union[Tuple[npt.NDArray, npt.NDArray], npt.NDArray]:
) -> Union[tuple[npt.NDArray, npt.NDArray], npt.NDArray]:
"""Find the nearest neighbors for a set of query points.

Note
Expand Down
9 changes: 4 additions & 5 deletions src/anonymeter/preprocessing/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
"""Data pre-processing and transformations for the privacy evaluators."""
import logging
from typing import List, Tuple

import pandas as pd
from sklearn.preprocessing import LabelEncoder
Expand All @@ -14,7 +13,7 @@
def _encode_categorical(
df1: pd.DataFrame,
df2: pd.DataFrame,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
) -> tuple[pd.DataFrame, pd.DataFrame]:
"""Encode dataframes with categorical values keeping labels consistent."""
encoded = pd.concat((df1, df2), keys=["df1", "df2"])

Expand All @@ -24,7 +23,7 @@ def _encode_categorical(
return encoded.loc["df1"], encoded.loc["df2"]


def _scale_numerical(df1: pd.DataFrame, df2: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
def _scale_numerical(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
"""Scale dataframes with *only* numerical values."""
df1_min, df1_max = df1.min(), df1.max()
df2_min, df2_max = df2.min(), df2.max()
Expand All @@ -50,8 +49,8 @@ def _scale_numerical(df1: pd.DataFrame, df2: pd.DataFrame) -> Tuple[pd.DataFrame


def mixed_types_transform(
df1: pd.DataFrame, df2: pd.DataFrame, num_cols: List[str], cat_cols: List[str]
) -> Tuple[pd.DataFrame, pd.DataFrame]:
df1: pd.DataFrame, df2: pd.DataFrame, num_cols: list[str], cat_cols: list[str]
) -> tuple[pd.DataFrame, pd.DataFrame]:
"""Combination of an encoder and a scaler to treat mixed type data.

Numerical columns are scaled by dividing them by their range across both
Expand Down
7 changes: 3 additions & 4 deletions src/anonymeter/preprocessing/type_detection.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
# This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
# Copyright (c) 2022 Anonos IP LLC.
# See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
from typing import Dict, List

import pandas as pd


def detect_col_types(df: pd.DataFrame) -> Dict[str, List[str]]:
def detect_col_types(df: pd.DataFrame) -> dict[str, list[str]]:
"""Identify numerical and non-numerical columns in the dataframe.

Parameters
Expand All @@ -21,8 +20,8 @@ def detect_col_types(df: pd.DataFrame) -> Dict[str, List[str]]:
Values are lists of column names.

"""
num_cols: List[str] = list(df.select_dtypes("number").columns.values)
cat_cols: List[str] = [cn for cn in df.columns.values if cn not in num_cols]
num_cols: list[str] = list(df.select_dtypes("number").columns.values)
cat_cols: list[str] = [cn for cn in df.columns.values if cn not in num_cols]

return {"num": sorted(num_cols), "cat": sorted(cat_cols)}

Expand Down
4 changes: 2 additions & 2 deletions src/anonymeter/stats/confidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import warnings
from math import sqrt
from typing import NamedTuple, Optional, Tuple
from typing import NamedTuple, Optional

from scipy.stats import norm

Expand All @@ -23,7 +23,7 @@ class PrivacyRisk(NamedTuple):
"""

value: float
ci: Tuple[float, float]
ci: tuple[float, float]


class SuccessRate(NamedTuple):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_inference_evaluator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
# Copyright (c) 2022 Anonos IP LLC.
# See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
from typing import Iterable
from collections.abc import Iterable

import numpy as np
import pandas as pd
Expand Down