diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 39480995..4dda9426 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -16,6 +16,8 @@ // For web display "ghcr.io/devcontainers/features/node:1": {}, // For scripting - "ghcr.io/va-h/devcontainers-features/uv:1": {} + "ghcr.io/va-h/devcontainers-features/uv:1": {}, + // For paxtools + "ghcr.io/devcontainers/features/java:1": {} } } diff --git a/cache/__init__.py b/cache/__init__.py index 2f15fe4d..900e94cf 100644 --- a/cache/__init__.py +++ b/cache/__init__.py @@ -67,9 +67,9 @@ def link(output: str, directive: list[str], uncompress=False): Path(output).unlink(missing_ok=True) - # Re-download if the directive has expired. + # Re-download if the directive has expired / the artifact mysteriously disappeared. cache_item = get_cache_item(directive) - if has_expired(directive): + if has_expired(directive) or not (artifacts_dir / artifact_name).exists(): (artifacts_dir / artifact_name).unlink(missing_ok=True) cache_item.download(artifacts_dir / artifact_name) diff --git a/cache/directory.py b/cache/directory.py index b308f1b5..70f0a827 100644 --- a/cache/directory.py +++ b/cache/directory.py @@ -2,26 +2,61 @@ from typing import Union from os import PathLike from tempfile import NamedTemporaryFile -from typing import Optional -import urllib.request +from typing import Optional, Mapping import filecmp -import urllib.parse -import os from pathlib import Path +from enum import Enum +import warnings +import requests +import shutil +import urllib.parse import gdown -dir_path = Path(os.path.dirname(os.path.realpath(__file__))) +dir_path = Path(__file__).parent.resolve() + +@dataclass +class Service: + url: str + headers: Optional[Mapping[str, str]] = None + def download(self, output: str | PathLike) -> requests.Response: + """ + Downloads a URL, returning the response (to be used with `with`) and modifying the output path. + """ + # As per https://stackoverflow.com/a/39217788/7589775 to enable download streaming. + with requests.get(self.url, stream=True, headers=self.headers) as response: + response.raw.decode_content = True + with open(output, 'wb') as f: + shutil.copyfileobj(response.raw, f) + return response -def fetch_biomart_url(xml: str) -> str: +def fetch_biomart_service(xml: str) -> Service: """ Access BioMart data through the BioMart REST API: https://useast.ensembl.org/info/data/biomart/biomart_restful.html#biomartxml """ ROOT = "http://www.ensembl.org/biomart/martservice?query=" - return ROOT + urllib.parse.quote_plus(xml) + return Service(ROOT + urllib.parse.quote_plus(xml)) +class OnlineStatus(Enum): + ONLINE = 1 + """ + Services that are always online. If these fail, we fail the workflow and + log this. + """ + + INTERMITTENT_ERROR_CODE = 2 + """ + Services that error often (not go down!) + these will be logged when they fail, but we continue with the cached option. + """ + + # (we choose to do this over arbitrary lambdas because its nicer. For now.) + INTERMITTENT_HTML = 3 + """ + Like INTERMITTENT_ERROR_CODE, but errors when HTML is returned. 
+ """ @dataclass class CacheItem: @@ -35,41 +70,46 @@ class CacheItem: name: str """The display name of the artifact, used for human-printing.""" cached: str - online: str - online_headers: Optional[list[tuple[str, str]]] = None + online: Optional[Service] = None + status: OnlineStatus = OnlineStatus.ONLINE + """How much to care about errors from downloading the online file.""" @classmethod + @warnings.deprecated("Pending for removal after the CONTRIBUTING guide is updated.") def cache_only(cls, name: str, cached: str) -> "CacheItem": """Wrapper method to explicitly declare a CacheItem as cached only.""" - return cls(name=name, online=cached, cached="") - - def download_online(self, output: str | PathLike): - # https://stackoverflow.com/a/45313194/7589775: this is to add optional headers to requests. - # We remove the opener at the end by re-installing the default opener. - opener = urllib.request.build_opener() - if self.online_headers: - opener.addheaders = self.online_headers - urllib.request.install_opener(opener) - urllib.request.urlretrieve(self.online, output) - urllib.request.install_opener(urllib.request.build_opener()) + return cls(name=name, cached=cached, online=None) def download(self, output: str | PathLike): print(f"Fetching {self.name}...") - print(f"Downloading {self.online}...") - - if self.cached == "": - # From CacheItem.cached_only - # (gdown doesn't take in Paths for the output_file, so we must stringify it here) - gdown.download(self.online, str(output)) - return - - self.download_online(output) with NamedTemporaryFile() as cached_file: print(f"Downloading cache {self.cached}...") gdown.download(self.cached, cached_file) - print("Checking that downloaded artifact matches with cached artifact...") - filecmp.cmp(output, cached_file.name) + + if self.online is None: + return + + print(f"Downloading {self.online}...") + with self.online.download(output) as response: + + print("Checking that downloaded artifact matches with cached artifact...") + if filecmp.cmp(output, cached_file.name): + return # It does! + + # For debug purposes, we allow the output artifact to be viewed in some kind of temporary folder. + debug_file_path = Path(NamedTemporaryFile(prefix="spras-benchmarking-debug-artifact", delete=False).name) + # (and we pedantically use this over Path#rename since temporary directories can be mounted to a different file system.) + shutil.move(output, debug_file_path) + if (self.status == OnlineStatus.INTERMITTENT_ERROR_CODE and not response.ok) \ + or (self.status == OnlineStatus.INTERMITTENT_HTML and Path(debug_file_path).read_text().strip().startswith("")): + warnings.warn(f"Online url {self.online} erroring with status code {response.status_code}. " \ + f"See {debug_file_path} for the online output. Using the cached file instead...") + # Back up to the cached_file + shutil.move(cached_file.name, output) + else: + raise RuntimeError(f"Cached and online files did not match with status code {response.status_code}! 
" \ + f"See {debug_file_path} for the online output.") CacheDirectory = dict[str, Union[CacheItem, "CacheDirectory"]] @@ -78,15 +118,15 @@ def download(self, output: str | PathLike): directory: CacheDirectory = { "STRING": { "9606": { - "9606.protein.links.txt.gz": CacheItem( - name="STRING 9606 protein links", - cached="https://drive.google.com/uc?id=1fvjdIbgzbgJrdJxWRRRwwS1zuegf6DOj", - online="http://stringdb-downloads.org/download/protein.links.v12.0/9606.protein.links.v12.0.txt.gz", + "9606.protein.links.full.txt.gz": CacheItem( + name="STRING 9606 full links", + cached="https://drive.google.com/uc?id=13tE_-A6g7McZs_lZGz9As7iE-5cBFvqE", + online=Service("http://stringdb-downloads.org/download/protein.links.full.v12.0/9606.protein.links.full.v12.0.txt.gz"), ), "9606.protein.aliases.txt.gz": CacheItem( name="STRING 9606 protein aliases", cached="https://drive.google.com/uc?id=1IWrQeTVCcw1A-jDk-4YiReWLnwP0S9bY", - online="https://stringdb-downloads.org/download/protein.aliases.v12.0/9606.protein.aliases.v12.0.txt.gz", + online=Service("https://stringdb-downloads.org/download/protein.aliases.v12.0/9606.protein.aliases.v12.0.txt.gz"), ), } }, @@ -98,19 +138,19 @@ def download(self, output: str | PathLike): "SwissProt_9606.tsv": CacheItem( name="UniProt 9606 SwissProt genes", cached="https://drive.google.com/uc?id=1h2Cl-60qcKse-djcsqlRXm_n60mVY7lk", - online="https://rest.uniprot.org/uniprotkb/stream?fields=accession%2Cid%2Cprotein_name%2Cgene_names&format=tsv&query=%28*%29+AND+%28reviewed%3Atrue%29+AND+%28model_organism%3A9606%29", + online=Service("https://rest.uniprot.org/uniprotkb/stream?fields=accession%2Cid%2Cprotein_name%2Cgene_names&format=tsv&query=%28*%29+AND+%28reviewed%3Atrue%29+AND+%28model_organism%3A9606%29"), ), # idmapping FTP files. 
See the associated README: # https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/README "HUMAN_9606_idmapping_selected.tab.gz": CacheItem( name="UniProt 9606 ID external database mapping", cached="https://drive.google.com/uc?id=1Oysa5COq31H771rVeyrs-6KFhE3VJqoX", - online="https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping_selected.tab.gz", + online=Service("https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping_selected.tab.gz"), ), "HUMAN_9606_idmapping.dat.gz": CacheItem( name="UniProt 9606 internal id mapping", cached="https://drive.google.com/uc?id=1lGxrx_kGyNdupwIOUXzfIZScc7rQKP-O", - online="https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping.dat.gz", + online=Service("https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping.dat.gz"), ), } }, @@ -120,56 +160,56 @@ def download(self, output: str | PathLike): "tiga_gene-trait_stats.tsv": CacheItem( name="TIGA data", cached="https://drive.google.com/uc?id=114qyuNDy4qdmYDHHJAW-yBeTxcGTDUnK", - online="https://unmtid-dbs.net/download/TIGA/20250916/tiga_gene-trait_stats.tsv", + online=Service("https://unmtid-dbs.net/download/TIGA/20250916/tiga_gene-trait_stats.tsv"), ), "HumanDO.tsv": CacheItem( name="Disease ontology data", cached="https://drive.google.com/uc?id=1lfB1DGJgrXTxP_50L6gGu_Nq6OyDjiIi", - online="https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/016a4ec33d1a1508d669650086cd92ccebe138e6/DOreports/HumanDO.tsv", + online=Service("https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/016a4ec33d1a1508d669650086cd92ccebe138e6/DOreports/HumanDO.tsv"), ), "human_disease_textmining_filtered.tsv": CacheItem( name="DISEASES textmining channel", cached="https://drive.google.com/uc?id=1vD8KbT9sk04VEJx9r3_LglCTGYJdhN0D", - online="https://download.jensenlab.org/human_disease_textmining_filtered.tsv", + online=Service("https://download.jensenlab.org/human_disease_textmining_filtered.tsv"), ), "human_disease_knowledge_filtered.tsv": CacheItem( name="DISEASES knowledge channel", cached="https://drive.google.com/uc?id=1qGUnjVwF9-8p5xvp8_6CfVsbMSM_wkld", - online="https://download.jensenlab.org/human_disease_knowledge_filtered.tsv", + online=Service("https://download.jensenlab.org/human_disease_knowledge_filtered.tsv"), ), }, "BioMart": { "ensg-ensp.tsv": CacheItem( name="BioMart ENSG <-> ENSP mapping", cached="https://drive.google.com/uc?id=1-gPrDoluXIGydzWKjWEnW-nWhYu3YkHL", - online=fetch_biomart_url((dir_path / "biomart" / "ensg-ensp.xml").read_text()), + online=fetch_biomart_service((dir_path / "biomart" / "ensg-ensp.xml").read_text()), ) }, "DepMap": { "OmicsProfiles.csv": CacheItem( name="DepMap omics metadata", cached="https://drive.google.com/uc?id=1i54aKfO0Ci2QKLTNJnuQ_jgGhH4c9rTL", - online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2F2025-05-01-master-mapping-table-28c2.12%2Fpublic_release_date.2025-05-01.master_mapping_table.csv&dl_name=OmicsProfiles.csv&bucket=depmap-external-downloads", + online=Service("https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2F2025-05-01-master-mapping-table-28c2.12%2Fpublic_release_date.2025-05-01.master_mapping_table.csv&dl_name=OmicsProfiles.csv&bucket=depmap-external-downloads"), ), 
"CRISPRGeneDependency.csv": CacheItem( name="DepMap gene dependency probability estimates", cached="https://drive.google.com/uc?id=122rWNqT_u3M7B_11WYZMtOLiPbBykkaz", - online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2F25q2-public-557c.3%2FCRISPRGeneDependency.csv&dl_name=CRISPRGeneDependency.csv&bucket=depmap-external-downloads", + online=Service("https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2F25q2-public-557c.3%2FCRISPRGeneDependency.csv&dl_name=CRISPRGeneDependency.csv&bucket=depmap-external-downloads"), ), "OmicsSomaticMutationsMatrixDamaging.csv": CacheItem( name="DepMap genotyped matrix", cached="https://drive.google.com/uc?id=1W7N2H0Qi7NwmTmNChcwa2ZZ4WxAuz-Xh", - online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2Fpublic-25q2-c5ef.87%2FOmicsSomaticMutationsMatrixDamaging.csv&dl_name=OmicsSomaticMutationsMatrixDamaging.csv&bucket=depmap-external-downloads", + online=Service("https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2Fpublic-25q2-c5ef.87%2FOmicsSomaticMutationsMatrixDamaging.csv&dl_name=OmicsSomaticMutationsMatrixDamaging.csv&bucket=depmap-external-downloads"), ), "OmicsExpressionProteinCodingGenesTPMLogp1.csv": CacheItem( name="DepMap model-level TPMs", cached="https://drive.google.com/uc?id=1P0m88eXJ8GPdru8h9oOcHPeXKU7ljIrP", - online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2Fpublic-25q2-c5ef.73%2FOmicsExpressionProteinCodingGenesTPMLogp1.csv&dl_name=OmicsExpressionProteinCodingGenesTPMLogp1.csv&bucket=depmap-external-downloads", + online=Service("https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2Fpublic-25q2-c5ef.73%2FOmicsExpressionProteinCodingGenesTPMLogp1.csv&dl_name=OmicsExpressionProteinCodingGenesTPMLogp1.csv&bucket=depmap-external-downloads"), ), "OmicsCNGeneWGS.csv": CacheItem( name="DepMap gene-level copy number data", cached="https://drive.google.com/uc?id=1TPp3cfK7OZUrftucr3fLO-krXSQAA6Ub", - online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2Fpublic-25q2-c5ef.104%2FOmicsCNGeneWGS.csv&dl_name=OmicsCNGeneWGS.csv&bucket=depmap-external-downloads", + online=Service("https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2Fpublic-25q2-c5ef.104%2FOmicsCNGeneWGS.csv&dl_name=OmicsCNGeneWGS.csv&bucket=depmap-external-downloads"), ), }, "iRefIndex": { @@ -190,30 +230,166 @@ def download(self, output: str | PathLike): # The following files are from https://github.com/gitter-lab/osmotic-stress "prizes.txt": CacheItem( name="Osmotic Stress Prizes", - online="https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Input%20Data/prizes.txt", + online=Service("https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Input%20Data/prizes.txt"), cached="https://drive.google.com/uc?id=16WDQs0Vjv6rI12-hbifsbnpH31jMGhJg" ), "ChasmanNetwork-DirUndir.txt": CacheItem( name="Network Input", - online="https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Input%20Data/ChasmanNetwork-DirUndir.txt", + online=Service("https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Input%20Data/ChasmanNetwork-DirUndir.txt"), cached="https://drive.google.com/uc?id=1qYXPaWcPU72YYME7NaBzD7thYCHRzrLH" ), "dummy.txt": CacheItem( name="Dummy Nodes File", - 
online="https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Input%20Data/dummy.txt", + online=Service("https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Input%20Data/dummy.txt"), cached="https://drive.google.com/uc?id=1dsFIhBrIEahggg0JPxw64JwS51pKxoQU" ), "_edgeFreq.eda ": CacheItem( name="Case Study Omics Integrator Edge Frequencies", - online="https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Notebooks/Forest-TPS/_edgeFreq.eda", + online=Service("https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Notebooks/Forest-TPS/_edgeFreq.eda"), cached="https://drive.google.com/uc?id=1M_rxEzUCo_EVuFyM47OEH2J-4LB3eeCR" ), "goldStandardUnionDetailed.txt": CacheItem( name="Gold Standard Reference Pathways", - online="https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/data/evaluation/goldStandardUnionDetailed.txt", + online=Service("https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/data/evaluation/goldStandardUnionDetailed.txt"), cached="https://drive.google.com/uc?id=1-_zF9oKFCNmJbDCC2vq8OM17HJw80s2T" ), }, + "Surfaceome": { + "table_S3_surfaceome.xlsx": CacheItem( + name="Human surfaceome", + online=Service("http://wlab.ethz.ch/surfaceome/table_S3_surfaceome.xlsx"), + cached="https://docs.google.com/uc?id=1cBXYbDnAJVet0lv3BRrizV5FuqfMbBr0" + ) + }, + "TranscriptionFactors": { + "Homo_sapiens_TF.tsv": CacheItem.cache_only( + name="Human transcription factors", + # This server has anti-bot protection, so to respect their wishes, we don't download from the server. + # The original URL is https://guolab.wchscu.cn/AnimalTFDB4_static/download/TF_list_final/Homo_sapiens_TF, + # which is accessible from https://guolab.wchscu.cn/AnimalTFDB4//#/Download -> Homo sapiens + # (also under the Internet Archive as of Feb 2nd, 2026. If the original artifact disappears, the drive link below should suffice.) + cached="https://drive.google.com/uc?id=1fVi18GpudUlquRPHgUJl3H1jy54gO-uz", + ) + }, + "PathwayCommons": { + # TODO: all of these share the same common URL: can we make this API a little nicer? 
+ "PANTHER": { + "Apoptosis_signaling_pathway.txt": CacheItem( + name="Apoptosis Signaling Pathway", + cached="https://drive.google.com/uc?id=1BPcnvqHrGMQeX4oQx2ow3OribgPxzwhG", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00006"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "B_cell_activation.txt": CacheItem( + name="B cell activation", + cached="https://drive.google.com/uc?id=1iWcb5AfdobGncRB6xQ6T5qunXzb6Gxd-", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00010"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "Beta3_adrenergic_receptor_signaling_pathway.txt": CacheItem( + name="Beta3_adrenergic_receptor_signaling_pathway", + cached="https://drive.google.com/uc?id=1jrJzrDvhDAs818wYjQ_dm1irOz8Bv4lk", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP04379"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "Cadherin_signaling_pathway.txt": CacheItem( + name="Cadherin signaling pathway", + cached="https://drive.google.com/uc?id=14Of-6mwIpul_QciyJ-Xb9f7t-IrVcIna", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00012"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "Fas_signaling_pathway.txt": CacheItem( + name="FAS signaling_pathway", + cached="https://drive.google.com/uc?id=121cHJf0ZtglQHvy9xuEpYSjwBbJV9Fju", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00020"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "FGF_signaling_pathway.txt": CacheItem( + name="FGF signaling pathway", + cached="https://drive.google.com/uc?id=1PIiWK1-ImXE1YHdDh1hGUVB01Ye8brQg", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00021"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "Hedgehog_signaling_pathway.txt": CacheItem( + name="Hedgehog signaling pathway", + cached="https://drive.google.com/uc?id=1i7HKn4nlJQcaXUDXpbpDFBxbkBXZC0xQ", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00025"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "Insulin_IGF_pathway_protein_kinase_B_signaling_cascade.txt": CacheItem( + name="Insulin/IGF pathway-protein kinase B signaling cascade", + cached="https://drive.google.com/uc?id=1Xkxcm0ngrE8otau9ccyPeCg7KZUdhJf7", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00033"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "Interferon_gamma_signaling_pathway.txt": CacheItem( + name="Interferon-gamma signaling pathway", + cached="https://drive.google.com/uc?id=1aPqi0A5ZIOA5kKELVUI_NvC8taiHll5z", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00035"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "Interleukin_signaling_pathway.txt": CacheItem( + name="Interleukin signaling pathway", + cached="https://drive.google.com/uc?id=1IOv14pRJ8aN9LRnkZ4BQXf3QGUAashku", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00036"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "JAK_STAT_signaling_pathway.txt": CacheItem( + name="JAK/STAT signaling pathway", 
+ cached="https://drive.google.com/uc?id=1QzMEMUZzeoxUYZZRGcm6Al_HzH6pmwED", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00038"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "Nicotinic_acetylcholine_receptor_signaling_pathway.txt": CacheItem( + name="Nicotinic acetylcholine receptor signaling pathway", + cached="https://drive.google.com/uc?id=1SdnKr4TthfmZWgMA_FOlTmf-EEpNsdzx", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00044"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "Notch_signaling_pathway.txt": CacheItem( + name="Notch signaling pathway", + cached="https://drive.google.com/uc?id=1qfyxuc1EomOKGRyI7QyQ7LUUhLPZytz5", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00045"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "PDGF_signaling_pathway.txt": CacheItem( + name="PDGF signaling pathway", + cached="https://drive.google.com/uc?id=1A9hl340XKnZeNfd3hiiX7lxOVV94lQ5s", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00047"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "Ras_pathway.txt": CacheItem( + name="Ras pathway", + cached="https://drive.google.com/uc?id=1wNizL5wDh48E5YxHcZjURa9UeKMONrgr", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP04393"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "T_cell_activation.txt": CacheItem( + name="T cell activation", + cached="https://drive.google.com/uc?id=1t5G_jN8QSOiVceQGAmKvbYebkV1G5oJy", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00053"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "Toll_receptor_signaling_pathway.txt": CacheItem( + name="Toll receptor signaling pathway", + cached="https://drive.google.com/uc?id=1nFix8mMvuU_Vu9tExwgaS279nynqM_Oo", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00054"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "VEGF_signaling_pathway.txt": CacheItem( + name="VEGF signaling pathway", + cached="https://drive.google.com/uc?id=1W1G0TmA6-JLF9pIZD0TR4w95IwG2IALs", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00056"), + status=OnlineStatus.INTERMITTENT_HTML + ), + "Wnt_signaling_pathway.txt": CacheItem( + name="Wnt signaling pathway", + cached="https://drive.google.com/uc?id=1diaacbik5hcA9Fo7vMXFAP_wXRe0xCLB", + online=Service("https://www.pathwaycommons.org/pc2/get?format=TXT&uri=https%3A%2F%2Fidentifiers.org%2Fpanther.pathway%3AP00057"), + status=OnlineStatus.INTERMITTENT_HTML + ), + } + } } diff --git a/datasets/README.md b/datasets/README.md index a53730c9..c26dc862 100644 --- a/datasets/README.md +++ b/datasets/README.md @@ -11,3 +11,8 @@ Many of the datasets here have been stripped of their extra post-analysis. 
Here, - [`diseases`](https://github.com/Reed-CompBio/spras-benchmarking/tree/3c0155567dbc43278531b91f9173f6d4f4486dd8/datasets/diseases) - [`depmap`](https://github.com/Reed-CompBio/spras-benchmarking/tree/b332c0ab53868f111cb89cd4e9f485e8c19aa9e3/datasets/depmap) - [`yeast-osmotic-stress`](https://github.com/Reed-CompBio/spras-benchmarking/tree/8f69dcdf4a52607347fe3a962b753df396e44cda/yeast-osmotic-stress) + +## `explore` folders + +To motivate certain decisions made in-code, such as `synthetic-data`'s PANTHER pathway choices, we provide scripts that use live data +to assist in data curation. These folders can also contain exploratory CLIs for motivating e.g. magic constants. diff --git a/datasets/diseases/Snakefile b/datasets/diseases/Snakefile index 0455b57a..93de50d5 100644 --- a/datasets/diseases/Snakefile +++ b/datasets/diseases/Snakefile @@ -13,7 +13,7 @@ produce_fetch_rules({ "raw/HumanDO.tsv": ["DISEASES", "HumanDO.tsv"], "raw/tiga_gene-trait_stats.tsv": ["DISEASES", "tiga_gene-trait_stats.tsv"], "raw/ensg-ensp.tsv": ["BioMart", "ensg-ensp.tsv"], - "raw/9606.protein.links.txt": FetchConfig(["STRING", "9606", "9606.protein.links.txt.gz"], uncompress=True), + "raw/9606.protein.links.full.txt": FetchConfig(["STRING", "9606", "9606.protein.links.full.txt.gz"], uncompress=True), "raw/9606.protein.aliases.txt": FetchConfig(["STRING", "9606", "9606.protein.aliases.txt.gz"], uncompress=True), }) @@ -42,7 +42,7 @@ rule files: input: "data/inputs.csv", "data/gold_standard.csv", - "raw/9606.protein.links.txt" + "raw/9606.protein.links.full.txt" output: # These are the two we use for the SPRAS run for now "GS_files/Alopecia_areata_GS.txt", diff --git a/datasets/diseases/scripts/files.py b/datasets/diseases/scripts/files.py index dc5a949b..f8704461 100644 --- a/datasets/diseases/scripts/files.py +++ b/datasets/diseases/scripts/files.py @@ -42,7 +42,7 @@ def main(): # See /cache/directory.py for information on how this was grabbed. # 9606 is the organism code for homo sapiens and the required background interactome of DISEASES. - string = pd.read_csv(diseases_path / "raw" / "9606.protein.links.txt", sep=" ", skiprows=[0], header=None) + string = pd.read_csv(diseases_path / "raw" / "9606.protein.links.full.txt", sep=" ", skiprows=[0], header=None) # Threshold anything above a confidence score of 900 to trim down the background interactome string = string[string.iloc[:, 2] > 900] diff --git a/datasets/synthetic-data/.gitignore b/datasets/synthetic-data/.gitignore new file mode 100644 index 00000000..ca5e16da --- /dev/null +++ b/datasets/synthetic-data/.gitignore @@ -0,0 +1,3 @@ +intermediate +processed +raw \ No newline at end of file diff --git a/datasets/synthetic-data/README.md b/datasets/synthetic-data/README.md new file mode 100644 index 00000000..1d1bf1b4 --- /dev/null +++ b/datasets/synthetic-data/README.md @@ -0,0 +1,67 @@ +# Synthetic Data + +## Download STRING Human Interactome +1. Download the STRING *Homo sapiens* `9606.protein.links.full.v12.0.txt.gz` database file from [STRING](https://string-db.org/cgi/download?sessionId=bL9sRTdIaUEt&species_text=Homo+sapiens&settings_expanded=0&min_download_score=0&filter_redundant_pairs=0&delimiter_type=txt). +2. Move the downloaded file into the `raw/human-interactome/` folder. +3. From the `raw/synthetic-data/` directory, extract the file using: + + ```sh + gunzip human-interactome/9606.protein.links.full.v12.0.txt.gz + ``` + +## Download New PANTHER Pathways +1. Visit [Pathway Commons](https://www.pathwaycommons.org/). +2. 
Search for the desired pathway (e.g., "signaling") and filter the results by the **PANTHER pathway** data source.
+   Example: [Search for "Signaling" filtered by PANTHER pathway](https://apps.pathwaycommons.org/search?datasource=panther&q=Signaling&type=Pathway)
+3. Click on the desired pathway and download the **Extended SIF** version of the pathway.
+4. In the `raw/pathway-data/` folder, create a new subfolder named after the pathway you downloaded.
+5. Move the downloaded Extended SIF file to this new folder (as a `.txt` file). Rename the file to match the subfolder name exactly.
+
+## Sources and Targets
+
+[Sources](http://wlab.ethz.ch/surfaceome/), or `table_S3_surfaceome.xlsx`, (see [original paper](https://doi.org/10.1073/pnas.1808790115))
+are receptors from the in silico human surfaceome.
+
+[Targets](https://guolab.wchscu.cn/AnimalTFDB4//#/), or `Homo_sapiens_TF.tsv`, (see [original paper](https://doi.org/10.1093/nar/gkac907))
+are human transcription factors.
+
+## Steps to Generate SPRAS-Compatible Pathways
+
+This entire workflow can also be done with `uv run snakemake --cores 1` inside this directory.
+
+### 1. Process PANTHER Pathways
+
+1. Open `Snakefile` and add the name of any new pathways to the `pathways` entry.
+2. Run the command:
+   ```sh
+   uv run scripts/process_panther_pathway.py
+   ```
+3. This will create five new files in the respective `pathway` subfolder of the `pathway-data/` directory:
+- `edges.txt`
+- `nodes.txt`
+- `prizes-100.txt`
+- `sources.txt`
+- `targets.txt`
+
+### 2. Convert Pathways to SPRAS-Compatible Format
+1. In `panther_spras_formatting.py`, add the name of any new pathways to the `pathway_dirs` list on **line 8**.
+2. From the `synthetic-data/` directory, run the command:
+```
+python scripts/panther_spras_formatting.py
+```
+3. This will create a new folder named `spras-compatible-pathway-data`, containing subfolders for each PANTHER pathway in SPRAS-compatible format.
+Each subfolder will include the following three files:
+- `<pathway>_gs_edges.txt`
+- `<pathway>_gs_nodes.txt`
+- `<pathway>_node_prizes.txt`
+
+# Pilot Data
+For the pilot data, use the list `["Wnt_signaling", "JAK_STAT_signaling", "Interferon_gamma_signaling", "FGF_signaling", "Ras"]` in both:
+- the list in `combine.py`
+- the list in `overlap_analytics.py`
+
+Make sure these pathways are also added to:
+- the `pathways` vector in `ProcessPantherPathway.R`
+- the list in `panther_spras_formatting.py`
+
+**Once you’ve updated the pathway lists in all relevant scripts, run all the steps above to generate the Pilot dataset.**
diff --git a/datasets/synthetic-data/Snakefile b/datasets/synthetic-data/Snakefile
new file mode 100644
index 00000000..88bbe33c
--- /dev/null
+++ b/datasets/synthetic-data/Snakefile
@@ -0,0 +1,91 @@
+include: "../../cache/Snakefile"
+
+pathways = [
+    "Apoptosis_signaling_pathway",
+    "B_cell_activation",
+    "Beta3_adrenergic_receptor_signaling_pathway",
+    "Cadherin_signaling_pathway",
+    "Fas_signaling_pathway",
+    "FGF_signaling_pathway",
+    "Hedgehog_signaling_pathway",
+    "Insulin_IGF_pathway_protein_kinase_B_signaling_cascade",
+    "Interferon_gamma_signaling_pathway",
+    "Interleukin_signaling_pathway",
+    "JAK_STAT_signaling_pathway",
+    "Nicotinic_acetylcholine_receptor_signaling_pathway",
+    "Notch_signaling_pathway",
+    "PDGF_signaling_pathway",
+    "Ras_pathway",
+    "T_cell_activation",
+    "Toll_receptor_signaling_pathway",
+    "VEGF_signaling_pathway",
+    "Wnt_signaling_pathway",
+]
+
+# TODO: deduplicate from sampling.py
+thresholds = list(map(str, map(lambda x: (x + 1) / 10, range(10))))
+
+rule all:
+    input:
+        "raw/9606.protein.links.full.v12.0.txt",
+        expand([
+            "thresholded/{threshold}/{pathway}/interactome.txt",
+            "thresholded/{threshold}/{pathway}/gold_standard_edges.txt",
+        ], pathway=pathways, threshold=thresholds)
+
+produce_fetch_rules({
+    **{
+        "raw/9606.protein.links.full.v12.0.txt": FetchConfig(["STRING", "9606", "9606.protein.links.full.txt.gz"], uncompress=True),
+        "raw/human-interactome/table_S3_surfaceome.xlsx": ["Surfaceome", "table_S3_surfaceome.xlsx"],
+        "raw/human-interactome/Homo_sapiens_TF.tsv": ["TranscriptionFactors", "Homo_sapiens_TF.tsv"],
+        "raw/human-interactome/HUMAN_9606_idmapping_selected.tsv": FetchConfig(["UniProt", "9606", "HUMAN_9606_idmapping_selected.tab.gz"], uncompress=True),
+    },
+    # See directory.py for the online/cached location of all pathways from PathwayCommons.
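+    # (For instance, the comprehension below expands "Wnt_signaling_pathway" into
+    #  "raw/pathway-data/Wnt_signaling_pathway.txt": ["PathwayCommons", "PANTHER", "Wnt_signaling_pathway.txt"].)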
+ **{f"raw/pathway-data/{k}.txt": ["PathwayCommons", "PANTHER", f"{k}.txt"] for k in pathways} +}) +rule process_tfs: + input: + "raw/human-interactome/Homo_sapiens_TF.tsv", + "raw/human-interactome/HUMAN_9606_idmapping_selected.tsv" + output: + "raw/human-interactome/Homo_sapiens_TF_Uniprot.tsv" + shell: + "uv run scripts/map_transcription_factors.py" + +rule process_panther_pathway: + input: + "raw/pathway-data/{pathway}.txt", + "raw/human-interactome/table_S3_surfaceome.xlsx", + "raw/human-interactome/Homo_sapiens_TF_Uniprot.tsv" + output: + "intermediate/{pathway}/edges.txt", + "intermediate/{pathway}/nodes.txt", + "intermediate/{pathway}/sources.txt", + "intermediate/{pathway}/targets.txt", + "intermediate/{pathway}/prizes.txt" + shell: + "uv run scripts/process_panther_pathway.py {wildcards.pathway}" + +rule make_spras_compatible: + input: + "intermediate/{pathway}/edges.txt", + "intermediate/{pathway}/nodes.txt", + "intermediate/{pathway}/sources.txt", + "intermediate/{pathway}/targets.txt", + "intermediate/{pathway}/prizes.txt" + output: + "processed/{pathway}/{pathway}_node_prizes.txt", + "processed/{pathway}/{pathway}_gs_edges.txt", + "processed/{pathway}/{pathway}_gs_nodes.txt" + shell: + "uv run scripts/panther_spras_formatting.py {wildcards.pathway}" + +rule threshold: + input: + "processed/{pathway}/{pathway}_node_prizes.txt", + "processed/{pathway}/{pathway}_gs_edges.txt" + output: + expand("thresholded/{threshold}/{{pathway}}/interactome.txt", threshold=thresholds), + expand("thresholded/{threshold}/{{pathway}}/gold_standard_edges.txt", threshold=thresholds) + shell: + "uv run scripts/sampling.py {wildcards.pathway}" diff --git a/datasets/synthetic-data/explore/README.md b/datasets/synthetic-data/explore/README.md new file mode 100644 index 00000000..50faeeaa --- /dev/null +++ b/datasets/synthetic-data/explore/README.md @@ -0,0 +1,5 @@ +# explore + +See [the datasets readme](../../README.md) for the motivation for the `explore` folder. + +This folder contains `candidates.py`, which is a CLI for finding all viable pathways with our custom filtering criteria. diff --git a/datasets/synthetic-data/explore/candidates.py b/datasets/synthetic-data/explore/candidates.py new file mode 100644 index 00000000..3d96ccb4 --- /dev/null +++ b/datasets/synthetic-data/explore/candidates.py @@ -0,0 +1,64 @@ +""" +Utility CLI for finding pathway critetia from PathwayCommons based on our desired participant count. +This is meant to be interactive for easily examining the available pathways from PathwayCommons over PANTHER +(and perhaps more later!). + +See https://www.pathwaycommons.org/pc2/swagger-ui/index.html#/api-controller-v-2 for the API. +""" + +import requests + +from pydantic import BaseModel + +SEARCH_URL = "https://www.pathwaycommons.org/pc2/v2/search" + +# These schemas were manually examined from the API response, and are thus not exhaustive. 
+class SearchHit(BaseModel): + uri: str + name: str + biopaxClass: str + numParticipants: int + numProcesses: int + +class SearchResponse(BaseModel): + numHits: int + maxHitsPerPage: int + searchHit: list[SearchHit] + +def request(page: int) -> SearchResponse: + return SearchResponse.model_validate(requests.post( + 'https://www.pathwaycommons.org/pc2/v2/search', + headers={ + 'accept': 'application/json', + 'Content-Type': 'application/json', + }, json={ + # Indicates a BioPAX pathway + 'q': 'xrefid:P*', + 'type': 'pathway', + 'organism': [ + '9606', + ], + 'datasource': [ + 'panther', + ], + 'page': page, + } + ).json()) + +def main(): + # TODO: weirdly constructed loop? could be nicer if we use numHits and maxHitsPerPage + hits: list[SearchHit] = [] + page = 0 + response = request(page) + print(f"Paginating {page}...") + while len(response.searchHit) != 0: + hits.extend(response.searchHit) + page += 1 + response = request(page) + print(f"Paginating {page}...") + + for hit in hits: + print(f"({hit.numParticipants}) {hit.name}") + +if __name__ == "__main__": + main() diff --git a/datasets/synthetic-data/scripts/interactome.py b/datasets/synthetic-data/scripts/interactome.py new file mode 100644 index 00000000..ac8bc399 --- /dev/null +++ b/datasets/synthetic-data/scripts/interactome.py @@ -0,0 +1,81 @@ +import pandas +from pathlib import Path + +current_directory = Path(__file__).parent.resolve() + + +def main(): + # Convert the interactome to SPRAS format + print("Reading interactome...") + interactome_df = pandas.read_csv( + current_directory / ".." / "raw" / "9606.protein.links.full.v12.0.txt", sep=" ", usecols=["protein1", "protein2", "combined_score"] + ) + interactome_df.columns = ["Protein1", "Protein2", "Weight"] + + # We also want to representatively remove a certain percentage of elements from the interactome, + # to make sure our interactome downsampling preserves edge weight distributions + # (we don't care to preserve other major topological properties just yet.) + # since this file is large, we opt for streaming the interactome for removing edges instead + + print("Initially processing interactome...") + interactome_df["Weight"] = interactome_df["Weight"].div(1000) # scores are from 1-1000: we normalize from 0-1. + interactome_df["Direction"] = "U" + print("Sorting interactome...") + interactome_df = interactome_df.sort_values("Weight", kind="stable") + + print("Mapping interactome...") + # STRINGDB -> UniProt accession ID pairings + UniProt_AC = pandas.read_csv(current_directory / ".." 
/ "raw" / "human-interactome" / "String_to_Uniprot_ids_2025_04_06.tsv", sep="\t", header=0) + one_to_many_dict = UniProt_AC.groupby("From")["Entry"].apply(list).to_dict() + + def get_aliases(protein_id): + return one_to_many_dict.get(protein_id, []) + + interactome_df["Protein1_uniprot"] = interactome_df["Protein1"].apply(get_aliases) + interactome_df["Protein2_uniprot"] = interactome_df["Protein2"].apply(get_aliases) + + interactome_df = interactome_df.explode("Protein1_uniprot").explode("Protein2_uniprot") + + missing_alias_edges = interactome_df[(interactome_df["Protein1_uniprot"].isna()) | (interactome_df["Protein2_uniprot"].isna())] + + proteins_without_aliases = ( + pandas.concat( + [ + missing_alias_edges.loc[missing_alias_edges["Protein1_uniprot"].isna(), "Protein1"], + missing_alias_edges.loc[missing_alias_edges["Protein2_uniprot"].isna(), "Protein2"], + ], + ignore_index=True, + ) + .drop_duplicates() + .reset_index(drop=True) + ) + proteins_without_aliases = proteins_without_aliases.to_frame(name="protein") + + removed_edges = missing_alias_edges[["Protein1", "Protein2"]] + removed_edges = removed_edges.drop_duplicates().reset_index(drop=True) + + (current_directory / ".." / "processed" / "interactomes" / "uniprot-threshold-interactomes").mkdir(exist_ok=True, parents=True) + proteins_without_aliases.to_csv( + current_directory / ".." / "processed" / "interactomes" / "uniprot-threshold-interactomes" / "proteins_missing_aliases.csv", + sep="\t", + index=False, + header=True, + ) + removed_edges.to_csv( + current_directory / ".." / "processed" / "interactomes" / "uniprot-threshold-interactomes" / "removed_edges.txt", + sep="\t", + index=False, + header=True, + ) + interactome_df = interactome_df.dropna(subset=["Protein1_uniprot", "Protein2_uniprot"]).reset_index(drop=True) + interactome_df = interactome_df[["Protein1_uniprot", "Protein2_uniprot", "Weight", "Direction"]] + + print("Counting weight counts...") + interactome_df["Weight"].value_counts(sort=False).to_csv(current_directory / ".." / "processed" / "weight-counts.tsv", sep="\t") + + print("Saving interactome...") + interactome_df.to_csv(current_directory / ".." / "processed" / "interactome.tsv", sep="\t", header=False, index=False) + + +if __name__ == "__main__": + main() diff --git a/datasets/synthetic-data/scripts/map_transcription_factors.py b/datasets/synthetic-data/scripts/map_transcription_factors.py new file mode 100644 index 00000000..31741bc8 --- /dev/null +++ b/datasets/synthetic-data/scripts/map_transcription_factors.py @@ -0,0 +1,35 @@ +import pandas +from pathlib import Path + +current_directory = Path(__file__).parent.resolve() + +interactome_folder = current_directory / ".." / "raw" / "human-interactome" + + +def main(): + tf_df = pandas.read_csv(interactome_folder / "Homo_sapiens_TF.tsv", sep="\t", header=0) + # The very powerful UniProt-provided mapping file: its Ensembl mappings are a semicolon-delimeted list of Emsembl IDs containing + # attached isoforms (and not all UniProtKB-AC identifiers have those!) so we'll need to do some extra post-processing. + idmapping_selected_df = pandas.read_csv( + interactome_folder / "HUMAN_9606_idmapping_selected.tsv", + header=None, + # See directory.py for the README associated with this mapping file. 
+ usecols=[0, 18], + names=["UniProtKB-AC", "Ensembl"], + sep="\t", + ) + idmapping_selected_df = idmapping_selected_df[idmapping_selected_df["Ensembl"].notnull()] + # Handle our ;-delimited list + idmapping_selected_df["Ensembl"] = idmapping_selected_df["Ensembl"].str.split("; ") + idmapping_selected_df = idmapping_selected_df.explode("Ensembl") + # Drop isoforms + idmapping_selected_df["Ensembl"] = idmapping_selected_df["Ensembl"].str.split(".").str[0] + + tf_df = tf_df.merge(idmapping_selected_df, on="Ensembl", how="inner") + tf_df = tf_df.explode("UniProtKB-AC") + tf_df = tf_df.fillna("NA") + tf_df.to_csv(interactome_folder / "Homo_sapiens_TF_Uniprot.tsv", header=True, sep="\t", index=False) + + +if __name__ == "__main__": + main() diff --git a/datasets/synthetic-data/scripts/panther_spras_formatting.py b/datasets/synthetic-data/scripts/panther_spras_formatting.py new file mode 100644 index 00000000..56cda99b --- /dev/null +++ b/datasets/synthetic-data/scripts/panther_spras_formatting.py @@ -0,0 +1,98 @@ +import pandas as pd +from pathlib import Path +import sys + +current_directory = Path(__file__).parent.resolve() + +spras_compatible_dir = Path(current_directory, "..", "processed") +directory = Path(current_directory, "..", "intermediate") + +directed = [ + "controls-state-change-of", + "controls-transport-of", + "controls-phosphorylation-of", + "controls-expression-of", + "catalysis-precedes", + "consumption-controlled-by", + "controls-production-of", + "controls-transport-of-chemical", + "chemical-affects", + "used-to-produce", + "consumption-controled-by", +] + +undirected = ["in-complex-with", "interacts-with", "neighbor-of", "reacts-with"] + + +def raise_unknown_direction(dir: str): + raise ValueError(f"Unknown direction {dir}") + + +def main(): + spras_compatible_dir.mkdir(exist_ok=True) + + pathway = sys.argv[1] + pathway_folder = directory / pathway + + # Create the output folder "uniprot" within the pathway directory + out_folder = spras_compatible_dir / pathway + out_folder.mkdir(exist_ok=True) + + nodes_file = pathway_folder / "nodes.txt" + nodes_df = pd.read_csv(nodes_file, sep="\t") + + # a dictionary mapping gene -> Uniprot accession ID + gene_to_uniprot = pd.Series(nodes_df["uniprot"].values, index=nodes_df["NODE"]).to_dict() + + # nodes + nodes_uniprot = nodes_df[["uniprot"]] + nodes_uniprot.to_csv(out_folder / f"{pathway}_gs_nodes.txt", sep="\t", index=False, header=False) + + # edges + edges_file = pathway_folder / "edges.txt" + edges_df = pd.read_csv(edges_file, sep="\t", header=0) + edges_df["NODE1"] = edges_df["NODE1"].map(gene_to_uniprot) + edges_df["NODE2"] = edges_df["NODE2"].map(gene_to_uniprot) + edges_df["Rank"] = 1 + edges_df["Direction"] = edges_df["INTERACTION_TYPE"].apply( + lambda x: "D" if x in directed else ("U" if x in undirected else raise_unknown_direction(x)) + ) + edges_df = edges_df.drop(columns="INTERACTION_TYPE") + + # remove duplicate rows + # sort by (node1 and node2) to ensure deterministic sorting + edges_df = edges_df.sort_values(by=["NODE1", "NODE2"], ascending=True, ignore_index=True) + undirected_mask = edges_df["Direction"] == "U" + min_nodes = edges_df.loc[undirected_mask, ["NODE1", "NODE2"]].min(axis=1) + max_nodes = edges_df.loc[undirected_mask, ["NODE1", "NODE2"]].max(axis=1) + edges_df.loc[undirected_mask, "NODE1"] = min_nodes + edges_df.loc[undirected_mask, "NODE2"] = max_nodes + + # keep 1 directed and 1 undirected edge if both exist + # since rank is 1, we don't need to sort by rank. 
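+    # (Note that "Direction" is part of the whole-row duplicate key, so a directed and an undirected
+    #  copy of the same node pair both survive drop_duplicates, while exact repeats collapse to one row.)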
+ edges_df = edges_df.sort_values(by=["NODE1", "NODE2", "Direction"], ascending=True, ignore_index=True) + edges_df = edges_df.drop_duplicates(keep="first", ignore_index=True) + + edges_df.to_csv(out_folder / f"{pathway}_gs_edges.txt", sep="\t", index=False, header=False) + + # prizes, targets, sources + prizes_file = pathway_folder / "prizes.txt" + prizes_df = pd.read_csv(prizes_file, sep="\t") + + target_file = pathway_folder / "targets.txt" + target_df = pd.read_csv(target_file, sep="\t") + + source_file = pathway_folder / "sources.txt" + source_df = pd.read_csv(source_file, sep="\t") + + # final resulting df combining all the sources, targets, and prizes + prizes_df["sources"] = prizes_df["uniprot"].isin(source_df["uniprot"]) + prizes_df["targets"] = prizes_df["uniprot"].isin(target_df["uniprot"]) + prizes_df["dummy"] = "" + prizes_df.rename(columns={"uniprot": "NODEID", "prizes": "prize"}, inplace=True) + result_df = prizes_df[["NODEID", "prize", "sources", "targets", "active", "dummy"]] + result_df.to_csv(out_folder / f"{pathway}_node_prizes.txt", sep="\t", index=False, header=True) + + +if __name__ == "__main__": + main() diff --git a/datasets/synthetic-data/scripts/process_panther_pathway.py b/datasets/synthetic-data/scripts/process_panther_pathway.py new file mode 100644 index 00000000..a7879948 --- /dev/null +++ b/datasets/synthetic-data/scripts/process_panther_pathway.py @@ -0,0 +1,82 @@ +import argparse +import io +import pandas as pd +from pathlib import Path + +current_directory = Path(__file__).parent.resolve() + +data_directory = current_directory / ".." / "raw" / "pathway-data" +interactome_folder = current_directory / ".." / "raw" / "human-interactome" + + +def process_pathway(file: Path, folder: Path): + file_content = file.read_text() + # This file has two csv files stacked on top of each other. + # This is the header that we are looking for + needle = "PARTICIPANT\tPARTICIPANT_TYPE\tPARTICIPANT_NAME\tUNIFICATION_XREF\tRELATIONSHIP_XREF" + + edges, nodes = file_content.split(needle) + # Re-add the header + nodes = needle + nodes + # https://stackoverflow.com/a/65018984/7589775 read the text + # as a file. + edges_df = pd.read_csv(io.StringIO(edges), header=0, sep="\t") + nodes_df = pd.read_csv(io.StringIO(nodes), header=0, sep="\t") + + # First, get the relevant info from the edges + edges_df = edges_df[["PARTICIPANT_A", "INTERACTION_TYPE", "PARTICIPANT_B"]] + edges_df.columns = ["NODE1", "INTERACTION_TYPE", "NODE2"] + # removing ChEBI identifiers: these aren't proteins and we therefore are not interested in them. 
+ edges_df = edges_df[~edges_df["NODE1"].str.startswith("chebi:")] + edges_df = edges_df[~edges_df["NODE2"].str.startswith("chebi:")] + + # Do the same for the nodes + nodes_df = nodes_df[["PARTICIPANT", "UNIFICATION_XREF"]] + nodes_df.columns = ["NODE", "uniprot"] + # removing the chebi: prefix + nodes_df = nodes_df[~nodes_df["NODE"].str.startswith("chebi:")] + # and remove the uniprot: prefix + nodes_df["uniprot"] = nodes_df["uniprot"].str.removeprefix("uniprot:") + + # Save edges and nodes + edges_df.to_csv(folder / "edges.txt", header=True, index=False, sep="\t") + nodes_df.to_csv(folder / "nodes.txt", header=True, index=False, sep="\t") + + # Then, we need to get the sources and targets, save them, + # and mark them with 1.0 prizes: + + # First, for our targets, or transcription factors + human_tfs = pd.read_csv(interactome_folder / "Homo_sapiens_TF_Uniprot.tsv", sep="\t") + human_tfs = nodes_df.merge(human_tfs, how="inner", left_on="uniprot", right_on="UniProtKB-AC") + human_tfs = human_tfs[["NODE", "uniprot"]] + human_tfs.to_csv(folder / "targets.txt", sep="\t", index=False) + + # Then, for our receptors. NOTE: we skip the first row since it's empty in the XLSX, so this might break if the surfaceome authors fix this. + human_receptors = pd.read_excel(interactome_folder / "table_S3_surfaceome.xlsx", sheet_name="in silico surfaceome only", skiprows=1) + human_receptors = human_receptors[["UniProt accession", "Ensembl gene", "Membranome Almen main-class"]] + human_receptors = human_receptors[human_receptors["Membranome Almen main-class"] == "Receptors"] + human_receptors = nodes_df.merge(human_receptors, how="inner", left_on="uniprot", right_on="UniProt accession") + human_receptors = human_receptors[["NODE", "uniprot"]] + human_receptors.to_csv(folder / "sources.txt", sep="\t", index=False) + + # Finally, scores + scores = pd.concat([human_tfs, human_receptors]).drop_duplicates() + scores["prizes"] = 1 + scores["active"] = "true" + scores.to_csv(folder / "prizes.txt", sep="\t", index=False) + + +def parser(): + parser = argparse.ArgumentParser(prog="PANTHER pathway parser") + + parser.add_argument("pathway", choices=[file.stem for file in data_directory.iterdir()]) + + return parser + + +if __name__ == "__main__": + pathway = parser().parse_args().pathway + pathway_file = data_directory / Path(pathway).with_suffix(".txt") + intermediate_folder = current_directory / ".." / "intermediate" / pathway + intermediate_folder.mkdir(parents=True, exist_ok=True) + process_pathway(pathway_file, intermediate_folder) diff --git a/datasets/synthetic-data/scripts/sampling.py b/datasets/synthetic-data/scripts/sampling.py new file mode 100644 index 00000000..71f52065 --- /dev/null +++ b/datasets/synthetic-data/scripts/sampling.py @@ -0,0 +1,112 @@ +import argparse +import pandas +from pathlib import Path +import collections +from typing import OrderedDict, NamedTuple +from tools.sample import attempt_sample +from tools.trim import trim_data_file + +current_directory = Path(__file__).parent.resolve() + + +# From SPRAS. 
TODO: import once SPRAS uses pixi +def convert_undirected_to_directed(df: pandas.DataFrame) -> pandas.DataFrame: + mask = df["Direction"] == "U" + new_df = df[mask].copy(deep=True) + new_df["Interactor1"], new_df["Interactor2"] = new_df["Interactor2"], new_df["Interactor1"] + new_df["Direction"] = "D" + df.loc[mask, "Direction"] = "D" + df = pandas.concat([df, new_df], ignore_index=True) + return df + + +def parser(): + parser = argparse.ArgumentParser(prog="PANTHER pathway parser") + + parser.add_argument("pathway", choices=[file.stem for file in (current_directory / ".." / "raw" / "pathway-data").iterdir()]) + + return parser + + +def count_weights() -> OrderedDict[int, int]: + """Returns an ordered map (lowest to highest weight) from the weight to the number of elements the weight has""" + weight_counts = pandas.read_csv(current_directory / ".." / "processed" / "weight-counts.tsv", sep="\t") + return collections.OrderedDict(sorted({int(k * 1000): int(v) for k, v in dict(weight_counts.values).items()}.items())) + + +def read_pathway(pathway_name: str) -> pandas.DataFrame: + """ + Returns the directed-only pathway from a pathway name, + with columns Interactor1 -> Interactor2. + """ + pathway_df = pandas.read_csv( + current_directory / ".." / "processed" / pathway_name / f"{pathway_name}_gs_edges.txt", + sep="\t", + names=["Interactor1", "Interactor2", "Weight", "Direction"], + ) + # We consider an undirected edge to be two directed edges + pathway_df = convert_undirected_to_directed(pathway_df) + return pathway_df[["Interactor1", "Interactor2"]] + + +class SourcesTargets(NamedTuple): + sources: list[str] + targets: list[str] + +def get_node_data(pathway_name: str) -> pandas.DataFrame: + return pandas.read_csv( + current_directory / ".." / "processed" / pathway_name / f"{pathway_name}_node_prizes.txt", sep="\t", usecols=["NODEID", "sources", "targets"] + ) + +def sources_and_targets(pathway_node_prizes_df: pandas.DataFrame) -> SourcesTargets: + """ + Returns the sources and targets associated with a particular pathway + """ + sources: list[str] = list(pathway_node_prizes_df[pathway_node_prizes_df["sources"] is True]["NODEID"]) + targets: list[str] = list(pathway_node_prizes_df[pathway_node_prizes_df["targets"] is True]["NODEID"]) + + return SourcesTargets(sources, targets) + + +def main(): + pathway_name = parser().parse_args().pathway + print("Reading interactome...") + interactome_df = pandas.read_csv( + current_directory / ".." / "processed" / "interactome.tsv", + header=None, + sep="\t", + names=["Interactor1", "Interactor2", "Weight", "Direction"], + usecols=[0, 1], + ) + + # For performance reasons (groupby is quite slow), we sample in the interactome using the pre-computed weight-counts.tsv file + weight_mapping = count_weights() + + # Get information about the pathway + pathway_df = read_pathway(pathway_name) + node_data_df = get_node_data(pathway_name) + sources, targets = sources_and_targets(node_data_df) + + # TODO: isolate percentage constant (this currently builds up 0%, 10%, ..., 100%) + for percentage in map(lambda x: (x + 1) / 10, range(10)): + output_directory = current_directory / '..' 
/ 'thresholded' / str(percentage) / pathway_name + output_interactome = output_directory / 'interactome.txt' + output_gold_standard = output_directory / 'gold_standard_edges.txt' + + print(f"Sampling with {percentage * 100:.1f}% of edges...") + attempt_number = 1 + while attempt_sample( + pathway_name, pathway_df, percentage, + weight_mapping, interactome_df, sources, targets, + output_interactome=output_interactome, + output_gold_standard=output_gold_standard) is None: + attempt_number += 1 + print(f"Attempt number {attempt_number}") + + # We're done sampling: + (output_directory / 'attempt-number.txt').write_text(attempt_number) + # we need to trim our data file as well. + trim_data_file(data_df=node_data_df, gold_standard_df=pathway_df).to_csv(output_directory / 'node_prizes.tsv', sep='\t', index=False) + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 9a071ecd..c0decbba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,11 @@ dependencies = [ "gdown>=5.2.0", "more-itertools>=10.7.0", "networkx>=3.6.1", + "openpyxl>=3.1.5", "pandas>=2.3.0", + "paxtools>=6.0.0.post1", + "pydantic>=2.12.5", + "requests>=2.32.5", ] [dependency-groups] diff --git a/run_snakemake.sh b/run_snakemake.sh index 24305244..cd1a6773 100755 --- a/run_snakemake.sh +++ b/run_snakemake.sh @@ -9,7 +9,7 @@ set -o errexit set -o nounset -# Forcibly use the current CWD +# Forcibly use the CWD cd "$(dirname "$0")" main() { @@ -18,6 +18,7 @@ main() { uv run snakemake --cores 1 -d datasets/diseases -s datasets/diseases/Snakefile uv run snakemake --cores 1 -d datasets/rn-muscle-skeletal -s datasets/rn-muscle-skeletal/Snakefile uv run snakemake --cores 1 -d datasets/depmap -s datasets/depmap/Snakefile + uv run snakemake --cores 1 -d datasets/synthetic-data -s datasets/synthetic-data/Snakefile } main "$@" diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 00000000..45fb1104 --- /dev/null +++ b/tools/README.md @@ -0,0 +1,8 @@ +# Dataset Processing Tools + +This includes common tools for doing dataset processing, which take in SPRAS-compatible file formats. This currently includes: + +- `trim.py`: this may be included in SPRAS later, but this contains utilities for trimming a gold standard with its respective interactome, +and the gold standard data with the interactome and the gold standard itself. +- `sample.py`: this samples an interactome and downstream samples the gold standard, preserving a percentage of the associated data in the largest +connected component of the gold standard. _These tools require a gold standard_ diff --git a/tools/sample.py b/tools/sample.py new file mode 100644 index 00000000..c5c56eb9 --- /dev/null +++ b/tools/sample.py @@ -0,0 +1,100 @@ +""" +Tools for sampling interactomes. +""" + +import collections +import networkx +import itertools +import pandas +import random +from typing import OrderedDict, Optional +import os + +def count_weights(weights: dict[float, int]) -> OrderedDict[float, int]: + """ + Returns an ordered map (lowest to highest weight) from the + weight to the number of elements the weight has. 
+ + The full workflow for this function should be: + ```python + count_weights(dict(interactome_df["Weight"].value_counts(sort=False).values)) + ``` + """ + return collections.OrderedDict(sorted({k: int(v) for k, v in weights.items()}.items())) + +def find_connected_sources_targets( + sources: list[str], + targets: list[str], + graph: networkx.Graph +) -> list[tuple[str, str]]: + connections: list[tuple[str, str]] = [] + for source, target in itertools.product(sources, targets): + if graph.has_node(source) and graph.has_node(target) and networkx.has_path(graph, source, target): + connections.append((source, target)) + return connections + +def attempt_sample( + pathway_name: str, + pathway_df: pandas.DataFrame, + percentage: float, + weight_mapping: OrderedDict[int, int], + interactome_df: pandas.DataFrame, + sources: list[str], + targets: list[str], + output_interactome: str | os.PathLike, + output_gold_standard: str | os.PathLike +) -> Optional[list[tuple[str, str]]]: + # TODO: generalize to node prizes/actives + """ + Samples a {pathway_df} (logged as {pathway_name}) along with its backing {interactome_df} + with a certain {percentage} backed by a {weight_mapping} while preserving some {sources} and {targets}, + outputting to {output_interactome} and {output_gold_standard}, + returning the connections between {sources} and {targets}, + or None if the target percentage failed. + """ + interactome_df = sample_interactome(interactome_df, weight_mapping, percentage) + + print(f"Merging {pathway_name} with interactome...") + # While we are merging this graph, we are preparing to compare the connectedness of the prev[ious] and curr[ent] (merged) graph. + prev_graph = networkx.from_pandas_edgelist(pathway_df, source="Interactor1", target="Interactor2") + prev_connections = find_connected_sources_targets(sources, targets, prev_graph) + + print("Checking for pathway connectedness...") + pathway_df = pathway_df.merge(interactome_df, how="inner", on=["Interactor1", "Interactor2"]) + curr_graph = networkx.from_pandas_edgelist(pathway_df, source="Interactor1", target="Interactor2") + curr_connections = find_connected_sources_targets(sources, targets, curr_graph) + + # We ask that at least `percentage` of the sources and targets are connected with one another. + connection_percentage = float(len(curr_connections)) / float(len(prev_connections)) + + if percentage < connection_percentage: + print(f"Got {connection_percentage * 100:.1f}% connections above the {percentage * 100:.1f}% threshold.") + pathway_df.to_csv(output_gold_standard, sep="\t", index=False, header=False) + interactome_df.to_csv(output_interactome, sep='\t', index=False, header=False) + return curr_connections + print(f"Failed {connection_percentage * 100:.1f}% connections below the {percentage * 100:.1f}% threshold.") + return None + +def sample_interactome( + interactome_df: pandas.DataFrame, + weight_mapping: OrderedDict[int, int], + percentage: float +): + """ + Samples an interactome with its weight_counts dictionary. (See `count_weights` for generating `weight_counts`.) + """ + if percentage > 1: + raise RuntimeError(f"Got a percentage above 1 ({percentage})?") + if percentage == 1: + return interactome_df + # Using a list then creating the set is faster because of the sets rather than the gets. 
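+    # (How this works, assuming the interactome rows are written in ascending-Weight order as the
+    #  synthetic-data interactome.py does: weight_mapping holds the bucket size for each weight in
+    #  that same order, so sampling offsets within each bucket and shifting them by the running
+    #  total yields row positions that preserve the original weight distribution.)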
+    print("Creating item samples...")
+    full_list: list[int] = []
+    curr_v = 0
+    for k, v in weight_mapping.items():
+        full_list.extend(map(lambda x: x + curr_v, random.sample(range(v), round(percentage * v))))
+        curr_v += v
+    full_set = set(full_list)
+
+    print("Sampling interactome...")
+    return interactome_df.iloc[list(full_set)]
diff --git a/tools/trim.py b/tools/trim.py
new file mode 100644
index 00000000..64b89ca0
--- /dev/null
+++ b/tools/trim.py
@@ -0,0 +1,9 @@
+import pandas
+
+def trim_data_file(data_df: pandas.DataFrame, gold_standard_df: pandas.DataFrame) -> pandas.DataFrame:
+    """
+    Trims a SPRAS node data file down to the nodes present in the gold standard file.
+    """
+    # We just want the set of all nodes present in the gold standard.
+    gold_standard_nodes = set(gold_standard_df["Interactor1"]).union(set(gold_standard_df["Interactor2"]))
+    return data_df[data_df["NODEID"].isin(gold_standard_nodes)]
diff --git a/uv.lock b/uv.lock
index a00522cf..ecb46fa6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2,6 +2,15 @@ version = 1
 revision = 2
 requires-python = ">=3.13"
 
+[[package]]
+name = "annotated-types"
+version = "0.7.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
+]
+
 [[package]]
 name = "appdirs"
 version = "1.4.4"
@@ -154,6 +163,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/05/d1/8952806fbf9583004ab479d8f58a9496c3d35f6b6009ddd458bdd9978eaf/dpath-2.2.0-py3-none-any.whl", hash = "sha256:b330a375ded0a0d2ed404440f6c6a715deae5313af40bbb01c8a41d891900576", size = 17618, upload-time = "2024-06-12T22:08:01.881Z" },
 ]
 
+[[package]]
+name = "et-xmlfile"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" },
+]
+
 [[package]]
 name = "fastjsonschema"
 version = "2.21.2"
@@ -432,6 +450,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c1/9e/1652778bce745a67b5fe05adde60ed362d38eb17d919a540e813d30f6874/numpy-2.3.2-cp314-cp314t-win_arm64.whl", hash = "sha256:092aeb3449833ea9c0bf0089d70c29ae480685dd2377ec9cdbbb620257f84631", size = 10544226, upload-time = "2025-07-24T20:56:34.509Z" },
 ]
 
+[[package]]
+name = "openpyxl"
+version = "3.1.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "et-xmlfile" },
+]
+sdist = { url = 
"https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" }, +] + [[package]] name = "packaging" version = "25.0" @@ -468,6 +498,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cd/d7/612123674d7b17cf345aad0a10289b2a384bff404e0463a83c4a3a59d205/pandas-2.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d2c3554bd31b731cd6490d94a28f3abb8dd770634a9e06eb6d2911b9827db370", size = 13186141, upload-time = "2025-08-21T10:28:05.377Z" }, ] +[[package]] +name = "paxtools" +version = "6.0.0.post1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2d/76/e65fc320494d07339a7f6b83c23deeb7337e3b7e3814880093114b6f488a/paxtools-6.0.0.post1.tar.gz", hash = "sha256:2fddd9155e92e5a8d5cc4b83427f8e804e6957aa8362d4abbf7656c6f858b9a8", size = 13690012, upload-time = "2026-02-11T07:29:45.375Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/fb/5299ab3d3e4bfb686c36a017c098bfaafd5f3b8e3fa81a8a2e618a50638a/paxtools-6.0.0.post1-py3-none-any.whl", hash = "sha256:c10318fc2a7767c4d39dd7365bfbcf1c1e8052201315024f7bf1c62cca2ef8fe", size = 13693939, upload-time = "2026-02-11T07:29:42.783Z" }, +] + [[package]] name = "platformdirs" version = "4.4.0" @@ -517,6 +556,74 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/15/8d/a6a9d58c929a869f7f1b99b3d37b3f14ef63e2826eef581416338d686c3f/pulp-3.2.2-py3-none-any.whl", hash = "sha256:d3ca5ff11a28b3e7b2508a992d7e51f3533471d89305f0560b5fe3b6cc821043", size = 16385354, upload-time = "2025-07-29T11:42:01.829Z" }, ] +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = 
"https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, +] + [[package]] name = "pyreadline3" version = "3.5.4" @@ -848,7 +955,11 @@ dependencies = [ { name = "gdown" }, { name = "more-itertools" }, { name = "networkx" }, + { name = "openpyxl" }, { name = "pandas" }, + { name = "paxtools" }, + { name = "pydantic" }, + { name = "requests" }, ] 
[package.dev-dependencies] @@ -862,7 +973,11 @@ requires-dist = [ { name = "gdown", specifier = ">=5.2.0" }, { name = "more-itertools", specifier = ">=10.7.0" }, { name = "networkx", specifier = ">=3.6.1" }, + { name = "openpyxl", specifier = ">=3.1.5" }, { name = "pandas", specifier = ">=2.3.0" }, + { name = "paxtools", specifier = ">=6.0.0.post1" }, + { name = "pydantic", specifier = ">=2.12.5" }, + { name = "requests", specifier = ">=2.32.5" }, ] [package.metadata.requires-dev] @@ -919,6 +1034,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + [[package]] name = "tzdata" version = "2025.2"