From a90cb17971bef975d624d0de020f2c4e9a811145 Mon Sep 17 00:00:00 2001 From: yusuf1759 Date: Wed, 18 Feb 2026 12:37:26 -0500 Subject: [PATCH 1/2] chore: fix covalent linkages description --- src/plinder/data/utils/annotations/ligand_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plinder/data/utils/annotations/ligand_utils.py b/src/plinder/data/utils/annotations/ligand_utils.py index a5ccff60..210fa037 100644 --- a/src/plinder/data/utils/annotations/ligand_utils.py +++ b/src/plinder/data/utils/annotations/ligand_utils.py @@ -738,7 +738,7 @@ class Ligand(DocBaseModel): default_factory=set[str], description="Ligand covalent linkages as described in https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/struct_conn.html " + "with _struct_conn.conn_type_id == 'covale', reported in format " - + "{auth_resid}:{resname}{assym_id}{seq_resid}{atom_name}__{auth_resid}:{resname}{assym_id}{seq_resid}{atom_name}", + + "{auth_resid}:{resname}:{assym_id}:{seq_resid}:{atom_name}__{auth_resid}:{resname}:{assym_id}:{seq_resid}:{atom_name}", ) neighboring_residues: dict[str, list[int]] = Field( default_factory=dict, From 0f789fdeb2e8c685ff7e5f25b43fc16badcdbcb7 Mon Sep 17 00:00:00 2001 From: yusuf1759 Date: Wed, 18 Feb 2026 22:43:52 -0500 Subject: [PATCH 2/2] chore: add descriptions to more functions --- .../data/utils/annotations/interface_gap.py | 22 +++++++++++++++++++ .../data/utils/annotations/ligand_utils.py | 14 +++++++++++- .../data/utils/annotations/mmpdb_utils.py | 4 ---- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/plinder/data/utils/annotations/interface_gap.py b/src/plinder/data/utils/annotations/interface_gap.py index cf731bf3..eb0da672 100644 --- a/src/plinder/data/utils/annotations/interface_gap.py +++ b/src/plinder/data/utils/annotations/interface_gap.py @@ -111,6 +111,28 @@ def annotate_interface_gaps( larger_radius: float = 8.0, use_author_fields: bool = False, ) -> dict[str, Any]: + """ + Annotate 
interface atoms in close proximity to gaps. + + Parameters + ---------- + cif_file: Path + System mmcif file + protein_chains: list[str] | None = None + Protein chains + ligand_chains: list[str] | None = None + Ligand chains + smaller_radius: float = 4.0 + Smaller radius used for extracting neighboring atoms + larger_radius: float = 8.0 + Larger radius used for extracting neighboring atoms + use_author_fields: bool = False + Use author-defined fields + Returns + ------- + dict[str, Any] + Dictionary of interface atoms near gaps + """ pdb_id = cif_file.stem.split("_")[0].replace("pdb_0000", "") if ".cif" in cif_file.name: diff --git a/src/plinder/data/utils/annotations/ligand_utils.py b/src/plinder/data/utils/annotations/ligand_utils.py index 210fa037..2d2c0786 100644 --- a/src/plinder/data/utils/annotations/ligand_utils.py +++ b/src/plinder/data/utils/annotations/ligand_utils.py @@ -580,6 +580,18 @@ def annotate_interface_gaps_per_chain( interface_proximal_gaps: dict[str, dict[tuple[str, str], dict[str, int]]], asym_id: str, ) -> tuple[int | None, ...]: + """Parse protein-ligand and protein-protein interface atoms in proximity to gaps + to count interface atoms within 4 Å and within 8 Å of gaps. + This focuses on a specific ligand chain and the interfaces and gaps around it. 
+ + Args: + interface_proximal_gaps (dict[str, dict[tuple[str, str], dict[str, int]]]): + Interface atoms close to gaps (within 4 Å and 8 Å) + asym_id (str): + Asymmetric unit chain id + Returns: + tuple[int, int, int, int, int, int] + """ try: ppi_atoms_within_4A_of_gap = sum( [ @@ -989,7 +1001,7 @@ def from_pli( ligand_like_chains: dict[str, str] Chain: chain type for other ligand-like chains in the entry interface_proximal_gaps: dict[str, dict[tuple[str, str], dict[str, int]]] - TODO: document + Dictionary of protein-ligand and protein-protein interface atoms in proximity to residue gaps all_covalent_dict : dict[str, list[tuple[str, str]]] All "covalent" residue in entry as defined by mmcif annotations. They types are separated by dictionary key and they include: diff --git a/src/plinder/data/utils/annotations/mmpdb_utils.py b/src/plinder/data/utils/annotations/mmpdb_utils.py index 3c82ff74..147d4fa0 100644 --- a/src/plinder/data/utils/annotations/mmpdb_utils.py +++ b/src/plinder/data/utils/annotations/mmpdb_utils.py @@ -264,10 +264,6 @@ def add_mmp_clusters_to_data( pocket_similarity_dict ) - # Load mmp index - # load_mmp_df = pd.read_csv(mmp_index, compression="gzip", sep="\t", header=None) - # load_mmp_df.columns = ["SMILES1", "SMILES2", "id1", "id2", "V1>>V2", "CONSTANT"] - # Pad interger lig ids with zeros load_mmp_df["id1"] = load_mmp_df.id1.apply(pad_integer_lig_ids_with_zeros) load_mmp_df["id2"] = load_mmp_df.id2.apply(pad_integer_lig_ids_with_zeros)