From a65770f0dc77d6e35af9d3ca71b1f66355509308 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 25 Mar 2025 13:16:55 +0000 Subject: [PATCH 001/125] Added file extension check for HTML/XML processing paths. --- autocorpus/Autocorpus.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/autocorpus/Autocorpus.py b/autocorpus/Autocorpus.py index 65afb6e2..6ec77a57 100644 --- a/autocorpus/Autocorpus.py +++ b/autocorpus/Autocorpus.py @@ -362,12 +362,12 @@ def process_files(self): This method performs the following steps: 1. Checks if a valid configuration is loaded. If not, raises a RuntimeError. 2. Handles the main text file: - - Parses the HTML content of the file. - - Extracts the main text from the parsed HTML. - - Attempts to extract abbreviations from the main text and HTML content. + - Parses the HTML/XML content of the file. + - Extracts the main text from the parsed HTML/XML. + - Attempts to extract abbreviations from the main text and HTML/XML content. If an error occurs during this process, it prints the error. 3. Processes linked tables, if any: - - Parses the HTML content of each linked table file. + - Parses the HTML/XML content of each linked table file. 4. Merges table data. 5. Checks if there are any documents in the tables and sets the `has_tables` attribute accordingly. 
@@ -378,17 +378,25 @@ def process_files(self): raise RuntimeError("A valid config file must be loaded.") # handle main_text if self.file_path: - soup = self.__handle_html(self.file_path, self.config) - self.main_text = self.__extract_text(soup, self.config) - try: - self.abbreviations = Abbreviations( - self.main_text, soup, self.config, self.file_path - ).to_dict() - except Exception as e: - print(e) + file_extension = self.file_path.split(".")[-1] + if file_extension in ["html", "htm"]: + soup = self.__handle_html(self.file_path, self.config) + self.main_text = self.__extract_text(soup, self.config) + try: + self.abbreviations = Abbreviations( + self.main_text, soup, self.config, self.file_path + ).to_dict() + except Exception as e: + print(e) + elif file_extension == "xml": + pass # TODO: implement XML handling if self.linked_tables: for table_file in self.linked_tables: - soup = self.__handle_html(table_file, self.config) + file_extension = table_file.split(".")[-1] + if file_extension in ["html", "htm"]: + soup = self.__handle_html(table_file, self.config) + elif file_extension == "xml": + pass # TODO: implement XML handling self.__merge_table_data() if "documents" in self.tables and not self.tables["documents"] == []: self.has_tables = True From 1c66ea009b72321650e49064f0ebd567d290e4a6 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 25 Mar 2025 13:18:09 +0000 Subject: [PATCH 002/125] lowered file extension checks to avoid potential bug with string comparison --- autocorpus/Autocorpus.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autocorpus/Autocorpus.py b/autocorpus/Autocorpus.py index 6ec77a57..31936d60 100644 --- a/autocorpus/Autocorpus.py +++ b/autocorpus/Autocorpus.py @@ -379,7 +379,7 @@ def process_files(self): # handle main_text if self.file_path: file_extension = self.file_path.split(".")[-1] - if file_extension in ["html", "htm"]: + if file_extension.lower() in ["html", "htm"]: soup = 
self.__handle_html(self.file_path, self.config) self.main_text = self.__extract_text(soup, self.config) try: @@ -393,7 +393,7 @@ def process_files(self): if self.linked_tables: for table_file in self.linked_tables: file_extension = table_file.split(".")[-1] - if file_extension in ["html", "htm"]: + if file_extension.lower() in ["html", "htm"]: soup = self.__handle_html(table_file, self.config) elif file_extension == "xml": pass # TODO: implement XML handling From 427bb162cd2c60223ee846256b9ea556cd645c20 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 25 Mar 2025 13:22:20 +0000 Subject: [PATCH 003/125] Refactored extension check logic so the string is only lowered once per file --- autocorpus/Autocorpus.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/autocorpus/Autocorpus.py b/autocorpus/Autocorpus.py index 31936d60..4f7661e6 100644 --- a/autocorpus/Autocorpus.py +++ b/autocorpus/Autocorpus.py @@ -378,8 +378,8 @@ def process_files(self): raise RuntimeError("A valid config file must be loaded.") # handle main_text if self.file_path: - file_extension = self.file_path.split(".")[-1] - if file_extension.lower() in ["html", "htm"]: + file_extension = self.file_path.split(".")[-1].lower() + if file_extension in ["html", "htm"]: soup = self.__handle_html(self.file_path, self.config) self.main_text = self.__extract_text(soup, self.config) try: @@ -392,8 +392,8 @@ def process_files(self): pass # TODO: implement XML handling if self.linked_tables: for table_file in self.linked_tables: - file_extension = table_file.split(".")[-1] - if file_extension.lower() in ["html", "htm"]: + file_extension = table_file.split(".")[-1].lower() + if file_extension in ["html", "htm"]: soup = self.__handle_html(table_file, self.config) elif file_extension == "xml": pass # TODO: implement XML handling From bc9b22909fc039554811247d66ae4a24d4e8d821 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Wed, 26 Mar 2025 13:35:33 +0000 Subject: [PATCH 004/125] 
Implemented parsing to validate input file types. --- autocorpus/Autocorpus.py | 21 ++++++++++++------- autocorpus/utils.py | 44 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 7 deletions(-) diff --git a/autocorpus/Autocorpus.py b/autocorpus/Autocorpus.py index 4f7661e6..34113235 100644 --- a/autocorpus/Autocorpus.py +++ b/autocorpus/Autocorpus.py @@ -4,13 +4,14 @@ from pathlib import Path from bioc import biocjson, biocxml +from bioc.biocxml.decoder import etree from bs4 import BeautifulSoup from .abbreviation import Abbreviations from .bioc_formatter import BiocFormatter from .section import Section from .table import Table -from .utils import handle_not_tables +from .utils import check_file_type, handle_not_tables class Autocorpus: @@ -378,8 +379,11 @@ def process_files(self): raise RuntimeError("A valid config file must be loaded.") # handle main_text if self.file_path: - file_extension = self.file_path.split(".")[-1].lower() - if file_extension in ["html", "htm"]: + file_type = check_file_type(Path(self.file_path)) + if file_type == "other": + raise RuntimeError("Main text file must be an HTML or XML file.") + + if file_type == "html": soup = self.__handle_html(self.file_path, self.config) self.main_text = self.__extract_text(soup, self.config) try: @@ -388,14 +392,17 @@ def process_files(self): ).to_dict() except Exception as e: print(e) - elif file_extension == "xml": + else: pass # TODO: implement XML handling + if self.linked_tables: for table_file in self.linked_tables: - file_extension = table_file.split(".")[-1].lower() - if file_extension in ["html", "htm"]: + file_type = check_file_type(Path(table_file)) + if file_type == "other": + raise RuntimeError("Linked table files must be HTML or XML files.") + if file_type == "html": soup = self.__handle_html(table_file, self.config) - elif file_extension == "xml": + else: pass # TODO: implement XML handling self.__merge_table_data() if "documents" in self.tables and not 
self.tables["documents"] == []: diff --git a/autocorpus/utils.py b/autocorpus/utils.py index f8f69194..f2a7340d 100644 --- a/autocorpus/utils.py +++ b/autocorpus/utils.py @@ -12,6 +12,50 @@ from lxml.html.soupparser import fromstring +def check_file_type(file_path: Path) -> str: + """Determines the type of a file based on its content and extension. + + This function checks whether the given file is an HTML or XML file by + attempting to parse it using appropriate parsers. If the file cannot + be parsed as either HTML or XML, it is classified as "other". + + Args: + file_path (Path): The path to the file to be checked. + + Returns: + str: A string indicating the file type: + - "html" if the file is determined to be an HTML file. + - "xml" if the file is determined to be an XML file. + - "other" if the file type cannot be determined as HTML or XML. + """ + is_html, is_xml = False, False + file_extension = file_path.suffix.lower() + if file_extension in [".html", ".htm"]: + try: + etree.parse(file_path, etree.HTMLParser()) + is_html = True + except etree.ParseError: + etree.parse(file_path, etree.XMLParser()) + is_xml = True + except Exception as ex: + print(f"Error parsing file {file_path}: {ex}") + elif file_extension == ".xml": + try: + etree.parse(file_path, etree.XMLParser()) + is_xml = True + except etree.ParseError: + etree.parse(file_path, etree.HTMLParser()) + is_html = True + except Exception as ex: + print(f"Error parsing file {file_path}: {ex}") + if is_html: + return "html" + elif is_xml: + return "xml" + else: + return "other" + + def get_files(base_dir, pattern=r"(.*).html"): """Recursively retrieve all PMC.html files from the directory. 
From 831c9e5f110e3236ef02fcfe786cd7064a38990c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Mar 2025 13:35:53 +0000 Subject: [PATCH 005/125] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- autocorpus/Autocorpus.py | 1 - 1 file changed, 1 deletion(-) diff --git a/autocorpus/Autocorpus.py b/autocorpus/Autocorpus.py index 34113235..8abe23a7 100644 --- a/autocorpus/Autocorpus.py +++ b/autocorpus/Autocorpus.py @@ -4,7 +4,6 @@ from pathlib import Path from bioc import biocjson, biocxml -from bioc.biocxml.decoder import etree from bs4 import BeautifulSoup from .abbreviation import Abbreviations From a2fdfb4d32462321de6a57531a59a7c322597daa Mon Sep 17 00:00:00 2001 From: Thomas Rowlands <41603761+Thomas-Rowlands@users.noreply.github.com> Date: Tue, 15 Apr 2025 10:38:46 +0100 Subject: [PATCH 006/125] Update autocorpus/Autocorpus.py Co-authored-by: Adrian D'Alessandro --- autocorpus/Autocorpus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autocorpus/Autocorpus.py b/autocorpus/Autocorpus.py index 8abe23a7..281a4934 100644 --- a/autocorpus/Autocorpus.py +++ b/autocorpus/Autocorpus.py @@ -392,7 +392,7 @@ def process_files(self): except Exception as e: print(e) else: - pass # TODO: implement XML handling + raise NotImplementedError("XML processing is not yet implemented for Auto-CORPus") if self.linked_tables: for table_file in self.linked_tables: From ef7e5960b322ffee98a24b11cd88d16ce980217e Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 15 Apr 2025 11:01:29 +0100 Subject: [PATCH 007/125] Initial word processing integration --- autocorpus/BioCTable.py | 140 +++++++++ autocorpus/utils.py | 38 +++ autocorpus/word_processing.py | 577 ++++++++++++++++++++++++++++++++++ 3 files changed, 755 insertions(+) create mode 100644 autocorpus/BioCTable.py create mode 100644 autocorpus/word_processing.py diff --git 
a/autocorpus/BioCTable.py b/autocorpus/BioCTable.py new file mode 100644 index 00000000..d83278b1 --- /dev/null +++ b/autocorpus/BioCTable.py @@ -0,0 +1,140 @@ +"""This module provides functionality for converting tables into an extended BioC format. + +BioCTable objects include table-specific elements such as cell IDs for annotation. +""" + +import datetime +from typing import Any + +from pandas import DataFrame + +from autocorpus.utils import replace_unicode + + +class BioCTable: + """Converts tables from nested lists into a BioC table object.""" + + def __init__(self, table_id: int, table_data: DataFrame): + """Initialize a BioCTable object. + + Args: + table_id (int): The unique identifier for the table. + table_data (pd.DataFrame): The data of the table as a Pandas DataFrame. + textsource (str): The source of the text content. + """ + self.id = str(table_id) + "_1" + self.textsource = "Auto-CORPus (supplementary)" + self.infons: dict[str, Any] = {} + self.passages: list[dict[str, Any]] = [] + self.annotations: list[dict[str, Any]] = [] + self.__build_table(table_data) + + def __build_table(self, table_data: DataFrame): + """Builds a table passage based on the provided table_data and adds it to the passages list. + + Args: + table_data: A pandas DataFrame containing the data for the table. 
+ + Returns: + None + """ + # Create a title passage + title_passage = { + "offset": 0, + "infons": { + "section_title_1": "table_title", + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305", + }, + } + self.passages.append(title_passage) + # Create a caption passage + caption_passage = { + "offset": 0, + "infons": { + "section_title_1": "table_caption", + "iao_name_1": "caption", + "iao_id_1": "IAO:0000304", + }, + } + self.passages.append(caption_passage) + # Create a passage for table content + passage = { + "offset": 0, + "infons": { + "section_title_1": "table_content", + "iao_name_1": "table", + "iao_id_1": "IAO:0000306", + }, + "column_headings": [], + "data_section": [{"table_section_title_1": "", "data_rows": []}], + } + # Populate column headings + for i, text in enumerate(table_data.columns.values): + passage["column_headings"].append( + {"cell_id": self.id + f".1.{i + 1}", "cell_text": replace_unicode(text)} + ) + # Populate table rows with cell data + for row_idx, row in enumerate(table_data.values): + new_row = [] + for cell_idx, cell in enumerate(row): + new_cell = { + "cell_id": f"{self.id}.{row_idx + 2}.{cell_idx + 1}", + "cell_text": f"{replace_unicode(cell)}", + } + new_row.append(new_cell) + passage["data_section"][0]["data_rows"].append(new_row) + # Add the table passage to the passages list + self.passages.append(passage) + + +def get_tables_bioc(tables: list[DataFrame]) -> dict[str, Any]: + """Converts extracted tables into BioC format. + + Args: + tables: A list of tables extracted from an Excel file. + filename: The name of the Excel file. + textsource: Source of the text content. + + Returns: + A BioC format representation of the extracted tables. 
+ """ + # Create a BioC dictionary + bioc = { + "source": "Auto-CORPus (supplementary)", + "date": datetime.date.today().strftime("%Y%m%d"), + "key": "autocorpus_supplementary.key", + "infons": {}, + "documents": [BioCTable(i + 1, x).__dict__ for i, x in enumerate(tables)], + } + return bioc + + +def convert_datetime_to_string(df: DataFrame) -> DataFrame: + """Convert all datetime objects in a DataFrame to string format. + + Args: + df (pd.DataFrame): The input DataFrame. + + Returns: + pd.DataFrame: A DataFrame with datetime columns converted to string. + """ + for col in df.select_dtypes(include=["datetime64[ns]", "datetime64"]): + df[col] = df[col].astype(str) + return df + + +def get_blank_cell_count(row: list[dict[str, str]]) -> int: + """Counts the number of blank cells in a given row. + + Args: + row (list): A list of dictionaries representing cells in a row. + + Returns: + int: The number of blank cells in the row. + """ + blank_count = 0 + for cell in row: + if not cell["text"].strip(): + blank_count += 1 + return blank_count diff --git a/autocorpus/utils.py b/autocorpus/utils.py index d4f9e4ad..72ffe72e 100644 --- a/autocorpus/utils.py +++ b/autocorpus/utils.py @@ -10,6 +10,44 @@ from lxml.html.soupparser import fromstring +def replace_unicode(text: str | list[str]) -> str | list[str]: + """Replaces specific Unicode characters in a given text. + + Args: + text: The input text to be processed. + + Returns: + The processed text with the specified Unicode characters replaced. 
+ + Examples: + replace_unicode('\u00a0Hello\u00adWorld\u2010') # ' Hello-World-' + replace_unicode(['\u00a0Hello', '\u00adWorld']) # [' Hello', 'World'] + """ + if not text: + raise ValueError("Input text is empty or None.") + if isinstance(text, list): + clean_texts = [] + for t in text: + if t and isinstance(t, str): + clean_texts.append( + t.replace("\u00a0", " ") + .replace("\u00ad", "-") + .replace("\u2010", "-") + .replace("\u00d7", "x") + ) + else: + clean_texts.append(t) + return clean_texts + elif isinstance(text, str): + clean_text = ( + text.replace("\u00a0", " ") + .replace("\u00ad", "-") + .replace("\u2010", "-") + .replace("\u00d7", "x") + ) + return clean_text + + def get_files(base_dir, pattern=r"(.*).html"): """Recursively retrieve all PMC.html files from the directory. diff --git a/autocorpus/word_processing.py b/autocorpus/word_processing.py new file mode 100644 index 00000000..38881d8b --- /dev/null +++ b/autocorpus/word_processing.py @@ -0,0 +1,577 @@ +"""This module provides functionality for processing supplementary files. + +Extracts data from various file types such as PDFs, spreadsheets, +PowerPoint presentations, and archives. It also handles logging and error +management for unprocessed files. +""" + +import datetime +import re +from pathlib import Path + +import pandas as pd +from bioc import BioCCollection, BioCDocument, BioCPassage +from BioCTable import get_tables_bioc +from docx import Document + +from autocorpus.utils import replace_unicode + +from . import logger + +WORD_EXTENSIONS = [".doc", ".docx"] + + +def extract_table_from_text(text: str) -> tuple[list[str], list[pd.DataFrame]]: + """Extracts tables from a given text and returns the modified text and extracted tables. + + Args: + text (str): The input text containing potential table data. + + Returns: + tuple[str, list[pd.DataFrame]]: A tuple containing the modified text without table lines + and a list of DataFrames representing the extracted tables. 
+ """ + # Split the text into lines + lines = [x for x in text.splitlines() if x] + text_output = lines + + # store extracted tables + tables = [] + # Identify where the table starts and ends by looking for lines containing pipes + table_lines = [] + # keep unmodified lines used in tables. These must be removed from the original text + lines_to_remove = [] + inside_table = False + for line in lines: + if "|" in line: + inside_table = True + table_lines.append(line) + lines_to_remove.append(line) + elif ( + inside_table + ): # End of table if there's a blank line after lines with pipes + inside_table = False + tables.append(table_lines) + table_lines = [] + continue + + for line in lines_to_remove: + text_output.remove(line) + + tables_output = [] + # Remove lines that are just dashes (table separators) + for table in tables: + table = [line for line in table if not re.match(r"^\s*-+\s*$", line)] + + # Extract rows from the identified table lines + rows = [] + for line in table: + # Match only lines that look like table rows (contain pipes) + if re.search(r"\|", line): + # Split the line into cells using the pipe delimiter and strip whitespace + cells = [ + cell.strip() + for cell in line.split("|") + if not all(x in "|-" for x in cell) + ] + if cells: + # Remove empty cells that may result from leading/trailing pipes + # if cells[0] == '': + # cells.pop(0) + # if cells[-1] == '': + # cells.pop(-1) + rows.append(cells) + + # Determine the maximum number of columns in the table + num_columns = max(len(row) for row in rows) + + # Pad rows with missing cells to ensure they all have the same length + for row in rows: + while len(row) < num_columns: + row.append("") + + # Create a DataFrame from the rows + df = pd.DataFrame(rows[1:], columns=rows[0]) + tables_output.append(df) + return text_output, tables_output + + +def get_bioc_passages(text: list[str] | str) -> list[BioCPassage] | list[str]: + """Identifies passages within the given text and creates passage objects. 
+ + Args: + text (list): The text to be processed, represented as a list of lines. + + Returns: + list: A list of BioCPassage objects. + """ + offset = 0 + passages: list[BioCPassage] = [] + if not text: + return passages + if isinstance(text, str): + text = text.split("\n\n") + text = [x for x in text if x] + # Iterate through each line in the text + for line in text: + # Determine the type of the line and assign appropriate information + iao_name = "supplementary material section" + iao_id = "IAO:0000326" + # Create a passage object and add it to the passages list + passage = BioCPassage() + passage.offset = offset + passage.infons = {"iao_name_1": iao_name, "iao_id_1": iao_id} + passage.text = line + passages.append(passage) + offset += len(line) + return passages + + +def get_text_bioc(parsed_texts: list[str], filename: str): + """Convert parsed texts into BioC format. + + Args: + parsed_texts (list): A list of parsed text segments to be converted. + filename (str): The name of the source file. + textsource (str): The source of the text, default is "Auto-CORPus". + + Returns: + BioCCollection: A BioCCollection object representing the converted text in BioC format. 
+ """ + passages = [ + p + for sublist in [ + get_bioc_passages(replace_unicode(x)).__dict__["passages"] + for x in parsed_texts + ] + for p in sublist + ] + offset = 0 + for p in passages: + p["offset"] = offset + offset += len(p["text"]) + # Create a BioC XML structure dictionary + bioc = BioCCollection() + bioc.source = "Auto-CORPus (supplementary)" + bioc.date = datetime.date.today().strftime("%Y%m%d") + bioc.key = "autocorpus_supplementary.key" + bioc.documents = [] + new_doc = BioCDocument() + new_doc.id = "1" + new_doc.infons = { + "inputfile": Path(filename).name, + "textsource": "Auto-CORPus (supplementary)", + } + new_doc.passages = passages + return bioc + + +class BioCText: + def __init__(self, text): + self.infons = {} + self.passages = self.__identify_passages(text) + self.annotations = [] + + @staticmethod + def __identify_passages(text): + """Identifies passages within the given text and creates passage objects. + + Args: + text (tuple): The text to be processed and a boolean which is True for header text. + + Returns: + list: A list of passage objects. Each passage object is a dictionary containing the following keys: + - "offset": The offset of the passage in the original text. + - "infons": A dictionary of information associated with the passage, including: + - "iao_name_1": The name or type of the passage. + - "iao_id_1": The unique identifier associated with the passage. + - "text": The content of the passage. + - "sentences": An empty list of sentences (to be populated later if needed). + - "annotations": An empty list of annotations (to be populated later if needed). + - "relations": An empty list of relations (to be populated later if needed). 
+ + Example: + text = [ + "Introduction", + "This is the first paragraph.", + "Conclusion" + ] + passages = __identify_passages(text) + """ + offset = 0 + passages = [] + # Iterate through each line in the text + line, is_header = text + line = line.replace("\n", "") + iao_name = "" + iao_id = "" + + # Determine the type of the line and assign appropriate information + if line.isupper() or is_header: + iao_name = "document title" + iao_id = "IAO:0000305" + else: + iao_name = "supplementary material section" + iao_id = "IAO:0000326" + # Create a passage object and add it to the passages list + passages.append( + { + "offset": offset, + "infons": {"iao_name_1": iao_name, "iao_id_1": iao_id}, + "text": line, + "sentences": [], + "annotations": [], + "relations": [], + } + ) + offset += len(line) + return passages + + +class BioCTable: + """Converts tables from nested lists into a BioC table object.""" + + def __init__(self, input_file, table_id, table_data): + self.inputfile = input_file + self.id = str(table_id) + "_1" + self.infons = {} + self.passage = {} + self.annotations = [] + self.__build_table(table_data) + + def __build_table(self, table_data): + """Builds a table passage in a specific format and appends it to the list of passages. + + Args: + table_data (list): The table data to be included in the passage. It should be a list + containing the table's column headings as the first row, followed by + the data rows. 
+ + Returns: + None + + Example: + table_data = [ + ["Column 1", "Column 2", "Column 3"], + [1, 2, 3], + [4, 5, 6] + ] + self.__build_table(table_data) + """ + passage = { + "offset": 0, + "infons": { + "section_title_1": "table_content", + "iao_name_1": "table", + "iao_id_1": "IAO:0000306", + }, + "column_headings": [], + "data_section": [{"table_section_title_1": "", "data_rows": []}], + } + # Process the column headings of the table + for i, col in enumerate(table_data[0]): + passage["column_headings"].append( + {"cell_id": self.id + f".1.{i + 1}", "cell_text": col} + ) + # Process the data rows of the table + for row_idx, row in enumerate(table_data[1:]): + new_row = [] + for cell_idx, cell in enumerate(row): + new_cell = { + "cell_id": f"{self.id}.{row_idx + 2}.{cell_idx + 1}", + "cell_text": f"{cell}", + } + new_row.append(new_cell) + passage["data_section"][0]["data_rows"].append(new_row) + self.passage = passage + + def get_table(self): + return self.passage + + +def get_tables_bioc(tables, filename, textsource="Auto-CORPus"): + """Generates a BioC XML structure containing tables. + + Args: + tables (list): A list of tables to be included in the BioC structure. + Each table should be represented as a nested list, where each inner list + corresponds to a row, and each element in the inner list corresponds to the + text content of a cell in the row. + + Returns: + dict: A dictionary representing the generated BioC XML structure. 
+ + Example: + tables = [[["A", "B"], ["1", "2"]], [["X", "Y"], ["3", "4"]]] + bioc_xml = get_tables_bioc(tables) + """ + # Create a BioC JSON structure dictionary + bioc = { + "source": "Auto-CORPus (supplementary)", + "date": str(datetime.date.today().strftime("%Y%m%d")), + "key": "autocorpus_supplementary.key", + "infons": {}, + "documents": [ + { + "id": 1, + "inputfile": filename, + "textsource": textsource, + "infons": {}, + "passages": [], + "annotations": [], + "relations": [], + } + ], + } + for i, x in enumerate(tables): + bioc["documents"][0]["passages"].append( + BioCTable(filename, i + 1, x).get_table() + ) + return bioc + + +def get_text_bioc(paragraphs, filename, textsource="Auto-CORPus"): + """Generates a BioC JSON structure containing text paragraphs. + + Args: + paragraphs (list): A list of paragraphs to be included in the BioC structure. + + Returns: + dict: A dictionary representing the generated BioC XML structure. + + Example: + paragraphs = ["This is the first paragraph.", "This is the second paragraph."] + bioc_xml = get_text_bioc(paragraphs) + """ + passages = [ + p + for sublist in [ + BioCText(text=replace_unicode(x)).__dict__["passages"] for x in paragraphs + ] + for p in sublist + ] + offset = 0 + for p in passages: + p["offset"] = offset + offset += len(p["text"]) + # Create a BioC XML structure dictionary + bioc = { + "source": "Auto-CORPus (supplementary)", + "date": str(datetime.date.today().strftime("%Y%m%d")), + "key": "autocorpus_supplementary.key", + "infons": {}, + "documents": [ + { + "id": 1, + "inputfile": filename, + "textsource": textsource, + "infons": {}, + "passages": passages, + "annotations": [], + "relations": [], + } + ], + } + return bioc + + +def extract_tables(doc): + """Extracts tables from a .docx document. + + Args: + doc (docx.Document): The Document object representing the .docx document. + + Returns: + list: A list of tables extracted from the document. 
Each table is represented as a nested list, + where each inner list corresponds to a row, and each element in the inner list corresponds + to the text content of a cell in the row. + + Example: + from docx import Document + + doc = Document("document.docx") + tables = extract_tables(doc) + """ + # Open the .docx file + tables = [] + # Iterate through the tables in the document + for table in doc.tables: + tables.append([]) + # Iterate through the rows in the table + for row in table.rows: + tables[-1].append([x.text for x in row.cells]) + return tables + + +def convert_older_doc_file(file, output_dir): + operating_system = platform.system() + docx_path = file.replace(".doc", ".docx") + if operating_system == "Windows": + import win32com.client + + word = None + try: + docx_path = file + ".docx" + word = win32com.client.DispatchEx("Word.Application") + doc = word.Documents.Open(file) + doc.SaveAs(file + ".docx", 16) + doc.Close() + word.Quit() + return docx_path + except Exception: + return False + finally: + word.Quit() + elif operating_system == "linux": + # Convert .doc to .docx using LibreOffice + subprocess.run( + [ + "soffice", + "--headless", + "--convert-to", + "docx", + "--outdir", + output_dir, + file, + ], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + return docx_path + elif operating_system == "Darwin": # macOS + try: + # AppleScript to open the file in Word and save as .docx + applescript = f""" + tell application "Microsoft Word" + open "{file}" + save as active document file name "{docx_path}" file format format document + close active document saving no + end tell + """ + subprocess.run(["osascript", "-e", applescript], check=True) + return docx_path + except Exception: + return False + else: + return False + + +def extract_text_from_doc(file_path): + """Extracts text from a .doc file by converting it to .docx and processing with python-docx.""" + if not file_path.endswith(".doc"): + raise ValueError("Input file must be a 
.doc file.") + try: + output_dir = str(Path(file_path).parent.absolute()) + docx_path = convert_older_doc_file(file_path, output_dir) + + # Extract text from the resulting .docx file + doc = Document(docx_path) + tables = extract_tables(doc) + text_sizes = set( + [int(x.style.font.size) for x in doc.paragraphs if x.style.font.size] + ) + paragraphs = [ + ( + x.text, + True + if text_sizes + and x.style.font.size + and int(x.style.font.size) > min(text_sizes) + else False, + ) + for x in doc.paragraphs + ] + os.unlink(docx_path) + return paragraphs, tables + except FileNotFoundError: + print( + "LibreOffice 'soffice' command not found. Ensure it is installed and in your PATH." + ) + return None, None + except Exception as e: + print(f"Error processing file {file_path}: {e}") + return None, None + + +def process_word_document(file): + """Processes a Word document file, extracting tables and paragraphs, and saving them as JSON files. + + Args: + file (str): The path to the Word document file. + + Returns: + None + + Example: + file_path = "/path/to/document.docx" + process_word_document(file_path) + """ + tables, paragraphs = [], [] + output_path = file.replace("Raw", "Processed") + # Check if the file has a ".doc" or ".docx" extension + if file.lower().endswith(".doc") or file.lower().endswith(".docx"): + try: + doc = Document(file) + tables = extract_tables(doc) + text_sizes = set( + [int(x.style.font.size) for x in doc.paragraphs if x.style.font.size] + ) + paragraphs = [ + ( + x.text, + True + if text_sizes + and x.style.font.size + and int(x.style.font.size) > min(text_sizes) + else False, + ) + for x in doc.paragraphs + ] + except ValueError: + try: + if not file.lower().endswith(".docx"): + paragraphs, tables = extract_text_from_doc(file) + if paragraphs: + logger.info( + f"File {file} was converted to .docx as a copy within the same directory for processing." + ) + else: + logger.info( + f"File {file} could not be processed correctly. 
It is likely a pre-2007 word document or problematic." + ) + return False + else: + logger.info(f"File {file} could not be processed correctly.") + return False + except ValueError as ve: + logger.info(f"File {file} raised the error:\n{ve}") + return False + except Exception as ex: + logger.info(f"File {file} raised the error:\n{ex}") + return False + else: + return False + + # Save tables as a JSON file + if tables: + if not Path(output_path).exists(): + Path(output_path).parent.mkdir(parents=True, exist_ok=True) + with open(f"{output_path}_tables.json", "w+", encoding="utf-8") as f_out: + json.dump(get_tables_bioc(tables, Path(file).name), f_out) + + # Save paragraphs as a JSON file + if paragraphs: + paragraphs = [x for x in paragraphs if x[0]] + if not Path(output_path).exists(): + Path(output_path).parent.mkdir(parents=True, exist_ok=True) + global args + with open(f"{output_path}_bioc.json", "w+", encoding="utf-8") as f_out: + # TODO: Test if datatype causes a problem + text = get_text_bioc(paragraphs, Path(file).name) + json.dump(text, f_out, indent=4) + + if not paragraphs and not tables: + return False + else: + return True From a7a6392313b7d82c53131cb521963f32ca2265ee Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Wed, 16 Apr 2025 09:37:39 +0100 Subject: [PATCH 008/125] Merge conflict resolution --- autocorpus/autocorpus.py | 406 --------------------------------------- 1 file changed, 406 deletions(-) delete mode 100644 autocorpus/autocorpus.py diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py deleted file mode 100644 index 8eb8c77e..00000000 --- a/autocorpus/autocorpus.py +++ /dev/null @@ -1,406 +0,0 @@ -"""Auto-CORPus primary functions are called from this script, after initialisation with __main__.py.""" - -import json -from pathlib import Path -from typing import Any - -from bioc import biocjson, biocxml -from bs4 import BeautifulSoup - -from . 
import logger -from .abbreviation import Abbreviations -from .bioc_formatter import get_formatted_bioc_collection -from .section import Section -from .table import get_table_json -from .utils import handle_not_tables - - -class Autocorpus: - """Parent class for all Auto-CORPus functionality.""" - - @staticmethod - def read_config(config_path: str) -> dict[str, Any]: - """Reads a configuration file and returns its content. - - Args: - config_path (str): The path to the configuration file. - - Returns: - dict: The content of the configuration file. - - Raises: - FileNotFoundError: If the configuration file does not exist. - json.JSONDecodeError: If the configuration file is not a valid JSON. - KeyError: If the configuration file does not contain the expected "config" key. - """ - with open(config_path, encoding="utf-8") as f: - ## TODO: validate config file here if possible - content = json.load(f) - return content["config"] - - def __soupify_infile(self, fpath): - fpath = Path(fpath) - with fpath.open(encoding="utf-8") as fp: - soup = BeautifulSoup(fp.read(), "html.parser") - for e in soup.find_all( - attrs={"style": ["display:none", "visibility:hidden"]} - ): - e.extract() - return soup - - def __get_keywords(self, soup, config): - if "keywords" not in config: - return {} - - responses = handle_not_tables(config["keywords"], soup) - if not responses: - return {} - - responses = " ".join(x["node"].get_text() for x in responses) - return { - "section_heading": "keywords", - "subsection_heading": "", - "body": responses, - "section_type": [{"iao_name": "keywords section", "iao_id": "IAO:0000630"}], - } - - def __get_title(self, soup, config): - if "title" not in config: - return "" - - titles = handle_not_tables(config["title"], soup) - if not titles: - return "" - - return titles[0]["node"].get_text() - - def __get_sections(self, soup, config): - if "sections" not in config: - return [] - - return handle_not_tables(config["sections"], soup) - - def 
__extract_text(self, soup, config): - """Convert beautiful soup object into a python dict object with cleaned main text body. - - Args: - soup (bs4.BeautifulSoup): BeautifulSoup object of html - config (dict): AC config rules - - Return: - (dict): dict of the maintext - """ - result = {} - - # Tags of text body to be extracted are hard-coded as p (main text) and span (keywords and refs) - result["title"] = self.__get_title(soup, config) - maintext = [] - if keywords := self.__get_keywords(soup, config): - maintext.append(keywords) - sections = self.__get_sections(soup, config) - for sec in sections: - maintext.extend(Section(config, sec).to_list()) - - # filter out the sections which do not contain any info - filtered_text = [x for x in maintext if x] - unique_text = [] - seen_text = [] - for text in filtered_text: - if text["body"] not in seen_text: - seen_text.append(text["body"]) - unique_text.append(text) - - result["paragraphs"] = self.__set_unknown_section_headings(unique_text) - - return result - - def __set_unknown_section_headings(self, unique_text): - paper = {} - for para in unique_text: - if para["section_heading"] != "keywords": - paper[para["section_heading"]] = [ - x["iao_name"] for x in para["section_type"] - ] - - for text in unique_text: - if not text["section_heading"]: - text["section_heading"] = "document part" - text["section_type"] = [ - {"iao_name": "document part", "iao_id": "IAO:0000314"} - ] - - return unique_text - - def __process_html_tables(self, file_path, soup, config): - """Extract data from tables in the HTML file. - - Args: - file_path (str): path to the main text file - soup (bs4.BeautifulSoup): soup object - config (dict): dict of the maintext - """ - if "tables" not in config: - return - - if not self.tables: - self.tables, self.empty_tables = get_table_json(soup, config, file_path) - return - - seen_ids = set() - for tab in self.tables["documents"]: - if "." 
in tab["id"]: - seen_ids.add(tab["id"].split(".")[0]) - else: - seen_ids.add(tab["id"]) - - tmp_tables, tmp_empty = get_table_json(soup, config, file_path) - for tabl in tmp_tables["documents"]: - if "." in tabl["id"]: - tabl_id = tabl["id"].split(".")[0] - tabl_pos = ".".join(tabl["id"].split(".")[1:]) - else: - tabl_id = tabl["id"] - tabl_pos = None - if tabl_id in seen_ids: - tabl_id = str(len(seen_ids) + 1) - if tabl_pos: - tabl["id"] = f"{tabl_id}.{tabl_pos}" - else: - tabl["id"] = tabl_id - seen_ids.add(tabl_id) - self.tables["documents"].extend(tmp_tables["documents"]) - self.empty_tables.extend(tmp_empty) - - def __merge_table_data(self): - if not self.empty_tables: - return - - documents = self.tables.get("documents", None) - if not documents: - return - - seen_ids = {} - for i, table in enumerate(documents): - if "id" in table: - seen_ids[str(i)] = f"Table {table['id']}." - - for table in self.empty_tables: - for seenID in seen_ids.keys(): - if not table["title"].startswith(seen_ids[seenID]): - continue - - if "title" in table and not table["title"] == "": - set_new = False - for passage in documents[int(seenID)]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_title" - ): - passage["text"] = table["title"] - set_new = True - if not set_new: - documents[int(seenID)]["passages"].append( - { - "offset": 0, - "infons": { - "section_type": [ - { - "section_name": "table_title", - "iao_name": "document title", - "iao_id": "IAO:0000305", - } - ] - }, - "text": table["title"], - } - ) - if "caption" in table and not table["caption"] == "": - set_new = False - for passage in documents[int(seenID)]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_caption" - ): - passage["text"] = table["caption"] - set_new = True - if not set_new: - documents[int(seenID)]["passages"].append( - { - "offset": 0, - "infons": { - "section_type": [ - { - "section_name": "table_caption", - "iao_name": "caption", 
- "iao_id": "IAO:0000304", - } - ] - }, - "text": table["caption"], - } - ) - if "footer" in table and not table["footer"] == "": - set_new = False - for passage in documents[int(seenID)]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_footer" - ): - passage["text"] = table["footer"] - set_new = True - if not set_new: - documents[int(seenID)]["passages"].append( - { - "offset": 0, - "infons": { - "section_type": [ - { - "section_name": "table_footer", - "iao_name": "caption", - "iao_id": "IAO:0000304", - } - ] - }, - "text": table["footer"], - } - ) - - def process_files(self): - """Processes the files specified in the configuration. - - This method performs the following steps: - 1. Checks if a valid configuration is loaded. If not, raises a RuntimeError. - 2. Handles the main text file: - - Parses the HTML content of the file. - - Extracts the main text from the parsed HTML. - - Attempts to extract abbreviations from the main text and HTML content. - If an error occurs during this process, it prints the error. - 3. Processes linked tables, if any: - - Parses the HTML content of each linked table file. - 4. Merges table data. - 5. Checks if there are any documents in the tables and sets the `has_tables` attribute accordingly. - - Raises: - RuntimeError: If no valid configuration is loaded. 
- """ - if not self.config: - raise RuntimeError("A valid config file must be loaded.") - # handle main_text - if self.file_path: - soup = self.__soupify_infile(self.file_path) - self.__process_html_tables(self.file_path, soup, self.config) - self.main_text = self.__extract_text(soup, self.config) - try: - self.abbreviations = Abbreviations( - self.main_text, soup, self.config, self.file_path - ).to_dict() - except Exception as e: - logger.error(e) - if self.linked_tables: - for table_file in self.linked_tables: - soup = self.__soupify_infile(table_file) - self.__process_html_tables(table_file, soup, self.config) - self.__merge_table_data() - if "documents" in self.tables and not self.tables["documents"] == []: - self.has_tables = True - - def __init__( - self, - config, - main_text, - linked_tables=None, - ): - """Utilises the input config file to create valid BioC versions of input HTML journal articles. - - Args: - config (dict): configuration file for the input HTML journal articles - main_text (str): path to the main text of the article (HTML files only) - linked_tables (list): list of linked table file paths to be included in this run (HTML files only) - """ - self.file_path = main_text - self.linked_tables = linked_tables - self.config = config - self.main_text = {} - self.empty_tables = [] - self.tables = {} - self.abbreviations = {} - self.has_tables = False - - def to_bioc(self): - """Get the currently loaded bioc as a dict. - - Returns: - (dict): bioc as a dict - """ - return get_formatted_bioc_collection(self) - - def main_text_to_bioc_json(self): - """Get the currently loaded main text as BioC JSON. - - Args: - indent (int): level of indentation - - Returns: - (str): main text as BioC JSON - """ - return json.dumps( - get_formatted_bioc_collection(self), indent=2, ensure_ascii=False - ) - - def main_text_to_bioc_xml(self): - """Get the currently loaded main text as BioC XML. 
- - Returns: - (str): main text as BioC XML - """ - collection = biocjson.loads( - json.dumps( - get_formatted_bioc_collection(self), indent=2, ensure_ascii=False - ) - ) - return biocxml.dumps(collection) - - def tables_to_bioc_json(self, indent=2): - """Get the currently loaded tables as Tables-JSON. - - Args: - indent (int): level of indentation - - Returns: - (str): tables as Tables-JSON - """ - return json.dumps(self.tables, ensure_ascii=False, indent=indent) - - def abbreviations_to_bioc_json(self, indent=2): - """Get the currently loaded abbreviations as BioC JSON. - - Args: - indent (int): level of indentation - - Returns: - (str): abbreviations as BioC JSON - """ - return json.dumps(self.abbreviations, ensure_ascii=False, indent=indent) - - def to_json(self, indent=2): - """Get the currently loaded AC object as a dict. - - Args: - indent (int): Level of indentation. - - Returns: - (str): AC object as a JSON string - """ - return json.dumps(self.to_dict(), ensure_ascii=False, indent=indent) - - def to_dict(self): - """Get the currently loaded AC object as a dict. 
- - Returns: - (dict): AC object as a dict - """ - return { - "main_text": self.main_text, - "abbreviations": self.abbreviations, - "tables": self.tables, - } From 68adf8ba4842155b5a7b895a96486cf72d8358c8 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Wed, 16 Apr 2025 09:40:16 +0100 Subject: [PATCH 009/125] manual correction of merge conflict --- autocorpus/autocorpus.py | 406 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 406 insertions(+) create mode 100644 autocorpus/autocorpus.py diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py new file mode 100644 index 00000000..8eb8c77e --- /dev/null +++ b/autocorpus/autocorpus.py @@ -0,0 +1,406 @@ +"""Auto-CORPus primary functions are called from this script, after initialisation with __main__.py.""" + +import json +from pathlib import Path +from typing import Any + +from bioc import biocjson, biocxml +from bs4 import BeautifulSoup + +from . import logger +from .abbreviation import Abbreviations +from .bioc_formatter import get_formatted_bioc_collection +from .section import Section +from .table import get_table_json +from .utils import handle_not_tables + + +class Autocorpus: + """Parent class for all Auto-CORPus functionality.""" + + @staticmethod + def read_config(config_path: str) -> dict[str, Any]: + """Reads a configuration file and returns its content. + + Args: + config_path (str): The path to the configuration file. + + Returns: + dict: The content of the configuration file. + + Raises: + FileNotFoundError: If the configuration file does not exist. + json.JSONDecodeError: If the configuration file is not a valid JSON. + KeyError: If the configuration file does not contain the expected "config" key. 
+ """ + with open(config_path, encoding="utf-8") as f: + ## TODO: validate config file here if possible + content = json.load(f) + return content["config"] + + def __soupify_infile(self, fpath): + fpath = Path(fpath) + with fpath.open(encoding="utf-8") as fp: + soup = BeautifulSoup(fp.read(), "html.parser") + for e in soup.find_all( + attrs={"style": ["display:none", "visibility:hidden"]} + ): + e.extract() + return soup + + def __get_keywords(self, soup, config): + if "keywords" not in config: + return {} + + responses = handle_not_tables(config["keywords"], soup) + if not responses: + return {} + + responses = " ".join(x["node"].get_text() for x in responses) + return { + "section_heading": "keywords", + "subsection_heading": "", + "body": responses, + "section_type": [{"iao_name": "keywords section", "iao_id": "IAO:0000630"}], + } + + def __get_title(self, soup, config): + if "title" not in config: + return "" + + titles = handle_not_tables(config["title"], soup) + if not titles: + return "" + + return titles[0]["node"].get_text() + + def __get_sections(self, soup, config): + if "sections" not in config: + return [] + + return handle_not_tables(config["sections"], soup) + + def __extract_text(self, soup, config): + """Convert beautiful soup object into a python dict object with cleaned main text body. 
+ + Args: + soup (bs4.BeautifulSoup): BeautifulSoup object of html + config (dict): AC config rules + + Return: + (dict): dict of the maintext + """ + result = {} + + # Tags of text body to be extracted are hard-coded as p (main text) and span (keywords and refs) + result["title"] = self.__get_title(soup, config) + maintext = [] + if keywords := self.__get_keywords(soup, config): + maintext.append(keywords) + sections = self.__get_sections(soup, config) + for sec in sections: + maintext.extend(Section(config, sec).to_list()) + + # filter out the sections which do not contain any info + filtered_text = [x for x in maintext if x] + unique_text = [] + seen_text = [] + for text in filtered_text: + if text["body"] not in seen_text: + seen_text.append(text["body"]) + unique_text.append(text) + + result["paragraphs"] = self.__set_unknown_section_headings(unique_text) + + return result + + def __set_unknown_section_headings(self, unique_text): + paper = {} + for para in unique_text: + if para["section_heading"] != "keywords": + paper[para["section_heading"]] = [ + x["iao_name"] for x in para["section_type"] + ] + + for text in unique_text: + if not text["section_heading"]: + text["section_heading"] = "document part" + text["section_type"] = [ + {"iao_name": "document part", "iao_id": "IAO:0000314"} + ] + + return unique_text + + def __process_html_tables(self, file_path, soup, config): + """Extract data from tables in the HTML file. + + Args: + file_path (str): path to the main text file + soup (bs4.BeautifulSoup): soup object + config (dict): dict of the maintext + """ + if "tables" not in config: + return + + if not self.tables: + self.tables, self.empty_tables = get_table_json(soup, config, file_path) + return + + seen_ids = set() + for tab in self.tables["documents"]: + if "." 
in tab["id"]: + seen_ids.add(tab["id"].split(".")[0]) + else: + seen_ids.add(tab["id"]) + + tmp_tables, tmp_empty = get_table_json(soup, config, file_path) + for tabl in tmp_tables["documents"]: + if "." in tabl["id"]: + tabl_id = tabl["id"].split(".")[0] + tabl_pos = ".".join(tabl["id"].split(".")[1:]) + else: + tabl_id = tabl["id"] + tabl_pos = None + if tabl_id in seen_ids: + tabl_id = str(len(seen_ids) + 1) + if tabl_pos: + tabl["id"] = f"{tabl_id}.{tabl_pos}" + else: + tabl["id"] = tabl_id + seen_ids.add(tabl_id) + self.tables["documents"].extend(tmp_tables["documents"]) + self.empty_tables.extend(tmp_empty) + + def __merge_table_data(self): + if not self.empty_tables: + return + + documents = self.tables.get("documents", None) + if not documents: + return + + seen_ids = {} + for i, table in enumerate(documents): + if "id" in table: + seen_ids[str(i)] = f"Table {table['id']}." + + for table in self.empty_tables: + for seenID in seen_ids.keys(): + if not table["title"].startswith(seen_ids[seenID]): + continue + + if "title" in table and not table["title"] == "": + set_new = False + for passage in documents[int(seenID)]["passages"]: + if ( + passage["infons"]["section_type"][0]["section_name"] + == "table_title" + ): + passage["text"] = table["title"] + set_new = True + if not set_new: + documents[int(seenID)]["passages"].append( + { + "offset": 0, + "infons": { + "section_type": [ + { + "section_name": "table_title", + "iao_name": "document title", + "iao_id": "IAO:0000305", + } + ] + }, + "text": table["title"], + } + ) + if "caption" in table and not table["caption"] == "": + set_new = False + for passage in documents[int(seenID)]["passages"]: + if ( + passage["infons"]["section_type"][0]["section_name"] + == "table_caption" + ): + passage["text"] = table["caption"] + set_new = True + if not set_new: + documents[int(seenID)]["passages"].append( + { + "offset": 0, + "infons": { + "section_type": [ + { + "section_name": "table_caption", + "iao_name": "caption", 
+ "iao_id": "IAO:0000304", + } + ] + }, + "text": table["caption"], + } + ) + if "footer" in table and not table["footer"] == "": + set_new = False + for passage in documents[int(seenID)]["passages"]: + if ( + passage["infons"]["section_type"][0]["section_name"] + == "table_footer" + ): + passage["text"] = table["footer"] + set_new = True + if not set_new: + documents[int(seenID)]["passages"].append( + { + "offset": 0, + "infons": { + "section_type": [ + { + "section_name": "table_footer", + "iao_name": "caption", + "iao_id": "IAO:0000304", + } + ] + }, + "text": table["footer"], + } + ) + + def process_files(self): + """Processes the files specified in the configuration. + + This method performs the following steps: + 1. Checks if a valid configuration is loaded. If not, raises a RuntimeError. + 2. Handles the main text file: + - Parses the HTML content of the file. + - Extracts the main text from the parsed HTML. + - Attempts to extract abbreviations from the main text and HTML content. + If an error occurs during this process, it prints the error. + 3. Processes linked tables, if any: + - Parses the HTML content of each linked table file. + 4. Merges table data. + 5. Checks if there are any documents in the tables and sets the `has_tables` attribute accordingly. + + Raises: + RuntimeError: If no valid configuration is loaded. 
+ """ + if not self.config: + raise RuntimeError("A valid config file must be loaded.") + # handle main_text + if self.file_path: + soup = self.__soupify_infile(self.file_path) + self.__process_html_tables(self.file_path, soup, self.config) + self.main_text = self.__extract_text(soup, self.config) + try: + self.abbreviations = Abbreviations( + self.main_text, soup, self.config, self.file_path + ).to_dict() + except Exception as e: + logger.error(e) + if self.linked_tables: + for table_file in self.linked_tables: + soup = self.__soupify_infile(table_file) + self.__process_html_tables(table_file, soup, self.config) + self.__merge_table_data() + if "documents" in self.tables and not self.tables["documents"] == []: + self.has_tables = True + + def __init__( + self, + config, + main_text, + linked_tables=None, + ): + """Utilises the input config file to create valid BioC versions of input HTML journal articles. + + Args: + config (dict): configuration file for the input HTML journal articles + main_text (str): path to the main text of the article (HTML files only) + linked_tables (list): list of linked table file paths to be included in this run (HTML files only) + """ + self.file_path = main_text + self.linked_tables = linked_tables + self.config = config + self.main_text = {} + self.empty_tables = [] + self.tables = {} + self.abbreviations = {} + self.has_tables = False + + def to_bioc(self): + """Get the currently loaded bioc as a dict. + + Returns: + (dict): bioc as a dict + """ + return get_formatted_bioc_collection(self) + + def main_text_to_bioc_json(self): + """Get the currently loaded main text as BioC JSON. + + Args: + indent (int): level of indentation + + Returns: + (str): main text as BioC JSON + """ + return json.dumps( + get_formatted_bioc_collection(self), indent=2, ensure_ascii=False + ) + + def main_text_to_bioc_xml(self): + """Get the currently loaded main text as BioC XML. 
+ + Returns: + (str): main text as BioC XML + """ + collection = biocjson.loads( + json.dumps( + get_formatted_bioc_collection(self), indent=2, ensure_ascii=False + ) + ) + return biocxml.dumps(collection) + + def tables_to_bioc_json(self, indent=2): + """Get the currently loaded tables as Tables-JSON. + + Args: + indent (int): level of indentation + + Returns: + (str): tables as Tables-JSON + """ + return json.dumps(self.tables, ensure_ascii=False, indent=indent) + + def abbreviations_to_bioc_json(self, indent=2): + """Get the currently loaded abbreviations as BioC JSON. + + Args: + indent (int): level of indentation + + Returns: + (str): abbreviations as BioC JSON + """ + return json.dumps(self.abbreviations, ensure_ascii=False, indent=indent) + + def to_json(self, indent=2): + """Get the currently loaded AC object as a dict. + + Args: + indent (int): Level of indentation. + + Returns: + (str): AC object as a JSON string + """ + return json.dumps(self.to_dict(), ensure_ascii=False, indent=indent) + + def to_dict(self): + """Get the currently loaded AC object as a dict. + + Returns: + (dict): AC object as a dict + """ + return { + "main_text": self.main_text, + "abbreviations": self.abbreviations, + "tables": self.tables, + } From b8d15045efaf8d5b22eee6a916b33afb2b31b8b1 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Wed, 16 Apr 2025 09:43:50 +0100 Subject: [PATCH 010/125] reintegrated file type checking changes --- autocorpus/autocorpus.py | 483 ++++++++++++++++++++++++--------------- 1 file changed, 297 insertions(+), 186 deletions(-) diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 8eb8c77e..798c31bd 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -2,24 +2,22 @@ import json from pathlib import Path -from typing import Any from bioc import biocjson, biocxml from bs4 import BeautifulSoup -from . 
import logger from .abbreviation import Abbreviations -from .bioc_formatter import get_formatted_bioc_collection +from .bioc_formatter import BiocFormatter from .section import Section -from .table import get_table_json -from .utils import handle_not_tables +from .table import Table +from .utils import check_file_type, handle_not_tables class Autocorpus: """Parent class for all Auto-CORPus functionality.""" @staticmethod - def read_config(config_path: str) -> dict[str, Any]: + def read_config(config_path: str) -> dict: """Reads a configuration file and returns its content. Args: @@ -38,47 +36,116 @@ def read_config(config_path: str) -> dict[str, Any]: content = json.load(f) return content["config"] + def __import_file(self, file_path): + file_path = Path(file_path) + with file_path.open("r") as f: + return f.read(), file_path + + def __handle_target_dir(self, target_dir): + target_dir = Path(target_dir) + if not target_dir.exists(): + target_dir.mkdir(parents=True) + return + + def __validate_infile(self): + pass + def __soupify_infile(self, fpath): fpath = Path(fpath) - with fpath.open(encoding="utf-8") as fp: - soup = BeautifulSoup(fp.read(), "html.parser") - for e in soup.find_all( - attrs={"style": ["display:none", "visibility:hidden"]} - ): - e.extract() - return soup + try: + with open(fpath, encoding="utf-8") as fp: + soup = BeautifulSoup(fp.read(), "html.parser") + for e in soup.find_all( + attrs={"style": ["display:none", "visibility:hidden"]} + ): + e.extract() + return soup + except Exception as e: + print(e) + + def __clean_text(self, result: dict) -> dict: + r"""Clean the main text body output of extract_text(). + + - removes duplicated texts from each section (assuming the text from html file has hierarchy up to h3, i.e. no subsubsections) + - removes items with empty bodies. 
- def __get_keywords(self, soup, config): - if "keywords" not in config: - return {} + Args: + result (dict): dict of the maintext - responses = handle_not_tables(config["keywords"], soup) - if not responses: - return {} - responses = " ".join(x["node"].get_text() for x in responses) - return { - "section_heading": "keywords", - "subsection_heading": "", - "body": responses, - "section_type": [{"iao_name": "keywords section", "iao_id": "IAO:0000630"}], - } + Return: + (dict): cleaned dict result input - def __get_title(self, soup, config): - if "title" not in config: - return "" + """ + # Remove duplicated contents from the 'result' output of extract_text() - titles = handle_not_tables(config["title"], soup) - if not titles: - return "" + # Identify unique section headings and the index of their first appearance + idx_section = [] + section_headings = set([i["section_heading"] for i in result["paragraphs"]]) - return titles[0]["node"].get_text() + for i in range(len(section_headings)): + try: + if idx_section[i + 1] - idx_section[i] <= 1: # if only one subsection + continue + idx_section_last = idx_section[i + 1] + except IndexError: + idx_section_last = len(result["paragraphs"]) + + p = result["paragraphs"][idx_section[i] + 1]["body"] + for idx_subsection in range(idx_section[i] + 1, idx_section_last): + if ( + result["paragraphs"][idx_subsection]["body"] + in result["paragraphs"][idx_section[i]]["body"] + ): + result["paragraphs"][idx_section[i]]["body"] = result["paragraphs"][ + idx_section[i] + ]["body"].replace(result["paragraphs"][idx_subsection]["body"], "") + + if (idx_section[i] + 1 != idx_subsection) and ( + p in result["paragraphs"][idx_subsection]["body"] + ): + result["paragraphs"][idx_subsection]["body"] = result["paragraphs"][ + idx_subsection + ]["body"].replace(p, "") + for idx_subsection in range(idx_section[i] + 1, idx_section_last): + if ( + result["paragraphs"][idx_subsection]["subsection_heading"] + == 
result["paragraphs"][idx_section[i]]["subsection_heading"] + ): + result["paragraphs"][idx_section[i]]["subsection_heading"] = "" + return result - def __get_sections(self, soup, config): - if "sections" not in config: - return [] + def __get_keywords(self, soup, config): + if "keywords" in config: + responses = handle_not_tables(config["keywords"], soup) + responses = " ".join([x["node"].get_text() for x in responses]) + if not responses == "": + keyword_section = { + "section_heading": "keywords", + "subsection_heading": "", + "body": responses, + "section_type": [ + {"iao_name": "keywords section", "iao_id": "IAO:0000630"} + ], + } + return [keyword_section] + return False + + def __get_title(self, soup, config): + if "title" in config: + titles = handle_not_tables(config["title"], soup) + if len(titles) == 0: + return "" + else: + return titles[0]["node"].get_text() + else: + return "" - return handle_not_tables(config["sections"], soup) + def __get_sections(self, soup, config): + if "sections" in config: + sections = handle_not_tables(config["sections"], soup) + return sections + return [] def __extract_text(self, soup, config): """Convert beautiful soup object into a python dict object with cleaned main text body. 
@@ -94,15 +161,18 @@ def __extract_text(self, soup, config): # Tags of text body to be extracted are hard-coded as p (main text) and span (keywords and refs) result["title"] = self.__get_title(soup, config) - maintext = [] - if keywords := self.__get_keywords(soup, config): - maintext.append(keywords) + maintext = ( + self.__get_keywords(soup, config) + if self.__get_keywords(soup, config) + else [] + ) sections = self.__get_sections(soup, config) for sec in sections: maintext.extend(Section(config, sec).to_list()) # filter out the sections which do not contain any info - filtered_text = [x for x in maintext if x] + filtered_text = [] + [filtered_text.append(x) for x in maintext if x] unique_text = [] seen_text = [] for text in filtered_text: @@ -131,139 +201,160 @@ def __set_unknown_section_headings(self, unique_text): return unique_text - def __process_html_tables(self, file_path, soup, config): - """Extract data from tables in the HTML file. + def __handle_html(self, file_path, config): + """Handles common HTML processing elements across main_text and linked_tables (creates soup and parses tables). Args: file_path (str): path to the main text file - soup (bs4.BeautifulSoup): soup object config (dict): dict of the maintext + Return: + (bs4.BeautifulSoup): soup object """ - if "tables" not in config: - return - - if not self.tables: - self.tables, self.empty_tables = get_table_json(soup, config, file_path) - return - - seen_ids = set() - for tab in self.tables["documents"]: - if "." in tab["id"]: - seen_ids.add(tab["id"].split(".")[0]) - else: - seen_ids.add(tab["id"]) - - tmp_tables, tmp_empty = get_table_json(soup, config, file_path) - for tabl in tmp_tables["documents"]: - if "." 
in tabl["id"]: - tabl_id = tabl["id"].split(".")[0] - tabl_pos = ".".join(tabl["id"].split(".")[1:]) + soup = self.__soupify_infile(file_path) + if "tables" in config: + if self.tables == {}: + self.tables, self.empty_tables = Table( + soup, config, file_path, self.base_dir + ).to_dict() else: - tabl_id = tabl["id"] - tabl_pos = None - if tabl_id in seen_ids: - tabl_id = str(len(seen_ids) + 1) - if tabl_pos: - tabl["id"] = f"{tabl_id}.{tabl_pos}" - else: - tabl["id"] = tabl_id - seen_ids.add(tabl_id) - self.tables["documents"].extend(tmp_tables["documents"]) - self.empty_tables.extend(tmp_empty) + seen_ids = set() + for tab in self.tables["documents"]: + if "." in tab["id"]: + seen_ids.add(tab["id"].split(".")[0]) + else: + seen_ids.add(tab["id"]) + tmp_tables, tmp_empty = Table( + soup, config, file_path, self.base_dir + ).to_dict() + for tabl in tmp_tables["documents"]: + if "." in tabl["id"]: + tabl_id = tabl["id"].split(".")[0] + tabl_pos = ".".join(tabl["id"].split(".")[1:]) + else: + tabl_id = tabl["id"] + tabl_pos = None + if tabl_id in seen_ids: + tabl_id = str(len(seen_ids) + 1) + if tabl_pos: + tabl["id"] = f"{tabl_id}.{tabl_pos}" + else: + tabl["id"] = tabl_id + seen_ids.add(tabl_id) + self.tables["documents"].extend(tmp_tables["documents"]) + self.empty_tables.extend(tmp_empty) + return soup def __merge_table_data(self): - if not self.empty_tables: + if self.empty_tables == []: return - - documents = self.tables.get("documents", None) - if not documents: - return - - seen_ids = {} - for i, table in enumerate(documents): - if "id" in table: - seen_ids[str(i)] = f"Table {table['id']}." 
- - for table in self.empty_tables: - for seenID in seen_ids.keys(): - if not table["title"].startswith(seen_ids[seenID]): - continue - - if "title" in table and not table["title"] == "": - set_new = False - for passage in documents[int(seenID)]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_title" - ): - passage["text"] = table["title"] - set_new = True - if not set_new: - documents[int(seenID)]["passages"].append( - { - "offset": 0, - "infons": { - "section_type": [ + if "documents" in self.tables: + if self.tables["documents"] == []: + return + else: + seen_ids = {} + for i, table in enumerate(self.tables["documents"]): + if "id" in table: + seen_ids[str(i)] = f"Table {table['id']}." + for table in self.empty_tables: + for seenID in seen_ids.keys(): + if table["title"].startswith(seen_ids[seenID]): + if "title" in table and not table["title"] == "": + set_new = False + for passage in self.tables["documents"][int(seenID)][ + "passages" + ]: + if ( + passage["infons"]["section_type"][0][ + "section_name" + ] + == "table_title" + ): + passage["text"] = table["title"] + set_new = True + if not set_new: + self.tables["documents"][int(seenID)][ + "passages" + ].append( { - "section_name": "table_title", - "iao_name": "document title", - "iao_id": "IAO:0000305", + "offset": 0, + "infons": { + "section_type": [ + { + "section_name": "table_title", + "iao_name": "document title", + "iao_id": "IAO:0000305", + } + ] + }, + "text": table["title"], } - ] - }, - "text": table["title"], - } - ) - if "caption" in table and not table["caption"] == "": - set_new = False - for passage in documents[int(seenID)]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_caption" - ): - passage["text"] = table["caption"] - set_new = True - if not set_new: - documents[int(seenID)]["passages"].append( - { - "offset": 0, - "infons": { - "section_type": [ + ) + pass + if "caption" in table and not table["caption"] == "": 
+ set_new = False + for passage in self.tables["documents"][int(seenID)][ + "passages" + ]: + if ( + passage["infons"]["section_type"][0][ + "section_name" + ] + == "table_caption" + ): + passage["text"] = table["caption"] + set_new = True + if not set_new: + self.tables["documents"][int(seenID)][ + "passages" + ].append( { - "section_name": "table_caption", - "iao_name": "caption", - "iao_id": "IAO:0000304", + "offset": 0, + "infons": { + "section_type": [ + { + "section_name": "table_caption", + "iao_name": "caption", + "iao_id": "IAO:0000304", + } + ] + }, + "text": table["caption"], } - ] - }, - "text": table["caption"], - } - ) - if "footer" in table and not table["footer"] == "": - set_new = False - for passage in documents[int(seenID)]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_footer" - ): - passage["text"] = table["footer"] - set_new = True - if not set_new: - documents[int(seenID)]["passages"].append( - { - "offset": 0, - "infons": { - "section_type": [ + ) + pass + if "footer" in table and not table["footer"] == "": + set_new = False + for passage in self.tables["documents"][int(seenID)][ + "passages" + ]: + if ( + passage["infons"]["section_type"][0][ + "section_name" + ] + == "table_footer" + ): + passage["text"] = table["footer"] + set_new = True + if not set_new: + self.tables["documents"][int(seenID)][ + "passages" + ].append( { - "section_name": "table_footer", - "iao_name": "caption", - "iao_id": "IAO:0000304", + "offset": 0, + "infons": { + "section_type": [ + { + "section_name": "table_footer", + "iao_name": "caption", + "iao_id": "IAO:0000304", + } + ] + }, + "text": table["footer"], } - ] - }, - "text": table["footer"], - } - ) + ) + else: + return def process_files(self): """Processes the files specified in the configuration. @@ -271,12 +362,12 @@ def process_files(self): This method performs the following steps: 1. Checks if a valid configuration is loaded. If not, raises a RuntimeError. 2. 
Handles the main text file: - - Parses the HTML content of the file. - - Extracts the main text from the parsed HTML. - - Attempts to extract abbreviations from the main text and HTML content. + - Parses the HTML/XML content of the file. + - Extracts the main text from the parsed HTML/XML. + - Attempts to extract abbreviations from the main text and HTML/XML content. If an error occurs during this process, it prints the error. 3. Processes linked tables, if any: - - Parses the HTML content of each linked table file. + - Parses the HTML/XML content of each linked table file. 4. Merges table data. 5. Checks if there are any documents in the tables and sets the `has_tables` attribute accordingly. @@ -287,19 +378,33 @@ def process_files(self): raise RuntimeError("A valid config file must be loaded.") # handle main_text if self.file_path: - soup = self.__soupify_infile(self.file_path) - self.__process_html_tables(self.file_path, soup, self.config) - self.main_text = self.__extract_text(soup, self.config) - try: - self.abbreviations = Abbreviations( - self.main_text, soup, self.config, self.file_path - ).to_dict() - except Exception as e: - logger.error(e) + file_type = check_file_type(Path(self.file_path)) + if file_type == "other": + raise RuntimeError("Main text file must be an HTML or XML file.") + + if file_type == "html": + soup = self.__handle_html(self.file_path, self.config) + self.main_text = self.__extract_text(soup, self.config) + try: + self.abbreviations = Abbreviations( + self.main_text, soup, self.config, self.file_path + ).to_dict() + except Exception as e: + print(e) + else: + raise NotImplementedError( + "XML processing is not yet implemented for Auto-CORPus" + ) + if self.linked_tables: for table_file in self.linked_tables: - soup = self.__soupify_infile(table_file) - self.__process_html_tables(table_file, soup, self.config) + file_type = check_file_type(Path(table_file)) + if file_type == "other": + raise RuntimeError("Linked table files must be HTML 
or XML files.") + if file_type == "html": + soup = self.__handle_html(table_file, self.config) + else: + pass # TODO: implement XML handling self.__merge_table_data() if "documents" in self.tables and not self.tables["documents"] == []: self.has_tables = True @@ -307,21 +412,33 @@ def process_files(self): def __init__( self, config, - main_text, + base_dir=None, + main_text=None, linked_tables=None, + table_images=None, + associated_data_path=None, + trained_data=None, ): """Utilises the input config file to create valid BioC versions of input HTML journal articles. Args: config (dict): configuration file for the input HTML journal articles + base_dir (str): base directory of the input HTML journal articles main_text (str): path to the main text of the article (HTML files only) linked_tables (list): list of linked table file paths to be included in this run (HTML files only) + table_images (list): list of table image file paths to be included in this run (JPEG or PNG files only) + associated_data_path (str): currently unused + trained_data (list): currently unused """ + self.base_dir = base_dir self.file_path = main_text self.linked_tables = linked_tables + self.table_images = table_images + self.associated_data_path = associated_data_path self.config = config + self.trained_data = trained_data self.main_text = {} - self.empty_tables = [] + self.empty_tables = {} self.tables = {} self.abbreviations = {} self.has_tables = False @@ -332,9 +449,9 @@ def to_bioc(self): Returns: (dict): bioc as a dict """ - return get_formatted_bioc_collection(self) + return BiocFormatter(self).to_dict() - def main_text_to_bioc_json(self): + def main_text_to_bioc_json(self, indent=2): """Get the currently loaded main text as BioC JSON. 
Args: @@ -343,9 +460,7 @@ def main_text_to_bioc_json(self): Returns: (str): main text as BioC JSON """ - return json.dumps( - get_formatted_bioc_collection(self), indent=2, ensure_ascii=False - ) + return BiocFormatter(self).to_json(indent) def main_text_to_bioc_xml(self): """Get the currently loaded main text as BioC XML. @@ -353,11 +468,7 @@ def main_text_to_bioc_xml(self): Returns: (str): main text as BioC XML """ - collection = biocjson.loads( - json.dumps( - get_formatted_bioc_collection(self), indent=2, ensure_ascii=False - ) - ) + collection = biocjson.loads(BiocFormatter(self).to_json(2)) return biocxml.dumps(collection) def tables_to_bioc_json(self, indent=2): From a827e5154c969c965e15303c1b7fd6edc3526859 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Wed, 16 Apr 2025 10:05:15 +0100 Subject: [PATCH 011/125] Git merge correction --- autocorpus/autocorpus.py | 453 ++++++++++++++++----------------------- 1 file changed, 179 insertions(+), 274 deletions(-) diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 798c31bd..b9dfa7b7 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -2,14 +2,16 @@ import json from pathlib import Path +from typing import Any from bioc import biocjson, biocxml from bs4 import BeautifulSoup +from . import logger from .abbreviation import Abbreviations -from .bioc_formatter import BiocFormatter +from .bioc_formatter import get_formatted_bioc_collection from .section import Section -from .table import Table +from .table import get_table_json from .utils import check_file_type, handle_not_tables @@ -17,7 +19,7 @@ class Autocorpus: """Parent class for all Auto-CORPus functionality.""" @staticmethod - def read_config(config_path: str) -> dict: + def read_config(config_path: str) -> dict[str, Any]: """Reads a configuration file and returns its content. 
Args: @@ -36,116 +38,47 @@ def read_config(config_path: str) -> dict: content = json.load(f) return content["config"] - def __import_file(self, file_path): - file_path = Path(file_path) - with file_path.open("r") as f: - return f.read(), file_path - - def __handle_target_dir(self, target_dir): - target_dir = Path(target_dir) - if not target_dir.exists(): - target_dir.mkdir(parents=True) - return - - def __validate_infile(self): - pass - def __soupify_infile(self, fpath): fpath = Path(fpath) - try: - with open(fpath, encoding="utf-8") as fp: - soup = BeautifulSoup(fp.read(), "html.parser") - for e in soup.find_all( - attrs={"style": ["display:none", "visibility:hidden"]} - ): - e.extract() - return soup - except Exception as e: - print(e) - - def __clean_text(self, result: dict) -> dict: - r"""Clean the main text body output of extract_text(). - - - removes duplicated texts from each section (assuming the text from html file has hierarchy up to h3, i.e. no subsubsections) - - removes items with empty bodies. 
- - Args: - result (dict): dict of the maintext - - - Return: - (dict): cleaned dict result input - - """ - # Remove duplicated contents from the 'result' output of extract_text() + with fpath.open(encoding="utf-8") as fp: + soup = BeautifulSoup(fp.read(), "html.parser") + for e in soup.find_all( + attrs={"style": ["display:none", "visibility:hidden"]} + ): + e.extract() + return soup - # Identify unique section headings and the index of their first appearance - idx_section = [] - section_headings = set([i["section_heading"] for i in result["paragraphs"]]) + def __get_keywords(self, soup, config): + if "keywords" not in config: + return {} - for i in range(len(section_headings)): - try: - if idx_section[i + 1] - idx_section[i] <= 1: # if only one subsection - continue - idx_section_last = idx_section[i + 1] - except IndexError: - idx_section_last = len(result["paragraphs"]) - - p = result["paragraphs"][idx_section[i] + 1]["body"] - for idx_subsection in range(idx_section[i] + 1, idx_section_last): - if ( - result["paragraphs"][idx_subsection]["body"] - in result["paragraphs"][idx_section[i]]["body"] - ): - result["paragraphs"][idx_section[i]]["body"] = result["paragraphs"][ - idx_section[i] - ]["body"].replace(result["paragraphs"][idx_subsection]["body"], "") - - if (idx_section[i] + 1 != idx_subsection) and ( - p in result["paragraphs"][idx_subsection]["body"] - ): - result["paragraphs"][idx_subsection]["body"] = result["paragraphs"][ - idx_subsection - ]["body"].replace(p, "") - for idx_subsection in range(idx_section[i] + 1, idx_section_last): - if ( - result["paragraphs"][idx_subsection]["subsection_heading"] - == result["paragraphs"][idx_section[i]]["subsection_heading"] - ): - result["paragraphs"][idx_section[i]]["subsection_heading"] = "" - return result + responses = handle_not_tables(config["keywords"], soup) + if not responses: + return {} - def __get_keywords(self, soup, config): - if "keywords" in config: - responses = 
handle_not_tables(config["keywords"], soup) - responses = " ".join([x["node"].get_text() for x in responses]) - if not responses == "": - keyword_section = { - "section_heading": "keywords", - "subsection_heading": "", - "body": responses, - "section_type": [ - {"iao_name": "keywords section", "iao_id": "IAO:0000630"} - ], - } - return [keyword_section] - return False + responses = " ".join(x["node"].get_text() for x in responses) + return { + "section_heading": "keywords", + "subsection_heading": "", + "body": responses, + "section_type": [{"iao_name": "keywords section", "iao_id": "IAO:0000630"}], + } def __get_title(self, soup, config): - if "title" in config: - titles = handle_not_tables(config["title"], soup) - if len(titles) == 0: - return "" - else: - return titles[0]["node"].get_text() - else: + if "title" not in config: return "" + titles = handle_not_tables(config["title"], soup) + if not titles: + return "" + + return titles[0]["node"].get_text() + def __get_sections(self, soup, config): - if "sections" in config: - sections = handle_not_tables(config["sections"], soup) - return sections - return [] + if "sections" not in config: + return [] + + return handle_not_tables(config["sections"], soup) def __extract_text(self, soup, config): """Convert beautiful soup object into a python dict object with cleaned main text body. 
@@ -161,18 +94,15 @@ def __extract_text(self, soup, config): # Tags of text body to be extracted are hard-coded as p (main text) and span (keywords and refs) result["title"] = self.__get_title(soup, config) - maintext = ( - self.__get_keywords(soup, config) - if self.__get_keywords(soup, config) - else [] - ) + maintext = [] + if keywords := self.__get_keywords(soup, config): + maintext.append(keywords) sections = self.__get_sections(soup, config) for sec in sections: maintext.extend(Section(config, sec).to_list()) # filter out the sections which do not contain any info - filtered_text = [] - [filtered_text.append(x) for x in maintext if x] + filtered_text = [x for x in maintext if x] unique_text = [] seen_text = [] for text in filtered_text: @@ -201,160 +131,139 @@ def __set_unknown_section_headings(self, unique_text): return unique_text - def __handle_html(self, file_path, config): - """Handles common HTML processing elements across main_text and linked_tables (creates soup and parses tables). + def __process_html_tables(self, file_path, soup, config): + """Extract data from tables in the HTML file. Args: file_path (str): path to the main text file + soup (bs4.BeautifulSoup): soup object config (dict): dict of the maintext - Return: - (bs4.BeautifulSoup): soup object """ - soup = self.__soupify_infile(file_path) - if "tables" in config: - if self.tables == {}: - self.tables, self.empty_tables = Table( - soup, config, file_path, self.base_dir - ).to_dict() + if "tables" not in config: + return + + if not self.tables: + self.tables, self.empty_tables = get_table_json(soup, config, file_path) + return + + seen_ids = set() + for tab in self.tables["documents"]: + if "." in tab["id"]: + seen_ids.add(tab["id"].split(".")[0]) else: - seen_ids = set() - for tab in self.tables["documents"]: - if "." 
in tab["id"]: - seen_ids.add(tab["id"].split(".")[0]) - else: - seen_ids.add(tab["id"]) - tmp_tables, tmp_empty = Table( - soup, config, file_path, self.base_dir - ).to_dict() - for tabl in tmp_tables["documents"]: - if "." in tabl["id"]: - tabl_id = tabl["id"].split(".")[0] - tabl_pos = ".".join(tabl["id"].split(".")[1:]) - else: - tabl_id = tabl["id"] - tabl_pos = None - if tabl_id in seen_ids: - tabl_id = str(len(seen_ids) + 1) - if tabl_pos: - tabl["id"] = f"{tabl_id}.{tabl_pos}" - else: - tabl["id"] = tabl_id - seen_ids.add(tabl_id) - self.tables["documents"].extend(tmp_tables["documents"]) - self.empty_tables.extend(tmp_empty) - return soup + seen_ids.add(tab["id"]) + + tmp_tables, tmp_empty = get_table_json(soup, config, file_path) + for tabl in tmp_tables["documents"]: + if "." in tabl["id"]: + tabl_id = tabl["id"].split(".")[0] + tabl_pos = ".".join(tabl["id"].split(".")[1:]) + else: + tabl_id = tabl["id"] + tabl_pos = None + if tabl_id in seen_ids: + tabl_id = str(len(seen_ids) + 1) + if tabl_pos: + tabl["id"] = f"{tabl_id}.{tabl_pos}" + else: + tabl["id"] = tabl_id + seen_ids.add(tabl_id) + self.tables["documents"].extend(tmp_tables["documents"]) + self.empty_tables.extend(tmp_empty) def __merge_table_data(self): - if self.empty_tables == []: + if not self.empty_tables: return - if "documents" in self.tables: - if self.tables["documents"] == []: - return - else: - seen_ids = {} - for i, table in enumerate(self.tables["documents"]): - if "id" in table: - seen_ids[str(i)] = f"Table {table['id']}." 
- for table in self.empty_tables: - for seenID in seen_ids.keys(): - if table["title"].startswith(seen_ids[seenID]): - if "title" in table and not table["title"] == "": - set_new = False - for passage in self.tables["documents"][int(seenID)][ - "passages" - ]: - if ( - passage["infons"]["section_type"][0][ - "section_name" - ] - == "table_title" - ): - passage["text"] = table["title"] - set_new = True - if not set_new: - self.tables["documents"][int(seenID)][ - "passages" - ].append( + + documents = self.tables.get("documents", None) + if not documents: + return + + seen_ids = {} + for i, table in enumerate(documents): + if "id" in table: + seen_ids[str(i)] = f"Table {table['id']}." + + for table in self.empty_tables: + for seenID in seen_ids.keys(): + if not table["title"].startswith(seen_ids[seenID]): + continue + + if "title" in table and not table["title"] == "": + set_new = False + for passage in documents[int(seenID)]["passages"]: + if ( + passage["infons"]["section_type"][0]["section_name"] + == "table_title" + ): + passage["text"] = table["title"] + set_new = True + if not set_new: + documents[int(seenID)]["passages"].append( + { + "offset": 0, + "infons": { + "section_type": [ { - "offset": 0, - "infons": { - "section_type": [ - { - "section_name": "table_title", - "iao_name": "document title", - "iao_id": "IAO:0000305", - } - ] - }, - "text": table["title"], + "section_name": "table_title", + "iao_name": "document title", + "iao_id": "IAO:0000305", } - ) - pass - if "caption" in table and not table["caption"] == "": - set_new = False - for passage in self.tables["documents"][int(seenID)][ - "passages" - ]: - if ( - passage["infons"]["section_type"][0][ - "section_name" - ] - == "table_caption" - ): - passage["text"] = table["caption"] - set_new = True - if not set_new: - self.tables["documents"][int(seenID)][ - "passages" - ].append( + ] + }, + "text": table["title"], + } + ) + if "caption" in table and not table["caption"] == "": + set_new = False + for 
passage in documents[int(seenID)]["passages"]: + if ( + passage["infons"]["section_type"][0]["section_name"] + == "table_caption" + ): + passage["text"] = table["caption"] + set_new = True + if not set_new: + documents[int(seenID)]["passages"].append( + { + "offset": 0, + "infons": { + "section_type": [ { - "offset": 0, - "infons": { - "section_type": [ - { - "section_name": "table_caption", - "iao_name": "caption", - "iao_id": "IAO:0000304", - } - ] - }, - "text": table["caption"], + "section_name": "table_caption", + "iao_name": "caption", + "iao_id": "IAO:0000304", } - ) - pass - if "footer" in table and not table["footer"] == "": - set_new = False - for passage in self.tables["documents"][int(seenID)][ - "passages" - ]: - if ( - passage["infons"]["section_type"][0][ - "section_name" - ] - == "table_footer" - ): - passage["text"] = table["footer"] - set_new = True - if not set_new: - self.tables["documents"][int(seenID)][ - "passages" - ].append( + ] + }, + "text": table["caption"], + } + ) + if "footer" in table and not table["footer"] == "": + set_new = False + for passage in documents[int(seenID)]["passages"]: + if ( + passage["infons"]["section_type"][0]["section_name"] + == "table_footer" + ): + passage["text"] = table["footer"] + set_new = True + if not set_new: + documents[int(seenID)]["passages"].append( + { + "offset": 0, + "infons": { + "section_type": [ { - "offset": 0, - "infons": { - "section_type": [ - { - "section_name": "table_footer", - "iao_name": "caption", - "iao_id": "IAO:0000304", - } - ] - }, - "text": table["footer"], + "section_name": "table_footer", + "iao_name": "caption", + "iao_id": "IAO:0000304", } - ) - else: - return + ] + }, + "text": table["footer"], + } + ) def process_files(self): """Processes the files specified in the configuration. @@ -362,12 +271,12 @@ def process_files(self): This method performs the following steps: 1. Checks if a valid configuration is loaded. If not, raises a RuntimeError. 2. 
Handles the main text file: - - Parses the HTML/XML content of the file. - - Extracts the main text from the parsed HTML/XML. - - Attempts to extract abbreviations from the main text and HTML/XML content. + - Parses the HTML content of the file. + - Extracts the main text from the parsed HTML. + - Attempts to extract abbreviations from the main text and HTML content. If an error occurs during this process, it prints the error. 3. Processes linked tables, if any: - - Parses the HTML/XML content of each linked table file. + - Parses the HTML content of each linked table file. 4. Merges table data. 5. Checks if there are any documents in the tables and sets the `has_tables` attribute accordingly. @@ -383,14 +292,15 @@ def process_files(self): raise RuntimeError("Main text file must be an HTML or XML file.") if file_type == "html": - soup = self.__handle_html(self.file_path, self.config) + soup = self.__soupify_infile(self.file_path) + self.__process_html_tables(self.file_path, soup, self.config) self.main_text = self.__extract_text(soup, self.config) try: self.abbreviations = Abbreviations( self.main_text, soup, self.config, self.file_path ).to_dict() except Exception as e: - print(e) + logger.error(e) else: raise NotImplementedError( "XML processing is not yet implemented for Auto-CORPus" @@ -402,7 +312,8 @@ def process_files(self): if file_type == "other": raise RuntimeError("Linked table files must be HTML or XML files.") if file_type == "html": - soup = self.__handle_html(table_file, self.config) + soup = self.__soupify_infile(table_file) + self.__process_html_tables(table_file, soup, self.config) else: pass # TODO: implement XML handling self.__merge_table_data() @@ -412,33 +323,21 @@ def process_files(self): def __init__( self, config, - base_dir=None, - main_text=None, + main_text, linked_tables=None, - table_images=None, - associated_data_path=None, - trained_data=None, ): """Utilises the input config file to create valid BioC versions of input HTML journal 
articles. Args: config (dict): configuration file for the input HTML journal articles - base_dir (str): base directory of the input HTML journal articles main_text (str): path to the main text of the article (HTML files only) linked_tables (list): list of linked table file paths to be included in this run (HTML files only) - table_images (list): list of table image file paths to be included in this run (JPEG or PNG files only) - associated_data_path (str): currently unused - trained_data (list): currently unused """ - self.base_dir = base_dir self.file_path = main_text self.linked_tables = linked_tables - self.table_images = table_images - self.associated_data_path = associated_data_path self.config = config - self.trained_data = trained_data self.main_text = {} - self.empty_tables = {} + self.empty_tables = [] self.tables = {} self.abbreviations = {} self.has_tables = False @@ -449,9 +348,9 @@ def to_bioc(self): Returns: (dict): bioc as a dict """ - return BiocFormatter(self).to_dict() + return get_formatted_bioc_collection(self) - def main_text_to_bioc_json(self, indent=2): + def main_text_to_bioc_json(self): """Get the currently loaded main text as BioC JSON. Args: @@ -460,7 +359,9 @@ def main_text_to_bioc_json(self, indent=2): Returns: (str): main text as BioC JSON """ - return BiocFormatter(self).to_json(indent) + return json.dumps( + get_formatted_bioc_collection(self), indent=2, ensure_ascii=False + ) def main_text_to_bioc_xml(self): """Get the currently loaded main text as BioC XML. 
@@ -468,7 +369,11 @@ def main_text_to_bioc_xml(self): Returns: (str): main text as BioC XML """ - collection = biocjson.loads(BiocFormatter(self).to_json(2)) + collection = biocjson.loads( + json.dumps( + get_formatted_bioc_collection(self), indent=2, ensure_ascii=False + ) + ) return biocxml.dumps(collection) def tables_to_bioc_json(self, indent=2): From 8fecb3b5db8e6b9ff27dc55a5ea808118400208b Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Sat, 19 Apr 2025 20:44:13 +0100 Subject: [PATCH 012/125] Type hints and optimisation --- autocorpus/word_processing.py | 262 ++++++++++++++-------------------- 1 file changed, 107 insertions(+), 155 deletions(-) diff --git a/autocorpus/word_processing.py b/autocorpus/word_processing.py index 38881d8b..7b1e428f 100644 --- a/autocorpus/word_processing.py +++ b/autocorpus/word_processing.py @@ -6,13 +6,18 @@ """ import datetime +import json +import os +import platform import re +import subprocess from pathlib import Path +import docx import pandas as pd from bioc import BioCCollection, BioCDocument, BioCPassage from BioCTable import get_tables_bioc -from docx import Document +from docx.document import Document from autocorpus.utils import replace_unicode @@ -96,6 +101,39 @@ def extract_table_from_text(text: str) -> tuple[list[str], list[pd.DataFrame]]: return text_output, tables_output +def get_word_paragraphs(doc: Document) -> list[tuple[str, bool]]: + """Extracts paragraphs from a Word document and identifies headers. + + Args: + doc (Document): The Word document object. + + Returns: + list[tuple[str, bool]]: A list of tuples where each tuple contains the paragraph text + and a boolean indicating if it is a header. 
+ """ + text_sizes = set( + [ + int(x.style.font.size) + for x in doc.paragraphs + if x.style and x.style.font and x.style.font.size + ] + ) + paragraphs = [ + ( + x.text, + True + if text_sizes + and x.style + and x.style.font + and x.style.font.size + and int(x.style.font.size) > min(text_sizes) + else False, + ) + for x in doc.paragraphs + ] + return paragraphs + + def get_bioc_passages(text: list[str] | str) -> list[BioCPassage] | list[str]: """Identifies passages within the given text and creates passage objects. @@ -289,92 +327,6 @@ def get_table(self): return self.passage -def get_tables_bioc(tables, filename, textsource="Auto-CORPus"): - """Generates a BioC XML structure containing tables. - - Args: - tables (list): A list of tables to be included in the BioC structure. - Each table should be represented as a nested list, where each inner list - corresponds to a row, and each element in the inner list corresponds to the - text content of a cell in the row. - - Returns: - dict: A dictionary representing the generated BioC XML structure. - - Example: - tables = [[["A", "B"], ["1", "2"]], [["X", "Y"], ["3", "4"]]] - bioc_xml = get_tables_bioc(tables) - """ - # Create a BioC JSON structure dictionary - bioc = { - "source": "Auto-CORPus (supplementary)", - "date": str(datetime.date.today().strftime("%Y%m%d")), - "key": "autocorpus_supplementary.key", - "infons": {}, - "documents": [ - { - "id": 1, - "inputfile": filename, - "textsource": textsource, - "infons": {}, - "passages": [], - "annotations": [], - "relations": [], - } - ], - } - for i, x in enumerate(tables): - bioc["documents"][0]["passages"].append( - BioCTable(filename, i + 1, x).get_table() - ) - return bioc - - -def get_text_bioc(paragraphs, filename, textsource="Auto-CORPus"): - """Generates a BioC JSON structure containing text paragraphs. - - Args: - paragraphs (list): A list of paragraphs to be included in the BioC structure. 
- - Returns: - dict: A dictionary representing the generated BioC XML structure. - - Example: - paragraphs = ["This is the first paragraph.", "This is the second paragraph."] - bioc_xml = get_text_bioc(paragraphs) - """ - passages = [ - p - for sublist in [ - BioCText(text=replace_unicode(x)).__dict__["passages"] for x in paragraphs - ] - for p in sublist - ] - offset = 0 - for p in passages: - p["offset"] = offset - offset += len(p["text"]) - # Create a BioC XML structure dictionary - bioc = { - "source": "Auto-CORPus (supplementary)", - "date": str(datetime.date.today().strftime("%Y%m%d")), - "key": "autocorpus_supplementary.key", - "infons": {}, - "documents": [ - { - "id": 1, - "inputfile": filename, - "textsource": textsource, - "infons": {}, - "passages": passages, - "annotations": [], - "relations": [], - } - ], - } - return bioc - - def extract_tables(doc): """Extracts tables from a .docx document. @@ -403,25 +355,34 @@ def extract_tables(doc): return tables -def convert_older_doc_file(file, output_dir): +def convert_older_doc_file(file: Path, output_dir: Path) -> Path | bool: + """Converts an older .doc file to .docx format. + + Args: + file (str): The path to the .doc file to be converted. + output_dir (str): The directory where the converted .docx file will be saved. + + Returns: + str: The path to the converted .docx file, or an empty string if the conversion fails. 
+ """ operating_system = platform.system() - docx_path = file.replace(".doc", ".docx") + docx_path = Path(str(file) + ".docx") if operating_system == "Windows": import win32com.client word = None try: - docx_path = file + ".docx" word = win32com.client.DispatchEx("Word.Application") doc = word.Documents.Open(file) - doc.SaveAs(file + ".docx", 16) + doc.SaveAs(docx_path, 16) doc.Close() word.Quit() return docx_path except Exception: return False finally: - word.Quit() + if word: + word.Quit() elif operating_system == "linux": # Convert .doc to .docx using LibreOffice subprocess.run( @@ -435,8 +396,7 @@ def convert_older_doc_file(file, output_dir): file, ], check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + capture_output=True, ) return docx_path elif operating_system == "Darwin": # macOS @@ -457,80 +417,73 @@ def convert_older_doc_file(file, output_dir): return False -def extract_text_from_doc(file_path): - """Extracts text from a .doc file by converting it to .docx and processing with python-docx.""" - if not file_path.endswith(".doc"): +def extract_text_from_doc( + file_path: Path, +) -> tuple[list[tuple[str, bool]], list[pd.DataFrame]]: + """Extracts text and tables from a .doc file. + + Converts older .doc files to .docx format if necessary, then extracts text and tables. + Deletes the temporary .docx file after processing. + + Args: + file_path (str): The path to the .doc file. + + Returns: + tuple: A tuple containing a list of paragraphs and a list of tables extracted from the document. + + Raises: + ValueError: If the input file is not a .doc file. + FileNotFoundError: If LibreOffice 'soffice' command is not found on Linux. + Exception: For other errors during file processing. 
+ """ + if not file_path.suffix.lower() == ".doc": raise ValueError("Input file must be a .doc file.") try: - output_dir = str(Path(file_path).parent.absolute()) + output_dir = file_path.parent docx_path = convert_older_doc_file(file_path, output_dir) - - # Extract text from the resulting .docx file - doc = Document(docx_path) - tables = extract_tables(doc) - text_sizes = set( - [int(x.style.font.size) for x in doc.paragraphs if x.style.font.size] - ) - paragraphs = [ - ( - x.text, - True - if text_sizes - and x.style.font.size - and int(x.style.font.size) > min(text_sizes) - else False, - ) - for x in doc.paragraphs - ] - os.unlink(docx_path) - return paragraphs, tables + if isinstance(docx_path, Path): + # Extract text from the resulting .docx file + doc = docx.Document(str(docx_path.absolute())) + tables = extract_tables(doc) + paragraphs = get_word_paragraphs(doc) + os.unlink(docx_path) + return paragraphs, tables + else: + logger.info("Failed to convert .doc file to .docx.") + return [], [] except FileNotFoundError: print( "LibreOffice 'soffice' command not found. Ensure it is installed and in your PATH." ) - return None, None + logger.warning( + "LibreOffice 'soffice' command not found. Ensure it is installed and in your PATH." + ) + return [], [] except Exception as e: - print(f"Error processing file {file_path}: {e}") - return None, None + logger.error(f"Error processing file {file_path}: {e}") + return [], [] -def process_word_document(file): - """Processes a Word document file, extracting tables and paragraphs, and saving them as JSON files. +def process_word_document(file: Path, output_location: Path): + """Processes a Word document to extract tables and paragraphs. Args: - file (str): The path to the Word document file. + file (Path): The path to the Word document file to be processed. + output_location (Path): The directory where the extracted data will be saved. 
Returns: - None - - Example: - file_path = "/path/to/document.docx" - process_word_document(file_path) + bool: True if the document was processed successfully, False otherwise. """ tables, paragraphs = [], [] - output_path = file.replace("Raw", "Processed") # Check if the file has a ".doc" or ".docx" extension - if file.lower().endswith(".doc") or file.lower().endswith(".docx"): + if file.suffix.lower() in [".doc", ".docx"]: try: - doc = Document(file) + doc = docx.Document(str(file)) tables = extract_tables(doc) - text_sizes = set( - [int(x.style.font.size) for x in doc.paragraphs if x.style.font.size] - ) - paragraphs = [ - ( - x.text, - True - if text_sizes - and x.style.font.size - and int(x.style.font.size) > min(text_sizes) - else False, - ) - for x in doc.paragraphs - ] + paragraphs = get_word_paragraphs(doc) except ValueError: try: - if not file.lower().endswith(".docx"): + if not file.suffix.lower() == ".docx": paragraphs, tables = extract_text_from_doc(file) if paragraphs: logger.info( @@ -555,18 +508,17 @@ def process_word_document(file): # Save tables as a JSON file if tables: - if not Path(output_path).exists(): - Path(output_path).parent.mkdir(parents=True, exist_ok=True) - with open(f"{output_path}_tables.json", "w+", encoding="utf-8") as f_out: - json.dump(get_tables_bioc(tables, Path(file).name), f_out) + if not output_location.exists(): + output_location.mkdir(parents=True, exist_ok=True) + with open(f"{output_location}_tables.json", "w+", encoding="utf-8") as f_out: + json.dump(get_tables_bioc(tables), f_out) # Save paragraphs as a JSON file if paragraphs: paragraphs = [x for x in paragraphs if x[0]] - if not Path(output_path).exists(): - Path(output_path).parent.mkdir(parents=True, exist_ok=True) - global args - with open(f"{output_path}_bioc.json", "w+", encoding="utf-8") as f_out: + if not output_location.exists(): + output_location.mkdir(parents=True, exist_ok=True) + with open(f"{output_location}_bioc.json", "w+", encoding="utf-8") as f_out: 
# TODO: Test if datatype causes a problem text = get_text_bioc(paragraphs, Path(file).name) json.dump(text, f_out, indent=4) From ff2e0a8a82e97a86eb6b362ccecf4adce25f21fd Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 16 May 2025 09:16:04 +0100 Subject: [PATCH 013/125] Add codecov config files and upload coverage with CI --- .github/workflows/ci.yml | 6 ++++++ codecov.yml | 9 +++++++++ pyproject.toml | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 codecov.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 085a1703..66d6721b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,6 +25,12 @@ jobs: python-version: ${{ matrix.python-version }} - name: Run tests run: poetry run pytest + - name: Upload coverage reports to Codecov + if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13' && github.event.pull_request.user.login != 'dependabot[bot]' && github.event.pull_request.user.login != 'pre-commit-ci[bot]' }} + uses: codecov/codecov-action@v5 + with: + fail_ci_if_error: true + token: ${{ secrets.CODECOV_TOKEN }} - name: Check docs build run: poetry run mkdocs build - name: Check types diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 00000000..e686d224 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,9 @@ +# Don't fail CI if coverage drops +coverage: + status: + project: + default: + informational: true + patch: + default: + informational: true diff --git a/pyproject.toml b/pyproject.toml index 080dce04..4c3266cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,7 @@ module = ["nltk.*", "fuzzywuzzy.*", "bioc.*"] ignore_missing_imports = true [tool.pytest.ini_options] -addopts = "-v -p no:warnings --cov=autocorpus --cov-report=html --doctest-modules --ignore=docs/ --ignore=site/" +addopts = "-v -p no:warnings --cov=autocorpus --cov-branch --cov-report=xml --cov-report=html --doctest-modules --ignore=docs/ --ignore=site/" [tool.ruff] target-version = 
"py310" From 1363286e5114e8b688ac72aa9b84819c4aa44852 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 16 May 2025 09:22:25 +0100 Subject: [PATCH 014/125] Add codecov badge to readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 2cfe39c9..5faf8511 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/omicsNLP/Auto-CORPus/main.svg)](https://results.pre-commit.ci/latest/github/omicsNLP/Auto-CORPus/main) [![PyPI version](https://badge.fury.io/py/autocorpus.svg)](https://badge.fury.io/py/autocorpus) +[![codecov](https://codecov.io/gh/ImperialCollegeLondon/Auto-CORPus/graph/badge.svg?token=6WWASKCH66)](https://codecov.io/gh/ImperialCollegeLondon/Auto-CORPus) *Requires Python 3.10+* From ebddbd155cf107eb5fccc9ffb1133bba3bc2de41 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Mon, 19 May 2025 20:09:05 +0100 Subject: [PATCH 015/125] Implemented word document extraction --- autocorpus/BioCTable.py | 140 -------- autocorpus/autocorpus.py | 3 + autocorpus/bioc_supplementary.py | 2 +- autocorpus/word_extractor.py | 140 ++++++++ autocorpus/word_processing.py | 529 ------------------------------- poetry.lock | 28 +- pyproject.toml | 2 + 7 files changed, 173 insertions(+), 671 deletions(-) delete mode 100644 autocorpus/BioCTable.py create mode 100644 autocorpus/word_extractor.py delete mode 100644 autocorpus/word_processing.py diff --git a/autocorpus/BioCTable.py b/autocorpus/BioCTable.py deleted file mode 100644 index d83278b1..00000000 --- a/autocorpus/BioCTable.py +++ /dev/null @@ -1,140 +0,0 @@ -"""This module provides functionality for converting tables into an extended BioC format. - -BioCTable objects include table-specific elements such as cell IDs for annotation. 
-""" - -import datetime -from typing import Any - -from pandas import DataFrame - -from autocorpus.utils import replace_unicode - - -class BioCTable: - """Converts tables from nested lists into a BioC table object.""" - - def __init__(self, table_id: int, table_data: DataFrame): - """Initialize a BioCTable object. - - Args: - table_id (int): The unique identifier for the table. - table_data (pd.DataFrame): The data of the table as a Pandas DataFrame. - textsource (str): The source of the text content. - """ - self.id = str(table_id) + "_1" - self.textsource = "Auto-CORPus (supplementary)" - self.infons: dict[str, Any] = {} - self.passages: list[dict[str, Any]] = [] - self.annotations: list[dict[str, Any]] = [] - self.__build_table(table_data) - - def __build_table(self, table_data: DataFrame): - """Builds a table passage based on the provided table_data and adds it to the passages list. - - Args: - table_data: A pandas DataFrame containing the data for the table. - - Returns: - None - """ - # Create a title passage - title_passage = { - "offset": 0, - "infons": { - "section_title_1": "table_title", - "iao_name_1": "document title", - "iao_id_1": "IAO:0000305", - }, - } - self.passages.append(title_passage) - # Create a caption passage - caption_passage = { - "offset": 0, - "infons": { - "section_title_1": "table_caption", - "iao_name_1": "caption", - "iao_id_1": "IAO:0000304", - }, - } - self.passages.append(caption_passage) - # Create a passage for table content - passage = { - "offset": 0, - "infons": { - "section_title_1": "table_content", - "iao_name_1": "table", - "iao_id_1": "IAO:0000306", - }, - "column_headings": [], - "data_section": [{"table_section_title_1": "", "data_rows": []}], - } - # Populate column headings - for i, text in enumerate(table_data.columns.values): - passage["column_headings"].append( - {"cell_id": self.id + f".1.{i + 1}", "cell_text": replace_unicode(text)} - ) - # Populate table rows with cell data - for row_idx, row in 
enumerate(table_data.values): - new_row = [] - for cell_idx, cell in enumerate(row): - new_cell = { - "cell_id": f"{self.id}.{row_idx + 2}.{cell_idx + 1}", - "cell_text": f"{replace_unicode(cell)}", - } - new_row.append(new_cell) - passage["data_section"][0]["data_rows"].append(new_row) - # Add the table passage to the passages list - self.passages.append(passage) - - -def get_tables_bioc(tables: list[DataFrame]) -> dict[str, Any]: - """Converts extracted tables into BioC format. - - Args: - tables: A list of tables extracted from an Excel file. - filename: The name of the Excel file. - textsource: Source of the text content. - - Returns: - A BioC format representation of the extracted tables. - """ - # Create a BioC dictionary - bioc = { - "source": "Auto-CORPus (supplementary)", - "date": datetime.date.today().strftime("%Y%m%d"), - "key": "autocorpus_supplementary.key", - "infons": {}, - "documents": [BioCTable(i + 1, x).__dict__ for i, x in enumerate(tables)], - } - return bioc - - -def convert_datetime_to_string(df: DataFrame) -> DataFrame: - """Convert all datetime objects in a DataFrame to string format. - - Args: - df (pd.DataFrame): The input DataFrame. - - Returns: - pd.DataFrame: A DataFrame with datetime columns converted to string. - """ - for col in df.select_dtypes(include=["datetime64[ns]", "datetime64"]): - df[col] = df[col].astype(str) - return df - - -def get_blank_cell_count(row: list[dict[str, str]]) -> int: - """Counts the number of blank cells in a given row. - - Args: - row (list): A list of dictionaries representing cells in a row. - - Returns: - int: The number of blank cells in the row. 
- """ - blank_count = 0 - for cell in row: - if not cell["text"].strip(): - blank_count += 1 - return blank_count diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 2030f350..2a421ff6 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -14,6 +14,7 @@ BioCTextConverter, extract_table_from_pdf_text, ) +from autocorpus.word_extractor import extract_word_content from . import logger from .abbreviation import get_abbreviations @@ -337,6 +338,8 @@ def __process_supplementary_file(self, file: Path): pass case ".pdf": self.__extract_pdf_content(file) + case ".doc" | ".docx": + extract_word_content(file) case _: pass diff --git a/autocorpus/bioc_supplementary.py b/autocorpus/bioc_supplementary.py index 28af7995..8af511ba 100644 --- a/autocorpus/bioc_supplementary.py +++ b/autocorpus/bioc_supplementary.py @@ -218,7 +218,7 @@ def output_tables_json(self, filename: Path) -> None: class BioCTextConverter: """Converts text content into a BioC format for supplementary material processing.""" - text: str + text: str | list[tuple[str, bool]] file_type_source: str input_file: str diff --git a/autocorpus/word_extractor.py b/autocorpus/word_extractor.py new file mode 100644 index 00000000..c8117af1 --- /dev/null +++ b/autocorpus/word_extractor.py @@ -0,0 +1,140 @@ +"""This module provides functionality to extract text and tables from Word documents (.doc and .docx). + +It includes methods to handle older .doc files by converting them to .docx format and processing them. +""" + +import os +import platform +import subprocess +from pathlib import Path + +from docx import Document + +from autocorpus.bioc_supplementary import BioCTableConverter, BioCTextConverter + +from . import logger + + +def __extract_tables(doc): + """Extracts tables from a .docx document. + + Args: + doc (docx.Document): The Document object representing the .docx document. + + Returns: + list: A list of tables extracted from the document. 
Each table is represented as a nested list, + where each inner list corresponds to a row, and each element in the inner list corresponds + to the text content of a cell in the row. + + Example: + from docx import Document + + doc = Document("document.docx") + tables = extract_tables(doc) + """ + # Open the .docx file + tables = [] + # Iterate through the tables in the document + for table in doc.tables: + tables.append([]) + # Iterate through the rows in the table + for row in table.rows: + tables[-1].append([x.text for x in row.cells]) + return tables + + +def __convert_older_doc_file(file, output_dir): + operating_system = platform.system() + docx_path = file.replace(".doc", ".docx") + if operating_system == "Windows": + import win32com.client + + word = None + try: + docx_path = file + ".docx" + word = win32com.client.DispatchEx("Word.Application") + doc = word.Documents.Open(file) + doc.SaveAs(file + ".docx", 16) + doc.Close() + word.Quit() + return docx_path + except Exception: + return False + finally: + word.Quit() + elif operating_system == "linux": + # Convert .doc to .docx using LibreOffice + subprocess.run( + [ + "soffice", + "--headless", + "--convert-to", + "docx", + "--outdir", + output_dir, + file, + ], + check=True, + capture_output=True, + ) + return docx_path + elif operating_system == "Darwin": # macOS + try: + # AppleScript to open the file in Word and save as .docx + applescript = f''' + tell application "Microsoft Word" + open "{file}" + save as active document file name "{docx_path}" file format format document + close active document saving no + end tell + ''' + subprocess.run(["osascript", "-e", applescript], check=True) + return docx_path + except Exception: + return False + else: + return False + + +def extract_word_content(file_path: Path): + """Extracts text from a .doc file by converting it to .docx and processing with python-docx.""" + if file_path.suffix.lower() not in [".doc", ".docx"]: + raise ValueError("Input file must be a .doc 
file.") + try: + output_dir = str(Path(file_path).parent.absolute()) + docx_path = __convert_older_doc_file(file_path, output_dir) + + # Extract text from the resulting .docx file + doc = Document(docx_path) + tables = __extract_tables(doc) + text_sizes = set( + [ + int(x.style.font.size) + for x in doc.paragraphs + if x.style and x.style.font.size + ] + ) + paragraphs = [ + ( + x.text, + True + if text_sizes + and x.style + and x.style.font.size + and int(x.style.font.size) > min(text_sizes) + else False, + ) + for x in doc.paragraphs + ] + bioc_text = BioCTextConverter(paragraphs, "word", str(file_path)) + bioc_text.output_bioc_json(file_path) + bioc_tables = BioCTableConverter(tables, str(file_path)) + bioc_tables.output_tables_json(file_path) + os.unlink(docx_path) + return paragraphs, tables + except FileNotFoundError: + logger.error( + "LibreOffice 'soffice' command not found. Ensure it is installed and in your PATH." + ) + except Exception as e: + logger.error(f"Error processing file {file_path}: {e}") diff --git a/autocorpus/word_processing.py b/autocorpus/word_processing.py deleted file mode 100644 index 7b1e428f..00000000 --- a/autocorpus/word_processing.py +++ /dev/null @@ -1,529 +0,0 @@ -"""This module provides functionality for processing supplementary files. - -Extracts data from various file types such as PDFs, spreadsheets, -PowerPoint presentations, and archives. It also handles logging and error -management for unprocessed files. -""" - -import datetime -import json -import os -import platform -import re -import subprocess -from pathlib import Path - -import docx -import pandas as pd -from bioc import BioCCollection, BioCDocument, BioCPassage -from BioCTable import get_tables_bioc -from docx.document import Document - -from autocorpus.utils import replace_unicode - -from . 
import logger - -WORD_EXTENSIONS = [".doc", ".docx"] - - -def extract_table_from_text(text: str) -> tuple[list[str], list[pd.DataFrame]]: - """Extracts tables from a given text and returns the modified text and extracted tables. - - Args: - text (str): The input text containing potential table data. - - Returns: - tuple[str, list[pd.DataFrame]]: A tuple containing the modified text without table lines - and a list of DataFrames representing the extracted tables. - """ - # Split the text into lines - lines = [x for x in text.splitlines() if x] - text_output = lines - - # store extracted tables - tables = [] - # Identify where the table starts and ends by looking for lines containing pipes - table_lines = [] - # keep unmodified lines used in tables. These must be removed from the original text - lines_to_remove = [] - inside_table = False - for line in lines: - if "|" in line: - inside_table = True - table_lines.append(line) - lines_to_remove.append(line) - elif ( - inside_table - ): # End of table if there's a blank line after lines with pipes - inside_table = False - tables.append(table_lines) - table_lines = [] - continue - - for line in lines_to_remove: - text_output.remove(line) - - tables_output = [] - # Remove lines that are just dashes (table separators) - for table in tables: - table = [line for line in table if not re.match(r"^\s*-+\s*$", line)] - - # Extract rows from the identified table lines - rows = [] - for line in table: - # Match only lines that look like table rows (contain pipes) - if re.search(r"\|", line): - # Split the line into cells using the pipe delimiter and strip whitespace - cells = [ - cell.strip() - for cell in line.split("|") - if not all(x in "|-" for x in cell) - ] - if cells: - # Remove empty cells that may result from leading/trailing pipes - # if cells[0] == '': - # cells.pop(0) - # if cells[-1] == '': - # cells.pop(-1) - rows.append(cells) - - # Determine the maximum number of columns in the table - num_columns = max(len(row) 
for row in rows) - - # Pad rows with missing cells to ensure they all have the same length - for row in rows: - while len(row) < num_columns: - row.append("") - - # Create a DataFrame from the rows - df = pd.DataFrame(rows[1:], columns=rows[0]) - tables_output.append(df) - return text_output, tables_output - - -def get_word_paragraphs(doc: Document) -> list[tuple[str, bool]]: - """Extracts paragraphs from a Word document and identifies headers. - - Args: - doc (Document): The Word document object. - - Returns: - list[tuple[str, bool]]: A list of tuples where each tuple contains the paragraph text - and a boolean indicating if it is a header. - """ - text_sizes = set( - [ - int(x.style.font.size) - for x in doc.paragraphs - if x.style and x.style.font and x.style.font.size - ] - ) - paragraphs = [ - ( - x.text, - True - if text_sizes - and x.style - and x.style.font - and x.style.font.size - and int(x.style.font.size) > min(text_sizes) - else False, - ) - for x in doc.paragraphs - ] - return paragraphs - - -def get_bioc_passages(text: list[str] | str) -> list[BioCPassage] | list[str]: - """Identifies passages within the given text and creates passage objects. - - Args: - text (list): The text to be processed, represented as a list of lines. - - Returns: - list: A list of BioCPassage objects. 
- """ - offset = 0 - passages: list[BioCPassage] = [] - if not text: - return passages - if isinstance(text, str): - text = text.split("\n\n") - text = [x for x in text if x] - # Iterate through each line in the text - for line in text: - # Determine the type of the line and assign appropriate information - iao_name = "supplementary material section" - iao_id = "IAO:0000326" - # Create a passage object and add it to the passages list - passage = BioCPassage() - passage.offset = offset - passage.infons = {"iao_name_1": iao_name, "iao_id_1": iao_id} - passage.text = line - passages.append(passage) - offset += len(line) - return passages - - -def get_text_bioc(parsed_texts: list[str], filename: str): - """Convert parsed texts into BioC format. - - Args: - parsed_texts (list): A list of parsed text segments to be converted. - filename (str): The name of the source file. - textsource (str): The source of the text, default is "Auto-CORPus". - - Returns: - BioCCollection: A BioCCollection object representing the converted text in BioC format. - """ - passages = [ - p - for sublist in [ - get_bioc_passages(replace_unicode(x)).__dict__["passages"] - for x in parsed_texts - ] - for p in sublist - ] - offset = 0 - for p in passages: - p["offset"] = offset - offset += len(p["text"]) - # Create a BioC XML structure dictionary - bioc = BioCCollection() - bioc.source = "Auto-CORPus (supplementary)" - bioc.date = datetime.date.today().strftime("%Y%m%d") - bioc.key = "autocorpus_supplementary.key" - bioc.documents = [] - new_doc = BioCDocument() - new_doc.id = "1" - new_doc.infons = { - "inputfile": Path(filename).name, - "textsource": "Auto-CORPus (supplementary)", - } - new_doc.passages = passages - return bioc - - -class BioCText: - def __init__(self, text): - self.infons = {} - self.passages = self.__identify_passages(text) - self.annotations = [] - - @staticmethod - def __identify_passages(text): - """Identifies passages within the given text and creates passage objects. 
- - Args: - text (tuple): The text to be processed and a boolean which is True for header text. - - Returns: - list: A list of passage objects. Each passage object is a dictionary containing the following keys: - - "offset": The offset of the passage in the original text. - - "infons": A dictionary of information associated with the passage, including: - - "iao_name_1": The name or type of the passage. - - "iao_id_1": The unique identifier associated with the passage. - - "text": The content of the passage. - - "sentences": An empty list of sentences (to be populated later if needed). - - "annotations": An empty list of annotations (to be populated later if needed). - - "relations": An empty list of relations (to be populated later if needed). - - Example: - text = [ - "Introduction", - "This is the first paragraph.", - "Conclusion" - ] - passages = __identify_passages(text) - """ - offset = 0 - passages = [] - # Iterate through each line in the text - line, is_header = text - line = line.replace("\n", "") - iao_name = "" - iao_id = "" - - # Determine the type of the line and assign appropriate information - if line.isupper() or is_header: - iao_name = "document title" - iao_id = "IAO:0000305" - else: - iao_name = "supplementary material section" - iao_id = "IAO:0000326" - # Create a passage object and add it to the passages list - passages.append( - { - "offset": offset, - "infons": {"iao_name_1": iao_name, "iao_id_1": iao_id}, - "text": line, - "sentences": [], - "annotations": [], - "relations": [], - } - ) - offset += len(line) - return passages - - -class BioCTable: - """Converts tables from nested lists into a BioC table object.""" - - def __init__(self, input_file, table_id, table_data): - self.inputfile = input_file - self.id = str(table_id) + "_1" - self.infons = {} - self.passage = {} - self.annotations = [] - self.__build_table(table_data) - - def __build_table(self, table_data): - """Builds a table passage in a specific format and appends it to the list 
of passages. - - Args: - table_data (list): The table data to be included in the passage. It should be a list - containing the table's column headings as the first row, followed by - the data rows. - - Returns: - None - - Example: - table_data = [ - ["Column 1", "Column 2", "Column 3"], - [1, 2, 3], - [4, 5, 6] - ] - self.__build_table(table_data) - """ - passage = { - "offset": 0, - "infons": { - "section_title_1": "table_content", - "iao_name_1": "table", - "iao_id_1": "IAO:0000306", - }, - "column_headings": [], - "data_section": [{"table_section_title_1": "", "data_rows": []}], - } - # Process the column headings of the table - for i, col in enumerate(table_data[0]): - passage["column_headings"].append( - {"cell_id": self.id + f".1.{i + 1}", "cell_text": col} - ) - # Process the data rows of the table - for row_idx, row in enumerate(table_data[1:]): - new_row = [] - for cell_idx, cell in enumerate(row): - new_cell = { - "cell_id": f"{self.id}.{row_idx + 2}.{cell_idx + 1}", - "cell_text": f"{cell}", - } - new_row.append(new_cell) - passage["data_section"][0]["data_rows"].append(new_row) - self.passage = passage - - def get_table(self): - return self.passage - - -def extract_tables(doc): - """Extracts tables from a .docx document. - - Args: - doc (docx.Document): The Document object representing the .docx document. - - Returns: - list: A list of tables extracted from the document. Each table is represented as a nested list, - where each inner list corresponds to a row, and each element in the inner list corresponds - to the text content of a cell in the row. 
- - Example: - from docx import Document - - doc = Document("document.docx") - tables = extract_tables(doc) - """ - # Open the .docx file - tables = [] - # Iterate through the tables in the document - for table in doc.tables: - tables.append([]) - # Iterate through the rows in the table - for row in table.rows: - tables[-1].append([x.text for x in row.cells]) - return tables - - -def convert_older_doc_file(file: Path, output_dir: Path) -> Path | bool: - """Converts an older .doc file to .docx format. - - Args: - file (str): The path to the .doc file to be converted. - output_dir (str): The directory where the converted .docx file will be saved. - - Returns: - str: The path to the converted .docx file, or an empty string if the conversion fails. - """ - operating_system = platform.system() - docx_path = Path(str(file) + ".docx") - if operating_system == "Windows": - import win32com.client - - word = None - try: - word = win32com.client.DispatchEx("Word.Application") - doc = word.Documents.Open(file) - doc.SaveAs(docx_path, 16) - doc.Close() - word.Quit() - return docx_path - except Exception: - return False - finally: - if word: - word.Quit() - elif operating_system == "linux": - # Convert .doc to .docx using LibreOffice - subprocess.run( - [ - "soffice", - "--headless", - "--convert-to", - "docx", - "--outdir", - output_dir, - file, - ], - check=True, - capture_output=True, - ) - return docx_path - elif operating_system == "Darwin": # macOS - try: - # AppleScript to open the file in Word and save as .docx - applescript = f""" - tell application "Microsoft Word" - open "{file}" - save as active document file name "{docx_path}" file format format document - close active document saving no - end tell - """ - subprocess.run(["osascript", "-e", applescript], check=True) - return docx_path - except Exception: - return False - else: - return False - - -def extract_text_from_doc( - file_path: Path, -) -> tuple[list[tuple[str, bool]], list[pd.DataFrame]]: - """Extracts text 
and tables from a .doc file. - - Converts older .doc files to .docx format if necessary, then extracts text and tables. - Deletes the temporary .docx file after processing. - - Args: - file_path (str): The path to the .doc file. - - Returns: - tuple: A tuple containing a list of paragraphs and a list of tables extracted from the document. - - Raises: - ValueError: If the input file is not a .doc file. - FileNotFoundError: If LibreOffice 'soffice' command is not found on Linux. - Exception: For other errors during file processing. - """ - if not file_path.suffix.lower() == ".doc": - raise ValueError("Input file must be a .doc file.") - try: - output_dir = file_path.parent - docx_path = convert_older_doc_file(file_path, output_dir) - if isinstance(docx_path, Path): - # Extract text from the resulting .docx file - doc = docx.Document(str(docx_path.absolute())) - tables = extract_tables(doc) - paragraphs = get_word_paragraphs(doc) - os.unlink(docx_path) - return paragraphs, tables - else: - logger.info("Failed to convert .doc file to .docx.") - return [], [] - except FileNotFoundError: - print( - "LibreOffice 'soffice' command not found. Ensure it is installed and in your PATH." - ) - logger.warning( - "LibreOffice 'soffice' command not found. Ensure it is installed and in your PATH." - ) - return [], [] - except Exception as e: - logger.error(f"Error processing file {file_path}: {e}") - return [], [] - - -def process_word_document(file: Path, output_location: Path): - """Processes a Word document to extract tables and paragraphs. - - Args: - file (Path): The path to the Word document file to be processed. - output_location (Path): The directory where the extracted data will be saved. - - Returns: - bool: True if the document was processed successfully, False otherwise. 
- """ - tables, paragraphs = [], [] - # Check if the file has a ".doc" or ".docx" extension - if file.suffix.lower() in [".doc", ".docx"]: - try: - doc = docx.Document(str(file)) - tables = extract_tables(doc) - paragraphs = get_word_paragraphs(doc) - except ValueError: - try: - if not file.suffix.lower() == ".docx": - paragraphs, tables = extract_text_from_doc(file) - if paragraphs: - logger.info( - f"File {file} was converted to .docx as a copy within the same directory for processing." - ) - else: - logger.info( - f"File {file} could not be processed correctly. It is likely a pre-2007 word document or problematic." - ) - return False - else: - logger.info(f"File {file} could not be processed correctly.") - return False - except ValueError as ve: - logger.info(f"File {file} raised the error:\n{ve}") - return False - except Exception as ex: - logger.info(f"File {file} raised the error:\n{ex}") - return False - else: - return False - - # Save tables as a JSON file - if tables: - if not output_location.exists(): - output_location.mkdir(parents=True, exist_ok=True) - with open(f"{output_location}_tables.json", "w+", encoding="utf-8") as f_out: - json.dump(get_tables_bioc(tables), f_out) - - # Save paragraphs as a JSON file - if paragraphs: - paragraphs = [x for x in paragraphs if x[0]] - if not output_location.exists(): - output_location.mkdir(parents=True, exist_ok=True) - with open(f"{output_location}_bioc.json", "w+", encoding="utf-8") as f_out: - # TODO: Test if datatype causes a problem - text = get_text_bioc(paragraphs, Path(file).name) - json.dump(text, f_out, indent=4) - - if not paragraphs and not tables: - return False - else: - return True diff --git a/poetry.lock b/poetry.lock index 478fbac6..2eb9d8d1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2523,6 +2523,21 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-docx" +version = "1.1.2" +description = "Create, read, and update Microsoft Word .docx files." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "python_docx-1.1.2-py3-none-any.whl", hash = "sha256:08c20d6058916fb19853fcf080f7f42b6270d89eac9fa5f8c15f691c0017fabe"}, + {file = "python_docx-1.1.2.tar.gz", hash = "sha256:0cf1f22e95b9002addca7948e16f2cd7acdfd498047f1941ca5d293db7762efd"}, +] + +[package.dependencies] +lxml = ">=3.1.0" +typing-extensions = ">=4.9.0" + [[package]] name = "python-dotenv" version = "1.1.0" @@ -3596,6 +3611,17 @@ files = [ {file = "types_pytz-2025.2.0.20250326.tar.gz", hash = "sha256:deda02de24f527066fc8d6a19e284ab3f3ae716a42b4adb6b40e75e408c08d36"}, ] +[[package]] +name = "types-pywin32" +version = "310.0.0.20250516" +description = "Typing stubs for pywin32" +optional = false +python-versions = ">=3.9" +files = [ + {file = "types_pywin32-310.0.0.20250516-py3-none-any.whl", hash = "sha256:f9ef83a1ec3e5aae2b0e24c5f55ab41272b5dfeaabb9a0451d33684c9545e41a"}, + {file = "types_pywin32-310.0.0.20250516.tar.gz", hash = "sha256:91e5bfc033f65c9efb443722eff8101e31d690dd9a540fa77525590d3da9cc9d"}, +] + [[package]] name = "types-regex" version = "2024.11.6.20250403" @@ -3842,4 +3868,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.10,<4" -content-hash = "e056cd80a80d198aebb03f56524339ba094d9202dbbc242031681883e9272f19" +content-hash = "9aa4f30d08ca2ce55ec0848361b28ecaea08f2966b692e0552fc3ff19b79daab" diff --git a/pyproject.toml b/pyproject.toml index 9229a3e5..244a3213 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,8 @@ pandas = "^2.2.3" marker-pdf = "^1.6.2" lxml-stubs = "^0.5.1" pandas-stubs = "^2.2.3.250308" +python-docx = "^1.1.2" +types-pywin32 = "^310.0.0.20250516" [tool.poetry.group.dev.dependencies] pytest = "^8.3.2" From edc80164f90cea490390e5985e3fad622bc69e56 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 20 May 2025 10:13:31 +0100 Subject: [PATCH 016/125] Added the macos ci skip to PDF test. 
--- tests/test_regression.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_regression.py b/tests/test_regression.py index efaf3a9e..f16a022a 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -60,6 +60,7 @@ def test_autocorpus(data_path: Path, input_file: str, config: dict[str, Any]) -> assert tables == expected_tables +@pytest.mark.skip_ci_macos @pytest.mark.parametrize( "input_file, config", [ From c08a0bf04e13e731363d3a08ea7244a562df4da9 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 20 May 2025 13:40:50 +0100 Subject: [PATCH 017/125] Added local output files produced from running tests to .gitignore. --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index f96a9527..d7711bdf 100644 --- a/.gitignore +++ b/.gitignore @@ -166,3 +166,7 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#.idea/ + +# Output files from running pytest locally +tests/data/Supplementary/PDF/tp-10-08-2123-coif.pdf_bioc.json +tests/data/Supplementary/PDF/tp-10-08-2123-coif.pdf_tables.json From e3960ce8af06e36c30329b9426f983948216c6e8 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 20 May 2025 22:14:50 +0100 Subject: [PATCH 018/125] Word test additions and old .doc document conversion --- autocorpus/autocorpus.py | 2 +- autocorpus/word_extractor.py | 25 ++++++++++++++----------- tests/data/Supplementary/Word/mmc1.doc | Bin 0 -> 316416 bytes 3 files changed, 15 insertions(+), 12 deletions(-) create mode 100644 tests/data/Supplementary/Word/mmc1.doc diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 2a421ff6..fec8837b 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -331,7 +331,7 @@ def __merge_table_data(self): ) def __process_supplementary_file(self, file: Path): - match file.suffix: + match file.suffix.lower(): case ".html" | ".htm": self.__process_html_article(file) case ".xml": diff --git a/autocorpus/word_extractor.py b/autocorpus/word_extractor.py index c8117af1..5235a7dd 100644 --- a/autocorpus/word_extractor.py +++ b/autocorpus/word_extractor.py @@ -43,26 +43,27 @@ def __extract_tables(doc): return tables -def __convert_older_doc_file(file, output_dir): +def __convert_older_doc_file(file: Path, output_dir: Path) -> Path | bool: + """Converts an older .doc file to .docx format using platform-specific methods.""" operating_system = platform.system() - docx_path = file.replace(".doc", ".docx") + docx_path = Path(str(file).replace(".doc", ".docx")) if operating_system == "Windows": import win32com.client word = None try: - docx_path = file + ".docx" word = win32com.client.DispatchEx("Word.Application") doc = word.Documents.Open(file) - doc.SaveAs(file + ".docx", 16) + doc.SaveAs(docx_path, 16) doc.Close() word.Quit() return docx_path except Exception: return False finally: - word.Quit() - elif operating_system == 
"linux": + if word: + word.Quit() + elif operating_system == "Linux": # Convert .doc to .docx using LibreOffice subprocess.run( [ @@ -101,11 +102,13 @@ def extract_word_content(file_path: Path): if file_path.suffix.lower() not in [".doc", ".docx"]: raise ValueError("Input file must be a .doc file.") try: - output_dir = str(Path(file_path).parent.absolute()) - docx_path = __convert_older_doc_file(file_path, output_dir) + output_dir = Path(file_path).parent.absolute() + # Check if the file is a .doc file + if file_path.suffix.lower() == ".doc": + docx_path = __convert_older_doc_file(file_path, output_dir) # Extract text from the resulting .docx file - doc = Document(docx_path) + doc = Document(str(docx_path)) tables = __extract_tables(doc) text_sizes = set( [ @@ -130,8 +133,8 @@ def extract_word_content(file_path: Path): bioc_text.output_bioc_json(file_path) bioc_tables = BioCTableConverter(tables, str(file_path)) bioc_tables.output_tables_json(file_path) - os.unlink(docx_path) - return paragraphs, tables + print(str(docx_path)) + os.unlink(str(docx_path)) except FileNotFoundError: logger.error( "LibreOffice 'soffice' command not found. Ensure it is installed and in your PATH." diff --git a/tests/data/Supplementary/Word/mmc1.doc b/tests/data/Supplementary/Word/mmc1.doc new file mode 100644 index 0000000000000000000000000000000000000000..c3d22694882ef5cdada267212ae30cffff5036d2 GIT binary patch literal 316416 zcmeFa3t%Kyb?05f+u#9gY{NoA3Sq*GQA=t`Ga8KvK_ksWGb7Jf()ckBOG@1(be@JlQ@s6{?=WTrb*YAG!ySn<*eD{DQAP3|DZuS5D0FS`e-}<}G4KKRjVUF{U z7ypi(T4t7ik8+&f_`lnn$2v==mQMY~KmEo(*_-j-7e4GVXX4_AIG^~%Z3bRg5jHr^ zV7yT5^LM}dxaeD_&tZZ0m=9+kZNKa9hn{FZZ}#_+@NxcX(Dn7x!E^Dyd$@k#lN{%E z8vL98&T*a{EBEkyj`KyXf1T@(eyHR8pC56Y%T)eH`PJs{i+;>rPyhY=PdW|@!1*G7 z^{<=1ms9TEX~%gB55D|>;~eMvBcAIx@3jNVx7NRK9K!du?$}SyrCrbEV|YLPSM|pK zh4qDh!~TY4^nILm{mzsfkKfAJJL$iA&%-Wk%~x1Yx?YV>{9jm4{8~4k!+v$DPxaQQ z=$Z+~S?s35F3VgaB;sG#zYyOZf9diQKlG-*h-NIryIY>+IG1zvKhuuK+DFark24O! 
z+dpGmNBArJ)4w|@_eTEyC3v8JZ)aIu$n*LRN;={D&CUDpOZC*4*YG`IRpETywU_yX z^oQdd{tf3V{2u-dpYQZHESLTp;$!%?)AQl=@crTUPJh$y3BQNq9DeWgH>@|D-}K+D z?x)LtufB)#)v4W`u7~aI^>3&8M4KPWdcPGSxflF=xN|YTFML?9_4mtvKSp4&;Jeja zu34-3l^h?JUC%8yOO0Z_)?CTd{Cct8@G1?r(OmLtxn`-_th+V8QLC;NyxMwY#rI0C zw^Yyh^}1WExL(CAmdnjbHCHXK`VF5aSG`7&zk8`T=hc0lUai&|+ZT&e&eyBPuW$$5 zW6d1(-PkO;3!_&I?(-{Nv0nGw{nev+OSXbmXE$y%N5*)@0Ex5 z%^u6lUUlr6DR-Lo*8D;DfJcuT>l&3gKVQrhE551hIBhL^xpjBPiAr(JuhokbnyVJP za@o(j*X$g0=e;GbK?|!j2DduoZXccTSG`)JtP!EQeYQEKeJU0{>dqBQ#RenmF8W@s zP^_%DhpP2eE|uKj?`3<&j(SVgn%AiEDs``zt5uimc*WZ@>&AL`pIg!Z*_Mm7dV>)< z);yK>S|hZ5JUKwC6~9^jehts|D`E&)_iCP*^$=q#-cqsT7Q8jz%~e5^;!?B0wEH(U zy;5mit3dtqOLl;Z)rtmKm(Emcr-!yrS6OTI)oLZL+4Y)@LbbLR3@%q$Y0Dls9%BDq z_X&rtKfZtd=Slrt>*^ZOWIt$bo&F+H%lXy)BrU(_%O8Ojf}TixILmD%U+|R zVgWrdU!S|~6{^*t<=XbCJTq*_%U-uEDl_D9C05T|Fj+0a({{4_h%S|*$_FlKclFZk z;lHV+dZPw;*t>nc6P>M#&NeH|cp2;}RdbNo0L?-&4c&HkEbiMmsX|ay!;m|@=+<-es^+i3Qh2~~OI~e7G`c;rX%xJMd4mUegplz_k;Uji)U*nl zmAuaz85;Og#jmse>P2tJbr)HP`J%T{scMK=nKVTWAzgIKsp22H~ z5*1Zfz0u6C54i{ZihtdzUu(E?i~EM%`Pl>Rj%k0j;1~0Q?n1LtSzn_UgYNXypnGCr zYUdzj83M5fCQ8I$wOJo@>xJqWF4c<5UU^mgg=Sce3wILR@+`f~m@jo7dXvwT#D>J4 zRz8l#7+>N@+dE zaD#T50AYmTgN*-btyuPIMJi-y%m6bL#Ri16yhJzaZhh6y6_<-SDr*$uLWsV@uo}f2 z<7nPw@jwef$d<3sH4g*|h;JaI3_;mzEiC12t4%nvR#9aogNRUeM;UiB#(G7DE*l(H zi{ax({B_ase0{Gww^2-j@wZ1;4P_i-Kp1nWx|o=#^tF0v`-R(QLF;P1nbR8Iz8Eg> z&E&8;i>%=;*I-Gs+RT<@F0(`o@*!jRteIxbV%ciV&-iDN*6K(z-HCd2mxaoxN$zX+ zq9?dhGc&VSxhIb;?w>#Q{Mp6*bMExQ(dSRkE>11Z9=poDX&{GGqdv{f4D20fRu0z= z9a@DIU>9&8Fi9fKGb}JF(6b1atK^_zVCIlJV+l8d!#0C=(e#L6bYc`| zij9KnF|dZbXq#bW@486GkUP~P8;nS`xx#KhyBSc;UYz&Z5rB0u6<&?mfU;(g95gNgiOUuIa+QA9yj8f{ zqNd(Z;7G7o)-x5~&%@(`0Z{7=;f+ydu}u2XsGA1Vn>lR^mz$+vUEwbc|kb*Ohd*c{A&k}@pR#w;*d175+Id*A8 zJTrV(2Fan;pgFz1kqS=CALT)jy*hO7%$YMo!Duk$!vW=i((RfUpU8yk22DUh49f)U zG*ey9#Hmdtm?YK~j|Bb6AVUP*$$&7Kh@a9JyZtuckkVl%~ZksY0vemg7ow7*t# zk0*;CV7I=ynrLZIgDQqccxD0ku$-F)AZmU_N&vB)8wN&4MzaHhy9Tpk&wCz>3Vlbj 
zQ@N3W!HL0amU1_%(z;eFY$KY(6-ZojNjt#dkf~@FuT`LU81-wx=5uJ6I^%9%|Ll0?A=VDQnNPM>==ZG5{U`KHaSt%5gS&4 zaL;4<4dcx<5uu=AW<+3ljA3gLnL?Jp`AxAszhuaQ*}0f)9qP40peHe+IpTRkd{H%u z92)h6NVP^G+SdfCo=91-yzIk{5dKK3Dqd;uNuGiT7RUpvp^3|JB1`|nxT|%`~ zGP_;Gp`{L0n<`qhn+mccVGTP*=MSCSKb#$zKZNYF$N>0j)spREG!$$nPX|&k1z2C+ z>MHU>C`xE9T2lblE`u|zH3`l~+=($Ro(8K|nz)XjB0#Y`E%fhR$Ha&W}Qau9(l}dC& zB-$(p&(s=Q)%9}qwD3#y)`}JL*(>F$kS2EAJfAPa;K0Q}BC0B?-cK1YDpjlCp-6`Ix0K3e zCx$0>x#jX9I(XbGX?R!_+*x9+FgU_^zG-&NP9_#>8@l3D9CaB{B^|wVR_V z(tm9&ob^((vJ%{2?}#L>mEVNFl-0Ygu&RDHyI{>mlon z?`F_}EHX|p)+@Dkb2U^EX(O>x!j{0x8*X-VXt%pET)I*OhB>p7lk@2fNDix`%=pkA z_n?XhwWcLDT2>_5*RL~ryPEXeHS+0iVhPiO1(YOct_$y*}CPQ~U^TRsP{UV)2}}=lYcg8h9^JV;*6q<)xAoen%e_^<-K?iz zKV{l(RF^cflMZ_Xo?0xGWGO^Zr4FD>w#XalCIU^jQJ*3g+wHifS_>s5?=U)E38HLQ z(I#34Bh&k5-5qm_(>u+WF6mLU7qv3vQEgyh^H(ov(Kt#3YV)a>WMy~$k;*!zPTx|S z&QRB(Y77zUB##onE*b`chFR=RT#ZQ-F@cI_jrtM+1|iZ#-l|DIJ!_Qk*yq?e7#_~J zw5`CHQd$JTid`+qQ5t!ux)rIk6p<2B>RzLT=hdP^$-BL4=pow%@4y6F9pHNVhMV_! zr8>+_N|!YnT7>3_+<*jD>%tw|p-@94g9F1e%MOo`!r+7X$TAFLi7|T-CZbR&qIA}N zfSWExkfOG|#_C}<60Lnih??38veiUQ4UH;E>_r6!AzwF0Bie3Et%0e;b63RPk!RU5 zT0U;1Q}x{{H)EZ^%H>-HXBjcInh*A&NxKN+ADUqj-8Zs9Y~nRZF&P+shAJ#+PT5#b zRk~8fQmW2`8D#s6>2#d4287*=3rpr?(Jz!TR~7vdyKE`%tg=;tZnRxco5bIt$vPsQBN4%f#|Q6f@Z6=QX;*8SZ*YK z8E+BsnZ+lx|l znY|hlLMeku>s*rlCUA3ME!d^nbBDSL1k!(x6EHHU^R!Ktjd3Gx$)ckxjQfy6Bs#lAb zcc|EypC6a9nb*QcHnn(-wzYVftD?eNTfFw1r^Rdc%;?FPi6i6ZYw?Odzro@~qt3_T z6)M(bZ3unOM1ER65yloKM2jp>2E!v8RXD@JOhOsVT$}wxXKxAZjMyzozX@3nw)(*#XPQazYAbm;kzn~+sK z$8}1MhllivhK9{(cGxtEreo=^*yOnX*?r}Vxx?zp^F zb*XNRj9Rd0TIAA4^{>cP^-X;&b4??sCsn83ua*{hlX|9dw%^N?(il`}k;Z0)(zd5| ztcs>)TfbTw_R-d%yUD(4{=ynGzN_>`ZJnnjz09T^PvL}mt5LG;uLnI+|1>MM&l}sP zagL9X-tC)l(m03Xq&X{dRXw-kVP`>eY_UwEqVdr9S1G$>%5Hq0Fiq{WBW&?TbJBi% z!qHQ0Rm$jnbxy0;=QLwU+*K)!z1k{l6?SP>o8X&fv^DpdPt8Djb>z8f`zf3i{tWQG z0n;muhP@~J&zSZIKYZ@*1*W;Hk+!oa%&bwS9@H@79CK!al3C6T3^`eT#+=>GFzw8l z8b`UVYZ*tMMx9-J8xNkJbr$%!%E5}1o!B*znV1~84o+mOP+UWc3g5>*H4wNUOf8`# 
zTtj^To#MVR0L&XtUfI+_GZS}=wR9D8AD@?UHI(yXSh{v)$1{mW@U-rCN^ylrBm)4Rq7 zv*Qnhjc!ynx>>Y#>@5SKvVb&dc63}qw2~i)*%});g)fnqg8mh)6r(-Z=fgZS(ceyU%Rn7zIK)&>&r5|dGEw{eJC{`%=Tp&}|0pE0BGv``d_7!x*%);N=w4H)g5 z8FWlc=8!9AOH9Yn4aP)o>&0TN-b!c17c} zyUx?BFn!(ChfXd|PMt4}k3XNL@oH3(#xodb)y;u`gBUaA{Q;4aq1t>Fk%G?YYzPO& zt~ub&4Xx&v2kv)g>v1V^FiIK2wJ4vlFj)+;-dsDPmAO;Cb@eKS>|$BQUT_Ees$rYN zw<&2_$2k&bGguhA zU*+tMLfZWuL@t_Vha#Jcb|%td`#dErxb)!CIK*v35BZLVmR-A;hS5Z5FRrJg-`6fh zdT?v$iQ7x6K`Ge?WOJz*Exok{$wDOyi>x=+IwJc_neVd4G)&2MyM5UtbU(gFN?Tu~ zt54crw9``itPMxKlLbcOFonHnk#UGyMLJtito`MT83Su&39Uo6SN6#aebWrcIu_oq z^FG<1G=Ak^tYndpbxmUj`cob;-|x zyMlGBgW^ijzk&w?m7Vb=w`X9Rh9 zeD~zY_{0z*)t;g3uI!$XUE%|g+&sQJqx*z{p(G!7?c#1ILaStZ8&ywD>Lgq5naGUp zndBYaN!atrV{##y$|q81XgR@DJ!_obG15eyz8q6BK#1eJBtWId+#K94p0m5y zP_F4!9vSnmJ9YKBL$Zl>Ax8JlqsPuFgl4S%dGxkXvdk(E&()eeNB=x}|2%sCJbM2; zdjC9n`}64AXCn_4L{~}fX@R%C$~h(@S=Xb87?JEHnb!&fQg8&$yn*`*0seF<%~D_l z)yNknlH!wUlA2Ex0^F#LcRMHFQQ9CRhoZ4gdz@dFW5p=TA)^%U>gHbQkiOUu%`hp03hR;wfZgY|v2x)u5GN&_LkOlf#zO_vTR zZ!|ITUz8?f);tw@yvpm-dOBLIg#1s_8WyX=>8-W$?36A?zong#j;6`GLhoa>Nt!9? 
zOyp@Q4U(+mp%<$4V@5|}%A_?up?9&oO0AEx-l2Y|*YX;b2Xg4~YsV>vPiv9)%P&-= zG!|1p7VG{+?mE}h&x_ouDIYx#GbIiAa#9_jt6;~0Hz z#A|A}vA~-71>?AkZA4lsbm}kz-^{L&2?5M2ec=%#hR| zQc;CwD^3KYc|*7z@pOd7mX4I_(Q`6)wVK75xN2$51lmxqMarX~5+cz=qFUr|kL;~c zu%)(^O^y}C{rQbiO?os{DU&Ten}h>ty;$R+AWP-|rGi};T|pSiuD#~vD966-TtWLI z&B}OoWNdh3a$+Pin$3=(RoOi{K0$#%sS@4cQm9E8<8ymWO5zorG$p&cw?n3P?9miS zOJd9rf#%AxZKk08jM3nhDnzeurwUmpa=w~-p(K#5ITsLFcoFn+7)t!^hVeX2zia)93sB;3F&tW*uBtLW5 zjGkvsI>-a9Xe>xK@Vmeh;y!zMV%RxH?f3Wl!#2*qMRndnwyc}@U4$>{^C&+$ks*ta zWEN}d5*L4oOUmvb+RfStBdROb58gwv(#9kGSc`-X+1MaJ zDu#1W6dkC8)ERcb__wm3v609KFvVhxLLF#FLmx~Ik@&XCc$Z@Q#w=WK5<-(mN^!vx zU)IpU)&xDXo5aLmum|hnXrN5kVN57_&DxwK#;UEL0cNw&vu*Hn;hNd^$#=0K_$MUVuvJpn2jeF;6QO26M5TSEjc;GP%#Ar71?)@oSKN#vx z>)BeT<{#vGE#!Aa3-FDuOVWJH_h|=Ou;bjb-f1c~g`BVG2GUDPUnUKtK3fkfjgj^# zp&wXFt7bjm7T71)v*0!^g7y2FrG`1qSlhS*+K1>odzJbXQ6Y@4uoAp29Bpaoa?WW= zO510(hT-Y*2GIhfsg!0>des8$la4byEw4R_)wWi6hu#(XquF!x$^KYxJ(PK&EZ*0( zF&KSaTZ{8XL{@rBnBBh(U7NdoCh6&jkcD#`o`Hx(Qc&8!K{$&Otr5p+8T-O`x(ePe z10jecsYr1!`A7wF0VGzX0t^v9s*GNH0atDlUs-t_2rFZwoW&d|gD96gA37ys;w%$k z&8rZ@mHjtiU1JGh;vtj>M?@T1q6;HYlB0kmWO!6oh)=1EDr#6N99hra+Pju>UbcGU zTz3-CL|bd4x!RSradP53t&NAKcP&oL9Xxox*2ehrX=|ez)s3|=I&;&>cNXbqBTgBE z31)|D6@e;noOF-L+dpt@vjHGhh}>pXHDMr#Ko}40BicfQ^0~KaLx@L#e-*W=?42fB zA8}Y&_U$5JB@NxOgtQBO6%V>DJ5O}PDy1gg%!hafO6XwS$O*QZAx?z?#CtUxYI0fO z$)k#+?LmS_nZBBL#sZdywjpDE*?~WClenvjv&`#ch!0+B!s2GoLNTChn0CsrUkGyV znMk66wTJ1lhvg0t0ZbuJ=h($~>+bdF%j^EK9y8hh0vnMHa~U3<(f}HJi=^1pXh0@3 zRndt&qhcd!G!D=dmO>tUV?Iq_NxjC*Mij!1GO5vycf7srh_njs)H2&a{)V=_!|Yrc z6P9*lOv+|=kBkqmdgW8{(VZL`8y(6{?jFicOis!bH?S#XC&wl-dv@>H!=?6iq~LYt zRNG-n#Y{=dHNJP}V;gPzbZSSz=>uHAXWY7syS1+br`!C!?&n2wam3I`p9$d)?=feA zq7)jv)W^fE^cg|+j8#y`jv?5S4|Dd|Yz&ZnxJH;&V|Z+In1LH6AxdeOs9I-sGEXGwb6+pl*URW|dHtb-v*r5W_(8mT(pRW3_pNJ1XgSHzjc2WpJ|9yWZVz#?N zl-5%fd%CA$lc-rc9kF4|7S`zVB9-%)RjSOOaP?`qbr)@#)gW-+%HW0qm-U3ZqqZCf&LyC+hsqps0ldA z>I)C^^&^Wu2qTNS+uLX~F#{^Ori|P0>qJ!Ob*wZiq=v>UtI)bj$_N`|r6W@^Ue{GL zqWA@zhYfJCarDgm;_UwJ2zq3+@JE} 
z9ur$ur_}YIQxGP^xDrhVhxIw1A{(p(OnbV*+S}l!)@8}~gT>^`J=zq>2NKECIMV3E zFd+dp+F%rCmOJ(c+wSm`ysR-YDJSWY6+**<^tJ>WFr0d!vH$&vh-^7w&+eV!Tfe+m7085ERbcpK*$EdRb&f9Q1`f!oCxq6~o=fU=lgSy(4eyyL1&F19;hy@>T}Jj6-|D@GRJ9`N!(_55J#qK_$%l*tqHS}$X=s;R&$9AVpvwvamXU8P7(8PSkJ2)|L%w$u!* ziE_qye^L-D#f02Y3HpF3w50Gc_OgixYUYDux_J|^3HhMPoBXp1>S8J)O1kpJNni+q zo<;LS+}2KU2-(<%q0;0hsJ60)Rx8TIJeBosgS~!Yc5=3M{n5UZv86sak&|MR88Yz- znU?QeoV*~0nBtT5^CT#aTZcReKdjO~KPLl(PxdmAx_(ZEC|Pvq32ri1_H#0bCl8(C zjKsjr?j<*voDB0s$Fcfi$gyV>2de~vQiL@wD8>sh*=+nKC^kVjlvZ0odEv`qt0tz4 z33;w>(Rk}zGvDLEa*?@622xW@4n1u5{YDAV>&Aa^>d48`!I>@tw~Y?A&$$p>?@IMN zdcNn?ch8KToS7gaIF547S`iD%K!F@_1YJ63PQb|hVz`M4r9N!VH4HbeK6PYQ{p8_c zoeQcD$DiMDQiw)%b5dwNmCiMkqlh{i&KJ~!`EFJ?J;Vef@X;g*$&mucD0}kj^&l}$ zAXBvh{h2sHL4;c>vxhm@g(%XR4OU=8QQR;*a{& zSL0Ka$g0YZ)O#+YFs`X!EQ&py>0`IfYRMnC>XP)@0HjB9OU)vl?YKLOG z+MM9ZL~cv;k^xIGRh3IZaaWbMN;TR{Q8jbj=3$Z?VKb;JuZG@UGBH_|e>~57m7BYj zUBiyO$8|-V)%e=%6Y9wfGohX-W^9;q%I3*R&rj?A4swp~3nI_jHYzJu#o6|qnu&v} zaac0#QGRvRs`qJ}6e+ez%T%{=RH+4vrbQd`Pbs6ia!}dq>#9>Vsij5Uq`V;-1KaO* zxz=q@C3S9{b3NTxi;K1fjj!^ptF7~9?9?WU5y}Z)p-mP8G|o%3(#Gl4+*$pXa6-M+ zDB1QakC%Fs%(10;4_Aww1C4Whj5OzZS3KUZeTVXRsi$^46pc8~ca4%F54UjL=1*HP zWjDT0n5K5x5w>{Kd98*kS+!}s>U}n5aJYJsxEuCOZB+!}CElf3ZL)ebqc+O1a6gPm zoE}eQHnaT{&I*5QwiJth%D`eJY*7Z=Zsj(!Sy!gHt8urpD9lvc=6X=WjB|_?ZA)gI z5o*rzGv@3jlgtwRQjMcr*ERcTlw37gXFPa*mRQUS&Q(zj6WpDk6DG_|6I`SvY#JiBmW+(c)fyc|mXl#eTabS9DSZBL65#8Tmv!h@Il zhmyxK_j$EuA}RTC3c&x3k&cKHHx1-bOk_Bw+o&TD+%RC@H8z+XS1PXb5zQb@#pvjH z9#$Sbas2SyiXidl8KwJKm49B$ zDQ>Bf?RZ)@yiAlYolR1?DV2#*yg%e;Hjexv{C8)9$8_dNmeVLoyz+O6j;K8=)NHwx zeKNzHv)nP!?X{m+FRwPL95TZNCXh?}$(Xbyv&PnbGA7?w8krz!DZ7t;GNyhqro96T z1<#8!^Yci?lxEoX^O-p5Er_Tvy@=VSu3a^U;s^$wGtL)#2%0OujJf$)Ir-t1QL!g( zZJ`D(89by==_R6~)=hX`f-WmlQoXv2cc676R6Ox@ofcAEv7$!DgU3g`9@5q|uWrJd zn*baoVsJ~xgck|{EK5zJ=wYw!D;BqVs=1;-=@32f?S0p{Q>?A)lx}tuH?gJ>j8bnUQ;cad`N5(cj){4z? zGP9I96JR%S#s$Bx73yn+I>qbkr|52(cpfxbp?&ziOEnbffD$>pUCxc3k^3%s;ST

f3PL^xnN=uIIdXCI-xwyx!d8w-A^iQtJfgq!i@<{5J zSte$L3r#BG&>K4BhQ23}g1Sl8lNI-5(Jz!ToZ~7HrU@@0(n_MUvi?MPrc^7miwtFI zPLK^j>4J(JV+wNxsR=8J1FD4Gd9Riy;bfyw&G&q~b^6fp!gU8%l;o|814g55kGGnu zeIKK`IdlA=_A!!T)9ORXKOgg%<{*$Hp@29Fxlk1B0lBVPvqt=LzgJ zUQ8XltA>pBHkR=or76elIxJWgv-kaxx_@xq+Sg>s2dMAI)YoJs{lfmuXtHEqnp#?t zQP&utI0-(@`D1bd%HFGNsbN$O?!<%AFXd`@-;9lpjPA;gXA<9dC<<5KU7)lFkCN&B zj@vmZ3+47Xvi@4*pp8kgZY-89A9DC$kx;+>@-kAT4)9}Fv?fb9?W)(z6*E?5#Gl`- zull(nR>!&|KVy;%UC_x+3P)ZJY>$RT+r6+DHcrg3tOA@{Q;P9cbTOSfYpxXR0d{PZ ziW*8r=^^(Zt+{U1uQlAc#eKu`vj=2O^*CI=TFfJlHgPr4>9WDj$*tWT*B&~ylaNw! zs#gBWyjyAJO1_d=BNLNUlBg_EtCbH~th}N3DB&tD(l}uwLg3FLiMC!;By~d4;XWPg z^@IM+El!(Gw^F{wyNs>1b<|OEbSl?eG_H7WH>^pS(ZErg83|7Gn~=@4t>W$(d4|OI zoL}>oBAmR^r}6xE&vvD4l8+yY%nxhdu3wJ*`$m zA~qX`eh9aI2)BL+w|)pW8J8U~Y#KIH_+2xQWi6|+}AisqCJt@=_3B-p24sP6%Z)}qG@btY-=}U6h zAj+6y`GiCW^F$%x%*?Nr0`_gb)k`|IU)Aw4SE30riP5o{R+?Jdx6>FHTJlDYBlpN< z5)?w|4RJ<#yW*NToNr>&N#0tdp{;fpX$V2cNI-NWc-eZauj*Y);*n&d>`z7S9J197 z6VHoPNoT?eIgM6asW8kuZO1cJr4h9d*(&*r2^S!7u_c6>khFtt#VbobQ>=pON(pld z$J!J~-bNZ&S+7l*pa+AJjHw2mzVaL0;!&u zNiu!Wb82fiTjrw4B<&@{w-pn2Ng`w*zJ>B+5}R6?8xq5cA74188#`mwvgtvHhCZnU zMs?PT{hNFyJ_BX%G^3=BbTz8Vq;)zh|PPlwj>rn1CBCuHHL1`Ew@aV5RX95 zuR*O>8DX>ADHT~(QtcBo(_NC;W6964E?WcREvve*Q8#bTGn0Qk_)I&XM;g5nD%;~V zKLbV5@o-#T&3R``AUjajX0Dr=C-2No?aA`~GJ74~U01e<;raOtg_ymtdxjn*mXTLs z6O<+ZJ4*gqb-7eM%O23Glgrh!#qh#TDEQO{iWA4wAQsDNjnmpPuqgJLs1;w>fdyAh zB!I9tETUxd<2PtR7@->qPgEDWS5YuGi>BcTToH}MeXKQ66SEYJ0z0{8{IhJA2`T5Z z>o=GwbvH-8S;UqwZeo~Tt->^dxGy{<2Ehu|2r=+E-pW$qTfJG&`KwT5V%P=dBvB!5 zwaTP+0p+GldRb!r8qr{{fly<^|yrECw!GUmaN}N0? zu>J|es-z(}SJ!>cZnN~4p-WH}1_8V@n|lcep*6s`Z-=i-HN? 
z7;p)D6qFrxNkmahAXtV-w9{xQLem|SPe(yk4M6HOj0&8cFgyCIhr{{Z3l^L)rFk*K z5rpUyHKu+Ka|9oYoaDC--$qM;RzTJ|`&7K*ausqNmJF)Ml^B-9RW+@}@l8v#(pY8; z#sLt}e$k7OLiENmZBuj5v(Uys%_1!gln2F4&0mqiNDoYE9s+b2j2wb^z?aqa z8lS^kSRzASulZ=ztMLqZZE%pDphb))Q4>wV8`B!9W0e@KikQwS0ex9q$(D3{yTO21 z1A2A5_s0DL<%!eF-PuDY+uB3T)$Z(}W9NC`ao^Ds*B_WaaJXlWw;<+1{P_(r7id(+ zm`gIV3sh+JEou_npGd5PHl+>u1*`H)bhO#5s-NUDb zil>VCA#~FtLnGOtk?~QA%q}ckr7~!(CnvJHa>~}4Qh)dO<__xdh3u}(!sssE(w!>!*vQe6ltM`LwqlukSGW+j7w>xO2_InN<6(^-tB~8EdBtH9Aq`kkrkb zM2@1r&o%$HdX9v{wOTP3&q6!rky?hvCkoFrIqMv+bl$tM>6em)rb^fWDxO!1b>}d= z{aTzx{BpAFLXKlwqC?>s>_bCtn{lKuUQr7xEVd7JaG2BB@t#ZaPWmAsWod?@FOxg0Jg)nuLX==&3A#HaF9l^Vk1A{V7ND(q*`FfM+wvg5_(5-d92 z!l@ES)JVPbKyjr>QssE-4p-}6Nmh2iua&%W=i-$e^pnSDz4QZ%`m z!%=Me_GuYm;$uAzx7<+aHMj=F8#n`FOg3=lHB;p!+>46wCLT@C!$P&OQf$U%He7Yd z%BE_kn@h1$i#TX3HB)6ypY{n}7+*!lhOSK^$yCWdU5__>Dpg>gUn-WWYb?Lm>o!CN zFbCl_D*zqN#qJ--T$g@Nm9-mRN^=_uuwJWcu}T)wB?!1&SoR?kt%b2+Q6K^Rtyihm z-J?}P_H0^GVom}-4iBYAXWFZ=H%_Fho*J4?wQCxi5{tw=2v0CeD&D-Hfh8C+h9`(Q zRE-a8$*c9`d}{S3sAqZ>X1p~_d@dqoJvr@1Qw^H&D}`#Qyh-P0$Y34EiX-V7SXZ&f z=QrFw&E~|5ubT8^%~aXz#7Sa$)b>}-h?>XVlE#Srb)zeYuc!1|_T#X(#L>(J=uxsg zSEZVKpjaxdt|ss+J=qMb*nS*o_N8jEw1t@#iHSA+@KCz(2eBc;Mq_1W(siLeIp;!c z>hT_?hx%X>WU0hU98A?STlc)v;Cg&E6uBeLD~@=j=DC8xn8#b49)KfN!kZ^hFg@N! 
znn`Ms#=IjQ%bb8cr(+OGv`&7YseI>Wp>Smnkp;t z8q(dM1~x;n3exy5X+U|vVnsH-qt^IF#mpd>9v}Slx}~yp_Dt)f^Nf?X^ytqacHupm zV5}RDL*k|>Gge8(Pt;Dd4sODFeAMzk1@s&$K|*Twm5WX z!}2}ul~A|Dm)*=zdI(jfn3Rdrqj=m~^*Cu>#CxI|=lbcvI_|9}#NoLOOvVUZwJp#C zjUY6~o$*RJ=zqNR>2bIkWzpZmOT^f5@i-S7vh-q^hfXC%B2A)1!pMq%AX{3CBTSm$ zEuhazOwQC$noceF9y_2$A=aEU2_}d&&Nu7v<)7}{!s(h<*qqcYVAohH#zmId)B;#o z#b*UYYf^qq6oOH=(zyTRj0-VKl)?3-5_TNFPav(okDa4s9KYhvUyKA)}$x1;2Wa{=GPv z%^tt@)Fcb3iz5KzZB<_8YF8?+?5^`vc}-t;^`VoClT+ub@`^v7R(YvWNtG9a5h}P2 zVuMj^eddoHJu5~w2(+Z^v)Q7{ZT*@9?%dF7etF=2$HwtEG71X`SC4AQTV>{mGaqV< z4$Mqh#LP~{wZ>8;gPBVP7p=i8hB$+yu|Y_Y^~qWiumi{iu%hmxigq^|khmDA!c2k5 zK>6$ztqxroaZvDD7fH4w#&#b{b?ucTJ$wZ4+NucS^2`F$mIUrBA#GNw0k`!JdHmDKUQYFgRVS5gn8 z6)1fr_1=NLl6oMmZ9xg$S5o(t)bj60>HGRh>SAe~ap^0m`%3B`TqX4b96ot`&}gl< z&sN+y&SvCtRqI5WR3ArfwdBEEl|#RDe8y2<4*BSyIj&)RPZi~Zqa8X;V7*Q8Kf_%L z2i4#ZubiNBR>D+)s_Wu8cEMUPcbNR?8jJNCwJE9yJcCakuh> zvNGlkYWzQQ>s%q>6zWWlbX0>=M~bF{JF(Hp%Re#xBKfJt9Z|V0WvLCf1LHAcgKNE9 zY{!#YPFz&!afOpQ3D5Wy4qdMjDn&KpDH!QMdsT9dpT7QF24Tg1ktu+@ zwt<5_2+Qdp;qsDB-W-cU0K z0YH@S3{rFg1$$zHEOv4>sZ~r4W2{G<4>Mt+EQwSiPUEm|{PkflI^VfcB@$WW%uMT6 ziI;C3ow1wQ4(U`t2?$us66BiWk~;Zi4w0DiXQs!Q)=|qXdyFwQIEP~_bXpj}VWLES zy~Lo%g^0%MtmB%8SDnt?Y#1mkz_2?lnBxVK2ITOpfd_It4%W!}BAFmfXVGwil*jYF ze9Ttt8pdR7-w`U&P*1Igcv&u0jhoi`ueqwZd zY;Cr={ zlk7g`G*RYwlUeB^t^4974)y^sB2)Wei&PnGmSHf7qDmm($aE^0zjvQ-~{2p+g=M3^YEYpuC z{C$rnEb;;Y62WzMOf4>QWN#6A6fg@*IuS(iiGq2E`mjj2neor+Yy;o>*`pFPrXMGm zLx%40d3zo@?hoQ`Wj^lGnXQna zsbqVG)>D%@ak@PdnbAG+km`=#&8PCwwBo=rWEV z2d}-3(j9GyxZapr){gDX_;oMtczcv%0qMgiWQU6TP|o5{o$v>L@MVwEUg6TA?Nbm# z!k-{FAlfZ)sJb4UrPW5C&6o+@o814HJx?ZE}J7vY%V7{svo2Oy$v!^HRqLt}&WVZw8T1;o+3h8oeT zH#}4qKRA(izdtSM=Nhj>*-^u6+3D=8{aoY9{oT(s{zI8-T&Gx85jxP`Bx3mXBl>=? 
zBHWpXzEki?#da`JWWxQn5z?f?G&;&PB4PQt(^BX!p&~tJ_uegSphUur#w~<8V(WI; z7)Yq(YynDGqzyD<3QmjgX!fYYoZ zadOF81r*sSN7NoRaOPQ#qYzxFlq{s+JH1D;t?jR0TbgNhnL%QA+v6kVYFE1b(cR~H zd}Qp?#?}^7* zAEX}X0`*(@&!uH-nvx}xVO?$V&AY1nR!;UB*|&;;_R*#ED&HzWnPhva+_klanN96C z=`q!7m#F_L(?E}@Z0@Q@?TQy~r1Ky_|=|a`c zRX!;LU^uGDOzl3kYE0FaDm|<5caE}op2(T^tBzybkv2A2(+uTwt;W4_RR0c2A9rTR z53iBcI8M=$5`Q&+s#i0m&+4Z}T6OC!HXl51x3<289FdXO8lnQ6ot<0QX_s>~zh0@b zky#;5oajI+>xQxOov8*UYfCvo3SlyO=oA_H!G;VN3fvQF35~hOC!#^qkT*heLDgi> zM8C5_&;+)rhLXhg6rC{IGwuMUbbEb&t69YgN{zKBq>d2RhsmNdAfed=U%;rMMK?*Gf7-cvpYMic6#{ihRWwdb(QpnxL3xEY-Mz}}k% z6bs3mTdfc5z2Sxd^n<$w9*Co#yLH%Z7E_Z?*6=2~Q=^?x-f!7MSt}i~eG@iKT2N`- zWz{n)(ks@^NaG|!ZxCqCjCK`PZ@gfPWfeBBW!O6tzNWHx)xOgDN#>;srB8OSz1ku-RJ_+B1mXn#Fb2YEA}s;P-6BjJAx41 zMkZ<2iS&gB)UuzEk09c)arhvvmx4$Sxd|Ua>u@JiWfTS*0Tlx#W)>rK@=@*~-jeo( z5nCt1FR2^|@l|3|Y2!ge0>F@a2%nOw!g2d&<+vJb5hWMkKvyMH1~-XG-tdULY;+my zGIL~KAjb3%(O}IEQif8LP%C;l&IWh|(C`o@N6E1&vTx#>zDf25cvRcJmkNt_97z+#=tqsig zJC0A!@3h$rkiVDMSVleCBqI2ZIcAlpo=vV&KB5#kn_*(eo@p zQgl>S28l2h`c~WZv#6!2uz|EJ8MU|9H&Vfg`J-3@!aOf$&YT$vMuRCI4&3DH!(+Q9 z#wRl2y1`MnK^!QvC{|c+nSkhIs>_);waEmN#M zda)_}9;WWmzFx@+r}0V_{p4tmrJj*}KokJ4q+)b|^8{-?M|{lUWwA!F=aml-ZBZCd z9|BZ_*o%hC879PV2mv8I&BML=uQc~uHM%~3dN(-Ug(_pBLsXyOdRMB9$%*rnBMwdP zTAY|Wc<_AXi1_nqIYN!?q_M7vE^*IWp7bdv@=g5Z+hmd)9{XZ&0pRbvSeo!z7*AuOtAxzZ<^0dvMQIY&Sb;Yz1kK8*U-{vCaCi&HAy; z`mxRW2{`%*IMTb#egck=-KV3@^kbW`QA%V#=*KqeC*T<9C*T-JN32H&+E2g{lF@W3 zhdHuT^%HPJo60no>BlyU#Wr&uJNEV&G?imjJf_F9m)M`0DS!v%kpKCaJY2m3$J}r6cW1isbden=j z5C4c0Hs+@u^1%4U04fr+ScHphR+koEy{wwfa;C;aF1AhSgDez~&Uw_~~zUr2MeBqnGKLD3NdA|9%yWaVq-}suB+;rxqQ#YNt zde_w{oN%1oA-5HA>%N7~$Yq5dpLo)xg>O$DU~EM4Mh;)fj9!L#ahc;h!d`P8_s~;n zpZ#{?QQLm>(!%q9H2l8rif!Qos|6K%qAj=a`lW?mzui}T%00t4Sp2M}WHk$$6guBDM?=ntPsENO0&BVWWg-V4R0ylQ!hen*%X z&2q4+gw@fiim!(7iS632{UY#cKy$BoAGyeJ-VFR2@SDKyA90+0z|Z}dJI0U+wX8P zGtRcTKSUf3H4|npw!Bbtx-YFs3rm07+2%ZfdOO!GCB??wU$)Mc*X-x)p3;=!yWJqJ zkRhe+7Qz@yLmv{pdG8dnZ3{`|RsJ`_j^#*YU$Qecoy^%>9T4 
zTy){YW;9^r(!!UfpAHSUVQn-jcl>Zm)ua_rIdtJ3k>&<}MZ@A{rIJd~pRQ=wV~!k& z-q*g4En3c7Mh^Y=&FYB8U<*1(P2XJJl%hIM(VRRJI0{?^+z8wT{2lPW0hjgvbYK_| zT^Iw#fl1&>;A)@kFgCC3j+*;VC;F!SYDkS7 zc1OCuMJZ|#a>sc|q89qCT2iz+UW5^By=U`sRzsL>^+wFtVx(!)*7N?Jk@qBem3m9J z^L|D0U7KP36EtV91bz{CBk(35r2YS$&!YeT2e|fP$GHx;_mlU2^0(jms`Z~fI24=L z_8(b;B8!)q-{EX+)*2^z;$_coNfTQ=VP~l8C)kaNbZ#acMEDq$RXZUpIT=F`Cosfybh7d>n8w@C4vbfqMbxDJT|! z?LYyj08d30cp5Mc>;k^{(J$Wl#oKOO<>%zBt24K@$A-WfY-UOB*zW|bBnJ@hPA z)Q$O1Z+LS^b`!6Sp7^WkmEAqTocB_;Yy6-kYIP57rzH}i!sQ)~aNB;OU1&>d!iQos zBr!hGb1AyB)pPX5VzB5+H!ZwS^SK}RFz}O4MZXWc5BN6lU0@lR|E0h?fja=k zLH1`K@EzbW>;*0ao(kOiiF@C5+q+)=uG?0xU7a#N!j7%}^|G3y{`E4yJ+5imbLqer zx*9!EqF!*mM@;K|l|)Z;+p3E-Z##h>Wj{9o>;WbL?eDGxo(=p2Z~*w~Cx82sZ~f%U zPaPlMYEC!a2czwt{E?SFB+f~?pgkJ& z-fHh!dWhO{^;24!)9!j8D%(w~9;>xJ3XB0-^H&1T27Ur~4sZnc(ue=ctJY3V?MSY^ z_TQp5zwP?p=&A+Ma=&QcrM>Wkh?htTi0#V4q3I-5@unW1h~D(TaIGJ`Xq$6s;l6RL z-niYPG{1|$b->R8Aieq-YKV;Ta?`1{+H74gh=s_#*H%;Ol^70&N0sTeUwYXHF)kyTi90HEniR{f&bkj?@2knzm?#9=q~= z-C+SUq@7Av7<0eRrd=7`&Spy!(=7%+LG$}!=5v|3(_Cr}zX6QzM;^Qm-#-uk9t85` zo3Z=;Eco!N$cVoIPxyW81@}76uly}KfbXRC249X(a^lNh#qT7(d@6qDu8v9 zg2b11#-B;td0YHW;>#Q2ciMe^bj&npEP$^*$o6K#)gx=`ZLU7!sjqBvRZMGR4HAwv zUe#99!AH;TP%gP$UZ%R%{cge3a8Wql-)7EdGuZWjBfvX%InMt8-VMA5xD$9UaOrOJ zg1{4j7yP8-yb$P_;5y)X;QhyW2k;@_cY%9>F9ToE-_LybGw*owJAUyUA8vgJzK54z zq+3y`cc8aQ$`v9ZA*YvR&_Q1-J?v%XcSsWw`dVs;Yic*t@XU7l1eJ<(5Y`nN+n1Fk zUVKAKf4iYJ-X?^IwdaA6Z$0`Pd?+jsxb z-FLqI&ez}lvb)dN58uA~)ZKTUG*{;Ex0!g@>lu0dj`vq{7MB}0j1f<^pEJ#7`#K}- zJ9Veuml)rz#$Rn?$;aE9c)q(?-Wuz=#VXpf8FoHZ^KcRH1VD8D>A*99Jg@@10C+L* z@HMt|SZz*m8<0T;j<9tT_jJPQ~Dd_X+oCBPlP+kmfq=C04&$d9(bVN9mkb^ZhAi&_f1z1<1TB_MjYq`KjQYL4Y>Kh z;7_;M;XGOM@_WEtz&*hK1il3P4e-GiI?nF^zX$w2@CU$$ffv3Ax(&P-IEhSf1Mu&G z=L1WCzS`~!iX2VMcZ5|9k|8sKk$dx0+lUjhE&Kf%X= z&jEJ>_W*wdJmih=0ALE30TzL4fSZ9A0&fG}4t(o#cinl{ou7OA=YHjLcm3R%yHGd-|<$&=|lQnx&_~Zx$hR!z-imw zG!F>dJD_p@b>L-h#Rn941@H^N=YYF`dw};N?|%UJ5b(Re?*ZBa`~h(Azp@tuW`R2q zwBHN7ANT<9LEv|Q5C0GJ$-qZ|%x@zX0$E@b7z4(EI&cPf*89;r0U6+HpZnU!zV@EC 
z*+0MXo?Gmv&%G`9a7*KsJRdCuU}isXh4WZ$I;GolEhSO=-wh z_!+%r)0NGved6qofo1v~Z*e$ll7Q0p620jbKb#-eIFGZJ+Xd_aCV{4Pp96mj{2lOtKjS{|JHWPkS>wPXfky!s0FMUd0PPWv13&&{{LFwIz^m9Z{vz;8 zz-xhD23`kfFPZ&2Z~~YI7Jx`bG#_4n_qXrv3y>sxHVd}4gI zq63>^0}|xtN3UiE^gBKS8u_g#(54s?VZf$%1_rc$e}nKnhJXCv!^B(s!d7S3(4&V=fzXHAh{5|k5z@xuo zbvw86_vOI9{Ilac`Cs7qpMLKf-h0b?OIJ@`{WL6sy^n0KBlXJ&dxu&T7#BLe$T{zs*QjkV^O3eQ-j~hh|3b~@n}Lr4v;T_j2e=)0 zJMd26kAY7De+qmG_%!gPkALa6zVxeKdU5f^SD^*!{$H&vRYk4psV~lj(%ouK|Cmz3pVB%_Z}huHUurb2pU2-M(6~NyHPFJguDxLT;tkhUf8vUbXEh$3 z)*$P!W(Q60hO$o>-q)t=FIc*?TdGsu-UURRg8W#gs0zUyv0f&KOz;WQE zz{`PO1YQli0k{Kr4mk6rKl;-Dc>T?C8MuHoa5M0wkG|t|@A%T|__=lMhUp#W$=w*Aiyryb z?%Pdot**SMr;vr);Z-CzaevcY0dLjw@Zg{N?e5!3-`eS=ItmEUTiR(Vw|=T8Y({O% z9KH~E8}NI;Ps0N|;8Vb-0r7+Zc)~M)Ujp6;+y>nH@&ESm*M0of<%Q{mv4v>WclyA{ z7GH41hB1qed>qq5=`Ok-?V6r)<5cHbo)gZGB)@%EJfhPwSS>)joJ4dXW9!*!{+Y?1 zWXdZqEvQ$Q6*MO?Ka_67#0bNCnonBTla%3X*IfQ3@LAyVz!w0?0RId4D)4n+>0$7F z;C;YHfG0j2FIiv$*aNIRahr1n_{Kfo_}F`Hf9;EJuiSpa?KiwtV<$hRPX}Mni83UwM{VA8F3>C3vF@bR zY~ftGM7Rew>pgng*A8u>IgCo3yYgLK%d|4qat%Ub3 z(R{rccn$Dc;Fp2d18)Rw1KtGu8gM)CMc{t|UjnXRkMJyD1jquTz!;#t!#6(rk>9%g zwYPudwIBKHM{YiK^G_YQ`P80po*4nlxi=lnuKJYT>4bf1&3<}B60_giD6`tWd2RAk zZlX5sOC(RToQ-V~=5AeTQ~Zyxs5`uU_w!%T@r{>gzTO7B9rz0H55WHh{uP)Q-sT(! 
zo(H@J_$A;EfsX)p0Urh4k_A5i&H0;O`CA|Pt&hCc^(5w6}SR z>fChhqf(cq>sDAgUU*=(gndoj-ANcv&6mxr94G|qpudt9VJzDd6G4L|r<-mUccB1>a0vHBHfV=N}ZRt>H zqO@UZd%mz`dz@&N(=FC&>S30TbtO-BUt!d)cB{PmYFhofk60YFryUk|-=0TlezSn| zeQyHZ4E!VTe}R7jyjkRb;O>vU^_I8hu6**9%=vTM`xJHmRwtSrD+_dx_Pei;7PaH6 z-B+MK-FNzAd-I0blN)Gv4=sT{sU?>cHr+`?^WTnIbpN&sG`Bvm46FbJpahhGDsaJJ zvUC+^w*?oBDiR;!r%?Q~mFSGd2ahKBoFjlE=!&TGYCAKh`!`A{@eyrG@c61A|m zxuLdAsb+ULOD&tx)=M{?lK#yf`VpwR2mBhD`SSj`SjxPGp=4IQ`6~ITtFZ#}8w%9x zWaoAlilsF#R&FL$Zh?HzWWQ}?{F@p&o-X2hXGrf{h!xtGE<(Q9Mt!{!E0Rv~uyDHO z704hSE0Ru=u|QVZwPG$-VmAFAawIBkXer+HbTy0Q16BguSUu^+ELLk|w2l=>r!zT` zKSR#Kcwo8IvGALmkR7TX*ZtTtRt| zK`8$mXr%<|dFsx4H#Up9Xj@3J9I2yCZe;3hMbpGMl;4*{=+`ohVxy$Y#m8Y8Q#Kp0 zI|mEMc_sHSsdOt|iSpz%rc8EZD4X3qGCsWOl}`otEK@gO#p#^CW>28S; zh5-hKZWELa0Yw23kS-|+K|nx6L~4+fZX^UmMf~=`xO(Hg_x}IuTkBivdtv=%PVBSK zv!DIce$JV_?NILa9@byirvBznc--859FaWVC5P|pHGEhrFFq$LH+LkK&&JJ>$L3pU zUI#bef&$=&fe!*+UIpU?;yNt%o{j-`h;_GeM7jI?=EyPDZb&TT$oD)raB9v#)4Tu` za3cjB$X!DUs-kKD&hI%@j5FkzfdvO^%R5328!Yj?=jcEc)?f{Ka5acmkytwv@VGW$ z&3WLXfm3v|N4b2h_FI*Zq9Z}94V2twPurpJ1G*QuEpXOYptWxQ+DXf6SgW0NHt+_D z^%s|2paai%uFEibF0Icw&&LvGaj~Ul1395%r!whD@wpLMaQ1NX0JS<|Y`$OM4pIh=NGn??KVS<&0v~V7+ybajcYcmoK zVnawGpnd_NoQsFGBg)MlY0FDNu7kV)S`HZOM)A8`iJYq3Ik?|(4`&c_Ldh*t5XfSj zP*4u!>yC|w8xRI)I{+@Pd(INNKvh#&M^;P47zCyQ=A>#EHz$-W#sjX!4gaw%3)%(b z5l{fv<%TPuK$Tb&$T@;ipqLWK3c0v|9R*&v?B1{twBq-IEc;E6Arbv)C@2AH+Y>e@ zB-H`xfN=&){{yjRk#h`dGSUWy2 zKok{4i1XR(72*{a5ESO6FcmZ>RR=>>XSgEhC@ZXol|5VwNb`3(KyBE+(7UhQ2>1u? 
z3*;S;=mPF6AS?-eNLbro!M6|~1tB1u@XOwxk|rP^B*F)Z@jzFB59l&Lo)91pbQ0j2 z21K8;8`uK${RjjBb>9QQ|1heCCjeQ)B9VKF2`+)u@T=NGjR0vrz73Q|BtG}`KK zTlA;i;kPXE&m#(6_&EnI1DCmK9vfS=`2{7C_UV*M=GC}6+$ zcn!zSe-nJ91bhriio0MS6Au@KgdF1bUdrfe4rfma0Ofzq+y9U?0>p~}4aA_~XMxQF zX&|^9Xn-1^SOpw72Q*6e+qd4=T@^K8>+LacV{rH^5|U)zJz@T#r(oO+WM{8QJb%t| z=)5ilI3r+WfxJoVIV&qH3fRA|vi9ef?P)Fi>^IZ(wc8XxLT#_@;4zvtTtHG#=%nB& z2_QKFj7_?+8phR0@XcOM-oQ z=m7W<5f>EZM<5_eB*1SDhwE6`1J8dCG*{Qk9*c6m2gL^(Iu|$%Vyva8b52!5 z4gMpC_LG+u5E2#O`<9vIb-{qAO5ZL$4nO>7w(+lu(*W%i0C9^t%FYpm!J^>GazO4- zAlZz9Or?p3lcN~;Jt8iCxO2GSiofJT2;_us@8D7qlOq`07jt^guI5Qy5J@d}6!p93$- z0c8uk2#AvS1$g;|fx9NB(e-d~aoh{7!5uH49D!M-pin@92o&`#6xPv^fJQa8&@mty zz&Qe&1TuwS`+F1u!~z7R*dOx3O97bn`>TE}Y>fey?zb!ULORISp{#rXkrY~3BoAQv z@9Ws}0g$`>`PL_)9@2plpr`g6w5{zP>3byoCbxh)g{PsxqB-n#G1+*BS zf`WkJ+vg#;maMWO91b_*gsUs+C@U&y>Zr@=!4C@v(ojHu|2JMnf!@_xnE>x=0lWj) z0N4cB0uY$QgCPJ!0j>jt0^9>g2gnA<11JS311JZ$xd47Y1h6{rY~WGiRd?H~dRG;| zzuv&=z~9jh-cyQF`wzdu`kVKd{>eL{!2PUe z^;=~B>3zY&P=DP5xDAj500VP9KyyBjeVg||9eis7)Bw~1aDurXR$f0VkMTq5$ms~` z{9V67=Ruq4w4?r-_kY*7pl`mu5j|r4A7+3AGf00e_1CMu{&*AbM_K5KpH>inero#D z>H&Md5(IBrf35e=eu?`p-eUb{683lU@$Y&cyiW@C{vY2N{muz;s6V*?p!q${06_D5 zTmpdR_=p5(09Xd#1#^Ay14w+E_d^PNFOM{2H)O|xfBy?w;{QYvpfW#N@Rh57@$Ub3 z0zpr;I^F~=fL7J}o1Xen7MdOW@5%yA29tMu|0Uj^%&qW!MIi!c?Ez>4kAGAa%-m3P zAO_I;kFNl$75sL^PgH*==_h#vGdKN46_Avl@9~FURR55-vnKsNX#agFN~j;N1Ka@c zTET?G! 
znrw8>{6ld7R4U080)GAe&DvkyY6flnkLN)x(D}b9^>?}jYWX^^JoNd!w|~zu7%lBy zso$T6G5n{ip|fA*{m-mF^c%vzKKor2{-x&ps8d?&uYb2n4b9&J1ASf%0QEE5w|>41 z`WNcko(*6Zw!m*c-hw#^`t<*6#Hf8ucr(8 zW$vI~ioN_<>VDa2`T0!E&pU-b@BH&E@bY&?5M!6o=?UsrS9+b4i#b%)F!><_aM?aeauhog`Z{IM(~&VM`#2^{Po zq`N@?-JqYLzU~FU0rUe50Q7BweSiZ%2B-l}14skN06=mf4`2^u0g{j9!RM7(m033f zpnoTmEM?K(^9g9+&sO&jy7fb5|4d7M=m97NHUICm9V+#EJ^fM34?pzhmgA2zPW*Yf ze`}|{>N!-`Us49R1lh#DJP+Fc)n@*)Qd3eve_7W-sNWm_&;X7A-T?jpfdG)cUjhgN zfOI|@AO>K2{NPHA{hF<^enbXaIohdNV*PzzV<`0A%M$0rmrc)t|dkgPjbW zB$+|i{!jbp$8~0oJ3qJYyFmQZPrt|nke{E|$o^jZC;9m;J_3>0Z}Rh9m;UaG!}52& z_02!J0<l@Q2*+xe_>^*pgwE^cn+`xunh1PU>krI#B@giH~>xoC<6ol1Oh-Y-$j6_ z_TpOs*2TAqZ&|mi@}GoJef`}%tvxTHFEnct=&!Hiw`**N|C7A`rW^lMhJU*Pa0!Yk z{ylI1pY1`wgFm+DyHWX5YX>aRZ&&=JY2PpU3rj==#qE~?t^!;G2m!bb5DE|mkPVOn zkPDCpkPiT+iv8{H-EW&i=+nbbt(i2LM?Bzn6!djlKV+ zxurKtb3;3g|DW~2|GK&V-bND9+8^}q1uOT^+CveP6IlNmS_9jI1P`2n);Rsm4nupu z4*%XRgL06CK7fE7;@>9_ZI3760*w=xi!bxSi(6z_if<4e2Vjl9LC6+8j(?&UED2Kl zbl*_`^601g{?~2(Zw3EvZ~$6zRsm-FjTn+&h#KARLs>>NjsMHRKiQ#z5v)c6Yx0m_ z|0+}i9>`lj^9{$r$W|S|@p>2m^e+n;9KwcJ@ChcCff2##!RG&Om?XiSH>f zu2O+xe<=Co^_MZ&fwM}gN-#VyJM1v{hkY4>$;0-6T|7MKb073afDe5V5)j}M5D^j) z?SYt-gov1wn23mkjD&PQ^ar-c$@Y^&J5VKGFWpCok59Otn230<$iMpd@&rarvQGpr z2OsY^Y#%iqJ~iH#r!X)u#UtD!4{GAxAKpHE0zx8UKn58oP<{ZAkB<+CCIl3N(gEN+ zaF&DzSp;N>4ryBvA9p=0cqt~0gjKGjfkvlml}*SRdzo}U?UAF$=uWV6oIJ%T3|>GH z6PJ)zP*hS@Q9Y}xr*B|rWNcysp55Eoqa57aJv_a$$eN_R$fv0sH(cL>1lII>$A4!-95d1xc=7zgX0sEQ`0lEbMp&p>+d!; zx884m_z1NN4~GA>S>W?e+eHo9wGZk8VyIns`@8_~sR;;K1c(mGY7<+z9y%^~iR7?c zOj=0;DXWmqDvdR^YddM)Q(p1||w@Y6t=X`K1 zPCT#TDvKZKq+t$O8e3yZ<4WiI_+rX+*Ly2B-1)h)*;9$xYblHf-lbUs-SA6omO+^& ztBF@O!;-UnKX|`9GrzK8POwW~)>GCqYZgoWFfWDvU7iKS8r{jNvKSgIa+agHGun^b zEDZQ}Gz(IRmW+apNm{y6TWZNBiy}R!4moJ@rk}!Uu+mo>R(P))4oyA1*1&eqRO~ZX zhm`E9!~5pKM4Yw=k+d1}YHpTh%6-Q!X??eQpQGKw!nooN&)-ZI^k3pkJ(@zczw0l`s9A1fV|>0$}pa#FjpqMoGHr9KJ84lXvKG2vEJ4A zR`=y|P0|{RUdS+|;_f&VS)fynS+WrM*Rqdhg(4z++iF=n(#_M}Wtd+VNfCR3plS+J 
zOXgF!;e7oW*0a)e*G(g7#y3gX=Jul!T&=ioqSt{^A<4?VSu%zNvcMWopUN$DH(Nhd$vfvp&2pkU6+*BD3_tvFHJVZJztg zBTHgcABxYoRL{3-c?a%X`pjkNeaG!^xjxRFCs1H~hW!X!gi^rVGZkCS_|&j8mi;C& zkt0rb-WUGpmRjYhyRm0pqGfk@h>EaYV~6pbHP5-7^>sKGUm;e1gKnv3fx6B0Vs@=- zKSx$~X}Nv(r*omphSOZ#)tZdko;ciaSaE(solWaGoBBX5%EnLXvCK>T<|X+-$2VIi zhqC6)mIn@1-M)vK*z#B7xS1_jH`|lcxkj0nH-Ds^962f?Hj*OIFQ0uT$tm=O>`?tN z#+s;iyEHc@0xopxshm51Hz>~Ro!MYqV6eMxSh54jYb&*VFUsfn+wRpUbI(0eWQYl0 zvp{2MPZI7J(;KozmJ6F~Rb|hsdA{P0a3R(5$sYo){c!Dl-b=)X zhPnh7X~$V^$zHF;yb1a{W|Q{3#J-+avW6E$4UZfl338`;u4ytm9UGm}e4X7D$@u9M z56Q5XM~7M5Q$h3r<&#qz9D@<8TA^s;C0_0pe$Nu7c&1dF!#HxBtJC?Xf*ex4YUU&Y z9ud4K4!=7dQUPxz`n~R%sX22*zK5F79WqS zh38eI^k`1++W-3ogq$Y~I{dBrxU$Hz= zjG}kFYxl0`_;y^Os#nhO`E^<(fh|+YCY|@o6^-uL=0rjM#k(&M_M{y+E>*E`%G;l;X{z7Xsn9`p8|TvLmTZJ^G$iZDuz6 zsy-1u?nbZ(CtQoy;6D;>uy9IQE+K!Dkyxj=qF9u+Vgk`=W)oTzG_29ax4q;%RMgmEI7=?hH#(pB6-2M5FY8JS?zF6Cl9y@Z^5MzPUw+8*^yKFIf-nlD8P&j! znN%K4u?18T)|4Leuw`G|t1CLn4~9+5man9Ui!Gj2lckkXsc_BLQKc*95Ic>YV}7mM zbjodwXh^rr3?}@el)f&8NKiYw#^Y3+fktEZhHJ=+ebe`CZ64e#;BHn z;F_EG;bR`@FTTKRHZtLndRSlWxCQjf@>qlnBeHu#1#>(}tsEnH_fUu1=*hAqbvZS( zoNEm%GCp%6;y9Py)ghr}#mODC$jVcZl3FflsgWlKo*I~qygd9O*{(^D%uO^gIj6I} z2az~#Zt$kbH!aD%QHH>&NG|1V%a+WjQxo3^Q|eT|Op!N-X75{J2ZYouI&k6ag0iPx$C@6PZaGE7m^W_-P7PEIy zgooki3A)7Ki~f1SekF4P%8mJa9|^_?}LF zn$%>zeEwqGl~?_y$V^;J(O&oZO!zVGr7FP63xfjOdRBQszd zPN@4n?`EaCr3P-QZ>L+nkoYvZtjnJx%e%0J#iLB6i7l@4tZUeUv~#fFqUZaS8wLHr zA5LqC)()FwlPq+>C?bPJ){ z3us1dsv)c7L7opM`F$2GTqi$uxdnaZ8BFLMjHYtHxR1l$s4)P`e7JPlfr~Xt6F^*0%>2Hj?Tx9m`CrAyOe_FYUvj+=NqtUz17MSnwPfV`yhIC$i$X4 zD8JRA#c)n`u8uO9XGgC41G3`O;N{9MuyM&$rQ||G4c_bw#`G1N)OoY3UfuS)N(BT8 z>33B(zrc={Z#?$#Aj?rxT<4`c9n{w0rtZbVLmtF1U|+|ByVqy&RQVBXF+9*CdEyIL zRDCseV5n(4(`bs~N(`=SN8aVOk5hm20E{km{0poZwIiNigEnZg5*D^!zK?qK(81+8&@ z8O~|M7g)H(aUhp;3WnUF7m4*xqgoGHXmmwfYDVO=Y^RRbpiIhE9*}dR*I^R-=L6}7 zMjHs+EEpy)YZf>uXPL1libR8N ztF4yCm!}-&R-z=lb4U#w2?Jqw&_|1)jiFn+TSO}m{)F{sJJFr=J~`f z*44D-~_*F`GefTE2XyDSh6xmr8w4ruuFVpDt}jia2f3xAcOKctq$tKg04GU_$qB-7^lkp@db 
z%k9M0YL5&$PwiQ=!qL8umSaI=gWHBp27~Gucp@!iV;+dv3A+_+JWX$(3l=NXdAK8D zUWdQor9`ND>e6ofbK~vX6U;}<^Nqdf->$Wu9dj<{dTg@z;!Imq&4)FMmf0&{uoGmG=u(PaE7@rcE|M0}-gnD3((Ok5G^~|Lo&=Qyl)e*>`I_i#lpo0FEc67Q z$A(i_w~d<%PBw+;lq28i2kKC`dHRl4BCc?0!i`LwM|%g=#;N>r`gfG;yUK5>bm2>g z5_lClO6O~FYX>fclQWIpl)jT~W?xWH!mT&P%ek9!Lc*3j+e=iDtV7T0?E=}Y0tvFC zTKO_U9yvRbcD{oTCLIHI3Crt+cB}Ix2^*3RU6cd4;zP98O|%3%j*RT`G0J zuw5tNTz+NSuXfJ@)o<`_`0?P>TKFa+zPO0zZTW1mL#rh7NzcrA-zX6+Xpz5oP2kq--ZW}RO2<5;_TXmEa~iv; z$If%zN;VWO1LVGy4)0Q_7fVYb=Yk0t@LL@QE7#8XM&2qheu2G_&{}}HpxW)-^6~i? zqx32LL<|h!Qt-fJXkb{b_~LUx7qtAj)#Wd+yVZ}M7EPB-y1qnn=%iRp9J?1iAv)GQ z?fUArvs(pDK;1h(o!I+L+z9E)Y!0b@@w6tHNPp$enE}*3=l%0+uI$oWe!O+)^;|h0bz$t<5Knc) zVpES(rjU|{csj|m5yQt?+sPRtWjI097@tfn<6QU4`_YdxU)bJ%V1zqVFF=NU`8cDq zV9>ez0_vzOjEh4y!d;Zq{bJqZBkJzzBD@7@_VUN!;H>$-cd?jPPC^_-`A^v#l`N8PoR`rW0m{twxF~&8skI(Yb_irs2dmwt8;=_uE2Ue0|Pwyk!a0{e* z;o2V;`nr7eVO=hQ%&q3GZ2+MOu2wP#y&HFwMYCMTKu%LP_Tehwt$H!6;1`%z1y|1M zN|PmJySps`1Lc_|2MR=Yb9kboap^YU>#&KDef$$2s> z!bm=_9c8WKl5Ywk01} z9jFJ*)gFrVOKR#kbcS*}I+q!-l*5EzL7mek%8B9CeV;NuPtsd|o2*%jIl9iV8P_tyn;Ou&XH20Un7Ol3H!_xwcnhy zoo31~)Bnill-V`*0wmh)f*g0QdFWP$UdBylwc$sC zRxEXjS=p}Oa<|cP#jCh5%>CC-vZJ3KCOpcpzZxq$@}|2sr|r2R!t_9$&=**LM(Zc` zyRSPnTZM;%dUQYNk?&+|r>}loP$x3%9+3BhZx(UKHFURTvh+LNp61v;7L;nT>AJ=$ zGH(}j^h7ZK@P6dFf;di51toOGs48Bn}rIGXE>xgTXPu~%{ zR5%k6H*M2X?kTWsVVqAp(N*k=R2NbytL~aiTz2DnYjb%tJ_}KfPubI`W%RlYMTa@l zO7c24q!Dg8IMQ%1SLuyl)U08Vzok>==u!3XUtqQYHL5YLF|mHy$2#2& z1$yRvcT5}wfv+m(KSGl)w=n9LP#{PbcKA z3###8T$Qp-{?6HW(r)(n(1(|B{9EA#QVb$ zqj3Av1XWuDde^cIM3;tiC-%OJ=D2Bng*nSNnk`<)i^!TLmeQ>~kv?b0{mP*tT+=XDB41eyBj4Bj}d$`I~nShRC=^%V|NkYX@cdVrEX z(^J;i{PM7WJt>dB?uEu1EYV;&WaMtnv5$I%7v9?OM20mqsl~p& z&em=l*UDbOR|LO&!srI~u+yR$&BP*|44rA}#+8`Xk*&{MZcnPO_LyoQ<^#P+Pu!Xe zZOrVcAuMU#o@^-9oXDyTK7-G$La=6HN$EKD>S>9}B@5-r)9=ic;)=YR)oKP;jVOW;JHfbw1%W*NoMGpPSiRSI0X>PbyY-HqA!KwzDr$LT`b9nb|h3wD3 zqQzC=zGbSbd5v9PVCV-0`7EDbTy498%i8YDL0Jg*URBab*jf@}U=qtOqTJChFRk}G zQBMAl0qJMRxaMV67rd#Flur{mL)QIh$SwM_N;#=;grF)>gyb<%r!$(mqY+2x85Map 
z1*SiwW8a=e6U){`8g;MlcHK@;Z&+)cZ1z1J9~?5VJzf_{{^3+)NHIu2Ckwn@-legp z^&R<$+S>Mrd8i>_+b_Q+*+@IeU{G;27|(41Ei^WAtv>{L#~8-VpE6tdfdl( zs8at!WxMl%?>sd6a(GK~wz2)6xX-q_z%JU~xr1bLRLmb}@DeuUJR0vz_vo}?v*Cr0 zN|ib#`H$d+N=|8a3;KHE+1&K9UX{%~Kw=-oCAVgxj`M>ToF>OC8oOcHuAS;?1P0!G;&k?blS%Tq+B^r5(vrDzh4{xW7 zW;=G?ryyN&-fE$Y%R5Z9qIVkIF1B^N5Z3D0&k;0MZ1$-wt*OShOE)D(~u@4?NcEZ+y~;mRaZ`tl0^j zTDlU!R&n{1RWxSoc-&}p>cGeTB~OQiK#!IQ^!yhX>3q_K$_MhU*#sBNY>r1c-`hNM z-$&+2>Of9e6lF`&UCGSzWW+_D+pc5u>ig$u={>aWu5!6)hG-VwU=1o=4ant z-iQ~K>DHUqd7l%f`0yfDc1K-!{1I7qM)X0|&K|@uGZDN=21B7?lH?=eo#W>kHV|wf ztn|gFszQ2$-soN2@$HuqC_wvW@Xu#CKf$K?uqM?wjTgwvUP^ytd5cf?eW|I4^od9c z*9)flu=RpS*ADHqnS#@st+$b!*;3V)g?921@1R*b{hw8ik6?FOHP$_-gQ?a_jy>D> z0+Z60IpW}@^VEXk`7u)1{?O9~qr0Q@-mgiimEQXHs%H<42+6w0bAouHnfbtUMHXZF ze3o%PEu%coJGXi%U*_y0c}=qBEc?>d6O9jJ)-PkxB0XojO{O#t;B~rbE&2x6gAtHk zG;Of#rwXF7neJziS$ui(v^YbV!LFgT)VqYo5dAx1(u@hpH?;LL%)&qRBPRB-REFY~ zw_=;5HVXm{HMzcaNEmz^nHkP*ak`aJtRRg8P4~DXIY^^xqUIC#v_e78@%yjxP1Q9^ z8&%qoEi8%jc}NJaAeb!Q3Ra zeL@(NtmStJon8Lq%thv#ge##nSr+yZcNejGoVno_m3$49!w_hCu-x1PLEa<$^9AR7 zJcUh;#94Ih=w7(+@KLZ%Y+igsn1=*VE z_KE1+A<;w@7!4y;0dg^Es-Low&iaVgr}jq^)#L6Pj9nUEU=0Rb3}^<1=J&T4$Uk%C zt(qm`CQ%ruMQhqK1}3yde)lhp9e-_M^v5iEk<~rarRWmN*U(aOLIvSU6ZXMl96V(XLV7m zh;*&;8iJJ1&b>D}Wa2Tl0Je|O+K~PHo)$wTK=s|Mm@~#Zn)fqkCACk5~j_z z1&1o%3_PQuSW2uuc*o9)^{h*#r0$BNM}+p0b*U{}1j+5ge9gl#-{v?jr;t7Typ1Cp z%#OE8tWd3Wl(jC2L!Fx^JLXdC8L^ofzQc0|jb-Ua6s}z->(rFD2v`%O2_`5m*G#dx z2J^~4K`?N{e!71us)ti<*(FP9>(3IY{h?dxM_NDb2zA&;wok3KO?~ReALApj z@OGFW8_GOkG#%z@?pe>KHKdqr{Ah$LV%WMWx54pMTuiy^L=@wqqqUQofIAXDy8uIo z`T{#&OVrI%Xv!D!X3?0|t80GifEkThtWX@Td~%JP&NE&l=b*DYeKfC6p= zJJJ<-$Kpd1@GA>^TJk0&M|kIa+-Qw&G~pQs9$$NqIgxtEj9UQzpmbM@hLB+q_myGe zCN{~#@Zk|xVQ(t_|aDZVv6 z?HW%Nz4g~~J_V8^q9(mlpUmHR%*PGpdj~ZRYD7)TS%<5%$EBK76=V*(O^m4O)Hj(Z zaq>RPd;Pp;>FuV<%diIm7`{YKU%JIIOR`)3GOX`r;nQwAxo;X$@f^53-**xPc59H% zJdSN0*Aqz?MUH7Edmr0Z7R7CmS)h4%^)W6l6T?F~JUdM;d7&nVcyNSmCC3~Un)Xnt zqt*@(#G!un5_DUB8`|- zgg@Ifij2~gnZ|K;u%9uDbWly<5m_Bu_i1tKxUf*i9&bEdjeH_SBwl#ECM`Mjiqv`k 
z6Y1r{CFhr?Vn!b`kH23gseCUYVV~M&==!Lt1wFHJz&p_tC;X<=#FXe${JEx#}?tAbXM zaBX;|O7D&zlTWMZHirRS)37OP8ML)V*k#PS{r5hLg>R zj!u+Dg*!VYTR&FZZ#(s|FNbT1STbwdPOswei^T9_L040|%Mf=c&Q92yI}?)&niP63 zYWaDD>GsIoKA{~?HdwX286a;^r;-)K&H1hpcNfEPpXqU=;bcT%>51uDUwLOvrO#R6 zxTxSA@yTcI+s?)1W%2Tz-3#X9dn~opGMBd@;2&YYDkVKT(&GrCvroBt#VJV^k;|zwx+o zsd6=LEcN2-cIH}XGp&J0l2e}9h-&t;l4d&2sj1XVf3CGFyOhD(x~q%w^{c5%&7V%6 z#lY`2ql0pXxjoZOJXWwZNP}cx^>|W(Bs!oOI5oNc3}p%-CZ6 zzC{5>;;qtcKG&@FYewa%WNou@J0df&98G+ZXNGEWUJ6)@gW=Y;Skx}5I{Ur%ZWibS zHx(}-X3FOSGON@IVwdl-eSR!@Ea+N&Gi~vDT=$E|6F~!-dOb_W>wAU8Gvs8deM2?z zcH0fVkMD}ak&}FbvrkNivbwG@FP$qRMhySvfZiZWxSVSh0nd)c(i#_ zs_#-}emd>VOGVgB)E&WC+|n|e>3@4tGo)+CyL(o%XC_G{Cekg1Frc0%GRK2s%RiA@ zTj_L(*-{XpKO)D{{6zxig?N<x!k;uQP9{E8L#`#N!=2`=YX|;Ebm=5wY1;(cQY^f$gb>qy)06l6pT}VcGu$ z#<@7y~SNTF_f(K_7R2b9ggMCBaB}>Y0=Z&>{ zqVo=x2+oQXUeWB@QJ*kBtf5fTK6Q6vtI%uznkP#}dmwjr!WWn-hvIG7Ygi4pq?uYy zXT+_@)Buhb5$kI`Nh>?dt1aXAXxBevW{A0H(iC7mi7(JuWFzP?LNoAaVw&*oh0Vk6 zuO>YrC_m>*-D)ukXBR6B?AKbOjd!o8ToZ`$guOgNv+_QOu*ic)LM-D>dyH?KNs`x6 zI?rgtiN28~4Et~aR$acz(|$FA2P0)Eb>E0q?4E8!GMZk#n{KlCV5?_+b$Il(n*LYS znoH$H^IP4T7$3S1Au-+g^D+axD$zHzgHdcn2L0bu^=kA(NYnPjNVzh zvaIWEFu$YE(&J<$z;Wz6dZbqElQ2qTmFHz~=-L*8tM)eE*qf^xC0jKl6;VQDcMUWL z3Y_h7?eKt*lSZ)WMs{<>B{<5QcCfrbD`FCuH`R7htLp&Pwb!t4WQV_NX=SXpatyy} z*Wn!|LVACB-_`eYP7T8k{j*Lla)?!?pE5qJ$NT69?CUP?IHZ znp$i@W@dIorIbxfYo@dDHdEh>THf{RPPIw%_2Y+&MZ4Z2_lb*JJz?l~BD#{UY7dJc zH9Nl+8lrP*PEV67WW3iC_h!&lL9lZeyvIRx$%*~8Yu+fi&*Dv*x)iq6>C(CzCz6C% zh<)bu{WaH4ZM1t^mE7WX@XZremZ0x(%)rvw5plS!Td7vO7x(SCT3+ZL}mDb<}9yK%g0eKI&aJ z$GFgWBapq_x46AF4WCfnX5XVc^eLXKcNcCRDm~%AbFRtDS*nWa^cNUip<2x_j^n=S zM0|DEnb0%FlTR8t5i`E|g9*tw6U66j*-G(ol{*YPh3S4Gw~|B8PT@vYf*lLtk#^>- zS;R@Bv8IBN5^F2d!Sq*a2;0wpGPu1u-fJ6d{Dw1dwUeNyXoE~$QVZogayb`yIE6wH zQ+VtxOYrdJ>jP{|@x8HU8~u)0@CcnPO`f{AEPHK(y_BQ>Y}4Uag+y;}Et@md_+*)R z)1t5Gqr4{Z3(-^!5&_q?=F7#q)8>I-pWiA9R74Msp(S zT7seINiU@uS;T5xo>Rt%rE!D9GrjZ4kGbB~477NP8osv*?T*SlJ{f; zYE@ScHCGc34J{-obP?|IafkPjS(3gn9zw*)EnZ1}@tA89UuSjnQf_Ohc6UJ*#rp&m 
z*+QA8L8T#0PM)Ht(wfPt7U`_3aO^iL4aD_{Vm{<$*r%VXPf$y=sZF7B2r&qLK7ouF z*Ns%B*`jJLdflVm!=(23_^WtU*7NfgJQI8aSDKFyf|p)C+-_(QNkQ8y>Uk-grR2D| zP)##hYC|=kN+d9EJjf9;D>2yMiYF-0BDZF#=8eB4xt*EaKsG$LaO2YZmtsYx+n7(J zuT30o8r7kL>cYbuXeV1SxZz70c!C*^d(D9nZ8BrfQ=HvjfHJgdf zC!O0TvX)jvTP(x-VU;2uJqoIw6BfqOY;P!6G5S)LZ@js-Lp>fuXjD)UV|7@K?j4JE z%IU>b)?G^9&qd3-JiC1+m?wP1+c$^jnMk`YgNNYb*X?Ra9N1hL?(NY zv(uUt`n5U74lguyhaVA|^A_DXT)#`E{7E6GK|W2B2yL+Sk@zYPA2}|G{SM317ex5y zKAT;7#)!vJM6>2Le2D#Bta0@$aY}vnom2W++E@6+nGwpB+)Cq-PpBLg8qODQ)A^V6 zRaf@ai_maB*I`#8EIoCah)CjYPi>21(Otc(V;i0Illo1UY2MoqoR17nS^8j2za6N9 zE^oHfQyhA|xJ$EhVLN8kuaz$7DOGTaz~fh&1%g(H$h0gCn^<-jCCOq6mQt4BvUt&O z&^BBD4ug2cj_+DfNRXY(;2=&?=78k~PfrV;ckxbdXP%JF;z#kk-jwl@zFqX>b(`&| zCL6;=hnD1Q^Lay*4@$E6xb&25K&FN_>)MKOHQ9uko=G+?B6TpZH1oBYQF(J2dObXk z&(+K`^L#(&tNW9$_wR^+B(8i=LjyC-?*4_3hD$4-bkI~q+ai9Mm8JdVr*0RPWpZF% z%(AOA%+5?*sK+jE28cfW$XFR(%aPmcW98>uI%GB^G8J>w_CNr?qPx!B%r!cwv~sGm#E%`!S`cdbsGEsIX-R0VV>D6~&lb&5HDot)ioAV? zw4&}HJzx5|7pfU-D?UJbO)b!#Ivzi5>|NNSg9~hVwuV<8BA(-}Zo)MsR3|2O^g*r$ z{h^Z&{8!fOwp?EET6<6S9I+?9Z$mRVxYB658c}&0eUzF=d@{SJV#h7hY$0KSLb_L# zEgj}m#JU}|Dl~!LFe4q#rZkmZz+$0m5XT=c>|Zotj+LQMNEE5~B(yyGaH&peJH()_ zvZT}lejs0Ez5}(}TVm!fQ)%x(hEEdV8xx6sy2Fz%o%cX3cRr5he3r8WPM_rkvSdRf zbB#l%%r;KTd+URbHC?H5J^D0k!-NYj}uLWgieXR0vj$`Z=<_ zD?WZa@V>wG1x5QI+@mZd$1Wb;s5&PZ2ZdhYEfGKS%Wt1Z8FFOq_E~V(tdS?+s?R<2 z|71iJxH*qcRr@22SqmFOB#g7?Qu+~?&dZ6zN^zI@kG z-5ScnTJhL01dAVxP&R)Ya7dAiaF;<;YqY$_->>3TtvG{8!qw2jMXMhTL+YcB7}Uo$ z>E0oXdi!L4SuuUf^TgJL<*cw;{d|iy2ShLl(P4y8bAM&ABH=^xh}q9p969-iq+%S- zTE!8HkF6Q)o|1T5lbxUYf?w7=D_pM{bFV0EXq&V3slkltfX870w`&y73XDD8p?&?U zcAXw!?hHO+Bw8`+Dtz_0>1K>;ak#l4rX36q z$xEkQd>C*og$AvSC@C)m6{7vY^A-WF?0Iafxi8v26`K;r8Sk>MT`0#mz(-i3T!{Cj zvMQcni%7;U8bzsp^0@ehl}W=FN0eB&ooLqkGS2HlgUWQXX*Z%kO}j*8QZ96{tEO*+ ztTBb-Rgrb?it`m^)~RfUv7~Z>_$6ud9l9gIMQ=01)MNWD?lwyH&UYo_=#(P3_|lB$ren+Hzd>E9v?B3U4M&P z5;!mFv|=7E(UBoxEj*{mSVK3Kwl-ZWvQH#VoL-hAZK8bRvzdj2$pOt}9OQLfbh-sK^llun7SG9HyC#6l~_CHeUUS8igMq@29u}7+&X7ph)m1t1=4LJ^C-@n 
z4@)k|CzsdiO!syyxmO3&<$<+em{Nk3V=it=_0P>NeHi~#Pw9WEhr1}wv4g~m+2q5a zE!4IjRd^tak*S%WF5NBW z(udxxl+ z1w6dCilr~k96A&)6*F(a0pd<+@Z3~}=L;-AZ(YzQW{cH{r&CJLD=9hn4r!`ZP2^$i z*mW^2nM0O0lQef2Ppr~N)0->|t~3QG?;bpI5z%q;go4Uv=?@%_W+RCU0va2QA1wahmmwLCM`gWi6sF=_Hlfx28dghQ9w#+WV|KJP1v%+VdNnqM3>Q4Gv~ zlx?zZw_!KIJosr#YjtySUYg_cJ%4&9iGxvut=FBdu;O1PdDFo7?p*Qbyday!Vz!6d zpU2pl@+Qfet9u^KY*<2=MGH3QsOpS?gPpw!+0f;gOn<49v^igwM{BNbr^DOnDD*(G+=?_-`3u&kE$PgRKKdj z$NYL^zQ4XKumFSc&F6^H9D)t{=3Zky3G%kLCn(&E)}%|NYYH8NMn*>3X3U1oo|#Ws zYp8O>YWOB)TcQZ9;C5_VX~)>f;-YMN={8(G1X2+Vr4!Px44*LZ5O%a%VZGT-ji*Q5 zL4UKvK5_fTvyBDRlZww7mtk#3CyRXLo+PmNhM$68`Do~`;bUZ%;H^J@m#UCb#!T;$ z@_7>c?0J1#T2{iXlUO2sl>#|Q>htM>*;96`ho>pcNL^fC9SV(3qq>F} z7ssE+-KGncKCg51{7w@>dzh`wiQS%beJBl|wFp;lA~Sa5&8m3^{+81#-7Tlj;EjlR#EO6T@h@T10`2L3vD4#VPK!e13xYkDrF9{W8)#%;*s(Dwv-91&Lae+EShnV|uL zbkA!3Qomv^_$bfqJNpiJ^3TP;2L2>!5LhMBKB3|Fxn{Dyj2>f>B5*+&-1}wE`eor-Ht0j^1QF*Te*9pvpIha$8;}zyX_iC zhzDb^<2bG9Q6yYSppQo8r6lkJeF3I08OsmKx#xT6Bg=nipV?R8x9tnzS+pM#={WZI>8uREK;-x0O6O+))K zQ3!5XRN&XHe%hWI@vevQ&htdnbg7|&((tP8LxvgaUc>M!;h^z{!*&KliNrY`U;`%= zs!>atO2#en=#QQ}44RddkJzp*Bv}gp6jy)X-Al#Z71m?Bv(#ol7#0e9S5vNhE6{ua zYBh~)2_zHbZTkCHC-ED_Gg)|cOt>Nr3sbGY`eoPHU2`^TOai0W5_X6``Gk;ma))u#B9#ag$8P3qf+ zdDn~{gVwoJXA7$m)~5a4$gyv>D~1op$}{O&P~2PG*!j>X4uh^cR~O;SKN#KEjUw*H zA>5pklUkQISGqQ&x*YNOV6Fl6=}GfjQQ0F_!&b1q)FgRN%LvX#Op3bRE79%r)shBX zq~Mc+YYyYXWi-2+Yb%nbWILR8t~*ZBwM$EBY##G|8|2C4(zKq;E{=!AzAd@3zqh!v z4IdGbMcT!!6PBZ1>pQUnX;vGuoQF~}&akfAs z;NW1@yYwG5&tjKKx6=GQ73I0h3=N~bU9;4#ZY+Vk3<9SBn&s{E?-yzsb+pl3g+yPO zT;S9i#J)6)s>3>YTY16nNhvYxcUDu|UfB~Y!*4=QYRj4}Eb9?Qw{f}y$Oo-(`bMqe z3yCJt)SoG{xQ-5L_MM>WHWG#sF?Hk?YSIy@V}5I$cC!_uY9#4}aD-&3tiKX@kzU%{ zkX+06@HhgG3EACT>W>xKU|sS_Ju7h?6m= zJ5Q}^X(AmO>|b6Sq$%>Qd*ZC>8h)dv>Hcl4vu$1oQAqj}^XX|N#&@{ODHW<(Sg4u0 zjoqJ!W6UXu8IBE0qG`7FZf>p{{ZY+Vx4DZ-xFhT&lsFh2YhA6Sdu4NM;9L$@9V?nZr8(if}ls*e+*|V->x)*vErh?wh6A*jub=Z^!_#1FdyF6iGZsJXTgn zNof^09QCStE0BFn`xJaSips)I8BVxq011)&tLPR45__8E{tI|jJSFhwPSVP4aSF3! 
zcOtry8MDU%xG#3us#b{;0tgx8^sZy#myYbdANW$^Sh@cIS>=&$AFE=zTl<)7t=jJ5 zK#Gb>7Ci-gr~63!MAdv(sofMiY%M`p*LDXT{{TvuLC#hpPRRMY_Ky9it~C40twUV9 zWl+V7vB2w|{?+qe{5(U#zw{j+_tkjU{1r>~nTJSwuL|flpw8r@g>ISu01kgD{PF() zgv(w30HBP&_x`$PPMjUCV3eMvQ!A{EeJAnb_VdtwF?;~i{tth`rO~aft+okoBeskb1G*I%9FCdqO}Hp*+?(=8Lb^U+ z!U4RFSn#|Kb54pGSwozAFH`w{ky6O9ljY=sMiE)LKTleYAga5SK;+~$4<5ZU`BBP= z*lEtW9FV9!?_75{1DsQyG9$o_M=hPXVZg^H7^+F! zw31qpEO?EIg*aog2bx~2$IIG4>y8iU`qS7Gx#e-7Hs zJuHn@!*PO34vI-0)NI?0m;`~y>MCUl0WuB89N`B%Q%2-mp=I{noc>j5C9pMk+<>~H zfwy;I#W?)Q%!4C68$ZgAFLBru5uEc*7;sMmX*dJF(wR|Dlvv%k$R96YE9@WmBQNb) zul~*-weF2-HHje7we$8pLLJ{LlgbnL=bHHKNM{98ibvrwq>go#M^#oU`+sZEOl5@u82EIi7gtc93 z#9smQ4-skz%_Ys0Zp_Ma%8y$5x5C#F&!no$l2?K&N|1E)W~sR%^9{q1jt3r;n?d6r zjYw2*GfZ{C#%r2ejE+SZ$vx?=s#oskj`Z`~3~^03+s97$tYqx&2gu*I_v{yI@vl#B z8TboMwi=XgzTy^K89l};;lCSrbH_geBTWxf)8x010HrzWU$cy#DF9@iy!Edv{h|K= zWLD=Pc|$q(t)Ylgk0i`zO}$b1DdOmL4-;t!Zk4xe-?SXlJO%Ko z-%`__eQqD!5yHxT?Rs~{{{Vv?EBIOBtF1Fow|^!uRwoIx5IS|QH}Lnx6BpVvyRyM! zC2+jvv~QIw7(T;%Rq&Hcp3XRQr9v0(0ot`Z2c=*5drw|Fog!ZgyJad0xcN`n@ z8@B+Sab3ohGhV`%cPfes{DAj0oSTx=gOS!eLE@b&#b)bIVj&!?f0bd~Yknp1Z;auX z$U#T>hIs2;-o0&fE|XHWvq?O)JCu$EaoSJ%M~G*1g+mf^wDJyp1!c()HFfV4UuYV1 z>owZ$BW_1g$6xd7T&An>15L1sE2yR=fCZ0X{&Q3Ko50%sxvQCe(8~mkmJR$MXZlu0 ziT)8>+gY^MmtJPpA1FLfsoSV^E!q55j^g{w6a0#V430mQUGPtcZEtlcWYjGqNXwRu z!ecd?r}!=i^n@)D5~7|}jx*?SRIMP?E+CRCK*(ZakmnitW}mZ5V(gDhJ~W$BKVY$s zYJ;>D9Mx|WUh8^Qlx}Su_Me%t)K)*kjXO{+CG7HN2PzJK!n=#RX*C@j>nPW7fL6FAI|_ zDJ(_N7;Oah_p4FlS{fq8hvHVYurBtlCG!IA*n8JkWiFSd&NTu+mDD#q&OPeo&w%VM zbO`5xaUj8E2am0Be-JcnO$-+Hk}{pZaNOdlMYW?W?Q~BU+}y_{t=pkie5ca3Bh>WA zwqbTxaxPSg^808tEk99)z&ip^a!q#L5z;O$jl*2cyX5F=SjEI^a{hy+X}b0ImLmif zI6G@!#6oKugK1-Itg*&0O4mE#?PlJ`Rd^zkYoQH$;;Me_%UTk!gMn)U}TjMLv}i;z
CXxDk*XB?B z6w~&l*M2K_)@=#(0xhiAszxLwLuZrn^{%+m_6wPu+}!N>*W=&CeP`mYjjnb5M)<#* zJ04pnspsj_@-@JDum1o&7ykJ*EV2e|%tcGla=BX?;3ya> z+jn7ratX&Iej^=o%{U~HhAhkmzzX??gYSR{;~Zqt1eHcm;Dgk2pYlJ?0;CU!_Ae?p zQp2FGefKBb>?)Ll{ac@yaMmrZNzE;JbV5F6tOlpE60xHgN?N)Xrw^k5DDbx8KlY&nQjq*3yhpT(8t>=gp3ajpb}4u^GwF`TqdxQmV;@NdO~n zAmm_vRVKiRADJ6(ko=&IG3)+&nw}eBmQGqQa;bwI3+ zgek^GLjkn)1lQH}_L18Su%H9Ieo+4a!L*<7Pale(1N=Rz{?@+}VSPH+#kx{kcy~&l za?2zEw;P5+K2>fu=ab3C2+!*r@+?ce_yVf6IZ~61o!|MItyY9mIE_oW0~G>XB<>YI zI*VD7AF&yC{HM^@I=(BC%Hv^=#b~B)F^Ez*Ccjs&d^M+Dz+jzOfgI!xYnIl3XCH+c;b}EL z3&AXeVT>andE2Z7CG1g)%y~+?O%`5Ln4k47(NBjU@i2uE*s99JPtyNo_Dz^*6a&W}EXn9yS;*~l_vSJ1x_ zzh_U0x+Kps=@a=;o;HlvF>&zIP}C!a*H_ZrP%a&%PARL3%^pAD{RG|U2_o8!n8_q^ zO-*<3($7}Yqqs;!PG$gz!Q=dExw-Il)|GnJR!V%8d|_9&^{p=r_0;(uMBv0TeyndTWtt(NgXQhidOebotoZE0^Kv( ztPMhR@Uu?$MmYZEpAqq!gjO8HO;-gcJgp4zM3qWRJNK8yAD{I*wJU!taNPN zN1ekiPfS)s*Dy%M(67$?kTF!9)~1Vdp|VS;;(L@(VcUdW-;HHnX+u)DGqfWp;Hfpg zqzi_IJ3$WU!t&%+HmvjMn#76amQpztqzfWG9@DOMnC>Ku6SpjDL~1%Noo6!XY}>%W z$>~X_L2)!5YTFzYCxxn3D-Hdn#2;g2U`q16>u7<6W8sI7RBd)i9Elsb;=0)F;6{cU zSocWS`9RJ|qfXJUWrZxn4=&^Kg&D!GrGI6Q*(B@I!{VPB*t~GA85T&$-H)wlDJVx$ zqp8<^$o~KV?T(Fa;~x=e*Z>0pu;iaw`Y1+-m_-0|z@^i)+bt%>Jv&cALsGj)qKp#E?TY5Rdz!5gL&hE@()=Oe>#q=Mt`aL~i}T!$)%k1tVt&!O zzmEKWbEwODL~SpZ1bFDV;=Z^2rG7RikF?936GWZX)*{2?V~6*z&!3E58MvO}=UljE zM71pAaOy>RI7~8}Z`9|jJx?6{u0A{I-T?3vu-NKIm-9kL&B4w+tMcFC4~j0lSK=$3 zR`TO>JcPHouU`GL{xRQpx5w9(3wBy4rYVe^V0wF3fJ(x5G8V?{hH5|a>Yw+>{{R~DdZ%&(n{}npC+ie(!L7coeS8 z!=3EGf+)yfN9#`+m2e{+KqPUy@bsrgZnrZcu{bz5&rzOh?k|Qv@J26)za0MnXFWgT zPr@%1$Ejc3TlvkSn0D>}XJWVazP;;c(Uj>rk$Z3d005qEddI^}6M_?{{wB{~Y*dCQ zLtuln1Du|<)O=9*Rpb8vgB~up@HfPt2wrI#rK4hdOUO)Yzn@XZQCzv)2nwtLJdE_L z_cXdf5gAnxl0ZBhbg2PHBXe{e4_^G!N_0h#lmv`1=AIQt-aQWo(t=aFE$EuQrJ(9C z>e@Bzw)4R2EH@C!%JI6Kf(Yahj1%cuNK{hyomL;nDRjQkm~_<`X$?Y_%z z;&_{Gp<=;8#W_+A=@6H1F-UBDyI2#S%>><@`#5PI5d1mt zQ%9S{7Mgv$em1a zMkJm#j=a~|U$N)>5To|Y{jhv8o;dK=f}_#2SIC1@*QFNeF$8B~x6H!_IRd|S{uF<~ 
z9sdB}p1%k@M7}BTzwG&^O?e&!AKHv=5in078-w`gps%LvG}|pM);kS4)+iu_LcFJ8)c!?(ZLioHce;d3g_LG$JEU%$E z?e8Mb5=7&M9^FSnUK2f$T;y%%zk2@0{{U&P*$?)4{jWS0-UR)iJ{oEI{g?;r($5|y zwqyKJ$S}%t&NG9KL9feSkN*JhYcIl!j}zT$zX^Y6S+w0YX$wQAYSzmmz%kRz1aACA z6xAx7R9|9e7ZdQlD+y+BB1S(q7#Xk8FZdw0{1X>Z{jYy$>u-s_5&SmK;6DgRms*$F zV*S3!fqd!RLNC zeEt>sN$`hB{hhuD_$m*F{{RRy?Gr%J?AP};8geQi^(VVA?nx%PBc-@veMzc(Q_=MeIpVdr z%A)o)pQPWvi!DdlWgrIUp0)CahrCDeBg4A1wmPJ_w>=oQYW2^6KN_^<)aJ3&Zc%oy z3K;dJI!+4Ydro8MzX^Cy*jov%rpq^DT>t}UQPZ_Yq~6=#>Cs(Vva-4YqJU9v8Sq_}|rEi#OC$>~i?%W0U1Zc(`Y z76!b(;~uT4Xc|@B(oV`q`9L+we$AR+jrFe-&#h|Oq=v^CSj(IfQsVh;Gw1>`FkErg zh51J&h9OY19suN;&Hn&|yG0kDWj6p4*yQ)Dp1^6~6P*1qPB|okGmgi-5j1iynHz!+ zLMcf3v+Ym_g6m4tuj4UZSj8eS#dW}RW?Xu6 zPE*{U%8q%bqbrpj0pl;&mqC>MtHQ2}+{Y*Q*O+`j{g-@S5(r|`*aSa%HS3D|L2`-) zG3`S|8x5?WgV>DbwT>a$(ibFebMr69zX?25q-%o8>rZx%%NcfB$CP34)%nS za4Yo7#D5O_C*p`29Y;sEV#A3%p^;umuKvWD3(a$P;q68^X1K}yb`hB5w>^TU;z6F^$Eu+ZFVejXz^=9%%~e3MI;J$S|p|Kh}N& z`1;c8?Cqh?{oII+bVpQ8nalWo^G+6qHC@gZ3I}Rs--Fv+Z$4b*vxXhLtGw{X?7QRR zbiZWLV()7obT;WJl$@Tqf>R$`~!`c{@JD(Qb$XPiH6N8%d?*@D+@MnS}Rnc_R z-m8x`RRxVVUJ+UvC_A1x@C)`p(!4(mTIY@JR#^rbI0@(-90+BUg$goB=qrTyf$_J({u8y7CF)5cf)70nWTz>!TSf@g@$ba14R`}fZ9`qW zRO1OAc_zMU_`&h=_r|ilt*)W|f~O({?_BT3Uy8aHjvhgEc_Wh90a%e-3g38o4LQ7- zSUFw+HQP#(-I&f-MQwY=FwdzAD|k3#gMnW=f5Aw;Ciq|B55Y^nvdx6ra}3^H;n z+qFGT%SzMc@g?kmRunCh#eP43!BBr``~LunKM^iGJ!h%2TUt9LV}Am@8c*24&!Rb` zuFsRaX|A`3JVidEDJ{9qNjbnKvKyp_furuEm>O1 z)%U7L9!in4lkPn~m45R70KvVUF4U92R$c({J>K+9-(`YR#@Hjf`9lv=i_FjZX-=5!RAO5k$@EO zUD%i`1`1BI_nO#+E^3O~97p^e1Mo-S6yLKRk@0TxOVh0Ub8D@`Yo=Tw2|d-jljb1< zmsJBi^sm7;RY@2CNzX+X74!%E6dU&Y___Ocd~3Y%@5BveLt|+(#iD6Q=gGBz91Jh_ zjxt-leE$GDJIW@PcH#iz*1Sv#uBl$HUY(7qS?oq)QUq_$UUA5z^1~)J{*<8tjH~h+ z9ck>SQ*>nEhI*RrLP;UGW^(LsPBJsnri@O~U?vARJ;>`#>5vrT9Pv^KLmabn>yEW! 
zQg3% zuRPa`4X(_OoaA$wo+%J6HXoOu&st7A#*y6qt3TkH{{Zk+Z-8I$P+x_XdcKjbS!kXz zw^WB*@TKM2q{%+q?|?DPv9=h3GDjfv$@*Vd@wbG$IPpiXc$&h=r+u(3)!HGEy5U0+ zlaEUNG(0`x{{Rs9R{3>54r`ZIHxd5;tGKhcXrx{-kjg4T7ckvl{7u*A zcw96_eO_lM!??>FR({H&e(N>%wnz2#;{O1NT27s$NpYoW5nL7@D@5CX{Hy1m8hC?J z@g|`s+jTi3Sq}kZBLco({{Vt8e$^VM$FJFH$4~LLzHhZ%HJaAuSx#Ak{nCF7SGN3N zzna$gN2J{xpY9s&rAxUET*h9rY_^(|*4DbT?97M$S{^Hq(qi#Wo#GY^tXvqGx*1MC zD&Fwj?XkUUolfaqO}N^}nx9bc^!lC9zp|TRD`x_`qX;Fjo~J=;rs~(4IgDJ$W9{>G ztNK@i=Csjd65JJR7E*ee<~9ESiME~ywSvOsw-?4SQfi+TT%(RlcBLeoY|P&0x%?^ppfwK;=+Nm}+{+mGhgQB@S3L{l z3i*sn8u@pEQWZ`}^{BjM7uw>tyRnaP$YOKaxF?5;mW6ZMbM4J1#JUc*XqTF9sSK$g z1#uwHKGkvYw}H4Gg!ZqFG%tv{uZC@5p3hTsae!mpUgz*LA^88sV43}auM0otSemv14w@LRD(FB_F?3iJ;W=+Nu> z4bGC}e)&iI4b)bL!q3_R;aA1m81Fn6;@eAyWdz%qK#+b0yHyHK^W5=R z+S`q8Lt|<7Yh|_#_g9LXCMxJTCdqd#+HxaPl-M(e-G+*a~P4X z;3hc+NEj=cnZc_x2ixoEk?T&za6$T42cmw{KLPwjrN^x4KNEDxZKMH~Q#Q~?;st5i z{Brn3c{Db*zY(-%l6DIMeZDYUo`7@7p-l}PSWw;g^u;x-tt1(HSlIiZr=u%?P>d0r zgHFgrCvfSW4N{Zn%Cci)wYNdGJ8(Jb7o|wK>KnFk?O47b_>16w3qbR0-Ye7X%wUqT z-J-W1#OA&`{ja~^uKG8^`=#&?z~2X4>w3k+o?2fx9!y@O0i5%S+A34nS|jc0U1L(r zKm)1GWNY3t(R@8@*Eg3@u><8*ZgXEAd>s9vJ~;T#!Vv2p5WGzyTu!@;MFIo|wgq;U zemXb2>X%|+j!QRM+7zaqr77rlUlM*UYTB$V9+e|Q83Y}#*1l!Td8QIoZdkBxtBzZq+P z3O)(nT(#7QUN%ghdi+HAlj3`?6Zo$0Q=PYYlCdo3Jm$Yu{{Y~&zqBr!2Za~Fx>Uf) zBmVJ6YW$76k*1u?_ym$jBDKfMmXsj9S)6rb2PJSLz$6jtSG+x8KC7%-Sf*Relqepf zHAw*>MtSMdy<7G|@Eo2x_{FAJOKH6pRW9D6HRC;Z8%-Z&{5#NWJ^+5o`j3yUZG@7D*8J6PbO;8k%Qbm4UIC3+L?Z!?x2P4;~r%H5kGEL>Gou>ja z7oYNewdBJQii1{|l{M7^KexnuURl?WASLdrctKa?{>A$;Vk7E6$e`PNl z{3g7cTeXR=G?M$HyN!5IPaQi~j2Rpd#OEa7_peXW{{U=XjmhB2JOQig{{U~WkqYg; zPchUHjBd~K&3R<9yt0{5N6yC}^flcS)`X>?_5p!+AQO)C?ZGy+D?iS}0gUY# z=A2oTPT~L~)2(YAh#soQrFR_l=}w5@%e!Qe$f%Uyw2h3O;PeKa8limT9CfBNnPH>~ zrJEV)$9j@Bzzh_8)dCHJJe4CgVk>KjlY9{|MF5rxc&mwV(dsVZzcAo*rmTdWs(aHU zEXR;Q!Qh`tVb1bc;~%J|Hh!po!3KPEo*VFHw;Y#6PunxKpN^fY?~fGt_TK#-?PXoc zGC8ly@AxFPnXFCVi`%^`b)O>O4EB>3$>A#8$D!cD6F3rF9o} zmKs&G7O4=Fw0UR8O8Ix;pT(V9Nxo|lHr%iT7O$ZEJ>n+T>|$wLZVBt2)tyK<5SleD 
z)9n)ZE>6w(z^;`vPqkXC5TL@Z99C|PYY~lB>}|^i&MU6b=C)Yc>H-^Q0Aq^gdz7^} z)8VueI^E>5zsfUS3Hx1Zek<@5vv|9}chkXlVmF3@LJu7Fucoz|8b4WWm&;I~kyqEq8+3<&0)$bF;IxT@OA_JJmQ(me2Ab-IWg?smqi56#`WNCy!>t3yQ9w0~IxCptC*^d{6~v_oPUz9DWn=PR_A>B) z?F9b-670XRE^lnoDVHxB5rL7{+P_}B2z9aHyJhhe!IJG_0$B(dW9wLc0{9)^p9I~$ zrKMw{6How7fekqr9aRGweBC(zd^@q^+eufv}RT2?)~7pZT~gqvoU zK2iSwg1>&$z8Cmy@Um}*O{yZ?>M=6Nir+Tc{44PfhwU|A5x${gsLvi|-d!Z;u6VD} zFWQ&(c=*NpSo~4C@eZeRb}jc41WpNDWc05E`0M)z_}k#;f@RlNOP+RA+{_~+jw{lm z2q@}hCV1Dx58D3#;tzrT7RBI=C&W{riE#1>NNj&P`P*3iwLUjKv2S%@@dr|e?2`wH zlIej~2ZNDZzsBDa>y58Vrrb*E1F$I7!PrCgtA&lgIqRC4Q-gaAH)HgJ;cxsLSNnPV zJn%F=9J=_PnvdDx^5C0M(_ofP1_3V0SmWH+De+hS4hjDNf`$A_vX@I=jB{8dqm!oF zBnRn~0N2BJmJ{sSIu6y{{3rN1FT@*iwgfbzjhW~06)J^0hcu6C_!0Y2{?#8Eejs@| zhs14HP`Q<}BV1fN=O^o5p_)YYRz?!PRx?WV~xhB^!(3dFx#+ zk#BbmqTITW0%xyE;k*T+oodN-8_Uqr%_iZVPCHkoJ*(R30utc1%woE2rGV#t(Hysi zPd{-jfDLGPt65DO!j~8FU;z>2pK42eCe;*3$%Y3ayzlm?@u2XB!hK6ol3lA6yAhmm z#dOh%l|+o&YApPH{{Vuj{7-|%zZfoTV3@Fh%9Y)k`1(<~i7I$r2dS?>{i6JBbK`G` z+Loz1m)&;69Q)S*%*BrM0ORtkG1WQiyKGEVkj2`>C}zevZ1gqvC;Ss{;f9x^==UEJ zb(>ID2%13dI8X;%_7(A`fjlvD;|~*B=r-%Q=9N?^>B+Cr-x%3g{>neF_2tagiY|2< z53@j7bG_sNc>KxyD-1O?R=Q7NP1zp_{?-2g7j++u{{S0xZx!8Ld61iyj@1dy)LsD3 z)PA++Tc!T-{Qm&@)akAyy18i?LW0axsRx|&sJxkdTk`(^>(&^W(Vb@6DJ2~bs6H?M z0Kqc<0BFyJ5wz{0i~D34GDMu?t^hf&3GoksykFr7MW=|gDeT=q-X1msKIXrsABx`t zG+jd1akEpw3<~f@ewFbb?Z5jPcpF&o<<;WpP%NHNCu~DJbDHLLU_$dh8Pq!xUuZeW z3g_;U)B=qfE$OW;GIT##_k9xH3Xb9VW z>l2;Kf0kPqz%;7o zfSa;AgGUjmUV8o}sWe5IBw{g;MoAgxwJdS~#{(RG2ilt$E^}O z)fyyP(=7$<{5I;`Gle|lbM4-~*!`TpW#0~Xx4{wIT2LdKUS00!HApSMqhe=ayna#VOn!mN5?Bg%N zUlTOeOj^d_$W_>WMz0!nDlpvplV7Mm@K-+yd^phl65LXtko^!@4noT)qU969=Kj4@jwYP!(3Sa6z3iyMrMW@?eGQGXs!NRHT3Fs^IgW-RV zd>8Q$e{J|*#&#NnPCw<|;aoSj2EPW3tf!K2I$^)UyHA9Fv}eV?f%@cH&%@ss-Pw6f zmbdda#N*JbWPX*?3{6Pr#mbM}jZ4Xa3Zejd}US~1px40 z?H%Ey(R@p({g1>j!V=;M00GbUm!3s?YE5`$RIs@?cFGhv&3aVks>ebPYw*3qI#rL` z?i{OQ105?P;&qk3h%{rU%d`bTb5HP{mGk&+DerO$ugqV)dH(>#pNVo=*uIBlAVPAi zL8(cpXiV*6!n{*_G*1ypd1mAkW4H$EgI>q`JN!|TTfB+}xWZ=*xaTIk8^y3+!Fw#1 
zQo9L(BcEQFtq+3Q{-xoqehWR@73JNMeQK)4@iAH-e(2V3bbi>be4sx9YQ~y%7$ccu zm4Y)KG19aA8R8gjd?Rkr$(J#VjymGCyg#non4Jt@mhFo3CAqYAJp=Z0@Q$0}e;NM( zY3i^nlG$OS-Z=-geIfzQNEri;E9ZaN*H?p2@ng>^$DDSsJXg^J=J~ULMR~RFb4ctr zZ?-w>+|smRmv6N$a(+;K=)ea#?Of8`6d)MD!K98gLSz}qr;N8kG3`m`L%c>@1L%9y z%Wwc2X*nD>r9De+&N%Nv0;dFZ=9Gs&ck!&=;3|yZVKk(wqjbP9_oL9+OCA`yW2qTX_H%!;;{{Z|Px%&m{9vjs3PmjJ6ySu&9 zZ}(f?3yd+hm~sX{;c=d|`Ay&-g%^WRihHHV4hvw{e0}wD9)^9#hA!5_%rb z>@o0@#(p93^}WTFtj6DX-Zt@%p{7sa%`*DpEgse}jDaBauh75P2jG5>;lB>sYHMk@ zx(3eCRXqucKBE1YJ{`^A{Q?_l{q5ms_UN=ym5Qud)sMFW-Ro0zk~NtqdK$u z4_fd)_$W8Ug!r}lOzZlj6SG^}+{{ZvofA63Ey?L$cv7!Bb{9pJ{s9MAJtu|um zA;AGF**fio?}Ki|u}_i) z-L!jGmnm8yDBS*V{{U*83sTYkDc;4X#8r%qwp~bAOw?=dF8z; zQIA3lWw{5~P^F8I-0@G18(}+%&qI!s;S(zc$5G7}3k^?{5)aLew4kwV?AaOPigAp6 z?2pQT9h(^K)~#5zv2#S%E_5rzy0+@QN3VMO6ZUHTzVu%QXro5eE+vM2pbWQ8Fe~E% zjtDz*lb!`Q5~O2{j+m`sCY6&jwa;w*us>=oYva#|7R&9jHI2N4#tUR0@+-sK_;Ij%HZJk6vD*h@&`-MgLwC(bBOTZWrCq|z(D1j5 zyg}g2Zr{Wn8r0{ty0?@vOAMt*{6VkPU-%`b?Ze?8j}pW1wEZXiHZ=qpyfC zHk;vb7l!_3b?Ofl^4G+j4_cp9Wxi(f3lq1dYv|9}qs7-h6ut~-{wlkOJ6-AVxya8Y zMr*ejKa zc$f{NuoV`M;|)ttSnQ^qu%nQB_5T10p4VQ~#rw100~W$Z&EB+;k@Yvh`@Kp0BM;hD zw|QU!euBL-z%U4}%+t2@+niU={{XWOj&(gu{{Uj^I*J@N5p&a}eM8}|3rQ7++hAoW zjOR7wRa~sr8yY%o-}ZInbLr?&yv&jYIL&=2;N4Yqe-GQ;GlJqU82i`GwssFROD(r2 z-1o0-_$4*N_;AN@fK{=I^YIZ@Hi_7=!615bO1z9W;l(7oyv(sA0n(QZjonABam%2= z)Q&(jjc*jJ9z+gDIj3NB-}%#m00_=F?NKejKnOS)Janf7VmYMFdvvD^1IO!GvJus?YDvp;MXm?MU+`TSZMDDIjvoYhD=gPnqCk#-kO1pnhFaf(ue>*R9YRX~lk+{>J_SUk~_JI~#(eswlx7E1vz7{uF4JUIkk> z)4-8a8}Zh?+u^svolCK7n|z!{{Zk(zlnYz)%2hYtWlfsGg8yEzXe^+#4nd_$8h!){QLgizYu2e z$H%>5>24ZW(&^M>``7LF?cwn&PWXB7XI8u}6~(LV0Q*<-`(Qf*^dy3}q< zGu%$nGn2_Bk0!rSKWptXPxxK`00iPoD9MU@_+p3->`7CP%Qg97FPh}E?bPJ+)aJWu zN0S3iKg`=Rj0fZ&TiabkQe;M4W0PMfe%~HA@K=O< zA$zClx~wp)O0LMqQ40C#HK>qeXj&NJI`!<;Mp(t5%lH zJM9CKdMK$h{Vsdig}tmS`5-O_di@js0D@2d0KpA@H2&CLDARr;d{xkH{5SCOIYhH+ zH;|NQ8#;l|Vl*u4>qbIsWZhEAM8!r}JT|{{X?W zzwlD)8=-F(fxJIBZl>GBF)$Ua;qUl13;qf{s-oOqXxA&5)tl%aF*f5ZO(@M$OQ0jX(m-0PpTuY~mbr(C_N*j!2w{{XzB 
z2b%d;{t7q$00cDss{a7sp87<7I@g7ti##JNf+z7GhvRaV(hsK1Ca@cMOBly&xO}mFA z+prk!y(+uH;OyFc>-t{!iT?lu(*2OUM;wq#@MA#=PLXv+9@$VCV!4ZFB&lGAu z+EMr#3v>e;Eu<(9bF`e?D6O z0D=5d;h2(bYe$~hTxD8Vhy+%Cp000V8?a1@GDp3B_x{#D@J&Aed~5h?d8BCiJXW_t z7W+K0#-ORiekS}({hod#d?xt8Kf?`cK)4sM-}w+?GyCtG0E6k$rc|vfBR8S@-TN2b zT6jP9Qt;K|%jTkK5iw!c?h5k1+7HL+<6GHHydKKv^(ZzY!iTo#NG}2mJC`MA;p0(*wO6E0BpL7{@IQ7@Jn5a$I9eQ)e>s?BC zbyg-Yu~NgSHQ0D-;j|NKl1uhr!*M0Z>FZmX=Yp*N0I|t|)m?v!9XeK$b}>A|!rmd& z{3GHhrPHq5m6zrj#})SH!5@m3z98^)5t+9#pbopMupC! z)P=fSe(8YuJu8P16wpPq&!M!PZsyWs=Sa$@<^Y=Z@7epr@ZZ}>X44JL%7SafVzRW< zL?xm;E=dR4xt|*RX4QTHU0e8`-M`TUz(@(>8Lv95F0?u#?YZ{~k;o^nQ%it8QPRAV z;TP?L@Q30qlMJ^$FR-z?c!79st`!0L5^K@)e;D{r#B4@^;*CD^`dZ#3hyMT_Cb%S; zdYUawL4Z1g#VG-C^L6SyDY0RWILJNfJL|~oEgtSvVyTkBMr$U3n~s2Uni@rmEagUc zIW_V>?JNHP1tz-qN8?C*4dK6sdW_P@;acijW3!XgSFL1H!i6BGrQ=ml20C zjsVRKJ8C;LcD^RpGpa00HV`g4)pgVmKkn{9!OvRr4~yRxEqpU<@uQJ6VDVgU!cW>J zFB4w>0AyXvOvY`rC^oNQ^F;bBHS5@sh`Qh}OxK_MQ`YWpboLiBou}o-{wsIS&9IYs zvr| z?b_@kX_%4}=L0qF-YSmo#FDo8n3e<)nzQgP!k^ez(#vxuL6qT0>s`@yiHp^pFM)Im zABH+)53_7|0lu~Bo*wvt;mNF(BbG8GAY}U1Uy5`qJyXNKEvRpoqKfk7OZ^jBg9zdj zm~LEq)IE(OWPKy3cp&(*!*fGnF&p9`c6yrpkN*IIiu@DRz8!e}^5eu%yQ{M@7ac*( ze#rbB)+}zc7#p4N79C6%mOPQYB(=lA>*AH&mWo+-Y#y;1T>g~0SZ z>+Y)^N^7lfeUxq)+t=LI=~``sbkORrBn>Ex^eQ>`tz9xOZ2oh|UbR9DZ>(;+}f8eLy z7oWu61vOMl9Qk(aK?AA9ehm1g(d>1*t7jlG%-K`hn(D+W;VHG|VI=vA1cw-00o>-h zzuBtEZ+8Ih zz(6(m<}l0)U~`ab(Z_qRr5@2=i4^x^k%n@r7~u6D^kDx0&lINopygMb^Pa+=wjb{= z=U!o<`eEWf_&D$O(Y>9YL2D~jZ@jX`9M{dCv_Je5qxP@;rY4y_B6!(eDfoD8ZIuAY zJY;jw*XYiX{{RM)_>G}5Yk#&rpB%Z!SUevif%yHTdw#X;pR@P;8rAzGco^I2e;z(1 zYyLC4joZ$&vc{ihDv`$sT2MjfB1+^`sBb2US4Z>ywl}xaG{z|%`mw%F5oG$Vm2fl@Kx_&b}h^LiS~b>=i%|Qcjn>T_xJhz(t1HpH# zaqd%ag(l@R)mNodMt6wlww2YiqMx0P>N|!t5Rv;$`IzP8S{EEWS&yaM470O5FbDkc zeny-$u%efkob)H96{{L|-u&Wgwz-_m!rR{z;e`QVAmc(sI(b^X3^u7U|7GkNf<4GJ z_y6$?pRtC$9=ldFHHgZ|Tw%6PW>vB73NxoA^2x(tT2E>zLm*k?Hc= zu~g*&qZq$)XDs__7lq{8X|qFiV~01HLV{ki<;&F|!I zs-cON?B%^t0S?J3$M7InGQwpfyX(kx6#~;F;Y0G=Ihop 
z(%hCiCmH*1v%E?BuY|6(>l&tbPAF~knls>Qjy$y=UJ-WC@wF;(Ws7?Y?pA-o7lqa5&^wxYr=WA^?H|Yd9CzYqz75Hyx&sY5=x;TsVtn0Ii z1#xF)&C*;$n9#)l=G8d01kYDNB5XT6Q$=RRo$Rs6Iu zSl=XBsV=2kv&(F};w^##r8l3{V`}^UbxpLmo}*h*SCP{jGjCi|Nn?tMv!a)A;0h;3 z0#k@%g+*P*K%u`HW)DZ`$3fqun7Txyop4$Sk3duQpnj#Cxtvsia{j8gW@`kc(qoig z_*DKkRrVoy(IhuWIk0VU&MkH$s%N|U&yFH}Z+ceBi@6ffTQ1!FBSM^WX@vXLm}(@i zST|X#1Z%n0{zc`zSZa>X!vU5o8V{*?u+d2P8||0QOf|I}RLorVt$dLQ)UmM}@9e(2 z1;EMoI zB#o3Ko~Rvr=xdYVHcEo zM-I;AMY$A31(VUb!80y`Yu^dV`*-iIC=x&B*wZ+;K13IurAgZ z%p}w#kD;aAro-YD&)?i2lq4(s^%`-Xy~=vS?|Tcm^!c7sEh2gsyqm9;_Tr#C15NV{ zs>6YIb+GWBRf-^obbrI&dj{$>H|ceimuW%3fe#KrI298l@ell*b?rbqxbKytP7OVs zzu8ZPgE~M2j+?yKP1Ea_d`YN5!TosXUI6Ed+*hsKlUpttf8lgDF}79JlljTI>D0_~ z$iHXOM+$X+jnTF#eb4KiJ%#H!T&$WDzHSeMMd{0Poc%&&OFX}BY`ZX=o6*UQy4(8i z&jEK6jeL2zfZ=l8;JqD0r2oWE!;RX=6gY9)n3zkY}4 zye?|Qr2R9wx9zwG-O0s0njR9pe&$$b*v;0`?=ck*-FD6PVbsL~HakE{^ws_*|1{|< z`Opup>uBtU@}Kn@wLu*oUN|uo37q}2FWECWoCfXWNCu_ZPMWs%aqJh5tl7E$3+Q4> z3T}$lYJ6UBk8}4@S%g5>%lGcRlJ6j5bF&J!4RT6zfcIzE5Yyj@#V?($#T8wbUQRZC z3@1u@`tu~%2w!|dwE-QH0@WQ5$Ozzv)efoYVKo;(AjFAmr(kO;%v1NT!6A9AV z@VC6yn44?RqilIirST)^Y`;qhQfO84W@JL#TK&^>VnBhWxAjoGuKb{PIj<&(tj(1F zo9ma*rnqizk~eQ@I9`&_TOIY-nK1Y*zp%47lSsC*bwmMDNVSb*6>wtmO8L?r_?yYY z@g%ZMcRypySA7p3MJhVYU;7yrsRn{xZmG*7up=uks!j+AB>A%kYEe_BZh6!N^m@8- zjkV(wUCF{{Ui!j?lR?KiL9JpdSUHL=x^+lY?A#m2Te83JSBwWu=nO%x8wtJ5->;2C z!6-4{PE+%tHbO6+)Q0aR^|J>;c!lXDuxntN;xjq6i?Rd;T&OY4-3WamguagFi;FSx z8z?G1rTpoHZV_v#e?tEs4WvpUN<-F)eX>)RMB?Z5RR6)*qIv((S~EFw^%sv}-=eRG zx|M$`I?w3`k?YKBO*|W6A3Rp6{MFVnAE#-zMO#LHQn3=AvYCw=2od(Efv+&{>qU=h zM1!94+&6g_S3A<;TaeRq@uu&G&BqPTQnT0z+%MFCzddR&Mw1rQzx! 
zeWV$}{_Mvl)2MU|7K~iZLF7qI)FIp7SN2z+MSf=Ny2#M$X1bXjFPJ+~7nCX-j0(LP z5&E3r!(W=Wxzk3r3gcDHwU}J~Tdccu>uPo+_o0&IS^&6P(((B9ngnI9va&pU929A$ zp0vW8sa5&Bri5bHKO4)5dX!x>auw74;;5InynL!t9Sicaea6MN|uKQ7clLX?lJ6BrDfyy7u$$U3h-Y!?>!S?L-`c#X}Vd zK{#{xPCbiqU2^_EQ~Y06(JZ5KCrKb%PWrbe%}67alc$U>inpKies1X6yufussFd1!wXH0Ll{%VTo8=XF9R2WMD%K} zM&t;Ye(-U~`hp|}q*3Q2!2~XxYhT7kpDX_3X1bw7>Tt8;(&s(d;y%W*f2sIRP78>6 zH#e>)H3be6I=?y+f45z)EVJq{O*yl)q7dSg@#LdyKpTDZhZOAb29#cG$T~Hb?2hjl zqg^)Mg$~e%LNO}~Gagbty0`Y|67|pTHcqKZiE&`Lwto&QC1l?Dq-Z1@z#)ikNsv!q z4-ndT#S{BEq5W{#iHwgVM6*JBZMHL)(q9vEFmrL`@9Z9_)3Rc+#6)=yT0s=B!F%VE zhwiP&?Zcd!sVo!DdyWWJaC?qK6Yl*QRIl37{Pi0ao%TW9C zotvZ@R~n}QY^mz~yVF1aBYNG(`sMHwv{Onfc7Y?FRx?18srxBXZr{>rB5s_5qY_9S zJWqd^7&-z)iQ$C%diO!ZFn-T{$8)KTVvgFg76gqkJNH>iO5o62R6V8eF_b{Yj)lL( z9z~befT;G`K1Zln-(#Yfq%W5)7A=D@e(`_&9-G@kJrrruZ0B5*B)NciL7)g6Rj>=g zd~@6wDG~8MWRmX%^|sIwNL#PDV3})>cyKREZwmKh9#{B>e>s7**JSHr!7DVF+~A%j zM@(O(JU`UP;P|G`CJ) z;saRFUQWvWLmw&_xstVpK+yWy#L0ck<*9$VY~i$BB{k4<^$#SK@S#4z*1<;+M5z4s*&M%>5B`4~&) zdSJA4dZKLQ1PWpBp9o|oDd&00_|U3f*ef}_V8hPD0VKs z2oYlHK9!YIJJ2OE=%|t)^^{*rF*vw=d3Lp2Vxm)LouV{E=7DU9cq{$_%Wp~ku&>#~aC zK#CE`UCq3;*#pEnh+@}IN{?xAh|Rw{F?Smx;{SO{MI0|)Yg;u_zCb#bu6~dsY;x-? 
z)=B__uNFaY8Gu^BGh6@mrZO!k`bwlPRo8QILXVm;yS-@va-a2)=ncE3++YaClvD{^ z-jxSSzG1($5I5f{X4-v>J z1cefN+4J8wt;}`7yj=Zer--oJ`iL$nCYU*aW&kj4(dY~;KN9crUCGq^9ze9hIC(m6 zEd$o$i~dw>UhcypQoTD&F8kmuJ-d$oFC=JD615$Rw|$ge0Bc*YA@($V-Y3293r5qU z0t`)}$Iljh9QyCR3~6F$zI8C+IIHJz-r>IJ^GR9gEIbW{9fJFOTMrx6LYguME+>!sH?s~}(o>o;SQ!n=psZ+9)I1uwUw_uh_2fbK_3K||L8s(Dd6~e;NnX?Gt^@$)Pqz--e-V)!&M6jRim)Hn+}8E0T9|uP6POvu5Kw$%5_q?(=Ro(OSI+ zPPds-I8MbwH0E z@JcH!O18TJFIFO&`-9x&)5>(1QNg&lw|&!IAVuTkFY_PNF)3`JmiAqchk#nx>?ejemi<>)l`m#ng&NTY~j7ny3LxXTt?AP4(sVmrL)z(0(z~loQR`sPsTRUY_=2R>C%f z7N$)gQ^SOuoCmuGa|mG$>U=nzj_>Xb75Hp3ah=0!rIyu4jVpl~m2J~^IZv?)Y#~%y z5sd7EfO$J_UbKBL^FYP@dw7wb#?Blz(;##d@F+46T;hS-kB9l3I7mU_+Z@9T<&Vp&abV42YTKINh0zQ}FEEGj2l zUr#O-f!F^TuERBd-)&@K#j1E{Am={(FF-uyj^;U8kyJ%=dk=8Gdr)bC#mz1#h#i11 zX?=tz<1ImjCR{l@u|03yHKZpFerqkAkK!H)9BdZ()8kkR7Rl)29i6EQ-u>7J9(E$E z{6|C&Lfyvrb~r!jOKBM84D&;Dt&%M|aM0FIqW_kXzc{**=FOD2G)}!vTU%S6$%Jk5kY8~>-fu+VbHIeRocF!jQ*&){+oL{LjAvT;Fa-Uk4G$9`j`sMJmyj0T`^eCr>b-HA<7l-hO2Gsb8h2)%i!w zHUlE%!JXK*Ck+_!DXivs!GAx3(*2&N^H;d-_Qdi;p(KLn$#49*1`n4M33v~-)7 z!_|94Z{sK}BIf6mp>AbU@}NzKo8^=b!w6me-ltEb$_{>h%odtLzuv(F=r=Ri62d$| zERJbt(Vl&+BqCi6t=%#d_Q4ACqaRFtn3?Ymd9i0Znk9M2tlSE}apAv8vgbK)EEn@OYfgb%V(E$IbEFv!Jg>cdWlg%->=KQQbg zD(8gOr71()8Zo22r9h&@JzS7&IsOe+$++{w9Nfo~@rlMWlVj+K2GHM=hb^+2lBPyF zvjphrWNVuL0C<-@Cbx;!JObquHX@d>hx0z3$=|ytMR)}xm*9ysF zEA&{)mF$9j2I>;?IGHt8sW2w0_4t={Hn}JlsyE=#qn{u?QM~XvoUwXO-(fZ_vkf0b z`;@psv*Nk~M22t}EWHIF*H%UJmG6(gziX9)kHaYfmm!$fp!xmtcU6;~R1vb^Ho>W~ zyy4;+W5IuP>_fhkgtKiGLEzu4_a{s1EU^IsM#8JqL`tHF7X;_0-1aTSlV0oDY=9!# z$Z%)KQO41GN%e1*zl!`TQ&;o+s;0ckw@r3&3nKiqNbcd3iRzw;_8^1Kun9RK$^wD+UPY`E$@rg^| zYkKQva`1{EJ5U^=X0)3ExcJ$tru{Po`GoI6(gTTaDq2iYqm0kj?qbeLZ5|l#3pXLu zLDsGdsy!U?6RFp#n5CJaPZL#r--1|RsEx^fq^Ev7aiI(mCmCEuyo3AJFKLUpam+k*_MwhY>q!P*qWdB-V@%XVkQjdC}ZCy zn|1JzE>ZWxCF*ZN+PGk@;AEpouCPscUJ|y+ufGOMXbf1_bj*-W+^6$tF0xu=#u||- zN@k`lzM|cw4RPhefO58$9?_#W-`AY^cq za{D-QCd;u`Ln(j!tUf-F%kgR^>`8b%(dVCG#4ByOxDp?Z?oO?3Rwj{zC`^c?z|8Ana3=ARs<75IuDns?ETfxS>LY&fP_ey_3eRGoP^<^If 
zhXvXP4qRA0!7x-i->u7)Gy3L2!@Y)h4Y+syBQnG?hOLq4Ev>w|lX4V|Doe7PIvgK> zIGJVFJYs)-{^xXH-%1z@{CW8*$~De5dUlTM>~vS)61;VQ*0mwSU=wXKIb(s>e?-KA z4>Z=UUmcu1efAB%5nmgVq=m8};hep(p6RGy4QTjvyw#7^Wn=T|E0=x_ z6d_d-c%ds?nEDWD5H*0w>y<)G7nJ@w?hC3a3i1gG{HpLO6~VaTgcJHGh>_p}5-e4B zMg^R{e0VENG&Q5@>CogqnJ|tPTF8JGdS9jCA~W91KyMr33^2jK@I)3MYF2>-NBz2e zmF`!r`uM5eC2uXJpWk>nn0K4Adu6Nb1qKvmjb)7dS(gM#)l^6ANLo()a)>$zOagv{ zM2)s9o4;I`f$A2|mrp9_z~I2qK!KnL0$*sCLZpQHF-6(796Sa1MVTN<5xgX_!?nQl zf`D3=o8Go|Plv_RwSVk=y3lDM`IvDFtUjhs)37J#I zNH&U{$5NTC*RsBfQZXnlK;xQ=Fb#CM(ADT2_~d#`k=TUJ^8QCOZ34jdLw?VgI&^szk)^^$NV;&}y@quR~y{MD1! z4!WiqzNZkJ0O4mRq_TD1npD8;tfGj+Bq=})E8ID0@v6$v{TpL$TCIM=Ooitv@v@_e zsSnKLebXsiOPfM@ox)x9kbV%B+xhphI`b2e!^k+!_$n{(BdE(a5bT5S(ZlduW_|5t1mWk?@JTx&a>r}66@#Sq;S&zU{}QZT*54uWV+ z_LS^|3v6;=cZ}tu>mKQCz4WdGav$;Zfs9jxhj@kA{90QZhpr*p-lgA3AQqfdqcRhI z8I%&R4eV6h*d-37n`8p(>SCc0Qxv=5uQ@qsj7@10$zX7wS(e@Zb3vOzSR6!Ry@BMh z*2@WJ*Ed?KO#N1*XLLikmunyeqzGYb5e13JbeXSsTl#@MJL2rm!bKM|{~MKDxRtu$ zpR%oD81y_5_r78`bv7;IrZzvhuX!-Zt&gKN&lY=)Jk0x}@!JfqI+5R+Sl9D0EvhwM zGS*%o`ozLaI71|r3J5&>k4Ufn`UyhmJre`v=a1e}qqKO_aTUgwAFzb_yV?2BHtpv0 z+r?HJ$rDXD7m@r(riwfli3j!U!dO=Vap?`yn$e5(NWjRIEukkWjo6TJ6G_yqw!{UeD6xSzX7TJ| z1;CWDDR0w8j-^5bxH0*>PX-fT2Q=YD`7jUH!z-iF(CWkb-o{!WFm>KN=Qm2Yfg!?dn87my9o6iFj(}FlE!-C1xQP z&$@m61eVt$>K@1s=_*>#z5U{V^l?tgnSBS~m!`tgdUPTv!b3~(5@WUR>9}oM+hV0+ z{gON&G*A%6fHR75Vs7^hOo%XwuDFA~Sv3eUyYfICMdQ^<@t|XTli?~JL zzM(hXce>Oyd|^As1K^B8@p%VD{FFez13+t`u3&ZS9n-mJ}j}K;~%19Dq@zfsY zbdyS&d^0(uw)G!|G`Ba1Eh$#Jg64tPGHnddC!wMKu(6W|O#)Ob>viSGhw4Ocjj;Vm zXwiGrlCUP*6~uyDHL!1)Q0hs@qE77b(btqPYwYt^!R?}QAHY!lFDoBAWI;JjbguIFvP(KO^|EXi#*EP&5oRG*^V1hHp?yn8E;#KIX$13uo zC2r}rmi{T{H<)-GCO~_dYj9H}1vGPFSDZ3TbdVm2m$SVH%$p;e=-Z&mX+nA_3}9*Y zzpXx{WU!3>>8>8&mIbyiGlju>bMD5Qx5{`qhbjg$!81Wn#JtQP;LKarQ=2(p@tbpA zE;~j@w8`;Cucz?$pK&TSVt|Mr9|Bnbw*GuGNtr}*%o@`hO3GAIpVg8iPR#!Ui+gZP zHP?JTHq%DxuNn@au^3Pe>yCbOLg{^C#kj6VzL!IR<-Z88%@n9_s%(JSMXPa@7-;O8 zvi>^>@!NQ?`E`@AK`H}O0G~TL@5M=DiM!5&gg!Q^ad&=TxdKKLP@&f5O7>@x|apT{*R 
zk7`qsy!Rz7}gYp5)aTiG(!Xe#X0oKH79JD&b&PKdqjWDGbMu8Um zOt3Bywr+nM7Rpe3-?Dh!AYx9rEClYBLb5pP&vfK8ngs_^OS#bUvcSKvmkdP0t_9_kK**^uBJcix0HEc}cEzWu2^jsqM zPmA3<+(UT^;xZka5oU3w#;X1DyOTvyn}bh!nb$I49iv(-Z^K^4FFs@Tcs4o;!HHY| zpC!~N`WF&<1Xy(MpGHdMMzZK+YB7vV8+m!X$rZ&WFAz}thu0U#eU+gpI)j6z$D z>`k05E~`FW=-->;3LV=3pOd(HI0sPT_`n8kiMx=PRgCE>j<1FRR9lyLt%hfw+^ny| zdX3yzPu#uaZYJ4siw}+fneFBo{*+o!W;}n2nTbNJM*Jo7E3O*<;@*`bJl_I_qGCqx zk2;o)Wb#FYzdc=Rdq2@eg15lwp~%P-pL;)#WKbNvm-cq1jc;kk-4SeP(f4TSLvoKd zzsI8kU#f!$R2&K5$x#M_S#m@ctozGg?9jbYD!d*B9G{`1GH4Y2snPfW^rUlC7nk7p zdue`d-=I%w{?^>-S}V!92!su$XqMC0!)P#a#O;pG`CK66>||l^tSid?T{_GV^gl2aas z^hX4b5!hz8AP2VB1}iYbzHcz=J#^}0m#5ks;yEc3H6b?6oYsL82=cAAw=E%4Epd1ywO_G{Q&ON73RXOMQ<@xdM0G?6t2g!fZ-mIP*8 z=$cjYdZToJq=1Br{vS=^lXK99!FNXGGWQCbGubXg-En!1@dbE9uJ7N;JiM-)&fl1}P@!5o-GH zyJ3@*VVdD85LgfCTp@1w<|vy`7zN&B=(8t;Mpv)>M?{O~P8zqtRxj>}l!rs7C^fdN zoYw+eyRW(-wjnF8@*{)pSIAs4RWj53_l8q#zIL_{wAQHsM=Plt3)HNk|ABgA3R}5O>zWvqZ zZMd69%tn4nJ0nz$R##{I<%`7(w}A-J9UmgB^&RQ@BPI*o$>k1V#y)Sjr=3>L#7 zx!q=7-BnhXFS*%nQwme{a!`l%1R9NGH&9PeQtg^|YxJ+ya(2Tbau~7x-U|#r?!5WA zvfcYinyI*8iCOn_}H<#?}Mof1CHM^Cqcc{u%>;2GQV@yfC z_mvB;z18ym2^fVye_k7Ek-4dD^mBJ-?IpT_{8@ua`MVyyeRK#SN;vnt(q^APe|d)Sdtz~ zL$x2zQS)yrj?z!Q-E~9I`s=yD9T5w&v!;m;E4P^{_B`{aW6f3Yn6z%!yHG2-FjNjt z0x6eUAAOfv@$OY$!e*4{x}WW=31o)AQr3h4b-RWwM4-tqd0jG1K~Yi-W?Ppc z*4B5K5FQ4O5ri@1)K%v5P?{K9-u%~#V}Z9KHmxjQ9rqXTeIl@AoJsGi|NWkia4=+r z8_C}R9|9{Wd@Uzo%{AQDCk$^XkQrFFlfaiS$Yzb8sM&AyyxwuOvF4B`e$3I^7Bhix zK|~@*q_bJgaefkcYEV|Ks@#+j`TnB&>W`PA3g@rO*G9is0gc}A&b2yHQU-1v@C<@{ zV0Yj{%mzhJo(a9dd_Pe6I0yK8u)r?yWClcxf8jYPJ^j4yvBljqHkyc5pq62t7aJd6 zcRrq<*#ES=hB~4A0a)_;$G|`+s9H9nR-V{=fakM4jaxf?BWCj4e!S+0#>}FpY>--l zqFK-E3e=r@r>&2C_v3T>*d!y^y_Ur;t9vt^tF%I+C9ek9@?6&APd66Ln3PA$TJB>W zjp2lrDq4jF<9rxK*^GOmFlI}VpM7(T%(=EFO8r4tHVjPJvwP~+w_YNZ7UR0Opwfc; zzXit;SF>~#DVefy6@GmOmq8x+^9f|DtM|Q&k26~JWtsX&l%g-qJ62@dFlfBO*ZSyQ z9%7>DR;y49v;Fh>g^%cuTw#-WSo+hWdMtpGfDNpXd*n4vOISugm zHfq8uh)LzC)!=8&u71zKhutp*Rq(OctYWwja2Nywz=JBt5frjYRdIx;g7Qhw3)R2r 
zE*eDQyLij(5Aug;^=wW*iC-6G#jYzmG}OlL-K)=(dmp}i55a~&rmkX)R~Kx0AhZb) zZOXW$Bk=g)<^rIPc-^jv3@2i_2_2JeW)IQ~HJ#Wzv2KV|knx=w$@+P}&}{M)C$(r> zS8qT^iQj6Zibqg+j&kCxna-H#7yfPN+;{tzf%ah_!pI$&t~_ZBUwxKc#1(>w6RgXJ z%&UCB4P!c?<@flj8jU`15kWirZ7@YZ{Y=INPc^#t#W8f3DwZHOUgl|=_=vxwgUDU@ zUo9t#*|Ot!Lrfq+UJqM=)6BP<9;F*oqLxVZDSK%Ot`p9~1r&FX#M#K4|9bnd{P=v> zDiQDut` z4mYJ3->nG<6<$~NoZN7*g{jMi7kmmO{xh*y* z$U{pfio)Ugp^vY4FDfGWg}hmpu1n7!0Qo^!M!g8_jLqDGehKaE+5p>oVa`yXjNZ5a z2M1O{eyW%jRr~dlQ&W4PY#D^b?5-4xSmYBXi@F)(>moX})?i*hi0>eFCtJ0vB&a~$ z=$Gri{F_|#VNj)2=hbs=;bGtKdL)IOd z@UsSKP2ZG9M*#Xyp!022OGJ;%jK&tf$<^#ubt?JWr~NLuae*g69i|97;iE_ZB0a2a zm6^Mo8Bf30vB)gHhPaQ{`MtEm(%PHrN#w8;|6_m-yPuOKB&Kdl2cWt^9Wqa(#bP+C zz}-|~v{Z5w96>zyITaI9-TGz_%3gzyJ<(QcBPR1^k4+!^N7Nn{09a)1yvM~iQS~+> zZbK};MwnDNglNqf3#{VL;6T;jM*v@{N00QW`JM?a8p0nqyLwMaFUaX!g)^=H zYlw+pdtX67c`+;qTpMXADDlL7(YPTfii34hNdxKz^>FYZ9p&@s@tky70|-P7Aa583 zLFvTGvMlY)M#~e$hFW!?{4ya7j!|WQzL6|j!7g^K?q0u!@pcpFu=u7H9YO>`_~ae= zhw0U@)tWyJ;fr0k*e^&~UM9GDYT+BRTn$IZVkg>5Nu|ihz!#0>RP->v?i0s0-E5_J zBY6&cz(7=|M7Ac%2_ENZND`d&y|;W;k-{zfUV81^_48OwfEA}0wEATlomt?oeS z2l8^{6=&X2Htm%mL(e5%TblqKPKV-u1r{T_=OLkU3@jJ_RpZzn6 zR3b>>0(tfsfL+w(h~-7}2bG+dA4ZU7tuf)m`Td{le=4%WN&t{_dHx^g)Sd21I9$y+4_@(mzih-#99 znkRS=<7t-xfkWJLo?{vp-Bk@zROj;vAUNUN7& zCy+Vn?`Gj@^RhV^=L4C>OO|$2?4%tpS}r!g3ZwepwQrXP_5#P)qyLCp_Qeu%C^+t7 zXCu*TSjr#K39OYApF;D#_+HPh^BnTRH!Z=PoH14sIN9}PF?N`MQR>Y3#Jl$H4Ojio zhVi`xig(f^KzX9?Tej0t4dkYjD==XB;D+@wAPL4yKzD z720`RHM(DH{i`TmR47%GME}{Yr9jUr*{<32QkwTCVf9#w za%Vc`au%c?d1B=6U}g(;4Gf@qHDD-ty3GV_>5t}CkFYoF7Z#l zsXpR@Z-Mgd>4+RW9fXVt0(3Cw``0qK`fe6TpR*_`X=S7D;S)1iqMJ>DO7y<@KezAF zq`gBojF^k4Gid1>mC8b?5AhF(Z=fV=+uanrT{-kEGqq7KWoG`?BuP?!A)kdM_OMxt zej$2FM6cZ)NQJqG@iaZP^wWuk`V}w8znOYh!X!P+&=_Z2jQ@=R9VfhF!aP1>?~7INilaCM29PV z`FB!f)b)Y*#|pYhJo}%8MbrG>FR5}EyD7=FNesbQ+TY|at(o`QFGelN7Nz?Ji=1c85ABwgXdQ+EU^nm_O>TA_Nw`C(za-> zErU9e7WU5Hn-oemOSj|Y{$lwAo8o#x%iLv4Ho!Up|6-iB2SFrw8Q?9o5axM89X;O_7(Q7-c z#LMwLt=EwB822$p*i^&(*Q3OvnF^7)FxF5?U1&Tmuh2>hyCC;zG^i 
z@3hvX)bZe_gG=zwX04Xi-nGY`ZWw>li8}mx5>=2V3sqsP*5*LIqBvUrKURz#46fa0w zYreWeNhLik2e~VC<)l8L6m)Kh3-!Uwu6xI<`S}QMSUoyccL$&qV3db+<5XDTkycE& z#lrT$;k7twrTLKeW+v_ILCcRbaHD#ynL4cz`U$tzKDDEV0}8OJ>y%N~I)wHyN7_D; z^VzSq@oJ)jk?jk(1Gx^+Vf4vg_ls?k^KV5}_T7g@lH49i3O034AQ{k+?54r%b`UWW z@{3%t|Gcts%ca>>Y8|MCGX}r~r2tTiIay;GJ)KYLUm|l1R8ua{w)=uQn@v3}wRLRh zn?KxP8V-!M1#j_U;50XX0!epN6ee}h({0#`=GMzLKY@6R~vQ zh%vG+FfMJ$J1b(T+XHgPR(iT~@E0=QUyJe$amG^n+hJVezUEL?`^9|b8_?GzvTI#O z5}MbSU7yvs(%}Mr1%$fogT?R~ROiPgE3kL3h=@mep_BdsB#R%7r6#>g^dH`uNX*yG z?U*p>`pu*8h}8nl$VG1tFC46dGVR$@7(e_ErMhSoScF zLHyR5Fn?5x4M$3XUon|tU(|d4^gsg*t-T@di9E8mtn?m9PN4ur7Avtlbo*N)$x4Od zciUaFjTyHeN7Gt4>S`VtL&hsSCmT_>L%Lnkmljy4nGdb!dk?KkZt)af(5NKza?&AjafLmuQd~mC zYOK|Z^!Q@qI;Ke{RV!sK$1mOMAs-_$0a?}6YSpf@h^J6)t1=Qf{UuK_lGeU5x`Ty%J^S}KJ}JoZ8lyBUK?csauO`9 zdb(juc7~zxXR=X&Lh~z&E%khNsez8((pQfKetpj#jcSwhr<$%gqU-i__z_{v_INu3 zo6C&jG{V?bdzJn$)8thT5{-BP>TRnBkULRftLgO9IQ7K zSJ}X;?{HInAdRL31?NboWa?L1%D_-}-# zTbT1K&E`~DRzkdyC*VWTn(J1&OCOOOqU37qz@lY z+1}L}`IFzcK(vwbnC_KAXhLU>guDPR+ggLzLp%)Q8t$*g5nR0YMx;t+HGRK+@~FI) zOscIm(Mn!@Q_v$(r?CF9RE&1*RPxnH#^vBS66c6&-a#vw22n0w#%sLkI2{gPy5H*# z&B9e;uJv%~$Wl1@O3~1c83Vg&aF3(f@n1(N|NEfMxO=8a&OCZ&K{6!16VgNFE_nUdtCi}$7`SocZVHSZq1vn-ad1dZC_!C&`fn0 zaQl8|brW2A9lN`^U3t%-49h-OIX0e6Q2Fgu_+-m7m};UD(_8 zU-YM(c-oQuuBOq1Cn_8`_`OMs;Gdpiu?+o$+MdAqAktdUS58*xT*}pXsavie#Bb-{ zZjKj@#(~#aS8u}>y1Hc~Q=oqNA7kV3v*}ip;oJpd&=vJKmIhMHvOjVp4Giiidc);9%ls| z?M}(aw}(b+O-dn`=(jC8OwJFGvfTPW+YEHus(9WKFmdPBu6|L1mckwp0=Vs>U!iQi zD{VZ$4`XD36EudCBJhdk?>$+th}pWM`l!TkUh95zaLE8)H&+aBg=F#=k<(wzj+>E06x4~w$kYAY z%|Ll2Q2dXhvkq(Of8RKW0)li%j}&R?W+F;TNcTiKq(O2tNSCCP(h`$SX&9h{r1WU% zO=_^Q&+mMH|8iZxb+&WP^SqzuzF#-6yzZ%czX>b@p6lV^k^V6WD|g{s?uR1Z;osC~ zV3v~wNUMI_rF=ElBcZ}dtdx3udyzD0SgBz6VL5qXFec6KK&LVCg=Ponnao#t`p8*} z>;x5s61)AlKjRcj!MJ1;843y!w zml`*rH5H!pR8Ku9|N8G6e2lnnqiT95aJ2CUbESeRPC20&3445D z`rfVl=495LPBe-Q$wZ~&+d?5AV`hXi>}+b)50HLw_Wml zBO*k4Q#W?kFHa&CO>y9wF zrQ<(XEiYPrGc1|utJ;a44$_(}UR5M|y*HE0IqKr{zr2 
z1^J`v|Lp4PNP($v(i87}P_9GbOG-AGCIv&GZ79n;M@?3`Xcv*G7w#TBqet zWtw!Lubw?gSZy<(GE2VUqB?`xxp+qUcoFq^u$L7_ zbxeAn=6-96vV zV?yBK9KMia;EXz?U$kTqYjuXG@Ck^|DhRep?Rq3dXvkV#NnZ30fSCRSWWG!pU8~Wh zew%bjhk{26pLQ#k!S7j7h~tSU>~&2=2K6Q=aG>Y~T#xQXPm0;E^$cYxKo)di7 z!rv>Zbo%M(@*`_tb%h6hJtwJC=_Q|P?HYgT!5SCKft0M{=z{Q0A{gp4ci!HK1D7Y~-LP-%w7_lxz6+UV2ZchNq zb40Pp!g|8&rsAa^E8%~ik^DSTn8E(2Y$xF#C^@X@!n_K%61yr`JtZ*Vv(a=toES!m z-pA4XIQgBT2i4o_Ec;aYh5yLs6uoQqJYdDUDB2*xV7jPf7m0=B47470#xxX3$*`G? z(QOsC#9?WDMnI&^uw!s62*3MP?Yfn8&Rt11HG>UN4QtKByONv4p14fhA3%bQmB$#3 zZE6Xgqz9!$&#EO{ayW>vhxMkAg$!bB1J2<3=+al{LcbErVXtRo0d_H;sufd0j}`za zz={c~IN}xjPv4MDcmMJaVgc@?^*z||or={00LWMEDrPUNR4_?_rFN?zV6=cAe`UJS zECb&WfuDa3j#)?#Xgv)U7)zw}$#VH8g61>Md5;i@5vn`L-q9csW+eLI*KhSN;^J{Q ztoe-cfIx4ik}S8s?`&8%FyuPPcZmxumi9kND3EE%_#VoRzQ@pw;7L(zBwO7v`4JF1 zz5`KF%20Mt*#BY3Xfm@;>PJ|O(6$r=bcFWl|2!34+rQ7%x*L;el-V#?rnuw`u}T}y zH#?P>8ji}!uq?`#DIUc8=8^Ixolp1*m`44WyKKL7y_fyvr!QM>Z6fo5@2)KeBd`0z zt+Lso)8$_;8~z1M7|**2*I9E1r2gv5o^p8?3@QL4Ep(T3Foxgbw&~-(c*ZAoaOp1e z@>e8SXpFkm5OT@hnq0(VG|=?$ZK8oXt-c(+czO#H5(;kCir(UlWs6Pi>jB+_&dQk6 z<9x~3&3W1@#!^NK*El2NBOe)gP52z(GYDHI7{qZSuc|SkSc5dJ(xaTM(m1Pb z$1i(7RB)_ZNO@dT<9~P?yRb=Jd~}x{Qaa(dTe)HqB|#&wt7}Ph05SRxZ*b7h;|6!9 z?QP?A`Rw~{LTBhLx$j)z)L}T~&-O=m;4J0ZA$Q*ngmS@<*Nq7?QebNyKFrE=XmLk) zI;E^|nshf%#R5 zV7(+4syV;a;EppFez0wr%$zn~@MJw_edU=0A{BxuylxXee3ZbPrWVtBB|K!DkBIE(tClo^3!+-;c{XwAF`j**G1D=IIzhCGVkKCbjB* zTi)~1k*A|9HfZje?fXQ)TP>;kYI-?%jE9SOX&Xp3IU_P}e4O4dTl1JmyY;myLd;Xu z)k*B5AZN-5xS^DFd`V%}Z_QjQ7|it(&VVq~cabxcw*Ii3|ng&otRI-g5)Z_c^Qq<^(3PurH0}+3VPghFCq5f*9maEf|0FTgR0Jx zcGAluNFk&p3`_4b$i{#wSmqr#3Tr9vjPoFqYZ}M`DXCy&6aRfui1`gu0vOg3I)8PSH(Mlha*?S0aoyWtqy=q%x7Y1$2kl6*j?W0dgiT>yeIJ}!acZN zt3c~Ju{F7QFpai-CE5|AGjM#5c>Dq!;~%X0*-_ry6hnY0K4E^6x5ZGFfeCRVgu2gP z##Wxpv-~C;YmWi!DHE2lGWSK?qIP z_b9!L_0z3YIE%GEGdDh5@f&~pc#bHa*&iVF0p0849E_5-YHv)F(EaxMO=g8O)82DS zTG`vVMd~phaZC9OyLZ2I>r~YkSi2VEc>R6rC@1kppxq`t&`I$3RA4ma)O*hg4Ik`W zH9m7!SGQA%g+1V}EGmw5z0krvjDoe~45S`+xt7F~IlLn?lJSwsqqEyejp}&{WUYh^ 
zfaq2Oz#~DnD{Fm!#`Njn*Ew@0Qp#^K0H1+(Sih*P#-uz<5k!om3Fz7;N_N9CB2L7#KG)>ENN2KB?bs>S zyaPU<$lws^R0L69GHA1Icql`|(XEEVXCxRA>dG-u+KiMg+=S37efpO{)y{AYvqWa+ zDOGQMe*a2`R!QRmCXCw4@^jvzMX^R++5lcc07yIFLH8?v$ZPH8i&m2JJ_TbRPQlyj z=OhxrHW^+FAoH%3fRhn{T%<{oZ(^aafU|p+)$>VH<8jy4MVdhRxDPU;^^8w^U*(Hr zgT!s8@fT9pM5#tgi~f|{^U4TvtW%`nRfPNKT|M1X;zTR4x@u-lFlKODJ6wv`+!_K5$lVlDsu_SMJZ-|ix;zy!SVksR}}0! zRALB_ez7QVWMzzcJFIr0NPbjr1la;$<<(!p-!O?xHASDi^!K)OD*Gd2|M634cr9R92PKY=L6mz`q_| z193jojIGjjfq=}92IAav|By1AHG>^$Grc&oPKSJPV6eIY8BiJBX@{#=i(rDVqH!Y| z4_bU1na|xlrS|WW&mZZ13jOLfQSlWD4eJ*NJKb3f;uI1GrLW~VioC@8;_(a zt5qH#Q+?e^zD(6YX;d8RvD6DE-LLHTINE-2w*%*13<1zaqwh*DV98)oA$PVD?V)Gk zOA7n68ZtV+XgU=~bF&Rt2IYgP);oR}njUnbx-;lV{%{+^KH9X0OZIh>cJD&7qSu+{ z{Knp_-;hwj6&6rb4%=qw3ubkhPF#Fu=>c&DbM<%t$?`;&m49ye3CxjynbG#&e5Yf9 zkzael;t;+&-oKxCnit=K1Tr>Mz{5;U0p5RIJ`j&^wr8e{^a_I!-;0S9WW!*o2~3ZI z$(EG73~?fZN03@Dt;B@*FqFQ%xh0o~5~)@f;s>~a!J zO+N6@7@qYCeQjTsI_#R+_#%$vaX3*!)&GvVifo<5iMg9id--s0vhwrt@@ZIoGGZ zKYKD}&`(&>d2i&!-&MYygqEsR)0OX1Z6uyRZ6BGtjQz*>#%PS?UeH4NpRyUAIUKv0 zlYfSmpC_G>26s^w>g)u_8GRs*l)LiQy76DAHm=^(=o3Oz;O$z~! zC&v2eol>Ixd%d(c{tz{kNDB6iU$v|G=U-<6pnM%7u=#JNH@8N}QXEO!R%&uD?J<_4 zY$|-c%z_;oTt3)Mv*`xOJi)b)U3iHj-&}`u?-rGkp3v? z@%XM(H~0I? 
z^+P1730iU!%%}_tsCPc4XH*F-QjZewS+7hV_ivw_RT(Rqxgz9Kto^u_$j1WB`U`4; zobaOLuWZoOWmpq5WGnFEL|3cRjLWHvzwxO;DgO1R6kWlFQgmqrv@Q4P_@3*!QbwAVx%cyEv0OF4f~Tn&8IhD7Ag|G2Gb z>I-Qv2shmh7r4>B%LF7>3B65~L%|+wfIq>l)8PwVU%DJsQM4~-@7u7q8!Ml;Do?U` zW^h6Q)gqNKFLJjT^hNJh^Os{}wXHGa-q(UqcidVheRXMwYF9@h5X~F-Ymlo#16C}e zU=ZeG=X>xgB@ga{J-P&9+d4232U6A`54wK0kL5bd?y@7U{|*-i2SUBIo+Z4oyxx|a zH*()uVs1^@9ZW%Ey1|9NyZU8PkxWK&_H{zqp4urm66{zUSIms1VI8Tl*%MmDtF~uV zKeEP0E#e*$ATV`iFa^o7FAi>L&zVyXIKAj_TKnW0^9^~o13izC2|XG0p3Z{{74Esg5x8$RI>whmZx4_%1njHDtHY1i|V zp||V|6YoMG=!f6XaIFkdGEMy<&k{$;DJ&h@Ya1L#zc*ztQ;{K494qifSnHuI#9%>E zHz~+%@>aOC!tGPRqvC;996fqgXC8L{d7#K+6WgND{or|>&%$wt(JSiA~u3diInVM^YEySD2+JZP`2 zI#nD(v9XCSjjFvU57yrEc$^!ln7GZebsuHv_ceW%OefjDIKMD!YBY}M%Y>vZmkYu+ z-`o9u@*jfbjtLFf`*>iMIl1~`R$MVp_cl9>g`D4El5j@ux}5!(oxEgUVgV5VOM^18aL=(SPgT+$O{?>d&1-f>IQ} zit|`Hhqy9Ye)bJ%qzQlJ=k24WT*r8I%@&@a$k%3!TsI$kFGr~Mf?0A5%ZTpZ^hj1B zZbhZWicqWXGGs`G_q(1SxxCS4kK+6@D2HlY6Glc(bB_htzsqWtJ!f*h^PXS zWOO7mU|G)3&Z!G@q|xW|ag&o;A*%iVRv0ym(!iWZOH_lZvX(<2mU9l-7(2-=#3Wbxasp(w8To z=XKknHnWnS>;J&T0@HWRbX3Uqs-2b{8eisIyZ|G+55*1ELDLM641^uhJHDX?7LH|cb%4~QLkUyXK2shRH6@ySCT_+ihmPA!jVl7Rqd@U3PA4ZPrGhIfnGv(Aiz7m2 z>ke2C^lwC11dg~J1xfB!d2d5fZJNLL_VWe{em?dOdLO8_N@&nmmqn9IbfiF>qr-oA zYPq6Uu%uo@!$zC#kjFB?w$McRej-=g;IiEWTWY1b=JS_hH-8*+z?GWVYZNH?RG8P6 zbv4N+O)G=U?@c9bM;_T)as3qVm($VW6#~)HioxNpG%CXSt)bM`4cUr@LuGz0Sl2ET zVYP200L0rbIYG}G_OwrB?X|7{W$OG?4Qq59J?p>_Z0n*0|HphIxXY>e`Qhp0VzmsPuSLZJ6rOmJU5f0t#P5^I9tyHNt@|CHC-?&d?!uSxaPatC$pLlNHiH;c!+q)v^SdD6-x)%ugFN7n$g5@YhJeg9 zW#Sa4rH+FHU<(tO5Y{hu0W`=e=%~R~)Sz7CNrSBW<%+-yW7+c1sS(IBWaJj|QP53d zLioT`E%p=ss0@tRUKs0%2Qvm#7mRH{+ABEf3=%rWdtG7&Y-_UJxES4|$^)}@cJ+^! 
zL3k6Zc&j_J6kLjd2l|vmc+e-g^7yO&1n3Q%A2vs2^<9is3)wl@X$*!g+_!Ccwi^c) zRy?R0WVB5Qld~ZCd-p$^tRr-;4LIDC7bZ`3zVv!(oHVNQAk==lcx79;GY-S6+ojt= z)#jNgd#4ShL0fF`CUt|@+ovU0KEIQ_CoNOI(g!3VCuNQpoAhuk>|}_`-8709MK9Es z@pOvrWL<;5c>KAtGT3_Zsfc62@to>Ld;VnoDRLx{E{ga)ClN?=SzuoT&HEvxdCeB_ zBW5A5T21(UR-5Y56x~^VY9iSQZ0UyEj?y)yv`Ki3YpUC<%xuMUlIIlxBS^%2Q&6Jj zy`~D;FOD2p-JApK^zEr9x5K5W_*Ve#sw0#WN5)FDA~>JxDJDlkx6FRhL+{MSHv5I> zXKJLhyLHwj?;x4^!1>_!m1XtbP6iSHr<8xTrBqm%@%EG2dFrPRdp{tE55vJ-R+@ws z1#k|$ScSI>lD59iIvOCOr`(>QeG{@eGJO*!4INrtrl)%M8rf|U3WF(ov%D3_BM{r% zL9=D63;Xx?oh!Z-M5NX`|LS;=d!+47L{a11u>O;M?45YptB@Rw9ok15=@1pTm25pD zPDvK$mGl8t)e?ACi z-S9&GXxYjs7~DozwoUHZP*XYz6$QkV8cI~q*$vv`p2q>Ie0#s^{xnW;^Sp*f6i^L^ zTmmmP5oA%JBho~_cjjWj>bC>cE@JWELrfT=5Y(^%3^ZV9AK>e6`B#uwR%Erb-Fgho z`h{EWZJ>zhk-&2Pm{tH+*7^|ZX6Mz{DsdLdC4Mib|Bao)63p9sFs(W%J#9`tuIJ$x zYS?GNT)^s;oLn>Jx-+ z{4=No^e2Q$i8kbM`+@|k%a(8CN-h`U$wJqGcjt0mXy$hBQq(r&BMvX%Vue0KhD{Gf z?};}laA|;mjJSf>0;JYYmxeO5D7k?=W)#s#kL=toN*?aVQtijG+JyAU$LQi7mSS`# z9JlTx!#?h2woZvAI}51lK7W13JL4t{Q;tQ%tZIz#0|=Tjrlbt247hV&9faJ)m}$4n z#8{tCP-d?p*p&Vt!;IJnEbb8TBSm$8u*u9t$xi?K#|(=-$@C|Afcj92?&sb!xgsAc zwr{qNCupUEV^75o%zS<-cz3_+`>r5BlBX2SriE(Syo&StvJSW9ugw@5k1(iKd=LWQ zEl9YAi!#GDjIPdxg-9G{7c4@!Vjr!~?-ws=1IKadmP(@mDX~v}O8US=G%6A7Dqkr2 zUii|Z88KS%$`ldxY=&mepa^HbBzyDEL zVR!alGLby)f}b4wXJN|PAx-Xo@TXp1?q#)-M>cLMPd)IK{p+KiM3pGwaQgIl*_m6U zx2P7a%g(oO{6o>#^jq@w#C~uEJu2M4Tc2U@IR5t%7HM-M!EkXlZ#;$)?%Y2^(WmTq zgcPhY)gfQgv+e^w=FBB+q#oSmWl8unVsvjm{l;kR-l7ft{Gh$q(3OHw?y+zqgGCX= zo1^@C|D-A?p5meUvSaA@FLGrD1^oQ-cB|~I8CW4HsUsKqJDCPsYPHz8GcPYDE+Jw4 z_KN*^muFo;I2^QJZuNcPGFYEY;6OYn^sw=6i}$yrVd|5C*FndU=E45)38T@$R4K0C zW^sZ6E84nTm>05oNwoP7+!f`F>vOh5VwFVBxzOR62eVl>?#GDrGECJzs?PQSPbqI+ z>13YFyT_H#IZSX)Yp#MV`?hTTZV&my)`Oy>&Al=|CocV8)Qa`r6xCd|U>8Rx{JX~< zb?bZv@>w|yHM^gTzrL3W1-|_IcX#y)1{lSF^~E={e(_2p|EfA7G`LtyPQ&`9QrT$k zN})`{p|qp_blt?6pK*&4RKfNv2735@AScUN*9unr3)=p{e)Iy5dfSs92wf`k1Dd0k z7FuHQ^k#B)+jM{5d=jPH^;lQc{?`O}w#G0-&}sjT7DCu|DhZn8-0-*Li#lhocQM#T zB91th8}q3v`Ma{ya3>sx 
z6Ue~Wzt;m;5F?YFJK=FX(rbTjKMAoda`P!h!0FCwBCb>K7s=K>$;e9E_OBoPLyd3z zSAM3-7C?a(ZI=t$m>y56%T%NpZi8b~nC6VR!^gel6HM0z@wXHq3tAdC5^`TWVKz`MOxAEsKGLkzUvpUKg)ln8>@3|-S z_0}lT!)KOxTmE*yZmtLt(0M26Ea-uFODwM)T*gMHt@V0Pvur;qVOxniM5Hz1-8>gx zhsEAOrlt_1Vk!-)*?g1q11UA#wJ*%z3`k?h}_q@!X zjzM1q!(x1ibHqlH=b#r;3=LVw?ab%yR}}GFXI9i}brOlL?*i4IE6fkGCs!uaRjiVB zJ*`Mh9wi=r{ZH*)U?U5jn1-T2%&C7}d^ky0Qeg+t4>5tN-P~asYOgWEhEteFi`EzC zwR|9`ba- zk2ckK7PSi(uqp5+-iPTf^u(g)5qG>poqij!mkjzZ^BCOhCza`LS|0a7m~a!2=H#iA zZqNtC+9&%VT9gAtK2cVhvoX;Z;I|h0EZ?`wpG;$AMo$%A1r{G4p{BIZ)rc-xkuxeu z$&W%q?$1}%%7d&C@Hj?P%Jpz=}$=!2rj#pQ4uB<_p#s9DC1B;*og^-6WxYrZSV=`U;bV3gq{>$7ew7$T|MkN z*MADFg2Ev!td*%DqAsdcOxmTMYuC!Ka)*Mo1F%a|kI3HJ%zUiO-Nznz*Y*30*^^er zZz$~_#k4D2z!o7(XIrl}l6ycERmQ4>9L+eoB>P`2tQQT3w@>n%f3=AbzAwmJ{*oQQ zQCO1tNZM!EJ3S!NNVZjd$+Yd4dU@xIlXk!#G=K( zzlbdLzrQWZOSH?7q2+H1Ddj$$VxRZHOINKYm8Ue*RZb&!=1h4_k^9e6X0L$LfPut& z2~*i3tHsjf+t2t-T)X<7FfD?fAX3f6djup{_I-+Nu7SGmTg2?Qf6g4ra<4@vO;~df2%mR|22z!Ah$2b{C+cy;ERSg=i5oX zz||ULNfl77C@`@^b8}cfQ$H!z3=)uwETbe0pnJ0#s`jFn-SYh-uY?w+1_H&~?Gifj zY2r+F%5KDEHNtay?>@nq1%0mA<~0F&PWq|$Amb%xt-5LCw^j07Ru{f-0_Jcm|JNt= z8kMfgr%}(^fBTu8*nx&=V>Gaxi(1mj%@BqI6)lu%TKXYgNPc6-&tg^U;$&~4d#}jV zc=3uZM%L@q5&ONG>tgxFL8Y?8QsxU1T0iLe(&Ao-WqcXWZ`%Yh9s)qf1&nq<{S2vX zR6k+Gbcl+>TigJOt2_I3Zx=uQ)AjYp>VVYeIVyJf%n?G(y%ORgFT??pJL!Hr*^}C- z#Z)x+K9*LP?*aoR2}m?w`xh-BSP(T^4y`$0A#r29NJwvt8UXdMC%d3`%U&*w&wheqIqfIdNP)H>%`EG;JU@5=umv#EI*i&Z z#1RRPgL0d_SZT(@#_y0~h2>a+!^3DGIlmf{c74vFmWw%0HEEjT7+)Q#_P^2E*;g>2 zac16ORBYs%WNZvzP?VpxeEY0l5q-!nOaO7mz6HH-PMRo8@Q-0l2A&j8@>fJe#Jb

8{2J#Z#iaAl1dl7a}X9DhYQ0NppjB{UjuIwmjRC2y&WqYqI+_`+n zS{EfS1R(*S2_ess!{;i26>EaO#R?pW(n3=;C)K?|8Sl)U% z%py9aFO%{F)daDzXhcVr14e3@`FEjsf z_euFCZ%lp=KXkqVB3W_1b$f2rDg68`#SgEbmg?V1+{#-7LB`x^^sEi}Czp(0+B#R1 zO5RAb2zXh-hpd#~`VA9JsiHKotYqfed36+JP*&V@kma(1)MWCLX*$awld`|p83A7( zba{>ugZXXiLWGcDILi4$>ELb4A4l=<@Zum$O&^RI{cchn$h9EI->+d7|4P>MPUdTb z7j+ELwBhDQ#UOIoss!8cfO3No@%)iR37(Bn2ah4904|R6^!%8tCC#{*WfNlOPjQM3Nj)AG0{&;Vx z|9xr6L_je-mHg29DO%g_+h1ACJM*SKMRmGGHQ$fHMRc1#-}`80?acZG3fXBqYzx>G zh=N!_e{RsV^gc#wOB6F&s(O@+$E)lIrJ7?OqW_2VvPNGbXW>C@5q?$M8qJs`4&vXG zefQKZV6Jz`U_{fpc>Y0%bm+U(5Aq@9L3>@_HfJ?|X&MKCx|8txErSqD*nj^!Iw;T~ z;qZ+{>%y<-nm28UW{Kt+%`t9)$^8@&iwu&GEdY5$bk)&{mJi(0jWvJAbL?Nfne8g^ z*^#KQmRJ%!k5R>KK$?3`9-xAbOz+AN?5a9~{_xq%C&%yG3@}$e5j=?T57S zZ$a3m`j88Olz3YeOyLeNGdn>ZhB%`?xK=rR_&vc@V#}ZqWnIqH-zdvnj8C|wC}4aV zQN!|TKk_|S?~hsBc$tJ<5==(`Ff+$^5#15c;BM#+_w{DM&Bi$Whj|I#c{BL0i#LmJ zE+LbZg%F*A%b9C}~De4EQ?s?e1Lh_M;ZxM))rLnzF60fecV)Oz>xiMQAy#1o3 zEaZW#iii4*&&Ko~GTbLu>n)cMwt9ItXTP^;_=w{3eH>wiaBi}vj(8@(p4!WJo={|0 zV&`?vCVT3oW0drLi9crQ*JRwi+UC_53J(3R_J4zHxqU9msXJ^t2uW>=?wtEZ9=csH zteaF}gAGd4I14((uNheH#HVL95%aP2C$aALsX1N-4?YBzg1KiS<_ zKo#QQ^1v|WD0p_>SOrt^c>0!H@y7%6M)Ad5-?y}un&MbBaD_{708+_el8EP(6!G%g zsw5SHaD}GSk&oQ)(G;)f4b9WcmTwbY7_S&4US z<*7E!s>V)AP1^d`xo}%O^}}DJ$xElYnnmwtTK}~9ZhKXYv7ZWL$g!1P;9BZtMMFpH zncsyUmpzztEkm+Zmanb?$E$oJ4Bc-|*){Q|V2DMll~2%eIhWBL&;paM7I6F{`xSUKPk|Co~O1@YCv`GhCsWOTzy_Rh%xe7{Hl zd=+(2(g^)hHU11m>p_ZHM1%k`ee`En9VA?EDu%f#{qh{_!3ot_6LEv4c+Zhyj2<{&lAyT%sU-n~%a34XwN-&G>!^m!cxv9L~_; z=&Q}W5SP;Utv9zOY5%+;bJdf`KsW6~za4M8KJ4}aj+fjW9+U;zw#@02>?vg!vEUgI zEdU+7h+!w>S7v$3k`N|v&OQnn3LD8cjf(1r>$#|MVH2AHDMCP`+c(wif|-1V7x^i0T3bc{Jx)gUT;HveuIc!kM9m*gBrV3Ai! 
z6sRY4B!{;e^ovvyOAZ9#f0X`UkN7x?s%|kuK?#;?V;;}v~e{R3#e50>~YJHV}kEfENeSBM(w?G#ZOVsBugn1uWj1wj{~vXzwD zenhJ$WS%;ZcrEm*D&0eeyM~cnV6r}mnnt#&db;k)pMW&Gem|NvD_s@GmXLcv)Nwx> zB#Xnd;k-|m`fFVdJ#@7x^aM|m_E%_k(g+TUUEWcbu=pf+jt#!`IWaAFaEsTnd&H!- z#H?}lnpNsjX>J!ZJj?2m=WIsTG9jmEtJXQ|fEWMHp7GWs{=ht@FLrxiF)h}~UbOLi zR)+30Lut;L+wCc%f=R+*zJHlQU|3DR-6I-Vb_8lg@`t>sP&GOkD1C=_X|sLc$FtkK zF}`Wp`3);UoJst(9bjqen>(_QN(@7{H4A$2>)PQ}*H@aTr(CVUix>D9Z`?07rn|37 z#DMT=;n+6&FM-6PK6+!kZ8~&(RR|Rdh_p&*3v&44e$}|N+W2jZ(Gt&F-uGyx>!6a` z;e_TIe8rCBjlzjx<)wp?s9Gx4GAzWRet6AvOf9S7h$%3{(BJ0eQ}Mzf;3_AkmT?X1OLDgusR3IanbmlPCN*o<%P2A4SiP7szY}Te(E6BdI%zZ~*4CoZE9$AK zwxa_S_xp_K2@L1TA(#s62vCrWaRL@jtS8Mig&d_FBy#u-xw5zT5Mtw**Z|k&+2ey}PA1uJ1_d>B3%Ky( zuZIIEh9G3lMJzq8!ORVvyjcFOXB3d`V}Oy@{_@iX`Omc`kO3rAzs8z_A`yq= zLXZYvG*?(knBxItM2S_*`T#lD`7WKc@AeV-D;|Au3jEFk>nfwP|oSgBrS(AZZ#R-5pvgK zia)TcIFxRLq^q{@wcg!`$y8Dbv}c~m-`|Govi@}Ob^e6=yUcx-lG>AN&>ASq8$t-> zHVsizx0au`droY5c_j(#KkWgRdXXs9@4aR03^WGhkI5k9+5mppva3YlahJcs{p#hoLyqyzde%{|s+C z;HK#$f%M3RqxI2;?t{M*kzwR~Q}{-t3`zHtY?~i}hNh=e`Bc0gj{j2))13)#I<`7# z2*Or!U!E+~$tRHi3UQnG$4RM)g!8i9fHZw=h9aV3I8shZc2c$#6$6jdIpVpCK$q@83^HBF^@pxHGl#Jyw*)iEk{w@8?4!#FHFRj)~!#vEBqXgcMZC9}yrjOFP zeGnYlW+iG1O3)Mt}&0^OiiqwPl^^VqUyr#W1pia z22boI>&P$E#M#YAJV_duqPwGzv0 zl~SW+-u~4>;a&yaU_smR&|ZuIbZCR3*F7wnqFl3K#f6hYDYJS+{n^hHay7jmsxwIZ zZR+regX__IvwDYufe$qj(R4P8=~2Z%vHD-XZHQX~&}YEg*8&Z(YUjKf;fY^1nN)oy z3)Wosisesyvj%-RJmxYr>aqM7!3#it96|BR#RFmSa)EJr4Y}BUs75J{kupuJ%v;=2 zfc{}S`1z}v-BUMxKf(18VFR73EA+vLk;AUlLU;Qgb8f7~Bo5+FF0%-D9C;wva< zUHK}oXwvTK=#E?iUg|)1hpL{xeQQxwcvGt>ROXF#Ac537qV#^kER*{uBQiVs&tA&6 z(JOU~@ZURj?NH?+DzMuMXWUtSD3o%?$I$w0bvr|uHk?kDTrnB%nhm=TgpohwZ3#{V zKo{yRVNI1Iu0nQ}?G4>FbaQXFg^I`mtWUMxD?}0#4=KW2y94J8-uz!S-qjGs`Q=j`!dmZ1Ze|hk5~9&AQLci_}op81SI< z!gbTj`;WirJ!N37`3L9_g^M#+32o6jV(F$vB&jCZu!zwiIr#Kg;mjiKGRX=|p%mW_Ls zkN|^V7h`^hmY1HSRFksxT341p>dM(iCc{qMRNw2io|e#PnL)4NAdx|e_;p%z2O=2Q zTFUENd}e(?#VIHK`ie`^2`#PMHaFQ|^DfO&_Gh;`;rXGL<>ZO!>U7A-rt4>b1KisW 
zp~H&$ZR)cArC+@I@Fpum{^MY%+ya(NdSYLSSTWRD!?L*GpG-wF;1J3JLM8yRv&v>g z@fI_T`mYp2OOXL*nI$m>kazYn0?-#-7A!ux#Vf4jph?>$$)Asr;SyoH&|9Ya@k-Ac zEe#}yDo2{jKUKJ3gpF)0Kfa1aE~Y3+tYN{S^pd7#rRPnNJC3O{oydzqQz`Fxe}zTh zqASBTgE4+{9=q`lZ|Od?N;{@Q-h>UD#Ni^7Z}QR1Tf?Vy2wj?f$-=1N2hGEcv@wW_ zt*WrWm*`SVFB&s1=arHp+onXCvCcwdNm(mREyvO-s{it(?F4fKd|V~SU-5kE=uD6C zxz(@TcR~e@nN#R6^L%B2SB*sc7@XtEY|$m6&DQr+rZy&z+*1}m*!M(-V-8`=M@~0!Zqa6#c1gwZ!-zLUm@S=SDDZn#)Tg^+`sbg2KTTHDZdiZ@YQz z`!f~)j8(?^-ef#eV?)fk3=3He$G=XnC1>~mR|wNIOsWmee(jIxfG@7+e@+kcWqII6 z(8e5gJBQXm>y43*j0vuE0-eQEonKmJB<)5w&+!~t{>fCTaC<8mL5~nDP{))%|C{Lo zM9N{7E}yyms;y!s_UL#Gj&MAJNODi`RC|qT!T44V)y`N9NO*d?4OFg8ThADd?~^^^ zEr+?3G@P_Dd+}W#5AQsp-RIV@{Hylv>>hgz(X1l>*P*Pj*gmz~@{gkj{kNB2$zq>d zNg;gMnx}U^$tG>8bF*z1FSdYxsqZ|{qb4^{hy_|maG@(}w9Y0TRaAz2#A3eP$6sIR zRTi_?^a&v%p3&4w2!hHwWxr?gSz?|_XPQ-i4O_X|vqc0ML9>i3Y++f(`3?`m@p*7> zOT+o!8IEB_-nPTJ>Qw*3;|}aQN(!1!huT-uG#U05R7jK)4J#EgUTH-z&{yeD)mhA0XNSIZJclXe;XFrk;R;Z?vcHn9pw^m`><24P;edR@ zrBE)jT#~2Vw@K*^U5QafS8l}(tb|bogc%mr?WI0;-fQ69-A_G$2)Y0pLfr%?&)?sC4fLOeef-T ztI|zK%B4`4TFa}c+^&!T3rXxIT6!obE+$07eoKg7g>JZfMvFGV)4Ng<1L#=pZ{2-w zE>2hud8XlqIpeusH8C3E4K7su%cOKR9}`Y|TI$~Cf7@b;M5pJq2xQ;DR|#7AW9oBp zoHHhdEotvR-cxnyy*mA3-?^~9;gwpV)RH#aa0({tk+D+w;JO)LRL)ECp(yK=GQ!V2 zLnlwL_d{JsBEn&=E<~lmrOpwv;5qa30#yH$=Oh}hYX_UYHa(MN6+&p zsK9XC(3b2qHzl8*W0t}lzqW?zs@F^o0mH;IM}%qY=nlj{0j4j5F zQ(YMx!bWjq<;y**{jfM3+#ZfM4?WmT#Xotz_bAT3^`+Dy(#}@N>6-O(0xgku5UG+tU%HmeCM7^Z{~ZD<`%V z`)2cu#_Yxg;GTtupmN|sYQ|`9_H2VHb<|IS9hS^+fkO~l zLJT>^w5;`7+r=Uyw_pKeu{_YDoDVsv{Td%&OR7;cXjX{t2?Lnw$qID`e+>`Zu z`aJ(~Uq^;Ts1mE;>i)AF`-@-L_R;}<6$p3&Lbx#=@j_v<)~urpc!P@nN8DRR#nFUa zfJ1N(1PBfZ8eD>V2p%A~yF-BB&IAY$+#%=$*TLQ0CBfYW*TJ1^=l%AqoZTP0`)7O3 zRCiTZch^&Wy1VL;dk@C|OKceG6YpP8J9EKPB7lHtz_$sIHNIWkkswO5Ct7uid@+wm z(*X6ezjg!xP!_)*4@VN43=4S0GE~pd&a=Hlcy~Q$E&&! 
zP4~M9Mzsv{`=EOlh4;p?1?UQus!EgMyK4=X70hy9^wlbNoT!EmUVC9Qk#y#-(n!(4 z30$(Ki}3l!_c2et;HEGDp>e?iE0)aUtW*>+h=JZSG)mvi+-gWj28E4xADKM2A|Gpb zWl5IBl0ISz(5+?PDRB=#1(8^HBuSC+&4TXJoIe)YhnXc{<;jkEkZ(=>iICcM=zhmQ zJIzFaV3A}FkbEW4l~sR4Dsr!q>|qob?8>Vnppy7R;*Pb(D=y4oS;=5}3e0 z*!4H_F2p=3TFgT*r^3VBUFx|roaOeP!BE*>B>`e<!cac8rl0RN?3AU(})^9T=0;^!I1qyo=souLXFLD3z7Gs4z`&upMw6K)DKbFet?I@||r2!YB7 zG0O$34T83wYafDT`_~mrzB(EU?D)CE$$1!IG8@wh|0>ry(kIzZidt7fTFkAGxGIDS;e* zV2q~|;BzDFzrgA10>>^$^7<+|=ZKLv)FB)Y{`exZAWO>Fi`pV=khlVv2(5>&)sA&^ zL1O-Rs_8UIIFUyEWMj6!xVqIT9d@zFLWghczOX0NIcok7qdaFEy%D9KB zd|jO19o6DY3`{Jt%hWFsZJE9sG5Ez-5H+8NJ@ zCfYOI#= zZgg`Xa*WXhp=2#Ar?`Q-cL<(hnH&zN@6XO-+Ga~=@SGw6Ysb#%(psk|zw!}olqe$C z;4=mA`m@!QpwzX-K_8f+g8I>QJ&Pt8sZSldiHO=^4kpX)M3&}yphE4G9I)&SsAv~g z_Q7P&uMT((sVj$OSks3?G`8g^aO?n}?X_D?f5hiMH=?A~m08SS+V_oW-?RaPL&O<( z4}TG-7dJ%uO(? zCn{B``VMpVXMVJRYX)q}xz56`E^1Z!X`ACx$dsLo9Kl^gVivd|Yqjk)UhAi?ZO2Wk zO+*I=anvb;Cfj9hp9*c)u-r~xdFrXb@9F~=w`451aobV2pX?^Z)>DjX0nM(M1?1(5 z@7J0cA|d`A?ZUcP_?|M&Ih6BrLId;kFg8^!1RuPfNQK(!SjdwVbVk)jNnu>XN;l;w z23xL>R_Pq`U3riP9DZ@d?AAzoufb?y>p9PKg5ZTgd$C?zF8pOU>FfSwW0x_%rw0LF zKe*8`9Xws(Hh$>HpN_T#2Q2ELJ?D z0nTv9NVn)N^rf5ZNMoDnLZz8eCDzF03Y>d7;w)SUV!@t9VPXJks6S{jZ_3wVeMRq6 zYU7HJ39VwsZxj`y+2q@W@Pv5WLV4KpyGP7hwX>K{&qKEn>!sN8Z>ZgDj_1CbEc0?P zqfbKxGp3SyH`By2j22$?q(K6Oc~f31zxNJK8gy%*K@QcwF!}`UzBq&20GK`Phlk)> zMyp4B|AWm*BLIzhJiPlH)3m{xunIJYdPeUQ&wMfPOww=-AcEwml!MnG&u#W{Jt?ak z@pFhYb&v+JwN6prj}vr zvWgpy9sD||z7g|5z_4+*DBn(K|Am|*RVKG|lSX9I{s1V;3xp%PKjtOZ&;9{cjixS6 z=|!_2i*5>{;D&seS`YG{`%I|4Wg?C`e}AUhm8U=cK~HYyT`Y@JFCYF_1d3{xJ%J<7 zw}sI|9oj%sF z%Lq;;_rPqvr+hbUJobXXUDrT?0%FWyS`$ttlAtSJf>M^Xvv9ClTLOigu$kyCykIMx zz7GwW%K9h#odyUL?KM$u+bN@(^Fq$rc~~v`nr1P@JWC4_8@$a#P`zcAaqjJY;4_oS z)a3|-`sSmBImV`fRK9tb?OfehUZAdZUoIW5stv^!lf z*mo|mnAdZaas7I!go~Q>-8j(ap41p)IKD+Omg_?-m#n}pV4F8kBnWX{Yem8f8t~_y zfGJ=(kH)-lAjmw?V%Wcr&YC`YgA~*Vl6)acvnH^62{q+E5q4Dt|k|mFkH^9__=Dm ze}s`-N?>MBVBH}(L8SETFKsl zxDos0-lpP!7$BR7SErZvYP?9&?&2CQ3rHkZ+GF#h$PG 
z+zsDMSz0@MIZ6I#-0O@h-y5P8?wZC~ml#wF7KCW1Cp*@8fe%sXe6lWc8AuFXt}Hx} zfV&vLU+i!f5!a*B>2`9dmRL8m3xCM{3HC#H__95c0Hk^E+ik0xLHeTa8f>G;louN6)l+d9Mt~$Q4)UB2SHdgTrlWk@p$=d zL)(LLrhq=Zp*DJSl1o|gPV4q;of#6+LF5KRX^dF^_6t={qf?9PG>dMl($|MEmYMHP zAtvk_RRPxPuN{xC=DHWDf~lVh+e!A&k++Ih-+a3nDt zY5uAyZML9%USW324Eur7W(KVdSH0$0l$an~7bQOHPdd|+R}`otUhF>~(ugz4*lbaL zVT*Tb)GO!@v1jDa{-}w>x<;_F1po_CiFwq+=0ZI$y^cgHpW*g!hZr>i|E;) zmEPq~d%n^B>E|mC4Z@?R2&QPES?u?Y6k*;k`J6pbN)@2{v)AXJrrw=5eg#k@VSZ{V zl~Y){9|IWrH{Evun}UH$JKp4#UOI_sE8W=8${T<%str?7UdZt{+leJs=Of7@bSUe} zv|AyJHD2KtViH-(WXZnwCwD!Igwz-I6Zr-Qi6=~FwYh?#U{ljf zvm-r>;w;eTHf1k@p6C$McY6D-;s|(70638Ie@Q(wO3*z)C7QTe-ins=_M*@K<)o`) z5Ck5vuPd{#-R`tCJOFBF^WXLbVl#cxF)Khei+ZDrZ7p)HrZHbX^E3Hj`&&HEchqR+ z=h!Hk6wKFtUQn8nz#kYc2S5|rNZyUfwJFJmFHzt=zJy)3o#7sBoOQ2V+nfcTL6pv@ zHLbkl6uiW~SDaTYbc1>2V{c=u>FCbES zLv~I3)a1GrK}z6{d3MhlQepBXVB0G|hM0VpoOG*HIoUDAG@H(jd(RQyR(Bi%MiO*S zrjNGltqvJT(O;S9!&aK~le7vgcUZ3?zoT|An+_OrKhy7Vjo_&*+3R5%b}kcfS|Iu!92XC{>vj3t347LgMZfoRXRHOShmz*Er`U7=|_ZkyNW6aOKCcple64p3(Q>ILDpG@s@_@mnQ(; zQ7ZuCje5ko2^F|vyow>+FMkn02`Q7P2};`3^APrv%LzZX3N1)Zd04y4ILj!HMg`o( z0zv*mX&-~3{xrA8+cxO9tgDbN641I1On8@n-0S|X0C;C0k>C134_uZ!#b76e1s`-m z*lUR$GU~mPWg_m(qYJ{BM=NxpgeP)<+Oi;Av3HvXd`n$h@8O8=KOOcaAbRo{%Xk3s zVa*;!QOS%7Sk~YlIwDH*pK`m6NLsnw$iz;ApW6D$A1|q;{BH-O~ZU4^-i+o9jG0;q$sK`(G2( z8aTZS0@Mz)0Rt7_?r`S$eEQjf7$?gl!f2g|*kaOhPo!80s++lsf->XUYEIC=k8c{6 z#)7-i%Il0d%~(UKlf2Oija0tw6Cqr`R`sk==V)kQg@@l}+P@F(zyA3x^A}>k(xcO} zixgp*)QdyiGTJaL&^8L0JD7u0Dv^`7^|6=08!w`JiPxiJ@({&7(sMJb8I_w6EbK;_6 zmg<()@sU8L--AQ%QEbNbu0U2DIaXFgXzN0Xa4q;Io3SnOfHyz;UgT`9Yfpx+Y{V5<`{5ld0H#HAqZCd8s1j z0Mawvy6o4QEwfonnoR&>A7_llcT46LcOj9VNf8?R=KvE54uCEem(;U-m_kygDPFmz ztCsCAs8xpKdfzD95Y4XRmHv`v&gOoCb28_;l0&^4(on4(jQO=RP;X9RR$|bOz1|tUKqz3`%U6l=QO|N?lpsFc=JWD^K9;& zwXLqPRM6}j@juT}lEAmt@Oc#RcY0jCvwp(fNUQubd#_>`{YtlNt29T9|Eg5MF{n8@ z8=&B7_9d&Zv@v{4Xj1Y~Hxzs-Y}J4RZ|vD;95}$eDYRseVA~VI#DC;{c`QYEY?4j6 zLUxx?W6urfJ8o$kj)P!!6)%AL9cL1L;@Wzx@%n>n+_8L46aG)kK$Y)C1Ybm1Jw=4| z3T}df%1T!e|tNCE4~0}fE+*;AZm~e$R1<@ 
zvifh^4CDxux&qq{K-s@7>i=?{|D_nfMLz=jTtH?ZXW+Q6K&j1tM^FRD?*sMsf%5(T z&SU~|1NJ)pckX|+oPfLqko$LLSD+1Y|A(vc1Nr}dTLbhH{9mU6UG+^BgbK&tKmZ{l zKQGhbfXKhyl;UIOrN;*Wv!xw&gnxBFCV2l+W7J>L6!2A`?Eg~p@BH6&&`Wt~IcX3A z0s;t#BL{*0E`cOLFMuop!oT#zzXb{LUy6)`gouQKjDqst9~B)91r;3?1qBTg4ISg( z0&HPnVqpEt{5#429Qpzo5fK>!6$SObD*vyNznvgFv==N0Ifw{vK`-zS5b+TH_JXJY z`H}x?-ap~~TM%9#A|a!o0?oh#D%8CMnvaMGG#VLbF;LqN*bhR&L&kr@DS`4z#RT=O z69HF1Ts9ibhpH|@)jx1rZd2z#bPS@`#3ZD2^bCyenBMd7^6?7@N`91*mXVc{S5w!} z)Y8_`H8Z!cw6eCbb#Zlb_we-c4*DJ(68a-7JU$^YDLLh5YFbWiUVcGgQE^FiO>JF$ zLt|5OcTaC$|G?nT@Z{9=%L>iXvP?*5-#2q47&5erEF zZ{@-RJ*lEzki@&4KF4hQqGoC{&Vbxme*vnD4Mqw7k4=L>?YC>3Jq8y7-JvF>Ypj*-7*A*mOgFkbZn-{rI7f0(B$cRx`GExktxv0{-6N z3lFw%?Qe=KJ$pk`64b(Sk$zOloMPFVwC~-sn@ap!V6-c!g57o&Iz=kk=)kZx%v}U` zV?uehLUKdMu9vO)Se4ISgnJ1NJCGr8E@+Ko%2ly}Mv_tC_IX1cU0l9NbLk}v-dx_4 z^}#cqNoRZ07p7gABo@)PtR7?X-4)jRvDm(xHpmiee}uf7xD+Vj5fRns5#;aeWT1v@ ziBaw9HcqLZBof#Yg7eiX71~IKC40a0;tNti`G@t}1`}-a?+NvKZa5X|>_=e2gu4e! zDA2bD_%X3%_%*A3$gs#Jkun=b+f#fx6CwMA#k`u=xlYj){C0 zEJ<&IcsR0Ua@9p_%rQ7KT|IXMe*plz9hw}5?ERr{1Gz9@=etlD(<$-IS6^1m+~{2H zr(J9!9L1JM@u4$;m!i9WK~ObP-{xGIqXXt4!`)x|Yb3RbTpH2m)ab;ED}Xg?L+k1^ zk`n)cFP)%@$aOd}sJ^wYHgS$b|3g>eV{V*+8@xxOk+HIT(xpm13&jfxBh6@*-N>TC zf%@bY%!tw$u6350-$ivX*EMT#zcn-*we*r>Kq@thnXSDvLS3$F+M=RBq_-FpYHE@_Ckr#6$i<#iNgHXIy>v@wv`lq0PmHOQw~51;INcxb&!2sds?kucs?WVx{&)`>7M%#ZubEUk6ij!r+(;G3wH)6YwaNA}h88@bhmv=6|UH^he8&51q zoQHc;W~)RRnQl$}jm5WcYH3JRk7~*ibt3U@fSGYqBT@}RJ-9vmEzv%NX#Ib#2To(B9A7NTGNscx4$-Z zKXt{ZJL;%C&v+sY%*PJ@jM#t7nTZUkOeL1bIm@mNBu zuS+fWR{eex1dM$3uQStvw^y0CH1A*!I}2)>?Ki{@y0>#@oK0QjXhpn}O+7IjRt!L( z{pxhnhHZH;)wbwpJvh|K(6FzW6KDG`ciIKz;`Xq zFcA;)JjwSGi_)vF`$HSf&=hPm;&<32vG`SbV-6K5@@)EkW<^L*=Qx6se`$%_N7G=3 z)#AyN+7#3AFs9z_2^aD_3bN$FnTym{}aKO6aKv zi6jPWpXpz&o#1pLW=VtfH>=&i1Zzfb8t^R0vHKFJuaGpH`jx|X8n zIZ0h_WD!Dq^z1)c!PsBfmKM!hQ!-B(Dwy!>N2^g(X33psEUf%;)soE#W7AaC z1bCOT1)bdG*6i}GMFgae4VA8sOl(#u&USl`wItxtnb{LX6C1}U3fS~Za@9%Y^wok< z+Z$fuEar})*A`eMxwbhl=bEA1U!Dybjs%{{y&m`e<*bi*zcTq0+Tk(bJHP9YE_g5> 
zE>qo*LSRQHhWDE+EDWwT4PxA+*8ocztYT3}1OCMy^EPY~+Z&8^%e5kG*Z*# z_n@?MPsR~D*pe^P94U!s9!>tnGLX?w_M%l|eMNF}E%?Zv z7yowPI{b|4#})3&p0l}r_hPUQYxm-!-4t;)&UPTfBB@P%< zVsT7Zrih{QH2R(>W`8d)u+$rIgJE-7vz~)2-+=P|yx@6jl()pUqgCFH|7Y z?cSW_*Oa5@8nz0R2?w(jWbZ=o46sk%Z|-}YC)wxg@eiI3BKf!Ov;YA|aAnA1M86DlWMz3^@SbU%Pof7VkR*KD-(B|jT zbzHno>gh|nJ1CEy@ga4%=W;3AHZ5MP7g8V^^*Q%UM9-#)PV;iZQRcCucwaxdl*HQE z9r?8Aiz&90n<_M0zh`IDvFhVrH-*(-4Gldu z0&eq0RPtZqy4=LFIsp|`wE#sM;4WrHn4n)(&A%_Ny1Zlmg-W^UJhG%|1fivYGH;g> zl4;Y!J@Iy_>s2|TmE%h1ZbAkQpv{KrcGQO)((->EIfs#uUtSB`l$D0YS_!*Yp4%Ip*lR-RKN0I{RW7F9*@t zkqpF!in|@?f*pX_?s;qY1NDQ;<6e}(7aaUVEq3v=RRx_-wrh<1Oxnly_Zac$#|xvX zY|h@4yZeV)$3!BU_EWrlCO!6pREDFVT)o-?O= z`7U{ee_Lu4+r66(?N`lHa5!UM8H#K3Po+;USXo|WNC9jYLlg)TZIa;Z9s-}u=B_RV zByg~m$8U!;ry8F3j<0NLIPC^+DusizPI!!PmC-P{0wwMwafh1_^6%=8AC8y2u%nZ7 zj$!p5D5dprlz?F#)6lT~!=6^CQ%;wSe;hc#u zvGLZsk2fY}Q}f(bB&Y?K(JZO4*1D64K1~XtYewhyH)@kDT&FGHcS5c+?A*mjuUAD(-y^Q zf#3c5e8TL??G_Wr^z78=*FCT^W9z^GyG2B z<^F#`Dvf7Eb=N-=r-8Zyu2Nf8dTI;b)HhmfySF;#X-%G1&;_-0QDJh7|S zi~@X2(zsMuW8T`TIAR=qxle1LWdZ&um&Yw^WvyqmwQj_|Oq z-$Pr4IDOlnSD)22*Cfh$ODjcbT`wmnT@6ZFeujEFBKrQs7jgXB5nN7o~3DZWqfmQ35k!0qwk6?O9gV8dj`L1 z#SX;y&lI{8FzI_8@@uJ#3uVB7A$x+J%wT_h!lzL`%HeJ&>>YWzfQ-u`o)$Y+F1+`& z$PVEG&za!9I)bQ&j>qNl*yJ4HquZNmTMVI*Y0DAR0)jBA?BNOl^8Uh ze?DJ6Z!6K_7$I><7*Lr8+~MrU-jO&n9w}In846&BP=t8qH%~oVet%==GCjj@f12wk z`r@k~$|jUhAd_>Biyc`imq$lyf2(V0>VPk(g2oEC*R|4xeP znZB-STXUt5_cyT#!VF;zM-NQ8UI;U8qWLUNo?2a0?zq8IhA{d@#cN-)>OYTGxw^Kb z-ZqPwTlF6Ih4bsF(AvZFpR;;_;vrn{@?BbF{dn=6I$@p%9?yx*xhS_Etz>o^J5`la z)Ga?fjSOS7Yr+bW)k8R0i&KU@tQS7W&P$NQEqp#PRFG&rQ#tz_(;1uUSVyMN#zj1d z$lPqGJ7U9B5{LsMKOwjX3Y6$)l3nW+C8(Tvq$s_8F`-jg87~CEyY=Z4^y{rx;Ay|z zOxrL{BSGx9jD2?j8jXted^444XbB^Oib2PReG?4NDC^)Yay7Rq8-GDRR$t9gR5-E} zHl9&t&{`(yUmh z-trI@a^Wk0u9#MHsFfit#7L0iyeOG?$Afp9mj{Ai2f!ivzmkDpD8Qu?EG!Cbc9zs; zF9#ijAEJ01X_{pjmXGm^ntwtHa z%}wtv1yf`w;z5k^%MW=68#NQbYUxI)qn;g|%_tBUrX#Lr zN6$@y&AGN@kpxa@?g#dbx}S|SM?0EQNxf3BUcUruKlLF7zAwDhgU<>nR1h25z-`iy 
zybThGQoPqf6OOSdCa%AQFUmswf45-lzdpw!8_d#Y%;n|?EceXqBNBacCKl>8=F+9b z+svDqFImu+t9LklGW$ngk(r~}6Xh;Kzj0+Nrv%NcpJsIlux+;UJPJ&kcfkGTLU_hx zdPu*|>L%7#_+V+pt?Kp^Ng@Y|`j#rZOew>vl0Gly;)`RJH7v-t4OjNF_j})b^hpH5 zb6TjV%z4HT>HqpxrdJCf@Xov86&z3TVJ(1~+n;Rn-f%X_nXTc!cdVNU*)lC8L5d`0 zu511M7i0-0TN!~V|M2h^a(rof&U4bdC=*<}VeLpKYTl1PpGPjW!)=d+Xx}gXUXx0v z8%)~{bF>2n1z{H<2@k5vk7{$RMwF2C&m=V$Pa~K^ud0maKT|Z{CRC9NvTFo80oXD|^qp%qTbm1<%DMypu_& zwapZ40*pR;Q9ap0-O)x$WyE|o^hW4kFg9Q}X$~kuWy}arwDbazg5F3w1^vc_dZ+|PYGoBw7kf67XQKLDsMl(mlWvJBWGHrHC))j811=si# zT5*S3!-d7)ONIrrEN~K_v{||ZS3|pEQ;)Vu&tXo4gEj`?SvpG6xcS~~g~7Bo@&gz{ z(cf?&x(3P{cD8&ak)7Nxl}>^dQz}EnlX(|;y~>dP`)OIwfkI0ve+2*gyXenM-wI+h}p0I-Jc-%CB$b$lC}xJ@waM z(&4$9Bq<=1`E*mv9q2yom6RR&8ciDqU3lw$hB+*K6dT_nJYfKEcsT5HV~`8gp46RC zP@McGANNcp-yPKH&lD(4sps3q=hSa)Q(wkzGqR=oXVJN%!>kJl2rX_N;~4S4FA2(Q z+CS`ivU?>7H>`7dHRMEz5&I!%Zkw>8P{&r+w_V+-iiu-j|KAh=4uvUZT$Q}DcI=N| zWE%AUVns}Rdh6}4A<|acU}dKq>6Gyo^hW)Qk(%o}K`Wa*AB}Xmt*E6j`)|=#Wg5-o zHH%yI4G}i)>W2hO0jf+@@%!*G0#8$~&M_G$R~9!!QH?GE;^OQXmWqm(9Vb8QQNR%u zy7^Xbv-A99byqhxHNMA}J6*hrOsjI+2#tQ_at-Zk!TIi^R@d^mIt*ICKuvy4wM)no zwYzA1PW}>H&a43a5ntUTk5-uI^@T&oX)0eooY`}VEw7Sfw}FuR;~U@OPM{EHCBv(bJ_O)#i zgq9I;L&W-DJbot`ZYaEEEXFIi5jLAbDEmgFxmzzy(7zfv@ zyFPIP`q7Xi(RVQS#j|ry z8bZg zs~vdI9X?Chzj70*`P|uXqa9Y$Q2e4!nBBy$)V9h-v)PVY;4b4`C)I2d^j%h>ZXvd^ z(o-_>NkYZpoSOBH4^*w2?_N5@LYzjgrH<&%iwu+M#5^78X@{A3L@?+WFln?R!*4cJ zZ?$^Fi0NLpQ{IA$cuvFWsIXUi&&C9QnK>2}*J{DQN=DBa=>?Y3k|~GUQzv_l^>(u1 zV<)GmS_oj;(~EbGN5>abydDOI@fE2dRIddil9?+;8{nuL75!0c#H}_K1x9ke_VI1F zZAFQG^DG3H*PiQ0xKPwhvhFhhppYG{aw`_o&@Rp`vpOP-3cgizH z$qczX9g196Q&m$tRgraYKVSYA6erwbW67BlhM<}5lH@vg_7`+i3S-AO5T*5jAn6{Y z$%OjK`Q)t}YPv_Tyb|V%vbDGiS&!jF@IXk8#VM#3Hi1v|ieaBh2W^TWrQ2A>jBw@& zMHa!lBJBsM!(`1VJYr>NHTt3$Y6`aav7mHP`q=vkvVeWE9+ zsI(*{N`oh7>i7meUgc)8C60ildC*P6l=Lw!{^jA<$-pFsR&3nfiZDas(0)Tk1K!Qk z?m5zi-YEOyjmUXg5AUSu270t^>GHIXn$Bg$>_3HT0?(6$l_Rsy>Vxb}?8_A@^zw6d zrRzI7nvE)h&BgXimY#@F@F2^_UD2ZHT4XexX-A^ilfG6k%plf|a9x{8j~T3nSNz(R 
z9*uXA-vI_KS>f(a5%fyV*2Tx~#s)T;-Jd(~DP#pTRjhtT!D1GSbj-@YJ?|emf{v}m z&vAPVRJkcn(;e1od>h9(m+}dBuZ@sek=-n?N7rg6dVWyKCZ&Ld3-eT`3vI4u?1h8o{2GDF4>J>-rh*kl)fAt%`jd+#|4=)=|TtDj|tq z9$SJ{zN_5CNOs;(4Y$^8+(cfEy;75`0Q{S+gE!Dl2AqhgjeS63@(04KXM8=)V$rOv z5>~dqrFNOW8+u>`)!zF&*$sD1M+uZ$`h{4a&tx<9xrzqOydR=*tQ|~Ok;Vw_qJEZ8Q+wKyW%4{0=Nn9rv7w2<^p9pwfIG~FwxKLB0eCmH z)eWgUl--5z-RM%a|{P1NWNx`c~Q!rNEN;ed|PL>A#$ zhp*?Pic2FloZP>pQPqM#H_Xw4BEmR({V7#7BHYTl{`Ume7!nhg=eqjb;>VN{=uYq= zcPw*e_=CF6bbgma7U)zD>f$}ymG&iJ?i_0RG?naZt4m-u&F0D+6(9-nLa7*o2^CW> zkrRsykej z7z)AmOwpM+A=1lv0TbhM?61+qO#0DBYL6;xT9nEY^%{ZJG*byhJb!5ThC8JlN)UYy zj6nN$CU>!ZS-H9RQ_m3H!@k=8%D}H9@X&gFJti39ju|Lk=6q8*ZkkL)0*liCv^bG( z3Y}Rc#~Mut8^)H%S|)q8=Rf+wxpEo!clSRYIu*QwSUL;h>7P_>a^2AFl6@7VoJq{f zZJHrhsD3Xsejdpt$51Q8u!m-PywY7&WrHu`zCP8yNaZV-r`h`E=c3;_c*w_@=5@oZP znrfSEi%2n+w$R8)Gw}&lpdV`KevDyZ?+A*52?L0hvw(jk!{a9dCzBw>*pCz6vc0Iu z-LC$JJuUPG#i-6sT(*QOQ2DdnpA%o?Lfe|)P{pWfrP=elaS<@-k+zYIvX3{V#wSHV z4JxjtQC}3&iU_($+w3@PznyjC>nG-t+lCE0WsH(#ZLZjLRGM;JU|hjNIQOkpZbOA) zC6F)@3ki|JmYu*ZMQ}Cq+CW9mQ+boZ8v=(Wy^|L12OA# zk<5xxHsV(-GO(4QxQsKwZa;9Rj<(2>HQI(;sAbK%1?z32#tV7RCJNSaFO_f(ns3J# zT|5)y4qx)j0T1+Gn!p0}rIO*VOlV_t{GVBxW(KXa2z8Fi2z4iM4@ zlVP-*?6g2|{Fg~l1K*rKN^bNw$Dw%qI44akUF0n(Pq7~@Dmja3xy?e{?D!6*sEf$3 zUd3&9TlUnBv4w)+TCl#XWYM{Gg?bOA-{t_Qkt_fk1Xn-B?Xz$AaO+0a{uL=KAZECd z$MN=^bVV^kDpk*5yJK?+3h1#E*$9>4QGha7@aKL%JbWrw7_wn?G<8O#gZe|*sZb;{ z?-NO~`l9SWq3tDw$EH~B|X^Y+Lu7eu0=hdPwZPbH5YXY|Yaih0n?Y<s)JGIGejNF6V0P6Uhc z-9n_Br!L#fy034%{WE{jr#qrSu$jR&%LYR!>YLQlz^)Yk^BaAc_8ZX0ehN}7d67aq_?KusmWUilet-$nD z8}o^&lMx&!7Og58dn>d!h+sxiZ&uZ{csN|IsFLu>ZWfQxx|2F#it;ZgQ@Lo*l-PqM zugWm*lIpqj+0Ch^sD)u^PAVI0Sj@;@TRv{J=@Icv|7g|@C-5S3 z-o&{3|8e5w+^$$&f^4n?FM+ks?E4GXPtkpOQzTk~KEY!`Pgpk|*e2Evv;>1Mcf?yw z+1d0sM!oK+5CgJyug!nEDk{Ku6Hvq6)FgM$0-DXFe%3=tX zjc${f|AL5=n$`3cNAmZRar5^2hu_}dt?5HzTd4SDj8($;=byO81f-+KYUdzegOPky zwV*`ERW@Z@h5xYTq6W0S3NE%uneRo>%sQKEv&kyD$Xxv5+hm>^e>|O8G7v5lhzOF% zX{3Ve$vHSWGfuP!qK?Dr-{&q_P+dG!4 z(mo-4jSw#}cvTUV+tK=a{lTIjo3%Cn^&p2rpn-(IZloDiN}JpBtEaMOZch&}(=*!1 
z$dGrmsYu4J{(@%v+$Fg_-_YTv31i$jFsZt|Q{-8A4*g@Z-tj{hDc!Nw#{35i$#;R_ zeik*#O~D@R?Wzqob4wN}7b8cnHJ2NR zW@)KU9w34G2^rL7(P>-C_%(J4&^NOa`j2-YiqbF{JaXO!+uPF?30J*unSJIonit;4Bh6geY@~jPE?0yybRl5Ml$1yyn(4>$kDB(8ja@^RW!k=aUM>!J%UD==)6?}) zPR=bkKQnt;+)7#!evc%}x$_#&vDB`u$Y89y-FeSYI2tyzqp@CIX`+@mrA@+5gXr3o zYxSFxP)|-af!OR9Y0B7gur7xE!k<|FM=@mWK6rlA(w1?&Vhc z@SSY`RS&64r&Im4NHJ?xIyZFEVR2qaA`Fpd&@<>dcGfM>{y4=JlE{Vil6kB+GN*R= znq&Ucr+hau%5Tq@Q*P+Ni5}}py+|0|5c!k$;03V(wf&J|7WqR)Bzwn=tNXc8UFF^t z%O0&D9h_87;v2>LcrK!b`Hhk)=mkwiw{9zYcqUh+DURM*E9xv-HBrkizA``|z3e z=aRw4u$E>@nc4R6r&uu?chchwO1f`~7Q!XOjVrZOY6gw^4y10ZBe}v~+^INoWm2Kh zF`XT#!e(V(LN;e*3Cvg{wQ?laePU2`O;1#}@NRL!YAHQa;~c5xc}Q=~UA%tCqE^ z+9oHm(RU9c)u&xI$oekW=_+&bC#<19#H&QL(J#&vlNbs-cd8y09f{IQUo(z=KQRTY zSDU4tyPE)YLaPMR>aE~qe+j1jQsk?}hIWeExh~zShWV>PNor!c3B$VKDuR4wiuhpd zhilFg>4ehWYAJLis|@#orG=-=L>bhP@u}#H%kmNZ-@Y6K`g<@MK#dK(oyFpN@#0V6 zSWxHTZ_|4Hm|~1vAMEGx#_%`XQs-Gg9`ZFdxuH@?m3eqcgap;LE~9u#G)Pu7^-KqH z6yLRzM(~}%6EHdXlYWP@H1l#+qBodq+BjL6B_Y|(AJ5->_{#r9RoRF3oSDhD2}a10 zz*&un^fBx9@gPDkGRjy1J03SPlom(I#83jehSP>aZi1(npg{%kJv{T*W_J4|Y}RCv zo>4}$zc6u1+g5q3Y0tK0*^E0n#5D*rIdILOh(QX-i3Ss!N6D3a)05s#w@ zBYfT`y(EolSY*v*UM4q9%qN?d-*p6-2P0;ow!KPy{XCUZ5q5=1iC@Kk%*@ zqUof_z}n22N|vIxNg)gUZ#m2h0_1T)#JO0p$xO10@N$-2@)W&H2pJ-8i#y){; zJOI+);2Ziw-i2nt?};C*Rl8phFUz*aSh0(T`j#5AC=@UYFX*YI{rAU|OC>j!i02nR zV)B6{HCSCcjy(N)=@>}(>3>j%AKmGY{k){l-zyuy4Xeqj^;wE}$#}v0%j`DtvO;<4 zbg_To4bj2;XB_751{Ckd6egV2c8B9V%d=Q*$9d3MzJEhtAJLc z0J=-kKwW50L*_v5~u?VLULeXi`DKmEYFyYlLo+WA49 zwNm52t=n#mmmB=aV>`p6bq33Jm7s$Tanvagt-r~l-H&BO`o>lTRya!l>ziB4N(YN_ zU2~soVW4~`?uPEnmAmu#2Q5vua`HU9OIbdrBAXMvApqDFF@${uTxNWOo#k$w84{6Pv#70=>aQ?e#Uz(jzG7@_*x?` zzxMjALM?^0OZlzr^_mQaU$4ZTGeLIrbD^3{?-~-7v=fCP&7Q9wNFPzIz9nn1=RFsO z^0fZ$RO!Kr&EWpJGXLTAe_q=AHjP8q;ObbKXG|2Upj&B=mnXc}uQLl;$7#x&lCglE z)JkZ(f&kj&?g8c_I=9MXP$5dZ2K>!p+-Fk_DtBjd2GT|2G{=N93mpU-O z4PQAQs3IEp&~%1y^JxFybuhSMfmtC~xj!u!X7njmcPoo9&Qg4n)TjpwTqtMq`u7A|eMqEmth ztY)AfxMsL=eN?9BZ~RL8PkNSk2!QYGKs{Rx{Lp3(={dlsY0!j;W{iLO3`w@v;y0nl 
z(E>fktsSbTFxi+`Ljs2!Q=6#M#W@%J7sbjSMV1=>5^o=4I-SBsy#-GCUtpkeV1nhd z>T&o+qjDlT;kFN{R&H+kmKsk&D@<|bq88c{-WWcx{^$Uu^gEz_pF}7Z!vpHRo?H#=IzAtg zmm0eG!`{&u5ly)!`la8CpKTJ>`N&>;qlP;5L9HTV6Wc48cDc=kiY{`p|CB+D_ZND8 zMb^hI3nw=$32)G!D7Q>o3dnh2E49MRe9wR0wE!vPYD}~%YyJEM?}eUM%e{+-Ci)m< zp^Rr3fY4cIBMF;=rcN}cxam?Da$5nP4?yG^%W(i?4?sii^&O$3Kg`R_Wlc76_NnA) zy0SLh#bUssUZ|JtvE#V-i?9187VQ;8c_h{`hDc9bfVFeB?tqqq8kCE z`J8@OPh^BV-1r6v2M(~&A2R-EILJ_=61jP|D|cB-_i@&jDR)WyXc_ildu#txyKw=; z9)U>XM}1L*o*kOf9M0CB0$N$I1VXXTWLK~M=7PP=O+_{7Mu#|kSuZs)!}uhI63Qn* zHZwG0-n0efl*o07sU_+RnH*r66yFk)761?geK(s7`njDxznZSdoFVBwX_{a?!JJW- zF@|qv^+mp$M0I|CGw78mYK=&fC+IRt(#;JG8JrDH*r9G!C!+fum(<|4Y@K1zkxiQO8 zDRY=X>}XNrdQ!N91?zG^LqFY_$&GuLrE*{WmglJjIQ*{zh9-3b1ev5*HVPOWkZ6`^ z^aSNMj1XzpiSJKAlvHw5`s^9(OvPPhG@+ri&=vES$k5o%Qq}ZGW{5Tm8Z;)fTuXxv zM?LNUAX4$6{x)7=CPn@HSiY9Wk?;UVo0zmrtHO5MtQ{Uy|CGL|TWcfQf8G6XQEF!`Kw_<`h; zb$VRQ4F7lyhM+be?7+zUI@+4LitSqv*7wMwt}A^*8|DGmUx*3;z^xoaG^b#(2lxlC zPgCdH*p|Upx~S8mCHuUiFuU=jQEc5O_Gel$Kt0}mO3jHMnxKw@Waq|%V40!>Ind_% z03~7g5^VGYw#b#r-Bu&=3Uo3);01p@6*U+#yt$qr)7T{vr`cYIZ~Ues>8W=r5>vHj zMyxV_arfvFpL?(5jmgfD=L{~bTX;^oWC|rFpSN@3?jqRngsvYUaPbE-ufc4V=6624 zHDl#i&i-n`a5qf5TRl#fcI%b5DY?q%Ix|pX1(DFTP?{VcKHSt*u))8gQ_#v0K zjAe@8MSgG|q|5Q*+W@lVd3U^udhF;RU6LmiQi?;YltZ=m#JmIbp*g|2a3(T{a=M`G zu+43Sj6sSgrIRmaZ%$m~D)!6M)hu#H?MIzcl2({zXf6JA`KZ9utj%nWOW<)yV4{sb z-kb-zs83?=AdBk$hxTk1TG;LmYZD<@vm9DK{$?fk*1%oD zUt-lU&SZc3Aa6*G3ttTrSLzGQp}iHTuNK`(pJIJHXu1Dd1@BI&{|olZPQ_u~Jxn@* zf&R-9gP4V`wC#?oQyy#8pS7EFz{#1Hw{p{_fHe#&-MBkK(Sg4g%2ybJkX?RH*%6H= zJ$#t=!w{X~Ch_HN)c-|&7GidJt-H;KE{xI)sA~AHr)7D9LVD2hAc*!?@w@G{H^+s- z_!l(-OoNUuQqE)xO`{_DK_0XL_Pg(J;))E&WCLN7g0nj}*=*94+})d*#tVR3eoOBP z{*klxGZ)^i5`!b@PwlDR#&p-l9N)R>mq%@10*s60yMD;GIw2NC3YPv{>rNd<`fKK6 zZmU24Ik%n=mN6*IEHibHXibc2+;I4LjHz(-sp0#KcwkR}hhKPEw|Pu}GZ!Ur$CQpy zRmf5hFozcfeVxuPnC4wbf$(6Scd=tPFyl1){rw7}P#GLYy-t(>s+znp0vMhHxaL2e zeOr^j&*HPw9#NILJzuL$B|FT0dCdIGO(*7M$(9Yhca5}1H)hCweGsFvE zSN-6jf?6H_#EJ|L zUqHk71*AKt7hWe125fW41mbtbb*tnKu28%iQcYk-7un7R!3Xq&n^>W=-MQF5i}^18kq^n 
z3;RWwMlV{?5w3O5KO(1yyc<|!Y|oI__>gtvCHZm~kXq{GWWkK(+=ICo8lp2!k2c?& zbe1IA6+URi^1II&8Jy4ZcgtB0%Rc>**K-ZL*i0%}g=OqdZH^4PLe#8b zl9uj7$-SA^>JLsiy_e@ZnLc(P>5{8ME+S<$G9D91>m1Lw)Y$abFTQ>>6o5Jd|E%Vs z#kk@{go+*NOrp(FK>ID^Pl;GWS!vRHWK1Y7ae`zDXJSt9T_^jHYb=4o*Am&;2cPT8 zZXgzj&uOycq9452sMvmUj4lGH+rF+a@bxUpVg@-+82{b=l>Gg;>(M1JB3{C%Pw}0S7m^AVA z%L!+G#Pyepa^jbxn&iM-E9u(bwZR@o;Jf$e)T>bR!(X> z)BWS+cN?H(?JLKoxuyaM{;ckq=!9zgUOpM??mRY(<-7*Ql6aiobQzD0@qcI6{HrzNhnS06VAea-9Yd3u#;UpNFZb% zBd}>fE_GIJ#RO%2eQq$3i_oSGk({cz2xr~T$nDY9Mp%?sfG2x3B}NC;5UCW7|8mlv zp1ra;^#!G1Ine}nfrIN`1qjps+yG}-VnRvxb&F14-Q^_zR((%^;|$ICs_MnLHO|y@DCly~bqf{rRW~rg|btZ&+48wPQcNcYeOUOQ-HOCF%QP zsQE0IN;9~emeSIODK2PE{f_x|SvO{IJT{j!bGePyI?OaG)AZ_BIBTiE zKTZ4^RZOQ%LkwvOeks{11Gk~LC+z4wZCZy$z68;0qv2+chxAohjgWRT8-Lh2Wf*-S zf3PVM)nW+R>{weIdfqlqo>~0W#!Lq8RV&4m*G~3YHQnvuue4@j3q>88mz|osE@m`7 z_l%IUQP=KJ6}U44hWAxQ_1+IX3b^q?(Fe^J?5JO_RM-ohQF9JOYQl-nlk@ACi-MN^ ztnBP&1TioDtA4xDr4HgOjID1Hpvj=^Af#db60SZwSF?L3fj-lsV7Fb64u!M4eJ>NV zoC76(vXHQw^MgffJ9K1cXL|eU{kkaW@#LXWb0&9HfPrU^x%7rXpi$$`=^y^2feZ}d z#hU{5CmMfq(`Xo=V=YqEj4}dtK928fd_81(pKN+xE8@VyEra!|=k3YXcWQ)Q{l2Et z7if=d@(sQQ-(?ej7;B;vL%NXYvgq4%Abl~NPP13et=LDkhGVN0OZ;~Fs=v)&I83xD zEviMbZ;-1k)o%8Ws@=!t!LPI}P4W;Oc3um(5m#!uyR~6oSGnRR^Kzy@#ihs`J0a0s z-<2H@H(=b^IRJ_bnQ6bd!0EMsONB)L!fmToDekoTLPe#nk_Y`RZWjWRzjn0^;M?{UMfXOS;hJ+$^?6;py=V@DZV3Wc ziS-(t7rPdyag*>LUhwZ&2!(ckxBKOO5nTtq$7VN;ri);<7$q)&>QiQhXHn1G>C*a@ z0ZQeV3iV*Qw~#z$%O#%ZMgn0u#Fn}X>!=nqi64N|NEKg5+x$I9ZE_!-tlPQui-WUK zt}%rQPH0ptZ!dCiUJ^Q#Tg!SE&N(0UD7?|$&5fO*x^6<&87LRc$5Q$324^OtARq+iJ%b#No zr$O19FNb?4&JO0Gu`LO&t7mP-slyz#S${dh`DZgEi8=8^iMz|oc;(vDwqp&6Zi<(e z7kb)2(U|t{ zM?k*(|p0LjDA^!4N3%?>`RjCcx8)yC+Is@K)ui?(&rf7DM(w~&%C+=}) zzxUSKX*Q+W@ccyS9ZPv(S8yx+fvD8&u6VE#va#(u<=bTLKqhSt%sn51j@Ovm+cULe z1j1nc;tdytY_>FXcl_NeQTk}X+PC?s<-2CrF7V*STP0JnUo4JMiJpOPr>8BeR71_T|O5HZ#`ueR;Snk1tiha!#{O6Eqsoo)a zoAp{WLTHsf@rZhdLH1k9sZf6$xPgB$ zCJUGO(5JT%sT@|oJKiPzPUEaW`&Lnq;hW#8nu;#`CSI}% 
zoR7zu`h(fh_W~4ppNtFDnqQUsI8icUl|+boZ(-z6&MGx$)!X7-E_}{7B=y%#A>i>R;;+scTB)t)sfpcn$Hf^- zeT@-cS)|3GL@&E+=Re%=co48{Yld3eWZP)+sZmVQJ z&OqZ96+K=G>=AhzXB(Sd2HrT;%owUlsX4tdaFTX+MU0k0$W(N@^;!cz zD{irR9~-O#+gt!#p9slItjps1(@owGm$w2qANuuX z9y;e@ALagfCLv{YSdwz;r4tiP3ewfl@Ov*O*D?wFGYqL@K0QEtAO!085QV4_@{#h?lyRNF+WDz9EbY z?B$RPzwIt?JSZg7l%dHR@g3Us>hYYeZ!#fMtN}A+sFyzv`dD$+nfNK|()AxJ_q6AK zc&t@7I;Jf-rljBl03@OIEr=DmZ?=HJ36BByzZwa=;bwwMoax@XeZBNlvq|LLTmSA9 zu#*;lwU0CrDt~dY>sI%F&M>iXUng>H0{NiZg{}2?TwY==8%^e$%(qBxu&)DiT^4z2 zY%eqZ{_K1%hN{~XC3Gxs_uj-b;CjH{|MlbOdb)3Qa`%B3m}L47P@2SnU_LK(0oisJ z8wR_UY6-H51}DL|qwZr3&)*?l$nYzVxJw#Z(K5vZ#aPo&ew^7jtWSYHXU(84|Fy`9 zD)aTlU}zlNf?^*g8G-mGhT1*`KdLGGX5L*IaP{54+9|KtL<^#~qWqb^EBZMR0^JyL zUM1CKdlVKH_K&Sjuzl0P?cLj0qeqaG z|KYKfxm-^Ff1UN~1_ACuPM~=p=mC28PFQ<$RjsK&R$T<7Td{ttr}0?jM;;XPP8u{+ z%=7yo#L6GpuY}sk!4jgV`I&#Z#X1nM-w{A(FnKpc2#X#HdJOZz+Ja8KeZy+oljyyJ zApd*7z3(+>5D;;jz5NHp6L)M&_mp|40?Kk5etS3}XHsk&qKPc&dWQNiB&6f_e*8L$ z0`woA2@)hr7oDuyV}Dx8v04NbOYghZ@|SdV%Ju87^}cE*N#EPYK1x6-6s;c)(n+Bn5XbUztaajy0yQ2va6HB3Bgy_;Eq z-|@@@^AXv~6w4N(nSKNBFzE|#1V@S$=+}sA$~b1_(;7mE4{?$ckPN6HDKCrJqwV}m zR=959EuT(;o3EL6L5Jq%Xe!0Qsf(xM!O(#F=K!|pdY0?7y1$@d?Y=9*jcC?r=xlzxf%=}^%11*idF3xb`RW~wLlg}rLefOCBy%GxFV zd58uM6|zhT$*EOyC3iM7C`cTAO1NHMLiFWe$Cg_xP+h?kka3w@hjh_|oY_bw( zKw#tN%a`H;^W}4l(1gbBry2}(H?Q+Aa5NAg8K+CQt0>le!KK3WwHL5y9)XyQRd_sg zvFzrdo)*FBa+Zz@T-{L`@Ed!$L`HewnMCfgmv8&40>z9BQu=oLuHO2g0sVxc^zgU= zimd5&LWok50777M0dF%&eQZd?*jWI75qd;4L0o==3+nyU-W|C0fL?AuG3n3N?2?+{ zANA?h@8$Ppa1psQO6=HoTplT%Gi(@@tJUXR0Ci>IS}QKRG#{vhF#050X89jh&vBA%+ig^2%^0?M$DaL4GyKFIJkq zwWfTe(H_s)*EGJ3UwiVdXzrrIhgrjhb;Be>x(8Faon4sA66ySDRZbx3VnVyr*_m-X z`l^gD!Yu24BS??-qe`_sUGz^v&y=hTV2k;|ky}<6OW}B}U2EtDOU)tLzM;{uu{~L@ zbL`Jh&1~MnoR0wS3zJ}9J7~DN?HW~cOY?&@KO;PZK>idL_mG^5YhK-*c=fN3q-4nn z!{HB?OdvL*IpKbs_b$%M{owg`KL2QUV^@c2977bQ-)5LDD*!quh*I==wR3IK>yTNr zbvLQcEu|RBO#@^k6kG&X?#EYM>Wli~|C%Ybr})Kg%xSF31pnZmCcz(O-~jE;XGXg^ zG1i=T37Z%#x<_K@K1w5g{t<9rKmOrk;vKVh&>hsa%F}D0OeKhGLk0+WIh~d-2KZ6Q 
zeI!>V0?N%w@zXHYbtP>dUqqTr&I>NevbDD}<1E(twI2*}6-A#u>zS8z~W)ygn*%a7Xu(p+4`N%wdcH18nYNM$w{U2o20)uI~B0VL_Y z4plmo9s#%Ys`#TU3dN6C1TTIAergv3mRw#a?E|qq3v3J z?U0{7RBK{^c0ka3Fx|D$Fn;Zzrb6lACqINa+h~}?s=8W_%ti7x0(a5x>0(1a4%C87! z0s7X)j6cD>C9{gniy9i6YCJpB6a37tI|H2a%?_o5$`B}z`K7;7DVpf)--IS$nb9x` zZ~y`1*7rh}v~uusGA7yg8AhUIfucwu*@au$z|eIyY`01KInvfr?X+x zqSpfb95hHpy(T_eq|$CFx9INscmuy7|MSlNXcoFk7hU>j!uMuwl}$n`s0^ zx4Akim{r*etIU+D$Yh#cmfRT`kvCS%r*_n0mQZYc0NCA?>0HyJ6G5?fv3*Y0$2_Tc zy9bc2F7&4|3qL;?SW0b=X7;P7P#_+!lH)% z@CICfjHfXIMhtD(xZ; zpt?#y2QxYr(T14%zRZew9shU;qXx3f-Rz35fZf-_wxh=Y9YM3Blm-+zZfc>sN-|BB z-F`3takvv3ngcLs5ow$_xv}czOD6Zs8~jjZ*FqA#e5H(9&G*v0@_WtwK5+$yeY2zc zue`fa`bqOLS@EyP$t6kQU9!0J-i`dc`JxJqDzqONN|u%vEQi(4|GQ{Bvd@V{Ocm!N zgb$|Y`~hUXplAh!DNbJB&E*X0^bTbg<~>+)F->8g<6`Jh1>1bLsSVnt7}s?~f+O&s zhDehBW6M!|Wm7a!6JmIuAMOBPc1%CW(~fe@Fw%SWk6Y%R!gwb~UnO@n^TWmSfvb$2 zh#NAA0!Pd9_?x;RoR4L!vILJv7PuFl9KP|R>+wS4jV}d-Y(GCia!LAa-pwJL4nuVg z7m6L5al9J;oZd`jy#;S^-qweovS@2i6j~nct@EEN_$3jX$8zB093|kG@>eZ4vq-g` z|M27%Y(kN*p~fr-n8n_CG4D;(`=H%@gQIST@C2a2v8U5!VkQHF{nLD$FlYO=mc!j3 zh@a*yOp{q9>xIW!*$R>~nNr(uN2`UM!A_o;~Fst^< z&itGdzc}rYC)iBw_k1{CzKMoyGHBU-KoEg{Mf!JYQVu4K^G>Cn-0yA$dh`{cx#J*g zUui!dh&oY_J`33_P@Vj#0O|UIEW%LkJTak}^*tO`BYEW={z%|~CMCHkIdI?NJ;o2k-EmFj93=n8 z+B@lTJY)s|`ic|WbA|GMO7xP{=Z!LgGaJN*LeR!7Nt*eke&7p*`1>4%R{<7?+FAo~R=IR3q++aH^DxdL4cT=(pm_O;=IJ9As_%|4UA02$3oUDxd#Y?D(7 zGb8dn4UR8H6J>2^s`y`fN$pJt;m<@x z)!@}1{p_7+TYYCgktJ1dTvYqyU92{BR@Eg+v9lKK?wd@*aBFq<(zexc%1`)DjmM2n zOG;^$z>4c~d#5pRpNupHx0Gv+lgL1EiGnK4k|BZe8S?Xg%cT;Jnucnl=l@Y+hvoDW z50kx4wh5a(QR^N!G$gy7ia*CLRyOLYJ$k7d{W^8J#@Gqq=ppr#Mny+gyoyEm73lo< z?)P^G2Va3=E>2^9t>xo_w?=la#XNjDQY z5G^a|LJ`M^<;TUFYxQ`WUS8xBzGx9GIcAJVdp+*JvsCajrJb&Jdpdny6hC=_>2cc2 zS+VgKwp5D`8JN$9Fo!Sf;~h*CTfbOkXc`v?BBI5J=_e!*;*$0&WwE(s{T_VF-`{Kq z(?f4Cd@hOnvQLs4zXsfO6{u{^G~qZ6n1ypew&Bf>HeAlAGlE{ELFb$Mo}@(h|FEXw z@3g;1$0af|M=mda*Z)ug4-cN>iW~}~FfU6dn5$ZEe-wvE{he_e&rYr!sOotnVh|U( z@s<80-RyBckxrBu!K(RtM*ZlBL(`&U1@^`Ld9TOr4@A$+Sm(730?u^eS-bCEhLEC* 
z?6|rUF=XWvoTjor?1t}UuuWTC61W6-j<6wfspgzeZrK99?N&7bL5VM6-vGO3)wnQ{ zB5+|cvJ7Sjx_r45aie@b7%++(OYRtol7k(nVTK|DAe7wGbWUWYe%lK}KK9C`Daa|O zLkCA*ez>pbA_rqQ0Sy$#bfMqQh8-6&-ZK6BOg=N^3zf4;sQG=cjx_6zEHVBM&$70i z@Ref58C?1aeiCK;>zQR7^=Q!`iJ@V`DHe+v4VN+M&1#G1Gy@xKLkn{XG-yyMx^`MN znFu>^CCQ*$O|JAVz!=t2zIWuucUjR~$$wjXUfu^l>U5*|Qoanz+1-h1dR~Pv#U+er zh3oP+&2(>o*=RfX`Ikbp?!R3c+P^2fdD>krJp}5uo|k+EHfIG}5_2j8`u?D!(L^VU zwYc(HBRI#7k$`?&H@#T49GmKJ$ceO;==|_&cqlQJoCwWEm5rM0FL2N%5gnWV@rC~4 z@D}(Mu(6T4NWN{r51G?x-&OZct$i5!q!K#@sOK553v8NNXuNhp`m2*C1WP@si8FjSEaBIA`7k=m%e z8Ov})Q}@sCUTfSI!1|&ehMti92*sPUdV!D5t{KM(+y!v=15@A3?Gb4ZtYP;^AiI{w z9Czbou>GvBz*$aVWSLr|Pyr7Swpen<6h)K4g>%=Sa=b;Hng&Y@%`6K8rA+$OTKLZ!YYb0ZiII`6lmt7Eg&*6Y2H1r zZH4wt1&jZ~BZ3Eb=zdcv3-Rtsp#{ja(E@am!kV<7085q5+n8COSvcK8mn@!!Z&Y>v z1xDdF&tb)#{GUZd3hbQAgtzf^_fMkv>5e!MCJ_$B80#6hJ%GLN6RS3ye_QA=?wM@1 zAQAf!^8><0d-9u0Vi?AbI5aL)z+IBJ!V_mkT)9Ft1>C;W!7m}Cxy8j^){R3f)*|Sh7$)%$B&uuLlqq8(&A4%pRN5 zH5=*Fzy9Ka5`wHFKY-A@7Dtkj7@q&|@D#XLoegp(cjSV64|CYVgCsUvSTB#l>vJ)5 zRdC{n?bOZ5W0y@QH=&;|=sFF*YY1=T5PBM6Z<9JN%Y7^DvxP5K1)aTgPuEZ6!B?C2 zslNACJ1Ew|Wg5{pIte#VG?Bn0>p7w*@~X&GFc}X)-W}al9_~J-g)pFsdaK4w`dIlI zTXIaS^=Ym0(`P`&m^vi%*aR;06r=0a7M&i~$}PSU>QYAE(*FL;fU%?&-l(!~(UZCq zKF}7MOAX6P^kE6?85;C&TR-9jx#%>)1i#Y@%!&dF!-g%Ib}BYC8P}EvpLEeF!H&F? 
zLg|Q)!OoN_ac((YZ-#IOnR%O?d9|CZcc1W`N}5r4ulTRd4kJ0>riMiC-SJaw=;1ru z@1=S7RqUO|#ZQ7oOm zzGbdGLn4sW=f`EmM*i3)R+xuu%D;}>eE$o+|qA*(QOgN<+#UGxy5Nq4^22$$p zdO14lL|v_DH55nhn(Q>CtH<+Khv&TsOnKC8ox)%mJ6g@8`9z3+1I zLyWz+De7i{1p#{CBxBYk=FiK={25?;M*vZv{8!vg##J^e?J z#HD}!ZjaxxpXd7-={e&aA|Z1rcqP}q;W4Q9?_+?+0P$E7c$g#a>Yk53_=kSS^eDCO za4&-9Eaz8cK)7am##(SUpS~;Qo>H)Ok2Z{8v-W!#8Qp*ayIv+e8MmjQP7R~iwjDwk zGd>%0Dew8N^r!Dga{!sKXk6R0#_QyR?owOBLjXm{hu?=9DA!kyhIDwG|NZTnI!W8V zlIa`$0zCB*UnzflG%2o<>tnlDuk6wML6Lnoa{&exIx*X%97^~sB^v}3uiBHz$qut` zw!BLTkS9~shl;-e^b;cVFZDlIvO60s~FH&pm(sz<@K)(ixzi;BDZ`74Y zvJKJ?43WvrEpkn#!Bc3V3EF|f<~iz$AF60BmHH%n^kdsvED&bOg>CtYXR?9>3z(GL z__6^5D!B3gqb49;NbKD8rvn42ERb(z4K^^a@~k93$#z!VEFqaER5SGLt&_dWx2=bh z*FgrfrQY6Go!7z}{_e^y@3aettHTyJ%l$vFlIdl592=2z{GgONP_X)hVth*?xNQjX zQO;5k=(?WN4Zi)v3I3{OEb`ZVh~V#^+G@AmZ-c-4Y}3d|KtbTrJq2DvdW-#f8;>=p z=cle%kf!SKk(J;A33S_(V9)s~LO$RBS=rAGXi`#U>iO-Qn%j;Vqe#}qv-Y4cnh&r+ zf99Ms(=7Hgk=tqi1YSXFd3Cc_F8|@V&1~Ng_^?!||A(jMX8qjDUcZlAK77K3n~lQB zPk5LIwp3y_g3WfHQLk^9S=V0(dHX!(@eN@VSo_d)I?tp)4Dm~|ar~3#;)+88|4WPp z@^?m6*Z@Iq&+mq-G|4UO{+%{FShii(-xJ4baYGvU@j?>pl- zB3;r>%ZtCSO9)M;JNF{L*U^~nQ}&=gL~aB+Hm|jZ3BUi_VOQfUlf3fTTvejo|8zXW z6CP@PrEx=e*8U8d%Ome?QH=}0Itp~Eg8Ek&I|*`Gs+l$6U{I9zmBYN4rggwX{s93q zSpom>;KqTn>A%_A>P)8CBx=@VR_UJsRRhKkd)oh9ez*L*q?Pjwhxf7*Dgt*&5RUOr5~IuY$~ zQ7hzNoYw`N(aNEsbip}&Utck8?eT=8Y^_MDQ$6ii0T}HvR9=$eWc58sgd|v+t2#LJoH^^_q&$@ zcDUlL9QL*`oe>IUopj@7#?)xv#mOsIWR(cyUoU82eQ^Q%B-n0~Nj97u6Odq%jmO(Y z%xN>ktEpPJIeb_u+4VGh!ccRT&%p431z1bRCYVpTkB&Go(5-tuuT zO|jNc3AGJdI8s$=8aq+P^##wf2{aCU}lY81v@cW zmjx%G%w>^rJy_29CN!ufQH4{~F41$kyHp!y(xdPQU4)#}nUd2f8x)lz72YG45=U1c zETWD5`-Kv3%YOW}RJ@9N^wWo7UtR3E^m)J5-T`*XtMsU>>7-ltz6ipEvqS8C+<%68 z zM9+=O(}Y(eZ)`Il$eCPa9nf%k%wdDzO4aV_G_-}`F${*XYzRGtnXHwhyD;^W91_iT1fWCq!A)W!MVcX$!*ushrtb9q=sS zS?ur+yMfMtt7xQss$enqt8{oKr=1I;D7olS$8sy@%nLp9pZG}&dw@ah$3};1gsdDh z`3gh+w#=ycF5Gl#)7;7^Ig7Nhfdoexw^Z;87t3Uo`hzNHm1-jkt`gXw%T-WiL`8k4 zfS$FK*|ZcD3ZdwsfZ<4Bqwdt6a2g%`<;N@oaT(!kL029$7%6bx*t*f5;J_Kqly6s{ 
zd>kYfm#3HWG2pbXxo_pq3PdEith3?jJsRYRBG{~VY}`47XLDPg-lP=+hRo6qO`Jq zum3iOZQhdef4$Ga%A2mETQHI+@l)74iNz1eMXtc#m|(P1g_BHm(3eJA@V5s{%s^9T zzO)X1xj{9ZqK5CK70$PIYw7pJ?uvc8+qHq>MuO*}PfLTTbOM6)!BYZ+j8(+(aUb*( z=l&L21(s263-3NC|32!iI-O)5V{%HnK1o@hLbPGy=OVGjKiVp}vMS6n3tp>f6|im} zXx=M!+}iLqV(h#~t5VJ>5xwB_6)S9K;j%9hA5CfdW(0UmX5Ril%t-a=k1-l+9dbma7ZZkvtL(~wPENE!DVY}uet{0Lim z5-s6b#j;`xH1pVCdxG{_s>OK2?}GK{4`1SuY|HuIy~DZpDH`M4o|q@xZt>D5H)LK` z?;7WeXU_i7srsa<)Bx-+WNMxHHf-apak9}A1nBqwYSw7ZAM^gbcab{!_7F=#CQ!Q7 z+{PX4m*MxC1pE`buqN029ds!mOR1I(6XmBzR1Cwo8%FC4IRUDJ*$1| zkx)b~R)5t++aL)(O=tcFS;5hkLpYW`HoW0i|Q z_2oIan>>@9dpTu}O`P;BLgt5%HT{RDtfAWn$YK-v!yNR_;v)`ZMrw7({WDagTAmpD z`-jIv7w>oDmG2#gQ0>ND!$sGX%OnSd?Hl=kT9*MTMKMi0{??R7;-&B+7}e%L|Im!@ zOD@vSV@YJ*QwKQuh#6Nx);tGaQ+^{9W!}Uq%_d;5%xsY&N8=G_$5Z5-!t9-mImDgt zSg1J(>QL?TN!rLj{zFhrDGoKTaeGyXLNL$_hP+36@LC*`37%RZ5vIy4PKPB&>0`)Pw>j@Xj zvSeW<{dP2K$DpagvUU$rbmpJ}Lk>TW=ifpv=gOB85>AF~hLs1->Y9L0s2RiphB6K@ z+0XV=({0m4`)*X_9cGA?z03!|A1D_h8l9GUV zg9CP7hgX#RUhi2v#)y!2 z96mY1b40x;*)50isR>- zgKjoGVS=RpFvE2tBGBn>Q|?yP$1-|KdaMt9&7YvWITZ|kkNi?$s*=h2BVG+0+Asfx z01onlXzH1%_PuwYi{c^~-d4~|$EG`orE`YCCcAVf=_I*>lN%HTqd|oAEk-McF?#9Z4 z>F)G)C%C-9*+YpjWfdDUm6%%bh2Bmcs}q+jWOwXksF(_2M%-W!ZKgwjTeJZE5R1}> zqh=Cy-0R*y(x)fy()tUXrX+4ilH5-7BBzGSBf-p3hq_FS+rK`Lj2*NIck%4Qa60~~ zDnD1c_%FSOgJCA<-79Bp8tjz6!L*Chv(8@E*TD<>xr5_gWRiW?K<^K9qcN!#&X|w) ze*sRDPto28rTeQGI9*dfEqOoF-TdD_74JSJ_xU=X^?M)RX`^|F`;*nBYQ=vn2j^No zhEO=~gaN{PMa48uA4?(izor<@@cm2K0QM>1mXOGVuxSb7%O>m(L~D5pEpC7hxdsFG z{EvXR?T;iA;r>;Ojj4dF8s~Ar9WWev>Y{=T5fvKI!yx`7!Z@3B_)5JCX=B?T@!h8NiG!x?_sNec( zThWsGc+ibrFiz}H-E)pj3!S<79zfklUl+VR{KdN+)=`A;=ySv}?4iW8Y%`>ZOypf= znpVU0krZ(gb=14Io{@^;=fua-cTW4Tt_ZHL428m~yDgPjHy?`WB!68__DcBzn6Q(R zgkKuO+f@p?n$C!mE(~xMmw1or+85TNB23z|3;&9c8mTF?7EcxEOr2!X9~MtcX`ABY znTM0%PPjd7|5j!2Nps3mS|k4r3S%8-r4f{z1eUu!x#sv)J^cn&9P+=u$kY#0cCE8t z-+brKH1AZ%OXIdm4&2>0C%ZE~m@#x*w=?=K33{%{x3DcETm{opE7YvTcue`4vF`Q) zNg4dIfL3cQfCvCplXlEHb%rbx#D)L+)zFWYUWM!#G)2!k>B)sgvc>=~Sdji%X{SpB 
zN;2*u6kx`f6R)tXCK@L{80brl5a&M%MwKcJ&eYwiZK}Dty9!(mvuzc!FA~oJi^Bc^ z9$!$f!9pvi#mI|g>&(TG(5nm-6E#%b7gq7{SL|hgKWDmKn@Un>zf#2Pd0>r^Xaa8FPDQzc4CS>+@yfk zL0+)SJcb{LS>%FK6AQZ2D;#I~lJ9HNdziM6LYYwAr|tas-6;w~=8B)MS3X-!kP@z{ zHe4j)s3!^xl?|uoxBnxX1#ecjzjD!}MEmeE$9IaQ4yNRvaDT4fF7LmJ(nGhRcVjIlj_gbsO*J4}r&G}tE}fyCy+DH4j9yA~QEw9y?-%Me zSfV|aLN*ahzIMB*Qk4l>a8HPh^ZwWDFVw}cUz6t6E^P@6>Uh(Hnb_3Ze?62Ty-Lm( zVCdK>+qlvL=TlRgFxz20e;_7^0eWCO%Cw=4&rrIpuRq=k+g@j&-f;&((|`XgB~iA1 zpyXN0`z)LS8mYY=8+# z$;vjt&RWWy=0ii2Dvr?D31D))&H%jTM}&J;=U zRmHEoZth^a>)&9!aZU*MUX^nlihWKmZ~N7ryTW~62LD68NHE^Aue~WljTnJv&}pxA zLX9|)GZIf%v2hS^o#d`1;lt)OBa|tKcHwu=zl4OlQU|^6qbU8%Jr0NI>=c+R!En7y zH{}ex3ZH~3_JWuR6v^}?4o~yUp&Y{FYv#Sn?NgL@rQIG!@lk&gOJ2LwEgL>?c+@=*&<$nNzh|B9;7H~@X` z^({XILSAB^9G)JX^H&(GMSaSOweYp&&ckA(Dy`dee%A#Aww2e{A{S!*PDAWU(#4RV zCA&-3mQzk5l6niwXx*wB5X^V+LcAh-MK$do%Q0VfM+~@HwMSw64Ib6Ya}%t1n?=%Q ze&o4^eZ2IStk|M7+}k&gRh;`Zp-hm6se2G#v921)?8=<_vv7f{VIrjkA-FrihU<85a%c1 z^6C4T%)n}P05%tvx$A_EhA`dJ%hMb8Er@nRg z$?3M}Y3mxFdeL=*&u~5hh9vs1M%1>ulRtg@eTAZZgy@Icd$Im7Z0ufwG^Wi*oN*Ow z$)^gRUA*TV6wb?(v|#_OSKuz$0r-2+J*9whDB?}(`cRHd)BL9BamF?Eq<7IQO#I@t zBd^C(YK0wen&-AVIyB;euax5REh}h*@Q96g5hbNCoMaL1y#s#$>@xGeb>1nd_V zUaAjC-mzi*2iDAWA$A$yl&K`mjh>ml%C!6`f?V)lf!3f?WKF2i_$ZFr>d_!MXq;fD zGkMIw44z%kuz4~tt64e(1@|bRgYIG;pnu=G?V9HM@<6}9BdVU`;gBrsKQg}}aBqIg zAZ@l0lX1G1fK%JiAB>5uJi1rcM-lG7`vc9X6! 
zxCXjJRhBb-0Zx^1#nP0yycRXIcq$IE!(CtKt(I^cy z+Bpa}*~Ml$T42;(zOA|aI^qRjiHNkRrggYi|CU@x+mE){xM~W&@?5prA z(&Lsx%74O8)VAKZ(18}{Gt%g?CH!D#;)PTZ82WOUcTShUh*~Y+V`iWMI?N=%(W4w1 z4O;Q%9baacUNN0;vYEuS#A`m5C+kkHt%5$YX0cc}7m2Hmrx4x9iBLtW_PU0H#A8A8 z+TQQRJh?+vCDDP5o)y6tmZy4l%LlVnVQZC# zatgEQd)MqxRrRLyr>$xQm7mS%hErPbX|K@)pVov7H-YV+h&52+%1GF?WK*rB>dnmz z!daFvJF+q!?#H7|6v3l__*+RMr&^~yTBiIysL=m|$EVYlU1PjsN?lTH;vZ3&^OFO% zf>_W7d{8FfI7|=MdQk(P2l zc7)#X87#G`etE>b6ew!Y1ttkNEkpWGWs&rLIP)iKpSP@gy2gQ6(}k%AW*p)A0l*>) zVyzm{wOGK+yr-v>_Bkb?%2q!qK(g_;B)aDz?DdWcoqf;^6>1_}27(`Zgr;>Ds_%@4 zqw{b%e2$z~qZ7j6qPibB%uL9rMTav^qwoXuw$V!27_?tn@8Zle8sf*g@sfvk_#bDd z)Q}!_XU}kEDVFU3tX=V?263`*%an;7&7$$fJ$zquScR7uVMxT{#^00LS{lGG&fpgC z^B{7&U3BVpY{Qt2Z9;jN&_+?9F`jP;R8+XhXr2G{D1p@m%hJtRi!vRw87&JBXU$q` z0XSVs6>sti$7tbMqLMz$f=u>6MSNWhOn)t^P{{2ZnNk&VmlGuZsFRl$AD%svp;0s6 zyx~cIwS8gbXff2^m~HfiFGXgoML}%e?WiGNA&00byuWh&FTdGUdhyy1F%X>x8~N42 zJ>#$-k&rR?w1Ld03FLQ%QozEh4VK z1THP+Gwus7eIfI|cY>nM5334{=jqoLRP1b5CWhe=hI%$X+l8{(!WzhGGD$b%0oWQo z5`;I4ndcp5xN|)UrrT3ex9Rct{in<$yF@S<%<`>DLYkoWc{Y23h-8rx`co16c?3Kb zXOQ^x<}bb>yCa&hu3DUlI8Q&bB#~J5;BvwCMM@Suc=rr zxEFe_n()BW0ZxCdmzIdVJJ%qo=~{4VstOwx(68PRUKdbiCytq3vAL|MLz6-R-gFky z49V+b1-`(#ZL}JzicDK4=Vh^ZYr0hWu}JD6Klu_IJj@E-`5zgYp({?xMTG%AtLs%- zh+&`j=EDCUSsOwpLyM4e$x6OFr?Fx{?1hx6bH0;J)4dN@-Z=^;-v zKlVvuc>R@PM3F|&Qa&FZ$VGAS@if&X=8{*37TZY*U`)Jlf;MkCE2-kd;Xs?jdf+vB zr=5XFDi&`G6XFt57R_G?@((I-!@Udnx5Wjnj~;&u`cS!cZ*w+>_*mqUMf8np&979` zZF(9%Et&K2l`GVLWG)Ey9s@-CldQ+Aw_4rx*;J9x4btoRSMv0j0IFoO!tU?cNQNBE z@djC(1iD^Rgyy$`=v*TllY9IsgJ;@ zvYl{U^i$D>tj*?h)r%=9=WqFnSJ?Xtmz3Q$dQ+o#?;p1D8pDUz|Boz!k8ZDE3NG-M z?#1Q1WL197^L9)pAA{E{xONjxR~N2Wv`!)sZWQ!N0>y@f0yQ-Cj++ zZOj)JhJ+?0m`n!h(knZtEd7vK;$~m)hnlw*DRUWe&~nE#fe?eHBZi(uqVQRi$^)Xu zomO?+arl}|cX<#a#sZHdHG*6O^5;zsmkKy7;lI%N3G5axQVc74#7OgYt&zE8mu`5Lg=sxQcf3E&zsak}+D&warnG&#QF>=HH z@1qT8cmzp<2EGZ_4CE#0D!mQqD+h4O83&Jy3dQ%St~#Y-C2YAHu|0Z@u$@AfSX<3k zT|g~QF5_qQW8gFW!jOmcOUEduDJ3o(9n+_#*{{khX=r!ApW;2+kV*1K<6^|j|z_?uNYy*{%*tm 
z$HvYCufCx3uD0&eeF9tDi7ThBH+{9oyQ4c&GC9Af3oZmNNn6a?mEEWz`@V;F<}QN3 zYyW`U<5_PJ7GFq=>9xR-hK`R1;k>*=WBjlvdolG8nS*oyt{z5tp>EF!?B#$_N;H1| zPT>4bDcm}Xlk6q&J+i5imaM-@sfXlZJF$E`fQykC`oZ2GV68mB?P2y67l>nupw8&i z&G+RWno4Gk4@f!&@MpUKqNkb|{pVqQGsgpi4kWCNV62bbmF-ZOq{j}MYx-YAtjLE0 zz)?B1x6`7J{kn(iA&)3C2}7`2=b}O6=FM^InA_T>AJBT9_eYbykf10(`RbIhpC#=6 zoJ8J`zAPVB&GI8z-$~I&3laolXcCRpOVN$TP>!mj(spMge5f!7r{*fWUT}l99}QqA8$1-8KB6h)TNQ6B~KhnGbJ_=N;*rP zxyHM^wYkCiawzbA(1n<8xh1B&)2-AeWIz#{0yD?6eL2qP9haD+Dp1Ar>Dd~6UFJg4 zN3)b-v(jJVE~S=HF{<1y{F8u_Y@G1>cAViKyQy~cAC;Szltcr3k#`%gyV~0ElUVJO z{$bI9YqG`dXIXQ+cgGHvLnhf>M95a!0@dB~pwAAvO1c}4iY7iQC=BaA#vkXkOlE$& z)Ki!I)b=+=#U8{yey~5>V(XNs-XY2%7!PGfY1QI`BE0p)T@QAL@!xLv^;eCY!mrX? zu7!z94yK(eKjIF%dDH^~i?0XyXj=i)_T!sdLW!kwzuTZy1Mbm(j&@9ZtST?DQ>`L4 z^3nG#D=5uw1>Xd61M8tLyG%!HUFi`e8u#Aa{<#P0C16j3@TV1(aq?X=PFh<(snKiM zPpV1Av^Ky>bG2J!jv$%8>FAda`=0tn6(7B);7UYmy#8MZuQY*M?srU6!Y>(&&W+Bq zbiimbJ7V8K!mJOYa(2e)>7=LD_G2;)hv0F{Q27ne6wI6JD{2drlC*f_N`7l80TM}! z!0GXPw3gRN*e6drD>8+ZE~Gr1$Sxfw0AnOMe*3VAHvz?druU43dsO7H;+KlW8b{U! 
z57qnplcS?oOfH!Iz3E*aMcRxDgHL+hC?yiI>!QqMKPD2o6(V}C2gD!IUGM7t!(|4dZLh9u zoA+n1-0UCnIw=QAI%F1{Cvksy-RED^2RQyDzji}YQ>=yWCNU{sgvw#9p!p8?>6`hh zHqqaJfHRXrqjP#%)gP1x0lMx;+B2>nVhN-vZvYF zRt)oHC`JtvM{~aSl$=l2Klw(;W`TRys7f48C54c|d2E0ak=?Zm%IyV#B)+?s&q?Iy z&O-(c5}57m;BUn#1ODtw>wUiHR(BgNM z{6o=wc}rXxUr`V8I4XH8e}ZmcH!jTa)|n2OpV#O`Yn{ov33+zZP29fazI)5dH|lhM zWUfRSm?)eQ4H6r39`0>?-5lEQJTd(CZ9c&$$Q;B!CLG_WWlMoykOyW0qM^fy2z3 znV?8){?{< z7Pn1t@u(#ajXr|#1R4FP6YlQvZj*&=59UPNZ50&0a)|63-b1$jiD zi7gt>vIH&Ai3XU?p}DQ!Oef5^le`F&qf6i!yE9GQJ^tCe!EM_DFoR5Ay%$k=AZRJk zN707A(3(W`eoZ4waIcFVB4~C(ot=(cKk==t&^0RzG{9JdNLlM-)|!eg@AGnH87MGuX#!6R=&_8GR&0 z^(~!htwlLS6Ga}N9xHx!!AWs~Jo_X+Wi&^;=o8HFLXT+h^IDwn@~`DzF%QaBmIF}3 zg*1#~6efdTH+C(_PkSN%wNK+2Npdp1N`uMc<~<}i9Hg8SC`{Bk*IV}Zx;VviKF?~5 zC#dr`@bhNkk_`3NnpAYbFAo<_&xnXpsK@?BH!B;tgU20GVEJJtTf?K9+f|FL+T^12 zeQjsg)C|j(^L#6GdsRgq*0hCX!O6h&=Ah?C{775m{Q3;&T_Hy>5VKaTjbbf5h_ z_WK15$=9?*xcrZdOZ8)3zN6#E)CG#DZ?Oqvy--YyS4JRT&)GHix0*YjY&p13l&}Rs zoyD)d#U*)-K z1Yj6W0y?to1271Kh6m0c48tgqc1DKtSx=!R@Lm7xvSmc1ZrLUypnTbc?9==B$u(8Z zYkKgd_7tvdG@gTKH*5epSwBz1Fz|x6be9nkGd6V?vN}kTV_#z6g$dE|F9bWIPKbrg zeuv&{DgrTCA!VAgIg8kS7bXL8PN4)$*X%cR%4FuF_gQhFV5{q2|B=yGwz?J-o*mVl zQO4^-Hmp@~E*%8bu4*$#+$#9-o+7dTjI?gPl$CSGd+%|t)ucdFhE~`;hLwjfhLwZr;_TNhPd<|0Y)f9QJbR{I zgHilv<3i;kO7gEk7Qdt|DBxEdEvRmKQ_|Iy?=3xbEJ6QEdO>+nVMlcqwi%_Z;XHUH(a$vMclYy#VA@>m;NA4n#E%X{i>#Pb^; zPpaZSx%XOFt`EO_ zek_hB+VM$Qq|5(RnywvN4!wR?mXsCRLEos= zeD_a};hH|=t=~cS9!nnnd|^9*eKp1N%sq66r0X)CbW}UgITtV3xzGG{y>8=UycV)D zx9hGaJZ66SN&o4kXmm{1wG<)in9-h5b^$^r)enNC8aFgIC_Wn3JL;d7I_1>0v>)3} zj!1f$T=x;nD2{VX^Z4@T(|07%15EXT2)K!Z&7fuWU%f38TQ$l@%Omj;-ez}awu;B(xPzaFwxF@c zCmNdHZnCv`_*-;B>=OZzkzqe)cTBvakhXED3mZ0x!8dk5(XBR~y=%s);gkEV5k^T5 zg>OEATO6rIsqnW>xvKQOS9t}`Qr^+*@xOe=n=l$!$dT!ujb`|AO?3$(Ai7%)qO$Md zGr$g`i^?H702=68W2;$D_{;)ZqHWW+3W>k@Kp(N2Wg;wDI%W35tpE=2E5C^b-h9Tctje2M2~zs4vf9vlOf7?wTqBqtJ&*#qUMq6WP)^dcaiKpQfzS>&aiE#~tetp{S~1OK_5 zE6>xBrR&yye^N7j3|3Nn+pfEC#tL&96MjGV7dyrgT{t&S)mtb3RPAbgYs<)DuHh;y 
zi2FsrQeGgP-5fOFRkl~c%cPQIBWU=BX{v2qed7_c_x_YhGxkK_9y@tvB&4;Y!2v;Cx)v3VJ;k(f8wy;xG(*9 zFCv`%l?$@)1EmOyx6n8UyL_p|kilKDG(dw*+= z0h>Xza1~<1i?(>0S>QwV8@--vu~S2*Eta+)f~G4hf^q_dJc^nt!fyqm<@E0PdhSTx zfgIEO`ON%t?>^)-(8JM1V1JaR3oJyq#w?FTqb$MWsmlv;%Ckb4BP*>|OVZFAi}9?qc&*;)F|zbe)+a zz6!FehCghT#6@{Q{@tug`gr5#j=mzJI6&F;W%L3aM_zQ0)N`dzjUB8cecd2bT%u?S zeP?e0@YEF7jH1?bc>j)Pzd_V9%hkE;y^aP_7!*7JJ00gYrv`n8>z>tb7d9?W3q9;Z~zQ27MhW=Id=_gE327a zO2{*YW@gu!Lx(DJb?rIMLX+e?8929K8b!eIpmS@DVDZ4&xw|0aI6bdo~2|)Cg}MlgXbO<3;c!7)VdGXW@4A zsZ@knHwB;0c1cb3lIg}qn$*9Pi;qwmBfMSo3PHK`^(J>D#q;sU5*_BKTxW83aiRU)d+|VCwan=|$ zH;?QYwml+SCTS2S#&1_{ecDkN*vq7hg7Z4eL}VZxfH&t>tJQJM2h+8u*#!BYBg~c< zm0F>xsE14%yZQ=cN-q++T~{^j`{Y0`2%tvQ;?ipeBrj@9o26%4YTVgANY&BD$MHq3~0yq&af%+nEL8`i>LpsUpth+eY+}YFx*yD!`DwI7nB{ zyz_Q_^i1Ynfeca`;02ZMWnfb!$)4dxvko&a{F3raoZAbKur4p>03-ZxK_ys6sh=e} z@97R$9X@%kk1W{oIZe`MAtwVWVSn?nUGrNJAzcR$8_#-8E{(-&G(^OHt}oriM@5QQ zucQeDD%IiZPv zU+MogsRVpZ3{0DZcO?)jy3R&!jcRdGYu%!sT+xB0G$kHy^UmH`#^&v-)^F?{U8r^9 zSUV=fT_lc+ZskR?zESRlp*(GbQxq+&E!pR|o!v=Cy0I&I0f8EMMJ%< z0cLdpHBu8;Ob*^SMluu+vv4PM2N&EW!e%Ki5QBg=dt)S}k)&~sWRHH9$qQZM9yrr* zOqJZpYUnU!QJ^S3)U0I;K(|}5_GAeFo5gqC&UvGCIbV3ZLSk^?&qoXb%)(}KT7uPX zmeBk-df=ChC?$poG|1nWY#@)N&jga=g6_4ZaV;g3ktcB#8LZH`j4eE?8p||S{;f~* zX(!6*Dx;MS?}_Q4R(e!ITCOKLm+g?Ihi#?M5A#ay#gH`lG&fDx{C{`1hKgCt3YoU= zxkwMWa1%khGLBNop-pHgB@o$<+I)=xkUR8s03YNao5EX=&Cy3yyJ zF6(@+mor^xCafn>{tWi}hBYX0`Hn2%w9TA`apbo@cHC)yNg6(H-)6<`iNL4F)Nx~>jI zgIq6N-N?_^OTmBpQghpW-HQLV_j>GvPE{8c!Sv2EFKVinwpr%B}m z#MN>-@!E6bki5^LVgd4G@fUmm_vG2--yy?nCd=UvBJH`==pEF+5#v?QyWK1r}@M!(~tE{V_u_o(PSx0iOj0xvBruX44sI71U-p-e<39 z7QvAij>wduqI9dpxo|~4j;f@CoN*$K36f)y;djV#vJppa#{o zp4^9Jvl#hY^!a}0D-n?`^*^M*h++he$F|aWE#4~kCpln}xMH61mgw&3)NDC?_~Q9n zt5sYpM2qnCl)l-$rC+n3Wj}`wrUsD<<+6Gqw30Qqp8VHv7j6p22Gx#4A6LwxSVorq zrOp8D9F&se4)tKoQx@LH{={)wna(|M6JLGc!53FJrDLMi)*C4^OUG}0{+yQvga;}4 z(B@OWo0Tto{#MN9IQauv>4}?_aETAf|P|}%4GcOKoR{CdCzXRQU;f}zaJ{sw41kLtlfq9ZK_$K 
zDqHAJAGOL}LA)m5SaVM?uw;gqbjX#j#>A9+#&XmVwHych$(5~~hX};%QOzpVEVnsVI4Oiwpd(zhSBqsV8Xc^cmw>@Ss8hqL3oY_Kn zl`NZg4Tjr9emvv3DYi3b)}XJW7E~%BR%O=|BCYDfzC?}NuztVWHDB;-iF+ygF;%53 zou28Ku89P+TR14ibMU)3eJ}eUP6b1<&vX0PEP)@7TjGVz$YK=*ZFsLBqsIrtr;w!) zgzPf+0()~qqu0{2*pCxKiS=$;^tuf;-@+r1_y@_PKKE zG-I&Z{g(6fM!rxu6R|?VNif<$x6RXEoW8rt^A4Kcz@W;^9TMj(U;7MvTRmAd;hVwYhI0rN81kO} z{J^+xWY3LBJl|m#NwayeUqt6=A^J)2o-?*-c}-80#!^tS?##fGwHNWMjL1FH4L$sU z_E?ov{daoKx`!ZhRdk7j*r9K!+dlo4nxD0Kk5duZabsxS;C$uH&g zm6D+G-xq+HaQAK|1(t5&%>1prKu>5<)YF-HMbjf6pi}|nupuELQV^jPV7BPse)#n% zVB<7B9_4FG5$aw(KU6$xW#DdCJmj`M!%@O0HdguStGSRCKhP~NQ;pr}im4d>St);WKAetro%^pRzugoc!nP(ZkUaPthI_vx+R?_J3rtFl(Wi zRAdE6*RTBlJG8Y{)>b-etEuLkq*u09Mrt)ldq>@Wd$a!j^9#nUP5aWTGY|L=L?uAk z$ozw**)3a5e%epDlQ3KEboUxfisx;fp=R<6Vo3@Mx;u7^D1P3_p^>s#N47=hhHqh% zv`rXS66|^dtg#BCegE709G<7xwdZ%Zrp>Wul@p0pWZ$1t6aM!sOpry>g5Z2I5Ew2( zR@%wGqGamjp;bd=D}G-jl~N!clKxO=(BBwxzMsqAk~SFnbZ4gCJBG`ee)5gAD0%GB z`KOA?U9SV_3GU!Kf-EsrYNd`Ao|S3OpWJeu$Btzk%dR!~^Rd|%$PMHutoT|~HGid& zvdb)0^)|!uEpJ>8OejUYci@&&ad8Kp==vQ1ZkfHjB_!v0{`N?^=vHE_FAmaT*_PP! zXjUY8oHRBowXzcJF`@9cEArec$l4xb`k5WqCz@Mb^GpXL;Jg1gStP}5*GVG$dlZ@m zxfK6g06E)P=y(*9_3TMy$%AI>BjJ*!dn1u|4RppdI#>tgl7;6E=S)1MIiKC0eBUrx zp!+NIwz9jma6|D+_}wC-TkOK=Z+{7Hkijjy{C--YG}$#?ea9}n}j z_7)aQOlSHzRuLP(eKeiw8^+n$84CAcD@p&nlByh6m-i6-;gg*f)i4*R3JuLC>00vS zMauMi3$dUNa|R!?pU-HT-b@XAq0BpJy6pROXT)Mkx2Pd`)lJZ)vGPi-4$~;DcNF@U znyjM)y7+veZmOoa#(K^DSFc)$*bAU8BY*lr`@!x{+vzAo0aH5UT3*=wYWm=D=A|tuM6Ugmf*X6<)@X( zLe^XU;}{{k0nf3*f{g|39Z38OQ<5~JJ2tK5Gu z@+8P|qqRIPUglS2S%07XZl96K!}@c+Pw<5f zo6%M<4>3a2O-X(C4!c`lgby&sX0bR<*;+G^S)wImdq|TAx8*wC5my ztV-!VGa*yiR`}5!Bo|6Z&smeA{vhgu+u= zUuDB=Je^67$DJovh3`{2ipeRrKxYJlm4;xL>XL7$R!7u(srR&Y2uG`G{ncl+U!9IM zyPv=ZJ&3B$TOy336<#?)oqSMCfi&GSzA@%@RZC{%H}9a(6emZR{2Exqi)`xhQASY_kRnXpzkpGJdTc{yc~;nYEnb$!w>j(;@i zCHD!-Fnoj*PBnRqA?01V5WZc%8R~Ad_O11FX7~hTi;nm>va9? 
zr{T_O&prurC@g#Ysq4qSx<4>P35?^TcUWe+dVT-t>-9|WAZRz^<0y;UIdl&4yv(8S zJigRr_&ex2%5vKxyd|)iO%cc)kE?ct=>@Hbn~T4UZCQ%%Yta+!v?z&iDM%HWYvpMi z)?}I1pJ$M(v;qVq5JR71AK))*%)oW}Y>LlOk43=QpXxgrQ|9h~!bR$F&F(?jsXRts+ky2A@%K_1j+@xa$W#Dk#v5UqU8S zc*Lj2Ol)iTly2DCPJSP3QVsDluEB}sQZ7l2(L^m5BkW<%+RFn{K;=&s%$-CC57bS;M99Z z`K^(R_wRxEA$UhFt->v;Mr`A;`{MW^W~|g+?)t7Lv(HS=>o;^w?}tfv4V_UKU*Wdt zjiRhOO2PCH?3QPDdWqf)CYw0Y``~`uarII8<G;RV*ebKX7OGwjeQU ze_cvXO5G(`?y=-usJhZbcAuPc3Hiv#mxWYv`&Q+wNni-m55A$+T6njmDdm1Lt#Hb>qcJZ$oTX zl-(`PmCM)GHM6%uW%D?OlFYR_#)WjBA1>YLZ@ETA0HkrSg- z&U2-p=XB=DsEFgOln3uT{BBJaO^Wk(TLueabQ*WC2 z1^W$o)`GUqruxhhTrupWbtCs2pERLo3@JH8(2Dtfh86BAWxihG*?PI$ZvIn!F@ccx z{s}|gO(X0hI;&Oae*TOh3BOHGV5`Wx=K9LCG&Z3}8%Q>_0spFuaxIZq4e2}F(c50G z=mqP(+3L_U1OF8#;U8ndk#%|XD=5HUdp5$5tgqZ58CqKCxQ*8|WRzvEsZXufu(kDl zDY;v8Xtk1wI9m_U)g$UYZNX<#nBnr3t~m*6)rjh`z=C@Vry(co6{U(N5-vE10G&1RPp2#(`^deB4GzHuN=tU*OP z9dww-ZrOVnQPoFvx8~jXhYp_q$Xvyx2Q{y90Z+v~P$Lw1pKbuov+xjb`~sD%<>waB zvj`5<$O!RKLs$PLfS?0rp0j93alikVxa4UyY2)S3PE|UkVqD9LBfz!{NLZB`BDgGz z4`-U;X)DZ8^@cph*eb5)uKo_7d1a9g>CJn)rXEcY&MDV^a`pQcMg@cP)Wj*54;+6# zL+r`3Tvy;9)4e$Nu|2%|nMnOFWTf{=7DwHY%YsFS%ym#oAYujS`J9lpJ4hBj`O&Mo z|L3+lnAbt>6Db^C@p)qco>GKRRpx@YN0;yL^-|=$?97j}*Nxt=qa9FUDN)8OeO^M+ zJmWDvZgK<)Gqd+4L=~9$yg$M=+h?=K^nkeH*KFr#{dRO7m?Y_M2;yf(Ui|T{2tb|yv1*XJ;2tLa%#V`9txdlM*XF{ zroOQhCvT$4&>B!GUNpQHIbQ!~VQzF!AnW^=(gF94_|j8wTo4sM5mLla7a(k3JK}$I ztI_W`9Ezm=H5khXIw3^hvLYfMXw)=^q3dxBxoomhu$lIl*iu^F1>rhx_b(} z`LRJsrkAu98Wg_3Z-Cy~-8_s8dRl-(oD z(09=2K{FCDc&-GxO##8FMyAKrBXKr`mfN~QjO~+era7t{Yu!hA<5@EuhqR9stQ8F> z(E=_99P@LlEgJwNtwl;J$SVfTVBOb(K05_Z6Ur*IxBeV3Kbea5Dr2M!z1-#l88Cc3q?zBfY{o*KxuYj}ky`iHON>|ws{rm2Rp<5MB8{N{fg z>z*=%o#l=`NhYZ4TVs_1p=gYVWVYhx^z)phpgn=~Oi|XniraM486|BnQBRocyn2Cy z{vS7bDIzwPNNG9tdsL6UbTk3(-jU>Q`c{fbe@wa3$5Ox0FCX#8S`gz;H@U>L{ZJe8 zAObLs+>a~OO4Rhd@oD4!XMuJNJT|iGLA7T5T9nDj2w;IItOqu2)}|=kTq1;TAbq=l ziAO0<=F;cRt-S&t5C7IVy8j_ZLIGFdp#9NR&dN)f#bJ@!D@*0P>mH8gyH$w72Ll%5 zO2818dZX!B-MTy988F#0QwjTLY+OlPrQjhajN9ia(u5dPcK@)DPm1zZ{7K)?ifd$g 
zN}W4YR4Z`)Fd}*ipMt_<84-YHXm2OH_S4*u-=N!sSc}h^=$k4{;!}k&%V$w~_$1sP zY2MbUlBia3LSFr=)YV6u%jKtO_=^>}oZ=uyvF!IWStw!0rf;sNva1i6oVPJ2o|=z!cUOptklo%flM%sWe5V_(UbYVQHQCbN6x#<|w)DGR zHVOWdG4*CO%cqW)p6~UjC06uK)n0|{bbCfotxd^vA}at*x@vyqKSz_AX6B3~HU}n- zf8APyw0K|?Sp4rGUG`F$J(%MI%eg9so*kzo8KfBaBLf0&&iu3RBn$AI3U*v0VL_V5 zcQ0pYg08{h`%H00y&Ci3bAynP19AjW*kzvq<6tL{*bz$vU!C5BXd#6tS`j_@Y$Q03iLF0<<_AIq}x?XmNqKrjEG~R$k7)Aws=A zU~U&Nmf-w>T^1UZQ_D`S{I;p`eN#Ml0DV|7gb&N4qnu?<4N@%KvTW7%;VyoS z@m1o5IG7jud9}?XyTpeOVa}j$F7gNqbY9=38QYippP91HhFJCjJB8ISG(tdew+{@RAlg5X7tM5EKp) zIr5^D5^0^$2~+r+XrSe3lkHUbdb?^1;~$#XVyP5047Oaa6I~vuBbL3iy;q z2#RUvCx-W;6#!JPgI(TGm9;)ReKRkV2IBjEKVi+cN8Oig#<{3BUm~rT34s+oH>Jty z!z!ZG-*!AYBCWz;a$lq~)W3GCniE+6yKjbnYU zV}oaxbSr;$YYI7Otm8{ZIRe`I8fwp4@0$HR0s<$EJRDAUC2loo%kQOmVM}?THFn#s zaiW#Rw$}J|ZcM?B6k3@`m3HIQ1>r^#v=lV%RJ@6kMEZR)-d1& zmv?CVv6CYUE<{oFdMx!h`dj*^5VF(U@lJQEYAoi9PdlXb#w3ne-o zLDOCk{7d~?Lp>*>>%xy8f}kmqA_2NOLd(7Pn3+E91+Nym>G> z@KwdE{f_%AQDILDu$MEeuCC!BzT;bIk~l-h8SPD!hd-7JWP=Z@ZFP$lk0d(D_@I0V`HlwiJyA&G%zNTmPOr;~ zpG*mG_ZH5WHg#DA#7!@OLqZ0>u7kPkw7>}DxV(1gZc|y9l|5Ny`j*u<-K6;yFc*i;zJt$eZhDCpd0Ige4!ak+zXU)mY-gTJ~!m>C&PcVpFtP?;i=eOJn| zKrk_{gF<#MRhQea>o^>fRyEzM+T)#N-4oZLsDs4N3L2q`T-Mox&e6GhVrPb3!Jg(Q zf;h`LD2MwC6-nUfRR9g94X8|r_Y$>-*O5rm*Q#8jgVd;o%0eUq^I5(iuF-awxwFx~ z@oZMru(Ha{_T1!2*k@L%6VP)%D`st+e_y(ouPIQlK&%B6Y0!9hS?do;ZOog!s=VP2 z@B1a^!nm zNrrwo*07h08hzYgnMKha>j93LyX5XX`B2LJ%7$S({(ni2G=fVv<)6GO?xB)B(hi$I zoA+#nM*fFG0h03zf=GWaROu<`e^dPODUV)S0Bb%F$H5ns$06Yt)-6l9U0_D`!L9l8 zV@s2?)rr8V7qHpn*#B^9LL={KZsj_fem8Vq@%ggcGTb&{OG{S|*2Fv2)jeMjeG^!( z=5fgp{bG{t^ry2;I&nSiG{x_dY4(_RlcOC$r_rCH{z@;BApJcG6X98D`cNol%mYk) zXihd?55r&dKU?F#qEz^2rgYwy5B1r*vy#i(uG9O)_eQ+O#+RDCE1|*N=5dd(FutV$ zVs)V*edZ*wvc@ozr8SlK_TD3M3iFI!tRrR&1u8B552stG`~aPWXP^2Sc~L#Rd0ZOR z<_%%@p&jdgtvPzfUDkJEd-y@U#&JtNtkY(lc~KSMHlW}+s?Oc@+F=QXL+jK*OhFXv zK)jt@hm9GUG<&|f7dgL~_j?rML1TT{QPS3m8Nhm1INm=#Ve(2TDE z2|Ais1G9x^^M|wmeeIs;s@mM|g?zmHlik$4Rpb#D8sHbOntEjJ9d?C5th(XX^ 
zdB|Ue=4y+Jw8=pZysElnY)tlI^$b={Tj$~zqG7c`ck%$>RTcnMD5JcqB*giq zyJ7uG0dYGQ%aYDwC63=z64EFfvR_*2Wwl$K_B@QjLGwObX~OeWG)t+McviXkSS0d# zp0#KnT{IG0JWTGtj65~#k9UW#EQ;Z4t1x{SupIFmzZdpX?lXywE(%om^uct)2bNvk zCWUy{@u@T2s?rxoSiAXE>x34{ibp4YqdT?W3sS1d8v*A^rxc9QGDVmgHOY+|@>kkU zjN`94n8@yri*E=`RxZeLR~a9A^f8UsCtYpNNAJ&14Lpmz2SPoqt`x}k9Fd>7s0i4u z40DS|*yhTWf0(f#@3O7`!;yP=yI=h-#quBFCh zVYfl-6n_DX4O(UIFy`)_GsyP&IG6tFZ{^m0SHey zhu?8u_V3xC2_kNRH!Rsm3%&cnDUGVtKKwIE7Chm-MrhDZsEteXxB)$9?#(>>elUa` z9!hHhfqCu_d~C~u8;Lp0HA}^L^oWO4-;Z9?WS8zweodfRqA-UaVKUJ?t_e8uExeZW|3BG zynS}uMh>(Kt>{Kf;gl@N^C_?F=NS{$^aS~8S9J*5<34oU#)#!+FU|g6_h(yi=uzdq z4&GVYbmLiXdX7vb_7f*W$@H`iT0C$0hxM~us;(kux|hwxP=Jsay~Tc`1qjx%HMu9B z)!fgk;`ym8-UT2OP-QvlIcx z>285sSyNk>HI8RkxC4otK>ynZYy7v5HN1q)M=RClge(ijB*j=@kFtLn#prxcS?}dl zVB<&_I&W2%u@G~Kar*dE?2A`{ugExa{g=;fe;5vrMe8$rSBLNIBH|5z1p=g|{+tX2g$3s;CBTFvn5WWSWllw!7NAPLP zoy+wa^v3Cs76381KKf`xu;(%dVT{Yko~&r0eLv9IsFmccAtp$J@$4+?dJ>gmkj2S%Op`YJdu=*49 z;5(Y2$F0XxnEq#X2k*Al^2lah1j>4#k%e+vu+ci#?&o=x)#ZU~7`&%)O)asH5}1RTge*r(Dvw#pxh$`K`d5)5R#uT?U@OIawEd zH3`NIeHqGG11dDBJF9tkm}0 zC+R>7zvFtSg2gJ)Z;@$e@JnD&+suzFncOmZEaOBkS!OzBAf8*G>X3|&TK{sZ@oOAY z%wEZ~k#6xOWX$4;>n&S_BJ)~F+0-jWR=0WKAI^-|nF=QAPopvoZ9GUgJSXNy zfg8yDF(>L0(VuDu?1#+TZ;)0JitHU3$AGmA*5^um*hxG!-s7|7^bG%506E;j!M*wK zfhN_C9bkOI>9h?E4`sN247m)rjIP>E`T6mfj#KPNdbe&P6ri@W97x;39##n{H;4-3 zju(eWzX6~TZdlljl%K4he2WXgS%YDy@ z3qctX<#dDR+?Ec*KaN6@Le>J_1I=Z(fD@kRt{T1<`k44?5SGAiyq#z~K}&Wp8W^2) z8!Ck*v9=-qHATdq@_00Fc56wND-#l!vgoui2Q$c#I#?S6489938B6g|U&;=N4&sJq z3k^Esh!FHExG!4Cl)*Eu=fzQD3(EgCp0qm@nK(Ds2%9T*d<3D)F9dbJ!UZ50@oSE+ zFQY!Hqt)*ar#4#DCO0dVF!Rls3?A9p>=}w^|Y58vT4u zG%IU(Wmx!;)PeDAv@99w!j#91Rgm46CM!^!wS7o_Z^2Np;E;-qutZO zq^Slvubz(BC$OJLfr@-#PbzloW-a%QpES?%-*rsRmtK}tQF^&A1EiOl7U`;FJQVe8 zG^RO1C!w|DLCuk;-mUiJcKC+c(7y)a21%TGnUX^xumglgy{VjF$H_Vl^X=9e6>WjC z;*942iVlo^cT~J`zvEq+{DV65?5=>YN;?Y4*h~Z!v3A2dsQe=91sN7s+vl@tZP=p* z6iq-pt*A2LfkLIA4mYE{15^I0Apr4OrEtpJYT&iKsjteM1e%K1JxpGaU!(<$h>5qj 
zrSOs>`XD&Dy4OZPFz>>;YE~aZdq*^J_R|LDXhjQhAbKGX@)G-m?14Nf@}5JbSmt$- za=(nbg%_kl8taal+CV}PD#dz(BYV`x=7)P-v!zE!NsOH(T6rLZ2up!ltU9tx6fF8u zCSn^Q@i z1p_Ax4(#r)XCD*i#-kR7caPRu)b(aLGzTOP231 zLR)iZRb*Kat<5HTTD3M|Nxp%G`4+c+sLo{rl-cdVerW_)J7ktN^CN@Wy9Ax!SSWX^1T_T?8nMPpBunm~l9KKE${YCTE=Q1TAS}E{ubtZr= zI5Ga1>a7xG(XScdA9f2p>OvirL%(f+J~}0o?|u)3I;L7wNzj(yg$*}6bXs-Dv{J0- z@}ck7oZ$U7ZyV7|YXehMI`lhUD*MhMc*}R}`F?JzCL4eY{#KDfv}=wdy#EI9xgFpZ z_-Yyj^Y(jd5wGl{2g;TrIB}xb+I>MJ9eG;{X~CML&YI+MWlsxPqN&lHy&$0(uPbK( zSwpwOy+?wgdGOtJvU`&Fk^ULMKYX6`jh^`d=j&9EC4*AG%k&i{-r)5D`OOdY{JK+X zTSLF|eWoWEM#~z!^gK$nG)t>O#z3UNF%gwxEV%jBR#xQW{7=79K&(X#D0iay0EkbSRppE>MN^m6 z(xO$Bq*0CoU%#iPVp0m}CNUw(hp<~Jy)B7bc5Y{9PwrtZ^Y%(0L>y~U)7GAJBK(Ms z1%AbRLwY~KP}(!j2sZBCR_2YA>G;+8_B2Fzpvt(*)18NGviy&nr+G0^`JXFN7v_oT z*?PIlKOs`>45FD-8ebL%rm4|c$5c$1d};wT)iB=la7ihcFG{|5RBl6vX1icZ3|I`+ zf57BqC6YrKcNFg7x(nU$msNiKg_~LRXotghAll;z9$ckvHLiL~0@JDX9)ysl~uNXS2vwb}?k-epBsVgi7l zWhBW^opmAJ16>?5TtJHq`>L0pNMFxq-a4b#zknF`byvsT82;&l-dm!KFr|hW1A06x zQ({@q|22NTr^D85M)wh(-^pl)a+!w3EQgC&`1|!7H+B;Kth=FHn;@+tZ!;YG52x1f z4`3DzW$k#D%wTrl@usM#AhEadv+~c;U^E~SP&jUUt9p?qC0o{ioUb;P@WJ3aGu={S zMb3IT(6@QIc%y>mD1UlA?X1eWyLyl_jkY-Yz9~q$c(7C&c@};+zV8q7$HF&$$f!;? 
z4nL?BQ)kYKJ^?Wdgt|ZDz#mhs@ItH(`sFm#ehLwpJleqWLtLM17ZHPU4Zk0VjIt!8 zDn#R48-lB`wPvB4=a~$|{=ek2o}E(<`AJ_J*Dvy&6cBEGrn}Y)5Pt5J7~{>&s&e&} zmwTt3r#jsqww?(#x}7x-Iac}pb1L^+t)~C@qt^T{OVY^{qh;{|;`y6JfNy!PO^o?( z7J1{y?_?Kvx%*?|FWR0>J{MI3nV3_(+T>7U=)Kw*y@zZ}4#rSeD-uVFS zu_ODg86v_Zn?fevom#gjzoa}?NH2$+m!&8G16d~B0}nOQxM+J&6HK%TRRZ4(r5v@E z<+CE|!i)2n+N3WnFgGrMD=^({^)M7xF7xjcE_Wld@Bv05Xx7fSA=Cf1j3VUCR$REB z^Rot=bqXP6Hn{Dj^oz-}&UjhDGW#`-yt(rr=yM(*Ib|SF@HF$^W5VU3)g<1@d+|5C zy)K_uv@agCt*B+q@zthLp!OO~(C&|C^Hswu{VFqV=YViB@}1%RSo(d)_}#@jk3jK& zmHn&u_f2s`w+9s82J-vinFlV`{&q62Zwlp<&3Tg=XV|BAg@#FR=h<@V0%7CAF{v1C zCeriWc9uY1x~8>xGCe&AXBwjyqjCvPNdm`>o3{qmwT1gxJ>R+zpK;69QakSkp!^v2 z(Y`<#(U^w6YVY`}7oS`IqFqI&v>bSy>pIfCM#*N|SAM6~|5kVns}x1X-}&|&kXWoV zgYavn>w6oTrrps0j7USgk=IIfh4u~nc1%VdLYd5Dfuhq1PUFdaGl^=s zL-dq>frHb39Xzl-&M&g9i&mIuNQuwt#f%$f48Rlf1eA@#ny>K#=6*L!7sU)WI?{0_ zqz+=B{;5OM*2Jf#EsbCLBK{m|*l|Gp)Xh}66y#phi8<=zYpNsCR?8DQhlQmsD<`qO zN^=cGzUsfx3Fp@{PcCalhsw+kvtFx~Q)>&0=V)m4Twca?e(La7(iBe!GvogcC(?`L zSqjdgtb4y>2n98O8V%mu@(;E@lVqw8)OllQU?7XSWuNhuq}g4wHDKE%b-&VQ91lbO zRxPU#PU;kwHdIAWEKP5=`SkI5$$XXMk@ry|^E9y-a3wpGV6~q+wTWBJ-{t(3Fma`> zS>;tD{vBoUZU(xtwEtx6L-79AV6}B$il1kf`%q~|?!k*~Cms*pb2f|~w&sQfV3w21 z*Tc1_AuN!XQ(7vHqpj(=j+b|h9lW=kXp26X*3Rg;xql{a|B`e+nm+F;12mH?nT#Be z&-|_8gVnnBBA^Oa^8;$=hsxS{PNK@6qcOB3`h!6Sp$7G48>lLSM+bR%Zc%N|wp~(aX zo{lD;5}Dq;SWf(Q$QyxLML0ATTr@X|E9>Rnd(WB$y-$A=b6~<>#^!=Y4m20dSL)8} zY)j=c9f;4#+b1wl=2|r;N15=Cj{%rx19U08UBdw zqdwyg35MTgb$M{7e@g9D+w=&ax<&pg?GK@y3F59^ zh2%uG_8xHK2xCV5;pP9-2d}Fd=`Z4ZGWSD2i#aQ*qDiS1WC+e4t*{2%G;Wk8*`R@x z&7*R!W8wY^T-o_927wB%?=#IG#-?O^VGZ&&%$z!C+URF!5@WUau(O!K!3*ds=-L3t zm2mk-{%b!iqmJp7VJoX*x7H>^(`n2+LsCsuCh@DqEoR<2$2Lf)ElEv=n+59Bl%_H8 zRg;Zztn=cy+HoMuL;kUV@t#V{idYwRZKm6WT{v4k%cGglP4=4Bhn{&qxKKw=94`Bp z6th<6<#-$GQR3f22^QLvd=y^ato8x5EcJGj{^(L8^h0zeiPR^eKSJeFx8)%E`|+0K zV_vlF)Sh{LCNCj78PA}kWnRX*K*9gVW3jLKA19yeFx^(u)Zeq*`sJ&%G4#iO%0#74 zhbdk*z8orA(N8V~ym!I0o8vuYvQ!%+;6MtVXC*2r+#7+0M4MW1Y&~DFj=c==t8LLZ 
zHG|d3n~m{a@`((pEgkKkkDB`6c7KcBBEW5eenKXw-YD4ek^T(e#K5bPG{DUg$IKfC za4n+oYDfwbJ|7M&2gszw)qwq^ZQstEZHMWj~I+ytTKTxoJW_u1fT>(#qpLM-OucL#-|)>v}Qp^lrSaike08wT0OX^K_%9W2bI8y2594x&Xcs{^JMr(;&8@PN4Gxwf}0 zE2PIX2!g*o^kRF!o8VZ+S_2k}q3oBL>yIYGBo(^|yiaXhUDfxoMWhy#`Z1UVaC6q# ztR`d}I)0lMC1Dn~t8c%u)m4@ws!mp&FGY<3*`Zcf9>pllnf%A!m1Z$WGL}UO_Oi0H z=3*()WEN~?v685Sa)JoUn*la}lVgIGxxr%+$g`c}|8s%pNJ2Yjl(d)1>XhOJT+fGN z*IzeHo-V8iy&H3r*89hG2LkU;ZK>y|RT^!jXI}f)CAX~SN66|FqL}qp_TxUR=zHCEmt8B;rj**Ms&9ALLA|hlmKO! z-t;x*;ffo`N&^N1dGT(8!8QsgP2Upyi>;V@b`eV9M| zDBw=haq?JJ{tjv@z^d~uEAf`}M1CA#5j9|kw(L^+9uD`yW)=2Q?|4>WF|v*oD+K8l zUr#L~4F|9Whmc(OtEu#vi#tz+yndZM$JFN}v&Bq?GNF)a<7By8T_{WFZ0y;cV_FBQ zF;G$Jp4C*5o|4r`6EzYZ-90NU?x9mKN$*{*hDg@V-0 z(`+`myWLago6&zb)(U1#tS@!O|K<%uhT7lK1b(QS?O^FI!hnG%4d?D3+~S~5GJEY4 zKUE8xk~P5!PC{+l-FIju%vlUrk6zmPWo;Di)T=*Tqse~krV_J-)bm@Q@?F8U_O90$ z_QV*EN8dj;e_N2WKud5vPl*6f8)m542vD+!`M^w?@FENbdVb41c?8XI)Zz0;Ur3Gn z$E>c24vkt}QL&8?>Xsf-w0S&_D>2RnaL|%a6MNMXDjZ*PkDT%dDrrwEtUFL)K z!j1DelL$VM{s3~H3k(J(Fe58E=j-r~#R%12Idf07FbZKZw0X*jGpVtd5$Bk4{$Wz| znB&%b+zov;e0|3gRV~KtUDjDw$|277qcq~8x@1p1zMmn;{UGyWn{_4*Y4UI_t6KBa zC!iFTK0} z>-?_q!6RQoF#q|ScE7=12udg-cN(0s)*xrSQ7_T(;yEu(d_shI_x-C$#J?*3LP8@P zwK%|b^M!@CKI&<0=jWZ8xbq(-au4|o?j)^5x(*E7Mz*BonX%*)PyF8Y+5fW5VWLJZ zb@GO86tdRd0V=3K5&FFe@4NPUTkijG=3vvw__vDxTnuLIgmbs(kIgG#Aw!{L@(!mD z$tVU)OdU9Nvg!S)f2b%q{5|Y8%wQOHw7wd~s%|X9p88eKYu%3o{+cQis@Tp8mbd8c z)B_P=AFTI;5+8eAOl2;;Yh?T?Cn^E85%;w3J38bo>z3=S0TXf(Zq{1QlThz#W3R$vB z#{es*(ACKOMX?Mt@1~lJNZpBF*C1Un`&vK!HA;#yfNYsMlAav2gq$yAP!P#fW zSy?&14srA+hRDn4ZK2i&)p`H`ITVpU!0fs?AJ0dmgULt6ewhiE_&Hhh|} zULOi@IsBpyyE&m7>5I}($pSXee5aAvAl&PvaTzZ5NF zvD6=H$6*}9H08|p&bPU;aDS*6%mc0z;tZ#NdgG}-hJ>R_t_E4TIibp=+J4HL{|@ij zBAz^bXmmNMhbk!x0vExLL~34Z(oTJ!%nmMe?a8-Kvw>vccz@(AX>4tz!g;wO;Z?2F zqt`Z#Sa5E9k*^7Fwez;|qA@C57Le5W@SQ`I=Jsu@=mERE0$>-fO1#xR3(sKC_35L1?vUhV^kgnR!0ghq0QPBQc!D=DQimS#vsP`=9 z^Utue{CXG`lvkKHA&6^v%!~;4ldj}_P{frXFhRw@Z)k)T59cXeM1wnR&oSB`x$Ub9CW z_nAasf=~hS%Ya3KWyw81B|9T)ABXztuu*UM{vTtrA+vZ5@D0lEuRmXB#CDJIfsvOn 
zPk;Q>)CA?C*UGI9Kwkim{+zn%`pg~aH0Uw*=1uecv_?QbhI&74`CF0{vJc!o8Dv=g z!08A&_hx#RDL=&V?0#S?xo??m#{`SG(-s{5yx24X6OJ9li%uT6j@poq^31)t zo(*aM*`_>9|#?xgDF}+|{1XA=%gE5PnM@N1@ z2cCR=^Z;`oJ2t!g!AinPBP&$s|D^$m6J@a>?1$Lxk84u+!6I=iJVf49!$MC2;Fqig zh9RPNwG7AR1O4JL`oGKS!qTzn2Irkif*1tWj@N-o4t}!YZ18&EYJShFlO=?QU1wDh zO`vlYHt6n#9p5uCu{+j5Fa4plVA5Qa1XdAuvK+Cc8yr}l4|KDlKCmwR)_gu6 z!btSiG0JF@jq_i1r#f)5v^mtQzKl)|c|1yYz+2&RM~HU;8-bEw7P08UmBLVGlB&l> zYTpR{Q7O$p+YFmwo*EGp~>t83^(0&q+%caejuXj+elE)oM{5 z8VQ_t-h@%8L%C*;t4YIDKqfh7WC0I@t&gMwp2V$u%C(+``hZB$viHQz2^!CyCsUqC zin9ML4bhu}1{vKm*SxvQ)O{ys?D%s!$h{hwr%UMbV7m)R5B%#+YC+0ucStcwo@8W2Qxh$1gDg_et|_Fv=I9GGb&$Fa>g znM@sI{J2vNfEdL#vgI|SHDj3zrsss3eDDopZ=G5i+kC((zXv1{?TY58+suv^Hy`3? zB&;+YX%QCD_5SsB{;q^Rguc%7$7=7U>%Bqd^P;@&cyhNYa`ZKG&8o65#eev-)F!~5 zG^nzZ%SAcXp?~BRR5(EGm9ywvp5l&LI%Z}uxMZRUTnH3IjPerLb16cw4-uoBA0}x= zi+LVw4gxt54chmlXNzi|$VMz3yX9?@V^a>6koHRn1f+gLutKp!wMl@KzV_#-_m~ER zQPf>iK#U>}BoH#3PX=N)(x?K9^n~ErvVWp7rvQq~p5`%P(;r3Kuu(A&vbMSe%rCBS zp(&z8Ssz+BT%z!^MnFdW!tQ+@7AP%?#r)n*rVc(cK)VAsra&6%vXtGs?szkco|WX7 zG-job9hUQ`HGn=n1o|NS+W9?(Nb*q?jO;(!{r$Kiwl?$ zcHGkvcFPoQCRshezn6#r_#M}yNN^wy1h+=Q7Qg?=mLV6%Qg zgQspugUAo5AhTqFRygfr7r6EA^X)T@mV~!Xr^WnSk+Vg3opQ*@Hdho(55-a=p69%z z^2vJVxA|hVi1C!G*Cm5TPkX7;3(7-?Dx;F=z~q_$>i-o-@u78bruP4ovfP%#yNsR? 
zGV|`{M@#tT;O4(QBOzP}4~c63-ep1D=8W3gOt_x=Y3t`=#rMo~(}TB%=HyHZjbBGX z8>9OC%9JuUzgVJ5??wD&j-0wmd(?@1b`&^s>%2N$0hOCYGg|fRn@jf#fKS!qYC^h>wS|bKWtF-$s5+E_1~El zxpL()c~v(eXrqoq!?UjS|G0BX>X{zjw6CT;`m*;EbfFNIj*wBe-vncU+Js<7nVuR` zGRN^m5^gyn1iVer2?ZgpxJ@CT{!uW_r%P6oMs)T4MQ!@r-h1!ZsfM(pzK-#k|8RWb ze%CE;*hFhNaP3*zhVbw=TeLNgyRGm2D?e_Vc+2RSk0>&_?N%XfCB(TbkF0!9MSesY z!UEqZ^*s3`y)==eI!-UfOOrxv*<}v0(ELic8%ye-MA)^L;}*lcG`h~VJNhb?WI*s` z`YE|`u?BIQhrf;FxH*qm+PA1tX#H9GT&kt8l37;j9F(7k`n}7B(;nTX{gdE4Z5!Nv z&%uq^cwVlVu+*j`VeTw0%XAq$>jsF-7wSQmJ=H-=;t;%I{hS5)(LDL+xoTwwygyw> zJRIT5>8H3rO$~|J(2~^~N(6|?w)Fk}~v-NF0m*V@=a(t12XxY@E0f!eLb?XGKLk(NAl=Jo` zuqXceg?{M|0WR(3_EfRwcNz$u(9e00m$}(>^f=_*6)PsRDZ>DeH@&_3_t9yWP1KTiCW+5UV@_XdR+eKRb@O8<;*E7v-2w)7V^ zU2l>I%$TJR$X(qcwnIk0W94zE&%9X(I-3q5$13ðgu_4=CtC7*_X6y6cOTb35^ zc*!_VtTiZio)$%gNozy>U0-au*!s5K?1X41us~}?!oSy4K;EmXo>AqSoWz%Mt+Ftc z8}`h(2U1}lijnZ^r(re-BRteLfS7qbiT~*l-NC;{bjY!H{v6rm%cJ(%g-lF))Qf(~ zW-Ya~mJf&UVI*nt<0eZobo!dRzo)%hM#SvOKDOsX2-Hg zSFwk+kdtTRW`5@P&DD0jK1&%R^`rJ|OPZ_gS0&ao#f5Ts@8)Q(=Xk9ORJ&1*V7;kd zsJ}y1W>Pk6X#tc-bHmL=!6W{^{|?nG5fOIagN*3Nm04CM-j;Vn zdzN9fG|(J*9nYt~saW?==35&JN6lE?ywZkJ{#lZfbL635EdG&&v5jwM@<&w z`wa3q>r0q2<|N$zV{6R3*6^p!p@OCm-3@$~-1DDfCWlE#i>Ai5H?E>r-b@CPom1@f z>F?XSbFDes9UT~i9h8s72NFKLx;A9x$elz+t!;=frHCL8-UtB5{D6r{>M*mT~Ze&CPqCCxqyftzv)ujzav}yAE~d(LDNAhJIyd znkC5}&&9MJV*5#uF%6Y!K)c_y6>@;p9@P#Xe&0kW@eI-Cs*l>v6aQiJ52cN(mIf0h;gE zSg~R^vcR_=f*#<@3|CdbVeYjz|C%T_#xEDs%V%ZF5 zf5pDFd`yFfM^{nn*_`fztO4Q=^(wXVx2kfdd9tf}oiEw|o;c;QoSuT80LNSB(sG#n zU17*WsaZ0BTxJp~xf2dt_<61j5q+159n^>6^ZF~)$vD%QaXWeuU#qg~$RazVMR{+C z#Bi1iohvll0LR!~&w;?bn5pAD!{y@7D6?MrR}^x*N$AVIU~{lix|R>=NHhW5V+|i$ zSP7AixA?wAbokaaVO}&xbN@7n#iMB6^L$wcrTKl!-uP$(414ME2~QHyp^t_7JD1$P zz8{0@tMc#RxX|r8u6AzZ0Y(k^q*=}-a zDY?-E@SjG#3}!R_lKDABWS#n#Yk>-)so_f@H$4x0b&)^1PY$F$68Ldm_LRiP(B!T3 z(_6bJW+09LOD;2p z(f8@1qN5C!%~D-$*wt1%jYVB)s$_(Lg|_XlmLAkvU+l-d^t`XSb>y}oY~KiS$7=p5 zX+0HsuGa8|e#5XFpxvi-h~7TI>TGl-tOHT!1_sXLoFhNy(J9$FXq!*1jI4vY5~**) 
zNo!4$YU;#%)$J=_q5=CQZ_!4|Qo({m?+WjkkW10Au=BDu#m!L(@R{H(FHK?D5(G1C zu0)%BF-1Ir9)dWMg$VTLt{8St-J$i(J2d?pDSvr^8A#YV2bG`x^Ii)o-cja3@aquz6?ZPnRZ6B(#_I^uDFqM#c*{#w0=WTdtwsqr zG9OZw0X||~lA{&R)*Cu-5Lhz5I;G}@tHrpz&Qyz^m zXdhG`^-?=1m-77Apr#2mQ-9)pWxfeaY%nn5v1U6`LHz}?y)_S~3i-LA>T$SHqa?Na z%IFd30&4K^M)obpO$%yBb7L4#GbSksWn}bf)xnK9VM1%=b**YGT7RU9!a#MEG!2B0 z;3VHl&WaS&E-qoJSR3@?a6Yk@S1P#1$!eR<^|2R343fl^xM@rmrOV2o?*cpD$f8f8 zplsZF8GnTyC#pc2F9^a%47Q)UE z__ay(9YppoCQX7(@t!L-a?VxS9|U?sM?3=vBUO2X7W7w{BL*(t;u77l0+}$j}xFS>77rSpN^onwJgn-w}dbMAkAo_+QfbyoVjL>`oeZSb22ys zDZcfhD59LKzmdRG_CEiWdej0P?=*!_&>vW&m#@!H+5WfmiPk_ElQwQN^6 zBs+AMJU)-7I)Qn4CSDuOoB?kgWe+pcVw#I4o4b2W`VaIjN5prb8X7~CN_Qd%{#FyqN~GmdeU4nGEe+C z%>n~69h+u9P^GN$j3O`gd89Vgv*HW9G20a7rc|FI@vPNLN*ivXZ`17-?Djro6NO(eVQNruuNHDknJc1OO(v zs8lRj9CdAp`g3GlGCiR!7rP(;0$z5@qar$h7?H>6tv4c|q2)}L`Q4$>t+Mq+;-8se zTG1#T_vDuG+>j@Wep?%`B*g3qH18#WBdPU^j(pT^BWs`Wyk63Rj5A{{)Xjp%MYP>j zQKFF{%J)$j{QD?}hdGV@*Nd(Luzl)ZxOE3EHi_Z^ z_UGbZRZuf_BQ9v3!SxOX50AvpFUdu_|VTupOOC+#Pe7fxO2eGH_;TBppHCa z7tw z$Na2H^^cKR;sF`(E%5gUf%Aq)OV9@gA@<=k)!P^H5rs-=8$(QA9uu(Yjh|NviN^Da zfCAmG7JkLCCMpqAyj)Gw=IVv|d1M((RFLYzU;IJ4;1d;w$EY1Rn-fF+T2Qx1p@Ba{&mY1D_Cq2#vWg%r zm$6<(RmEper}JRxCGD`1MFRIwZtBQDY$#?ToOicg&n!!;S!kKm{h?97nRtUNJTwdqY zVdF?dRumOHA;~HzCFjsS*aXRL(k53T$E}>^N?>DG#>a?iF}MA^^7}(iZ5m}dH}UHd zM|otkee}b(Otv7<7GBOoUZ7*Sl|6Lk;l^j_9IZO#mz#+zT*qPh!*7di3e-qizx_HZ z;HxGu)tfuqRk^H7GrAxsFI@wxy2_`ug-edAHK6zi{(dBv(T5I!9n?$OTL;(!*`K`H zD6ddp_g|7KdEVR)&S@-K`2kYOO&q`%*wm5r1kmBSJz=@|nXwKDM>C0?c!#tsPiJ)R zjZNPY_I!%F?0gvXHrXVK%2h@*7ii@%x#|cu54-@2)_*KtW_GMZ2_`W1WTp0^nuc&} zD%JFJ&03h65@dPav^oTP|8#6@z{IiTU$)e z(HD_cG&4Fn2L(EoD2OgVGc0`6B?68yaRZS&#F(06AqQFmY3}y81!1NXckG;9w5+mc zXn=LI1wTnb4Ru%nHXl55E9q?2t}T>;dmOij+PT#>f+udPOl>`{8Ac?1vN~8exHw=` zY>>b-svQoKaLHAQlN|G!L{@)5IJpHQMTVOm3pF>}5}Z_v7>F7bd2{ipQQd+Ardruj zpHFtiIIV`8v2x||L>Phi+Rg9e`@Qw< z{bt?wW@fGVYv!=_Io*e-J#!piS>y6Woy-(b4WGKldnZ=<$Yp&kUbXthh%ajEyq3ShY7URw=zL85 
z@D%;TxHEKUQeKstXb>e2QNDgDYDI-2eV)_Pd&HL(SHU1vTi_F3$*=VJjS@t_q4Y2J<-W5}uHxx{YqGExuUCvSz|&k`_RWmW}glp0I~i z`E%u;4fKbQ_5^K|AB)|HP^W6h0Y*K5#kR&txF`@cqysU3m`0hP-1yN}ydR^3+?(R3 zQA_YSvy)m1T$p3|!qOc=1LfMdniHEpjqeQ{vXV6lIlXYS)PO6e2R%zn{AGs!)}a}R z5ap<8!NKP+4)5s1{UBo6UMrv@YMI+-NzqF`5XTkfkgS!SUOseUQQEs6(ZP>2dXvL3 zcT{8&HR|48Sz!Frmc%k{o&MV)+Q;9^@6Ib&vNH2re{erDHOe>|@gk5j-POJU|6vrH zlN&iqeM-)LV+bS6T*m&sM!`1RGX3!3wifBzOhgWIJAs^WGSQ%PKP1QHjKfE2DN9Xb zoBzS7HCa=rga40V1`^Q(r<_J);ftvUG=9c8w#>TX7ZK~BA+If!)Gx*yOd7&&w|8)N zLEjf>P<~%KNzV!OE51Q9xZ8%@25M9m+#>OxMCW`3AZ}PhuuZdM3zXl`n}sed*ITC z;~8@A@cRs)MP%J`AF%c%XGO$_bi86r+2?(J9g5U$;4P}?li=8v?YwtUP1DCrjF!w> zgefD(!)?p=nFVII=lGm6U-A5$r%s=|>C|b}AUlcK8~ahDhLuC8x63g6=>(YS)Q=Z# z=u(V0Q17{9j67ABsv;aA47#sazoOj3l7&MjN0e*$27iP1-iMY(HEsIaF?kX~kZV*PViO8GniZ|w^o&*@jntW$Dh36~}W%Nxw1^1FJDmteXMZ9a#T=4kD z+NHwHlf(5Q=$EeQU~UU=;>H9u!kOPq?0WBZf@$+vx^{Ba)DZbqvbn#lTn(fNO%9mz z7ogyY9S?xWH9$Z^FSmRkl!;Nyx>=A!-vwPw^}|W=cu(FovDjM$_Ot~JPFCrZBM&m+ z%-F8=rkbdYPC&g^aKM@vBR9K^RO<>aUUHNnO6hlF?qAnS*UNCoWgt4Uv{)PZPle({-v9a@ozw_04XfCrW!m{YLr$Ygi9$zb` zxLK*S%r9ty+*)2)wbZKGYr@s(ey;LkeekxzCE6`DQc}+T+W&T}Dfd2~Pi{o#Jw!+a z)Xf;#Uz`VfI;FshKGnLMb9RNLMZf$2n?IsV@*h^iG$wG~-Y4NKy$(h*Y}ld`CN7E5 zihjsVn8xP7RLw;>MqB*20HqQ1%D1-#>oJRsFfI;*Krz>+=lwA{rw3>e{~Bo2z(hm` zE?Rs0-7O0mXX{~Sj9uN#OG_`~8RWc!-pdI-qjAM~HQGm66Kg*26VqJ+*WW5`0AWqY zFlyJh@S_kwdj=wrpEah$&v4E6(N!!Ed{q6T#Z*m|)O{3d+rDb`YYmS2T=1RXDdq>A zfOb&7;=4<0Vjqbn__fJ2hD-iDpl{sn%=Pl=&$Q#miJn-ESpq^PG3Vic#iB4e9(k~J z{2s;-1l|e(K_T7lXshLJgB}n2VhFuE-0stalg0a)A^zS3UFnqYH$17FbDyeat`Bu8 z%yWJrYi|jZk%pU{U(vzr7%#LR;KZ2a!0$3samkyM;9x$$>*tp>^R<;Nqh@>3ceV{K zEh|KrIODbV&rQS((z;EBwFKVZy#iXX1=@V{9bUbWj4N>Hq>0B zdem!YQ;6QZ4uOGh3go0~(dPYWP2^?=Il_$#OKZb8^vfD9HkU#@EOM%>_7=>#X{GZ` z7jDay&rqaaNr_=J3!U~8I8J7}k>20VZzKm=Q;U-K2^`Z6R^GXraN=mQj*CW?lvN(X z7p7NnkJzc|CbfYMRfY~N#1J;22W(N5UB+ilzkO~?^P$0nEx7%~rHh(VtCmj>C7hq} zzHv9TAfKGqJ*oyWZJ{%+>7b-t!4fEkScG(i8MIJ>&q#LEq!roAN#F+%AS%I)gbU-O6QT#-Gc%d*+cTN(A(?=K!mZOFC$`tZ@usSKy4 
zOBKWpFQ-sJ{fSF$j(w(77U*;!pUT~mZ+yYiJnc?yRwKSa=Pc{Bc#-l;IU-MP!t83T zOv|;DFS3EVuWfr6y-N>-eh#_U?FsI%v53;=gGrN#3wtsc${)bR_33x! zOrYb53%N^mj3P?-r<1l-gthcTgDEostUY*(wm@IKQS zgxoLVU_+gx7x8Dc9B+)t(O@ph$vb4>;COe2>nu$I119lHX{Cb~N#I3QEj~sn#$*K0aRh-9%~of{ z4c)M&f_v%Zw*3L?(CP;NX^)gIYPxxlOx5z@*wEM4gI6Z!_%XFr80Fh>AM;14>x_?% z;$8?6L(>LyW{PIq*wuR^hac}PV0d$;xI$qDPe*nOC2?wtCFPUG%Z?WpO{}}N8nt38 zJGrk*JoG8QG|nc|x{T&Y&|Zw0rFY}WmYr0yJEk~Z1wm`tx`P=STC=Ux@eWCXz-UEw0A7Lrw;NkJ^`eklyIK~ z@_Br0kxk9%r;oPN^;Ss4Sp9eP=yB+g!6lOo6HGIF6nrKoB%vd%RVD0m&}m1!je9+A{I+G!;kJ|5&U2d3b*V_%PKK_D&AHu z66*Ox++F&hC(CdW_<=N9r;{^usd4%ZUu4t3d6gh)?Sm1Y^bTU!M9@oQRrftm0a*}W zO;BA0mchJdIp-GCCgDXdkC@Gk3x4L(cRWOk^vibDjHjV-Q@=}9SRfXJQ>9IQt8H;b znyvTt=zVE03K1=1@~>o}@(Gf;-i%9A;G9yfO@8v)3KzQGGm!|-r34~oIrwbRx=dLR zFs+d80#03x05gOifJ?R9!xOYd^=pS%Fti9T@h#aNHol_$kKm_XN+U-m!^OYEl9s3HWXWbBhS9@Aq(5XNM1-!hd4TS)4U9( zxc$nHC;y(@B^P!F8u_9$%1ut5BE;bd10|i_8)HEf#YR+=&qa`xi^Tnrz2I^{bK7yYl==UIb2pHAgvA+_*Frm2|jmTOQ`GPL9GDY;<^(!Uk>&XFW>KE@`fbh zS3u8{QIwJRS4Hu58?)AT6&^xq`nm+FPYjQqsC{f}8GEU_WvW=(cCJ98OF$!cDRZW1UnmVu76L z-d9A0$4$Hj6Wh1-oT+rJ&F;g6Q0BQh=C*ubHFDNNwL}<(2=k|$tHKYzi9CW1qf~s( zV4U-5M*6-8;S&{w=lF5^^Yk>aAr6VOU4RYgTN*1Iz!$&pRz~9y$*ykLy4IHr?<;DC zRd1Op?kYHkY2Jrl%G?Z^Lnk0^{dxI`pO1X9fXXbli_Ea8IV-z>x- z#ArNB%;d}(VW1Wb{pB9GF+(@oZ+lF^t&L7cooq?QG-|k> zThdb_CnBq=k7{zw+v^jbXJYaFJfLdt$o02v&(LZI97^6R4b|CG}h&}L!evQiKBbA2!0JFsK*@uHl@u({bc zW#kG^xKG4iY>$l$d|5;>LL>LO7ToIGbeZEbtBKm?0#{5G@UK*)dt?Dwpb@`HZ?OyU8Zw=5OHyQUaB22 zokbbb-YrAak#M+co-8vN$?U4yvxJ=xUR`7IvJFMEb_d2ZY6V+T{zR+~UMf|+KfIIK zPYaNjW#Y1Bu&%f-;hD)>!0sdEYBh%K<=dpU?0MyU-qDQ=Kev)rB6s~6@BQTS(PH3# zc;4djCc_%frsZp8jDc{qesR9ic6SpJTrO%6@j|~Xp;Pn6aTTHU=jZ%%Qg=w!8t1j> zS2Qkzj2x(k&IDNkr%NXr+IUCQQopOWuu-)p>1ZBu&o&6ymPQycJdFDOq4SrHJC82$ zPYGtb`PPO}a<$>Yy6Si?)oiZS;_StgI}`@c#2$DibHy|$|KYRHgt&lNaht2F`5l+s z@H`iu{mY}K2)7lx3#LjozB%)uY#mBiO`uWZCz*(q0|sl+V`m;wcg1@@T`_b_@U#UJ z=Q931jW%b4*40nHBE01}cU2KBbL81kw^kaE*bnR6a_2;?tRqgOVL;TQm;^eq=_E8 zjVX8GbeG=!G{2TzGEsee%|O6N{&iozvcH1~x+s>-U467;$Q~ytkFxrKoH3j6*1bEj 
z#X-n%*nSsN*{A?veh6*V{?;{Lb%$@>Sf$WgYJ`3o4Jiytv5FForo5SdP)Q!R-NM<~ z8Z{g=;JPNltNT@SDBnqoy75E2`U~6jmFInu?DtZ1sP_YO3LSzxJcvAMjHlF|n5T(? zGszPITRd2C2+S3j8j<6b1%z2z=l8es73LH%tiOqxJT3P{Pwo*IX0DBH?rM3-ob}SQ z##KsW?nggsNZZu|95|?dBoKb%XJoy6$nV1mPuNhqj~Y7CX>FgW5x6hU?3=9Q&#m$? zZCAyXHq0iof!PxjK(wH?x$4&Au{7aU@=ZjRjY)QaA1qOtw|8A*!)M1buxE_n-g;D3 z0%<$2yW5k)P2=tFmQW+7?6MWj(e$ptxtBaKpbvp|QUb%e;0xm2HBN5Z@&0)7wlt`Q>_Vqx@Y`l<#FiTF6}P_Dz}je~ac-W;Erj8xSt4XcX_12ju7 z^K)*h__ETrffKGD^=@Vk72KG~LXc12)|eT#<+|1d5q#yw&PA`_dIv)=6NCC1(w7@G45 z$I0(9ma2UobFJKg+|6*PEUHTy;a!MD_k2|#sz(E}6=Llocr&PJ^D90OiS#5Yx8cV` zlz^)koj8Hjl$|VtFLa#u`Bv)SrDw7We_|a%4cXSS^am7WH#=H~3HOkutMveuS0TC^ z{8qzmL|PkA*Cpyl4}~IE(F;8?@FTjS&K}wF^An0PFY+tNv)p2DMWhzXvh9EsBRbzB z|8v+Df&FUZa&C8BZ`q=#nb`M>y5Cr+lg*>ZaPNgWDEr~%+vK(97No-nE~}{yI*37o zQQE!aUuQIU9pkZ=2}r+p=bLtrcAK&XzU$Jj3^c z42zSqxmgQRD`EPfwJkao-thxn!tlcWzVagrTiHZ7F#jqs=OWw9{WzPg{5UA#}74i<0NA81y{39Hr0`}PbYdK zBlOeBjo8#8TWz>C!&pubKc=XbH|e-fY$Qj2Hfj)^n`d!RW@rBK*U}m*DGhIm@v?5m z%!Q%=Q^J!WDe-CWEkgsMUgkr$;Q~s!a+p_3adS+ijGHUHBa-S-!FPSWk{QA zlN~+b${BNs@);x6t9=z~DlYN`s@QKnGVQ$wYnNCzh*%T~t|_VQ8M&P6lHo!HtC%I@ zP&_qiyXN#dNkkpycOcQqu)|?!-c&4&LCUut|x z7QSDLf5N5H9bG)+S;!9bC=q)*XP)hlxexfv4GDm}Y3)z@=j zFp^>(^$e-fl-Bw|CeZc0E54iIeEGg{x`>(o{N>l>aytByM=oSpeCWV|5acIn7dKrY z0ihUU+LATLfRe(pkKQB3Lfa=;eJ^Gjs|EG@+9N6mqVyWuCF5YGgnLg8qVd3d(li_r-dQC=Orp?(x0) z4cUV>H?pODnWK)$$5QOouh&XFW1o+)yh}4v2s1x=wx9S5G9*s>q7lVk#WzJWF7w@Z zGPi*=>H&%?B7eRiUhMPJA)c06))|{Q*Yo~_^`}_dQvCIq<7VyJ+|PgMOmtcCWpx`( zhIGqSQYNSd#Rm?A!)&+PAzBF=meCA!%WEA*GkLS~zu92L5;>q+7+Ryk(u~7kc{Ta* zrpAHUlj6DvXr;xui*>I|O?7FA{Q(jbNRJGOUs~%dwSKbM79=#}!WZsuKiChMZ#+mO z+ovCuOetCRX8j~+c7(tITr+ku)z*q>4IYu7Z$m0pwcWqqSxPLfo`i|GGxqC&+(lPx zVj87ic{@ZE9yy)7YPX6rx_@HhxZV-I z=XPJ)tqO4F%jG@c=0#Z3b|)kA@!Eseb%jr##RF+Nwe5Le$3f$Xf-nNi=$ zYcUu%Is(ZYLS(f#a2Z|Q@du>3qDF;Y2e#5TBr1_C+YcAQ1gne8{aq826uEeGHE)`b ziEn-OfOEro1YK2^*b&K;?6R7SstAEiGB5XrZNA(mub8XA%V{7W4_5~|w;0&pXnHrj zj5Yvl@OysGU1qS&HtcP;e~D3hE9?C9{pc=(2o5es8TO~|vrI`QW|rp1iKBl&3>fhR 
zIpI4ZBVOE1&3eY5t#-mo@taJ9E{b7`zj)w{0^7s*eNB3m^)TF^)qD(Bq5(P;8Hce( z`xTD_`g?}!&4`B(21U+8)RJcoH}9_~CF|1qXCrB`Jnjz?=v8)Mux~zkcjCdA8qj?k zie#qC2MUbgxA!5+WtMM^-hZ3uAj2c>N;~D}5sUtU_4=MDFYlR-=Lh}CyoKiC*O1T!3~i@yJZ*YQPWdq5r)h%Z z8;R0)B?ZYkx39(BD?SNw^X3LYX9l*P%_c@&$ez(Z6RKJvqDQSWz@*xc9SE3q>2W5( z>1cT7Vmmq!IL3IlCIUI4(Y+>(cI_bh>7d>))x$qL+QHKqxp&mBl@TAaB1(DHRGIV! z;TAeQ%TaT{BFslzU323dJa(1&E- zVu|w(d81~8QYno-AEW7#`4tCyEY9Hjr5%VP*`U5QKR?^GV?N?W4JYCBw=<=mf7V~1 zLVlIom53X>n7LNDnHWlMeF&~Zk7AfmKTe`A67M<%6L+@Q=Unp3Tg@1Ry4%9-r)`!g z?g&`D)QWszZHc7mJwZQmOT3T@#zC`Ox=eYqyz3oQF(LGaN#uu?w#05UiUP=hbS0`=rvU{$kdD|GmqDF*H+s zr8GTwX1(6{Xc;xJMMPivPJs!-jf^OLd;YBvqKFIi+QN@$Smk=s)GQHuIZ;on>_nKJ zJRv%$rPZPue@T5dLk=;aRDdL!gbYw6oy zBD7&wnt@g!I`C+zHo@htH%hy2Cgn2L-@ugou+U`wk@`|!n&*C1AK+j8$ng4A@SMaF zU^Rq`G2J@3vjmS?+)UWr9EdtMH(BF8dgiIZbew)Knhomk=;s%z@fVTLxBJBZ_B3Hs zVuV;O9_036N88M~qTF7l&s>k3kphHQ>qq&fB&E`~{pW^pH1$S%OTTN+IGy+#5?(nZ zgNO6<2(H_lApG);Y9IagAL^fC%e=TzRITs${JvLWQ1rU{8(wZP)L3t3awL!$CuBrL zydvVlfwGhq>?N`OtTTcr+NEWx#RWm%0`VzRH4gqjn&vrP02kA0m()$OR5-_5W!`wQ zy_62bcXqW4{CF~{GN>2u@|LR5=-aTNAEHqJZKwn6uyzTE%R-*3MJqU zi{)_4sx-Kr#wpSn;_3q{0&u;Mx{X%ZGh#?6Tn4obs1m4*l`0w;ScD6hQhB+ zC6~ij+@2+utft~)Hb2fQ&3CEOw#i!=?Tjt+0JkQ{vF?G=pcW9qr&1_ABSiHJ({Hm=k8p0L+wu5Cs zP_aoy8~mmm{db*bIaeEBsS$OgV5aXHS1doun` zroqWPl~>klo|1dQQ86Y*z;-wNo-WFR?KhQO>rg!d5{K|c0JGdkhK)P$GT3QS?T56cT9Op{#d@O+AVyd?`18W4huJScv!#%9Wx^45Js?d(9Wy2Se(iyBgA+U1Oarv7sLBZctSGJ%iA~`xdaH~UzL2@Sz05 z&u_H)J@@Hf7vWTXzrXT=g&dM%rdc-vl^awim|=XO7$vgL%-ND_@o7_IoOV00R&%xq z8BrTA?*9sYMK9!)<7F}Ic;4dAS`$cfxQZ(6QQ(+a@4gjoPzrS~N;9|U|+YL z>2q;g={w@*()8>THEmbz6SUjTo-teL?0YIXqO&nuSTNeO0%~*Q+CUj}a?@iW`1S#c zIlKC6W_xcD;I`i#%&p=h{voKyHL-?$j(tH)n8-i?*^EEmfS@W;E83g3USC^T9?gnb zF&m#!&nv%!XK)u+!$3AnE#T}A2#RJb>37`= zyVL$geZwzOwxcDV{G=xsZfD0fs@<-s$1hRU+jvr5p1qHYqj_n^$+hnH64`xPD1%7? 
z!?@rUJdy+T6i~ZqR4yBTmLm5nVT|L``>{0Htdyah4${Oe+=+OE#;)Zu_A_9Xw$_0_ zvls761XD&Fd5^N`H&=5#^71S!sd6&VvK&NcT&vyAuGfzj^VfFkQtrEMP)3??0*A{s zLGtaD6OD>ww=;??2%`>u^j?XjrQFz=g=N0Y;#ZE{Puo<05>$kDF6jvUnv)T>`I}&a z$5INZQq1I2~ols3rjh0lQA%g1~Bg^(k} zHztTA3Pr1Yr_@@qP+}6L)=Zw$@{?EHFhT}46F{I@ zZE9`!GcoE{F!6#IcNIrNUCxquWR5ylv}Qw=TP5^%Y{MC8PllTC6p?3*LB{`>t4$_sV+%-eqb2rLTY_X(GHB}*)V#*1cGGg<0;R-7dSfLW=kdy?GNEHs z=?@M&wr)=L>d&tG*YJ@n18z9z*vr@Ghu8Xh?0sg6y^e0*ttrtFtW7A_kH1wSQrjyI zTx}Z>fMbD^>Meh{kGo-eJB=!5M8tu&_n3gy2l4Gh2x-wx<^_R|5ju%EX)_+DH(+}& zeI5mARm~zE>s^SyM88TZ2yBbxgE=8t5&mXIE2f{)Y`kVuCZwxM4L-aqs2wEig~c|^anF1BaLpYdI!NT-8@p8#QN|BM8+1ZEbd?W-S2~dJ(JD(Kh*K}Em2=h z?;UHXa7w>M_vlWEUCWzE6~0UL*G#qjiX+vpkXv3`7Gf*-&1;k8i0xVpO|zMv;L}NF z_6?n=GAD52FkQ>0Q5fhV6Ieu^!R7-(Xfp6<7^pt!BA5A)ue{j`H1*#Y*65UU?8(UzGQLL$r!?s79bGsQfj91_Bl2$#AG|>VRx8P#SgfseMa=)!aS`5 zDY(-sZ$0$Aygw7~x$-4oHyLJ+0ijp94&kNHwqAut5y6tC&>S}^4O>5_+Sf}SHv@}= z%aUmfXv~D9d=sE~2jpsh!XC*X{`1(8&1#!Qdt}j z&`3;q*Jsazj1$vUVj~z@+uc=-_j=e6U9w)&#kL>L;B}A?#9Nv+e*#415GYU>p#MQ2 zcfL1NJIxXSC@(*oSOlDw5>D6pOprPyTaYf3Y;PW()l>{SOm62$^*#eFF-8-JJTiPQ z9Qxi@5zK<-iaN~LkSD!-o6=)V{3B8q8|Hva3*D4vFln`YRl3e!-V%lPOso0XhFJch zs>FL$a1ZS#f3uLecjb|uxyol;sz`~#ts5m}w?eJ^&fRix_;mZi1MS|IG7?Et$!IGr z)wCUE`d2eZ_Uhn`S`7L6B`-|~IYzPo8fZCvBnGIAg7trv+gI1QNUWyHEqjAsjFxli z?0gzw9s0IseER^H2(fp*1dP}BgQWq5s>lw~j$G=>qNnrqDwQi50;wGKi5X6(ZK)2d zuB7ccLZCb0EEZ9(ZGJxYn{y0iu|_K!P9fyJu#i53EnGPWV~Rr76^Cn7HniOgX9{QK zBrh}AL)AVNh^W=z2KS@0E}x<)wwis~2w*f0w%}T*j37yIPWCG|dT_8-$e`+sFm=og z>TF!gb?8df30Oa|k!y8X+w)Mk=@>GIvtTsFc zl10Z1`-Yt3+QZ{qxV+BPHqzg?HRcj(hipc)BRb!{4t&5+n?J#ITK+O?>m@oIuRU-R zuRcqgm4TrQMfoO>xzp@M7%3R3!1D8#QwKBJas3nzd-ss9 znxZ57=$+&uqdX)$vcUz?^ZPir@Th$f!|$`XDzT%r_}zY{#Aim z12e?)mgJ^g(Vi~2A3CHrobM%p+ojvq#W{dgd}5Rk)ZnBmK&Aad$KAJ5aL(N(= z`Ayk#7nQg>97>b_fJ8U6b8Cge-(6bi zTC!%eG)lI&gMBh@>Xr(!G|J32%hfRTdPWSqDz5cW$02etEJ=1?{9H-%7k1|hv1Loy z$MhWZoY5sqqy+1$>uCGx^Jc1swye0T)305k6bd?9MHImNsZ}xJ`&3k9Lzbl+)IqO# zR!)Y`C7HZb ze(icmwLgDCu6SgukIcTLhwGWk(UR0)j@>sPqD;NO%zGh7X85JYO@%jv0YakT!7!jYe3bR=k 
zeh*vR6;pWu9{N_cC4@@k&)o?2>A9wZy#~lXlrybD6PnaDx1y4vO7jZ_GiwXM4ZDtY zA!355k9fXDeKn{dU@&LQyzN_W)Jc@T>1848m;ggiN{UhS10EGi*c}hwZ}Qz0bpTFd zb8`2i{PEr9vbFSQU5Ya(^qLjC`}aL(>O@9Y?OcOj%fSY6@WoSVM|jMyx{dg(!5o+J zjvsjW4XxezEU8L)uO~)3?}u~&p>UmI7us4n(fTmXsay7*4NJq}_C|e|EjrD+hbI1u zJ^s9Qu_^>sYXhWD*s6wH=1Lfn?--T9ov@7aF93_@jokKxy# znR-ue3E|7=D{N=u;-D}uPWmO_1iS$ zPLj-KEo4}AvdjF8j>*Nf=&Z5A{KK88|L_Qu>|LD69( zR&`qZnu%BcOG+=bo2Z6&9qloNbKh&YzL=HWRGLXW=V`~&+ZTU8w0|8YhB$|if~|yG z>Y93h_WTv54`KgANGyT6M(l^|& z!E0QN4K&q?VLtpu5ZPU~hr964=D^@(q~D#MatDuvN8qk(EYz<@kzoaOhX zMG;qE9htz=SlWGkLf=pawYE|qn)R-DRn_%vaVhs^K5`D`{mi)kE1V_^uq|$ z`+{Tke95BXf3fr+hM5e`2lQg*W&w)QLmHyq zWc9{V=WbSO{(77|amcQ|P>>=1BO~&%-e2ysTvD!(4%_!vk{30(eJn`b&X{19r~zT9 zm(|QgRxK%$98`Chyg!GV0#{O1!v9IbwStMwOS$esR}sydnQD_RqUG&7)cwZ+bF#e9 zr#5=wtlmfS@48FRD2~`Mu_y%`bPAvaH-u_Rs(_B@Pj>LedtH-zUm@6;no=J&qlD7^ zmQqBtV!7Uwv1+u~leC$@x_n_8*W8Ld=_r@Ps1Ei&AXkAfXSImIN%|FZ;n<46z4VVa zZ_VGgh#ZT2Kfx zy}oJL$F8p+Rz~} zET47XV(f#qI9M7;s-gT<)$84(x6}!=$HxrJFt5Z$>1gAEFULb%?04lR%5McZvo~ws zD$usic+0XhtYg&88Dg8@%wYI6LuVT8%ZQb{?b1fiQ)s#g648mQhVgnZXBQ9EI?-gl zmAFJDm5cp8E+tL%uBl2VQ)9eW?z(S-_3exBlCku#_R`@&zK-CUzLx85c;7W~@Priw zFo#N1LDu$9kZXJKM`S@Ih?R!tnHF$$m(ng?e0n=B*YqysjO^>t$Zy{apaqW5YOm!q z$}y7Yl$uwEF1l}X&Ean%o29RHm*r}rXGTs>{blPDf>N|QQ6%}>V=S9z=r*H7C2cO_ z`;PAGBHzXV0~Z5ki>A(f4!t_n$`%5>a4>$RUsoq3#CQFRVua6Ygs7zjMhV%MIyzJ4 zCc#^NU%E$d^SKRw+tiyb<<@XhO)Yoj-xc??tM!E`d?|oe797lvz27VlzdUT83(UAZgv!Kk@ zM(zY8_OwN|$E*(XjS|Hazq#(kemw4x^3;i=H9}J$TZPVa@7q=2mP5B&1dU`sXXG)T z&voD?6M~+o>Kim{`3a(-VUCP4bB#d2 z(k`CquDub7Csvn~{9|#7vX!cl!iZ0KD`wX0y9c%3Y&ZL(Hmd{8roVVjX7KsZe!fQnB2SpoGwqiGp66YOo*uQ|6LF4#&ru~Ex0m* zAq)>zUg3~7(tVT~AyPHLy;G}DzCA$#69ty(sRe3&NJuw401-?JhlEhBgU2MwNmK5g zc8?ziV5_QgrHuPmClaCIM(ya7Jed3kqgI^%J4Xd*Ti771+;^jgN|77NR3-Yhheys@f_V<22oTYrre#FZVqNNtX|W#EyBQ zRORV6;ghT26NOOHuEeko=K2MM;^qYD${u-_57^I^D05r$H`6B%4mjD!F1JWkT~RWA z3Nigqs+4}tcF!91GzSVEC8l-Y-D*jAt-wo|v6(oIt;FFgP(iZe`zsXXd*`H>(0nU<+Qac(0AwKyGE>B8Nt8G<8&HJq+D5~-;V;P4FfGFdH 
zvDOU}hqlNbO|)nk;QcD`T|qtS&ce3YR26Wl@aScjEYwu)K;*S>inCUkpTil&7)U+b zv8L@Zt}SJQ>u_;X_njiKK3`*CyTsFEWQ{p2n5h-|W1G%Lq7G!v4vJ$I9)V8u>YF>| z&`vgL<1H9BBCEjmHko66VXA3O@4Q!E)C}}i^DUb$$!bz)U)!~QjI{f>JZ>uK zUkI^%NtjruXnR78;cA;isQ48^jdTeP^wB?j;tj^cKE62rAzGxkNWeHLm$aB+_ni0k zv+I@$4*OupMlImw8$-7tNCY#Sm*cYM&z}dB=wd6&?i~L7!Y4B*tM%h^FEwi&sE)AF zy)fw}UCyO?ud`7y6vpLhBSAkAh9Pwb!71@m@hUJe9i`lT)K6!rEI{iz_@J&d`gga* zGNiQdf=GqlJ?8t&_k@|tU3B!dtr^S;SaWr)--F2@BO7uapjMb!kr$1ZnROY3(M554 z+Uz{idX^4zVE3fI&fO9WgUN2kLpDkK{<>JGS}Fn@JIK9zgH7m|1I^x0 z={KdOykXPjmN=355|^pr7?ZCo{nEX*W_}j97r9o-L&RHdI_JN)H&w8~Jg#Sg65j(pLz2R?TSj`ifn394(AmHkoSB zCfrI7!%*cp`sE15b|>#Q8BMNOE?1@P_WSa3*j$2FnX1nN#eLVbQbbcajBHQ-fS5WS zu0XT46uWg7Z?C(}o(4$vk>WvQIvO8V=+ZTICNDvzyoj6A50f?1^*K;OUU!ZdTawg$ z`Me5^ifS^S&h@BLtRvNSxe{)1fa&~h{6#LK8L-mNiKecvSg}?qbPQp^$hz6Ttk;|- z41{(zWDQ5)I6oa)s=(6`RS6J$hmb5)-XcV&_|73dkkI6L9jW+Q)uwORj_Ja`hx}Ot z{r}hhiUUq}SU~qdSRg_yQV>1}=bt|i=nm*Ekm(};IsY4wzypE)ci@W+0{!pczr&#b zO^;30q_FA8vq~) zNE2u_)E{i%Nkj$$jcOSJ@9;o6AUBXF$OZHY8-h5Ed&IFmgEqxadLqcX&bXf${4(2n_NDu-OAN2?LnuKtLKhByguF zvI8*MfgW>$%sH?uIIuuGAo07Q%9=tTEp-lnd_b%K&4UpUZpOmcs#uY{o%_Yzo#OCopSod(8|KkHL@aEOu*VrIJ z0Fej?!g~Iv3wVNv{|8{vkag9Jdgqy-=>ECB!`>Ax@# z{u>5Xkn%tL_)D24i1jhhE?9sZ+`a#=^h5ynzyMu#|8VSY`~Bsg!#|YL0KEN=R{TGg zE%i^?Y*nl45~G{!#(ZB+EAq+_8XCK_F@n#lK0-e~8RqX{7xx zqVfv({#QO;f~|QvFZ7p#h{A477+lAm#3WM7aa<;RTfC3f%wp zIxxr=_{RTFEB}?^zn28a1Sl~SOX8mrv%E+^kFoyBr0qX_#1+Wp1El<$*A2k)w|t4a zp_X?;H9_@2TO|K&8(^d$r6d9Y9>0KA{oAAdO2yy1J5cApa$@(-{Xa`K0nqQQ-cDk> z0KFUp^sHM!ps!**2kL_LuaScA-$si6DHojoi&4P}7##kN41Zf(4@B?;3+O|7|Fnk? 
z11m@mn;4`7)Z}k3{#&2_N#$R;27wqs4F5%naY0ybumC#WV7+|%<`x(lK^d%Apzgmi zM8E`kO;ro@!~dhb`+%zIXd6BL9IA)~#jZg_u%Lq2z=C2&5y9R?ItT&+B36jKi$PvwjaJ$rVWnO(?FJoQ^szoveq zS-5ta#Sm&9!u@b;lwuFHU=KCoL}o->Ej~JniL}+G#92*S$)hQiwFJw*Si*;tDfGLr zduD}Si@j>0WT-97S*M(Bdu^;+6URR6QPHG>KHe%-g0<%%qAlJYhDL?h-W)DcMuv^s zZTXh2Qk+gKX?JTJ84(*85$n@_KvZB%4~O_Z;j5>d?0vARMblR!-w#jSdMSEmg(a8H zENsz#Nab_+i{0K(rT63!HO!9R?Eh%H_s(2ZV#`|iotZhP!`jKW=I1Mx{8RfMI`j+~ z88GD5@>MT>c~Ws#n!S^^;qdaSUzjELOiSx@=SiWe6=yDO{{#QnwODfaz%l@z6q{^@kU|blRx;zV5w#Zt%eIo=LY$ z`B!gk{=@g(6AqPmSLJHkFo)!r_vd#dITUl4zHPJb-F<5su=i$Wa#{UdQt^MI?xm1K zwHT{T`##Zu;W3U5>UYHCZ%Lv&qo0o4GyLq3Hs=l&vx{?FlGNC7(Gba~kPu>1@@mP4 zSG|s!@FnySi=Vz);rf2<_%|6-?^zd_wbJC^_l+Bz49SRd-Ip-z_{hb}>~eLBj~caT z(A6zIyS>jYs(0F{yuV`#@yoR zwOW1mbCI@b&b`)7UVc1t)4{5z=6dZq~(Dt&s(K# z+5YC@yHBoY*nqE#3}*$$p%h|60{a9ye)Q|<8{r!q7_D}WJf}Q!AGEe`%NVktf6`@V z|6LWD`fWT(g^M(vQ=;L~`RC?cO}lezxbvBtg)YqY$lImz9Minx0#7@P+4gL3;L(ZY9sh%a`0^F+&=Esn*s8B+E_(#SPiu3KN+Qt3jOrb$s(q z_rH5p@%yF^E0xP_;PUCnuZ{f5U^ZFLHYL^NrL+dFS(1Q<5x<`_3=uoMhp>V%qkpmZj~y zjha5_lz#fulY!-XCNG{if7sikM`wP$`z|5!#;KlrqaB0pC+2D~;M~gxMZ>IY#(rx! zYeb)?7uP5Fo^ISW!!~6?`NfYt$G%t_zWbLmfxFk*6tpyYXt`t3)t-yn@7%lDc1W=! 
zKiYV;Fo^MY@LQeUVT8w*DWlz-gFH@dx$>axgwXF77#($sEB5Z!ycWA^TeX`PU$XYk zR)$5!el;*9*l&Jla(D}~l8@4AHfkL>-7Ge-%T>c>ng*r58g!y@9uH&7KI?*p-Z*#U zj@|CLRs4GIzjAKp-Zy~{HyZDMzcHiYfj0X$--z7o(5LW=yooCYbYGYD@Z|$zSHqOO z`DbdZ{l=C^8WMggBdzzU ziKhciBi)MJoO95*&g|u3I+k>}k!aH_`CaYJaUuKWwQ;oeKeM$` zMws}l3Vc>P_re8Z98UY4vfNVPqTk@@np0-G!Goyqp#vnsDP}@XbAgUmAUJJz-#M=Fvr_`6oTe>}6Ja)z-cR zzwWf>_1okIrH71pAOGWp@uTN;xD{AD^Wm$~?|%Gl*#$TEc{5$UZIJNm)$fjeyYKbu zSCOSVrZ^j0P0MIp!g6dxQsd1>{k>0hK0IyCghQ7*<|*6iWTjfk-rpOAITU^|$UZvN zdFhP68+~uzw@4ed&3Ni$`}7OPdo)jPXj^Vv$EdwU4(E#+9dj{b;kn#tHP(GKb9A!p z>L7=5f!C~#n0DB`%cbwHS7$HwcK&I7Y@w&k4i$)*p3x*D$+k%u*Q2k@R-``3w`5X} z{>vS=-}WE*#QXgI&F5?z*NI*jx;13>k@V@I)6S$v9<5b(Tbt3F?AL5N_T~5yt5a4t zdR@kMQt@@)k3G=0@bNN}x?E0A+c0kBh*GUa`W*Rrv4Pj<5q-~_kD1ybEFyR6w%ael zirIHua<%`Z8gWw^oqcCNE6wR%;O!zepEh%8y=uMTtR{hv21S={RA^C;wJX!>UOR2^ zF5l3U^{V;Lya*}x%0+d|E@`+nN#xFwcIgc_&3GZE?)CIb#bLZmBSwx z4qszhaMc%9mof^w&Y5SPyIRmTv(p*XgEucUoAdSM6H6vkSpLrb)Pt_e+r_LZdA8Mx znC*q_hv&c7Vfg+7vr-eSL%OA$SQX`#wmPHNu;ON$uRp#qd)MLkjCwt6%a%$y)x7J| z0i`cYO#ZR6le5O6ZsGdpM(?&<-SPX2=K_3&Uk#npJ}9koio>fL592FNPMM{-ih4O_YEslDa;~I=Bz@! 
znx}0Es8?>UecfF(7WN7D+0^z#`Eg?!7Am!{!0a)ud7h*#$oRi|Eyf7ajh?mJ!E8(nXoygE;@YtLWD+}xC>#K@h;E8d&kuJO1%X?If^Tz_*Zz+}m_ zI^iW|U3))rOyONCdqjSHZ%|~H0pMZx;W>^$#sXfN8ez9D-*sR>^rM6W!&oxXoP5SlgMO}72 zFUrO0kC~3VXOre_WlTw~)aA2X&U6_N?REL1D{@wb#54DSs)a_p8rDBoWaM`{EvA&a zJ|=a|dBYL6Yv!%hz_!h!$9dOQ&gC%Lckj&5UDLlgZs}j`hEvGO61nR9Fs<*b{e8AS zn&j5DNb9ke>jtdvHEdF(^W(z7Q#>koma}bJ!NF-pbiVQ}ljo$4YyT#3t;?dx9dFw# zTH5f~u2lC|{f5kcK3g;Ru?*DKR$Kv2>9h@7pwtxjfRi!onk` z(#y_X*~;idx1Bo=-TZCUH3n7W^O;j7`>6RuR@K|%_W@+jvVEAl66yI-1}^)I*|R_& zW-LTT`J=3mE?e|b@@Eezdw%3YNM}D4k~VjeJ&f!-!G|#BoBcjEf3iz>dl-`~5;5kl z2Wvk&x9Y3cfZULETwrueXk>(|jgv!F8|%Ob|Hy#Qh+tQn4(*#&aj~(EiS>;L@C}cQ z2z0d>5Ex_Apnl%m^?d92hevz*Mp+X;L`)rDSDTR7*r+;=jxqk^7ky(KBBKH$7#9>7 z-N!eU-soV*fN0+JwhC*;vQM(-ja#mtSx-HLRK% zMpr->UB3SQ6ik>Rt3yA8@wy?uO;cSzq`H2Z8u}qM^h0Xuht$*$sihxM%f>n+JTxMV zHBv1b>!8SRcijMemyKh6m3pfqG^(6lkS)ovo0+MGQ7uI2N{6kZEyt^Rj*nHMU(bPV zRTvwM3n)t2f?~G}LM$aa|9s@aDT@y-Ks{hVk(y-5n^1pcRaT^#t%#GIhzHJm_jnsRrk)b~^?v+NSAU741 z$B_6&BdfvUWH0t11^aOT2XPoja1_UI94BxRr*Q_~A{A%x9nRwdE+P$=pzbkU!Bt$t z4XBezIx_G*Zs9iW;4bdrK2)vC1CSI#Qcz1n!*O8%l0%GO3=`yn+D4Pv9(73b$@UZh zf1ys|iAKK6THifPatNoOYP7C{q!m-lWv4iUhp;meX$?ON#7Z26hcUID7=z9noB7z* z1<}$@WFtGvRuo~|4y`6K40}+8`cWq=z@R!J3Fur`q-!IQo*0FCEvaclEf2PL8<9qM zh{v5po?~HG;?PTEEf)KVj0hnP7#2!}Y#6m8unMRCxsS-N7)W(d`zV%!6{wjY;)*S> z8!1u|qtI`x$N)4ONA1KE5qk_o*BQhOchPY!{Ro&R6264{3J6^(msUr-fv=550ufqPBYi7sq#vH3jT;Zn!G+!&FJT&SMelHqi3sY#I~U5FHbMOsPW;f#kTz2@r;X75CjQ}9H~jzY@5eFx zxchUl`olEZbv`w1LLQC^Tan2vuE$Gvoh56)PVxq9wEvOhbcs7(dttHnp6^J%q4!Hbc_)MKt z@{5r=ZxXQCGZjGg#z1mw=M&~JW~7>)<4Ft*bi)l9o@g$jkJMRp)Qh273{`42&rN7I zU`936;u#ZbyF?27)}1{>BQ?=~6e82p{60jEeQwj};;2y}=HmiD!dcO|kH}caNfjl@ z?ja2<5|LU#>T3+luUF`2Y}#cHYwpjd4zR20>a7YzBL zfQSvdlx~a~T>;h67QPsOM2tofrr{fKT9TDmhiy>Tw*5GQRLo!}U5H|A^fItPIaES5 zREGLkx&fr^I#8bS&YrI2A6380Ps1LRK zwLl?Kp)^h*4bG&78%AO>uHg=z;T1ArNO~Ko?SvvIg{JUA8+1i4L}CTDBb2=$8iO$c zBQXg{n1Pv4S=aU01o(X=qvPOQN;6s3mG7WSx)nrMKA z@PIeEBMu1|iAhMtH+Wi9{G?j9A2o;{?!blIA$i}4xXZ1Y3^ks3^9ns z5RAnXe1(}#)!O(`h5^kVi 
zS@v$&zzNk*4~@_Q7m$wc@dVHC2EV|>mNZ9EIHE3Gq4Jj8pOtp#f&h#{GLFv`#+pU zk|ww{mb?oFl7|?B9&<%PkdBAgwuEg8yQOR=oH|Ng5$Dg)MmsW>-C@GPYk?}Li>Ybs z7x)S9V0DQr6&k=Do@kGwNX2UyT;@6j3zR?^bVCUGV-zOgE6l_SY`|vh!*QI&1>DAC z`~rh3?1SinZU{gS;xHKFu^xNT=_=_7l?gUOIaEbMv_uG^(I1HzizIBtVVuPcG`h|) zgIQRK)!2?bIF8e}g76HsIpQ!3BQOa`n2m3+0;{kEN0ADZAUvxkg^VkUU*}6hH}-LU~k%GuomD(#i8Z#m^A(eu0R> z8uEBMQJs8ULrlkfEWvtABtJI|GqC`N0yJ_0Cy|Eh_yO-=5~vYVSfUWVKowL+4Y=WW zkVekp05@Rt1GcXqmu?p+34ZCmvM{o+MxPU9TiCegb zhj@k;cn^zEjTA>qbjD4bzAx+M+YIhHE4R z_wgfMLHcN<8^X{B127chFcGs+EQ0Nbil~X&Xo_wKLT|(%Ao4=7=v*b zkCj*hQ|@o&$Vr2oec=D+eZYug+=z0duC2Ot47eb1Ex{;o8kCn%&)JP()L0ZYB+m<# zmoN7P*D2)XdRqdH{*?8x8AtJe^ZIjq#rb?DT5%rtLTdtU1O2&t80XKC(4S+SI2XF$ zpP$33vp)aUpQn*3ho~}$DrXci)|D&9(zYW#R2ghhW%6|{f0CjT}23D<9~?_S1S3(ZNJ!aN5?z022SH4lfD!H?9||)e~2}XzL!OR8N9Z!-mKTLe>KWk(h0MdW&m= zm~X5LV!p912xDe9W_DvW`)Kms91ZTO_#gS%fC_5^ipr%6C@PmOAQJ*I$sWkEXP=9W zE)X|!T_A4exgezePIRuAzE$DeYpUJH5FWWjopa zb)<8d>SoPl`iWEQUq||qnQrz=X4y`xzePInkFnN8dU+n*tjqKK$B~ZDubVYK|38Yf zdqLgo?gjtjNEa)to3&Ws|2We3i|S^*U-TbEdV2}o?AuFxBGP0~wRx?N(^}=PKJHif zw2%8$p6=s*{Vj_RZ567%$;?!J1?8e;Z>sR~7#V}70E|A>T~JEZFq!(PF(1#N-U7_S z?SVWlgyr(N?!PxxDY;;(&8X?3t1+P5bKGrXu3+=K{dI%xHOyOlO#HwKs*l?~(ux!A zq_F12cf^;g^YJjt%FD`>iJD(tH<#+tPeIj9-qft&jMXkfg{U9;EB94YsLF$b>$rnn z;InOuxRMUZLkITKEyc?&vDz=u;|HofBmStml@-2sYUqdQ<6CdQ$(s6gpticMy8oTk zXk(*d`cGn0wyd-J^{N4JDlGl-jm&qn(yzIG*kD7?t{>9Ie(>6E`o-%~qesmP-Spw; zX=$GNs+aOY{r!M3%^~%M3=u_eRll4n?d=U2`wbkHbf`tjktbM?@~%$w~%Qrmks1eSTBw#tV~~3^o*VZe&erXe1HwM&e^{A_MD~ z$nJ0xY3yREeS7Fpys6aTM$*V;X4Gfq*1mh9-dp*)8g~Xg^GoR=`6Z0^q-Ho-$hEu$ zWN71pvc%j{M%osV!$XQmf@cXC?om=McP=F-&A*U4+`L(5U0Tk>mlpMwNWr{iq)YoU za=UR^vF0B1&d#=SJF>iNDpNsHs#C&nuP8~O6{XDJiW22&FQG&2wePI7a<3#Vag}6i z>&kMYauwbRag?xVM;YZ*RenLo+EvBQubOm=t42Jk%chz&rEh#q>Y|-xh?$EtZBtuZ zyz9!1$huO$x~rTk*g$I7HIT!O4J6*mO&lHF#KqH1O7?9i*X$dKrDG$?#ErQh(@gwr z-DPw~ce!BHLVTQC$fZ&qV(#G~Mcq85ZlTuV(xkN{`?coX4lg-T(_5a@^%nQMZ6wyJ zjigs=qkYq_P09A!w?pQ&ZZF{lJ4idDPO?3$ldL!BDh9c_N(1w*GN(mX@hZ_xOuBZH 
zQ9ZiJfUZ5HWw#!3-LHpi%GFce_viWnzS08!k?jw^ud?dJdFZnX8mvpi8lOtCC zV&xYoW%>q6uCQR~I4D@Imk*In^+IHfPl)tu8YaO(ygSpdkJQZ@Aunwrq+Z)d2{Me5 zpmx#X)iXv?B4fm@PONyC_mjY({bXXnI62UkN`Ztp>@axIuEh{1EZ1J5(;_O_1C55~OxOq7?HSCW$s9WkcmrvbE7D`8sTr zH0&@&w)#wvBR&(w&T5jZGMOwT+Dw-9R9FO-{7O9AB};#qDo4YoO7&{fq)x;%o)?)e zKRHhqXQLVN&SQow={Q3U`OK0+jc3d4PP4@)Y_?oAnl-hSDa{v&PyfYIz;uahD7;kc?3T%{^2;Q}Xu0fbuu96B zu9jt8R!g%+Yjtl6xv!J@-s|K^r*$%Lz&dGBV52msu}NlIZu*ET1_v(HI6 zVtPvUTbz>KeNV|6m(vp14p_vtr_NR+dGd)xP6*s_}O+zRG#I(&)St zY=2(*2GAaSUMlCmD342Al$Kr7#G}9^S=RKD{8ZqwENOR{``lN=BjUP**xulNPP$yK zmo9~EGNebX45?Q;Lsq-pl1Cx8WLLl2l2Yuh1P9-hXa7#Zs=t#_LGNTgA9u-UktwSJGo?s;ruM9->hILO2i2(GOgHtG znE`h(6b@B#30d8Y)eckb&AK^s&m!0}#1T$BGFA_+Xa!I3jKbV@=#C!Tl3aj=SdJA~ zhmG8Fqsnd7X6(j3!rG4mIEmAs%1w%(G|IpZ56J32mDL}W)qgvye`{8Ml*Uav-`=eL#H{`!S^YDz z`niFtoo;?sKR02u{kyaJ)3f>yW%WPG>QB$=CyS+>KGwiZ+isH8pOn?F@-5Pfr_Qqa zJ7)E7&gx%p=%$`)QA16o11H}wL~_!I&<<4>LVcr~N=?nC*8cv!TTR>CDv6*B3``7- zjf@QpjWX>eyvu>XJ z#%cyb<}r|r%+=zikMTT}G&GI1)2kOV)x5;;w`H-aM$G3R8QpqGN=5C|q>T2-DYd4) z>}5{dK-&%i2mrlenf)^zN zvf43B+EBM`jEsq}iEd5PGi`a>G^_b|O;wue(#TpoS%X;Cfk!gP969A0Q%s&6G9WXn zO_>6gGe_H$oVhICd}gpY4?b~|n+)t1waEI*F8YkbFlic7|9+?%qM8N9@=QfOE0MLs za(@193XC-#YaAh)<;!6OtNwnTL!73Sf74`bB7(?Me|Q_UopQ;q>P5v*H@V!s)d;g^ znfb-cmhRi+q$zeLe2%&xs@!(9Pli`U<-@Zz7TliEp9)0c8LQ}<`P{_G%q*zy&1x6m z4B^XpM!jCH`+R{qvi#Yf)ob8==?>*==FThFpK3}mki5UPP(V7cpXOoeJVF8YQ8Ueq3);PC zThdndO4ah!?rKF_zu#r=F2uM2zA-VO5rNi`{rp3Gqx<$#7lQ25sW`S)?}>0l(vMSz zhQ8a043X+C2G3Cc=*XDJpjd0KprBCyKUkxstzA(q3q($;{%Kw$$tZl+D<7~rJnO&9ZRT-8}ZDpe> zt4BfA0Stz!h8Y1>Z!iJsk-n)=W&Sx(W%|WXW%||Nl{l)#baeo`X{)f0L&Yh3Sznct zRh6V_t4d4NR@G;^@^c>TG8pvPwiOFk@m91S8&nHXg?tt1_Mx%Sf!d(7qbnQf)`>CN zyAf(&EYvJ&J=HSr)5B!iZ`Z1RHBQCf$Gb%nANN)*TJzOYHC)weboFZAw5^wqciX1T ze4Mm%WglAYw{>pF^Z^8>qNe8e5)W;ANLD{BZNEzM(x`$EVi(Jnh~Yb>j{F5sH66

Q{|w>*G_3ZuO1VP(qF++&Y9Cu3et`f}~oas_5$@ZrVyeRqY+J{EmU8Y;>)_ZktX^WI#A81D*pet z><-MMmZ?Xlwp+iRCF$3PqFc{DR$6{~Nq=0Hem(We(6{x=`fshL+Q-xZQ7^zZ*4JA7 zRQc`GEPTskU}S7ybmWIYH65xt)UM@F&8B|6m_TaTA|tFr16*xtv~`MTJuoXi) zZ+Jh3G*d(L6K753+b=ef@5F@qw~XwkJ=WCT9v$Hu-q6o4nz%74G9u=;AY!BY1*#An zvzF(m605#SzMLi}1#(g#Ck1j+ASVTKQXnSZ%;8?#ruEsw(5Fa=R+it9$vX>|O}!l?GLQS7m&4pS(CqK$VMB z8Ib2dwE1dPMzuy6*q|&_dBzUqP#zUf5$buwN~jD~u62MTs)Aec+Q(3;(^h$P^`Lq! zID^~ilvnvi6Y8QKT%j`P4d8}`XoSXSf~IJO=5R*~s3!|O&#Th1FPtwOEJs*no}Lgw5E3t=NX`*nyqch27YL zz1W8o?8gBd#33BU5gf%a9LEWq#3`J{8GMUWoW(hOhx53Ai%7#IT*eh##Wh^V4ctUJ zGVnca;WqBzF7Dwze!v4f#3MY$6FkK;Jjajt2`}(7Ug8yA;|<>87yOEMc#lk|GQN7& z(Ex_v(O}_~c%BF1_(v|7;XTiR@tQpUHG<!&_fPol;436!DyHFU zOvenOCl9DIYhn1}gTfQ49u#aM!+Scc_Tft6T=)mVeIScmo4fQ{IM&Desi*oN)c zft}ce-PnV@*oPGC#{nF~AsogL9K|sl#|fOoDV)X`e2Y|^#W{S3^SFSENW&#u#uZ$} zHC)FH+(bGu@I7wfHtygq?%_UuzymzQBRs|vJjF9S$B+04FYq&7;uT)w4c_7x{EByY zk4&fwzk2k~0ERGvF-%~JTrfj!m?IDJA|LX@0tKKxJZuT|QEK&?RS^`0dI+vKN}wc4 z;R{%yG^|kuHYf{Q*r6QCqXH_z9+jZZLse)yz!6p9gledc8mNg{aE1$NqYmn#9$caR zr(grPp&=TfF`A$$nxQ$|(E=^ufmZNDYj{D$rw#44XovRb0QG;-Izf%^Lc1%vp*wn@ zCw!pBzx#xstZ{0*^51OKqx+HM9IVOtsQ7cOQIDwm{x;2j+L4>9mwx%G{deZi&Ay+ZHad-y_A!6m ze_s1L^JbqWAIn#Vk_rCq@+-6GpPT7I>O|^Clk;mrTm7Fl^=gIM4u7{P!SdCb7~}6Q zUxn?g4q6&xa;|Er>Yjz#_AOX?kTy41WqM0HEn{r>61AB`OFQTM;? 
z{3pxTFH`Ni|Lt?k$MMVQ{-2cse0ed`Ft_gb$$suG&%V(l(!XCH{^5c3Cm(pXuI*ff p|DjM_TSeg6PCWz94BBH`bvx+ic5smwwKw+F?{qos|1l}>e*ivrYmNW_ literal 0 HcmV?d00001 From 0a1eccc29bccca82df705749d0a11eb22e46530f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 20 May 2025 21:18:07 +0000 Subject: [PATCH 019/125] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- autocorpus/bioc_supplementary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autocorpus/bioc_supplementary.py b/autocorpus/bioc_supplementary.py index e28625ac..91f5e305 100644 --- a/autocorpus/bioc_supplementary.py +++ b/autocorpus/bioc_supplementary.py @@ -200,7 +200,7 @@ def __build_tables( class BioCTextConverter: """Converts text content into a BioC format for supplementary material processing.""" - + @staticmethod def build_bioc(text: str, input_file: str, file_type: str) -> BioCCollection: """Builds a BioCCollection object from the provided text, input file, and file type. From 50cecd9a34c4fe1a5683b690856b2444fe533136 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 20 May 2025 22:50:13 +0100 Subject: [PATCH 020/125] Word extraction now functionally working. Regression test corrected for word extraction. Removed unwanted bioc attribute "version" from ac_bioc module, it doesn't align with our key file. 
--- autocorpus/ac_bioc/bioctable/json.py | 1 - autocorpus/ac_bioc/collection.py | 2 - autocorpus/ac_bioc/json.py | 1 - autocorpus/bioc_supplementary.py | 41 +- autocorpus/word_extractor.py | 32 +- .../Word/Expected Output/mmc1.doc_bioc.json | 846 ++++++++++++++++++ tests/test_regression.py | 55 ++ 7 files changed, 950 insertions(+), 28 deletions(-) create mode 100644 tests/data/Supplementary/Word/Expected Output/mmc1.doc_bioc.json diff --git a/autocorpus/ac_bioc/bioctable/json.py b/autocorpus/ac_bioc/bioctable/json.py index facd687b..94860bcb 100644 --- a/autocorpus/ac_bioc/bioctable/json.py +++ b/autocorpus/ac_bioc/bioctable/json.py @@ -68,7 +68,6 @@ def default(self, o): "source": o.source, "date": o.date, "key": o.key, - "version": o.version, "infons": o.infons, "documents": [self.default(d) for d in o.documents], } diff --git a/autocorpus/ac_bioc/collection.py b/autocorpus/ac_bioc/collection.py index c6641aaf..f4e69d67 100644 --- a/autocorpus/ac_bioc/collection.py +++ b/autocorpus/ac_bioc/collection.py @@ -22,7 +22,6 @@ class BioCCollection: key: str = field(default_factory=str) documents: list[BioCDocument] = field(default_factory=list) infons: dict[str, str] = field(default_factory=dict) - version: str = field(default="1.0") def to_dict(self): """Convert the BioCCollection instance to a dictionary. 
@@ -62,7 +61,6 @@ def from_json(cls, data: dict[str, Any]) -> BioCCollection: date=data.get("date", ""), key=data.get("key", ""), infons=data.get("infons", {}), - version=data.get("version", ""), documents=documents, ) diff --git a/autocorpus/ac_bioc/json.py b/autocorpus/ac_bioc/json.py index cbc345ec..64c159ff 100644 --- a/autocorpus/ac_bioc/json.py +++ b/autocorpus/ac_bioc/json.py @@ -78,7 +78,6 @@ def default(self, o: Any) -> Any: "source": o.source, "date": o.date, "key": o.key, - "version": o.version, "infons": o.infons, "documents": [self.default(d) for d in o.documents], } diff --git a/autocorpus/bioc_supplementary.py b/autocorpus/bioc_supplementary.py index e28625ac..7cbd9793 100644 --- a/autocorpus/bioc_supplementary.py +++ b/autocorpus/bioc_supplementary.py @@ -200,13 +200,15 @@ def __build_tables( class BioCTextConverter: """Converts text content into a BioC format for supplementary material processing.""" - + @staticmethod - def build_bioc(text: str, input_file: str, file_type: str) -> BioCCollection: + def build_bioc( + text: str | list[tuple[str, bool]], input_file: str, file_type: str + ) -> BioCCollection: """Builds a BioCCollection object from the provided text, input file, and file type. Args: - text (str): The text content to be converted. + text (str | list[tuple[str, bool]]): The text content to be converted. input_file (str): The path to the input file. file_type (str): The type of the input file ('word' or 'pdf'). 
@@ -222,7 +224,8 @@ def build_bioc(text: str, input_file: str, file_type: str) -> BioCCollection: temp_doc.passages = BioCTextConverter.__identify_word_passages(text) elif file_type == "pdf": temp_doc.passages = BioCTextConverter.__identify_passages(text) - temp_doc.passages = BioCTextConverter.__identify_passages(text) + else: + temp_doc.passages = BioCTextConverter.__identify_passages(text) temp_doc.inputfile = input_file bioc.documents.append(temp_doc) return bioc @@ -255,21 +258,21 @@ def __identify_passages(text): return passages @staticmethod - def __identify_word_passages(text): + def __identify_word_passages(text: list[tuple[str, bool]]) -> list[BioCPassage]: offset = 0 passages = [] - line, is_header = text - line = line.replace("\n", "") - if line.isupper() or is_header: - iao_name = "document title" - iao_id = "IAO:0000305" - else: - iao_name = "supplementary material section" - iao_id = "IAO:0000326" - passage = BioCPassage() - passage.offset = offset - passage.infons = {"iao_name_1": iao_name, "iao_id_1": iao_id} - passage.text = line - passages.append(passage) - offset += len(line) + for paragraph, is_header in text: + paragraph = paragraph.replace("\n", "") + if paragraph.isupper() or is_header: + iao_name = "document title" + iao_id = "IAO:0000305" + else: + iao_name = "supplementary material section" + iao_id = "IAO:0000326" + passage = BioCPassage() + passage.offset = offset + passage.infons = {"iao_name_1": iao_name, "iao_id_1": iao_id} + passage.text = paragraph + passages.append(passage) + offset += len(paragraph) return passages diff --git a/autocorpus/word_extractor.py b/autocorpus/word_extractor.py index 5235a7dd..f49fb8f6 100644 --- a/autocorpus/word_extractor.py +++ b/autocorpus/word_extractor.py @@ -10,6 +10,10 @@ from docx import Document +from autocorpus.ac_bioc.bioctable.collection import BioCTableCollection +from autocorpus.ac_bioc.bioctable.json import BioCTableJSON +from autocorpus.ac_bioc.collection import BioCCollection +from 
autocorpus.ac_bioc.json import BioCJSON from autocorpus.bioc_supplementary import BioCTableConverter, BioCTextConverter from . import logger @@ -129,11 +133,29 @@ def extract_word_content(file_path: Path): ) for x in doc.paragraphs ] - bioc_text = BioCTextConverter(paragraphs, "word", str(file_path)) - bioc_text.output_bioc_json(file_path) - bioc_tables = BioCTableConverter(tables, str(file_path)) - bioc_tables.output_tables_json(file_path) - print(str(docx_path)) + bioc_text: BioCCollection | None = None + bioc_tables: BioCTableCollection | None = None + + if paragraphs: + bioc_text = BioCTextConverter.build_bioc(paragraphs, str(file_path), "word") + + if tables: + bioc_tables = BioCTableConverter.build_bioc(tables, str(file_path)) + + if bioc_text: + out_filename = str(file_path).replace( + file_path.suffix, f"{file_path.suffix}_bioc.json" + ) + with open(out_filename, "w", encoding="utf-8") as f: + BioCJSON.dump(bioc_text, f, indent=4) + + if bioc_tables: + out_table_filename = str(file_path).replace( + file_path.suffix, f"{file_path.suffix}_tables.json" + ) + with open(out_table_filename, "w", encoding="utf-8") as f: + BioCTableJSON.dump(bioc_tables, f, indent=4) + os.unlink(str(docx_path)) except FileNotFoundError: logger.error( diff --git a/tests/data/Supplementary/Word/Expected Output/mmc1.doc_bioc.json b/tests/data/Supplementary/Word/Expected Output/mmc1.doc_bioc.json new file mode 100644 index 00000000..9908a236 --- /dev/null +++ b/tests/data/Supplementary/Word/Expected Output/mmc1.doc_bioc.json @@ -0,0 +1,846 @@ +{ + "source": "Auto-CORPus (supplementary)", + "date": "20250520", + "key": "autocorpus_supplementary.key", + "infons": {}, + "documents": [ + { + "id": "1", + "infons": {}, + "inputfile": "/mnt/sda2/Projects/Auto-CORPus/tests/data/Supplementary/Word/mmc1.doc", + "passages": [ + { + "offset": 0, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "The occurrence of a multidrug-resistant 
tuberculous retropharyngeal abscess in an immunocompetent patient: a case report", + "annotations": [], + "relations": [] + }, + { + "offset": 120, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Tiresse N4* , Oucharqui S2*,Benaissa E1, 2, Badri B4 Bssaibis F2, Maleb A3, Elouennass M1,2", + "annotations": [], + "relations": [] + }, + { + "offset": 212, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "1Epidemiology and bacterial resistance research team/BIO-INOVA Centre, Faculty of Medicine and Pharmacy (University Mohammed V), Rabat, Morocco. ", + "annotations": [], + "relations": [] + }, + { + "offset": 357, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "2Department of Bacteriology, Mohammed V Military Teaching Hospital / Faculty of Medicine and Pharmacy (University Mohammed V), Rabat, Morocco. ", + "annotations": [], + "relations": [] + }, + { + "offset": 500, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "3Laboratory of Microbiology, Mohammed VI University Hospital / Faculty of Medicine and Pharmacy (University Mohammed the first), Oujda, Morocco.", + "annotations": [], + "relations": [] + }, + { + "offset": 644, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "4Department of pneumology, Mohammed V Military Teaching Hospital / Faculty of Medicine and Pharmacy (University Mohammed V), Rabat, Morocco.", + "annotations": [], + "relations": [] + }, + { + "offset": 784, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "* Oucharqui sara and Tiresse nabil have contributed equally in the elaboration of the work.", + "annotations": [], + "relations": [] + }, + { + "offset": 875, + "infons": { + "iao_name_1": "document 
title", + "iao_id_1": "IAO:0000305" + }, + "text": "Corresponding author: Elmostafa Benaissa : benaissaelmostafa2@gmail.com", + "annotations": [], + "relations": [] + }, + { + "offset": 947, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Tiresse Nabil: nabil.tiresse1@gmail.com", + "annotations": [], + "relations": [] + }, + { + "offset": 986, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Oucharqui Sara: oucharqui@gmail.com", + "annotations": [], + "relations": [] + }, + { + "offset": 1021, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Elmostafa Benaissa : benaissaelmostafa2@gmail.com", + "annotations": [], + "relations": [] + }, + { + "offset": 1071, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Badri bouchra: bouchra.ba04@gmail.com", + "annotations": [], + "relations": [] + }, + { + "offset": 1108, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Bssaibis fatna: bssaibisfatna@yahoo.fr", + "annotations": [], + "relations": [] + }, + { + "offset": 1146, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Adil Maleb: maleb.adil@gmail.com", + "annotations": [], + "relations": [] + }, + { + "offset": 1178, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Mostafa Elouennass: elouennassm@yahoo.fr", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material 
section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Abstract:", + "annotations": [], + "relations": [] + }, + { + "offset": 1227, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Retropharyngeal abscess is an uncommon location of tuberculosis (TB). In this report, we describe a multidrug-resistant tuberculous retropharyngeal abscess in a 21-year-old female patient who was treated for lymph node TB for one year. CT scan revealed a large retropharyngeal abscess that was aspirated intraorally under local anesthesia. The diagnosis of TB was retained by molecular and histological study. 
GeneXpert MTB/ RIF (Cepheid, Sunnyvale, CA, USA),performed on the pus, showed rifampicin resistance and a first- and second-line drug resistance test using Genotype MTBDRplus VER.2 and MTBDRsl VER.1 (Hain Lifescience GmbH, Nehren, Germany) showed TB highly resistant to rifampicin, isoniazid, and aminoglycosides. Treatment is primarily medical as it combines specific antituberculous antibiotics, and aspiration for drainage of the abscess. Our patient was put on long-term 2nd line anti-TB treatment. ", + "annotations": [], + "relations": [] + }, + { + "offset": 2142, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Key words: Multidrug-resistant, Retropharyngeal abscess, GeneXpert MTB/RIF, GenoType MTBDRplus, GenoType MTBDRsl", + "annotations": [], + "relations": [] + }, + { + "offset": 2254, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 2254, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Introduction", + "annotations": [], + "relations": [] + }, + { + "offset": 2266, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Tuberculous retropharyngeal abscess is not frequently reported in the literature and pre-extensive tuberculous retropharyngeal abscess is even less frequently reported [1]. Early recognition of this condition is essential to prevent serious complications. The diagnosis is difficult and relies on a combination of clinical, radiological and biological arguments. 
We report a case of multidrug resistant (MDR) tuberculous retropharyngeal abscess in a 21-year-old female patient treated for lymph node tuberculosis (TB) for one year and discuss the different diagnostic and therapeutic elements of this pathology, highlighting the contribution of molecular biology in the effective management of MDR extra-pulmonary TB.", + "annotations": [], + "relations": [] + }, + { + "offset": 2983, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Case report", + "annotations": [], + "relations": [] + }, + { + "offset": 2994, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "This is a 21-year-old female with a history of chronic headache for several years with Chiari decompression surgery performed in 2017 and latero-cervical adenopathy diagnosed as lymph node TB on bacteriological, molecular and histological arguments in 2019. GeneXpert MTB/RIF performed on the cervical lymph node came back positive for TB, without resistance to rifampicin. She was then treated at another institution according to the national protocol which includes quadritherapy with isoniazid, rifampicin, ethambutol and pyrazinamide for 2 months followed by bitherapy with isoniazid and rifampicin for 10 months (2RHZE/10RH). The evolution was then marked by the disappearance of the lymph nodes after one year of treatment. Six months after the end of treatment, the patient presented to the emergency room with severe headaches.", + "annotations": [], + "relations": [] + }, + { + "offset": 3830, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Otherwise, no cough, chest pain, fever, or loss of appetite was reported. The patient noted no signs of trismus or difficulty breathing. She reported no known allergies and had no history of smoking or drinking alcohol. 
On admission, physical examination revealed a body temperature of 36.6\u00b0C, a heart rate of 90 beats/min, and a blood pressure of 117/75 mmHg. Palpation of both sides of the neck revealed no tenderness and no lymph nodes were noted. Examination of the oral cavity revealed no pathologic findings, and no posterior pharyngeal wall projections were observed. The lungs were clear on auscultation and no neurologic deficits were noted on initial clinical examination. ", + "annotations": [], + "relations": [] + }, + { + "offset": 4514, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "The biological workup showed hemoglobin at 12.6 g/l; white blood cell count at 4.8 G/l; and C-reactive protein at 0.8 mg/l. In addition, serologies for human immunodeficiency virus (HIV), hepatitis B, and hepatitis C were negative.", + "annotations": [], + "relations": [] + }, + { + "offset": 4745, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "A cerebral CT scan performed as part of the etiological diagnosis fortuitously revealed a peripherally enhanced collection in the retropharyngeal area after injection of contrast medium measuring 19x21 mm, associated with an adjacent necrotic adenopathy measuring 10x06 mm. 
(figure 1).", + "annotations": [], + "relations": [] + }, + { + "offset": 5030, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 5030, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Figure 1: Sagittal CT scan revealed a peripherally enhanced collection in the retropharyngeal area after injection of contrast medium.", + "annotations": [], + "relations": [] + }, + { + "offset": 5164, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 5164, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "A cervical MRI was realized later and confirmed the presence of the retropharyngeal collection. (figure 2)", + "annotations": [], + "relations": [] + }, + { + "offset": 5270, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 5270, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 5270, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Figure 2: Sagittal MRI revealed the presence of the retropharyngeal collection", + "annotations": [], + "relations": [] + }, + { + "offset": 5348, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 5348, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "The abscess was drained under local anesthesia. 
02 milliliters of pus were aspirated. The specimen was sent for bacteriological analysis for Mycobacterium tuberculosis complex (MTC) and banal germs as well as for pathological study. A molecular study using GeneXpert MTB/RIF (Cepheid, Sunnyvale, CA, USA) resulted in detection of MTC with detection of rifampicin resistance in less than 2 hours. In response to this rifampicin resistance, we performed other molecular tests, including GenoType MTBDRplus VER. 2 and GenoType MTBDRsl VER.1 (Hain Lifescience GmbH, Nehren, Germany) on the pus to confirm rifampicin resistance and also to investigate resistance to other anti-TB drugs. It should be noted that this technique is not validated on extrapulmonary specimens directly, although many studies have showed a good correlation with the usual resistance screening methods. The MTBDRplus VER. 2 showed resistance to both rifampicin and isoniazid, while MTBDRsl VER.1 showed resistance only to aminoglycosides. ", + "annotations": [], + "relations": [] + }, + { + "offset": 6360, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Direct examination after special Ziehl-Nielsen staining was positive and cultures on Lowenstein-Jensen\u00ae (LJ) solid medium and Mycobacteria Growth Indicator Tube (MGIT\u00ae) liquid medium were positive after 32 days and 12 days respectively, thus confirming the molecular diagnosis.", + "annotations": [], + "relations": [] + }, + { + "offset": 6637, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "A treatment was initiated on the basis of molecular data. 
The histopathological study confirmed the molecular diagnosis by showing epithelioid and gigantocellular granulomas with caseous necrosis, without histological evidence of malignancy.", + "annotations": [], + "relations": [] + }, + { + "offset": 6878, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Subsequently, the patient was put on a long-term protocol consisting of 6 months of bedaquiline, levofloxacin, linezolid, clofazimine, and cycloserine and 12 to 14 months of levofloxacin, linezolid, clofazimine, and cycloserine.", + "annotations": [], + "relations": [] + }, + { + "offset": 7106, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "After 1 month of treatment, the antibacillary drugs appear to be well tolerated, and the patient is still being monitored.", + "annotations": [], + "relations": [] + }, + { + "offset": 7228, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Discussion", + "annotations": [], + "relations": [] + }, + { + "offset": 7238, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 7238, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "TB remains a major public health problem in the world, mainly affecting developing countries [2]. Its incidence has also increased in developed countries, partly due to co-infection with HIV [2], the latter being more frequent in extra-pulmonary forms [3].", + "annotations": [], + "relations": [] + }, + { + "offset": 7494, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": " The 2019 WHO report estimates the number of new cases at 10 million and the number of deaths at 1.5 million [4]. 
TB usually affects the lungs (pulmonary) or sometimes other organs (extrapulmonary). Excluding laryngeal TB, TB of the head and neck is rare and constitutes 2-6% of extrapulmonary TB and 0.1-1% of all forms of TB [5]. Retropharyngeal localization is rare [1].", + "annotations": [], + "relations": [] + }, + { + "offset": 7868, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Infection of the retropharyngeal space and subsequent abscess formation are mainly due to acute bacterial infections of the head and neck region, especially in children, injury to the posterior pharyngeal wall, and forward spread of spinal TB [6].", + "annotations": [], + "relations": [] + }, + { + "offset": 8115, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Spread to the retropharyngeal space occurs via lymphatics involving persistent retropharyngeal nodes or by hematogenous spread from pulmonary or extrapulmonary sites [5]. In our patient, the retropharyngeal abscess was probably due to lymphatic dissemination from lymph node TB because radiological exploration revealed a centimetric adenopathy with a necrotic center adjacent to the retropharyngeal abscess and there was no evidence of any distant involvement that could support hematogenous, pulmonary, or other dissemination. Tuberculous retropharyngeal abscess in an immunocompetent adult is rare [6]. ", + "annotations": [], + "relations": [] + }, + { + "offset": 8721, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Drug-resistant TB represents a major challenge to national, regional and global TB control programs. Some MDR strains have developed additional resistance mechanisms to second-line antibacillaries, namely fluoroquinolones and aminoglycosides [7]. 
Each year, 500,000 cases of MDR-TB or rifampicin-resistant TB (RR-TB) and nearly 200,000 deaths are reported worldwide. ", + "annotations": [], + "relations": [] + }, + { + "offset": 9088, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "In 2019, the reported treatment success rate was 56% for MDR and extensively drug-resistant (XDR) TB cases and 39% for XDR-TB [4]. In Morocco, where TB remains endemic, the 2014 National TB Drug Resistance Survey found a low prevalence of MDR/XDR-TB (1% MDR-TB among new cases and 8.7% among previously treated cases) [4]. ", + "annotations": [], + "relations": [] + }, + { + "offset": 9412, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "In 2019, 235 cases of drug-resistant TB were treated in Morocco, and 1500 cumulative cases have been reported since 2012 [4]. MDR extrapulmonary localizations have rarely been described in the literature [3,7,8]. An Indian study published in 2014 reported 3 cases, including 2 lymph node localizations and 1 cervical cold abscess [3]. ", + "annotations": [], + "relations": [] + }, + { + "offset": 9747, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "MDR extrapulmonary forms are more frequent in young female subjects with a history of TB [8]. This is in accordance with our case. Another Moroccan study published in 2018 presented 7 cases of MDR extrapulmonary TB, of which 6 patients had a history of TB and 1 patient had a therapeutic failure [7]. 
4 of these 7 patients had additional resistance to second-line anti-TB drugs [7].", + "annotations": [], + "relations": [] + }, + { + "offset": 10129, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": " The diagnosis of MDR in extrapulmonary forms should be made by tissue or biological fluid sampling, but this is sometimes difficult [3]. Tuberculous retropharyngeal abscess can present with variable manifestations, ranging from asymptomatic to subtle features such as odynophagia alone and neck pain, due to early stage and lesser severity of the disease, to life-threatening respiratory obstruction [6]. Our patient had only chronic headache that can be attributed to her Chiari malformation. In addition, the general condition was preserved. On throat examination, swelling due to tuberculous retropharyngeal abscess is usually located in the midline [6].", + "annotations": [], + "relations": [] + }, + { + "offset": 10787, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Radiologic imaging plays an important role in demonstrating the extent of the abscess and the involvement of surrounding structures [2,5]. CT has an accuracy of 89% and MRI is even more accurate, as it allows for better soft tissue analysis and allows for the assessment of vascular complications, including internal jugular vein thrombosis [2,5]. Both CT and MRI in our patient showed the retropharyngeal abscess. ", + "annotations": [], + "relations": [] + }, + { + "offset": 11202, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "TB was first diagnosed by direct microscopic examination and the discovery of acid-fast bacilli in the abscess aspirate using Ziehl-Neelsen stain, and then confirmed by culture, which remains the gold standard method [2]. 
", + "annotations": [], + "relations": [] + }, + { + "offset": 11424, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Molecular biology has demonstrated its effectiveness even on pauci-bacillary specimens by allowing the identification and detection of resistance to anti-TB drugs through several studies. GeneXpert MTB/RIF is a rapid, automated, World Health Organization (WHO)-recommended nucleic acid amplification test that is widely used for the simultaneous detection of MTC and rifampicin resistance in pulmonary and extrapulmonary specimens. It has a sensitivity of more than 80% in cerebral spine fluid, pus and biopsy fragments [7]. In our study, GeneXpert MTB/RIF (Cepheid, Sunnyvale, CA, USA) allowed identification of MTC and detection of rifampicin resistance. ", + "annotations": [], + "relations": [] + }, + { + "offset": 12081, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "In addition to the GeneXpert MTB/RIF, there are the MTBDRplus and MTBDRsl genotype tests which allow the identification of MTC from pulmonary clinical specimens or cultivated samples. The MTBDRplus test is used to identify resistance to rifampicin and isoniazid [7]. The MTBDRsl test is designed to detect resistance to the second-line antibacillary drugs, namely aminoglycosides on the gyrA gene, fluoroquinolones on the rrs gene, and ethambutol on the embB gene [7]. The MTBDRplus test and the MTBDRsl test have a sensitivity greater than 80% for the detection of resistance to rifampicin, isoniazid, fluoroquinolones, and aminoglycosides [7]. The discovery of an additional aminoglycoside resistance makes the choice of treatment even more difficult. These tests have been shown to be effective in detecting resistance to anti-TB drugs from extrapulmonary samples, even though they are not validated on these samples. 
This has been reported in some studies [9, 10].", + "annotations": [], + "relations": [] + }, + { + "offset": 13049, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "In our case, the aspiration was positive by GeneXpert MTB/RIF with a detection of rifampicin resistance. The MTBDRplus test confirmed resistance to rifampicin and isoniazid and the MTBDRsl test showed additional resistance to aminoglycosides. Later on, mycobacterial culture on solid and liquid media both became positive after 32 days and 12 days respectively. Pre-ultraresistant TB (pre-XDR TB) is defined as MDR/RR-TB in addition to resistance to any fluoroquinolones (levofloxacin and moxifloxacin). ", + "annotations": [], + "relations": [] + }, + { + "offset": 13553, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Antibacillary drug resistance can be primary or secondary, primary drug resistance is defined as resistance in a patient who has never been treated for TB. Treatment with anti-TB drugs exerts selective pressure on the Mycobacterium tuberculosis population, resulting in a decrease in susceptible bacilli, an increase in drug-resistant mutants, and the emergence of drug resistance (acquired resistance). Given her previously treated lymph node TB, it seems safe to assume that our patient has acquired drug resistance. ", + "annotations": [], + "relations": [] + }, + { + "offset": 14072, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "In recent years, significant progress has been made in the rapid diagnosis of TB and drug resistance, as well as in treatment: new drugs, reduction of the age of indication for certain drugs as well as modification of the classification of drugs used to treat MDR-TB. 
", + "annotations": [], + "relations": [] + }, + { + "offset": 14340, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "For MDR-TB of all forms, the WHO recommends a short regimen of 9 to 11 months, which includes a 4 to 6 month loading phase with high dose amikacin, moxifloxacin, etionamide, clofazimine, pyrazinamide, ethambutol and high dose of isoniazid. In the maintenance phase, patients are put on moxifloxacin, clofazimine, pyrazinamide and ethambutol [11]. Another recent WHO review in 2020 updated the recommendations eliminating short regimens containing injectables, replacing them with a short regimen containing bedaquiline [4]. Another WHO trial approved by the FDA in 2019 recommends the combination of bedaquiline, linezolid, and pretomanide for ultraresistant TB or XDR-TB for 9 months if the three molecules have not been taken previously [4,11].", + "annotations": [], + "relations": [] + }, + { + "offset": 15087, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "In Morocco, the short regimen has been adapted for some cases, but the old long regimen is still widely prescribed. This long regimen is based on 6 months of initial treatment with bedaquiline combined with levofloxacin, linezolid, clofazimine and cycloserine, followed by cessation of bedaquiline and maintenance of the remainder for 12 to 14 months if there is no resistance to group A and B molecules [4]. Our patient was put on a standard regimen by replacing aminoglycosides with bedaquiline. 
The simultaneous medical and surgical approach seems to be the best strategy for the management of tuberculous retropharyngeal abscess [3,5].", + "annotations": [], + "relations": [] + }, + { + "offset": 15726, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "As with any abscess, the mainstay of management of retro-pharyngeal tubercular abscess is drainage of the pus. Therapeutic aspiration only has been used successfully and can be repeated if necessary [2]. Anti-TB drug therapy and conservative neck stabilization should be the initial treatment if a retropharyngeal abscess is due to an extension from cervical spine TB, with a stable spine and without any neurological deficit or with minimal neurological signs [6]. If left untreated, internal jugular vein thrombosis, mediastinitis and airway obstruction are potential complications [1,2]. ", + "annotations": [], + "relations": [] + }, + { + "offset": 16317, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Clinical, bacteriological and radiological surveillance is recommended, as well as monitoring of treatment tolerance [7,11]. The prognosis of MDR pulmonary and extrapulmonary TB has been improved thanks in part to the prescription of new anti-TB drugs such as linezolid and bedaquiline. The success of the treatment is related to the number of effective molecules still available [7]. However, high mortality has been observed in patients with XDR-TB and HIV infection. This could be explained by its synergistic relationship with TB and the emergence of MDR and XDR strains [7]. 
The HIV serology of our patient is negative which could further improve the prognosis of her disease.", + "annotations": [], + "relations": [] + }, + { + "offset": 16998, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Conclusion", + "annotations": [], + "relations": [] + }, + { + "offset": 17008, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Retropharyngeal abscess is a recognized but rare presentation of TB. Unspecified symptoms and unusual location often lead to delayed diagnosis and treatment. Through this case, we highlight the importance of gene amplification tests in the effective and rapid management of this disease.", + "annotations": [], + "relations": [] + }, + { + "offset": 17295, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Competing interests:", + "annotations": [], + "relations": [] + }, + { + "offset": 17315, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "The authors declare no competing interest.", + "annotations": [], + "relations": [] + }, + { + "offset": 17357, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Author contributions:", + "annotations": [], + "relations": [] + }, + { + "offset": 17378, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "OS,TN and BE have been involved in drafting in the manuscript, BF, BY, CM, AM have revising the manuscript and ELM have given final approval of the version to be published.", + "annotations": [], + "relations": [] + }, + { + "offset": 17550, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "References", + "annotations": [], + "relations": [] + }, + { + "offset": 17560, + "infons": { + 
"iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Supplemental file: ", + "annotations": [], + "relations": [] + }, + { + "offset": 17579, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 17579, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Definitions:", + "annotations": [], + "relations": [] + }, + { + "offset": 17591, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "RR: is defined as isolated resistance to rifampicin.", + "annotations": [], + "relations": [] + }, + { + "offset": 17643, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "MDR: is defined as resistance to both rifampicin and isoniazid.", + "annotations": [], + "relations": [] + }, + { + "offset": 17706, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Pre-XDR: is defined as multidrug resistance (MDR/RR-TB) in addition to resistance to any fluoroquinolones (levofloxacin and moxifloxacin).", + "annotations": [], + "relations": [] + }, + { + "offset": 17844, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Ultraresistant TB or extensively drug-resistant TB (XDR-TB): is defined as multidrug resistance (MDR/RR-TB) in addition to resistance to a fluoroquinolone (levofloxacin or moxifloxacin) and at least one of bedaquiline or linezolid (or both).", + "annotations": [], + "relations": [] + } + ], + "relations": [] + } + ] +} \ No newline at end of file diff --git a/tests/test_regression.py b/tests/test_regression.py index f16a022a..96ddd08d 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -109,6 +109,61 @@ def 
test_pdf_to_bioc(data_path: Path, input_file: str, config: dict[str, Any]) - assert new_tables == expected_tables +@pytest.mark.parametrize( + "input_file, config, has_tables", + [ + ("Supplementary/Word/mmc1.doc", DefaultConfig.PMC.load_config(), False), + ], +) +def test_word_to_bioc( + data_path: Path, input_file: str, config: dict[str, Any], has_tables: bool +) -> None: + """Test the conversion of a doc file to a BioC format.""" + from autocorpus.autocorpus import Autocorpus + + doc_path = data_path / input_file + expected_output = doc_path.parent / "Expected Output" / doc_path.name + with open( + str(expected_output).replace(".doc", ".doc_bioc.json"), + encoding="utf-8", + ) as f: + expected_bioc = json.load(f) + + if has_tables: + with open( + str(expected_output).replace(".doc", ".doc_tables.json"), + encoding="utf-8", + ) as f: + expected_tables = json.load(f) + + ac = Autocorpus( + config=config, + ) + + ac.process_files(files=[doc_path]) + + with open( + str(doc_path).replace(".doc", ".doc_bioc.json"), + encoding="utf-8", + ) as f: + new_bioc = json.load(f) + + if has_tables: + with open( + str(doc_path).replace(".doc", ".doc_tables.json"), + encoding="utf-8", + ) as f: + new_tables = json.load(f) + + _make_reproducible(new_bioc, expected_bioc, new_tables, expected_tables) + else: + _make_reproducible(new_bioc, expected_bioc) + + assert new_bioc == expected_bioc + if has_tables: + assert new_tables == expected_tables + + def _make_reproducible(*data: dict[str, Any]) -> None: """Make output files reproducible by stripping dates and file paths.""" for d in data: From 84b5f19a06635cc1b6db78f2de5d2bbaf9a37815 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 20 May 2025 22:50:44 +0100 Subject: [PATCH 021/125] Sample word document for testing --- .../Supplementary/Word/mmc1.doc_bioc.json | 846 ++++++++++++++++++ 1 file changed, 846 insertions(+) create mode 100644 tests/data/Supplementary/Word/mmc1.doc_bioc.json diff --git 
a/tests/data/Supplementary/Word/mmc1.doc_bioc.json b/tests/data/Supplementary/Word/mmc1.doc_bioc.json new file mode 100644 index 00000000..9908a236 --- /dev/null +++ b/tests/data/Supplementary/Word/mmc1.doc_bioc.json @@ -0,0 +1,846 @@ +{ + "source": "Auto-CORPus (supplementary)", + "date": "20250520", + "key": "autocorpus_supplementary.key", + "infons": {}, + "documents": [ + { + "id": "1", + "infons": {}, + "inputfile": "/mnt/sda2/Projects/Auto-CORPus/tests/data/Supplementary/Word/mmc1.doc", + "passages": [ + { + "offset": 0, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "The occurrence of a multidrug-resistant tuberculous retropharyngeal abscess in an immunocompetent patient: a case report", + "annotations": [], + "relations": [] + }, + { + "offset": 120, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Tiresse N4* , Oucharqui S2*,Benaissa E1, 2, Badri B4 Bssaibis F2, Maleb A3, Elouennass M1,2", + "annotations": [], + "relations": [] + }, + { + "offset": 212, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "1Epidemiology and bacterial resistance research team/BIO-INOVA Centre, Faculty of Medicine and Pharmacy (University Mohammed V), Rabat, Morocco. ", + "annotations": [], + "relations": [] + }, + { + "offset": 357, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "2Department of Bacteriology, Mohammed V Military Teaching Hospital / Faculty of Medicine and Pharmacy (University Mohammed V), Rabat, Morocco. 
", + "annotations": [], + "relations": [] + }, + { + "offset": 500, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "3Laboratory of Microbiology, Mohammed VI University Hospital / Faculty of Medicine and Pharmacy (University Mohammed the first), Oujda, Morocco.", + "annotations": [], + "relations": [] + }, + { + "offset": 644, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "4Department of pneumology, Mohammed V Military Teaching Hospital / Faculty of Medicine and Pharmacy (University Mohammed V), Rabat, Morocco.", + "annotations": [], + "relations": [] + }, + { + "offset": 784, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "* Oucharqui sara and Tiresse nabil have contributed equally in the elaboration of the work.", + "annotations": [], + "relations": [] + }, + { + "offset": 875, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Corresponding author: Elmostafa Benaissa : benaissaelmostafa2@gmail.com", + "annotations": [], + "relations": [] + }, + { + "offset": 947, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Tiresse Nabil: nabil.tiresse1@gmail.com", + "annotations": [], + "relations": [] + }, + { + "offset": 986, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Oucharqui Sara: oucharqui@gmail.com", + "annotations": [], + "relations": [] + }, + { + "offset": 1021, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Elmostafa Benaissa : benaissaelmostafa2@gmail.com", + "annotations": [], + "relations": [] + }, + { + "offset": 1071, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Badri bouchra: bouchra.ba04@gmail.com", + "annotations": [], + "relations": [] + }, + { + "offset": 
1108, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Bssaibis fatna: bssaibisfatna@yahoo.fr", + "annotations": [], + "relations": [] + }, + { + "offset": 1146, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Adil Maleb: maleb.adil@gmail.com", + "annotations": [], + "relations": [] + }, + { + "offset": 1178, + "infons": { + "iao_name_1": "document title", + "iao_id_1": "IAO:0000305" + }, + "text": "Mostafa Elouennass: elouennassm@yahoo.fr", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + 
"offset": 1218, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Abstract:", + "annotations": [], + "relations": [] + }, + { + "offset": 1227, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Retropharyngeal abscess is an uncommon location of tuberculosis (TB). In this report, we describe a multidrug-resistant tuberculous retropharyngeal abscess in a 21-year-old female patient who was treated for lymph node TB for one year. CT scan revealed a large retropharyngeal abscess that was aspirated intraorally under local anesthesia. The diagnosis of TB was retained by molecular and histological study. GeneXpert MTB/ RIF (Cepheid, Sunnyvale, CA, USA),performed on the pus, showed rifampicin resistance and a first- and second-line drug resistance test using Genotype MTBDRplus VER.2 and MTBDRsl VER.1 (Hain Lifescience GmbH, Nehren, Germany) showed TB highly resistant to rifampicin, isoniazid, and aminoglycosides. Treatment is primarily medical as it combines specific antituberculous antibiotics, and aspiration for drainage of the abscess. Our patient was put on long-term 2nd line anti-TB treatment. 
", + "annotations": [], + "relations": [] + }, + { + "offset": 2142, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Key words: Multidrug-resistant, Retropharyngeal abscess, GeneXpert MTB/RIF, GenoType MTBDRplus, GenoType MTBDRsl", + "annotations": [], + "relations": [] + }, + { + "offset": 2254, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 2254, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Introduction", + "annotations": [], + "relations": [] + }, + { + "offset": 2266, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Tuberculous retropharyngeal abscess is not frequently reported in the literature and pre-extensive tuberculous retropharyngeal abscess is even less frequently reported [1]. Early recognition of this condition is essential to prevent serious complications. The diagnosis is difficult and relies on a combination of clinical, radiological and biological arguments. 
We report a case of multidrug resistant (MDR) tuberculous retropharyngeal abscess in a 21-year-old female patient treated for lymph node tuberculosis (TB) for one year and discuss the different diagnostic and therapeutic elements of this pathology, highlighting the contribution of molecular biology in the effective management of MDR extra-pulmonary TB.", + "annotations": [], + "relations": [] + }, + { + "offset": 2983, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Case report", + "annotations": [], + "relations": [] + }, + { + "offset": 2994, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "This is a 21-year-old female with a history of chronic headache for several years with Chiari decompression surgery performed in 2017 and latero-cervical adenopathy diagnosed as lymph node TB on bacteriological, molecular and histological arguments in 2019. GeneXpert MTB/RIF performed on the cervical lymph node came back positive for TB, without resistance to rifampicin. She was then treated at another institution according to the national protocol which includes quadritherapy with isoniazid, rifampicin, ethambutol and pyrazinamide for 2 months followed by bitherapy with isoniazid and rifampicin for 10 months (2RHZE/10RH). The evolution was then marked by the disappearance of the lymph nodes after one year of treatment. Six months after the end of treatment, the patient presented to the emergency room with severe headaches.", + "annotations": [], + "relations": [] + }, + { + "offset": 3830, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Otherwise, no cough, chest pain, fever, or loss of appetite was reported. The patient noted no signs of trismus or difficulty breathing. She reported no known allergies and had no history of smoking or drinking alcohol. 
On admission, physical examination revealed a body temperature of 36.6\u00b0C, a heart rate of 90 beats/min, and a blood pressure of 117/75 mmHg. Palpation of both sides of the neck revealed no tenderness and no lymph nodes were noted. Examination of the oral cavity revealed no pathologic findings, and no posterior pharyngeal wall projections were observed. The lungs were clear on auscultation and no neurologic deficits were noted on initial clinical examination. ", + "annotations": [], + "relations": [] + }, + { + "offset": 4514, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "The biological workup showed hemoglobin at 12.6 g/l; white blood cell count at 4.8 G/l; and C-reactive protein at 0.8 mg/l. In addition, serologies for human immunodeficiency virus (HIV), hepatitis B, and hepatitis C were negative.", + "annotations": [], + "relations": [] + }, + { + "offset": 4745, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "A cerebral CT scan performed as part of the etiological diagnosis fortuitously revealed a peripherally enhanced collection in the retropharyngeal area after injection of contrast medium measuring 19x21 mm, associated with an adjacent necrotic adenopathy measuring 10x06 mm. 
(figure 1).", + "annotations": [], + "relations": [] + }, + { + "offset": 5030, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 5030, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Figure 1: Sagittal CT scan revealed a peripherally enhanced collection in the retropharyngeal area after injection of contrast medium.", + "annotations": [], + "relations": [] + }, + { + "offset": 5164, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 5164, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "A cervical MRI was realized later and confirmed the presence of the retropharyngeal collection. (figure 2)", + "annotations": [], + "relations": [] + }, + { + "offset": 5270, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 5270, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 5270, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Figure 2: Sagittal MRI revealed the presence of the retropharyngeal collection", + "annotations": [], + "relations": [] + }, + { + "offset": 5348, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 5348, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "The abscess was drained under local anesthesia. 
02 milliliters of pus were aspirated. The specimen was sent for bacteriological analysis for Mycobacterium tuberculosis complex (MTC) and banal germs as well as for pathological study. A molecular study using GeneXpert MTB/RIF (Cepheid, Sunnyvale, CA, USA) resulted in detection of MTC with detection of rifampicin resistance in less than 2 hours. In response to this rifampicin resistance, we performed other molecular tests, including GenoType MTBDRplus VER. 2 and GenoType MTBDRsl VER.1 (Hain Lifescience GmbH, Nehren, Germany) on the pus to confirm rifampicin resistance and also to investigate resistance to other anti-TB drugs. It should be noted that this technique is not validated on extrapulmonary specimens directly, although many studies have showed a good correlation with the usual resistance screening methods. The MTBDRplus VER. 2 showed resistance to both rifampicin and isoniazid, while MTBDRsl VER.1 showed resistance only to aminoglycosides. ", + "annotations": [], + "relations": [] + }, + { + "offset": 6360, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Direct examination after special Ziehl-Nielsen staining was positive and cultures on Lowenstein-Jensen\u00ae (LJ) solid medium and Mycobacteria Growth Indicator Tube (MGIT\u00ae) liquid medium were positive after 32 days and 12 days respectively, thus confirming the molecular diagnosis.", + "annotations": [], + "relations": [] + }, + { + "offset": 6637, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "A treatment was initiated on the basis of molecular data. 
The histopathological study confirmed the molecular diagnosis by showing epithelioid and gigantocellular granulomas with caseous necrosis, without histological evidence of malignancy.", + "annotations": [], + "relations": [] + }, + { + "offset": 6878, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Subsequently, the patient was put on a long-term protocol consisting of 6 months of bedaquiline, levofloxacin, linezolid, clofazimine, and cycloserine and 12 to 14 months of levofloxacin, linezolid, clofazimine, and cycloserine.", + "annotations": [], + "relations": [] + }, + { + "offset": 7106, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "After 1 month of treatment, the antibacillary drugs appear to be well tolerated, and the patient is still being monitored.", + "annotations": [], + "relations": [] + }, + { + "offset": 7228, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Discussion", + "annotations": [], + "relations": [] + }, + { + "offset": 7238, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 7238, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "TB remains a major public health problem in the world, mainly affecting developing countries [2]. Its incidence has also increased in developed countries, partly due to co-infection with HIV [2], the latter being more frequent in extra-pulmonary forms [3].", + "annotations": [], + "relations": [] + }, + { + "offset": 7494, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": " The 2019 WHO report estimates the number of new cases at 10 million and the number of deaths at 1.5 million [4]. 
TB usually affects the lungs (pulmonary) or sometimes other organs (extrapulmonary). Excluding laryngeal TB, TB of the head and neck is rare and constitutes 2-6% of extrapulmonary TB and 0.1-1% of all forms of TB [5]. Retropharyngeal localization is rare [1].", + "annotations": [], + "relations": [] + }, + { + "offset": 7868, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Infection of the retropharyngeal space and subsequent abscess formation are mainly due to acute bacterial infections of the head and neck region, especially in children, injury to the posterior pharyngeal wall, and forward spread of spinal TB [6].", + "annotations": [], + "relations": [] + }, + { + "offset": 8115, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Spread to the retropharyngeal space occurs via lymphatics involving persistent retropharyngeal nodes or by hematogenous spread from pulmonary or extrapulmonary sites [5]. In our patient, the retropharyngeal abscess was probably due to lymphatic dissemination from lymph node TB because radiological exploration revealed a centimetric adenopathy with a necrotic center adjacent to the retropharyngeal abscess and there was no evidence of any distant involvement that could support hematogenous, pulmonary, or other dissemination. Tuberculous retropharyngeal abscess in an immunocompetent adult is rare [6]. ", + "annotations": [], + "relations": [] + }, + { + "offset": 8721, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Drug-resistant TB represents a major challenge to national, regional and global TB control programs. Some MDR strains have developed additional resistance mechanisms to second-line antibacillaries, namely fluoroquinolones and aminoglycosides [7]. 
Each year, 500,000 cases of MDR-TB or rifampicin-resistant TB (RR-TB) and nearly 200,000 deaths are reported worldwide. ", + "annotations": [], + "relations": [] + }, + { + "offset": 9088, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "In 2019, the reported treatment success rate was 56% for MDR and extensively drug-resistant (XDR) TB cases and 39% for XDR-TB [4]. In Morocco, where TB remains endemic, the 2014 National TB Drug Resistance Survey found a low prevalence of MDR/XDR-TB (1% MDR-TB among new cases and 8.7% among previously treated cases) [4]. ", + "annotations": [], + "relations": [] + }, + { + "offset": 9412, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "In 2019, 235 cases of drug-resistant TB were treated in Morocco, and 1500 cumulative cases have been reported since 2012 [4]. MDR extrapulmonary localizations have rarely been described in the literature [3,7,8]. An Indian study published in 2014 reported 3 cases, including 2 lymph node localizations and 1 cervical cold abscess [3]. ", + "annotations": [], + "relations": [] + }, + { + "offset": 9747, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "MDR extrapulmonary forms are more frequent in young female subjects with a history of TB [8]. This is in accordance with our case. Another Moroccan study published in 2018 presented 7 cases of MDR extrapulmonary TB, of which 6 patients had a history of TB and 1 patient had a therapeutic failure [7]. 
4 of these 7 patients had additional resistance to second-line anti-TB drugs [7].", + "annotations": [], + "relations": [] + }, + { + "offset": 10129, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": " The diagnosis of MDR in extrapulmonary forms should be made by tissue or biological fluid sampling, but this is sometimes difficult [3]. Tuberculous retropharyngeal abscess can present with variable manifestations, ranging from asymptomatic to subtle features such as odynophagia alone and neck pain, due to early stage and lesser severity of the disease, to life-threatening respiratory obstruction [6]. Our patient had only chronic headache that can be attributed to her Chiari malformation. In addition, the general condition was preserved. On throat examination, swelling due to tuberculous retropharyngeal abscess is usually located in the midline [6].", + "annotations": [], + "relations": [] + }, + { + "offset": 10787, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Radiologic imaging plays an important role in demonstrating the extent of the abscess and the involvement of surrounding structures [2,5]. CT has an accuracy of 89% and MRI is even more accurate, as it allows for better soft tissue analysis and allows for the assessment of vascular complications, including internal jugular vein thrombosis [2,5]. Both CT and MRI in our patient showed the retropharyngeal abscess. ", + "annotations": [], + "relations": [] + }, + { + "offset": 11202, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "TB was first diagnosed by direct microscopic examination and the discovery of acid-fast bacilli in the abscess aspirate using Ziehl-Neelsen stain, and then confirmed by culture, which remains the gold standard method [2]. 
", + "annotations": [], + "relations": [] + }, + { + "offset": 11424, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Molecular biology has demonstrated its effectiveness even on pauci-bacillary specimens by allowing the identification and detection of resistance to anti-TB drugs through several studies. GeneXpert MTB/RIF is a rapid, automated, World Health Organization (WHO)-recommended nucleic acid amplification test that is widely used for the simultaneous detection of MTC and rifampicin resistance in pulmonary and extrapulmonary specimens. It has a sensitivity of more than 80% in cerebral spine fluid, pus and biopsy fragments [7]. In our study, GeneXpert MTB/RIF (Cepheid, Sunnyvale, CA, USA) allowed identification of MTC and detection of rifampicin resistance. ", + "annotations": [], + "relations": [] + }, + { + "offset": 12081, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "In addition to the GeneXpert MTB/RIF, there are the MTBDRplus and MTBDRsl genotype tests which allow the identification of MTC from pulmonary clinical specimens or cultivated samples. The MTBDRplus test is used to identify resistance to rifampicin and isoniazid [7]. The MTBDRsl test is designed to detect resistance to the second-line antibacillary drugs, namely aminoglycosides on the gyrA gene, fluoroquinolones on the rrs gene, and ethambutol on the embB gene [7]. The MTBDRplus test and the MTBDRsl test have a sensitivity greater than 80% for the detection of resistance to rifampicin, isoniazid, fluoroquinolones, and aminoglycosides [7]. The discovery of an additional aminoglycoside resistance makes the choice of treatment even more difficult. These tests have been shown to be effective in detecting resistance to anti-TB drugs from extrapulmonary samples, even though they are not validated on these samples. 
This has been reported in some studies [9, 10].", + "annotations": [], + "relations": [] + }, + { + "offset": 13049, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "In our case, the aspiration was positive by GeneXpert MTB/RIF with a detection of rifampicin resistance. The MTBDRplus test confirmed resistance to rifampicin and isoniazid and the MTBDRsl test showed additional resistance to aminoglycosides. Later on, mycobacterial culture on solid and liquid media both became positive after 32 days and 12 days respectively. Pre-ultraresistant TB (pre-XDR TB) is defined as MDR/RR-TB in addition to resistance to any fluoroquinolones (levofloxacin and moxifloxacin). ", + "annotations": [], + "relations": [] + }, + { + "offset": 13553, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Antibacillary drug resistance can be primary or secondary, primary drug resistance is defined as resistance in a patient who has never been treated for TB. Treatment with anti-TB drugs exerts selective pressure on the Mycobacterium tuberculosis population, resulting in a decrease in susceptible bacilli, an increase in drug-resistant mutants, and the emergence of drug resistance (acquired resistance). Given her previously treated lymph node TB, it seems safe to assume that our patient has acquired drug resistance. ", + "annotations": [], + "relations": [] + }, + { + "offset": 14072, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "In recent years, significant progress has been made in the rapid diagnosis of TB and drug resistance, as well as in treatment: new drugs, reduction of the age of indication for certain drugs as well as modification of the classification of drugs used to treat MDR-TB. 
", + "annotations": [], + "relations": [] + }, + { + "offset": 14340, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "For MDR-TB of all forms, the WHO recommends a short regimen of 9 to 11 months, which includes a 4 to 6 month loading phase with high dose amikacin, moxifloxacin, etionamide, clofazimine, pyrazinamide, ethambutol and high dose of isoniazid. In the maintenance phase, patients are put on moxifloxacin, clofazimine, pyrazinamide and ethambutol [11]. Another recent WHO review in 2020 updated the recommendations eliminating short regimens containing injectables, replacing them with a short regimen containing bedaquiline [4]. Another WHO trial approved by the FDA in 2019 recommends the combination of bedaquiline, linezolid, and pretomanide for ultraresistant TB or XDR-TB for 9 months if the three molecules have not been taken previously [4,11].", + "annotations": [], + "relations": [] + }, + { + "offset": 15087, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "In Morocco, the short regimen has been adapted for some cases, but the old long regimen is still widely prescribed. This long regimen is based on 6 months of initial treatment with bedaquiline combined with levofloxacin, linezolid, clofazimine and cycloserine, followed by cessation of bedaquiline and maintenance of the remainder for 12 to 14 months if there is no resistance to group A and B molecules [4]. Our patient was put on a standard regimen by replacing aminoglycosides with bedaquiline. 
The simultaneous medical and surgical approach seems to be the best strategy for the management of tuberculous retropharyngeal abscess [3,5].", + "annotations": [], + "relations": [] + }, + { + "offset": 15726, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "As with any abscess, the mainstay of management of retro-pharyngeal tubercular abscess is drainage of the pus. Therapeutic aspiration only has been used successfully and can be repeated if necessary [2]. Anti-TB drug therapy and conservative neck stabilization should be the initial treatment if a retropharyngeal abscess is due to an extension from cervical spine TB, with a stable spine and without any neurological deficit or with minimal neurological signs [6]. If left untreated, internal jugular vein thrombosis, mediastinitis and airway obstruction are potential complications [1,2]. ", + "annotations": [], + "relations": [] + }, + { + "offset": 16317, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Clinical, bacteriological and radiological surveillance is recommended, as well as monitoring of treatment tolerance [7,11]. The prognosis of MDR pulmonary and extrapulmonary TB has been improved thanks in part to the prescription of new anti-TB drugs such as linezolid and bedaquiline. The success of the treatment is related to the number of effective molecules still available [7]. However, high mortality has been observed in patients with XDR-TB and HIV infection. This could be explained by its synergistic relationship with TB and the emergence of MDR and XDR strains [7]. 
The HIV serology of our patient is negative which could further improve the prognosis of her disease.", + "annotations": [], + "relations": [] + }, + { + "offset": 16998, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Conclusion", + "annotations": [], + "relations": [] + }, + { + "offset": 17008, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Retropharyngeal abscess is a recognized but rare presentation of TB. Unspecified symptoms and unusual location often lead to delayed diagnosis and treatment. Through this case, we highlight the importance of gene amplification tests in the effective and rapid management of this disease.", + "annotations": [], + "relations": [] + }, + { + "offset": 17295, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Competing interests:", + "annotations": [], + "relations": [] + }, + { + "offset": 17315, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "The authors declare no competing interest.", + "annotations": [], + "relations": [] + }, + { + "offset": 17357, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Author contributions:", + "annotations": [], + "relations": [] + }, + { + "offset": 17378, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "OS,TN and BE have been involved in drafting in the manuscript, BF, BY, CM, AM have revising the manuscript and ELM have given final approval of the version to be published.", + "annotations": [], + "relations": [] + }, + { + "offset": 17550, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "References", + "annotations": [], + "relations": [] + }, + { + "offset": 17560, + "infons": { + 
"iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Supplemental file: ", + "annotations": [], + "relations": [] + }, + { + "offset": 17579, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "", + "annotations": [], + "relations": [] + }, + { + "offset": 17579, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Definitions:", + "annotations": [], + "relations": [] + }, + { + "offset": 17591, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "RR: is defined as isolated resistance to rifampicin.", + "annotations": [], + "relations": [] + }, + { + "offset": 17643, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "MDR: is defined as resistance to both rifampicin and isoniazid.", + "annotations": [], + "relations": [] + }, + { + "offset": 17706, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Pre-XDR: is defined as multidrug resistance (MDR/RR-TB) in addition to resistance to any fluoroquinolones (levofloxacin and moxifloxacin).", + "annotations": [], + "relations": [] + }, + { + "offset": 17844, + "infons": { + "iao_name_1": "supplementary material section", + "iao_id_1": "IAO:0000326" + }, + "text": "Ultraresistant TB or extensively drug-resistant TB (XDR-TB): is defined as multidrug resistance (MDR/RR-TB) in addition to resistance to a fluoroquinolone (levofloxacin or moxifloxacin) and at least one of bedaquiline or linezolid (or both).", + "annotations": [], + "relations": [] + } + ], + "relations": [] + } + ] +} \ No newline at end of file From 05d1a426a54a7e497ecc819c54f64f1fab91a84f Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 20 May 2025 23:51:22 +0100 Subject: [PATCH 022/125] Added type cast for word bioc text building --- 
autocorpus/bioc_supplementary.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autocorpus/bioc_supplementary.py b/autocorpus/bioc_supplementary.py index 7cbd9793..f77f3abb 100644 --- a/autocorpus/bioc_supplementary.py +++ b/autocorpus/bioc_supplementary.py @@ -1,7 +1,7 @@ """This module provides functionality for converting text extracted from various file types into a BioC format.""" import datetime -from typing import TypeVar +from typing import TypeVar, cast import pandas as pd import regex @@ -221,6 +221,7 @@ def build_bioc( bioc.key = "autocorpus_supplementary.key" temp_doc = BioCDocument(id="1") if file_type == "word": + text = cast(list[tuple[str, bool]], text) temp_doc.passages = BioCTextConverter.__identify_word_passages(text) elif file_type == "pdf": temp_doc.passages = BioCTextConverter.__identify_passages(text) From 33090c13a11ccae69bbe3782de3f0e9663dd11e2 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Wed, 21 May 2025 09:12:34 +0100 Subject: [PATCH 023/125] Updated expected PDF output with latest bioc key changes --- .../PDF/Expected Output/tp-10-08-2123-coif.pdf_bioc.json | 3 +-- .../PDF/Expected Output/tp-10-08-2123-coif.pdf_tables.json | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_bioc.json b/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_bioc.json index 8f5113b6..46bac838 100644 --- a/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_bioc.json +++ b/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_bioc.json @@ -1,8 +1,7 @@ { "source": "Auto-CORPus (supplementary)", - "date": "20250516", + "date": "20250521", "key": "autocorpus_supplementary.key", - "version": "1.0", "infons": {}, "documents": [ { diff --git a/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_tables.json b/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_tables.json index 
e62ba301..f09df8fe 100644 --- a/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_tables.json +++ b/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_tables.json @@ -1,8 +1,7 @@ { "source": "Auto-CORPus (supplementary)", - "date": "20250520", + "date": "20250521", "key": "autocorpus_supplementary.key", - "version": "1.0", "infons": {}, "documents": [ { From 3b8aca1685e7e2c13aa99689917072bcc4180b3c Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Wed, 21 May 2025 09:28:14 +0100 Subject: [PATCH 024/125] Type hints added --- autocorpus/bioc_supplementary.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/autocorpus/bioc_supplementary.py b/autocorpus/bioc_supplementary.py index f77f3abb..ed9dee95 100644 --- a/autocorpus/bioc_supplementary.py +++ b/autocorpus/bioc_supplementary.py @@ -224,30 +224,25 @@ def build_bioc( text = cast(list[tuple[str, bool]], text) temp_doc.passages = BioCTextConverter.__identify_word_passages(text) elif file_type == "pdf": + text = cast(str, text) temp_doc.passages = BioCTextConverter.__identify_passages(text) else: + text = cast(str, text) temp_doc.passages = BioCTextConverter.__identify_passages(text) temp_doc.inputfile = input_file bioc.documents.append(temp_doc) return bioc @staticmethod - def __identify_passages(text): + def __identify_passages(text: str | list[str]) -> list[BioCPassage]: offset = 0 passages = [] - if text is None: - return passages if isinstance(text, str): - text = text.split("\n\n") + split_text = text.split("\n\n") else: - text = [x.split("\n") for x in text] - temp = [] - for i in text: - for t in i: - temp.append(t) - text = temp - text = [x for x in text if x] - for line in text: + split_text = [t for x in text for t in x.split("\n")] + split_text = [x for x in split_text if x] + for line in split_text: iao_name = "supplementary material section" iao_id = "IAO:0000326" passage = BioCPassage() From 
992fc9051f26e135c968e0e6d8c65fe5bd07f605 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Wed, 21 May 2025 09:51:03 +0100 Subject: [PATCH 025/125] Fix URL for codecov badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5faf8511..3607dc78 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/omicsNLP/Auto-CORPus/main.svg)](https://results.pre-commit.ci/latest/github/omicsNLP/Auto-CORPus/main) [![PyPI version](https://badge.fury.io/py/autocorpus.svg)](https://badge.fury.io/py/autocorpus) -[![codecov](https://codecov.io/gh/ImperialCollegeLondon/Auto-CORPus/graph/badge.svg?token=6WWASKCH66)](https://codecov.io/gh/ImperialCollegeLondon/Auto-CORPus) +[![codecov](https://codecov.io/gh/omicsNLP/Auto-CORPus/graph/badge.svg?token=ZTKK4URM4A)](https://codecov.io/gh/omicsNLP/Auto-CORPus) *Requires Python 3.10+* From 6a0ae52e3efb7c17e479fa39013dc03c38fe6959 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Wed, 21 May 2025 10:08:47 +0100 Subject: [PATCH 026/125] pyproject.toml: Move `pandas-stubs` and `lxml-stubs` to `dev` group --- poetry.lock | 228 ++++++++++++++++++++++++++++++++++++++++++------- pyproject.toml | 4 +- 2 files changed, 198 insertions(+), 34 deletions(-) diff --git a/poetry.lock b/poetry.lock index fdc0a55f..c1d22599 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. 
[[package]] name = "annotated-types" @@ -6,6 +6,7 @@ version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -17,6 +18,7 @@ version = "0.46.0" description = "The official Python library for the anthropic API" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "anthropic-0.46.0-py3-none-any.whl", hash = "sha256:1445ec9be78d2de7ea51b4d5acd3574e414aea97ef903d0ecbb57bec806aaa49"}, {file = "anthropic-0.46.0.tar.gz", hash = "sha256:eac3d43271d02321a57c3ca68aca84c3d58873e8e72d1433288adee2d46b745b"}, @@ -41,6 +43,7 @@ version = "4.9.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c"}, {file = "anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028"}, @@ -54,7 +57,7 @@ typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] doc = ["Sphinx (>=8.2,<9.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"] -test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] +test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1) ; python_version >= \"3.10\"", "uvloop 
(>=0.21) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\" and python_version < \"3.14\""] trio = ["trio (>=0.26.1)"] [[package]] @@ -63,18 +66,19 @@ version = "25.3.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"}, {file = "attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"}, ] [package.extras] -benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest 
(>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"] -tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] +tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""] [[package]] name = "babel" @@ -82,13 +86,14 @@ version = "2.17.0" description = "Internationalization utilities" optional = false python-versions = ">=3.8" +groups = ["docs"] files = [ {file = "babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2"}, {file = "babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d"}, ] [package.extras] -dev = ["backports.zoneinfo", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata"] +dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata ; sys_platform == \"win32\""] [[package]] name = "backrefs" @@ -96,6 +101,7 @@ version = "5.8" description = "A wrapper around re and regex that adds additional back references." 
optional = false python-versions = ">=3.9" +groups = ["docs"] files = [ {file = "backrefs-5.8-py310-none-any.whl", hash = "sha256:c67f6638a34a5b8730812f5101376f9d41dc38c43f1fdc35cb54700f6ed4465d"}, {file = "backrefs-5.8-py311-none-any.whl", hash = "sha256:2e1c15e4af0e12e45c8701bd5da0902d326b2e200cafcd25e49d9f06d44bb61b"}, @@ -114,6 +120,7 @@ version = "4.13.4" description = "Screen-scraping library" optional = false python-versions = ">=3.7.0" +groups = ["main"] files = [ {file = "beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b"}, {file = "beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195"}, @@ -136,6 +143,7 @@ version = "5.5.2" description = "Extensible memoizing collections and decorators" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"}, {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"}, @@ -147,6 +155,7 @@ version = "2025.1.31" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" +groups = ["main", "docs"] files = [ {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, @@ -158,6 +167,7 @@ version = "3.4.0" description = "Validate configuration and produce human readable error messages." 
optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, @@ -169,6 +179,7 @@ version = "3.4.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7" +groups = ["main", "docs"] files = [ {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, @@ -270,6 +281,7 @@ version = "8.1.8" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" +groups = ["main", "docs"] files = [ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, @@ -284,10 +296,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main", "dev", "docs"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "platform_system == \"Windows\"", dev = "sys_platform == \"win32\""} [[package]] name = "coverage" @@ -295,6 +309,7 @@ version = "7.7.1" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "coverage-7.7.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:553ba93f8e3c70e1b0031e4dfea36aba4e2b51fe5770db35e99af8dc5c5a9dfe"}, {file = "coverage-7.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:44683f2556a56c9a6e673b583763096b8efbd2df022b02995609cf8e64fc8ae0"}, @@ -365,7 +380,7 @@ files = [ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} [package.extras] -toml = ["tomli"] +toml = ["tomli ; python_full_version <= \"3.11.0a6\""] [[package]] name = "distlib" @@ -373,6 +388,7 @@ version = "0.3.9" description = "Distribution utilities" optional = false python-versions = "*" +groups = ["main", "dev"] files = [ {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"}, {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"}, @@ -384,6 +400,7 @@ version = "1.9.0" description = "Distro - an OS platform information API" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, {file = "distro-1.9.0.tar.gz", hash = 
"sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, @@ -395,6 +412,8 @@ version = "1.2.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" +groups = ["main", "dev"] +markers = "python_version == \"3.10\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -409,6 +428,7 @@ version = "3.18.0" description = "A platform independent file lock." optional = false python-versions = ">=3.9" +groups = ["main", "dev"] files = [ {file = "filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de"}, {file = "filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2"}, @@ -417,7 +437,7 @@ files = [ [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"] -typing = ["typing-extensions (>=4.12.2)"] +typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] [[package]] name = "filetype" @@ -425,6 +445,7 @@ version = "1.2.0" description = "Infer file type and MIME type of any file/buffer. No external dependencies." 
optional = false python-versions = "*" +groups = ["main"] files = [ {file = "filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25"}, {file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"}, @@ -436,6 +457,7 @@ version = "2025.3.2" description = "File-system specification" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "fsspec-2025.3.2-py3-none-any.whl", hash = "sha256:2daf8dc3d1dfa65b6aa37748d112773a7a08416f6c70d96b264c96476ecaf711"}, {file = "fsspec-2025.3.2.tar.gz", hash = "sha256:e52c77ef398680bbd6a98c0e628fbc469491282981209907bbc8aea76a04fdc6"}, @@ -475,6 +497,7 @@ version = "6.3.1" description = "Fixes mojibake and other problems with Unicode, after the fact" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "ftfy-6.3.1-py3-none-any.whl", hash = "sha256:7c70eb532015cd2f9adb53f101fb6c7945988d023a085d127d1573dc49dd0083"}, {file = "ftfy-6.3.1.tar.gz", hash = "sha256:9b3c3d90f84fb267fe64d375a07b7f8912d817cf86009ae134aa03e1819506ec"}, @@ -489,6 +512,7 @@ version = "0.18.0" description = "Fuzzy string matching in python" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "fuzzywuzzy-0.18.0-py2.py3-none-any.whl", hash = "sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993"}, {file = "fuzzywuzzy-0.18.0.tar.gz", hash = "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8"}, @@ -506,6 +530,7 @@ version = "2.1.0" description = "Copy your docs directly to the gh-pages branch." 
optional = false python-versions = "*" +groups = ["docs"] files = [ {file = "ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"}, {file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"}, @@ -523,6 +548,7 @@ version = "2.39.0" description = "Google Authentication Library" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "google_auth-2.39.0-py2.py3-none-any.whl", hash = "sha256:0150b6711e97fb9f52fe599f55648950cc4540015565d8fbb31be2ad6e1548a2"}, {file = "google_auth-2.39.0.tar.gz", hash = "sha256:73222d43cdc35a3aeacbfdcaf73142a97839f10de930550d89ebfe1d0a00cde7"}, @@ -536,11 +562,11 @@ rsa = ">=3.1.4,<5" [package.extras] aiohttp = ["aiohttp (>=3.6.2,<4.0.0)", "requests (>=2.20.0,<3.0.0)"] enterprise-cert = ["cryptography", "pyopenssl"] -pyjwt = ["cryptography (<39.0.0)", "cryptography (>=38.0.3)", "pyjwt (>=2.0)"] -pyopenssl = ["cryptography (<39.0.0)", "cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +pyjwt = ["cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "pyjwt (>=2.0)"] +pyopenssl = ["cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] reauth = ["pyu2f (>=0.1.5)"] requests = ["requests (>=2.20.0,<3.0.0)"] -testing = ["aiohttp (<3.10.0)", "aiohttp (>=3.6.2,<4.0.0)", "aioresponses", "cryptography (<39.0.0)", "cryptography (>=38.0.3)", "flask", "freezegun", "grpcio", "mock", "oauth2client", "packaging", "pyjwt (>=2.0)", "pyopenssl (<24.3.0)", "pyopenssl (>=20.0.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-localserver", "pyu2f (>=0.1.5)", "requests (>=2.20.0,<3.0.0)", "responses", "urllib3"] +testing = ["aiohttp (<3.10.0)", "aiohttp (>=3.6.2,<4.0.0)", "aioresponses", "cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "flask", "freezegun", "grpcio", "mock", "oauth2client", "packaging", "pyjwt 
(>=2.0)", "pyopenssl (<24.3.0)", "pyopenssl (>=20.0.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-localserver", "pyu2f (>=0.1.5)", "requests (>=2.20.0,<3.0.0)", "responses", "urllib3"] urllib3 = ["packaging", "urllib3"] [[package]] @@ -549,6 +575,7 @@ version = "1.12.1" description = "GenAI Python SDK" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "google_genai-1.12.1-py3-none-any.whl", hash = "sha256:7cbc1bc029712946ce41bcf80c0eaa89eb8c09c308efbbfe30fd491f402c258a"}, {file = "google_genai-1.12.1.tar.gz", hash = "sha256:5c7eda422360643ce602a3f6b23152470ec1039310ef40080cbe4e71237f6391"}, @@ -569,6 +596,7 @@ version = "1.6.2" description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." optional = false python-versions = ">=3.9" +groups = ["docs"] files = [ {file = "griffe-1.6.2-py3-none-any.whl", hash = "sha256:6399f7e663150e4278a312a8e8a14d2f3d7bd86e2ef2f8056a1058e38579c2ee"}, {file = "griffe-1.6.2.tar.gz", hash = "sha256:3a46fa7bd83280909b63c12b9a975732a927dd97809efe5b7972290b606c5d91"}, @@ -583,6 +611,7 @@ version = "0.16.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"}, {file = "h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1"}, @@ -594,6 +623,7 @@ version = "1.0.9" description = "A minimal low-level HTTP client." 
optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55"}, {file = "httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8"}, @@ -615,6 +645,7 @@ version = "0.28.1" description = "The next generation HTTP client." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, @@ -627,7 +658,7 @@ httpcore = "==1.*" idna = "*" [package.extras] -brotli = ["brotli", "brotlicffi"] +brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] @@ -639,6 +670,7 @@ version = "0.30.2" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" +groups = ["main"] files = [ {file = "huggingface_hub-0.30.2-py3-none-any.whl", hash = "sha256:68ff05969927058cfa41df4f2155d4bb48f5f54f719dd0390103eefa9b191e28"}, {file = "huggingface_hub-0.30.2.tar.gz", hash = "sha256:9a7897c5b6fd9dad3168a794a8998d6378210f5b9688d0dfc180b1a228dc2466"}, @@ -674,6 +706,7 @@ version = "2.6.9" description = "File identification library for Python" optional = false python-versions = ">=3.9" +groups = ["main", "dev"] files = [ {file = "identify-2.6.9-py2.py3-none-any.whl", hash = "sha256:c98b4322da415a8e5a70ff6e51fbc2d2932c015532d77e9f8537b4ba7813b150"}, {file = "identify-2.6.9.tar.gz", hash = "sha256:d40dfe3142a1421d8518e3d3985ef5ac42890683e32306ad614a29490abeb6bf"}, @@ -688,6 +721,7 @@ version = "3.10" 
description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" +groups = ["main", "docs"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -702,6 +736,7 @@ version = "2.1.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, @@ -713,6 +748,7 @@ version = "3.1.6" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" +groups = ["main", "docs"] files = [ {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, @@ -730,6 +766,7 @@ version = "0.9.0" description = "Fast iterable JSON parser." 
optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "jiter-0.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:816ec9b60fdfd1fec87da1d7ed46c66c44ffec37ab2ef7de5b147b2fce3fd5ad"}, {file = "jiter-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9b1d3086f8a3ee0194ecf2008cf81286a5c3e540d977fa038ff23576c023c0ea"}, @@ -815,6 +852,7 @@ version = "1.4.2" description = "Lightweight pipelining with Python functions" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, @@ -826,6 +864,7 @@ version = "4.23.0" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"}, {file = "jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"}, @@ -847,6 +886,7 @@ version = "2024.10.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "jsonschema_specifications-2024.10.1-py3-none-any.whl", hash = "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf"}, {file = "jsonschema_specifications-2024.10.1.tar.gz", hash = "sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272"}, @@ -861,6 +901,7 @@ version = "0.27.1" description = "Python extension for computing string edit distances and similarities." 
optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "levenshtein-0.27.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:13d6f617cb6fe63714c4794861cfaacd398db58a292f930edb7f12aad931dace"}, {file = "levenshtein-0.27.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ca9d54d41075e130c390e61360bec80f116b62d6ae973aec502e77e921e95334"}, @@ -967,6 +1008,7 @@ version = "5.4.0" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "lxml-5.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e7bc6df34d42322c5289e37e9971d6ed114e3776b45fa879f734bded9d1fea9c"}, {file = "lxml-5.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6854f8bd8a1536f8a1d9a3655e6354faa6406621cf857dc27b681b69860645c7"}, @@ -1115,6 +1157,7 @@ version = "0.5.1" description = "Type annotations for the lxml package" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "lxml-stubs-0.5.1.tar.gz", hash = "sha256:e0ec2aa1ce92d91278b719091ce4515c12adc1d564359dfaf81efa7d4feab79d"}, {file = "lxml_stubs-0.5.1-py3-none-any.whl", hash = "sha256:1f689e5dbc4b9247cb09ae820c7d34daeb1fdbd1db06123814b856dae7787272"}, @@ -1129,6 +1172,7 @@ version = "3.7" description = "Python implementation of John Gruber's Markdown." 
optional = false python-versions = ">=3.8" +groups = ["docs"] files = [ {file = "Markdown-3.7-py3-none-any.whl", hash = "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803"}, {file = "markdown-3.7.tar.gz", hash = "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2"}, @@ -1144,15 +1188,16 @@ version = "2.5.3" description = "A fast and complete Python implementation of Markdown" optional = false python-versions = "<4,>=3.9" +groups = ["main"] files = [ {file = "markdown2-2.5.3-py3-none-any.whl", hash = "sha256:a8ebb7e84b8519c37bf7382b3db600f1798a22c245bfd754a1f87ca8d7ea63b3"}, {file = "markdown2-2.5.3.tar.gz", hash = "sha256:4d502953a4633408b0ab3ec503c5d6984d1b14307e32b325ec7d16ea57524895"}, ] [package.extras] -all = ["latex2mathml", "pygments (>=2.7.3)", "wavedrom"] +all = ["latex2mathml ; python_version >= \"3.8.1\"", "pygments (>=2.7.3)", "wavedrom"] code-syntax-highlighting = ["pygments (>=2.7.3)"] -latex = ["latex2mathml"] +latex = ["latex2mathml ; python_version >= \"3.8.1\""] wavedrom = ["wavedrom"] [[package]] @@ -1161,6 +1206,7 @@ version = "0.13.1" description = "Convert HTML to markdown." optional = false python-versions = "*" +groups = ["main"] files = [ {file = "markdownify-0.13.1-py3-none-any.whl", hash = "sha256:1d181d43d20902bcc69d7be85b5316ed174d0dda72ff56e14ae4c95a4a407d22"}, {file = "markdownify-0.13.1.tar.gz", hash = "sha256:ab257f9e6bd4075118828a28c9d02f8a4bfeb7421f558834aa79b2dfeb32a098"}, @@ -1176,6 +1222,7 @@ version = "1.6.2" description = "Convert documents to markdown with high speed and accuracy." 
optional = false python-versions = "<4.0,>=3.10" +groups = ["main"] files = [ {file = "marker_pdf-1.6.2-py3-none-any.whl", hash = "sha256:48fbc6353e6fc3510d30d5682a8974fc9d6eb58a13e7c3f525ed6973b721f108"}, {file = "marker_pdf-1.6.2.tar.gz", hash = "sha256:38725082c89c0aec5e28e4f1df8f3974ccc0742c2265f0342c20e52fbde90bf0"}, @@ -1213,6 +1260,7 @@ version = "3.0.2" description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.9" +groups = ["main", "docs"] files = [ {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, @@ -1283,6 +1331,7 @@ version = "1.3.4" description = "A deep merge function for 🐍." optional = false python-versions = ">=3.6" +groups = ["docs"] files = [ {file = "mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307"}, {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, @@ -1294,6 +1343,7 @@ version = "1.6.1" description = "Project documentation with Markdown." 
optional = false python-versions = ">=3.8" +groups = ["docs"] files = [ {file = "mkdocs-1.6.1-py3-none-any.whl", hash = "sha256:db91759624d1647f3f34aa0c3f327dd2601beae39a366d6e064c03468d35c20e"}, {file = "mkdocs-1.6.1.tar.gz", hash = "sha256:7b432f01d928c084353ab39c57282f29f92136665bdd6abf7c1ec8d822ef86f2"}, @@ -1316,7 +1366,7 @@ watchdog = ">=2.0" [package.extras] i18n = ["babel (>=2.9.0)"] -min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.4)", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4) ; platform_system == \"Windows\"", "ghp-import (==1.0)", "importlib-metadata (==4.4) ; python_version < \"3.10\"", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] [[package]] name = "mkdocs-autorefs" @@ -1324,6 +1374,7 @@ version = "1.4.1" description = "Automatically link across pages in MkDocs." 
optional = false python-versions = ">=3.9" +groups = ["docs"] files = [ {file = "mkdocs_autorefs-1.4.1-py3-none-any.whl", hash = "sha256:9793c5ac06a6ebbe52ec0f8439256e66187badf4b5334b5fde0b128ec134df4f"}, {file = "mkdocs_autorefs-1.4.1.tar.gz", hash = "sha256:4b5b6235a4becb2b10425c2fa191737e415b37aa3418919db33e5d774c9db079"}, @@ -1340,6 +1391,7 @@ version = "0.5.0" description = "MkDocs plugin to programmatically generate documentation pages during the build" optional = false python-versions = ">=3.7" +groups = ["docs"] files = [ {file = "mkdocs_gen_files-0.5.0-py3-none-any.whl", hash = "sha256:7ac060096f3f40bd19039e7277dd3050be9a453c8ac578645844d4d91d7978ea"}, {file = "mkdocs_gen_files-0.5.0.tar.gz", hash = "sha256:4c7cf256b5d67062a788f6b1d035e157fc1a9498c2399be9af5257d4ff4d19bc"}, @@ -1354,6 +1406,7 @@ version = "0.2.0" description = "MkDocs extension that lists all dependencies according to a mkdocs.yml file" optional = false python-versions = ">=3.8" +groups = ["docs"] files = [ {file = "mkdocs_get_deps-0.2.0-py3-none-any.whl", hash = "sha256:2bf11d0b133e77a0dd036abeeb06dec8775e46efa526dc70667d8863eefc6134"}, {file = "mkdocs_get_deps-0.2.0.tar.gz", hash = "sha256:162b3d129c7fad9b19abfdcb9c1458a651628e4b1dea628ac68790fb3061c60c"}, @@ -1370,6 +1423,7 @@ version = "0.6.2" description = "MkDocs plugin to specify the navigation in Markdown instead of YAML" optional = false python-versions = ">=3.9" +groups = ["docs"] files = [ {file = "mkdocs_literate_nav-0.6.2-py3-none-any.whl", hash = "sha256:0a6489a26ec7598477b56fa112056a5e3a6c15729f0214bea8a4dbc55bd5f630"}, {file = "mkdocs_literate_nav-0.6.2.tar.gz", hash = "sha256:760e1708aa4be86af81a2b56e82c739d5a8388a0eab1517ecfd8e5aa40810a75"}, @@ -1384,6 +1438,7 @@ version = "9.6.14" description = "Documentation that simply works" optional = false python-versions = ">=3.8" +groups = ["docs"] files = [ {file = "mkdocs_material-9.6.14-py3-none-any.whl", hash = 
"sha256:3b9cee6d3688551bf7a8e8f41afda97a3c39a12f0325436d76c86706114b721b"}, {file = "mkdocs_material-9.6.14.tar.gz", hash = "sha256:39d795e90dce6b531387c255bd07e866e027828b7346d3eba5ac3de265053754"}, @@ -1413,6 +1468,7 @@ version = "1.3.1" description = "Extension pack for Python Markdown and MkDocs Material." optional = false python-versions = ">=3.8" +groups = ["docs"] files = [ {file = "mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31"}, {file = "mkdocs_material_extensions-1.3.1.tar.gz", hash = "sha256:10c9511cea88f568257f960358a467d12b970e1f7b2c0e5fb2bb48cab1928443"}, @@ -1424,6 +1480,7 @@ version = "0.3.10" description = "MkDocs plugin to allow clickable sections that lead to an index page" optional = false python-versions = ">=3.9" +groups = ["docs"] files = [ {file = "mkdocs_section_index-0.3.10-py3-none-any.whl", hash = "sha256:bc27c0d0dc497c0ebaee1fc72839362aed77be7318b5ec0c30628f65918e4776"}, {file = "mkdocs_section_index-0.3.10.tar.gz", hash = "sha256:a82afbda633c82c5568f0e3b008176b9b365bf4bd8b6f919d6eff09ee146b9f8"}, @@ -1438,6 +1495,7 @@ version = "0.29.1" description = "Automatic documentation from sources, for MkDocs." optional = false python-versions = ">=3.9" +groups = ["docs"] files = [ {file = "mkdocstrings-0.29.1-py3-none-any.whl", hash = "sha256:37a9736134934eea89cbd055a513d40a020d87dfcae9e3052c2a6b8cd4af09b6"}, {file = "mkdocstrings-0.29.1.tar.gz", hash = "sha256:8722f8f8c5cd75da56671e0a0c1bbed1df9946c0cef74794d6141b34011abd42"}, @@ -1462,6 +1520,7 @@ version = "1.16.10" description = "A Python handler for mkdocstrings." 
optional = false python-versions = ">=3.9" +groups = ["docs"] files = [ {file = "mkdocstrings_python-1.16.10-py3-none-any.whl", hash = "sha256:63bb9f01f8848a644bdb6289e86dc38ceddeaa63ecc2e291e3b2ca52702a6643"}, {file = "mkdocstrings_python-1.16.10.tar.gz", hash = "sha256:f9eedfd98effb612ab4d0ed6dd2b73aff6eba5215e0a65cea6d877717f75502e"}, @@ -1479,6 +1538,7 @@ version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, @@ -1487,7 +1547,7 @@ files = [ [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4)"] +gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] tests = ["pytest (>=4.6)"] [[package]] @@ -1496,6 +1556,7 @@ version = "1.15.0" description = "Optional static typing for Python" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "mypy-1.15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:979e4e1a006511dacf628e36fadfecbcc0160a8af6ca7dad2f5025529e082c13"}, {file = "mypy-1.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c4bb0e1bd29f7d34efcccd71cf733580191e9a264a2202b0239da95984c5b559"}, @@ -1549,6 +1610,7 @@ version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
optional = false python-versions = ">=3.5" +groups = ["dev"] files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, @@ -1560,6 +1622,7 @@ version = "3.4.2" description = "Python package for creating and manipulating graphs and networks" optional = false python-versions = ">=3.10" +groups = ["main"] files = [ {file = "networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f"}, {file = "networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1"}, @@ -1579,6 +1642,7 @@ version = "3.9.1" description = "Natural Language Toolkit" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1"}, {file = "nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868"}, @@ -1604,6 +1668,7 @@ version = "1.9.1" description = "Node.js virtual environment builder" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main", "dev"] files = [ {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, @@ -1615,6 +1680,7 @@ version = "2.2.5" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.10" +groups = ["main", "dev"] files = [ {file = "numpy-2.2.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f4a922da1729f4c40932b2af4fe84909c7a6e167e6e99f71838ce3a29f3fe26"}, {file = 
"numpy-2.2.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b6f91524d31b34f4a5fee24f5bc16dcd1491b668798b6d85585d836c1e633a6a"}, @@ -1679,6 +1745,8 @@ version = "12.6.4.1" description = "CUBLAS native runtime libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08ed2686e9875d01b58e3cb379c6896df8e76c75e0d4a7f7dace3d7b6d9ef8eb"}, {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:235f728d6e2a409eddf1df58d5b0921cf80cfa9e72b9f2775ccb7b4a87984668"}, @@ -1691,6 +1759,8 @@ version = "12.6.80" description = "CUDA profiling tools runtime libs." optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:166ee35a3ff1587f2490364f90eeeb8da06cd867bd5b701bf7f9a02b78bc63fc"}, {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.whl", hash = "sha256:358b4a1d35370353d52e12f0a7d1769fc01ff74a191689d3870b2123156184c4"}, @@ -1705,6 +1775,8 @@ version = "12.6.77" description = "NVRTC native runtime libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5847f1d6e5b757f1d2b3991a01082a44aad6f10ab3c5c0213fa3e25bddc25a13"}, {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:35b0cc6ee3a9636d5409133e79273ce1f3fd087abb0532d2d2e8fff1fe9efc53"}, @@ -1717,6 +1789,8 @@ version = "12.6.77" description = "CUDA Runtime native Libraries" optional = false python-versions = ">=3" +groups = ["main"] 
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6116fad3e049e04791c0256a9778c16237837c08b27ed8c8401e2e45de8d60cd"}, {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d461264ecb429c84c8879a7153499ddc7b19b5f8d84c204307491989a365588e"}, @@ -1731,6 +1805,8 @@ version = "9.5.1.17" description = "cuDNN runtime libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9fd4584468533c61873e5fda8ca41bac3a38bcb2d12350830c69b0a96a7e4def"}, {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:30ac3869f6db17d170e0e556dd6cc5eee02647abc31ca856634d5a40f82c15b2"}, @@ -1746,6 +1822,8 @@ version = "11.3.0.4" description = "CUFFT native runtime libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d16079550df460376455cba121db6564089176d9bac9e4f360493ca4741b22a6"}, {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8510990de9f96c803a051822618d42bf6cb8f069ff3f48d93a8486efdacb48fb"}, @@ -1763,6 +1841,8 @@ version = "1.11.1.6" description = "cuFile GPUDirect libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc23469d1c7e52ce6c1d55253273d32c565dd22068647f3aa59b3c6b005bf159"}, {file = 
"nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:8f57a0051dcf2543f6dc2b98a98cb2719c37d3cee1baba8965d57f3bbc90d4db"}, @@ -1774,6 +1854,8 @@ version = "10.3.7.77" description = "CURAND native runtime libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:6e82df077060ea28e37f48a3ec442a8f47690c7499bff392a5938614b56c98d8"}, {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a42cd1344297f70b9e39a1e4f467a4e1c10f1da54ff7a85c12197f6c652c8bdf"}, @@ -1788,6 +1870,8 @@ version = "11.7.1.2" description = "CUDA solver native runtime libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0ce237ef60acde1efc457335a2ddadfd7610b892d94efee7b776c64bb1cac9e0"}, {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9e49843a7707e42022babb9bcfa33c29857a93b88020c4e4434656a655b698c"}, @@ -1807,6 +1891,8 @@ version = "12.5.4.2" description = "CUSPARSE native runtime libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d25b62fb18751758fe3c93a4a08eff08effedfe4edf1c6bb5afd0890fe88f887"}, {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7aa32fa5470cf754f72d1116c7cbc300b4e638d3ae5304cfa4a638a5b87161b1"}, @@ -1824,6 +1910,8 @@ version = "0.6.3" description = "NVIDIA cuSPARSELt" optional = false python-versions = 
"*" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8371549623ba601a06322af2133c4a44350575f5a3108fb75f3ef20b822ad5f1"}, {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e5c8a26c36445dd2e6812f1177978a24e2d37cacce7e090f297a688d1ec44f46"}, @@ -1836,6 +1924,8 @@ version = "2.26.2" description = "NVIDIA Collective Communication Library (NCCL) Runtime" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c196e95e832ad30fbbb50381eb3cbd1fadd5675e587a548563993609af19522"}, {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6"}, @@ -1847,6 +1937,8 @@ version = "12.6.85" description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a"}, {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cf4eaa7d4b6b543ffd69d6abfb11efdeb2db48270d94dfd3a452c24150829e41"}, @@ -1859,6 +1951,8 @@ version = "12.6.77" description = "NVIDIA Tools Extension" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:f44f8d86bb7d5629988d61c8d3ae61dddb2015dee142740536bc7481b022fe4b"}, {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:adcaabb9d436c9761fca2b13959a2d237c5f9fd406c8e4b723c695409ff88059"}, @@ -1873,6 +1967,7 @@ version = "1.76.0" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "openai-1.76.0-py3-none-any.whl", hash = "sha256:a712b50e78cf78e6d7b2a8f69c4978243517c2c36999756673e07a14ce37dc0a"}, {file = "openai-1.76.0.tar.gz", hash = "sha256:fd2bfaf4608f48102d6b74f9e11c5ecaa058b60dad9c36e409c12477dfd91fb2"}, @@ -1899,6 +1994,7 @@ version = "4.11.0.86" description = "Wrapper package for OpenCV python bindings." optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "opencv-python-headless-4.11.0.86.tar.gz", hash = "sha256:996eb282ca4b43ec6a3972414de0e2331f5d9cda2b41091a49739c19fb843798"}, {file = "opencv_python_headless-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:48128188ade4a7e517237c8e1e11a9cdf5c282761473383e77beb875bb1e61ca"}, @@ -1911,9 +2007,9 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, - {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, + {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\""}, + {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] @@ -1923,6 +2019,7 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = 
">=3.8" +groups = ["main", "dev", "docs"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -1934,6 +2031,7 @@ version = "0.5.7" description = "Divides large result sets into pages for easier browsing" optional = false python-versions = "*" +groups = ["docs"] files = [ {file = "paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591"}, {file = "paginate-0.5.7.tar.gz", hash = "sha256:22bd083ab41e1a8b4f3690544afb2c60c25e5c9a63a30fa2f483f6c60c8e5945"}, @@ -1949,6 +2047,7 @@ version = "2.2.3" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, @@ -2035,6 +2134,7 @@ version = "2.2.3.250308" description = "Type annotations for pandas" optional = false python-versions = ">=3.10" +groups = ["dev"] files = [ {file = "pandas_stubs-2.2.3.250308-py3-none-any.whl", hash = "sha256:a377edff3b61f8b268c82499fdbe7c00fdeed13235b8b71d6a1dc347aeddc74d"}, {file = "pandas_stubs-2.2.3.250308.tar.gz", hash = "sha256:3a6e9daf161f00b85c83772ed3d5cff9522028f07a94817472c07b91f46710fd"}, @@ -2050,6 +2150,7 @@ version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." 
optional = false python-versions = ">=3.8" +groups = ["docs"] files = [ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, @@ -2061,6 +2162,7 @@ version = "0.6.2" description = "Extract structured text from pdfs quickly" optional = false python-versions = "<4.0,>=3.10" +groups = ["main"] files = [ {file = "pdftext-0.6.2-py3-none-any.whl", hash = "sha256:905d11e62d548e307933c25865a69c8e993947bb5b40b1535b0a2aa8f07a71d4"}, {file = "pdftext-0.6.2.tar.gz", hash = "sha256:ff5b92462ac03ae63a23429384ae123d45c162dcda30e7bf2c5c92a6b208c9de"}, @@ -2078,6 +2180,7 @@ version = "10.4.0" description = "Python Imaging Library (Fork)" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pillow-10.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e"}, {file = "pillow-10.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:543f3dc61c18dafb755773efc89aae60d06b6596a63914107f75459cf984164d"}, @@ -2166,7 +2269,7 @@ docs = ["furo", "olefile", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-inline fpx = ["olefile"] mic = ["olefile"] tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] -typing = ["typing-extensions"] +typing = ["typing-extensions ; python_version < \"3.10\""] xmp = ["defusedxml"] [[package]] @@ -2175,6 +2278,7 @@ version = "4.3.7" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false python-versions = ">=3.9" +groups = ["main", "dev", "docs"] files = [ {file = "platformdirs-4.3.7-py3-none-any.whl", hash = "sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94"}, {file = "platformdirs-4.3.7.tar.gz", hash = "sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351"}, @@ -2191,6 +2295,7 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -2206,6 +2311,7 @@ version = "4.2.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." optional = false python-versions = ">=3.9" +groups = ["main", "dev"] files = [ {file = "pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd"}, {file = "pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146"}, @@ -2224,6 +2330,7 @@ version = "0.6.1" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, @@ -2235,6 +2342,7 @@ version = "0.4.2" description = "A collection of ASN.1-based protocols modules" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a"}, {file = "pyasn1_modules-0.4.2.tar.gz", hash = 
"sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6"}, @@ -2249,6 +2357,7 @@ version = "2.11.3" description = "Data validation using Python type hints" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "pydantic-2.11.3-py3-none-any.whl", hash = "sha256:a082753436a07f9ba1289c6ffa01cd93db3548776088aa917cc43b63f68fa60f"}, {file = "pydantic-2.11.3.tar.gz", hash = "sha256:7471657138c16adad9322fe3070c0116dd6c3ad8d649300e3cbdfe91f4db4ec3"}, @@ -2262,7 +2371,7 @@ typing-inspection = ">=0.4.0" [package.extras] email = ["email-validator (>=2.0.0)"] -timezone = ["tzdata"] +timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows\""] [[package]] name = "pydantic-core" @@ -2270,6 +2379,7 @@ version = "2.33.1" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "pydantic_core-2.33.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3077cfdb6125cc8dab61b155fdd714663e401f0e6883f9632118ec12cf42df26"}, {file = "pydantic_core-2.33.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8ffab8b2908d152e74862d276cf5017c81a2f3719f14e8e3e8d6b83fda863927"}, @@ -2381,6 +2491,7 @@ version = "2.9.1" description = "Settings management using Pydantic" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "pydantic_settings-2.9.1-py3-none-any.whl", hash = "sha256:59b4f431b1defb26fe620c71a7d3968a710d719f5f4cdbbdb7926edeb770f6ef"}, {file = "pydantic_settings-2.9.1.tar.gz", hash = "sha256:c509bf79d27563add44e8446233359004ed85066cd096d8b510f715e6ef5d268"}, @@ -2404,6 +2515,7 @@ version = "2.19.1" description = "Pygments is a syntax highlighting package written in Python." 
optional = false python-versions = ">=3.8" +groups = ["docs"] files = [ {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"}, {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"}, @@ -2418,6 +2530,7 @@ version = "10.14.3" description = "Extension pack for Python Markdown." optional = false python-versions = ">=3.8" +groups = ["docs"] files = [ {file = "pymdown_extensions-10.14.3-py3-none-any.whl", hash = "sha256:05e0bee73d64b9c71a4ae17c72abc2f700e8bc8403755a00580b49a4e9f189e9"}, {file = "pymdown_extensions-10.14.3.tar.gz", hash = "sha256:41e576ce3f5d650be59e900e4ceff231e0aed2a88cf30acaee41e02f063a061b"}, @@ -2436,6 +2549,7 @@ version = "4.30.0" description = "Python bindings to PDFium" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "pypdfium2-4.30.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:b33ceded0b6ff5b2b93bc1fe0ad4b71aa6b7e7bd5875f1ca0cdfb6ba6ac01aab"}, {file = "pypdfium2-4.30.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4e55689f4b06e2d2406203e771f78789bd4f190731b5d57383d05cf611d829de"}, @@ -2458,6 +2572,7 @@ version = "8.3.5" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"}, {file = "pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"}, @@ -2480,6 +2595,7 @@ version = "6.1.1" description = "Pytest plugin for measuring coverage." 
optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "pytest_cov-6.1.1-py3-none-any.whl", hash = "sha256:bddf29ed2d0ab6f4df17b4c55b0a657287db8684af9c42ea546b21b1041b3dde"}, {file = "pytest_cov-6.1.1.tar.gz", hash = "sha256:46935f7aaefba760e716c2ebfbe1c216240b9592966e7da99ea8292d4d3e2a0a"}, @@ -2498,6 +2614,7 @@ version = "3.14.0" description = "Thin-wrapper around the mock package for easier use with pytest" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, @@ -2515,6 +2632,7 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main", "docs"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -2529,6 +2647,7 @@ version = "1.1.0" description = "Read key-value pairs from a .env file and set them as environment variables" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d"}, {file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"}, @@ -2543,6 +2662,7 @@ version = "0.27.1" description = "Python extension for computing string edit distances and similarities." 
optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "python_levenshtein-0.27.1-py3-none-any.whl", hash = "sha256:e1a4bc2a70284b2ebc4c505646142fecd0f831e49aa04ed972995895aec57396"}, {file = "python_levenshtein-0.27.1.tar.gz", hash = "sha256:3a5314a011016d373d309a68e875fd029caaa692ad3f32e78319299648045f11"}, @@ -2557,6 +2677,7 @@ version = "2025.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, @@ -2568,6 +2689,7 @@ version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "docs"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -2630,6 +2752,7 @@ version = "0.1" description = "A custom YAML tag for referencing environment variables in YAML files. 
" optional = false python-versions = ">=3.6" +groups = ["docs"] files = [ {file = "pyyaml_env_tag-0.1-py3-none-any.whl", hash = "sha256:af31106dec8a4d68c60207c1886031cbf839b68aa7abccdb19868200532c2069"}, {file = "pyyaml_env_tag-0.1.tar.gz", hash = "sha256:70092675bda14fdec33b31ba77e7543de9ddc88f2e5b99160396572d11525bdb"}, @@ -2644,6 +2767,7 @@ version = "3.12.2" description = "rapid fuzzy string matching" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "rapidfuzz-3.12.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0b9a75e0385a861178adf59e86d6616cbd0d5adca7228dc9eeabf6f62cf5b0b1"}, {file = "rapidfuzz-3.12.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6906a7eb458731e3dd2495af1d0410e23a21a2a2b7ced535e6d5cd15cb69afc5"}, @@ -2750,6 +2874,7 @@ version = "0.36.2" description = "JSON Referencing + Python" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0"}, {file = "referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa"}, @@ -2766,6 +2891,7 @@ version = "2024.11.6" description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, @@ -2869,6 +2995,7 @@ version = "2.32.3" description = "Python HTTP for Humans." 
optional = false python-versions = ">=3.8" +groups = ["main", "docs"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -2890,6 +3017,7 @@ version = "0.23.1" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "rpds_py-0.23.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2a54027554ce9b129fc3d633c92fa33b30de9f08bc61b32c053dc9b537266fed"}, {file = "rpds_py-0.23.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b5ef909a37e9738d146519657a1aab4584018746a18f71c692f2f22168ece40c"}, @@ -3002,6 +3130,7 @@ version = "4.9.1" description = "Pure-Python RSA implementation" optional = false python-versions = "<4,>=3.6" +groups = ["main"] files = [ {file = "rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762"}, {file = "rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75"}, @@ -3016,6 +3145,7 @@ version = "0.11.10" description = "An extremely fast Python linter and code formatter, written in Rust." 
optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "ruff-0.11.10-py3-none-linux_armv6l.whl", hash = "sha256:859a7bfa7bc8888abbea31ef8a2b411714e6a80f0d173c2a82f9041ed6b50f58"}, {file = "ruff-0.11.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:968220a57e09ea5e4fd48ed1c646419961a0570727c7e069842edd018ee8afed"}, @@ -3043,6 +3173,7 @@ version = "0.5.3" description = "" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "safetensors-0.5.3-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bd20eb133db8ed15b40110b7c00c6df51655a2998132193de2f75f72d99c7073"}, {file = "safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:21d01c14ff6c415c485616b8b0bf961c46b3b343ca59110d38d744e577f9cce7"}, @@ -3080,6 +3211,7 @@ version = "1.6.1" description = "A set of python modules for machine learning and data mining" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "scikit_learn-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d056391530ccd1e501056160e3c9673b4da4805eb67eb2bdf4e983e1f9c9204e"}, {file = "scikit_learn-1.6.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36"}, @@ -3134,6 +3266,7 @@ version = "1.15.2" description = "Fundamental algorithms for scientific computing in Python" optional = false python-versions = ">=3.10" +groups = ["main"] files = [ {file = "scipy-1.15.2-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a2ec871edaa863e8213ea5df811cd600734f6400b4af272e1c011e69401218e9"}, {file = "scipy-1.15.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:6f223753c6ea76983af380787611ae1291e3ceb23917393079dcc746ba60cfb5"}, @@ -3189,7 +3322,7 @@ numpy = ">=1.23.5,<2.5" [package.extras] dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"] doc = ["intersphinx_registry", 
"jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.16.5)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.0.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)"] -test = ["Cython", "array-api-strict (>=2.0,<2.1.1)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] +test = ["Cython", "array-api-strict (>=2.0,<2.1.1)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja ; sys_platform != \"emscripten\"", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] [[package]] name = "setuptools" @@ -3197,19 +3330,21 @@ version = "80.0.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.9" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" or python_version >= \"3.12\"" files = [ {file = "setuptools-80.0.0-py3-none-any.whl", hash = "sha256:a38f898dcd6e5380f4da4381a87ec90bd0a7eec23d204a5552e80ee3cab6bd27"}, {file = "setuptools-80.0.0.tar.gz", hash = "sha256:c40a5b3729d58dd749c0f08f1a07d134fb8a0a3d7f87dc33e7c5e1f762138650"}, ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.8.0)"] -core = ["importlib_metadata (>=6)", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""] +core = ["importlib_metadata (>=6) ; python_version < \"3.10\"", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1) ; 
python_version < \"3.11\"", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib_metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.14.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"] [[package]] name = "six" @@ -3217,6 +3352,7 @@ version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main", "docs"] files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = 
"sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -3228,6 +3364,7 @@ version = "1.3.1" description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, @@ -3239,6 +3376,7 @@ version = "2.6" description = "A modern CSS selector implementation for Beautiful Soup." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, @@ -3250,6 +3388,7 @@ version = "0.13.1" description = "OCR, layout, reading order, and table recognition in 90+ languages" optional = false python-versions = "<4.0,>=3.10" +groups = ["main"] files = [ {file = "surya_ocr-0.13.1-py3-none-any.whl", hash = "sha256:2704a97f5de625bc747eddf87874635cb8be164c4c9373207a022648325f009d"}, {file = "surya_ocr-0.13.1.tar.gz", hash = "sha256:af4004448eb8798aeddd4aa709c2f4d3795a3ec7bf12252595b481a65f799a52"}, @@ -3274,6 +3413,7 @@ version = "1.14.0" description = "Computer algebra system (CAS) in Python" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5"}, {file = "sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517"}, @@ -3291,6 +3431,7 @@ version = "3.6.0" description = "threadpoolctl" optional = false python-versions = 
">=3.9" +groups = ["main"] files = [ {file = "threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb"}, {file = "threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e"}, @@ -3302,6 +3443,7 @@ version = "0.21.1" description = "" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "tokenizers-0.21.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e78e413e9e668ad790a29456e677d9d3aa50a9ad311a40905d6861ba7692cf41"}, {file = "tokenizers-0.21.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:cd51cd0a91ecc801633829fcd1fda9cf8682ed3477c6243b9a095539de4aecf3"}, @@ -3334,6 +3476,8 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version == \"3.10\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -3375,6 +3519,7 @@ version = "2.7.0" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = false python-versions = ">=3.9.0" +groups = ["main"] files = [ {file = "torch-2.7.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c9afea41b11e1a1ab1b258a5c31afbd646d6319042bfe4f231b408034b51128b"}, {file = "torch-2.7.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0b9960183b6e5b71239a3e6c883d8852c304e691c0b2955f7045e8a6d05b9183"}, @@ -3436,6 +3581,7 @@ version = "4.67.1" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, {file = "tqdm-4.67.1.tar.gz", hash = 
"sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, @@ -3457,6 +3603,7 @@ version = "4.51.3" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false python-versions = ">=3.9.0" +groups = ["main"] files = [ {file = "transformers-4.51.3-py3-none-any.whl", hash = "sha256:fd3279633ceb2b777013234bbf0b4f5c2d23c4626b05497691f00cfda55e8a83"}, {file = "transformers-4.51.3.tar.gz", hash = "sha256:e292fcab3990c6defe6328f0f7d2004283ca81a7a07b2de9a46d67fd81ea1409"}, @@ -3529,6 +3676,8 @@ version = "3.3.0" description = "A language and compiler for custom Deep Learning operations" optional = false python-versions = "*" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "triton-3.3.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fad99beafc860501d7fcc1fb7045d9496cbe2c882b1674640304949165a916e7"}, {file = "triton-3.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3161a2bf073d6b22c4e2f33f951f3e5e3001462b2570e6df9cd57565bdec2984"}, @@ -3552,6 +3701,7 @@ version = "4.12.0.20250516" description = "Typing stubs for beautifulsoup4" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "types_beautifulsoup4-4.12.0.20250516-py3-none-any.whl", hash = "sha256:5923399d4a1ba9cc8f0096fe334cc732e130269541d66261bb42ab039c0376ee"}, {file = "types_beautifulsoup4-4.12.0.20250516.tar.gz", hash = "sha256:aa19dd73b33b70d6296adf92da8ab8a0c945c507e6fb7d5db553415cc77b417e"}, @@ -3566,6 +3716,7 @@ version = "1.1.11.20241018" description = "Typing stubs for html5lib" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "types-html5lib-1.1.11.20241018.tar.gz", hash = "sha256:98042555ff78d9e3a51c77c918b1041acbb7eb6c405408d8a9e150ff5beccafa"}, {file = "types_html5lib-1.1.11.20241018-py3-none-any.whl", hash = 
"sha256:3f1e064d9ed2c289001ae6392c84c93833abb0816165c6ff0abfc304a779f403"}, @@ -3592,6 +3743,7 @@ version = "2025.2.0.20250326" description = "Typing stubs for pytz" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "types_pytz-2025.2.0.20250326-py3-none-any.whl", hash = "sha256:3c397fd1b845cd2b3adc9398607764ced9e578a98a5d1fbb4a9bc9253edfb162"}, {file = "types_pytz-2025.2.0.20250326.tar.gz", hash = "sha256:deda02de24f527066fc8d6a19e284ab3f3ae716a42b4adb6b40e75e408c08d36"}, @@ -3603,6 +3755,7 @@ version = "2024.11.6.20250403" description = "Typing stubs for regex" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "types_regex-2024.11.6.20250403-py3-none-any.whl", hash = "sha256:e22c0f67d73f4b4af6086a340f387b6f7d03bed8a0bb306224b75c51a29b0001"}, {file = "types_regex-2024.11.6.20250403.tar.gz", hash = "sha256:3fdf2a70bbf830de4b3a28e9649a52d43dabb57cdb18fbfe2252eefb53666665"}, @@ -3614,6 +3767,7 @@ version = "2.32.0.20250306" description = "Typing stubs for requests" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "types_requests-2.32.0.20250306-py3-none-any.whl", hash = "sha256:25f2cbb5c8710b2022f8bbee7b2b66f319ef14aeea2f35d80f18c9dbf3b60a0b"}, {file = "types_requests-2.32.0.20250306.tar.gz", hash = "sha256:0962352694ec5b2f95fda877ee60a159abdf84a0fc6fdace599f20acb41a03d1"}, @@ -3628,6 +3782,7 @@ version = "4.67.0.20250516" description = "Typing stubs for tqdm" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "types_tqdm-4.67.0.20250516-py3-none-any.whl", hash = "sha256:1dd9b2c65273f2342f37e5179bc6982df86b6669b3376efc12aef0a29e35d36d"}, {file = "types_tqdm-4.67.0.20250516.tar.gz", hash = "sha256:230ccab8a332d34f193fc007eb132a6ef54b4512452e718bf21ae0a7caeb5a6b"}, @@ -3642,10 +3797,12 @@ version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "docs"] 
files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] +markers = {docs = "python_version == \"3.10\""} [[package]] name = "typing-inspection" @@ -3653,6 +3810,7 @@ version = "0.4.0" description = "Runtime typing introspection tools" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f"}, {file = "typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122"}, @@ -3667,6 +3825,7 @@ version = "2025.2" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" +groups = ["main"] files = [ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, @@ -3678,13 +3837,14 @@ version = "2.3.0" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" +groups = ["main", "dev", "docs"] files = [ {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, ] [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] @@ -3695,6 +3855,7 @@ version = "20.29.3" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "virtualenv-20.29.3-py3-none-any.whl", hash = "sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170"}, {file = "virtualenv-20.29.3.tar.gz", hash = "sha256:95e39403fcf3940ac45bc717597dba16110b74506131845d9b687d5e73d947ac"}, @@ -3707,7 +3868,7 @@ platformdirs = ">=3.9.1,<5" [package.extras] docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock 
(>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] [[package]] name = "watchdog" @@ -3715,6 +3876,7 @@ version = "6.0.0" description = "Filesystem events monitoring" optional = false python-versions = ">=3.9" +groups = ["docs"] files = [ {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d1cdb490583ebd691c012b3d6dae011000fe42edb7a82ece80965b42abd61f26"}, {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc64ab3bdb6a04d69d4023b29422170b74681784ffb9463ed4870cf2f3e66112"}, @@ -3757,6 +3919,7 @@ version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, @@ -3768,6 +3931,7 @@ version = "15.0.1" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b"}, {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205"}, @@ -3841,6 +4005,6 @@ files = [ ] [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = ">=3.10,<4" -content-hash = "e056cd80a80d198aebb03f56524339ba094d9202dbbc242031681883e9272f19" +content-hash = "5a9ea0b67b0f2a34dd8eefe66121575fbde3712aa3e3dd37591e163299c507c2" diff --git a/pyproject.toml b/pyproject.toml index 50ea0f00..e907f6c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,8 +36,6 @@ fuzzywuzzy = 
{extras = ["speedup"], version = "^0.18.0"} lxml = "^5.3.0" pandas = "^2.2.3" marker-pdf = "^1.6.2" -lxml-stubs = "^0.5.1" -pandas-stubs = "^2.2.3.250308" [tool.poetry.group.dev.dependencies] pytest = "^8.3.2" @@ -51,6 +49,8 @@ types-beautifulsoup4 = "^4.12.0.20250204" types-regex = "^2024.11.6.20250318" types-tqdm = "^4.67.0.20250319" types-jsonschema = "^4.23.0.20241208" +lxml-stubs = "^0.5.1" +pandas-stubs = "^2.2.3.250308" [tool.poetry.group.docs.dependencies] mkdocs = "^1.6.0" From a60b4a52fc1debdb723b3c3938ed97dadb789891 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Wed, 21 May 2025 11:21:03 +0100 Subject: [PATCH 027/125] Make code robust to absence of `marker-pdf` package --- autocorpus/autocorpus.py | 72 ++++++---------------------------------- autocorpus/pdf.py | 71 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 61 deletions(-) create mode 100644 autocorpus/pdf.py diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 9a0e7045..ee161fb5 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -6,15 +6,6 @@ from bs4 import BeautifulSoup from marker.converters.pdf import PdfConverter -from marker.models import create_model_dict -from marker.output import text_from_rendered - -from autocorpus.ac_bioc.bioctable.json import BioCTableJSON -from autocorpus.bioc_supplementary import ( - BioCTableConverter, - BioCTextConverter, - extract_table_from_pdf_text, -) from . 
import logger from .abbreviation import get_abbreviations @@ -91,57 +82,6 @@ def __get_sections(self, soup, config): return handle_not_tables(config["sections"], soup) - @staticmethod - def __load_pdf_models(): - global pdf_converter - if pdf_converter is None: - try: - # Load the PDF models - pdf_converter = PdfConverter( - artifact_dict=create_model_dict(), - ) - except Exception as e: - logger.error(f"Error loading PDF models: {e}") - # If loading fails, set pdf_converter to None - pdf_converter = None - - @staticmethod - def __extract_pdf_content( - file_path: Path, - ) -> bool: - """Extracts content from a PDF file. - - Args: - file_path (Path): Path to the PDF file. - - Returns: - bool: success status of the extraction process. - """ - bioc_text, bioc_tables = None, None - global pdf_converter - Autocorpus.__load_pdf_models() - if not pdf_converter: - logger.error("PDF converter not initialized.") - return False - - # extract text from PDF - rendered = pdf_converter(str(file_path)) - text, _, _ = text_from_rendered(rendered) - # separate text and tables - text, tables = extract_table_from_pdf_text(text) - # format data for BioC - bioc_text = BioCTextConverter.build_bioc(text, str(file_path), "pdf") - bioc_tables = BioCTableConverter.build_bioc(tables, str(file_path)) - - out_filename = str(file_path).replace(".pdf", ".pdf_bioc.json") - with open(out_filename, "w", encoding="utf-8") as f: - BioCJSON.dump(bioc_text, f, indent=4) - - out_table_filename = str(file_path).replace(".pdf", ".pdf_tables.json") - with open(out_table_filename, "w", encoding="utf-8") as f: - BioCTableJSON.dump(bioc_tables, f, indent=4) - return True - def __extract_text(self, soup, config): """Convert beautiful soup object into a python dict object with cleaned main text body. 
@@ -343,7 +283,17 @@ def __process_supplementary_file(self, file: Path): case ".xml": pass case ".pdf": - self.__extract_pdf_content(file) + try: + from .pdf import extract_pdf_content + + extract_pdf_content(file) + except ModuleNotFoundError: + logger.error( + "Could not load necessary PDF packages. " + "If you installed Auto-CORPUS via pip, you can obtain these with:\n" + " pip install autocorpus[pdf]" + ) + raise case _: pass diff --git a/autocorpus/pdf.py b/autocorpus/pdf.py new file mode 100644 index 00000000..b7d5b79a --- /dev/null +++ b/autocorpus/pdf.py @@ -0,0 +1,71 @@ +"""Functionality for processing PDF files.""" + +from pathlib import Path + +from marker.converters.pdf import PdfConverter +from marker.models import create_model_dict +from marker.output import text_from_rendered + +from autocorpus.bioc_supplementary import ( + BioCTableConverter, + BioCTextConverter, + extract_table_from_pdf_text, +) + +from . import logger +from .ac_bioc import BioCJSON +from .ac_bioc.bioctable.json import BioCTableJSON + +_pdf_converter: PdfConverter | None = None + + +def _get_pdf_converter() -> PdfConverter | None: + global _pdf_converter + if _pdf_converter is None: + try: + # Load the PDF models + _pdf_converter = PdfConverter( + artifact_dict=create_model_dict(), + ) + except Exception as e: + logger.error(f"Error loading PDF models: {e}") + return None + + return _pdf_converter + + +def extract_pdf_content( + file_path: Path, +) -> bool: + """Extracts content from a PDF file. + + Args: + file_path (Path): Path to the PDF file. + + Returns: + bool: success status of the extraction process. 
+ """ + bioc_text, bioc_tables = None, None + + pdf_converter = _get_pdf_converter() + if not pdf_converter: + logger.error("PDF converter not initialized.") + return False + + # extract text from PDF + rendered = pdf_converter(str(file_path)) + text, _, _ = text_from_rendered(rendered) + # separate text and tables + text, tables = extract_table_from_pdf_text(text) + # format data for BioC + bioc_text = BioCTextConverter.build_bioc(text, str(file_path), "pdf") + bioc_tables = BioCTableConverter.build_bioc(tables, str(file_path)) + + out_filename = str(file_path).replace(".pdf", ".pdf_bioc.json") + with open(out_filename, "w", encoding="utf-8") as f: + BioCJSON.dump(bioc_text, f, indent=4) + + out_table_filename = str(file_path).replace(".pdf", ".pdf_tables.json") + with open(out_table_filename, "w", encoding="utf-8") as f: + BioCTableJSON.dump(bioc_tables, f, indent=4) + return True From f145ce889fb1a5ac35f0d8cc561daf3a44291b6d Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Wed, 21 May 2025 11:24:44 +0100 Subject: [PATCH 028/125] Move other PDF-related functionality to `pdf` module --- autocorpus/bioc_supplementary.py | 66 ---------------------------- autocorpus/pdf.py | 75 +++++++++++++++++++++++++++++--- 2 files changed, 69 insertions(+), 72 deletions(-) diff --git a/autocorpus/bioc_supplementary.py b/autocorpus/bioc_supplementary.py index 91f5e305..2022cbcf 100644 --- a/autocorpus/bioc_supplementary.py +++ b/autocorpus/bioc_supplementary.py @@ -3,8 +3,6 @@ import datetime from typing import TypeVar -import pandas as pd -import regex from pandas import DataFrame from .ac_bioc import ( @@ -20,70 +18,6 @@ ) -def _split_text_and_tables(text: str) -> tuple[list[str], list[list[str]]]: - """Splits PDF text into main text lines and raw table lines.""" - lines = [x for x in text.splitlines() if x] - tables = [] - table_lines = [] - main_text_lines = [] - inside_table = False - - for line in lines: - if "|" in line: - inside_table = True - table_lines.append(line) - 
elif inside_table: - inside_table = False - tables.append(table_lines) - main_text_lines.append(line) - table_lines = [] - continue - else: - main_text_lines.append(line) - - return main_text_lines, tables - - -def _parse_tables(raw_tables: list[list[str]]) -> list[DataFrame]: - """Converts raw table text lines into DataFrames.""" - parsed_tables = [] - for table in raw_tables: - # Remove lines that are just dashes - table = [line for line in table if not regex.match(r"^\s*[\p{Pd}]+\s*$", line)] - - rows = [] - for line in table: - if regex.search(r"\|", line): - cells = [ - cell.strip() - for cell in line.split("|") - if not all(x in "|-" for x in cell) - ] - if cells: - rows.append(cells) - - if not rows: - continue - - num_columns = max(len(row) for row in rows) - for row in rows: - while len(row) < num_columns: - row.append("") - - df = pd.DataFrame(rows[1:], columns=rows[0]) - parsed_tables.append(df) - - return parsed_tables - - -def extract_table_from_pdf_text(text: str) -> tuple[str, list[DataFrame]]: - """Extracts tables from PDF text and returns the remaining text and parsed tables.""" - main_text_lines, raw_tables = _split_text_and_tables(text) - tables_output = _parse_tables(raw_tables) - text_output = "\n\n".join(main_text_lines) - return text_output, tables_output - - def string_replace_unicode(text: str) -> str: """Replaces specific Unicode characters with their corresponding replacements in the given text.""" return ( diff --git a/autocorpus/pdf.py b/autocorpus/pdf.py index b7d5b79a..b5496396 100644 --- a/autocorpus/pdf.py +++ b/autocorpus/pdf.py @@ -2,15 +2,14 @@ from pathlib import Path +import pandas as pd +import regex from marker.converters.pdf import PdfConverter from marker.models import create_model_dict from marker.output import text_from_rendered +from pandas import DataFrame -from autocorpus.bioc_supplementary import ( - BioCTableConverter, - BioCTextConverter, - extract_table_from_pdf_text, -) +from autocorpus.bioc_supplementary import 
BioCTableConverter, BioCTextConverter from . import logger from .ac_bioc import BioCJSON @@ -56,7 +55,7 @@ def extract_pdf_content( rendered = pdf_converter(str(file_path)) text, _, _ = text_from_rendered(rendered) # separate text and tables - text, tables = extract_table_from_pdf_text(text) + text, tables = _extract_table_from_pdf_text(text) # format data for BioC bioc_text = BioCTextConverter.build_bioc(text, str(file_path), "pdf") bioc_tables = BioCTableConverter.build_bioc(tables, str(file_path)) @@ -69,3 +68,67 @@ def extract_pdf_content( with open(out_table_filename, "w", encoding="utf-8") as f: BioCTableJSON.dump(bioc_tables, f, indent=4) return True + + +def _split_text_and_tables(text: str) -> tuple[list[str], list[list[str]]]: + """Splits PDF text into main text lines and raw table lines.""" + lines = [x for x in text.splitlines() if x] + tables = [] + table_lines = [] + main_text_lines = [] + inside_table = False + + for line in lines: + if "|" in line: + inside_table = True + table_lines.append(line) + elif inside_table: + inside_table = False + tables.append(table_lines) + main_text_lines.append(line) + table_lines = [] + continue + else: + main_text_lines.append(line) + + return main_text_lines, tables + + +def _parse_tables(raw_tables: list[list[str]]) -> list[DataFrame]: + """Converts raw table text lines into DataFrames.""" + parsed_tables = [] + for table in raw_tables: + # Remove lines that are just dashes + table = [line for line in table if not regex.match(r"^\s*[\p{Pd}]+\s*$", line)] + + rows = [] + for line in table: + if regex.search(r"\|", line): + cells = [ + cell.strip() + for cell in line.split("|") + if not all(x in "|-" for x in cell) + ] + if cells: + rows.append(cells) + + if not rows: + continue + + num_columns = max(len(row) for row in rows) + for row in rows: + while len(row) < num_columns: + row.append("") + + df = pd.DataFrame(rows[1:], columns=rows[0]) + parsed_tables.append(df) + + return parsed_tables + + +def 
_extract_table_from_pdf_text(text: str) -> tuple[str, list[DataFrame]]: + """Extracts tables from PDF text and returns the remaining text and parsed tables.""" + main_text_lines, raw_tables = _split_text_and_tables(text) + tables_output = _parse_tables(raw_tables) + text_output = "\n\n".join(main_text_lines) + return text_output, tables_output From 81c1594843fca42e63effda8138082f25078c57c Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Wed, 21 May 2025 10:31:19 +0100 Subject: [PATCH 029/125] Make `marker-pdf` an optional dependency Closes #258. --- .github/actions/setup/action.yml | 2 +- poetry.lock | 214 +++++++++++++++++++------------ pyproject.toml | 5 +- 3 files changed, 140 insertions(+), 81 deletions(-) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index 494310d1..f20db13e 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -19,4 +19,4 @@ runs: - name: Install dependencies shell: bash - run: poetry install + run: poetry install --extras pdf diff --git a/poetry.lock b/poetry.lock index c1d22599..18feb3ae 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4,9 +4,10 @@ name = "annotated-types" version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -16,9 +17,10 @@ files = [ name = "anthropic" version = "0.46.0" description = "The official Python library for the anthropic API" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "anthropic-0.46.0-py3-none-any.whl", hash = 
"sha256:1445ec9be78d2de7ea51b4d5acd3574e414aea97ef903d0ecbb57bec806aaa49"}, {file = "anthropic-0.46.0.tar.gz", hash = "sha256:eac3d43271d02321a57c3ca68aca84c3d58873e8e72d1433288adee2d46b745b"}, @@ -41,9 +43,10 @@ vertex = ["google-auth (>=2,<3)"] name = "anyio" version = "4.9.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c"}, {file = "anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028"}, @@ -141,9 +144,10 @@ lxml = ["lxml"] name = "cachetools" version = "5.5.2" description = "Extensible memoizing collections and decorators" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"}, {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"}, @@ -160,6 +164,7 @@ files = [ {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, ] +markers = {main = "extra == \"pdf\""} [[package]] name = "cfgv" @@ -274,6 +279,7 @@ files = [ {file = "charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85"}, {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"}, ] +markers = {main = "extra == \"pdf\""} [[package]] name = "click" @@ -398,9 +404,10 @@ files = [ name = "distro" version 
= "1.9.0" description = "Distro - an OS platform information API" -optional = false +optional = true python-versions = ">=3.6" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, @@ -413,11 +420,11 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["main", "dev"] -markers = "python_version == \"3.10\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, ] +markers = {main = "python_version == \"3.10\" and extra == \"pdf\"", dev = "python_version == \"3.10\""} [package.extras] test = ["pytest (>=6)"] @@ -443,9 +450,10 @@ typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] name = "filetype" version = "1.2.0" description = "Infer file type and MIME type of any file/buffer. No external dependencies." 
-optional = false +optional = true python-versions = "*" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25"}, {file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"}, @@ -455,9 +463,10 @@ files = [ name = "fsspec" version = "2025.3.2" description = "File-system specification" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "fsspec-2025.3.2-py3-none-any.whl", hash = "sha256:2daf8dc3d1dfa65b6aa37748d112773a7a08416f6c70d96b264c96476ecaf711"}, {file = "fsspec-2025.3.2.tar.gz", hash = "sha256:e52c77ef398680bbd6a98c0e628fbc469491282981209907bbc8aea76a04fdc6"}, @@ -495,9 +504,10 @@ tqdm = ["tqdm"] name = "ftfy" version = "6.3.1" description = "Fixes mojibake and other problems with Unicode, after the fact" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "ftfy-6.3.1-py3-none-any.whl", hash = "sha256:7c70eb532015cd2f9adb53f101fb6c7945988d023a085d127d1573dc49dd0083"}, {file = "ftfy-6.3.1.tar.gz", hash = "sha256:9b3c3d90f84fb267fe64d375a07b7f8912d817cf86009ae134aa03e1819506ec"}, @@ -546,9 +556,10 @@ dev = ["flake8", "markdown", "twine", "wheel"] name = "google-auth" version = "2.39.0" description = "Google Authentication Library" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "google_auth-2.39.0-py2.py3-none-any.whl", hash = "sha256:0150b6711e97fb9f52fe599f55648950cc4540015565d8fbb31be2ad6e1548a2"}, {file = "google_auth-2.39.0.tar.gz", hash = "sha256:73222d43cdc35a3aeacbfdcaf73142a97839f10de930550d89ebfe1d0a00cde7"}, @@ -573,9 +584,10 @@ urllib3 = ["packaging", "urllib3"] name = "google-genai" version = "1.12.1" description = "GenAI Python SDK" 
-optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "google_genai-1.12.1-py3-none-any.whl", hash = "sha256:7cbc1bc029712946ce41bcf80c0eaa89eb8c09c308efbbfe30fd491f402c258a"}, {file = "google_genai-1.12.1.tar.gz", hash = "sha256:5c7eda422360643ce602a3f6b23152470ec1039310ef40080cbe4e71237f6391"}, @@ -609,9 +621,10 @@ colorama = ">=0.4" name = "h11" version = "0.16.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"}, {file = "h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1"}, @@ -621,9 +634,10 @@ files = [ name = "httpcore" version = "1.0.9" description = "A minimal low-level HTTP client." -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55"}, {file = "httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8"}, @@ -643,9 +657,10 @@ trio = ["trio (>=0.22.0,<1.0)"] name = "httpx" version = "0.28.1" description = "The next generation HTTP client." 
-optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, @@ -668,9 +683,10 @@ zstd = ["zstandard (>=0.18.0)"] name = "huggingface-hub" version = "0.30.2" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" -optional = false +optional = true python-versions = ">=3.8.0" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "huggingface_hub-0.30.2-py3-none-any.whl", hash = "sha256:68ff05969927058cfa41df4f2155d4bb48f5f54f719dd0390103eefa9b191e28"}, {file = "huggingface_hub-0.30.2.tar.gz", hash = "sha256:9a7897c5b6fd9dad3168a794a8998d6378210f5b9688d0dfc180b1a228dc2466"}, @@ -726,6 +742,7 @@ files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, ] +markers = {main = "extra == \"pdf\""} [package.extras] all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] @@ -753,6 +770,7 @@ files = [ {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, ] +markers = {main = "extra == \"pdf\""} [package.dependencies] MarkupSafe = ">=2.0" @@ -764,9 +782,10 @@ i18n = ["Babel (>=2.7)"] name = "jiter" version = "0.9.0" description = "Fast iterable JSON parser." 
-optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "jiter-0.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:816ec9b60fdfd1fec87da1d7ed46c66c44ffec37ab2ef7de5b147b2fce3fd5ad"}, {file = "jiter-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9b1d3086f8a3ee0194ecf2008cf81286a5c3e540d977fa038ff23576c023c0ea"}, @@ -1186,9 +1205,10 @@ testing = ["coverage", "pyyaml"] name = "markdown2" version = "2.5.3" description = "A fast and complete Python implementation of Markdown" -optional = false +optional = true python-versions = "<4,>=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "markdown2-2.5.3-py3-none-any.whl", hash = "sha256:a8ebb7e84b8519c37bf7382b3db600f1798a22c245bfd754a1f87ca8d7ea63b3"}, {file = "markdown2-2.5.3.tar.gz", hash = "sha256:4d502953a4633408b0ab3ec503c5d6984d1b14307e32b325ec7d16ea57524895"}, @@ -1204,9 +1224,10 @@ wavedrom = ["wavedrom"] name = "markdownify" version = "0.13.1" description = "Convert HTML to markdown." -optional = false +optional = true python-versions = "*" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "markdownify-0.13.1-py3-none-any.whl", hash = "sha256:1d181d43d20902bcc69d7be85b5316ed174d0dda72ff56e14ae4c95a4a407d22"}, {file = "markdownify-0.13.1.tar.gz", hash = "sha256:ab257f9e6bd4075118828a28c9d02f8a4bfeb7421f558834aa79b2dfeb32a098"}, @@ -1220,9 +1241,10 @@ six = ">=1.15,<2" name = "marker-pdf" version = "1.6.2" description = "Convert documents to markdown with high speed and accuracy." 
-optional = false +optional = true python-versions = "<4.0,>=3.10" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "marker_pdf-1.6.2-py3-none-any.whl", hash = "sha256:48fbc6353e6fc3510d30d5682a8974fc9d6eb58a13e7c3f525ed6973b721f108"}, {file = "marker_pdf-1.6.2.tar.gz", hash = "sha256:38725082c89c0aec5e28e4f1df8f3974ccc0742c2265f0342c20e52fbde90bf0"}, @@ -1324,6 +1346,7 @@ files = [ {file = "MarkupSafe-3.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a"}, {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, ] +markers = {main = "extra == \"pdf\""} [[package]] name = "mergedeep" @@ -1536,9 +1559,10 @@ typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} name = "mpmath" version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" -optional = false +optional = true python-versions = "*" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, @@ -1620,9 +1644,10 @@ files = [ name = "networkx" version = "3.4.2" description = "Python package for creating and manipulating graphs and networks" -optional = false +optional = true python-versions = ">=3.10" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f"}, {file = "networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1"}, @@ -1743,10 +1768,10 @@ files = [ name = "nvidia-cublas-cu12" version = "12.6.4.1" description = "CUBLAS native runtime libraries" -optional = false +optional = true python-versions = ">=3" 
groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08ed2686e9875d01b58e3cb379c6896df8e76c75e0d4a7f7dace3d7b6d9ef8eb"}, {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:235f728d6e2a409eddf1df58d5b0921cf80cfa9e72b9f2775ccb7b4a87984668"}, @@ -1757,10 +1782,10 @@ files = [ name = "nvidia-cuda-cupti-cu12" version = "12.6.80" description = "CUDA profiling tools runtime libs." -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:166ee35a3ff1587f2490364f90eeeb8da06cd867bd5b701bf7f9a02b78bc63fc"}, {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.whl", hash = "sha256:358b4a1d35370353d52e12f0a7d1769fc01ff74a191689d3870b2123156184c4"}, @@ -1773,10 +1798,10 @@ files = [ name = "nvidia-cuda-nvrtc-cu12" version = "12.6.77" description = "NVRTC native runtime libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5847f1d6e5b757f1d2b3991a01082a44aad6f10ab3c5c0213fa3e25bddc25a13"}, {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:35b0cc6ee3a9636d5409133e79273ce1f3fd087abb0532d2d2e8fff1fe9efc53"}, @@ 
-1787,10 +1812,10 @@ files = [ name = "nvidia-cuda-runtime-cu12" version = "12.6.77" description = "CUDA Runtime native Libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6116fad3e049e04791c0256a9778c16237837c08b27ed8c8401e2e45de8d60cd"}, {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d461264ecb429c84c8879a7153499ddc7b19b5f8d84c204307491989a365588e"}, @@ -1803,10 +1828,10 @@ files = [ name = "nvidia-cudnn-cu12" version = "9.5.1.17" description = "cuDNN runtime libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9fd4584468533c61873e5fda8ca41bac3a38bcb2d12350830c69b0a96a7e4def"}, {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:30ac3869f6db17d170e0e556dd6cc5eee02647abc31ca856634d5a40f82c15b2"}, @@ -1820,10 +1845,10 @@ nvidia-cublas-cu12 = "*" name = "nvidia-cufft-cu12" version = "11.3.0.4" description = "CUFFT native runtime libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:d16079550df460376455cba121db6564089176d9bac9e4f360493ca4741b22a6"}, {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8510990de9f96c803a051822618d42bf6cb8f069ff3f48d93a8486efdacb48fb"}, @@ -1839,10 +1864,10 @@ nvidia-nvjitlink-cu12 = "*" name = "nvidia-cufile-cu12" version = "1.11.1.6" description = "cuFile GPUDirect libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc23469d1c7e52ce6c1d55253273d32c565dd22068647f3aa59b3c6b005bf159"}, {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:8f57a0051dcf2543f6dc2b98a98cb2719c37d3cee1baba8965d57f3bbc90d4db"}, @@ -1852,10 +1877,10 @@ files = [ name = "nvidia-curand-cu12" version = "10.3.7.77" description = "CURAND native runtime libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:6e82df077060ea28e37f48a3ec442a8f47690c7499bff392a5938614b56c98d8"}, {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a42cd1344297f70b9e39a1e4f467a4e1c10f1da54ff7a85c12197f6c652c8bdf"}, @@ -1868,10 +1893,10 @@ files = [ name = "nvidia-cusolver-cu12" version = "11.7.1.2" description = "CUDA solver native runtime libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" 
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0ce237ef60acde1efc457335a2ddadfd7610b892d94efee7b776c64bb1cac9e0"}, {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9e49843a7707e42022babb9bcfa33c29857a93b88020c4e4434656a655b698c"}, @@ -1889,10 +1914,10 @@ nvidia-nvjitlink-cu12 = "*" name = "nvidia-cusparse-cu12" version = "12.5.4.2" description = "CUSPARSE native runtime libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d25b62fb18751758fe3c93a4a08eff08effedfe4edf1c6bb5afd0890fe88f887"}, {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7aa32fa5470cf754f72d1116c7cbc300b4e638d3ae5304cfa4a638a5b87161b1"}, @@ -1908,10 +1933,10 @@ nvidia-nvjitlink-cu12 = "*" name = "nvidia-cusparselt-cu12" version = "0.6.3" description = "NVIDIA cuSPARSELt" -optional = false +optional = true python-versions = "*" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8371549623ba601a06322af2133c4a44350575f5a3108fb75f3ef20b822ad5f1"}, {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e5c8a26c36445dd2e6812f1177978a24e2d37cacce7e090f297a688d1ec44f46"}, @@ -1922,10 +1947,10 @@ files = [ name = "nvidia-nccl-cu12" version = "2.26.2" 
description = "NVIDIA Collective Communication Library (NCCL) Runtime" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c196e95e832ad30fbbb50381eb3cbd1fadd5675e587a548563993609af19522"}, {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6"}, @@ -1935,10 +1960,10 @@ files = [ name = "nvidia-nvjitlink-cu12" version = "12.6.85" description = "Nvidia JIT LTO Library" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a"}, {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cf4eaa7d4b6b543ffd69d6abfb11efdeb2db48270d94dfd3a452c24150829e41"}, @@ -1949,10 +1974,10 @@ files = [ name = "nvidia-nvtx-cu12" version = "12.6.77" description = "NVIDIA Tools Extension" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f44f8d86bb7d5629988d61c8d3ae61dddb2015dee142740536bc7481b022fe4b"}, {file = 
"nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:adcaabb9d436c9761fca2b13959a2d237c5f9fd406c8e4b723c695409ff88059"}, @@ -1965,9 +1990,10 @@ files = [ name = "openai" version = "1.76.0" description = "The official Python library for the openai API" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "openai-1.76.0-py3-none-any.whl", hash = "sha256:a712b50e78cf78e6d7b2a8f69c4978243517c2c36999756673e07a14ce37dc0a"}, {file = "openai-1.76.0.tar.gz", hash = "sha256:fd2bfaf4608f48102d6b74f9e11c5ecaa058b60dad9c36e409c12477dfd91fb2"}, @@ -1992,9 +2018,10 @@ voice-helpers = ["numpy (>=2.0.2)", "sounddevice (>=0.5.1)"] name = "opencv-python-headless" version = "4.11.0.86" description = "Wrapper package for OpenCV python bindings." -optional = false +optional = true python-versions = ">=3.6" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "opencv-python-headless-4.11.0.86.tar.gz", hash = "sha256:996eb282ca4b43ec6a3972414de0e2331f5d9cda2b41091a49739c19fb843798"}, {file = "opencv_python_headless-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:48128188ade4a7e517237c8e1e11a9cdf5c282761473383e77beb875bb1e61ca"}, @@ -2024,6 +2051,7 @@ files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] +markers = {main = "extra == \"pdf\""} [[package]] name = "paginate" @@ -2160,9 +2188,10 @@ files = [ name = "pdftext" version = "0.6.2" description = "Extract structured text from pdfs quickly" -optional = false +optional = true python-versions = "<4.0,>=3.10" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "pdftext-0.6.2-py3-none-any.whl", hash = "sha256:905d11e62d548e307933c25865a69c8e993947bb5b40b1535b0a2aa8f07a71d4"}, {file = "pdftext-0.6.2.tar.gz", 
hash = "sha256:ff5b92462ac03ae63a23429384ae123d45c162dcda30e7bf2c5c92a6b208c9de"}, @@ -2178,9 +2207,10 @@ pypdfium2 = "4.30.0" name = "pillow" version = "10.4.0" description = "Python Imaging Library (Fork)" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "pillow-10.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e"}, {file = "pillow-10.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:543f3dc61c18dafb755773efc89aae60d06b6596a63914107f75459cf984164d"}, @@ -2328,9 +2358,10 @@ virtualenv = ">=20.10.0" name = "pyasn1" version = "0.6.1" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, @@ -2340,9 +2371,10 @@ files = [ name = "pyasn1-modules" version = "0.4.2" description = "A collection of ASN.1-based protocols modules" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a"}, {file = "pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6"}, @@ -2355,9 +2387,10 @@ pyasn1 = ">=0.6.1,<0.7.0" name = "pydantic" version = "2.11.3" description = "Data validation using Python type hints" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "pydantic-2.11.3-py3-none-any.whl", hash = 
"sha256:a082753436a07f9ba1289c6ffa01cd93db3548776088aa917cc43b63f68fa60f"}, {file = "pydantic-2.11.3.tar.gz", hash = "sha256:7471657138c16adad9322fe3070c0116dd6c3ad8d649300e3cbdfe91f4db4ec3"}, @@ -2377,9 +2410,10 @@ timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows name = "pydantic-core" version = "2.33.1" description = "Core functionality for Pydantic validation and serialization" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "pydantic_core-2.33.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3077cfdb6125cc8dab61b155fdd714663e401f0e6883f9632118ec12cf42df26"}, {file = "pydantic_core-2.33.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8ffab8b2908d152e74862d276cf5017c81a2f3719f14e8e3e8d6b83fda863927"}, @@ -2489,9 +2523,10 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" name = "pydantic-settings" version = "2.9.1" description = "Settings management using Pydantic" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "pydantic_settings-2.9.1-py3-none-any.whl", hash = "sha256:59b4f431b1defb26fe620c71a7d3968a710d719f5f4cdbbdb7926edeb770f6ef"}, {file = "pydantic_settings-2.9.1.tar.gz", hash = "sha256:c509bf79d27563add44e8446233359004ed85066cd096d8b510f715e6ef5d268"}, @@ -2547,9 +2582,10 @@ extra = ["pygments (>=2.19.1)"] name = "pypdfium2" version = "4.30.0" description = "Python bindings to PDFium" -optional = false +optional = true python-versions = ">=3.6" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "pypdfium2-4.30.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:b33ceded0b6ff5b2b93bc1fe0ad4b71aa6b7e7bd5875f1ca0cdfb6ba6ac01aab"}, {file = "pypdfium2-4.30.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4e55689f4b06e2d2406203e771f78789bd4f190731b5d57383d05cf611d829de"}, @@ -2645,9 +2681,10 @@ six = ">=1.5" name = "python-dotenv" version = "1.1.0" 
description = "Read key-value pairs from a .env file and set them as environment variables" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d"}, {file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"}, @@ -3000,6 +3037,7 @@ files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, ] +markers = {main = "extra == \"pdf\""} [package.dependencies] certifi = ">=2017.4.17" @@ -3128,9 +3166,10 @@ files = [ name = "rsa" version = "4.9.1" description = "Pure-Python RSA implementation" -optional = false +optional = true python-versions = "<4,>=3.6" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762"}, {file = "rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75"}, @@ -3171,9 +3210,10 @@ files = [ name = "safetensors" version = "0.5.3" description = "" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "safetensors-0.5.3-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bd20eb133db8ed15b40110b7c00c6df51655a2998132193de2f75f72d99c7073"}, {file = "safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:21d01c14ff6c415c485616b8b0bf961c46b3b343ca59110d38d744e577f9cce7"}, @@ -3209,9 +3249,10 @@ torch = ["safetensors[numpy]", "torch (>=1.10)"] name = "scikit-learn" version = "1.6.1" description = "A set of python modules for machine learning and data mining" -optional = false 
+optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "scikit_learn-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d056391530ccd1e501056160e3c9673b4da4805eb67eb2bdf4e983e1f9c9204e"}, {file = "scikit_learn-1.6.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36"}, @@ -3264,9 +3305,10 @@ tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc ( name = "scipy" version = "1.15.2" description = "Fundamental algorithms for scientific computing in Python" -optional = false +optional = true python-versions = ">=3.10" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "scipy-1.15.2-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a2ec871edaa863e8213ea5df811cd600734f6400b4af272e1c011e69401218e9"}, {file = "scipy-1.15.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:6f223753c6ea76983af380787611ae1291e3ceb23917393079dcc746ba60cfb5"}, @@ -3328,10 +3370,10 @@ test = ["Cython", "array-api-strict (>=2.0,<2.1.1)", "asv", "gmpy2", "hypothesis name = "setuptools" version = "80.0.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" or python_version >= \"3.12\"" +markers = "extra == \"pdf\" and platform_system == \"Linux\" and platform_machine == \"x86_64\" or extra == \"pdf\" and python_version >= \"3.12\"" files = [ {file = "setuptools-80.0.0-py3-none-any.whl", hash = "sha256:a38f898dcd6e5380f4da4381a87ec90bd0a7eec23d204a5552e80ee3cab6bd27"}, {file = "setuptools-80.0.0.tar.gz", hash = "sha256:c40a5b3729d58dd749c0f08f1a07d134fb8a0a3d7f87dc33e7c5e1f762138650"}, @@ -3362,9 +3404,10 @@ files = [ name = "sniffio" version = "1.3.1" description = "Sniff out which async library your code is running under" -optional = false 
+optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, @@ -3386,9 +3429,10 @@ files = [ name = "surya-ocr" version = "0.13.1" description = "OCR, layout, reading order, and table recognition in 90+ languages" -optional = false +optional = true python-versions = "<4.0,>=3.10" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "surya_ocr-0.13.1-py3-none-any.whl", hash = "sha256:2704a97f5de625bc747eddf87874635cb8be164c4c9373207a022648325f009d"}, {file = "surya_ocr-0.13.1.tar.gz", hash = "sha256:af4004448eb8798aeddd4aa709c2f4d3795a3ec7bf12252595b481a65f799a52"}, @@ -3411,9 +3455,10 @@ transformers = ">=4.41.0,<5.0.0" name = "sympy" version = "1.14.0" description = "Computer algebra system (CAS) in Python" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5"}, {file = "sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517"}, @@ -3429,9 +3474,10 @@ dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"] name = "threadpoolctl" version = "3.6.0" description = "threadpoolctl" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb"}, {file = "threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e"}, @@ -3441,9 +3487,10 @@ files = [ name = "tokenizers" version = "0.21.1" description = "" -optional = false 
+optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "tokenizers-0.21.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e78e413e9e668ad790a29456e677d9d3aa50a9ad311a40905d6861ba7692cf41"}, {file = "tokenizers-0.21.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:cd51cd0a91ecc801633829fcd1fda9cf8682ed3477c6243b9a095539de4aecf3"}, @@ -3517,9 +3564,10 @@ files = [ name = "torch" version = "2.7.0" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -optional = false +optional = true python-versions = ">=3.9.0" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "torch-2.7.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c9afea41b11e1a1ab1b258a5c31afbd646d6319042bfe4f231b408034b51128b"}, {file = "torch-2.7.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0b9960183b6e5b71239a3e6c883d8852c304e691c0b2955f7045e8a6d05b9183"}, @@ -3601,9 +3649,10 @@ telegram = ["requests"] name = "transformers" version = "4.51.3" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" -optional = false +optional = true python-versions = ">=3.9.0" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "transformers-4.51.3-py3-none-any.whl", hash = "sha256:fd3279633ceb2b777013234bbf0b4f5c2d23c4626b05497691f00cfda55e8a83"}, {file = "transformers-4.51.3.tar.gz", hash = "sha256:e292fcab3990c6defe6328f0f7d2004283ca81a7a07b2de9a46d67fd81ea1409"}, @@ -3674,10 +3723,10 @@ vision = ["Pillow (>=10.0.1,<=15.0)"] name = "triton" version = "3.3.0" description = "A language and compiler for custom Deep Learning operations" -optional = false +optional = true python-versions = "*" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = 
"triton-3.3.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fad99beafc860501d7fcc1fb7045d9496cbe2c882b1674640304949165a916e7"}, {file = "triton-3.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3161a2bf073d6b22c4e2f33f951f3e5e3001462b2570e6df9cd57565bdec2984"}, @@ -3808,9 +3857,10 @@ markers = {docs = "python_version == \"3.10\""} name = "typing-inspection" version = "0.4.0" description = "Runtime typing introspection tools" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f"}, {file = "typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122"}, @@ -3842,6 +3892,7 @@ files = [ {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, ] +markers = {main = "extra == \"pdf\""} [package.extras] brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] @@ -3917,9 +3968,10 @@ watchmedo = ["PyYAML (>=3.10)"] name = "wcwidth" version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" -optional = false +optional = true python-versions = "*" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, @@ -3929,9 +3981,10 @@ files = [ name = "websockets" version = "15.0.1" description = "An implementation of the 
WebSocket Protocol (RFC 6455 & 7692)" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"pdf\"" files = [ {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b"}, {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205"}, @@ -4004,7 +4057,10 @@ files = [ {file = "websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee"}, ] +[extras] +pdf = ["marker-pdf"] + [metadata] lock-version = "2.1" python-versions = ">=3.10,<4" -content-hash = "5a9ea0b67b0f2a34dd8eefe66121575fbde3712aa3e3dd37591e163299c507c2" +content-hash = "5a1dee7fcc054de35ca27951eb1cba6fa330a97fcb090de354c4e7cc0741d70c" diff --git a/pyproject.toml b/pyproject.toml index e907f6c4..877ce316 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,10 @@ nltk = "^3.9.1" fuzzywuzzy = {extras = ["speedup"], version = "^0.18.0"} lxml = "^5.3.0" pandas = "^2.2.3" -marker-pdf = "^1.6.2" +marker-pdf = { version = "^1.6.2", optional = true } + +[tool.poetry.extras] +pdf = ["marker-pdf"] [tool.poetry.group.dev.dependencies] pytest = "^8.3.2" From a78349eab179586945be44b64f78432c7e5c17df Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Wed, 21 May 2025 11:31:24 +0100 Subject: [PATCH 030/125] Update readme with instructions for enabling PDF support --- README.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3607dc78..74e4369e 100644 --- a/README.md +++ b/README.md @@ -22,12 +22,20 @@ The documentation for Auto-CORPus is available on our [GitHub Pages site]. ## Installation -Install with pip +Install with pip: ```sh pip install autocorpus ``` +If you want to be able to process PDF files (only available with Auto-CORPus >v1.1.0), +you will need to install (large!) 
additional dependencies. To install Auto-CORPUS with +PDF processing support, run: + +```sh +pip install autocorpus[pdf] +``` + ## Usage Run the below command for a single file example @@ -125,6 +133,13 @@ To get started: poetry install ``` + If you want to develop PDF functionality or run the PDF-related tests, you will need + to install additional dependencies: + + ```sh + poetry install --extras pdf + ``` + 1. Activate the virtual environment (alternatively, ensure any Python-related command is preceded by `poetry run`): ```sh From 51e5f715f02d7d404133f80beecaea9519bef073 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Wed, 21 May 2025 13:22:29 +0100 Subject: [PATCH 031/125] Update .github/actions/setup/action.yml --- .github/actions/setup/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index f20db13e..02d625ff 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -19,4 +19,4 @@ runs: - name: Install dependencies shell: bash - run: poetry install --extras pdf + run: poetry install --all-extras From afe1a1659658a74229d373423827dd1c2cedd4c4 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Wed, 21 May 2025 13:35:07 +0100 Subject: [PATCH 032/125] Suggest --all-extras in README for development --- README.md | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 74e4369e..572f6b02 100644 --- a/README.md +++ b/README.md @@ -130,15 +130,11 @@ To get started: 1. Set up the virtual environment: ```sh - poetry install + poetry install --all-extras ``` - If you want to develop PDF functionality or run the PDF-related tests, you will need - to install additional dependencies: - - ```sh - poetry install --extras pdf - ``` + Note: The `--all-extras` flag is because of the additional dependencies required for + analysing extra file types (PDF, Word, Excel, etc). 1. 
Activate the virtual environment (alternatively, ensure any Python-related command is preceded by `poetry run`): From 0bc2764fa1679f2fbba85da0938115d86ce5ebc1 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 22 May 2025 15:51:19 +0100 Subject: [PATCH 033/125] Fix running Auto-CORPus from command line It seems that since #235 was merged, invoking Auto-CORPus from the command line has been broken and we just didn't notice (the regression tests have been passing). You get a confusing error message about there being no files or directories. The first problem was that `run.py` was invoking the `process_files()` method of `Autocorpus` but should have been running `process_file()` instead (seemingly it was renamed). The second one was that in some places `str`s have been replaced with `Path`s, but the code wasn't updated. I've converted the code to do the conversions in all the places where I saw errors, but there might be some I've missed. To reproduce, try running Auto-CORPus with the test data: auto-corpus -b PMC -f tests/data/PMC/Current/PMC8885717.html --- autocorpus/abbreviation.py | 8 ++++---- autocorpus/autocorpus.py | 2 +- autocorpus/bioc_documents.py | 2 +- autocorpus/run.py | 4 ++-- autocorpus/table.py | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/autocorpus/abbreviation.py b/autocorpus/abbreviation.py index 9b00ed01..c636ba88 100644 --- a/autocorpus/abbreviation.py +++ b/autocorpus/abbreviation.py @@ -398,7 +398,7 @@ def _extract_abbreviations( def _biocify_abbreviations( - abbreviations: _AbbreviationsDict, file_path: str + abbreviations: _AbbreviationsDict, file_path: Path ) -> dict[str, Any]: passages = [] for short, long in abbreviations.items(): @@ -416,8 +416,8 @@ def _biocify_abbreviations( "key": "autocorpus_abbreviations.key", "documents": [ { - "id": Path(file_path).name.partition(".")[0], - "inputfile": file_path, + "id": file_path.name.partition(".")[0], + "inputfile": str(file_path), "passages": passages, } ], @@ -425,7 
+425,7 @@ def _biocify_abbreviations( def get_abbreviations( - main_text: dict[str, Any], soup: BeautifulSoup, file_path: str + main_text: dict[str, Any], soup: BeautifulSoup, file_path: Path ) -> dict[str, Any]: """Extract abbreviations from the input main text. diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index ee161fb5..267bffec 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -138,7 +138,7 @@ def __process_html_article(self, file: Path): self.__process_html_tables(file, soup, self.config) self.main_text = self.__extract_text(soup, self.config) try: - self.abbreviations = get_abbreviations(self.main_text, soup, str(file)) + self.abbreviations = get_abbreviations(self.main_text, soup, file) except Exception as e: logger.error(e) diff --git a/autocorpus/bioc_documents.py b/autocorpus/bioc_documents.py index ee9a4d7d..a1c8e2fc 100644 --- a/autocorpus/bioc_documents.py +++ b/autocorpus/bioc_documents.py @@ -35,7 +35,7 @@ def get_formatted_bioc_document(data_store) -> dict[str, Any]: return { "id": Path(data_store.file_path).name.split(".")[0], - "inputfile": data_store.file_path, + "inputfile": str(data_store.file_path), "infons": {}, "passages": passages, "annotations": [], diff --git a/autocorpus/run.py b/autocorpus/run.py index 2744c917..3d4b7649 100644 --- a/autocorpus/run.py +++ b/autocorpus/run.py @@ -16,11 +16,11 @@ def run_autocorpus(config, structure, key, output_format): """ ac = Autocorpus( config=config, - main_text=structure[key]["main_text"], + main_text=Path(structure[key]["main_text"]), linked_tables=sorted(structure[key]["linked_tables"]), ) - ac.process_files() + ac.process_file() out_dir = Path(structure[key]["out_dir"]) if structure[key]["main_text"]: diff --git a/autocorpus/table.py b/autocorpus/table.py index 35dc42d9..1210fe11 100644 --- a/autocorpus/table.py +++ b/autocorpus/table.py @@ -279,7 +279,7 @@ def __format_table_bioc(table_json, table_identifier, file_path): formatted_identifier = 
table["identifier"].replace(".", "_") offset = 0 table_dict = { - "inputfile": file_path, + "inputfile": str(file_path), "id": formatted_identifier, "infons": {}, "passages": [ From a417a080dd9c5743da22a1b09779661150d43a09 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Thu, 22 May 2025 16:05:51 +0100 Subject: [PATCH 034/125] Updates based on suggested changes. --- autocorpus/bioc_supplementary.py | 6 +- autocorpus/utils.py | 38 ---------- autocorpus/word_extractor.py | 124 +++++++++++++++++++++---------- 3 files changed, 89 insertions(+), 79 deletions(-) diff --git a/autocorpus/bioc_supplementary.py b/autocorpus/bioc_supplementary.py index ed9dee95..8f96580a 100644 --- a/autocorpus/bioc_supplementary.py +++ b/autocorpus/bioc_supplementary.py @@ -208,9 +208,9 @@ def build_bioc( """Builds a BioCCollection object from the provided text, input file, and file type. Args: - text (str | list[tuple[str, bool]]): The text content to be converted. - input_file (str): The path to the input file. - file_type (str): The type of the input file ('word' or 'pdf'). + text: The text content to be converted. + input_file: The path to the input file. + file_type: The type of the input file ('word' or 'pdf'). Returns: BioCCollection: The constructed BioCCollection object. diff --git a/autocorpus/utils.py b/autocorpus/utils.py index b05f4aa4..8ee3b2f0 100644 --- a/autocorpus/utils.py +++ b/autocorpus/utils.py @@ -11,44 +11,6 @@ from lxml.html.soupparser import fromstring -def replace_unicode(text: str | list[str]) -> str | list[str]: - """Replaces specific Unicode characters in a given text. - - Args: - text: The input text to be processed. - - Returns: - The processed text with the specified Unicode characters replaced. 
- - Examples: - replace_unicode('\u00a0Hello\u00adWorld\u2010') # ' Hello-World-' - replace_unicode(['\u00a0Hello', '\u00adWorld']) # [' Hello', 'World'] - """ - if not text: - raise ValueError("Input text is empty or None.") - if isinstance(text, list): - clean_texts = [] - for t in text: - if t and isinstance(t, str): - clean_texts.append( - t.replace("\u00a0", " ") - .replace("\u00ad", "-") - .replace("\u2010", "-") - .replace("\u00d7", "x") - ) - else: - clean_texts.append(t) - return clean_texts - elif isinstance(text, str): - clean_text = ( - text.replace("\u00a0", " ") - .replace("\u00ad", "-") - .replace("\u2010", "-") - .replace("\u00d7", "x") - ) - return clean_text - - def get_files(base_dir, pattern=r"(.*).html"): """Recursively retrieve all PMC.html files from the directory. diff --git a/autocorpus/word_extractor.py b/autocorpus/word_extractor.py index f49fb8f6..5aaccb8b 100644 --- a/autocorpus/word_extractor.py +++ b/autocorpus/word_extractor.py @@ -8,6 +8,7 @@ import subprocess from pathlib import Path +import docx from docx import Document from autocorpus.ac_bioc.bioctable.collection import BioCTableCollection @@ -19,11 +20,11 @@ from . import logger -def __extract_tables(doc): +def __extract_tables(doc: docx.document.Document) -> list[list[list[str]]]: """Extracts tables from a .docx document. Args: - doc (docx.Document): The Document object representing the .docx document. + doc: The Document object representing the .docx document. Returns: list: A list of tables extracted from the document. 
Each table is represented as a nested list, @@ -37,7 +38,7 @@ def __extract_tables(doc): tables = extract_tables(doc) """ # Open the .docx file - tables = [] + tables: list[list[list[str]]] = [] # Iterate through the tables in the document for table in doc.tables: tables.append([]) @@ -47,57 +48,104 @@ def __extract_tables(doc): return tables -def __convert_older_doc_file(file: Path, output_dir: Path) -> Path | bool: - """Converts an older .doc file to .docx format using platform-specific methods.""" - operating_system = platform.system() - docx_path = Path(str(file).replace(".doc", ".docx")) - if operating_system == "Windows": +def __windows_convert_doc_to_docx(docx_path: Path, file: Path) -> Path | bool: + """Converts a .doc file to .docx format using Microsoft Word on Windows.""" + try: import win32com.client + except ImportError as e: + logger.error( + "pywin32 is required to convert Word documents on Windows. Please install it via 'pip install pywin32'." + ) + return False - word = None - try: - word = win32com.client.DispatchEx("Word.Application") - doc = word.Documents.Open(file) - doc.SaveAs(docx_path, 16) - doc.Close() - word.Quit() - return docx_path - except Exception: - return False - finally: - if word: + word = None + try: + word = win32com.client.DispatchEx("Word.Application") + doc = word.Documents.Open(str(file)) + doc.SaveAs(str(docx_path), 16) # 16 = wdFormatDocumentDefault (.docx) + doc.Close() + logger.info( + f"Successfully converted '{file}' to '{docx_path}' using Word on Windows." 
+ ) + return docx_path + except Exception as e: + logger.exception(f"Failed to convert '{file}' on Windows: {e}") + return False + finally: + if word: + try: word.Quit() - elif operating_system == "Linux": - # Convert .doc to .docx using LibreOffice - subprocess.run( + except Exception as quit_err: + logger.warning(f"Could not quit Word application cleanly: {quit_err}") + + +def __linux_convert_doc_to_docx(docx_path: Path, file: Path) -> Path | bool: + """Converts a .doc file to .docx format using LibreOffice on Linux.""" + try: + result = subprocess.run( [ "soffice", "--headless", "--convert-to", "docx", "--outdir", - output_dir, - file, + str(docx_path.parent), + str(file), ], check=True, capture_output=True, + text=True, + ) + logger.info(f"LibreOffice output: {result.stdout}") + return docx_path + except FileNotFoundError: + logger.error( + "LibreOffice ('soffice') not found. Please install it to enable DOC to DOCX conversion." + ) + return False + except subprocess.CalledProcessError as e: + logger.exception(f"LibreOffice failed to convert '{file}': {e.stderr}") + return False + + +def __macos_convert_doc_to_docx(docx_path: Path, file: Path) -> Path | bool: + """Converts a .doc file to .docx format using AppleScript on macOS.""" + try: + applescript = f''' + tell application "Microsoft Word" + open "{file}" + save as active document file name "{docx_path}" file format format document + close active document saving no + end tell + ''' + subprocess.run(["osascript", "-e", applescript], check=True) + logger.info( + f"Successfully converted '{file}' to '{docx_path}' using Word on macOS." ) return docx_path + except FileNotFoundError: + logger.error( + "osascript not found. Ensure you have AppleScript and Microsoft Word installed on macOS." 
+ ) + return False + except subprocess.CalledProcessError as e: + logger.exception(f"AppleScript failed to convert '{file}': {e}") + return False + + +def __convert_older_doc_file(file: Path, output_dir: Path) -> Path | bool: + """Converts an older .doc file to .docx format using platform-specific methods.""" + operating_system = platform.system() + docx_path = output_dir / file.with_suffix(".docx").name + + if operating_system == "Windows": + return __windows_convert_doc_to_docx(docx_path, file) + elif operating_system == "Linux": + return __linux_convert_doc_to_docx(docx_path, file) elif operating_system == "Darwin": # macOS - try: - # AppleScript to open the file in Word and save as .docx - applescript = f''' - tell application "Microsoft Word" - open "{file}" - save as active document file name "{docx_path}" file format format document - close active document saving no - end tell - ''' - subprocess.run(["osascript", "-e", applescript], check=True) - return docx_path - except Exception: - return False + return __macos_convert_doc_to_docx(docx_path, file) else: + logger.error(f"Unsupported operating system: {operating_system}") return False From 66ccd3df0ce521a6c4b15cd8db2ba7484584f972 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 22 May 2025 17:30:31 +0100 Subject: [PATCH 035/125] Revert "Fix running Auto-CORPus from command line" This reverts commit 0bc2764fa1679f2fbba85da0938115d86ce5ebc1. 
--- autocorpus/abbreviation.py | 8 ++++---- autocorpus/autocorpus.py | 2 +- autocorpus/bioc_documents.py | 2 +- autocorpus/run.py | 4 ++-- autocorpus/table.py | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/autocorpus/abbreviation.py b/autocorpus/abbreviation.py index c636ba88..9b00ed01 100644 --- a/autocorpus/abbreviation.py +++ b/autocorpus/abbreviation.py @@ -398,7 +398,7 @@ def _extract_abbreviations( def _biocify_abbreviations( - abbreviations: _AbbreviationsDict, file_path: Path + abbreviations: _AbbreviationsDict, file_path: str ) -> dict[str, Any]: passages = [] for short, long in abbreviations.items(): @@ -416,8 +416,8 @@ def _biocify_abbreviations( "key": "autocorpus_abbreviations.key", "documents": [ { - "id": file_path.name.partition(".")[0], - "inputfile": str(file_path), + "id": Path(file_path).name.partition(".")[0], + "inputfile": file_path, "passages": passages, } ], @@ -425,7 +425,7 @@ def _biocify_abbreviations( def get_abbreviations( - main_text: dict[str, Any], soup: BeautifulSoup, file_path: Path + main_text: dict[str, Any], soup: BeautifulSoup, file_path: str ) -> dict[str, Any]: """Extract abbreviations from the input main text. 
diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 267bffec..ee161fb5 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -138,7 +138,7 @@ def __process_html_article(self, file: Path): self.__process_html_tables(file, soup, self.config) self.main_text = self.__extract_text(soup, self.config) try: - self.abbreviations = get_abbreviations(self.main_text, soup, file) + self.abbreviations = get_abbreviations(self.main_text, soup, str(file)) except Exception as e: logger.error(e) diff --git a/autocorpus/bioc_documents.py b/autocorpus/bioc_documents.py index a1c8e2fc..ee9a4d7d 100644 --- a/autocorpus/bioc_documents.py +++ b/autocorpus/bioc_documents.py @@ -35,7 +35,7 @@ def get_formatted_bioc_document(data_store) -> dict[str, Any]: return { "id": Path(data_store.file_path).name.split(".")[0], - "inputfile": str(data_store.file_path), + "inputfile": data_store.file_path, "infons": {}, "passages": passages, "annotations": [], diff --git a/autocorpus/run.py b/autocorpus/run.py index 3d4b7649..2744c917 100644 --- a/autocorpus/run.py +++ b/autocorpus/run.py @@ -16,11 +16,11 @@ def run_autocorpus(config, structure, key, output_format): """ ac = Autocorpus( config=config, - main_text=Path(structure[key]["main_text"]), + main_text=structure[key]["main_text"], linked_tables=sorted(structure[key]["linked_tables"]), ) - ac.process_file() + ac.process_files() out_dir = Path(structure[key]["out_dir"]) if structure[key]["main_text"]: diff --git a/autocorpus/table.py b/autocorpus/table.py index 1210fe11..35dc42d9 100644 --- a/autocorpus/table.py +++ b/autocorpus/table.py @@ -279,7 +279,7 @@ def __format_table_bioc(table_json, table_identifier, file_path): formatted_identifier = table["identifier"].replace(".", "_") offset = 0 table_dict = { - "inputfile": str(file_path), + "inputfile": file_path, "id": formatted_identifier, "infons": {}, "passages": [ From 4c1ae3dd1557457c9c426c13d026e784798367a7 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: 
Thu, 22 May 2025 17:35:53 +0100 Subject: [PATCH 036/125] Try fixing again, by converting `Path`s to `str`s --- autocorpus/autocorpus.py | 4 ++-- autocorpus/run.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index ee161fb5..ea13084e 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -319,7 +319,7 @@ def process_file(self): raise RuntimeError("A valid config file must be loaded.") # handle main_text if self.file_path: - soup = self.__soupify_infile(self.file_path) + soup = self.__soupify_infile(Path(self.file_path)) self.__process_html_tables(self.file_path, soup, self.config) self.main_text = self.__extract_text(soup, self.config) try: @@ -388,7 +388,7 @@ def __init__( main_text (Path): path to the main text of the article (HTML files only) linked_tables (list): list of linked table file paths to be included in this run (HTML files only) """ - self.file_path = main_text + self.file_path = str(main_text) self.linked_tables = linked_tables self.config = config self.main_text = {} diff --git a/autocorpus/run.py b/autocorpus/run.py index 2744c917..3d4b7649 100644 --- a/autocorpus/run.py +++ b/autocorpus/run.py @@ -16,11 +16,11 @@ def run_autocorpus(config, structure, key, output_format): """ ac = Autocorpus( config=config, - main_text=structure[key]["main_text"], + main_text=Path(structure[key]["main_text"]), linked_tables=sorted(structure[key]["linked_tables"]), ) - ac.process_files() + ac.process_file() out_dir = Path(structure[key]["out_dir"]) if structure[key]["main_text"]: From 0e2937b890c4c0c22ff6ca17731d11ee2daf1c5d Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 23 May 2025 09:50:20 +0100 Subject: [PATCH 037/125] Fix: Accidental reference to `marker` outside `pdf` module Looks like I forgot to remove this reference to the `marker` package, which means that if users install Auto-CORPus without PDF support, it fails even to process non-PDF files. Remove it.
--- autocorpus/autocorpus.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index ea13084e..2662fc8f 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -5,7 +5,6 @@ from typing import Any from bs4 import BeautifulSoup -from marker.converters.pdf import PdfConverter from . import logger from .abbreviation import get_abbreviations @@ -15,8 +14,6 @@ from .table import get_table_json from .utils import handle_not_tables -pdf_converter: PdfConverter | None = None - class Autocorpus: """Parent class for all Auto-CORPus functionality.""" From b331e15ed38e023573c4401c8ebb0ae7e33f386e Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 22 May 2025 14:18:15 +0100 Subject: [PATCH 038/125] Reorganise HTML test data --- .vscode/launch.json | 4 ++-- .../{PMC/Pre-Oct-2024 => html/LEGACY_PMC}/PMC8885717.html | 0 .../LEGACY_PMC}/PMC8885717_abbreviations.json | 0 .../Pre-Oct-2024 => html/LEGACY_PMC}/PMC8885717_bioc.json | 0 .../LEGACY_PMC}/PMC8885717_tables.json | 0 tests/data/{PMC/Current => html/PMC}/PMC8885717.html | 0 .../Current => html/PMC}/PMC8885717_abbreviations.json | 0 tests/data/{PMC/Current => html/PMC}/PMC8885717_bioc.json | 0 .../data/{PMC/Current => html/PMC}/PMC8885717_tables.json | 0 tests/test_regression.py | 8 +++++--- 10 files changed, 7 insertions(+), 5 deletions(-) rename tests/data/{PMC/Pre-Oct-2024 => html/LEGACY_PMC}/PMC8885717.html (100%) rename tests/data/{PMC/Pre-Oct-2024 => html/LEGACY_PMC}/PMC8885717_abbreviations.json (100%) rename tests/data/{PMC/Pre-Oct-2024 => html/LEGACY_PMC}/PMC8885717_bioc.json (100%) rename tests/data/{PMC/Pre-Oct-2024 => html/LEGACY_PMC}/PMC8885717_tables.json (100%) rename tests/data/{PMC/Current => html/PMC}/PMC8885717.html (100%) rename tests/data/{PMC/Current => html/PMC}/PMC8885717_abbreviations.json (100%) rename tests/data/{PMC/Current => html/PMC}/PMC8885717_bioc.json (100%) rename tests/data/{PMC/Current => html/PMC}/PMC8885717_tables.json 
(100%) diff --git a/.vscode/launch.json b/.vscode/launch.json index 77238ed0..331f8197 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -15,7 +15,7 @@ "-t", "output", "-f", - "${workspaceFolder}/tests/data/PMC/Current/PMC8885717.html" + "${workspaceFolder}/tests/data/html/PMC/PMC8885717.html" ] }, { @@ -29,7 +29,7 @@ "-t", "output", "-f", - "${workspaceFolder}/tests/data/PMC/Pre-Oct-2024/PMC8885717.html" + "${workspaceFolder}/tests/data/html/LEGACY_PMC/PMC8885717.html" ] } ] diff --git a/tests/data/PMC/Pre-Oct-2024/PMC8885717.html b/tests/data/html/LEGACY_PMC/PMC8885717.html similarity index 100% rename from tests/data/PMC/Pre-Oct-2024/PMC8885717.html rename to tests/data/html/LEGACY_PMC/PMC8885717.html diff --git a/tests/data/PMC/Pre-Oct-2024/PMC8885717_abbreviations.json b/tests/data/html/LEGACY_PMC/PMC8885717_abbreviations.json similarity index 100% rename from tests/data/PMC/Pre-Oct-2024/PMC8885717_abbreviations.json rename to tests/data/html/LEGACY_PMC/PMC8885717_abbreviations.json diff --git a/tests/data/PMC/Pre-Oct-2024/PMC8885717_bioc.json b/tests/data/html/LEGACY_PMC/PMC8885717_bioc.json similarity index 100% rename from tests/data/PMC/Pre-Oct-2024/PMC8885717_bioc.json rename to tests/data/html/LEGACY_PMC/PMC8885717_bioc.json diff --git a/tests/data/PMC/Pre-Oct-2024/PMC8885717_tables.json b/tests/data/html/LEGACY_PMC/PMC8885717_tables.json similarity index 100% rename from tests/data/PMC/Pre-Oct-2024/PMC8885717_tables.json rename to tests/data/html/LEGACY_PMC/PMC8885717_tables.json diff --git a/tests/data/PMC/Current/PMC8885717.html b/tests/data/html/PMC/PMC8885717.html similarity index 100% rename from tests/data/PMC/Current/PMC8885717.html rename to tests/data/html/PMC/PMC8885717.html diff --git a/tests/data/PMC/Current/PMC8885717_abbreviations.json b/tests/data/html/PMC/PMC8885717_abbreviations.json similarity index 100% rename from tests/data/PMC/Current/PMC8885717_abbreviations.json rename to 
tests/data/html/PMC/PMC8885717_abbreviations.json diff --git a/tests/data/PMC/Current/PMC8885717_bioc.json b/tests/data/html/PMC/PMC8885717_bioc.json similarity index 100% rename from tests/data/PMC/Current/PMC8885717_bioc.json rename to tests/data/html/PMC/PMC8885717_bioc.json diff --git a/tests/data/PMC/Current/PMC8885717_tables.json b/tests/data/html/PMC/PMC8885717_tables.json similarity index 100% rename from tests/data/PMC/Current/PMC8885717_tables.json rename to tests/data/html/PMC/PMC8885717_tables.json diff --git a/tests/test_regression.py b/tests/test_regression.py index f16a022a..67491258 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -12,11 +12,13 @@ @pytest.mark.parametrize( "input_file, config", [ - ("PMC/Pre-Oct-2024/PMC8885717.html", DefaultConfig.LEGACY_PMC.load_config()), - ("PMC/Current/PMC8885717.html", DefaultConfig.PMC.load_config()), + ("html/LEGACY_PMC/PMC8885717.html", DefaultConfig.LEGACY_PMC.load_config()), + ("html/PMC/PMC8885717.html", DefaultConfig.PMC.load_config()), ], ) -def test_autocorpus(data_path: Path, input_file: str, config: dict[str, Any]) -> None: +def test_regression_html( + data_path: Path, input_file: str, config: dict[str, Any] +) -> None: """A regression test for the main autoCORPus class, using the each PMC config on the AutoCORPus Paper.""" from autocorpus.autocorpus import Autocorpus From c0f85044e2898d7cb17344ff05aafe227fbcf783 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 22 May 2025 14:45:23 +0100 Subject: [PATCH 039/125] Dynamically load regression test data based on paths --- tests/test_regression.py | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/tests/test_regression.py b/tests/test_regression.py index 67491258..1d1c73d0 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -1,6 +1,7 @@ """Primary build test script used for regression testing between AC output versions.""" import json +import os from 
pathlib import Path from typing import Any @@ -9,12 +10,26 @@ from autocorpus.configs.default_config import DefaultConfig +def _get_html_test_data_paths(): + """Return paths to HTML test data files with appropriate DefaultConfig.""" + DATA_PATH = Path(__file__).parent / "data" + HTML_DATA_PATH = DATA_PATH / "html" + + for dir_name in os.listdir(HTML_DATA_PATH): + dir_path = HTML_DATA_PATH / dir_name + if dir_path.is_dir(): + # Assume the folder name corresponds to a DefaultConfig + config = getattr(DefaultConfig, str(dir_name)).load_config() + + for file_path in dir_path.glob("*.html"): + # The reason for converting the path to a string is so that we get the + # file path in the test name (paths don't work for some reason) + yield (str(file_path.relative_to(DATA_PATH)), config) + + @pytest.mark.parametrize( - "input_file, config", - [ - ("html/LEGACY_PMC/PMC8885717.html", DefaultConfig.LEGACY_PMC.load_config()), - ("html/PMC/PMC8885717.html", DefaultConfig.PMC.load_config()), - ], + "input_file,config", + _get_html_test_data_paths(), ) def test_regression_html( data_path: Path, input_file: str, config: dict[str, Any] @@ -22,25 +37,25 @@ def test_regression_html( """A regression test for the main autoCORPus class, using the each PMC config on the AutoCORPus Paper.""" from autocorpus.autocorpus import Autocorpus - pmc_example_path = data_path / input_file + file_path = data_path / input_file with open( - str(pmc_example_path).replace(".html", "_abbreviations.json"), encoding="utf-8" + str(file_path).replace(".html", "_abbreviations.json"), encoding="utf-8" ) as f: expected_abbreviations = json.load(f) with open( - str(pmc_example_path).replace(".html", "_bioc.json"), + str(file_path).replace(".html", "_bioc.json"), encoding="utf-8", ) as f: expected_bioc = json.load(f) with open( - str(pmc_example_path).replace(".html", "_tables.json"), + str(file_path).replace(".html", "_tables.json"), encoding="utf-8", ) as f: expected_tables = json.load(f) auto_corpus = 
Autocorpus( config=config, - main_text=pmc_example_path, + main_text=file_path, ) auto_corpus.process_file() From 1718abf2c15de65b68908bc7fb7b1c2fee0fd225 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 22 May 2025 14:52:20 +0100 Subject: [PATCH 040/125] Move HTML test data to 'public' subfolder --- .vscode/launch.json | 4 ++-- tests/data/{ => public}/html/LEGACY_PMC/PMC8885717.html | 0 .../html/LEGACY_PMC/PMC8885717_abbreviations.json | 0 tests/data/{ => public}/html/LEGACY_PMC/PMC8885717_bioc.json | 0 .../data/{ => public}/html/LEGACY_PMC/PMC8885717_tables.json | 0 tests/data/{ => public}/html/PMC/PMC8885717.html | 0 .../data/{ => public}/html/PMC/PMC8885717_abbreviations.json | 0 tests/data/{ => public}/html/PMC/PMC8885717_bioc.json | 0 tests/data/{ => public}/html/PMC/PMC8885717_tables.json | 0 tests/test_regression.py | 2 +- 10 files changed, 3 insertions(+), 3 deletions(-) rename tests/data/{ => public}/html/LEGACY_PMC/PMC8885717.html (100%) rename tests/data/{ => public}/html/LEGACY_PMC/PMC8885717_abbreviations.json (100%) rename tests/data/{ => public}/html/LEGACY_PMC/PMC8885717_bioc.json (100%) rename tests/data/{ => public}/html/LEGACY_PMC/PMC8885717_tables.json (100%) rename tests/data/{ => public}/html/PMC/PMC8885717.html (100%) rename tests/data/{ => public}/html/PMC/PMC8885717_abbreviations.json (100%) rename tests/data/{ => public}/html/PMC/PMC8885717_bioc.json (100%) rename tests/data/{ => public}/html/PMC/PMC8885717_tables.json (100%) diff --git a/.vscode/launch.json b/.vscode/launch.json index 331f8197..41a7047c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -15,7 +15,7 @@ "-t", "output", "-f", - "${workspaceFolder}/tests/data/html/PMC/PMC8885717.html" + "${workspaceFolder}/tests/data/public/html/PMC/PMC8885717.html" ] }, { @@ -29,7 +29,7 @@ "-t", "output", "-f", - "${workspaceFolder}/tests/data/html/LEGACY_PMC/PMC8885717.html" + "${workspaceFolder}/tests/data/public/html/LEGACY_PMC/PMC8885717.html" ] } ] diff --git 
a/tests/data/html/LEGACY_PMC/PMC8885717.html b/tests/data/public/html/LEGACY_PMC/PMC8885717.html similarity index 100% rename from tests/data/html/LEGACY_PMC/PMC8885717.html rename to tests/data/public/html/LEGACY_PMC/PMC8885717.html diff --git a/tests/data/html/LEGACY_PMC/PMC8885717_abbreviations.json b/tests/data/public/html/LEGACY_PMC/PMC8885717_abbreviations.json similarity index 100% rename from tests/data/html/LEGACY_PMC/PMC8885717_abbreviations.json rename to tests/data/public/html/LEGACY_PMC/PMC8885717_abbreviations.json diff --git a/tests/data/html/LEGACY_PMC/PMC8885717_bioc.json b/tests/data/public/html/LEGACY_PMC/PMC8885717_bioc.json similarity index 100% rename from tests/data/html/LEGACY_PMC/PMC8885717_bioc.json rename to tests/data/public/html/LEGACY_PMC/PMC8885717_bioc.json diff --git a/tests/data/html/LEGACY_PMC/PMC8885717_tables.json b/tests/data/public/html/LEGACY_PMC/PMC8885717_tables.json similarity index 100% rename from tests/data/html/LEGACY_PMC/PMC8885717_tables.json rename to tests/data/public/html/LEGACY_PMC/PMC8885717_tables.json diff --git a/tests/data/html/PMC/PMC8885717.html b/tests/data/public/html/PMC/PMC8885717.html similarity index 100% rename from tests/data/html/PMC/PMC8885717.html rename to tests/data/public/html/PMC/PMC8885717.html diff --git a/tests/data/html/PMC/PMC8885717_abbreviations.json b/tests/data/public/html/PMC/PMC8885717_abbreviations.json similarity index 100% rename from tests/data/html/PMC/PMC8885717_abbreviations.json rename to tests/data/public/html/PMC/PMC8885717_abbreviations.json diff --git a/tests/data/html/PMC/PMC8885717_bioc.json b/tests/data/public/html/PMC/PMC8885717_bioc.json similarity index 100% rename from tests/data/html/PMC/PMC8885717_bioc.json rename to tests/data/public/html/PMC/PMC8885717_bioc.json diff --git a/tests/data/html/PMC/PMC8885717_tables.json b/tests/data/public/html/PMC/PMC8885717_tables.json similarity index 100% rename from tests/data/html/PMC/PMC8885717_tables.json rename to 
tests/data/public/html/PMC/PMC8885717_tables.json diff --git a/tests/test_regression.py b/tests/test_regression.py index 1d1c73d0..531598b0 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -13,7 +13,7 @@ def _get_html_test_data_paths(): """Return paths to HTML test data files with appropriate DefaultConfig.""" DATA_PATH = Path(__file__).parent / "data" - HTML_DATA_PATH = DATA_PATH / "html" + HTML_DATA_PATH = DATA_PATH / "public" / "html" for dir_name in os.listdir(HTML_DATA_PATH): dir_path = HTML_DATA_PATH / dir_name From de583236a0baa15a8cd214d479dc4f756a157cc7 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 22 May 2025 15:01:15 +0100 Subject: [PATCH 041/125] Add placeholder test for private HTML data Currently just skipped. --- tests/data/private/.gitkeep | 0 tests/test_regression.py | 30 +++++++++++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 tests/data/private/.gitkeep diff --git a/tests/data/private/.gitkeep b/tests/data/private/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_regression.py b/tests/test_regression.py index 531598b0..0bb7a1ac 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -10,10 +10,12 @@ from autocorpus.configs.default_config import DefaultConfig -def _get_html_test_data_paths(): +def _get_html_test_data_paths(subfolder: str): """Return paths to HTML test data files with appropriate DefaultConfig.""" DATA_PATH = Path(__file__).parent / "data" - HTML_DATA_PATH = DATA_PATH / "public" / "html" + HTML_DATA_PATH = DATA_PATH / subfolder / "html" + if not HTML_DATA_PATH.exists(): + return for dir_name in os.listdir(HTML_DATA_PATH): dir_path = HTML_DATA_PATH / dir_name @@ -27,14 +29,32 @@ def _get_html_test_data_paths(): yield (str(file_path.relative_to(DATA_PATH)), config) +_private_test_data = list(_get_html_test_data_paths("private")) + + @pytest.mark.parametrize( "input_file,config", - _get_html_test_data_paths(), + 
_get_html_test_data_paths("public"), ) -def test_regression_html( +def test_regression_html_public( + data_path: Path, input_file: str, config: dict[str, Any] +) -> None: + """Regression test for public HTML data.""" + _run_html_regression_test(data_path, input_file, config) + + +@pytest.mark.skipif(not _private_test_data, reason="Private test data not checked out") +@pytest.mark.parametrize("input_file,config", _private_test_data) +def test_regression_html_private( + data_path: Path, input_file: str, config: dict[str, Any] +) -> None: + """Regression test for private HTML data.""" + _run_html_regression_test(data_path, input_file, config) + + +def _run_html_regression_test( data_path: Path, input_file: str, config: dict[str, Any] ) -> None: - """A regression test for the main autoCORPus class, using the each PMC config on the AutoCORPus Paper.""" from autocorpus.autocorpus import Autocorpus file_path = data_path / input_file From b4978faa2cee6ed4308a0bddacbc35cd64d24b08 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 22 May 2025 16:39:28 +0100 Subject: [PATCH 042/125] Handle test files without tables correctly --- tests/test_regression.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tests/test_regression.py b/tests/test_regression.py index 0bb7a1ac..7252915a 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -67,11 +67,14 @@ def _run_html_regression_test( encoding="utf-8", ) as f: expected_bioc = json.load(f) - with open( - str(file_path).replace(".html", "_tables.json"), - encoding="utf-8", - ) as f: - expected_tables = json.load(f) + try: + with open( + str(file_path).replace(".html", "_tables.json"), + encoding="utf-8", + ) as f: + expected_tables = json.load(f) + except FileNotFoundError: + expected_tables = {} auto_corpus = Autocorpus( config=config, @@ -94,7 +97,10 @@ def _run_html_regression_test( ) assert abbreviations == expected_abbreviations assert bioc == expected_bioc - assert 
tables == expected_tables + if auto_corpus.has_tables: + assert tables == expected_tables + else: + assert not expected_tables @pytest.mark.skip_ci_macos @@ -149,6 +155,7 @@ def test_pdf_to_bioc(data_path: Path, input_file: str, config: dict[str, Any]) - def _make_reproducible(*data: dict[str, Any]) -> None: """Make output files reproducible by stripping dates and file paths.""" for d in data: - d.pop("date") - for doc in d["documents"]: - doc.pop("inputfile") + d.pop("date", None) + if docs := d.get("documents", None): + for doc in docs: + doc.pop("inputfile", None) From e827ce2a0b8d81c74114d8d12eae617dabe1c4b6 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 22 May 2025 17:15:08 +0100 Subject: [PATCH 043/125] Mark tests using known problematic files as xfail --- tests/test_regression.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test_regression.py b/tests/test_regression.py index 7252915a..c38c5000 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -9,6 +9,18 @@ from autocorpus.configs.default_config import DefaultConfig +_KNOWN_FAILURES = [ + "PMC10790237.html", + "PMC5480070.html", + "PMC8853865.html", + "PMC9477686.html", +] +"""These files are known to fail the regression test, even though they shouldn't. + +The problem is in the `*_tables.json` files. You get different results on different runs +for reasons unknown. 
+""" + def _get_html_test_data_paths(subfolder: str): """Return paths to HTML test data files with appropriate DefaultConfig.""" @@ -49,6 +61,9 @@ def test_regression_html_private( data_path: Path, input_file: str, config: dict[str, Any] ) -> None: """Regression test for private HTML data.""" + if Path(input_file).name in _KNOWN_FAILURES: + pytest.xfail("Known problematic file") + _run_html_regression_test(data_path, input_file, config) From 70245e9e6ebeadb63782622a2cc12394d91200be Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 22 May 2025 17:18:15 +0100 Subject: [PATCH 044/125] Add private data repo as git submodule --- .gitmodules | 3 +++ tests/data/private | 1 + tests/data/private/.gitkeep | 0 3 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 tests/data/private delete mode 100644 tests/data/private/.gitkeep diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..9635646b --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "tests/data/private"] + path = tests/data/private + url = ../Auto-CORPus-private-test-data diff --git a/tests/data/private b/tests/data/private new file mode 160000 index 00000000..43ea57de --- /dev/null +++ b/tests/data/private @@ -0,0 +1 @@ +Subproject commit 43ea57de0858c05b89d348a3dcb1000840bb025b diff --git a/tests/data/private/.gitkeep b/tests/data/private/.gitkeep deleted file mode 100644 index e69de29b..00000000 From 5d7f86f2174afad1902083f5404c8b595330fe34 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 22 May 2025 17:27:17 +0100 Subject: [PATCH 045/125] Add `pytest-xdist` to dependencies and use to parallelise tests --- poetry.lock | 63 +++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 3 ++- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 18feb3ae..a7581cbb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -429,6 +429,21 @@ markers = {main = "python_version == \"3.10\" and extra == \"pdf\"", dev = "pyth 
[package.extras] test = ["pytest (>=6)"] +[[package]] +name = "execnet" +version = "2.1.1" +description = "execnet: rapid multi-Python deployment" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"}, + {file = "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"}, +] + +[package.extras] +testing = ["hatch", "pre-commit", "pytest", "tox"] + [[package]] name = "filelock" version = "3.18.0" @@ -2354,6 +2369,30 @@ nodeenv = ">=0.11.1" pyyaml = ">=5.1" virtualenv = ">=20.10.0" +[[package]] +name = "psutil" +version = "7.0.0" +description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." +optional = false +python-versions = ">=3.6" +groups = ["dev"] +files = [ + {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, + {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993"}, + {file = "psutil-7.0.0-cp36-cp36m-win32.whl", hash = "sha256:84df4eb63e16849689f76b1ffcb36db7b8de703d1bc1fe41773db487621b6c17"}, + {file = "psutil-7.0.0-cp36-cp36m-win_amd64.whl", 
hash = "sha256:1e744154a6580bc968a0195fd25e80432d3afec619daf145b9e5ba16cc1d688e"}, + {file = "psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99"}, + {file = "psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553"}, + {file = "psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456"}, +] + +[package.extras] +dev = ["abi3audit", "black (==24.10.0)", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest", "pytest-cov", "pytest-xdist", "requests", "rstcheck", "ruff", "setuptools", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "vulture", "wheel"] +test = ["pytest", "pytest-xdist", "setuptools"] + [[package]] name = "pyasn1" version = "0.6.1" @@ -2662,6 +2701,28 @@ pytest = ">=6.2.5" [package.extras] dev = ["pre-commit", "pytest-asyncio", "tox"] +[[package]] +name = "pytest-xdist" +version = "3.6.1" +description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pytest_xdist-3.6.1-py3-none-any.whl", hash = "sha256:9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7"}, + {file = "pytest_xdist-3.6.1.tar.gz", hash = "sha256:ead156a4db231eec769737f57668ef58a2084a34b2e55c4a8fa20d861107300d"}, +] + +[package.dependencies] +execnet = ">=2.1" +psutil = {version = ">=3.0", optional = true, markers = "extra == \"psutil\""} +pytest = ">=7.0.0" + +[package.extras] +psutil = ["psutil (>=3.0)"] +setproctitle = ["setproctitle"] +testing = ["filelock"] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -4063,4 +4124,4 @@ pdf = ["marker-pdf"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<4" -content-hash = "5a1dee7fcc054de35ca27951eb1cba6fa330a97fcb090de354c4e7cc0741d70c" +content-hash = 
"6109db165c5237d16faef528db7295ae835d60cadbfc064a19903598334436ed" diff --git a/pyproject.toml b/pyproject.toml index 877ce316..72e07e7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ types-tqdm = "^4.67.0.20250319" types-jsonschema = "^4.23.0.20241208" lxml-stubs = "^0.5.1" pandas-stubs = "^2.2.3.250308" +pytest-xdist = {extras = ["psutil"], version = "^3.6.1"} [tool.poetry.group.docs.dependencies] mkdocs = "^1.6.0" @@ -84,7 +85,7 @@ module = ["nltk.*", "fuzzywuzzy.*", "bioc.*", "marker.*"] ignore_missing_imports = true [tool.pytest.ini_options] -addopts = "-v -p no:warnings --cov=autocorpus --cov-branch --cov-report=xml --cov-report=html --doctest-modules --ignore=docs/ --ignore=site/" +addopts = "-v -p no:warnings -n auto --cov=autocorpus --cov-branch --cov-report=xml --cov-report=html --doctest-modules --ignore=docs/ --ignore=site/" [tool.ruff] target-version = "py310" From b4febd8678d81c81a037d79620b5d62d471dc325 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 23 May 2025 07:42:36 +0100 Subject: [PATCH 046/125] Remove redundant conversion to `str` Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_regression.py b/tests/test_regression.py index c38c5000..b9f23f5f 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -33,7 +33,7 @@ def _get_html_test_data_paths(subfolder: str): dir_path = HTML_DATA_PATH / dir_name if dir_path.is_dir(): # Assume the folder name corresponds to a DefaultConfig - config = getattr(DefaultConfig, str(dir_name)).load_config() + config = getattr(DefaultConfig, dir_name).load_config() for file_path in dir_path.glob("*.html"): # The reason for converting the path to a string is so that we get the From 336febabed84bb76c31d33bd22a752a108dec13b Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 23 May 2025 07:56:59 +0100 Subject: [PATCH 047/125] Use custom PAT so GitHub can 
access private data --- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a661ff09..dca28e9f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,6 +20,10 @@ jobs: python-version: ['3.10', '3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v4 + with: + # Use a custom PAT so the runners can access the private submodule + token: ${{ secrets.PAT }} + submodules: true - uses: ./.github/actions/setup with: python-version: ${{ matrix.python-version }} From e3f7806fcbe7a26bf55c0ebda0b5738dbd219fa3 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 23 May 2025 08:10:17 +0100 Subject: [PATCH 048/125] Add instructions for downloading private data --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 572f6b02..5eb6636e 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,14 @@ To get started: 1. [Download and install Poetry](https://python-poetry.org/docs/#installation) following the instructions for your OS. 1. Clone this repository and make it your working directory +1. (Optionally) download private test data for additional regression tests. This uses data which + cannot be redistributed publicly (only available to members of the + [omicsNLP](https://github.com/omicsNLP) organisation). + + ```sh + git submodule update --init + ``` + 1. 
Set up the virtual environment: ```sh From ee64f4cf9c2edfd155495108a7bf7a14be334f82 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 23 May 2025 08:44:34 +0100 Subject: [PATCH 049/125] Add readme for test data --- tests/data/README.md | 47 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 tests/data/README.md diff --git a/tests/data/README.md b/tests/data/README.md new file mode 100644 index 00000000..7728ad80 --- /dev/null +++ b/tests/data/README.md @@ -0,0 +1,47 @@ +# Test data for Auto-CORPus + +This folder contains data for regression tests. It is divided into public and private, depending on whether the files' licences permit redistribution. + +## Private data + +The private data is only available to members of the [omicsNLP organisation], though you can still run the other regression tests without it. + +[omicsNLP organisation]: https://github.com/omicsNLP + +### Downloading the data + +The data is housed in a [git submodule]. To download the data, run: + +```sh +git submodule update --init +``` + +[git submodule]: https://git-scm.com/book/en/v2/Git-Tools-Submodules + +### Adding new test data + +Data must be committed to the `main` branch of the [Auto-CORPus-private-test-data] repository and pushed, so that it is available to other developers and the GitHub runners. (You can do this directly from the submodule directory.) The data should be structured as described in the section below. + +Once you have updated the private test data repository, you will also need to update the commit that the submodule points to in the main repository (this one) before making a pull request: + +```sh +# Update submodule +cd tests/data/private +git checkout main +git pull + +# Make commit in main repo +cd ../../.. 
+git add tests/data/private +git commit -m "Obtain new private data" +``` + +[Auto-CORPus-private-test-data]: https://github.com/omicsNLP/Auto-CORPus-private-test-data + +## Structure of data + +The `public` and `private` subfolders are each structured in the same way. + +Currently only data for HTML tests is provided and it is in a folder called `html`. Within that folder, there are subfolders whose names **must** correspond to a [`DefaultConfig`] (e.g. `LEGACY_PMC`). The subfolders contain the test data (i.e. HTML files) along with the expected output files (i.e. JSON files generated by Auto-CORPus). If you add new test data, you must add the corresponding output files at the same time. + +[`DefaultConfig`]: https://omicsnlp.github.io/Auto-CORPus/reference/autocorpus/configs/default_config/#autocorpus.configs.default_config.DefaultConfig \ No newline at end of file From 98896c3f6b0a8be193b73699e87ae3f211d9993d Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 23 May 2025 08:47:07 +0100 Subject: [PATCH 050/125] Update private data repo --- tests/data/private | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data/private b/tests/data/private index 43ea57de..ebc5c7cd 160000 --- a/tests/data/private +++ b/tests/data/private @@ -1 +1 @@ -Subproject commit 43ea57de0858c05b89d348a3dcb1000840bb025b +Subproject commit ebc5c7cd9d70664aeb75565603075e70f046e9e0 From c2d33b73b86437ec4180cebdf6d06ed69baea415 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Fri, 23 May 2025 12:19:07 +0100 Subject: [PATCH 051/125] Review fixes/changes and altered supplementary tests to use temp directories for requested cleanliness --- .gitignore | 4 - autocorpus/bioc_supplementary.py | 21 +- autocorpus/word_extractor.py | 83 +- pyproject.toml | 4 +- .../Supplementary/Word/mmc1.doc_bioc.json | 846 ------------------ tests/test_regression.py | 95 +- 6 files changed, 119 insertions(+), 934 deletions(-) delete mode 100644 tests/data/Supplementary/Word/mmc1.doc_bioc.json 
diff --git a/.gitignore b/.gitignore index d7711bdf..f96a9527 100644 --- a/.gitignore +++ b/.gitignore @@ -166,7 +166,3 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ - -# Output files from running pytest locally -tests/data/Supplementary/PDF/tp-10-08-2123-coif.pdf_bioc.json -tests/data/Supplementary/PDF/tp-10-08-2123-coif.pdf_tables.json diff --git a/autocorpus/bioc_supplementary.py b/autocorpus/bioc_supplementary.py index 8f96580a..83ada665 100644 --- a/autocorpus/bioc_supplementary.py +++ b/autocorpus/bioc_supplementary.py @@ -1,6 +1,7 @@ """This module provides functionality for converting text extracted from various file types into a BioC format.""" import datetime +from dataclasses import dataclass from typing import TypeVar, cast import pandas as pd @@ -20,6 +21,14 @@ ) +@dataclass +class WordText: + """Represents a text element extracted from a Word document.""" + + text: str + is_header: bool + + def _split_text_and_tables(text: str) -> tuple[list[str], list[list[str]]]: """Splits PDF text into main text lines and raw table lines.""" lines = [x for x in text.splitlines() if x] @@ -203,7 +212,7 @@ class BioCTextConverter: @staticmethod def build_bioc( - text: str | list[tuple[str, bool]], input_file: str, file_type: str + text: str | list[WordText], input_file: str, file_type: str ) -> BioCCollection: """Builds a BioCCollection object from the provided text, input file, and file type. 
@@ -221,7 +230,7 @@ def build_bioc( bioc.key = "autocorpus_supplementary.key" temp_doc = BioCDocument(id="1") if file_type == "word": - text = cast(list[tuple[str, bool]], text) + text = cast(list[WordText], text) temp_doc.passages = BioCTextConverter.__identify_word_passages(text) elif file_type == "pdf": text = cast(str, text) @@ -254,12 +263,12 @@ def __identify_passages(text: str | list[str]) -> list[BioCPassage]: return passages @staticmethod - def __identify_word_passages(text: list[tuple[str, bool]]) -> list[BioCPassage]: + def __identify_word_passages(text: list[WordText]) -> list[BioCPassage]: offset = 0 passages = [] - for paragraph, is_header in text: - paragraph = paragraph.replace("\n", "") - if paragraph.isupper() or is_header: + for t in text: + paragraph = t.text.replace("\n", "") + if paragraph.isupper() or t.is_header: iao_name = "document title" iao_id = "IAO:0000305" else: diff --git a/autocorpus/word_extractor.py b/autocorpus/word_extractor.py index 5aaccb8b..c7c7a0be 100644 --- a/autocorpus/word_extractor.py +++ b/autocorpus/word_extractor.py @@ -8,55 +8,58 @@ import subprocess from pathlib import Path -import docx from docx import Document - -from autocorpus.ac_bioc.bioctable.collection import BioCTableCollection -from autocorpus.ac_bioc.bioctable.json import BioCTableJSON -from autocorpus.ac_bioc.collection import BioCCollection -from autocorpus.ac_bioc.json import BioCJSON -from autocorpus.bioc_supplementary import BioCTableConverter, BioCTextConverter +from docx.document import Document as DocumentObject +from pandas import DataFrame from . 
import logger +from .ac_bioc.bioctable.collection import BioCTableCollection +from .ac_bioc.bioctable.json import BioCTableJSON +from .ac_bioc.collection import BioCCollection +from .ac_bioc.json import BioCJSON +from .bioc_supplementary import ( + BioCTableConverter, + BioCTextConverter, + WordText, +) -def __extract_tables(doc: docx.document.Document) -> list[list[list[str]]]: - """Extracts tables from a .docx document. +def __extract_tables(doc: DocumentObject) -> list[DataFrame]: + """Extracts tables from a .docx document as a list of DataFrames. Args: doc: The Document object representing the .docx document. Returns: - list: A list of tables extracted from the document. Each table is represented as a nested list, - where each inner list corresponds to a row, and each element in the inner list corresponds - to the text content of a cell in the row. + List[pd.DataFrame]: A list of pandas DataFrames, each representing a table in the document. Example: from docx import Document doc = Document("document.docx") - tables = extract_tables(doc) + tables = __extract_tables(doc) """ - # Open the .docx file - tables: list[list[list[str]]] = [] - # Iterate through the tables in the document + dataframes: list[DataFrame] = [] + for table in doc.tables: - tables.append([]) - # Iterate through the rows in the table + data = [] for row in table.rows: - tables[-1].append([x.text for x in row.cells]) - return tables + data.append([cell.text.strip() for cell in row.cells]) + df = DataFrame(data) + dataframes.append(df) + return dataframes -def __windows_convert_doc_to_docx(docx_path: Path, file: Path) -> Path | bool: + +def __windows_convert_doc_to_docx(docx_path: Path, file: Path) -> Path | None: """Converts a .doc file to .docx format using Microsoft Word on Windows.""" try: import win32com.client - except ImportError as e: + except ImportError: logger.error( "pywin32 is required to convert Word documents on Windows. Please install it via 'pip install pywin32'." 
) - return False + return None word = None try: @@ -70,7 +73,7 @@ def __windows_convert_doc_to_docx(docx_path: Path, file: Path) -> Path | bool: return docx_path except Exception as e: logger.exception(f"Failed to convert '{file}' on Windows: {e}") - return False + return None finally: if word: try: @@ -79,7 +82,7 @@ def __windows_convert_doc_to_docx(docx_path: Path, file: Path) -> Path | bool: logger.warning(f"Could not quit Word application cleanly: {quit_err}") -def __linux_convert_doc_to_docx(docx_path: Path, file: Path) -> Path | bool: +def __linux_convert_doc_to_docx(docx_path: Path, file: Path) -> Path | None: """Converts a .doc file to .docx format using LibreOffice on Linux.""" try: result = subprocess.run( @@ -102,19 +105,26 @@ def __linux_convert_doc_to_docx(docx_path: Path, file: Path) -> Path | bool: logger.error( "LibreOffice ('soffice') not found. Please install it to enable DOC to DOCX conversion." ) - return False + return None except subprocess.CalledProcessError as e: logger.exception(f"LibreOffice failed to convert '{file}': {e.stderr}") - return False + return None + + +def __escape_applescript_path(path: Path) -> str: + # Convert to absolute path just in case + path = path.absolute() + # Escape backslashes and double quotes for AppleScript + return str(path).replace("\\", "\\\\").replace('"', '\\"') -def __macos_convert_doc_to_docx(docx_path: Path, file: Path) -> Path | bool: +def __macos_convert_doc_to_docx(docx_path: Path, file: Path) -> Path | None: """Converts a .doc file to .docx format using AppleScript on macOS.""" try: applescript = f''' tell application "Microsoft Word" - open "{file}" - save as active document file name "{docx_path}" file format format document + open "{__escape_applescript_path(file)}" + save as active document file name "{__escape_applescript_path(docx_path)}" file format format document close active document saving no end tell ''' @@ -127,26 +137,23 @@ def __macos_convert_doc_to_docx(docx_path: Path, file: Path) 
-> Path | bool: logger.error( "osascript not found. Ensure you have AppleScript and Microsoft Word installed on macOS." ) - return False + return None except subprocess.CalledProcessError as e: logger.exception(f"AppleScript failed to convert '{file}': {e}") - return False + return None -def __convert_older_doc_file(file: Path, output_dir: Path) -> Path | bool: +def __convert_older_doc_file(file: Path, output_dir: Path) -> Path | None: """Converts an older .doc file to .docx format using platform-specific methods.""" operating_system = platform.system() docx_path = output_dir / file.with_suffix(".docx").name if operating_system == "Windows": return __windows_convert_doc_to_docx(docx_path, file) - elif operating_system == "Linux": - return __linux_convert_doc_to_docx(docx_path, file) elif operating_system == "Darwin": # macOS return __macos_convert_doc_to_docx(docx_path, file) else: - logger.error(f"Unsupported operating system: {operating_system}") - return False + return __linux_convert_doc_to_docx(docx_path, file) # Fallback to Linux method def extract_word_content(file_path: Path): @@ -170,7 +177,7 @@ def extract_word_content(file_path: Path): ] ) paragraphs = [ - ( + WordText( x.text, True if text_sizes diff --git a/pyproject.toml b/pyproject.toml index 244a3213..ac8fd7bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,9 +37,7 @@ lxml = "^5.3.0" pandas = "^2.2.3" marker-pdf = "^1.6.2" lxml-stubs = "^0.5.1" -pandas-stubs = "^2.2.3.250308" python-docx = "^1.1.2" -types-pywin32 = "^310.0.0.20250516" [tool.poetry.group.dev.dependencies] pytest = "^8.3.2" @@ -53,6 +51,8 @@ types-beautifulsoup4 = "^4.12.0.20250204" types-regex = "^2024.11.6.20250318" types-tqdm = "^4.67.0.20250319" types-jsonschema = "^4.23.0.20241208" +types-pywin32 = "^310.0.0.20250516" +pandas-stubs = "^2.2.3.250308" [tool.poetry.group.docs.dependencies] mkdocs = "^1.6.0" diff --git a/tests/data/Supplementary/Word/mmc1.doc_bioc.json b/tests/data/Supplementary/Word/mmc1.doc_bioc.json 
deleted file mode 100644 index 9908a236..00000000 --- a/tests/data/Supplementary/Word/mmc1.doc_bioc.json +++ /dev/null @@ -1,846 +0,0 @@ -{ - "source": "Auto-CORPus (supplementary)", - "date": "20250520", - "key": "autocorpus_supplementary.key", - "infons": {}, - "documents": [ - { - "id": "1", - "infons": {}, - "inputfile": "/mnt/sda2/Projects/Auto-CORPus/tests/data/Supplementary/Word/mmc1.doc", - "passages": [ - { - "offset": 0, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "The occurrence of a multidrug-resistant tuberculous retropharyngeal abscess in an immunocompetent patient: a case report", - "annotations": [], - "relations": [] - }, - { - "offset": 120, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Tiresse N4* , Oucharqui S2*,Benaissa E1, 2, Badri B4 Bssaibis F2, Maleb A3, Elouennass M1,2", - "annotations": [], - "relations": [] - }, - { - "offset": 212, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "1Epidemiology and bacterial resistance research team/BIO-INOVA Centre, Faculty of Medicine and Pharmacy (University Mohammed V), Rabat, Morocco. ", - "annotations": [], - "relations": [] - }, - { - "offset": 357, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "2Department of Bacteriology, Mohammed V Military Teaching Hospital / Faculty of Medicine and Pharmacy (University Mohammed V), Rabat, Morocco. 
", - "annotations": [], - "relations": [] - }, - { - "offset": 500, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "3Laboratory of Microbiology, Mohammed VI University Hospital / Faculty of Medicine and Pharmacy (University Mohammed the first), Oujda, Morocco.", - "annotations": [], - "relations": [] - }, - { - "offset": 644, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "4Department of pneumology, Mohammed V Military Teaching Hospital / Faculty of Medicine and Pharmacy (University Mohammed V), Rabat, Morocco.", - "annotations": [], - "relations": [] - }, - { - "offset": 784, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "* Oucharqui sara and Tiresse nabil have contributed equally in the elaboration of the work.", - "annotations": [], - "relations": [] - }, - { - "offset": 875, - "infons": { - "iao_name_1": "document title", - "iao_id_1": "IAO:0000305" - }, - "text": "Corresponding author: Elmostafa Benaissa : benaissaelmostafa2@gmail.com", - "annotations": [], - "relations": [] - }, - { - "offset": 947, - "infons": { - "iao_name_1": "document title", - "iao_id_1": "IAO:0000305" - }, - "text": "Tiresse Nabil: nabil.tiresse1@gmail.com", - "annotations": [], - "relations": [] - }, - { - "offset": 986, - "infons": { - "iao_name_1": "document title", - "iao_id_1": "IAO:0000305" - }, - "text": "Oucharqui Sara: oucharqui@gmail.com", - "annotations": [], - "relations": [] - }, - { - "offset": 1021, - "infons": { - "iao_name_1": "document title", - "iao_id_1": "IAO:0000305" - }, - "text": "Elmostafa Benaissa : benaissaelmostafa2@gmail.com", - "annotations": [], - "relations": [] - }, - { - "offset": 1071, - "infons": { - "iao_name_1": "document title", - "iao_id_1": "IAO:0000305" - }, - "text": "Badri bouchra: bouchra.ba04@gmail.com", - "annotations": [], - "relations": [] - }, - { - "offset": 
1108, - "infons": { - "iao_name_1": "document title", - "iao_id_1": "IAO:0000305" - }, - "text": "Bssaibis fatna: bssaibisfatna@yahoo.fr", - "annotations": [], - "relations": [] - }, - { - "offset": 1146, - "infons": { - "iao_name_1": "document title", - "iao_id_1": "IAO:0000305" - }, - "text": "Adil Maleb: maleb.adil@gmail.com", - "annotations": [], - "relations": [] - }, - { - "offset": 1178, - "infons": { - "iao_name_1": "document title", - "iao_id_1": "IAO:0000305" - }, - "text": "Mostafa Elouennass: elouennassm@yahoo.fr", - "annotations": [], - "relations": [] - }, - { - "offset": 1218, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 1218, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 1218, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 1218, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 1218, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 1218, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 1218, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 1218, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - 
"offset": 1218, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Abstract:", - "annotations": [], - "relations": [] - }, - { - "offset": 1227, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Retropharyngeal abscess is an uncommon location of tuberculosis (TB). In this report, we describe a multidrug-resistant tuberculous retropharyngeal abscess in a 21-year-old female patient who was treated for lymph node TB for one year. CT scan revealed a large retropharyngeal abscess that was aspirated intraorally under local anesthesia. The diagnosis of TB was retained by molecular and histological study. GeneXpert MTB/ RIF (Cepheid, Sunnyvale, CA, USA),performed on the pus, showed rifampicin resistance and a first- and second-line drug resistance test using Genotype MTBDRplus VER.2 and MTBDRsl VER.1 (Hain Lifescience GmbH, Nehren, Germany) showed TB highly resistant to rifampicin, isoniazid, and aminoglycosides. Treatment is primarily medical as it combines specific antituberculous antibiotics, and aspiration for drainage of the abscess. Our patient was put on long-term 2nd line anti-TB treatment. 
", - "annotations": [], - "relations": [] - }, - { - "offset": 2142, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Key words: Multidrug-resistant, Retropharyngeal abscess, GeneXpert MTB/RIF, GenoType MTBDRplus, GenoType MTBDRsl", - "annotations": [], - "relations": [] - }, - { - "offset": 2254, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 2254, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Introduction", - "annotations": [], - "relations": [] - }, - { - "offset": 2266, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Tuberculous retropharyngeal abscess is not frequently reported in the literature and pre-extensive tuberculous retropharyngeal abscess is even less frequently reported [1]. Early recognition of this condition is essential to prevent serious complications. The diagnosis is difficult and relies on a combination of clinical, radiological and biological arguments. 
We report a case of multidrug resistant (MDR) tuberculous retropharyngeal abscess in a 21-year-old female patient treated for lymph node tuberculosis (TB) for one year and discuss the different diagnostic and therapeutic elements of this pathology, highlighting the contribution of molecular biology in the effective management of MDR extra-pulmonary TB.", - "annotations": [], - "relations": [] - }, - { - "offset": 2983, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Case report", - "annotations": [], - "relations": [] - }, - { - "offset": 2994, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "This is a 21-year-old female with a history of chronic headache for several years with Chiari decompression surgery performed in 2017 and latero-cervical adenopathy diagnosed as lymph node TB on bacteriological, molecular and histological arguments in 2019. GeneXpert MTB/RIF performed on the cervical lymph node came back positive for TB, without resistance to rifampicin. She was then treated at another institution according to the national protocol which includes quadritherapy with isoniazid, rifampicin, ethambutol and pyrazinamide for 2 months followed by bitherapy with isoniazid and rifampicin for 10 months (2RHZE/10RH). The evolution was then marked by the disappearance of the lymph nodes after one year of treatment. Six months after the end of treatment, the patient presented to the emergency room with severe headaches.", - "annotations": [], - "relations": [] - }, - { - "offset": 3830, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Otherwise, no cough, chest pain, fever, or loss of appetite was reported. The patient noted no signs of trismus or difficulty breathing. She reported no known allergies and had no history of smoking or drinking alcohol. 
On admission, physical examination revealed a body temperature of 36.6\u00b0C, a heart rate of 90 beats/min, and a blood pressure of 117/75 mmHg. Palpation of both sides of the neck revealed no tenderness and no lymph nodes were noted. Examination of the oral cavity revealed no pathologic findings, and no posterior pharyngeal wall projections were observed. The lungs were clear on auscultation and no neurologic deficits were noted on initial clinical examination. ", - "annotations": [], - "relations": [] - }, - { - "offset": 4514, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "The biological workup showed hemoglobin at 12.6 g/l; white blood cell count at 4.8 G/l; and C-reactive protein at 0.8 mg/l. In addition, serologies for human immunodeficiency virus (HIV), hepatitis B, and hepatitis C were negative.", - "annotations": [], - "relations": [] - }, - { - "offset": 4745, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "A cerebral CT scan performed as part of the etiological diagnosis fortuitously revealed a peripherally enhanced collection in the retropharyngeal area after injection of contrast medium measuring 19x21 mm, associated with an adjacent necrotic adenopathy measuring 10x06 mm. 
(figure 1).", - "annotations": [], - "relations": [] - }, - { - "offset": 5030, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 5030, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Figure 1: Sagittal CT scan revealed a peripherally enhanced collection in the retropharyngeal area after injection of contrast medium.", - "annotations": [], - "relations": [] - }, - { - "offset": 5164, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 5164, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "A cervical MRI was realized later and confirmed the presence of the retropharyngeal collection. (figure 2)", - "annotations": [], - "relations": [] - }, - { - "offset": 5270, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 5270, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 5270, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Figure 2: Sagittal MRI revealed the presence of the retropharyngeal collection", - "annotations": [], - "relations": [] - }, - { - "offset": 5348, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 5348, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "The abscess was drained under local anesthesia. 
02 milliliters of pus were aspirated. The specimen was sent for bacteriological analysis for Mycobacterium tuberculosis complex (MTC) and banal germs as well as for pathological study. A molecular study using GeneXpert MTB/RIF (Cepheid, Sunnyvale, CA, USA) resulted in detection of MTC with detection of rifampicin resistance in less than 2 hours. In response to this rifampicin resistance, we performed other molecular tests, including GenoType MTBDRplus VER. 2 and GenoType MTBDRsl VER.1 (Hain Lifescience GmbH, Nehren, Germany) on the pus to confirm rifampicin resistance and also to investigate resistance to other anti-TB drugs. It should be noted that this technique is not validated on extrapulmonary specimens directly, although many studies have showed a good correlation with the usual resistance screening methods. The MTBDRplus VER. 2 showed resistance to both rifampicin and isoniazid, while MTBDRsl VER.1 showed resistance only to aminoglycosides. ", - "annotations": [], - "relations": [] - }, - { - "offset": 6360, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Direct examination after special Ziehl-Nielsen staining was positive and cultures on Lowenstein-Jensen\u00ae (LJ) solid medium and Mycobacteria Growth Indicator Tube (MGIT\u00ae) liquid medium were positive after 32 days and 12 days respectively, thus confirming the molecular diagnosis.", - "annotations": [], - "relations": [] - }, - { - "offset": 6637, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "A treatment was initiated on the basis of molecular data. 
The histopathological study confirmed the molecular diagnosis by showing epithelioid and gigantocellular granulomas with caseous necrosis, without histological evidence of malignancy.", - "annotations": [], - "relations": [] - }, - { - "offset": 6878, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Subsequently, the patient was put on a long-term protocol consisting of 6 months of bedaquiline, levofloxacin, linezolid, clofazimine, and cycloserine and 12 to 14 months of levofloxacin, linezolid, clofazimine, and cycloserine.", - "annotations": [], - "relations": [] - }, - { - "offset": 7106, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "After 1 month of treatment, the antibacillary drugs appear to be well tolerated, and the patient is still being monitored.", - "annotations": [], - "relations": [] - }, - { - "offset": 7228, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Discussion", - "annotations": [], - "relations": [] - }, - { - "offset": 7238, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 7238, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "TB remains a major public health problem in the world, mainly affecting developing countries [2]. Its incidence has also increased in developed countries, partly due to co-infection with HIV [2], the latter being more frequent in extra-pulmonary forms [3].", - "annotations": [], - "relations": [] - }, - { - "offset": 7494, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": " The 2019 WHO report estimates the number of new cases at 10 million and the number of deaths at 1.5 million [4]. 
TB usually affects the lungs (pulmonary) or sometimes other organs (extrapulmonary). Excluding laryngeal TB, TB of the head and neck is rare and constitutes 2-6% of extrapulmonary TB and 0.1-1% of all forms of TB [5]. Retropharyngeal localization is rare [1].", - "annotations": [], - "relations": [] - }, - { - "offset": 7868, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Infection of the retropharyngeal space and subsequent abscess formation are mainly due to acute bacterial infections of the head and neck region, especially in children, injury to the posterior pharyngeal wall, and forward spread of spinal TB [6].", - "annotations": [], - "relations": [] - }, - { - "offset": 8115, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Spread to the retropharyngeal space occurs via lymphatics involving persistent retropharyngeal nodes or by hematogenous spread from pulmonary or extrapulmonary sites [5]. In our patient, the retropharyngeal abscess was probably due to lymphatic dissemination from lymph node TB because radiological exploration revealed a centimetric adenopathy with a necrotic center adjacent to the retropharyngeal abscess and there was no evidence of any distant involvement that could support hematogenous, pulmonary, or other dissemination. Tuberculous retropharyngeal abscess in an immunocompetent adult is rare [6]. ", - "annotations": [], - "relations": [] - }, - { - "offset": 8721, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Drug-resistant TB represents a major challenge to national, regional and global TB control programs. Some MDR strains have developed additional resistance mechanisms to second-line antibacillaries, namely fluoroquinolones and aminoglycosides [7]. 
Each year, 500,000 cases of MDR-TB or rifampicin-resistant TB (RR-TB) and nearly 200,000 deaths are reported worldwide. ", - "annotations": [], - "relations": [] - }, - { - "offset": 9088, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "In 2019, the reported treatment success rate was 56% for MDR and extensively drug-resistant (XDR) TB cases and 39% for XDR-TB [4]. In Morocco, where TB remains endemic, the 2014 National TB Drug Resistance Survey found a low prevalence of MDR/XDR-TB (1% MDR-TB among new cases and 8.7% among previously treated cases) [4]. ", - "annotations": [], - "relations": [] - }, - { - "offset": 9412, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "In 2019, 235 cases of drug-resistant TB were treated in Morocco, and 1500 cumulative cases have been reported since 2012 [4]. MDR extrapulmonary localizations have rarely been described in the literature [3,7,8]. An Indian study published in 2014 reported 3 cases, including 2 lymph node localizations and 1 cervical cold abscess [3]. ", - "annotations": [], - "relations": [] - }, - { - "offset": 9747, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "MDR extrapulmonary forms are more frequent in young female subjects with a history of TB [8]. This is in accordance with our case. Another Moroccan study published in 2018 presented 7 cases of MDR extrapulmonary TB, of which 6 patients had a history of TB and 1 patient had a therapeutic failure [7]. 
4 of these 7 patients had additional resistance to second-line anti-TB drugs [7].", - "annotations": [], - "relations": [] - }, - { - "offset": 10129, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": " The diagnosis of MDR in extrapulmonary forms should be made by tissue or biological fluid sampling, but this is sometimes difficult [3]. Tuberculous retropharyngeal abscess can present with variable manifestations, ranging from asymptomatic to subtle features such as odynophagia alone and neck pain, due to early stage and lesser severity of the disease, to life-threatening respiratory obstruction [6]. Our patient had only chronic headache that can be attributed to her Chiari malformation. In addition, the general condition was preserved. On throat examination, swelling due to tuberculous retropharyngeal abscess is usually located in the midline [6].", - "annotations": [], - "relations": [] - }, - { - "offset": 10787, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Radiologic imaging plays an important role in demonstrating the extent of the abscess and the involvement of surrounding structures [2,5]. CT has an accuracy of 89% and MRI is even more accurate, as it allows for better soft tissue analysis and allows for the assessment of vascular complications, including internal jugular vein thrombosis [2,5]. Both CT and MRI in our patient showed the retropharyngeal abscess. ", - "annotations": [], - "relations": [] - }, - { - "offset": 11202, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "TB was first diagnosed by direct microscopic examination and the discovery of acid-fast bacilli in the abscess aspirate using Ziehl-Neelsen stain, and then confirmed by culture, which remains the gold standard method [2]. 
", - "annotations": [], - "relations": [] - }, - { - "offset": 11424, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Molecular biology has demonstrated its effectiveness even on pauci-bacillary specimens by allowing the identification and detection of resistance to anti-TB drugs through several studies. GeneXpert MTB/RIF is a rapid, automated, World Health Organization (WHO)-recommended nucleic acid amplification test that is widely used for the simultaneous detection of MTC and rifampicin resistance in pulmonary and extrapulmonary specimens. It has a sensitivity of more than 80% in cerebral spine fluid, pus and biopsy fragments [7]. In our study, GeneXpert MTB/RIF (Cepheid, Sunnyvale, CA, USA) allowed identification of MTC and detection of rifampicin resistance. ", - "annotations": [], - "relations": [] - }, - { - "offset": 12081, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "In addition to the GeneXpert MTB/RIF, there are the MTBDRplus and MTBDRsl genotype tests which allow the identification of MTC from pulmonary clinical specimens or cultivated samples. The MTBDRplus test is used to identify resistance to rifampicin and isoniazid [7]. The MTBDRsl test is designed to detect resistance to the second-line antibacillary drugs, namely aminoglycosides on the gyrA gene, fluoroquinolones on the rrs gene, and ethambutol on the embB gene [7]. The MTBDRplus test and the MTBDRsl test have a sensitivity greater than 80% for the detection of resistance to rifampicin, isoniazid, fluoroquinolones, and aminoglycosides [7]. The discovery of an additional aminoglycoside resistance makes the choice of treatment even more difficult. These tests have been shown to be effective in detecting resistance to anti-TB drugs from extrapulmonary samples, even though they are not validated on these samples. 
This has been reported in some studies [9, 10].", - "annotations": [], - "relations": [] - }, - { - "offset": 13049, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "In our case, the aspiration was positive by GeneXpert MTB/RIF with a detection of rifampicin resistance. The MTBDRplus test confirmed resistance to rifampicin and isoniazid and the MTBDRsl test showed additional resistance to aminoglycosides. Later on, mycobacterial culture on solid and liquid media both became positive after 32 days and 12 days respectively. Pre-ultraresistant TB (pre-XDR TB) is defined as MDR/RR-TB in addition to resistance to any fluoroquinolones (levofloxacin and moxifloxacin). ", - "annotations": [], - "relations": [] - }, - { - "offset": 13553, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Antibacillary drug resistance can be primary or secondary, primary drug resistance is defined as resistance in a patient who has never been treated for TB. Treatment with anti-TB drugs exerts selective pressure on the Mycobacterium tuberculosis population, resulting in a decrease in susceptible bacilli, an increase in drug-resistant mutants, and the emergence of drug resistance (acquired resistance). Given her previously treated lymph node TB, it seems safe to assume that our patient has acquired drug resistance. ", - "annotations": [], - "relations": [] - }, - { - "offset": 14072, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "In recent years, significant progress has been made in the rapid diagnosis of TB and drug resistance, as well as in treatment: new drugs, reduction of the age of indication for certain drugs as well as modification of the classification of drugs used to treat MDR-TB. 
", - "annotations": [], - "relations": [] - }, - { - "offset": 14340, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "For MDR-TB of all forms, the WHO recommends a short regimen of 9 to 11 months, which includes a 4 to 6 month loading phase with high dose amikacin, moxifloxacin, etionamide, clofazimine, pyrazinamide, ethambutol and high dose of isoniazid. In the maintenance phase, patients are put on moxifloxacin, clofazimine, pyrazinamide and ethambutol [11]. Another recent WHO review in 2020 updated the recommendations eliminating short regimens containing injectables, replacing them with a short regimen containing bedaquiline [4]. Another WHO trial approved by the FDA in 2019 recommends the combination of bedaquiline, linezolid, and pretomanide for ultraresistant TB or XDR-TB for 9 months if the three molecules have not been taken previously [4,11].", - "annotations": [], - "relations": [] - }, - { - "offset": 15087, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "In Morocco, the short regimen has been adapted for some cases, but the old long regimen is still widely prescribed. This long regimen is based on 6 months of initial treatment with bedaquiline combined with levofloxacin, linezolid, clofazimine and cycloserine, followed by cessation of bedaquiline and maintenance of the remainder for 12 to 14 months if there is no resistance to group A and B molecules [4]. Our patient was put on a standard regimen by replacing aminoglycosides with bedaquiline. 
The simultaneous medical and surgical approach seems to be the best strategy for the management of tuberculous retropharyngeal abscess [3,5].", - "annotations": [], - "relations": [] - }, - { - "offset": 15726, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "As with any abscess, the mainstay of management of retro-pharyngeal tubercular abscess is drainage of the pus. Therapeutic aspiration only has been used successfully and can be repeated if necessary [2]. Anti-TB drug therapy and conservative neck stabilization should be the initial treatment if a retropharyngeal abscess is due to an extension from cervical spine TB, with a stable spine and without any neurological deficit or with minimal neurological signs [6]. If left untreated, internal jugular vein thrombosis, mediastinitis and airway obstruction are potential complications [1,2]. ", - "annotations": [], - "relations": [] - }, - { - "offset": 16317, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Clinical, bacteriological and radiological surveillance is recommended, as well as monitoring of treatment tolerance [7,11]. The prognosis of MDR pulmonary and extrapulmonary TB has been improved thanks in part to the prescription of new anti-TB drugs such as linezolid and bedaquiline. The success of the treatment is related to the number of effective molecules still available [7]. However, high mortality has been observed in patients with XDR-TB and HIV infection. This could be explained by its synergistic relationship with TB and the emergence of MDR and XDR strains [7]. 
The HIV serology of our patient is negative which could further improve the prognosis of her disease.", - "annotations": [], - "relations": [] - }, - { - "offset": 16998, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Conclusion", - "annotations": [], - "relations": [] - }, - { - "offset": 17008, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Retropharyngeal abscess is a recognized but rare presentation of TB. Unspecified symptoms and unusual location often lead to delayed diagnosis and treatment. Through this case, we highlight the importance of gene amplification tests in the effective and rapid management of this disease.", - "annotations": [], - "relations": [] - }, - { - "offset": 17295, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Competing interests:", - "annotations": [], - "relations": [] - }, - { - "offset": 17315, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "The authors declare no competing interest.", - "annotations": [], - "relations": [] - }, - { - "offset": 17357, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Author contributions:", - "annotations": [], - "relations": [] - }, - { - "offset": 17378, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "OS,TN and BE have been involved in drafting in the manuscript, BF, BY, CM, AM have revising the manuscript and ELM have given final approval of the version to be published.", - "annotations": [], - "relations": [] - }, - { - "offset": 17550, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "References", - "annotations": [], - "relations": [] - }, - { - "offset": 17560, - "infons": { - 
"iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Supplemental file: ", - "annotations": [], - "relations": [] - }, - { - "offset": 17579, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "", - "annotations": [], - "relations": [] - }, - { - "offset": 17579, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Definitions:", - "annotations": [], - "relations": [] - }, - { - "offset": 17591, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "RR: is defined as isolated resistance to rifampicin.", - "annotations": [], - "relations": [] - }, - { - "offset": 17643, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "MDR: is defined as resistance to both rifampicin and isoniazid.", - "annotations": [], - "relations": [] - }, - { - "offset": 17706, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Pre-XDR: is defined as multidrug resistance (MDR/RR-TB) in addition to resistance to any fluoroquinolones (levofloxacin and moxifloxacin).", - "annotations": [], - "relations": [] - }, - { - "offset": 17844, - "infons": { - "iao_name_1": "supplementary material section", - "iao_id_1": "IAO:0000326" - }, - "text": "Ultraresistant TB or extensively drug-resistant TB (XDR-TB): is defined as multidrug resistance (MDR/RR-TB) in addition to resistance to a fluoroquinolone (levofloxacin or moxifloxacin) and at least one of bedaquiline or linezolid (or both).", - "annotations": [], - "relations": [] - } - ], - "relations": [] - } - ] -} \ No newline at end of file diff --git a/tests/test_regression.py b/tests/test_regression.py index 96ddd08d..be1f495a 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -1,11 +1,13 @@ """Primary build test script 
used for regression testing between AC output versions.""" import json +import shutil from pathlib import Path from typing import Any import pytest +from autocorpus.autocorpus import Autocorpus from autocorpus.configs.default_config import DefaultConfig @@ -18,8 +20,6 @@ ) def test_autocorpus(data_path: Path, input_file: str, config: dict[str, Any]) -> None: """A regression test for the main autoCORPus class, using the each PMC config on the AutoCORPus Paper.""" - from autocorpus.autocorpus import Autocorpus - pmc_example_path = data_path / input_file with open( str(pmc_example_path).replace(".html", "_abbreviations.json"), encoding="utf-8" @@ -67,38 +67,48 @@ def test_autocorpus(data_path: Path, input_file: str, config: dict[str, Any]) -> ("Supplementary/PDF/tp-10-08-2123-coif.pdf", DefaultConfig.PMC.load_config()), ], ) -def test_pdf_to_bioc(data_path: Path, input_file: str, config: dict[str, Any]) -> None: - """Test the conversion of a PDF file to a BioC format.""" - from autocorpus.autocorpus import Autocorpus +def test_pdf_to_bioc( + data_path: Path, input_file: str, config: dict[str, Any], tmp_path: Path +) -> None: + """Test the conversion of a PDF file to a BioC format using a temp directory.""" + # Original paths + original_pdf_path = data_path / input_file + expected_output_path = ( + original_pdf_path.parent / "Expected Output" / original_pdf_path.name + ) + + # Temp setup + temp_input_dir = tmp_path / "input" + temp_input_dir.mkdir() + temp_pdf_path = temp_input_dir / original_pdf_path.name + shutil.copy(original_pdf_path, temp_pdf_path) - pdf_path = data_path / input_file - expected_output = pdf_path.parent / "Expected Output" / pdf_path.name + # Load expected outputs with open( - str(expected_output).replace(".pdf", ".pdf_bioc.json"), + str(expected_output_path).replace(".pdf", ".pdf_bioc.json"), encoding="utf-8", ) as f: expected_bioc = json.load(f) with open( - str(expected_output).replace(".pdf", ".pdf_tables.json"), + 
str(expected_output_path).replace(".pdf", ".pdf_tables.json"), encoding="utf-8", ) as f: expected_tables = json.load(f) - ac = Autocorpus( - config=config, - ) - - ac.process_files(files=[pdf_path]) + # Process in temp dir + ac = Autocorpus(config=config) + ac.process_files(files=[temp_pdf_path]) + # Load results with open( - str(pdf_path).replace(".pdf", ".pdf_bioc.json"), + str(temp_pdf_path).replace(".pdf", ".pdf_bioc.json"), encoding="utf-8", ) as f: new_bioc = json.load(f) with open( - str(pdf_path).replace(".pdf", ".pdf_tables.json"), + str(temp_pdf_path).replace(".pdf", ".pdf_tables.json"), encoding="utf-8", ) as f: new_tables = json.load(f) @@ -116,52 +126,61 @@ def test_pdf_to_bioc(data_path: Path, input_file: str, config: dict[str, Any]) - ], ) def test_word_to_bioc( - data_path: Path, input_file: str, config: dict[str, Any], has_tables: bool + data_path: Path, + input_file: str, + config: dict[str, Any], + has_tables: bool, + tmp_path: Path, ) -> None: - """Test the conversion of a doc file to a BioC format.""" - from autocorpus.autocorpus import Autocorpus + """Test the conversion of a doc file to a BioC format using a temp directory.""" + # Original file locations + original_doc_path = data_path / input_file + expected_output_path = ( + original_doc_path.parent / "Expected Output" / original_doc_path.name + ) + + # Copy the input doc file to the temp directory + temp_input_dir = tmp_path / "input" + temp_input_dir.mkdir() + temp_doc_path = temp_input_dir / original_doc_path.name + shutil.copy(original_doc_path, temp_doc_path) - doc_path = data_path / input_file - expected_output = doc_path.parent / "Expected Output" / doc_path.name + # Load expected BioC output with open( - str(expected_output).replace(".doc", ".doc_bioc.json"), + str(expected_output_path).replace(".doc", ".doc_bioc.json"), encoding="utf-8", ) as f: expected_bioc = json.load(f) - if has_tables: - with open( - str(expected_output).replace(".doc", ".doc_tables.json"), - encoding="utf-8", 
- ) as f: - expected_tables = json.load(f) - - ac = Autocorpus( - config=config, - ) - - ac.process_files(files=[doc_path]) + ac = Autocorpus(config=config) + ac.process_files(files=[temp_doc_path]) # Run on temp file + # Load generated BioC output from temp dir with open( - str(doc_path).replace(".doc", ".doc_bioc.json"), + str(temp_doc_path).replace(".doc", ".doc_bioc.json"), encoding="utf-8", ) as f: new_bioc = json.load(f) if has_tables: with open( - str(doc_path).replace(".doc", ".doc_tables.json"), + str(expected_output_path).replace(".doc", ".doc_tables.json"), + encoding="utf-8", + ) as f: + expected_tables = json.load(f) + + with open( + str(temp_doc_path).replace(".doc", ".doc_tables.json"), encoding="utf-8", ) as f: new_tables = json.load(f) _make_reproducible(new_bioc, expected_bioc, new_tables, expected_tables) + assert new_tables == expected_tables else: _make_reproducible(new_bioc, expected_bioc) assert new_bioc == expected_bioc - if has_tables: - assert new_tables == expected_tables def _make_reproducible(*data: dict[str, Any]) -> None: From 58a984a2d28fb7435311549881e13548b4c54be2 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Fri, 23 May 2025 16:24:35 +0100 Subject: [PATCH 052/125] Post-merge cleanup. Removed duplicate dependency entry. Adjusted Word processing entry point to be like the merged main branch version. 
--- autocorpus/autocorpus.py | 2 ++ autocorpus/bioc_supplementary.py | 5 ++++- pyproject.toml | 1 - 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 8383f7f0..43dce306 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -292,6 +292,8 @@ def __process_supplementary_file(self, file: Path): ) raise case ".doc" | ".docx": + from .word_extractor import extract_word_content + extract_word_content(file) case _: pass diff --git a/autocorpus/bioc_supplementary.py b/autocorpus/bioc_supplementary.py index 00944e3e..6463ff86 100644 --- a/autocorpus/bioc_supplementary.py +++ b/autocorpus/bioc_supplementary.py @@ -4,6 +4,7 @@ from dataclasses import dataclass from typing import TypeVar, cast +import regex from pandas import DataFrame from .ac_bioc import ( @@ -18,6 +19,7 @@ BioCTablePassage, ) + @dataclass class WordText: """Represents a text element extracted from a Word document.""" @@ -76,7 +78,7 @@ def _parse_tables(raw_tables: list[list[str]]) -> list[DataFrame]: while len(row) < num_columns: row.append("") - df = pd.DataFrame(rows[1:], columns=rows[0]) + df = DataFrame(rows[1:], columns=rows[0]) parsed_tables.append(df) return parsed_tables @@ -89,6 +91,7 @@ def extract_table_from_pdf_text(text: str) -> tuple[str, list[DataFrame]]: text_output = "\n\n".join(main_text_lines) return text_output, tables_output + def string_replace_unicode(text: str) -> str: """Replaces specific Unicode characters with their corresponding replacements in the given text.""" return ( diff --git a/pyproject.toml b/pyproject.toml index eb1f95d5..75ad7769 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,6 @@ types-jsonschema = "^4.23.0.20241208" types-pywin32 = "^310.0.0.20250516" lxml-stubs = "^0.5.1" pandas-stubs = "^2.2.3.250308" -lxml-stubs = "^0.5.1" [tool.poetry.group.docs.dependencies] mkdocs = "^1.6.0" From 420d5a9dada41bcb4e7d3c40393b36027eb05012 Mon Sep 17 00:00:00 2001 From: Thomas 
Rowlands Date: Fri, 23 May 2025 16:45:48 +0100 Subject: [PATCH 053/125] regenerated lock file --- poetry.lock | 1939 +++++++++++++++++++++++---------------------------- 1 file changed, 889 insertions(+), 1050 deletions(-) diff --git a/poetry.lock b/poetry.lock index cc6287ff..f7535dd3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "annotated-types" @@ -6,8 +6,6 @@ version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" optional = true python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -19,8 +17,6 @@ version = "0.46.0" description = "The official Python library for the anthropic API" optional = true python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "anthropic-0.46.0-py3-none-any.whl", hash = "sha256:1445ec9be78d2de7ea51b4d5acd3574e414aea97ef903d0ecbb57bec806aaa49"}, {file = "anthropic-0.46.0.tar.gz", hash = "sha256:eac3d43271d02321a57c3ca68aca84c3d58873e8e72d1433288adee2d46b745b"}, @@ -45,8 +41,6 @@ version = "4.9.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c"}, {file = "anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028"}, @@ -60,7 +54,7 @@ typing_extensions = {version = 
">=4.5", markers = "python_version < \"3.13\""} [package.extras] doc = ["Sphinx (>=8.2,<9.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"] -test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1) ; python_version >= \"3.10\"", "uvloop (>=0.21) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\" and python_version < \"3.14\""] +test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] trio = ["trio (>=0.26.1)"] [[package]] @@ -69,19 +63,18 @@ version = "25.3.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"}, {file = "attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"}, ] [package.extras] -benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", 
"hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"] -tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""] +tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] [[package]] name = "babel" @@ -89,14 +82,13 @@ version = "2.17.0" description = "Internationalization utilities" optional = false python-versions = ">=3.8" -groups = ["docs"] files = [ {file = "babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2"}, {file = "babel-2.17.0.tar.gz", hash = 
"sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d"}, ] [package.extras] -dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata ; sys_platform == \"win32\""] +dev = ["backports.zoneinfo", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata"] [[package]] name = "backrefs" @@ -104,7 +96,6 @@ version = "5.8" description = "A wrapper around re and regex that adds additional back references." optional = false python-versions = ">=3.9" -groups = ["docs"] files = [ {file = "backrefs-5.8-py310-none-any.whl", hash = "sha256:c67f6638a34a5b8730812f5101376f9d41dc38c43f1fdc35cb54700f6ed4465d"}, {file = "backrefs-5.8-py311-none-any.whl", hash = "sha256:2e1c15e4af0e12e45c8701bd5da0902d326b2e200cafcd25e49d9f06d44bb61b"}, @@ -123,7 +114,6 @@ version = "4.13.4" description = "Screen-scraping library" optional = false python-versions = ">=3.7.0" -groups = ["main"] files = [ {file = "beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b"}, {file = "beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195"}, @@ -146,8 +136,6 @@ version = "5.5.2" description = "Extensible memoizing collections and decorators" optional = true python-versions = ">=3.7" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"}, {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"}, @@ -155,16 +143,14 @@ files = [ [[package]] name = "certifi" -version = "2025.1.31" +version = "2025.4.26" description = "Python package for providing Mozilla's CA Bundle." 
optional = false python-versions = ">=3.6" -groups = ["main", "docs"] files = [ - {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, - {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, + {file = "certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3"}, + {file = "certifi-2025.4.26.tar.gz", hash = "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6"}, ] -markers = {main = "extra == \"pdf\""} [[package]] name = "cfgv" @@ -172,7 +158,6 @@ version = "3.4.0" description = "Validate configuration and produce human readable error messages." optional = false python-versions = ">=3.8" -groups = ["main", "dev"] files = [ {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, @@ -180,117 +165,114 @@ files = [ [[package]] name = "charset-normalizer" -version = "3.4.1" +version = "3.4.2" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
optional = false python-versions = ">=3.7" -groups = ["main", "docs"] -files = [ - {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807"}, - {file = 
"charset_normalizer-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = 
"sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d"}, - {file = 
"charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_aarch64.whl", 
hash = "sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-win32.whl", hash = "sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash 
= "sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-win32.whl", hash = "sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-win32.whl", hash = "sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765"}, - {file = "charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85"}, - {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"}, -] -markers = {main = "extra == \"pdf\""} +files = [ + {file = "charset_normalizer-3.4.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c48ed483eb946e6c04ccbe02c6b4d1d48e51944b6db70f697e089c193404941"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:b2d318c11350e10662026ad0eb71bb51c7812fc8590825304ae0bdd4ac283acd"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9cbfacf36cb0ec2897ce0ebc5d08ca44213af24265bd56eca54bee7923c48fd6"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18dd2e350387c87dabe711b86f83c9c78af772c748904d372ade190b5c7c9d4d"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8075c35cd58273fee266c58c0c9b670947c19df5fb98e7b66710e04ad4e9ff86"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5bf4545e3b962767e5c06fe1738f951f77d27967cb2caa64c28be7c4563e162c"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a6ab32f7210554a96cd9e33abe3ddd86732beeafc7a28e9955cdf22ffadbab0"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b33de11b92e9f75a2b545d6e9b6f37e398d86c3e9e9653c4864eb7e89c5773ef"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8755483f3c00d6c9a77f490c17e6ab0c8729e39e6390328e42521ef175380ae6"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:68a328e5f55ec37c57f19ebb1fdc56a248db2e3e9ad769919a58672958e8f366"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:21b2899062867b0e1fde9b724f8aecb1af14f2778d69aacd1a5a1853a597a5db"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-win32.whl", hash = "sha256:e8082b26888e2f8b36a042a58307d5b917ef2b1cacab921ad3323ef91901c71a"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:f69a27e45c43520f5487f27627059b64aaf160415589230992cec34c5e18a509"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:be1e352acbe3c78727a16a455126d9ff83ea2dfdcbc83148d2982305a04714c2"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa88ca0b1932e93f2d961bf3addbb2db902198dca337d88c89e1559e066e7645"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d524ba3f1581b35c03cb42beebab4a13e6cdad7b36246bd22541fa585a56cccd"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28a1005facc94196e1fb3e82a3d442a9d9110b8434fc1ded7a24a2983c9888d8"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdb20a30fe1175ecabed17cbf7812f7b804b8a315a25f24678bcdf120a90077f"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f5d9ed7f254402c9e7d35d2f5972c9bbea9040e99cd2861bd77dc68263277c7"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd387a49825780ff861998cd959767800d54f8308936b21025326de4b5a42b9"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f0aa37f3c979cf2546b73e8222bbfa3dc07a641585340179d768068e3455e544"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e70e990b2137b29dc5564715de1e12701815dacc1d056308e2b17e9095372a82"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0c8c57f84ccfc871a48a47321cfa49ae1df56cd1d965a09abe84066f6853b9c0"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6b66f92b17849b85cad91259efc341dce9c1af48e2173bf38a85c6329f1033e5"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-win32.whl", hash = "sha256:daac4765328a919a805fa5e2720f3e94767abd632ae410a9062dff5412bae65a"}, + {file = 
"charset_normalizer-3.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:e53efc7c7cee4c1e70661e2e112ca46a575f90ed9ae3fef200f2a25e954f4b28"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff"}, + {file = 
"charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cad5f45b3146325bb38d6855642f6fd609c3f7cad4dbaf75549bf3b904d3184"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2680962a4848b3c4f155dc2ee64505a9c57186d0d56b43123b17ca3de18f0fa"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:36b31da18b8890a76ec181c3cf44326bf2c48e36d393ca1b72b3f484113ea344"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f4074c5a429281bf056ddd4c5d3b740ebca4d43ffffe2ef4bf4d2d05114299da"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9e36a97bee9b86ef9a1cf7bb96747eb7a15c2f22bdb5b516434b00f2a599f02"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:1b1bde144d98e446b056ef98e59c256e9294f6b74d7af6846bf5ffdafd687a7d"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:915f3849a011c1f593ab99092f3cecfcb4d65d8feb4a64cf1bf2d22074dc0ec4"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:fb707f3e15060adf5b7ada797624a6c6e0138e2a26baa089df64c68ee98e040f"}, + 
{file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:25a23ea5c7edc53e0f29bae2c44fcb5a1aa10591aae107f2a2b2583a9c5cbc64"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:770cab594ecf99ae64c236bc9ee3439c3f46be49796e265ce0cc8bc17b10294f"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-win32.whl", hash = "sha256:6a0289e4589e8bdfef02a80478f1dfcb14f0ab696b5a00e1f4b8a14a307a3c58"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6fc1f5b51fa4cecaa18f2bd7a003f3dd039dd615cd69a2afd6d3b19aed6775f2"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:76af085e67e56c8816c3ccf256ebd136def2ed9654525348cfa744b6802b69eb"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e45ba65510e2647721e35323d6ef54c7974959f6081b58d4ef5d87c60c84919a"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:046595208aae0120559a67693ecc65dd75d46f7bf687f159127046628178dc45"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75d10d37a47afee94919c4fab4c22b9bc2a8bf7d4f46f87363bcf0573f3ff4f5"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6333b3aa5a12c26b2a4d4e7335a28f1475e0e5e17d69d55141ee3cab736f66d1"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e8323a9b031aa0393768b87f04b4164a40037fb2a3c11ac06a03ffecd3618027"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:24498ba8ed6c2e0b56d4acbf83f2d989720a93b41d712ebd4f4979660db4417b"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:844da2b5728b5ce0e32d863af26f32b5ce61bc4273a9c720a9f3aa9df73b1455"}, + {file 
= "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:65c981bdbd3f57670af8b59777cbfae75364b483fa8a9f420f08094531d54a01"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:3c21d4fca343c805a52c0c78edc01e3477f6dd1ad7c47653241cf2a206d4fc58"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:dc7039885fa1baf9be153a0626e337aa7ec8bf96b0128605fb0d77788ddc1681"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-win32.whl", hash = "sha256:8272b73e1c5603666618805fe821edba66892e2870058c94c53147602eab29c7"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:70f7172939fdf8790425ba31915bfbe8335030f05b9913d7ae00a87d4395620a"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:005fa3432484527f9732ebd315da8da8001593e2cf46a3d817669f062c3d9ed4"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e92fca20c46e9f5e1bb485887d074918b13543b1c2a1185e69bb8d17ab6236a7"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:50bf98d5e563b83cc29471fa114366e6806bc06bc7a25fd59641e41445327836"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:721c76e84fe669be19c5791da68232ca2e05ba5185575086e384352e2c309597"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82d8fd25b7f4675d0c47cf95b594d4e7b158aca33b76aa63d07186e13c0e0ab7"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3daeac64d5b371dea99714f08ffc2c208522ec6b06fbc7866a450dd446f5c0f"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dccab8d5fa1ef9bfba0590ecf4d46df048d18ffe3eec01eeb73a42e0d9e7a8ba"}, + {file = 
"charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:aaf27faa992bfee0264dc1f03f4c75e9fcdda66a519db6b957a3f826e285cf12"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:eb30abc20df9ab0814b5a2524f23d75dcf83cde762c161917a2b4b7b55b1e518"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c72fbbe68c6f32f251bdc08b8611c7b3060612236e960ef848e0a517ddbe76c5"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:982bb1e8b4ffda883b3d0a521e23abcd6fd17418f6d2c4118d257a10199c0ce3"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-win32.whl", hash = "sha256:43e0933a0eff183ee85833f341ec567c0980dae57c464d8a508e1b2ceb336471"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:d11b54acf878eef558599658b0ffca78138c8c3655cf4f3a4a673c437e67732e"}, + {file = "charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0"}, + {file = "charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63"}, +] [[package]] name = "click" -version = "8.1.8" +version = "8.2.1" description = "Composable command line interface toolkit" optional = false -python-versions = ">=3.7" -groups = ["main", "docs"] +python-versions = ">=3.10" files = [ - {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, - {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, + {file = "click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b"}, + {file = "click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202"}, ] [package.dependencies] @@ -302,91 +284,92 @@ version = "0.4.6" description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["main", "dev", "docs"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "platform_system == \"Windows\"", dev = "sys_platform == \"win32\""} [[package]] name = "coverage" -version = "7.7.1" +version = "7.8.2" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "coverage-7.7.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:553ba93f8e3c70e1b0031e4dfea36aba4e2b51fe5770db35e99af8dc5c5a9dfe"}, - {file = "coverage-7.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:44683f2556a56c9a6e673b583763096b8efbd2df022b02995609cf8e64fc8ae0"}, - {file = "coverage-7.7.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02fad4f8faa4153db76f9246bc95c1d99f054f4e0a884175bff9155cf4f856cb"}, - {file = "coverage-7.7.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c181ceba2e6808ede1e964f7bdc77bd8c7eb62f202c63a48cc541e5ffffccb6"}, - {file = "coverage-7.7.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80b5b207a8b08c6a934b214e364cab2fa82663d4af18981a6c0a9e95f8df7602"}, - {file = "coverage-7.7.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:25fe40967717bad0ce628a0223f08a10d54c9d739e88c9cbb0f77b5959367542"}, - {file = "coverage-7.7.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:881cae0f9cbd928c9c001487bb3dcbfd0b0af3ef53ae92180878591053be0cb3"}, - {file = "coverage-7.7.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c90e9141e9221dd6fbc16a2727a5703c19443a8d9bf7d634c792fa0287cee1ab"}, - {file = 
"coverage-7.7.1-cp310-cp310-win32.whl", hash = "sha256:ae13ed5bf5542d7d4a0a42ff5160e07e84adc44eda65ddaa635c484ff8e55917"}, - {file = "coverage-7.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:171e9977c6a5d2b2be9efc7df1126fd525ce7cad0eb9904fe692da007ba90d81"}, - {file = "coverage-7.7.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1165490be0069e34e4f99d08e9c5209c463de11b471709dfae31e2a98cbd49fd"}, - {file = "coverage-7.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:44af11c00fd3b19b8809487630f8a0039130d32363239dfd15238e6d37e41a48"}, - {file = "coverage-7.7.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fbba59022e7c20124d2f520842b75904c7b9f16c854233fa46575c69949fb5b9"}, - {file = "coverage-7.7.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:af94fb80e4f159f4d93fb411800448ad87b6039b0500849a403b73a0d36bb5ae"}, - {file = "coverage-7.7.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eae79f8e3501133aa0e220bbc29573910d096795882a70e6f6e6637b09522133"}, - {file = "coverage-7.7.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e33426a5e1dc7743dd54dfd11d3a6c02c5d127abfaa2edd80a6e352b58347d1a"}, - {file = "coverage-7.7.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:b559adc22486937786731dac69e57296cb9aede7e2687dfc0d2696dbd3b1eb6b"}, - {file = "coverage-7.7.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b838a91e84e1773c3436f6cc6996e000ed3ca5721799e7789be18830fad009a2"}, - {file = "coverage-7.7.1-cp311-cp311-win32.whl", hash = "sha256:2c492401bdb3a85824669d6a03f57b3dfadef0941b8541f035f83bbfc39d4282"}, - {file = "coverage-7.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:1e6f867379fd033a0eeabb1be0cffa2bd660582b8b0c9478895c509d875a9d9e"}, - {file = "coverage-7.7.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:eff187177d8016ff6addf789dcc421c3db0d014e4946c1cc3fbf697f7852459d"}, - 
{file = "coverage-7.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2444fbe1ba1889e0b29eb4d11931afa88f92dc507b7248f45be372775b3cef4f"}, - {file = "coverage-7.7.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:177d837339883c541f8524683e227adcaea581eca6bb33823a2a1fdae4c988e1"}, - {file = "coverage-7.7.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15d54ecef1582b1d3ec6049b20d3c1a07d5e7f85335d8a3b617c9960b4f807e0"}, - {file = "coverage-7.7.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c82b27c56478d5e1391f2e7b2e7f588d093157fa40d53fd9453a471b1191f2"}, - {file = "coverage-7.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:315ff74b585110ac3b7ab631e89e769d294f303c6d21302a816b3554ed4c81af"}, - {file = "coverage-7.7.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4dd532dac197d68c478480edde74fd4476c6823355987fd31d01ad9aa1e5fb59"}, - {file = "coverage-7.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:385618003e3d608001676bb35dc67ae3ad44c75c0395d8de5780af7bb35be6b2"}, - {file = "coverage-7.7.1-cp312-cp312-win32.whl", hash = "sha256:63306486fcb5a827449464f6211d2991f01dfa2965976018c9bab9d5e45a35c8"}, - {file = "coverage-7.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:37351dc8123c154fa05b7579fdb126b9f8b1cf42fd6f79ddf19121b7bdd4aa04"}, - {file = "coverage-7.7.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:eebd927b86761a7068a06d3699fd6c20129becf15bb44282db085921ea0f1585"}, - {file = "coverage-7.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2a79c4a09765d18311c35975ad2eb1ac613c0401afdd9cb1ca4110aeb5dd3c4c"}, - {file = "coverage-7.7.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b1c65a739447c5ddce5b96c0a388fd82e4bbdff7251396a70182b1d83631019"}, - {file = 
"coverage-7.7.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:392cc8fd2b1b010ca36840735e2a526fcbd76795a5d44006065e79868cc76ccf"}, - {file = "coverage-7.7.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9bb47cc9f07a59a451361a850cb06d20633e77a9118d05fd0f77b1864439461b"}, - {file = "coverage-7.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b4c144c129343416a49378e05c9451c34aae5ccf00221e4fa4f487db0816ee2f"}, - {file = "coverage-7.7.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bc96441c9d9ca12a790b5ae17d2fa6654da4b3962ea15e0eabb1b1caed094777"}, - {file = "coverage-7.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3d03287eb03186256999539d98818c425c33546ab4901028c8fa933b62c35c3a"}, - {file = "coverage-7.7.1-cp313-cp313-win32.whl", hash = "sha256:8fed429c26b99641dc1f3a79179860122b22745dd9af36f29b141e178925070a"}, - {file = "coverage-7.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:092b134129a8bb940c08b2d9ceb4459af5fb3faea77888af63182e17d89e1cf1"}, - {file = "coverage-7.7.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d3154b369141c3169b8133973ac00f63fcf8d6dbcc297d788d36afbb7811e511"}, - {file = "coverage-7.7.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:264ff2bcce27a7f455b64ac0dfe097680b65d9a1a293ef902675fa8158d20b24"}, - {file = "coverage-7.7.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba8480ebe401c2f094d10a8c4209b800a9b77215b6c796d16b6ecdf665048950"}, - {file = "coverage-7.7.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:520af84febb6bb54453e7fbb730afa58c7178fd018c398a8fcd8e269a79bf96d"}, - {file = "coverage-7.7.1-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88d96127ae01ff571d465d4b0be25c123789cef88ba0879194d673fdea52f54e"}, - {file 
= "coverage-7.7.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0ce92c5a9d7007d838456f4b77ea159cb628187a137e1895331e530973dcf862"}, - {file = "coverage-7.7.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:0dab4ef76d7b14f432057fdb7a0477e8bffca0ad39ace308be6e74864e632271"}, - {file = "coverage-7.7.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7e688010581dbac9cab72800e9076e16f7cccd0d89af5785b70daa11174e94de"}, - {file = "coverage-7.7.1-cp313-cp313t-win32.whl", hash = "sha256:e52eb31ae3afacdacfe50705a15b75ded67935770c460d88c215a9c0c40d0e9c"}, - {file = "coverage-7.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a6b6b3bd121ee2ec4bd35039319f3423d0be282b9752a5ae9f18724bc93ebe7c"}, - {file = "coverage-7.7.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:34a3bf6b92e6621fc4dcdaab353e173ccb0ca9e4bfbcf7e49a0134c86c9cd303"}, - {file = "coverage-7.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d6874929d624d3a670f676efafbbc747f519a6121b581dd41d012109e70a5ebd"}, - {file = "coverage-7.7.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ba5ff236c87a7b7aa1441a216caf44baee14cbfbd2256d306f926d16b026578"}, - {file = "coverage-7.7.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:452735fafe8ff5918236d5fe1feac322b359e57692269c75151f9b4ee4b7e1bc"}, - {file = "coverage-7.7.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5f99a93cecf799738e211f9746dc83749b5693538fbfac279a61682ba309387"}, - {file = "coverage-7.7.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:11dd6f52c2a7ce8bf0a5f3b6e4a8eb60e157ffedc3c4b4314a41c1dfbd26ce58"}, - {file = "coverage-7.7.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:b52edb940d087e2a96e73c1523284a2e94a4e66fa2ea1e2e64dddc67173bad94"}, - {file = "coverage-7.7.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:d2e73e2ac468536197e6b3ab79bc4a5c9da0f078cd78cfcc7fe27cf5d1195ef0"}, - {file = "coverage-7.7.1-cp39-cp39-win32.whl", hash = "sha256:18f544356bceef17cc55fcf859e5664f06946c1b68efcea6acdc50f8f6a6e776"}, - {file = "coverage-7.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:d66ff48ab3bb6f762a153e29c0fc1eb5a62a260217bc64470d7ba602f5886d20"}, - {file = "coverage-7.7.1-pp39.pp310.pp311-none-any.whl", hash = "sha256:5b7b02e50d54be6114cc4f6a3222fec83164f7c42772ba03b520138859b5fde1"}, - {file = "coverage-7.7.1-py3-none-any.whl", hash = "sha256:822fa99dd1ac686061e1219b67868e25d9757989cf2259f735a4802497d6da31"}, - {file = "coverage-7.7.1.tar.gz", hash = "sha256:199a1272e642266b90c9f40dec7fd3d307b51bf639fa0d15980dc0b3246c1393"}, +files = [ + {file = "coverage-7.8.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bd8ec21e1443fd7a447881332f7ce9d35b8fbd2849e761bb290b584535636b0a"}, + {file = "coverage-7.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c26c2396674816deaeae7ded0e2b42c26537280f8fe313335858ffff35019be"}, + {file = "coverage-7.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1aec326ed237e5880bfe69ad41616d333712c7937bcefc1343145e972938f9b3"}, + {file = "coverage-7.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5e818796f71702d7a13e50c70de2a1924f729228580bcba1607cccf32eea46e6"}, + {file = "coverage-7.8.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:546e537d9e24efc765c9c891328f30f826e3e4808e31f5d0f87c4ba12bbd1622"}, + {file = "coverage-7.8.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ab9b09a2349f58e73f8ebc06fac546dd623e23b063e5398343c5270072e3201c"}, + {file = "coverage-7.8.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fd51355ab8a372d89fb0e6a31719e825cf8df8b6724bee942fb5b92c3f016ba3"}, + {file = "coverage-7.8.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:0774df1e093acb6c9e4d58bce7f86656aeed6c132a16e2337692c12786b32404"}, + {file = "coverage-7.8.2-cp310-cp310-win32.whl", hash = "sha256:00f2e2f2e37f47e5f54423aeefd6c32a7dbcedc033fcd3928a4f4948e8b96af7"}, + {file = "coverage-7.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:145b07bea229821d51811bf15eeab346c236d523838eda395ea969d120d13347"}, + {file = "coverage-7.8.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b99058eef42e6a8dcd135afb068b3d53aff3921ce699e127602efff9956457a9"}, + {file = "coverage-7.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5feb7f2c3e6ea94d3b877def0270dff0947b8d8c04cfa34a17be0a4dc1836879"}, + {file = "coverage-7.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:670a13249b957bb9050fab12d86acef7bf8f6a879b9d1a883799276e0d4c674a"}, + {file = "coverage-7.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0bdc8bf760459a4a4187b452213e04d039990211f98644c7292adf1e471162b5"}, + {file = "coverage-7.8.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07a989c867986c2a75f158f03fdb413128aad29aca9d4dbce5fc755672d96f11"}, + {file = "coverage-7.8.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2db10dedeb619a771ef0e2949ccba7b75e33905de959c2643a4607bef2f3fb3a"}, + {file = "coverage-7.8.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e6ea7dba4e92926b7b5f0990634b78ea02f208d04af520c73a7c876d5a8d36cb"}, + {file = "coverage-7.8.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ef2f22795a7aca99fc3c84393a55a53dd18ab8c93fb431004e4d8f0774150f54"}, + {file = "coverage-7.8.2-cp311-cp311-win32.whl", hash = "sha256:641988828bc18a6368fe72355df5f1703e44411adbe49bba5644b941ce6f2e3a"}, + {file = "coverage-7.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:8ab4a51cb39dc1933ba627e0875046d150e88478dbe22ce145a68393e9652975"}, + {file = "coverage-7.8.2-cp311-cp311-win_arm64.whl", hash = 
"sha256:8966a821e2083c74d88cca5b7dcccc0a3a888a596a04c0b9668a891de3a0cc53"}, + {file = "coverage-7.8.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e2f6fe3654468d061942591aef56686131335b7a8325684eda85dacdf311356c"}, + {file = "coverage-7.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76090fab50610798cc05241bf83b603477c40ee87acd358b66196ab0ca44ffa1"}, + {file = "coverage-7.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2bd0a0a5054be160777a7920b731a0570284db5142abaaf81bcbb282b8d99279"}, + {file = "coverage-7.8.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da23ce9a3d356d0affe9c7036030b5c8f14556bd970c9b224f9c8205505e3b99"}, + {file = "coverage-7.8.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9392773cffeb8d7e042a7b15b82a414011e9d2b5fdbbd3f7e6a6b17d5e21b20"}, + {file = "coverage-7.8.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:876cbfd0b09ce09d81585d266c07a32657beb3eaec896f39484b631555be0fe2"}, + {file = "coverage-7.8.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3da9b771c98977a13fbc3830f6caa85cae6c9c83911d24cb2d218e9394259c57"}, + {file = "coverage-7.8.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a990f6510b3292686713bfef26d0049cd63b9c7bb17e0864f133cbfd2e6167f"}, + {file = "coverage-7.8.2-cp312-cp312-win32.whl", hash = "sha256:bf8111cddd0f2b54d34e96613e7fbdd59a673f0cf5574b61134ae75b6f5a33b8"}, + {file = "coverage-7.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:86a323a275e9e44cdf228af9b71c5030861d4d2610886ab920d9945672a81223"}, + {file = "coverage-7.8.2-cp312-cp312-win_arm64.whl", hash = "sha256:820157de3a589e992689ffcda8639fbabb313b323d26388d02e154164c57b07f"}, + {file = "coverage-7.8.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ea561010914ec1c26ab4188aef8b1567272ef6de096312716f90e5baa79ef8ca"}, + {file = 
"coverage-7.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cb86337a4fcdd0e598ff2caeb513ac604d2f3da6d53df2c8e368e07ee38e277d"}, + {file = "coverage-7.8.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26a4636ddb666971345541b59899e969f3b301143dd86b0ddbb570bd591f1e85"}, + {file = "coverage-7.8.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5040536cf9b13fb033f76bcb5e1e5cb3b57c4807fef37db9e0ed129c6a094257"}, + {file = "coverage-7.8.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc67994df9bcd7e0150a47ef41278b9e0a0ea187caba72414b71dc590b99a108"}, + {file = "coverage-7.8.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e6c86888fd076d9e0fe848af0a2142bf606044dc5ceee0aa9eddb56e26895a0"}, + {file = "coverage-7.8.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:684ca9f58119b8e26bef860db33524ae0365601492e86ba0b71d513f525e7050"}, + {file = "coverage-7.8.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8165584ddedb49204c4e18da083913bdf6a982bfb558632a79bdaadcdafd0d48"}, + {file = "coverage-7.8.2-cp313-cp313-win32.whl", hash = "sha256:34759ee2c65362163699cc917bdb2a54114dd06d19bab860725f94ef45a3d9b7"}, + {file = "coverage-7.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:2f9bc608fbafaee40eb60a9a53dbfb90f53cc66d3d32c2849dc27cf5638a21e3"}, + {file = "coverage-7.8.2-cp313-cp313-win_arm64.whl", hash = "sha256:9fe449ee461a3b0c7105690419d0b0aba1232f4ff6d120a9e241e58a556733f7"}, + {file = "coverage-7.8.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8369a7c8ef66bded2b6484053749ff220dbf83cba84f3398c84c51a6f748a008"}, + {file = "coverage-7.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:159b81df53a5fcbc7d45dae3adad554fdbde9829a994e15227b3f9d816d00b36"}, + {file = "coverage-7.8.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e6fcbbd35a96192d042c691c9e0c49ef54bd7ed865846a3c9d624c30bb67ce46"}, + {file = "coverage-7.8.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05364b9cc82f138cc86128dc4e2e1251c2981a2218bfcd556fe6b0fbaa3501be"}, + {file = "coverage-7.8.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46d532db4e5ff3979ce47d18e2fe8ecad283eeb7367726da0e5ef88e4fe64740"}, + {file = "coverage-7.8.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4000a31c34932e7e4fa0381a3d6deb43dc0c8f458e3e7ea6502e6238e10be625"}, + {file = "coverage-7.8.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:43ff5033d657cd51f83015c3b7a443287250dc14e69910577c3e03bd2e06f27b"}, + {file = "coverage-7.8.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94316e13f0981cbbba132c1f9f365cac1d26716aaac130866ca812006f662199"}, + {file = "coverage-7.8.2-cp313-cp313t-win32.whl", hash = "sha256:3f5673888d3676d0a745c3d0e16da338c5eea300cb1f4ada9c872981265e76d8"}, + {file = "coverage-7.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:2c08b05ee8d7861e45dc5a2cc4195c8c66dca5ac613144eb6ebeaff2d502e73d"}, + {file = "coverage-7.8.2-cp313-cp313t-win_arm64.whl", hash = "sha256:1e1448bb72b387755e1ff3ef1268a06617afd94188164960dba8d0245a46004b"}, + {file = "coverage-7.8.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:496948261eaac5ac9cf43f5d0a9f6eb7a6d4cb3bedb2c5d294138142f5c18f2a"}, + {file = "coverage-7.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eacd2de0d30871eff893bab0b67840a96445edcb3c8fd915e6b11ac4b2f3fa6d"}, + {file = "coverage-7.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b039ffddc99ad65d5078ef300e0c7eed08c270dc26570440e3ef18beb816c1ca"}, + {file = "coverage-7.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e49824808d4375ede9dd84e9961a59c47f9113039f1a525e6be170aa4f5c34d"}, 
+ {file = "coverage-7.8.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b069938961dfad881dc2f8d02b47645cd2f455d3809ba92a8a687bf513839787"}, + {file = "coverage-7.8.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:de77c3ba8bb686d1c411e78ee1b97e6e0b963fb98b1637658dd9ad2c875cf9d7"}, + {file = "coverage-7.8.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1676628065a498943bd3f64f099bb573e08cf1bc6088bbe33cf4424e0876f4b3"}, + {file = "coverage-7.8.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8e1a26e7e50076e35f7afafde570ca2b4d7900a491174ca357d29dece5aacee7"}, + {file = "coverage-7.8.2-cp39-cp39-win32.whl", hash = "sha256:6782a12bf76fa61ad9350d5a6ef5f3f020b57f5e6305cbc663803f2ebd0f270a"}, + {file = "coverage-7.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:1efa4166ba75ccefd647f2d78b64f53f14fb82622bc94c5a5cb0a622f50f1c9e"}, + {file = "coverage-7.8.2-pp39.pp310.pp311-none-any.whl", hash = "sha256:ec455eedf3ba0bbdf8f5a570012617eb305c63cb9f03428d39bf544cb2b94837"}, + {file = "coverage-7.8.2-py3-none-any.whl", hash = "sha256:726f32ee3713f7359696331a18daf0c3b3a70bb0ae71141b9d3c52be7c595e32"}, + {file = "coverage-7.8.2.tar.gz", hash = "sha256:a886d531373a1f6ff9fad2a2ba4a045b68467b779ae729ee0b3b10ac20033b27"}, ] [package.dependencies] tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} [package.extras] -toml = ["tomli ; python_full_version <= \"3.11.0a6\""] +toml = ["tomli"] [[package]] name = "distlib" @@ -394,7 +377,6 @@ version = "0.3.9" description = "Distribution utilities" optional = false python-versions = "*" -groups = ["main", "dev"] files = [ {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"}, {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"}, @@ -406,25 +388,35 @@ version = "1.9.0" 
description = "Distro - an OS platform information API" optional = true python-versions = ">=3.6" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, ] +[[package]] +name = "einops" +version = "0.8.1" +description = "A new flavour of deep learning operations" +optional = true +python-versions = ">=3.8" +files = [ + {file = "einops-0.8.1-py3-none-any.whl", hash = "sha256:919387eb55330f5757c6bea9165c5ff5cfe63a642682ea788a6d472576d81737"}, + {file = "einops-0.8.1.tar.gz", hash = "sha256:de5d960a7a761225532e0f1959e5315ebeafc0cd43394732f103ca44b9837e84"}, +] + [[package]] name = "exceptiongroup" -version = "1.2.2" +version = "1.3.0" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" -groups = ["main", "dev"] files = [ - {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, - {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, + {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, + {file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"}, ] -markers = {main = "python_version == \"3.10\" and extra == \"pdf\"", dev = "python_version == \"3.10\""} + +[package.dependencies] +typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""} [package.extras] test = ["pytest (>=6)"] @@ -435,7 +427,6 @@ version = "3.18.0" description = "A platform independent file lock." 
optional = false python-versions = ">=3.9" -groups = ["main", "dev"] files = [ {file = "filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de"}, {file = "filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2"}, @@ -444,7 +435,7 @@ files = [ [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"] -typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] +typing = ["typing-extensions (>=4.12.2)"] [[package]] name = "filetype" @@ -452,8 +443,6 @@ version = "1.2.0" description = "Infer file type and MIME type of any file/buffer. No external dependencies." optional = true python-versions = "*" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25"}, {file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"}, @@ -461,15 +450,13 @@ files = [ [[package]] name = "fsspec" -version = "2025.3.2" +version = "2025.5.0" description = "File-system specification" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ - {file = "fsspec-2025.3.2-py3-none-any.whl", hash = "sha256:2daf8dc3d1dfa65b6aa37748d112773a7a08416f6c70d96b264c96476ecaf711"}, - {file = "fsspec-2025.3.2.tar.gz", hash = "sha256:e52c77ef398680bbd6a98c0e628fbc469491282981209907bbc8aea76a04fdc6"}, + {file = "fsspec-2025.5.0-py3-none-any.whl", hash = "sha256:0ca253eca6b5333d8a2b8bd98c7326fe821f1f0fdbd34e1b445bddde8e804c95"}, + {file = "fsspec-2025.5.0.tar.gz", hash = 
"sha256:e4f4623bb6221f7407fd695cc535d1f857a077eb247580f4ada34f5dc25fd5c8"}, ] [package.extras] @@ -506,8 +493,6 @@ version = "6.3.1" description = "Fixes mojibake and other problems with Unicode, after the fact" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "ftfy-6.3.1-py3-none-any.whl", hash = "sha256:7c70eb532015cd2f9adb53f101fb6c7945988d023a085d127d1573dc49dd0083"}, {file = "ftfy-6.3.1.tar.gz", hash = "sha256:9b3c3d90f84fb267fe64d375a07b7f8912d817cf86009ae134aa03e1819506ec"}, @@ -522,7 +507,6 @@ version = "0.18.0" description = "Fuzzy string matching in python" optional = false python-versions = "*" -groups = ["main"] files = [ {file = "fuzzywuzzy-0.18.0-py2.py3-none-any.whl", hash = "sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993"}, {file = "fuzzywuzzy-0.18.0.tar.gz", hash = "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8"}, @@ -540,7 +524,6 @@ version = "2.1.0" description = "Copy your docs directly to the gh-pages branch." 
optional = false python-versions = "*" -groups = ["docs"] files = [ {file = "ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"}, {file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"}, @@ -554,15 +537,13 @@ dev = ["flake8", "markdown", "twine", "wheel"] [[package]] name = "google-auth" -version = "2.39.0" +version = "2.40.2" description = "Google Authentication Library" optional = true python-versions = ">=3.7" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ - {file = "google_auth-2.39.0-py2.py3-none-any.whl", hash = "sha256:0150b6711e97fb9f52fe599f55648950cc4540015565d8fbb31be2ad6e1548a2"}, - {file = "google_auth-2.39.0.tar.gz", hash = "sha256:73222d43cdc35a3aeacbfdcaf73142a97839f10de930550d89ebfe1d0a00cde7"}, + {file = "google_auth-2.40.2-py2.py3-none-any.whl", hash = "sha256:f7e568d42eedfded58734f6a60c58321896a621f7c116c411550a4b4a13da90b"}, + {file = "google_auth-2.40.2.tar.gz", hash = "sha256:a33cde547a2134273226fa4b853883559947ebe9207521f7afc707efbf690f58"}, ] [package.dependencies] @@ -573,24 +554,22 @@ rsa = ">=3.1.4,<5" [package.extras] aiohttp = ["aiohttp (>=3.6.2,<4.0.0)", "requests (>=2.20.0,<3.0.0)"] enterprise-cert = ["cryptography", "pyopenssl"] -pyjwt = ["cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "pyjwt (>=2.0)"] -pyopenssl = ["cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +pyjwt = ["cryptography (<39.0.0)", "cryptography (>=38.0.3)", "pyjwt (>=2.0)"] +pyopenssl = ["cryptography (<39.0.0)", "cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] reauth = ["pyu2f (>=0.1.5)"] requests = ["requests (>=2.20.0,<3.0.0)"] -testing = ["aiohttp (<3.10.0)", "aiohttp (>=3.6.2,<4.0.0)", "aioresponses", "cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "flask", "freezegun", "grpcio", "mock", "oauth2client", 
"packaging", "pyjwt (>=2.0)", "pyopenssl (<24.3.0)", "pyopenssl (>=20.0.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-localserver", "pyu2f (>=0.1.5)", "requests (>=2.20.0,<3.0.0)", "responses", "urllib3"] +testing = ["aiohttp (<3.10.0)", "aiohttp (>=3.6.2,<4.0.0)", "aioresponses", "cryptography (<39.0.0)", "cryptography (>=38.0.3)", "flask", "freezegun", "grpcio", "mock", "oauth2client", "packaging", "pyjwt (>=2.0)", "pyopenssl (<24.3.0)", "pyopenssl (>=20.0.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-localserver", "pyu2f (>=0.1.5)", "requests (>=2.20.0,<3.0.0)", "responses", "urllib3"] urllib3 = ["packaging", "urllib3"] [[package]] name = "google-genai" -version = "1.12.1" +version = "1.16.1" description = "GenAI Python SDK" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ - {file = "google_genai-1.12.1-py3-none-any.whl", hash = "sha256:7cbc1bc029712946ce41bcf80c0eaa89eb8c09c308efbbfe30fd491f402c258a"}, - {file = "google_genai-1.12.1.tar.gz", hash = "sha256:5c7eda422360643ce602a3f6b23152470ec1039310ef40080cbe4e71237f6391"}, + {file = "google_genai-1.16.1-py3-none-any.whl", hash = "sha256:6ae5d24282244f577ca4f0d95c09f75ab29e556602c9d3531b70161e34cd2a39"}, + {file = "google_genai-1.16.1.tar.gz", hash = "sha256:4b4ed4ed781a9d61e5ce0fef1486dd3a5d7ff0a73bd76b9633d21e687ab998df"}, ] [package.dependencies] @@ -604,14 +583,13 @@ websockets = ">=13.0.0,<15.1.0" [[package]] name = "griffe" -version = "1.6.2" +version = "1.7.3" description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." 
optional = false python-versions = ">=3.9" -groups = ["docs"] files = [ - {file = "griffe-1.6.2-py3-none-any.whl", hash = "sha256:6399f7e663150e4278a312a8e8a14d2f3d7bd86e2ef2f8056a1058e38579c2ee"}, - {file = "griffe-1.6.2.tar.gz", hash = "sha256:3a46fa7bd83280909b63c12b9a975732a927dd97809efe5b7972290b606c5d91"}, + {file = "griffe-1.7.3-py3-none-any.whl", hash = "sha256:c6b3ee30c2f0f17f30bcdef5068d6ab7a2a4f1b8bf1a3e74b56fffd21e1c5f75"}, + {file = "griffe-1.7.3.tar.gz", hash = "sha256:52ee893c6a3a968b639ace8015bec9d36594961e156e23315c8e8e51401fa50b"}, ] [package.dependencies] @@ -623,21 +601,37 @@ version = "0.16.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" optional = true python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"}, {file = "h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1"}, ] +[[package]] +name = "hf-xet" +version = "1.1.2" +description = "Fast transfer of large files with the Hugging Face Hub." 
+optional = true +python-versions = ">=3.8" +files = [ + {file = "hf_xet-1.1.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:dfd1873fd648488c70735cb60f7728512bca0e459e61fcd107069143cd798469"}, + {file = "hf_xet-1.1.2-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:29b584983b2d977c44157d9241dcf0fd50acde0b7bff8897fe4386912330090d"}, + {file = "hf_xet-1.1.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b29ac84298147fe9164cc55ad994ba47399f90b5d045b0b803b99cf5f06d8ec"}, + {file = "hf_xet-1.1.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d921ba32615676e436a0d15e162331abc9ed43d440916b1d836dc27ce1546173"}, + {file = "hf_xet-1.1.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:d9b03c34e13c44893ab6e8fea18ee8d2a6878c15328dd3aabedbdd83ee9f2ed3"}, + {file = "hf_xet-1.1.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:01b18608955b3d826307d37da8bd38b28a46cd2d9908b3a3655d1363274f941a"}, + {file = "hf_xet-1.1.2-cp37-abi3-win_amd64.whl", hash = "sha256:3562902c81299b09f3582ddfb324400c6a901a2f3bc854f83556495755f4954c"}, + {file = "hf_xet-1.1.2.tar.gz", hash = "sha256:3712d6d4819d3976a1c18e36db9f503e296283f9363af818f50703506ed63da3"}, +] + +[package.extras] +tests = ["pytest"] + [[package]] name = "httpcore" version = "1.0.9" description = "A minimal low-level HTTP client." optional = true python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55"}, {file = "httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8"}, @@ -659,8 +653,6 @@ version = "0.28.1" description = "The next generation HTTP client." 
optional = true python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, @@ -673,7 +665,7 @@ httpcore = "==1.*" idna = "*" [package.extras] -brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] +brotli = ["brotli", "brotlicffi"] cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] @@ -681,20 +673,19 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "huggingface-hub" -version = "0.30.2" +version = "0.32.0" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = true python-versions = ">=3.8.0" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ - {file = "huggingface_hub-0.30.2-py3-none-any.whl", hash = "sha256:68ff05969927058cfa41df4f2155d4bb48f5f54f719dd0390103eefa9b191e28"}, - {file = "huggingface_hub-0.30.2.tar.gz", hash = "sha256:9a7897c5b6fd9dad3168a794a8998d6378210f5b9688d0dfc180b1a228dc2466"}, + {file = "huggingface_hub-0.32.0-py3-none-any.whl", hash = "sha256:e56e94109649ce6ebdb59b4e393ee3543ec0eca2eab4f41b269e1d885c88d08c"}, + {file = "huggingface_hub-0.32.0.tar.gz", hash = "sha256:dd66c9365ea43049ec9b939bdcdb21a0051e1bd70026fc50304e4fb1bb6a15ba"}, ] [package.dependencies] filelock = "*" fsspec = ">=2023.5.0" +hf-xet = {version = ">=1.1.2,<2.0.0", markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""} packaging = ">=20.9" pyyaml = ">=5.1" requests = "*" @@ -702,30 +693,31 @@ tqdm = ">=4.42.1" typing-extensions = ">=3.7.4.3" [package.extras] -all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", 
"aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (==1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] cli = ["InquirerPy (==0.3.4)"] -dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (==1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 
(<2.0)"] fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] hf-transfer = ["hf-transfer (>=0.1.4)"] -hf-xet = ["hf-xet (>=0.1.4)"] +hf-xet = ["hf-xet (>=1.1.2,<2.0.0)"] inference = ["aiohttp"] -quality = ["libcst (==1.4.0)", "mypy (==1.5.1)", "ruff (>=0.9.0)"] +mcp = ["aiohttp", "mcp (>=1.8.0)", "typer"] +oauth = ["authlib (>=1.3.2)", "fastapi", "httpx", "itsdangerous"] +quality = ["libcst (==1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "ruff (>=0.9.0)"] tensorflow = ["graphviz", "pydot", "tensorflow"] tensorflow-testing = ["keras (<3.0)", "tensorflow"] -testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] torch = ["safetensors[torch]", "torch"] typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] [[package]] name = "identify" -version = "2.6.9" +version = "2.6.10" description = "File identification library for Python" optional = false python-versions = ">=3.9" -groups = ["main", "dev"] files = [ - {file = "identify-2.6.9-py2.py3-none-any.whl", hash = "sha256:c98b4322da415a8e5a70ff6e51fbc2d2932c015532d77e9f8537b4ba7813b150"}, - {file = "identify-2.6.9.tar.gz", hash = "sha256:d40dfe3142a1421d8518e3d3985ef5ac42890683e32306ad614a29490abeb6bf"}, + {file = "identify-2.6.10-py2.py3-none-any.whl", hash = "sha256:5f34248f54136beed1a7ba6a6b5c4b6cf21ff495aac7c359e1ef831ae3b8ab25"}, + {file = 
"identify-2.6.10.tar.gz", hash = "sha256:45e92fd704f3da71cc3880036633f48b4b7265fd4de2b57627cb157216eb7eb8"}, ] [package.extras] @@ -737,12 +729,10 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" -groups = ["main", "docs"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, ] -markers = {main = "extra == \"pdf\""} [package.extras] all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] @@ -753,7 +743,6 @@ version = "2.1.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, @@ -765,12 +754,10 @@ version = "3.1.6" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" -groups = ["main", "docs"] files = [ {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, ] -markers = {main = "extra == \"pdf\""} [package.dependencies] MarkupSafe = ">=2.0" @@ -780,101 +767,99 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "jiter" -version = "0.9.0" +version = "0.10.0" description = "Fast iterable JSON parser." 
optional = true -python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"pdf\"" -files = [ - {file = "jiter-0.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:816ec9b60fdfd1fec87da1d7ed46c66c44ffec37ab2ef7de5b147b2fce3fd5ad"}, - {file = "jiter-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9b1d3086f8a3ee0194ecf2008cf81286a5c3e540d977fa038ff23576c023c0ea"}, - {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1339f839b91ae30b37c409bf16ccd3dc453e8b8c3ed4bd1d6a567193651a4a51"}, - {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ffba79584b3b670fefae66ceb3a28822365d25b7bf811e030609a3d5b876f538"}, - {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cfc7d0a8e899089d11f065e289cb5b2daf3d82fbe028f49b20d7b809193958d"}, - {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e00a1a2bbfaaf237e13c3d1592356eab3e9015d7efd59359ac8b51eb56390a12"}, - {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1d9870561eb26b11448854dce0ff27a9a27cb616b632468cafc938de25e9e51"}, - {file = "jiter-0.9.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9872aeff3f21e437651df378cb75aeb7043e5297261222b6441a620218b58708"}, - {file = "jiter-0.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:1fd19112d1049bdd47f17bfbb44a2c0001061312dcf0e72765bfa8abd4aa30e5"}, - {file = "jiter-0.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6ef5da104664e526836070e4a23b5f68dec1cc673b60bf1edb1bfbe8a55d0678"}, - {file = "jiter-0.9.0-cp310-cp310-win32.whl", hash = "sha256:cb12e6d65ebbefe5518de819f3eda53b73187b7089040b2d17f5b39001ff31c4"}, - {file = "jiter-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:c43ca669493626d8672be3b645dbb406ef25af3f4b6384cfd306da7eb2e70322"}, - {file = 
"jiter-0.9.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6c4d99c71508912a7e556d631768dcdef43648a93660670986916b297f1c54af"}, - {file = "jiter-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8f60fb8ce7df529812bf6c625635a19d27f30806885139e367af93f6e734ef58"}, - {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51c4e1a4f8ea84d98b7b98912aa4290ac3d1eabfde8e3c34541fae30e9d1f08b"}, - {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f4c677c424dc76684fea3e7285a7a2a7493424bea89ac441045e6a1fb1d7b3b"}, - {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2221176dfec87f3470b21e6abca056e6b04ce9bff72315cb0b243ca9e835a4b5"}, - {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3c7adb66f899ffa25e3c92bfcb593391ee1947dbdd6a9a970e0d7e713237d572"}, - {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c98d27330fdfb77913c1097a7aab07f38ff2259048949f499c9901700789ac15"}, - {file = "jiter-0.9.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:eda3f8cc74df66892b1d06b5d41a71670c22d95a1ca2cbab73654745ce9d0419"}, - {file = "jiter-0.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dd5ab5ddc11418dce28343123644a100f487eaccf1de27a459ab36d6cca31043"}, - {file = "jiter-0.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:42f8a68a69f047b310319ef8e2f52fdb2e7976fb3313ef27df495cf77bcad965"}, - {file = "jiter-0.9.0-cp311-cp311-win32.whl", hash = "sha256:a25519efb78a42254d59326ee417d6f5161b06f5da827d94cf521fed961b1ff2"}, - {file = "jiter-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:923b54afdd697dfd00d368b7ccad008cccfeb1efb4e621f32860c75e9f25edbd"}, - {file = "jiter-0.9.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:7b46249cfd6c48da28f89eb0be3f52d6fdb40ab88e2c66804f546674e539ec11"}, - {file = 
"jiter-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:609cf3c78852f1189894383cf0b0b977665f54cb38788e3e6b941fa6d982c00e"}, - {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d726a3890a54561e55a9c5faea1f7655eda7f105bd165067575ace6e65f80bb2"}, - {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2e89dc075c1fef8fa9be219e249f14040270dbc507df4215c324a1839522ea75"}, - {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04e8ffa3c353b1bc4134f96f167a2082494351e42888dfcf06e944f2729cbe1d"}, - {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:203f28a72a05ae0e129b3ed1f75f56bc419d5f91dfacd057519a8bd137b00c42"}, - {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fca1a02ad60ec30bb230f65bc01f611c8608b02d269f998bc29cca8619a919dc"}, - {file = "jiter-0.9.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:237e5cee4d5d2659aaf91bbf8ec45052cc217d9446070699441a91b386ae27dc"}, - {file = "jiter-0.9.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:528b6b71745e7326eed73c53d4aa57e2a522242320b6f7d65b9c5af83cf49b6e"}, - {file = "jiter-0.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9f48e86b57bc711eb5acdfd12b6cb580a59cc9a993f6e7dcb6d8b50522dcd50d"}, - {file = "jiter-0.9.0-cp312-cp312-win32.whl", hash = "sha256:699edfde481e191d81f9cf6d2211debbfe4bd92f06410e7637dffb8dd5dfde06"}, - {file = "jiter-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:099500d07b43f61d8bd780466d429c45a7b25411b334c60ca875fa775f68ccb0"}, - {file = "jiter-0.9.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:2764891d3f3e8b18dce2cff24949153ee30c9239da7c00f032511091ba688ff7"}, - {file = "jiter-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:387b22fbfd7a62418d5212b4638026d01723761c75c1c8232a8b8c37c2f1003b"}, - {file = 
"jiter-0.9.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d8da8629ccae3606c61d9184970423655fb4e33d03330bcdfe52d234d32f69"}, - {file = "jiter-0.9.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1be73d8982bdc278b7b9377426a4b44ceb5c7952073dd7488e4ae96b88e1103"}, - {file = "jiter-0.9.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2228eaaaa111ec54b9e89f7481bffb3972e9059301a878d085b2b449fbbde635"}, - {file = "jiter-0.9.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:11509bfecbc319459647d4ac3fd391d26fdf530dad00c13c4dadabf5b81f01a4"}, - {file = "jiter-0.9.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f22238da568be8bbd8e0650e12feeb2cfea15eda4f9fc271d3b362a4fa0604d"}, - {file = "jiter-0.9.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:17f5d55eb856597607562257c8e36c42bc87f16bef52ef7129b7da11afc779f3"}, - {file = "jiter-0.9.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:6a99bed9fbb02f5bed416d137944419a69aa4c423e44189bc49718859ea83bc5"}, - {file = "jiter-0.9.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e057adb0cd1bd39606100be0eafe742de2de88c79df632955b9ab53a086b3c8d"}, - {file = "jiter-0.9.0-cp313-cp313-win32.whl", hash = "sha256:f7e6850991f3940f62d387ccfa54d1a92bd4bb9f89690b53aea36b4364bcab53"}, - {file = "jiter-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:c8ae3bf27cd1ac5e6e8b7a27487bf3ab5f82318211ec2e1346a5b058756361f7"}, - {file = "jiter-0.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f0b2827fb88dda2cbecbbc3e596ef08d69bda06c6f57930aec8e79505dc17001"}, - {file = "jiter-0.9.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:062b756ceb1d40b0b28f326cba26cfd575a4918415b036464a52f08632731e5a"}, - {file = "jiter-0.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6f7838bc467ab7e8ef9f387bd6de195c43bad82a569c1699cb822f6609dd4cdf"}, - {file = 
"jiter-0.9.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:4a2d16360d0642cd68236f931b85fe50288834c383492e4279d9f1792e309571"}, - {file = "jiter-0.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e84ed1c9c9ec10bbb8c37f450077cbe3c0d4e8c2b19f0a49a60ac7ace73c7452"}, - {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f3c848209ccd1bfa344a1240763975ca917de753c7875c77ec3034f4151d06c"}, - {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7825f46e50646bee937e0f849d14ef3a417910966136f59cd1eb848b8b5bb3e4"}, - {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d82a811928b26d1a6311a886b2566f68ccf2b23cf3bfed042e18686f1f22c2d7"}, - {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c058ecb51763a67f019ae423b1cbe3fa90f7ee6280c31a1baa6ccc0c0e2d06e"}, - {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9897115ad716c48f0120c1f0c4efae348ec47037319a6c63b2d7838bb53aaef4"}, - {file = "jiter-0.9.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:351f4c90a24c4fb8c87c6a73af2944c440494ed2bea2094feecacb75c50398ae"}, - {file = "jiter-0.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d45807b0f236c485e1e525e2ce3a854807dfe28ccf0d013dd4a563395e28008a"}, - {file = "jiter-0.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1537a890724ba00fdba21787010ac6f24dad47f763410e9e1093277913592784"}, - {file = "jiter-0.9.0-cp38-cp38-win32.whl", hash = "sha256:e3630ec20cbeaddd4b65513fa3857e1b7c4190d4481ef07fb63d0fad59033321"}, - {file = "jiter-0.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:2685f44bf80e95f8910553bf2d33b9c87bf25fceae6e9f0c1355f75d2922b0ee"}, - {file = "jiter-0.9.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:9ef340fae98065071ccd5805fe81c99c8f80484e820e40043689cf97fb66b3e2"}, - {file = 
"jiter-0.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:efb767d92c63b2cd9ec9f24feeb48f49574a713870ec87e9ba0c2c6e9329c3e2"}, - {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:113f30f87fb1f412510c6d7ed13e91422cfd329436364a690c34c8b8bd880c42"}, - {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8793b6df019b988526f5a633fdc7456ea75e4a79bd8396a3373c371fc59f5c9b"}, - {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7a9aaa5102dba4e079bb728076fadd5a2dca94c05c04ce68004cfd96f128ea34"}, - {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d838650f6ebaf4ccadfb04522463e74a4c378d7e667e0eb1865cfe3990bfac49"}, - {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0194f813efdf4b8865ad5f5c5f50f8566df7d770a82c51ef593d09e0b347020"}, - {file = "jiter-0.9.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a7954a401d0a8a0b8bc669199db78af435aae1e3569187c2939c477c53cb6a0a"}, - {file = "jiter-0.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4feafe787eb8a8d98168ab15637ca2577f6ddf77ac6c8c66242c2d028aa5420e"}, - {file = "jiter-0.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:27cd1f2e8bb377f31d3190b34e4328d280325ad7ef55c6ac9abde72f79e84d2e"}, - {file = "jiter-0.9.0-cp39-cp39-win32.whl", hash = "sha256:161d461dcbe658cf0bd0aa375b30a968b087cdddc624fc585f3867c63c6eca95"}, - {file = "jiter-0.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:e8b36d8a16a61993be33e75126ad3d8aa29cf450b09576f3c427d27647fcb4aa"}, - {file = "jiter-0.9.0.tar.gz", hash = "sha256:aadba0964deb424daa24492abc3d229c60c4a31bfee205aedbf1acc7639d7893"}, +python-versions = ">=3.9" +files = [ + {file = "jiter-0.10.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:cd2fb72b02478f06a900a5782de2ef47e0396b3e1f7d5aba30daeb1fce66f303"}, + {file = 
"jiter-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:32bb468e3af278f095d3fa5b90314728a6916d89ba3d0ffb726dd9bf7367285e"}, + {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa8b3e0068c26ddedc7abc6fac37da2d0af16b921e288a5a613f4b86f050354f"}, + {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:286299b74cc49e25cd42eea19b72aa82c515d2f2ee12d11392c56d8701f52224"}, + {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6ed5649ceeaeffc28d87fb012d25a4cd356dcd53eff5acff1f0466b831dda2a7"}, + {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2ab0051160cb758a70716448908ef14ad476c3774bd03ddce075f3c1f90a3d6"}, + {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03997d2f37f6b67d2f5c475da4412be584e1cec273c1cfc03d642c46db43f8cf"}, + {file = "jiter-0.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c404a99352d839fed80d6afd6c1d66071f3bacaaa5c4268983fc10f769112e90"}, + {file = "jiter-0.10.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66e989410b6666d3ddb27a74c7e50d0829704ede652fd4c858e91f8d64b403d0"}, + {file = "jiter-0.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b532d3af9ef4f6374609a3bcb5e05a1951d3bf6190dc6b176fdb277c9bbf15ee"}, + {file = "jiter-0.10.0-cp310-cp310-win32.whl", hash = "sha256:da9be20b333970e28b72edc4dff63d4fec3398e05770fb3205f7fb460eb48dd4"}, + {file = "jiter-0.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:f59e533afed0c5b0ac3eba20d2548c4a550336d8282ee69eb07b37ea526ee4e5"}, + {file = "jiter-0.10.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3bebe0c558e19902c96e99217e0b8e8b17d570906e72ed8a87170bc290b1e978"}, + {file = "jiter-0.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:558cc7e44fd8e507a236bee6a02fa17199ba752874400a0ca6cd6e2196cdb7dc"}, + {file = 
"jiter-0.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d613e4b379a07d7c8453c5712ce7014e86c6ac93d990a0b8e7377e18505e98d"}, + {file = "jiter-0.10.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f62cf8ba0618eda841b9bf61797f21c5ebd15a7a1e19daab76e4e4b498d515b2"}, + {file = "jiter-0.10.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:919d139cdfa8ae8945112398511cb7fca58a77382617d279556b344867a37e61"}, + {file = "jiter-0.10.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13ddbc6ae311175a3b03bd8994881bc4635c923754932918e18da841632349db"}, + {file = "jiter-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c440ea003ad10927a30521a9062ce10b5479592e8a70da27f21eeb457b4a9c5"}, + {file = "jiter-0.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dc347c87944983481e138dea467c0551080c86b9d21de6ea9306efb12ca8f606"}, + {file = "jiter-0.10.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:13252b58c1f4d8c5b63ab103c03d909e8e1e7842d302473f482915d95fefd605"}, + {file = "jiter-0.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7d1bbf3c465de4a24ab12fb7766a0003f6f9bce48b8b6a886158c4d569452dc5"}, + {file = "jiter-0.10.0-cp311-cp311-win32.whl", hash = "sha256:db16e4848b7e826edca4ccdd5b145939758dadf0dc06e7007ad0e9cfb5928ae7"}, + {file = "jiter-0.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c9c1d5f10e18909e993f9641f12fe1c77b3e9b533ee94ffa970acc14ded3812"}, + {file = "jiter-0.10.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1e274728e4a5345a6dde2d343c8da018b9d4bd4350f5a472fa91f66fda44911b"}, + {file = "jiter-0.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7202ae396446c988cb2a5feb33a543ab2165b786ac97f53b59aafb803fef0744"}, + {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:23ba7722d6748b6920ed02a8f1726fb4b33e0fd2f3f621816a8b486c66410ab2"}, + {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:371eab43c0a288537d30e1f0b193bc4eca90439fc08a022dd83e5e07500ed026"}, + {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c675736059020365cebc845a820214765162728b51ab1e03a1b7b3abb70f74c"}, + {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c5867d40ab716e4684858e4887489685968a47e3ba222e44cde6e4a2154f959"}, + {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:395bb9a26111b60141757d874d27fdea01b17e8fac958b91c20128ba8f4acc8a"}, + {file = "jiter-0.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6842184aed5cdb07e0c7e20e5bdcfafe33515ee1741a6835353bb45fe5d1bd95"}, + {file = "jiter-0.10.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:62755d1bcea9876770d4df713d82606c8c1a3dca88ff39046b85a048566d56ea"}, + {file = "jiter-0.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:533efbce2cacec78d5ba73a41756beff8431dfa1694b6346ce7af3a12c42202b"}, + {file = "jiter-0.10.0-cp312-cp312-win32.whl", hash = "sha256:8be921f0cadd245e981b964dfbcd6fd4bc4e254cdc069490416dd7a2632ecc01"}, + {file = "jiter-0.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:a7c7d785ae9dda68c2678532a5a1581347e9c15362ae9f6e68f3fdbfb64f2e49"}, + {file = "jiter-0.10.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e0588107ec8e11b6f5ef0e0d656fb2803ac6cf94a96b2b9fc675c0e3ab5e8644"}, + {file = "jiter-0.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cafc4628b616dc32530c20ee53d71589816cf385dd9449633e910d596b1f5c8a"}, + {file = "jiter-0.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:520ef6d981172693786a49ff5b09eda72a42e539f14788124a07530f785c3ad6"}, + {file = 
"jiter-0.10.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:554dedfd05937f8fc45d17ebdf298fe7e0c77458232bcb73d9fbbf4c6455f5b3"}, + {file = "jiter-0.10.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5bc299da7789deacf95f64052d97f75c16d4fc8c4c214a22bf8d859a4288a1c2"}, + {file = "jiter-0.10.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5161e201172de298a8a1baad95eb85db4fb90e902353b1f6a41d64ea64644e25"}, + {file = "jiter-0.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e2227db6ba93cb3e2bf67c87e594adde0609f146344e8207e8730364db27041"}, + {file = "jiter-0.10.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:15acb267ea5e2c64515574b06a8bf393fbfee6a50eb1673614aa45f4613c0cca"}, + {file = "jiter-0.10.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:901b92f2e2947dc6dfcb52fd624453862e16665ea909a08398dde19c0731b7f4"}, + {file = "jiter-0.10.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d0cb9a125d5a3ec971a094a845eadde2db0de85b33c9f13eb94a0c63d463879e"}, + {file = "jiter-0.10.0-cp313-cp313-win32.whl", hash = "sha256:48a403277ad1ee208fb930bdf91745e4d2d6e47253eedc96e2559d1e6527006d"}, + {file = "jiter-0.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:75f9eb72ecb640619c29bf714e78c9c46c9c4eaafd644bf78577ede459f330d4"}, + {file = "jiter-0.10.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:28ed2a4c05a1f32ef0e1d24c2611330219fed727dae01789f4a335617634b1ca"}, + {file = "jiter-0.10.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14a4c418b1ec86a195f1ca69da8b23e8926c752b685af665ce30777233dfe070"}, + {file = "jiter-0.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d7bfed2fe1fe0e4dda6ef682cee888ba444b21e7a6553e03252e4feb6cf0adca"}, + {file = "jiter-0.10.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:5e9251a5e83fab8d87799d3e1a46cb4b7f2919b895c6f4483629ed2446f66522"}, + {file = 
"jiter-0.10.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:023aa0204126fe5b87ccbcd75c8a0d0261b9abdbbf46d55e7ae9f8e22424eeb8"}, + {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c189c4f1779c05f75fc17c0c1267594ed918996a231593a21a5ca5438445216"}, + {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:15720084d90d1098ca0229352607cd68256c76991f6b374af96f36920eae13c4"}, + {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4f2fb68e5f1cfee30e2b2a09549a00683e0fde4c6a2ab88c94072fc33cb7426"}, + {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ce541693355fc6da424c08b7edf39a2895f58d6ea17d92cc2b168d20907dee12"}, + {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31c50c40272e189d50006ad5c73883caabb73d4e9748a688b216e85a9a9ca3b9"}, + {file = "jiter-0.10.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fa3402a2ff9815960e0372a47b75c76979d74402448509ccd49a275fa983ef8a"}, + {file = "jiter-0.10.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:1956f934dca32d7bb647ea21d06d93ca40868b505c228556d3373cbd255ce853"}, + {file = "jiter-0.10.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:fcedb049bdfc555e261d6f65a6abe1d5ad68825b7202ccb9692636c70fcced86"}, + {file = "jiter-0.10.0-cp314-cp314-win32.whl", hash = "sha256:ac509f7eccca54b2a29daeb516fb95b6f0bd0d0d8084efaf8ed5dfc7b9f0b357"}, + {file = "jiter-0.10.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5ed975b83a2b8639356151cef5c0d597c68376fc4922b45d0eb384ac058cfa00"}, + {file = "jiter-0.10.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa96f2abba33dc77f79b4cf791840230375f9534e5fac927ccceb58c5e604a5"}, + {file = "jiter-0.10.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = 
"sha256:bd6292a43c0fc09ce7c154ec0fa646a536b877d1e8f2f96c19707f65355b5a4d"}, + {file = "jiter-0.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:39de429dcaeb6808d75ffe9effefe96a4903c6a4b376b2f6d08d77c1aaee2f18"}, + {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52ce124f13a7a616fad3bb723f2bfb537d78239d1f7f219566dc52b6f2a9e48d"}, + {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:166f3606f11920f9a1746b2eea84fa2c0a5d50fd313c38bdea4edc072000b0af"}, + {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:28dcecbb4ba402916034fc14eba7709f250c4d24b0c43fc94d187ee0580af181"}, + {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86c5aa6910f9bebcc7bc4f8bc461aff68504388b43bfe5e5c0bd21efa33b52f4"}, + {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ceeb52d242b315d7f1f74b441b6a167f78cea801ad7c11c36da77ff2d42e8a28"}, + {file = "jiter-0.10.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ff76d8887c8c8ee1e772274fcf8cc1071c2c58590d13e33bd12d02dc9a560397"}, + {file = "jiter-0.10.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a9be4d0fa2b79f7222a88aa488bd89e2ae0a0a5b189462a12def6ece2faa45f1"}, + {file = "jiter-0.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9ab7fd8738094139b6c1ab1822d6f2000ebe41515c537235fd45dabe13ec9324"}, + {file = "jiter-0.10.0-cp39-cp39-win32.whl", hash = "sha256:5f51e048540dd27f204ff4a87f5d79294ea0aa3aa552aca34934588cf27023cf"}, + {file = "jiter-0.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:1b28302349dc65703a9e4ead16f163b1c339efffbe1049c30a44b001a2a4fff9"}, + {file = "jiter-0.10.0.tar.gz", hash = "sha256:07a7142c38aacc85194391108dc91b5b57093c978a9932bd86a36862759d9500"}, ] [[package]] name = "joblib" -version = "1.4.2" +version = "1.5.1" description = "Lightweight pipelining with Python 
functions" optional = false -python-versions = ">=3.8" -groups = ["main"] +python-versions = ">=3.9" files = [ - {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, - {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, + {file = "joblib-1.5.1-py3-none-any.whl", hash = "sha256:4719a31f054c7d766948dcd83e9613686b27114f190f717cec7eaa2084f8a74a"}, + {file = "joblib-1.5.1.tar.gz", hash = "sha256:f4f86e351f39fe3d0d32a9f2c3d8af1ee4cec285aafcb27003dda5205576b444"}, ] [[package]] @@ -883,7 +868,6 @@ version = "4.23.0" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"}, {file = "jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"}, @@ -901,14 +885,13 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- [[package]] name = "jsonschema-specifications" -version = "2024.10.1" +version = "2025.4.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" optional = false python-versions = ">=3.9" -groups = ["dev"] files = [ - {file = "jsonschema_specifications-2024.10.1-py3-none-any.whl", hash = "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf"}, - {file = "jsonschema_specifications-2024.10.1.tar.gz", hash = "sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272"}, + {file = "jsonschema_specifications-2025.4.1-py3-none-any.whl", hash = "sha256:4653bffbd6584f7de83a67e0d620ef16900b390ddc7939d56684d6c81e33f1af"}, + {file = "jsonschema_specifications-2025.4.1.tar.gz", hash = "sha256:630159c9f4dbea161a6a2205c3011cc4f18ff381b189fff48bb39b9bf26ae608"}, ] [package.dependencies] @@ -920,7 
+903,6 @@ version = "0.27.1" description = "Python extension for computing string edit distances and similarities." optional = false python-versions = ">=3.9" -groups = ["main"] files = [ {file = "levenshtein-0.27.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:13d6f617cb6fe63714c4794861cfaacd398db58a292f930edb7f12aad931dace"}, {file = "levenshtein-0.27.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ca9d54d41075e130c390e61360bec80f116b62d6ae973aec502e77e921e95334"}, @@ -1027,7 +1009,6 @@ version = "5.4.0" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." optional = false python-versions = ">=3.6" -groups = ["main"] files = [ {file = "lxml-5.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e7bc6df34d42322c5289e37e9971d6ed114e3776b45fa879f734bded9d1fea9c"}, {file = "lxml-5.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6854f8bd8a1536f8a1d9a3655e6354faa6406621cf857dc27b681b69860645c7"}, @@ -1176,7 +1157,6 @@ version = "0.5.1" description = "Type annotations for the lxml package" optional = false python-versions = "*" -groups = ["dev"] files = [ {file = "lxml-stubs-0.5.1.tar.gz", hash = "sha256:e0ec2aa1ce92d91278b719091ce4515c12adc1d564359dfaf81efa7d4feab79d"}, {file = "lxml_stubs-0.5.1-py3-none-any.whl", hash = "sha256:1f689e5dbc4b9247cb09ae820c7d34daeb1fdbd1db06123814b856dae7787272"}, @@ -1187,18 +1167,17 @@ test = ["coverage[toml] (>=7.2.5)", "mypy (>=1.2.0)", "pytest (>=7.3.0)", "pytes [[package]] name = "markdown" -version = "3.7" +version = "3.8" description = "Python implementation of John Gruber's Markdown." 
optional = false -python-versions = ">=3.8" -groups = ["docs"] +python-versions = ">=3.9" files = [ - {file = "Markdown-3.7-py3-none-any.whl", hash = "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803"}, - {file = "markdown-3.7.tar.gz", hash = "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2"}, + {file = "markdown-3.8-py3-none-any.whl", hash = "sha256:794a929b79c5af141ef5ab0f2f642d0f7b1872981250230e72682346f7cc90dc"}, + {file = "markdown-3.8.tar.gz", hash = "sha256:7df81e63f0df5c4b24b7d156eb81e4690595239b7d70937d0409f1b0de319c6f"}, ] [package.extras] -docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] +docs = ["mdx_gh_links (>=0.2)", "mkdocs (>=1.6)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] testing = ["coverage", "pyyaml"] [[package]] @@ -1207,17 +1186,15 @@ version = "2.5.3" description = "A fast and complete Python implementation of Markdown" optional = true python-versions = "<4,>=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "markdown2-2.5.3-py3-none-any.whl", hash = "sha256:a8ebb7e84b8519c37bf7382b3db600f1798a22c245bfd754a1f87ca8d7ea63b3"}, {file = "markdown2-2.5.3.tar.gz", hash = "sha256:4d502953a4633408b0ab3ec503c5d6984d1b14307e32b325ec7d16ea57524895"}, ] [package.extras] -all = ["latex2mathml ; python_version >= \"3.8.1\"", "pygments (>=2.7.3)", "wavedrom"] +all = ["latex2mathml", "pygments (>=2.7.3)", "wavedrom"] code-syntax-highlighting = ["pygments (>=2.7.3)"] -latex = ["latex2mathml ; python_version >= \"3.8.1\""] +latex = ["latex2mathml"] wavedrom = ["wavedrom"] [[package]] @@ -1226,8 +1203,6 @@ version = "0.13.1" description = "Convert HTML to markdown." 
optional = true python-versions = "*" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "markdownify-0.13.1-py3-none-any.whl", hash = "sha256:1d181d43d20902bcc69d7be85b5316ed174d0dda72ff56e14ae4c95a4a407d22"}, {file = "markdownify-0.13.1.tar.gz", hash = "sha256:ab257f9e6bd4075118828a28c9d02f8a4bfeb7421f558834aa79b2dfeb32a098"}, @@ -1239,20 +1214,18 @@ six = ">=1.15,<2" [[package]] name = "marker-pdf" -version = "1.6.2" +version = "1.7.3" description = "Convert documents to markdown with high speed and accuracy." optional = true python-versions = "<4.0,>=3.10" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ - {file = "marker_pdf-1.6.2-py3-none-any.whl", hash = "sha256:48fbc6353e6fc3510d30d5682a8974fc9d6eb58a13e7c3f525ed6973b721f108"}, - {file = "marker_pdf-1.6.2.tar.gz", hash = "sha256:38725082c89c0aec5e28e4f1df8f3974ccc0742c2265f0342c20e52fbde90bf0"}, + {file = "marker_pdf-1.7.3-py3-none-any.whl", hash = "sha256:3dbb890dfe383d4d437d55ab97dbc35e2aae1b613a032b0738d1d25cabbd07f4"}, + {file = "marker_pdf-1.7.3.tar.gz", hash = "sha256:ddf297036ccd54d94e2f4a684b71bfe201c19755aa40dd8f2be757a8e631f8b9"}, ] [package.dependencies] anthropic = ">=0.46.0,<0.47.0" -click = ">=8.1.7,<9.0.0" +click = ">=8.2.0,<9.0.0" filetype = ">=1.2.0,<2.0.0" ftfy = ">=6.1.1,<7.0.0" google-genai = ">=1.0.0,<2.0.0" @@ -1268,8 +1241,8 @@ python-dotenv = ">=1.0.0,<2.0.0" rapidfuzz = ">=3.8.1,<4.0.0" regex = ">=2024.4.28,<2025.0.0" scikit-learn = ">=1.6.1,<2.0.0" -surya-ocr = ">=0.13.1,<0.14.0" -torch = ">=2.5.1,<3.0.0" +surya-ocr = ">=0.14.2,<0.15.0" +torch = ">=2.7.0,<3.0.0" tqdm = ">=4.66.1,<5.0.0" transformers = ">=4.45.2,<5.0.0" @@ -1282,7 +1255,6 @@ version = "3.0.2" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false python-versions = ">=3.9" -groups = ["main", "docs"] files = [ {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, @@ -1346,7 +1318,6 @@ files = [ {file = "MarkupSafe-3.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a"}, {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, ] -markers = {main = "extra == \"pdf\""} [[package]] name = "mergedeep" @@ -1354,7 +1325,6 @@ version = "1.3.4" description = "A deep merge function for 🐍." optional = false python-versions = ">=3.6" -groups = ["docs"] files = [ {file = "mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307"}, {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, @@ -1366,7 +1336,6 @@ version = "1.6.1" description = "Project documentation with Markdown." 
optional = false python-versions = ">=3.8" -groups = ["docs"] files = [ {file = "mkdocs-1.6.1-py3-none-any.whl", hash = "sha256:db91759624d1647f3f34aa0c3f327dd2601beae39a366d6e064c03468d35c20e"}, {file = "mkdocs-1.6.1.tar.gz", hash = "sha256:7b432f01d928c084353ab39c57282f29f92136665bdd6abf7c1ec8d822ef86f2"}, @@ -1389,18 +1358,17 @@ watchdog = ">=2.0" [package.extras] i18n = ["babel (>=2.9.0)"] -min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4) ; platform_system == \"Windows\"", "ghp-import (==1.0)", "importlib-metadata (==4.4) ; python_version < \"3.10\"", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.4)", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] [[package]] name = "mkdocs-autorefs" -version = "1.4.1" +version = "1.4.2" description = "Automatically link across pages in MkDocs." 
optional = false python-versions = ">=3.9" -groups = ["docs"] files = [ - {file = "mkdocs_autorefs-1.4.1-py3-none-any.whl", hash = "sha256:9793c5ac06a6ebbe52ec0f8439256e66187badf4b5334b5fde0b128ec134df4f"}, - {file = "mkdocs_autorefs-1.4.1.tar.gz", hash = "sha256:4b5b6235a4becb2b10425c2fa191737e415b37aa3418919db33e5d774c9db079"}, + {file = "mkdocs_autorefs-1.4.2-py3-none-any.whl", hash = "sha256:83d6d777b66ec3c372a1aad4ae0cf77c243ba5bcda5bf0c6b8a2c5e7a3d89f13"}, + {file = "mkdocs_autorefs-1.4.2.tar.gz", hash = "sha256:e2ebe1abd2b67d597ed19378c0fff84d73d1dbce411fce7a7cc6f161888b6749"}, ] [package.dependencies] @@ -1414,7 +1382,6 @@ version = "0.5.0" description = "MkDocs plugin to programmatically generate documentation pages during the build" optional = false python-versions = ">=3.7" -groups = ["docs"] files = [ {file = "mkdocs_gen_files-0.5.0-py3-none-any.whl", hash = "sha256:7ac060096f3f40bd19039e7277dd3050be9a453c8ac578645844d4d91d7978ea"}, {file = "mkdocs_gen_files-0.5.0.tar.gz", hash = "sha256:4c7cf256b5d67062a788f6b1d035e157fc1a9498c2399be9af5257d4ff4d19bc"}, @@ -1429,7 +1396,6 @@ version = "0.2.0" description = "MkDocs extension that lists all dependencies according to a mkdocs.yml file" optional = false python-versions = ">=3.8" -groups = ["docs"] files = [ {file = "mkdocs_get_deps-0.2.0-py3-none-any.whl", hash = "sha256:2bf11d0b133e77a0dd036abeeb06dec8775e46efa526dc70667d8863eefc6134"}, {file = "mkdocs_get_deps-0.2.0.tar.gz", hash = "sha256:162b3d129c7fad9b19abfdcb9c1458a651628e4b1dea628ac68790fb3061c60c"}, @@ -1446,7 +1412,6 @@ version = "0.6.2" description = "MkDocs plugin to specify the navigation in Markdown instead of YAML" optional = false python-versions = ">=3.9" -groups = ["docs"] files = [ {file = "mkdocs_literate_nav-0.6.2-py3-none-any.whl", hash = "sha256:0a6489a26ec7598477b56fa112056a5e3a6c15729f0214bea8a4dbc55bd5f630"}, {file = "mkdocs_literate_nav-0.6.2.tar.gz", hash = 
"sha256:760e1708aa4be86af81a2b56e82c739d5a8388a0eab1517ecfd8e5aa40810a75"}, @@ -1461,7 +1426,6 @@ version = "9.6.14" description = "Documentation that simply works" optional = false python-versions = ">=3.8" -groups = ["docs"] files = [ {file = "mkdocs_material-9.6.14-py3-none-any.whl", hash = "sha256:3b9cee6d3688551bf7a8e8f41afda97a3c39a12f0325436d76c86706114b721b"}, {file = "mkdocs_material-9.6.14.tar.gz", hash = "sha256:39d795e90dce6b531387c255bd07e866e027828b7346d3eba5ac3de265053754"}, @@ -1491,7 +1455,6 @@ version = "1.3.1" description = "Extension pack for Python Markdown and MkDocs Material." optional = false python-versions = ">=3.8" -groups = ["docs"] files = [ {file = "mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31"}, {file = "mkdocs_material_extensions-1.3.1.tar.gz", hash = "sha256:10c9511cea88f568257f960358a467d12b970e1f7b2c0e5fb2bb48cab1928443"}, @@ -1503,7 +1466,6 @@ version = "0.3.10" description = "MkDocs plugin to allow clickable sections that lead to an index page" optional = false python-versions = ">=3.9" -groups = ["docs"] files = [ {file = "mkdocs_section_index-0.3.10-py3-none-any.whl", hash = "sha256:bc27c0d0dc497c0ebaee1fc72839362aed77be7318b5ec0c30628f65918e4776"}, {file = "mkdocs_section_index-0.3.10.tar.gz", hash = "sha256:a82afbda633c82c5568f0e3b008176b9b365bf4bd8b6f919d6eff09ee146b9f8"}, @@ -1518,7 +1480,6 @@ version = "0.29.1" description = "Automatic documentation from sources, for MkDocs." optional = false python-versions = ">=3.9" -groups = ["docs"] files = [ {file = "mkdocstrings-0.29.1-py3-none-any.whl", hash = "sha256:37a9736134934eea89cbd055a513d40a020d87dfcae9e3052c2a6b8cd4af09b6"}, {file = "mkdocstrings-0.29.1.tar.gz", hash = "sha256:8722f8f8c5cd75da56671e0a0c1bbed1df9946c0cef74794d6141b34011abd42"}, @@ -1543,7 +1504,6 @@ version = "1.16.10" description = "A Python handler for mkdocstrings." 
optional = false python-versions = ">=3.9" -groups = ["docs"] files = [ {file = "mkdocstrings_python-1.16.10-py3-none-any.whl", hash = "sha256:63bb9f01f8848a644bdb6289e86dc38ceddeaa63ecc2e291e3b2ca52702a6643"}, {file = "mkdocstrings_python-1.16.10.tar.gz", hash = "sha256:f9eedfd98effb612ab4d0ed6dd2b73aff6eba5215e0a65cea6d877717f75502e"}, @@ -1561,8 +1521,6 @@ version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" optional = true python-versions = "*" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, @@ -1571,7 +1529,7 @@ files = [ [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] +gmpy = ["gmpy2 (>=2.1.0a4)"] tests = ["pytest (>=4.6)"] [[package]] @@ -1580,7 +1538,6 @@ version = "1.15.0" description = "Optional static typing for Python" optional = false python-versions = ">=3.9" -groups = ["dev"] files = [ {file = "mypy-1.15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:979e4e1a006511dacf628e36fadfecbcc0160a8af6ca7dad2f5025529e082c13"}, {file = "mypy-1.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c4bb0e1bd29f7d34efcccd71cf733580191e9a264a2202b0239da95984c5b559"}, @@ -1630,14 +1587,13 @@ reports = ["lxml"] [[package]] name = "mypy-extensions" -version = "1.0.0" +version = "1.1.0" description = "Type system extensions for programs checked with the mypy type checker." 
optional = false -python-versions = ">=3.5" -groups = ["dev"] +python-versions = ">=3.8" files = [ - {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, - {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, + {file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"}, + {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, ] [[package]] @@ -1646,8 +1602,6 @@ version = "3.4.2" description = "Python package for creating and manipulating graphs and networks" optional = true python-versions = ">=3.10" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f"}, {file = "networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1"}, @@ -1667,7 +1621,6 @@ version = "3.9.1" description = "Natural Language Toolkit" optional = false python-versions = ">=3.8" -groups = ["main"] files = [ {file = "nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1"}, {file = "nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868"}, @@ -1693,7 +1646,6 @@ version = "1.9.1" description = "Node.js virtual environment builder" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["main", "dev"] files = [ {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, @@ -1701,67 +1653,66 @@ files = [ 
[[package]] name = "numpy" -version = "2.2.5" +version = "2.2.6" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.10" -groups = ["main", "dev"] -files = [ - {file = "numpy-2.2.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f4a922da1729f4c40932b2af4fe84909c7a6e167e6e99f71838ce3a29f3fe26"}, - {file = "numpy-2.2.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b6f91524d31b34f4a5fee24f5bc16dcd1491b668798b6d85585d836c1e633a6a"}, - {file = "numpy-2.2.5-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:19f4718c9012e3baea91a7dba661dcab2451cda2550678dc30d53acb91a7290f"}, - {file = "numpy-2.2.5-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:eb7fd5b184e5d277afa9ec0ad5e4eb562ecff541e7f60e69ee69c8d59e9aeaba"}, - {file = "numpy-2.2.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6413d48a9be53e183eb06495d8e3b006ef8f87c324af68241bbe7a39e8ff54c3"}, - {file = "numpy-2.2.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7451f92eddf8503c9b8aa4fe6aa7e87fd51a29c2cfc5f7dbd72efde6c65acf57"}, - {file = "numpy-2.2.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0bcb1d057b7571334139129b7f941588f69ce7c4ed15a9d6162b2ea54ded700c"}, - {file = "numpy-2.2.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:36ab5b23915887543441efd0417e6a3baa08634308894316f446027611b53bf1"}, - {file = "numpy-2.2.5-cp310-cp310-win32.whl", hash = "sha256:422cc684f17bc963da5f59a31530b3936f57c95a29743056ef7a7903a5dbdf88"}, - {file = "numpy-2.2.5-cp310-cp310-win_amd64.whl", hash = "sha256:e4f0b035d9d0ed519c813ee23e0a733db81ec37d2e9503afbb6e54ccfdee0fa7"}, - {file = "numpy-2.2.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c42365005c7a6c42436a54d28c43fe0e01ca11eb2ac3cefe796c25a5f98e5e9b"}, - {file = "numpy-2.2.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:498815b96f67dc347e03b719ef49c772589fb74b8ee9ea2c37feae915ad6ebda"}, - {file = 
"numpy-2.2.5-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:6411f744f7f20081b1b4e7112e0f4c9c5b08f94b9f086e6f0adf3645f85d3a4d"}, - {file = "numpy-2.2.5-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9de6832228f617c9ef45d948ec1cd8949c482238d68b2477e6f642c33a7b0a54"}, - {file = "numpy-2.2.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:369e0d4647c17c9363244f3468f2227d557a74b6781cb62ce57cf3ef5cc7c610"}, - {file = "numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:262d23f383170f99cd9191a7c85b9a50970fe9069b2f8ab5d786eca8a675d60b"}, - {file = "numpy-2.2.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa70fdbdc3b169d69e8c59e65c07a1c9351ceb438e627f0fdcd471015cd956be"}, - {file = "numpy-2.2.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37e32e985f03c06206582a7323ef926b4e78bdaa6915095ef08070471865b906"}, - {file = "numpy-2.2.5-cp311-cp311-win32.whl", hash = "sha256:f5045039100ed58fa817a6227a356240ea1b9a1bc141018864c306c1a16d4175"}, - {file = "numpy-2.2.5-cp311-cp311-win_amd64.whl", hash = "sha256:b13f04968b46ad705f7c8a80122a42ae8f620536ea38cf4bdd374302926424dd"}, - {file = "numpy-2.2.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ee461a4eaab4f165b68780a6a1af95fb23a29932be7569b9fab666c407969051"}, - {file = "numpy-2.2.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ec31367fd6a255dc8de4772bd1658c3e926d8e860a0b6e922b615e532d320ddc"}, - {file = "numpy-2.2.5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:47834cde750d3c9f4e52c6ca28a7361859fcaf52695c7dc3cc1a720b8922683e"}, - {file = "numpy-2.2.5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:2c1a1c6ccce4022383583a6ded7bbcda22fc635eb4eb1e0a053336425ed36dfa"}, - {file = "numpy-2.2.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d75f338f5f79ee23548b03d801d28a505198297534f62416391857ea0479571"}, - {file = "numpy-2.2.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:3a801fef99668f309b88640e28d261991bfad9617c27beda4a3aec4f217ea073"}, - {file = "numpy-2.2.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:abe38cd8381245a7f49967a6010e77dbf3680bd3627c0fe4362dd693b404c7f8"}, - {file = "numpy-2.2.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5a0ac90e46fdb5649ab6369d1ab6104bfe5854ab19b645bf5cda0127a13034ae"}, - {file = "numpy-2.2.5-cp312-cp312-win32.whl", hash = "sha256:0cd48122a6b7eab8f06404805b1bd5856200e3ed6f8a1b9a194f9d9054631beb"}, - {file = "numpy-2.2.5-cp312-cp312-win_amd64.whl", hash = "sha256:ced69262a8278547e63409b2653b372bf4baff0870c57efa76c5703fd6543282"}, - {file = "numpy-2.2.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:059b51b658f4414fff78c6d7b1b4e18283ab5fa56d270ff212d5ba0c561846f4"}, - {file = "numpy-2.2.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:47f9ed103af0bc63182609044b0490747e03bd20a67e391192dde119bf43d52f"}, - {file = "numpy-2.2.5-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:261a1ef047751bb02f29dfe337230b5882b54521ca121fc7f62668133cb119c9"}, - {file = "numpy-2.2.5-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4520caa3807c1ceb005d125a75e715567806fed67e315cea619d5ec6e75a4191"}, - {file = "numpy-2.2.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d14b17b9be5f9c9301f43d2e2a4886a33b53f4e6fdf9ca2f4cc60aeeee76372"}, - {file = "numpy-2.2.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ba321813a00e508d5421104464510cc962a6f791aa2fca1c97b1e65027da80d"}, - {file = "numpy-2.2.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4cbdef3ddf777423060c6f81b5694bad2dc9675f110c4b2a60dc0181543fac7"}, - {file = "numpy-2.2.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:54088a5a147ab71a8e7fdfd8c3601972751ded0739c6b696ad9cb0343e21ab73"}, - {file = "numpy-2.2.5-cp313-cp313-win32.whl", hash = "sha256:c8b82a55ef86a2d8e81b63da85e55f5537d2157165be1cb2ce7cfa57b6aef38b"}, - {file = 
"numpy-2.2.5-cp313-cp313-win_amd64.whl", hash = "sha256:d8882a829fd779f0f43998e931c466802a77ca1ee0fe25a3abe50278616b1471"}, - {file = "numpy-2.2.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e8b025c351b9f0e8b5436cf28a07fa4ac0204d67b38f01433ac7f9b870fa38c6"}, - {file = "numpy-2.2.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8dfa94b6a4374e7851bbb6f35e6ded2120b752b063e6acdd3157e4d2bb922eba"}, - {file = "numpy-2.2.5-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:97c8425d4e26437e65e1d189d22dff4a079b747ff9c2788057bfb8114ce1e133"}, - {file = "numpy-2.2.5-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:352d330048c055ea6db701130abc48a21bec690a8d38f8284e00fab256dc1376"}, - {file = "numpy-2.2.5-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b4c0773b6ada798f51f0f8e30c054d32304ccc6e9c5d93d46cb26f3d385ab19"}, - {file = "numpy-2.2.5-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55f09e00d4dccd76b179c0f18a44f041e5332fd0e022886ba1c0bbf3ea4a18d0"}, - {file = "numpy-2.2.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:02f226baeefa68f7d579e213d0f3493496397d8f1cff5e2b222af274c86a552a"}, - {file = "numpy-2.2.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c26843fd58f65da9491165072da2cccc372530681de481ef670dcc8e27cfb066"}, - {file = "numpy-2.2.5-cp313-cp313t-win32.whl", hash = "sha256:1a161c2c79ab30fe4501d5a2bbfe8b162490757cf90b7f05be8b80bc02f7bb8e"}, - {file = "numpy-2.2.5-cp313-cp313t-win_amd64.whl", hash = "sha256:d403c84991b5ad291d3809bace5e85f4bbf44a04bdc9a88ed2bb1807b3360bb8"}, - {file = "numpy-2.2.5-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b4ea7e1cff6784e58fe281ce7e7f05036b3e1c89c6f922a6bfbc0a7e8768adbe"}, - {file = "numpy-2.2.5-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:d7543263084a85fbc09c704b515395398d31d6395518446237eac219eab9e55e"}, - {file = "numpy-2.2.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:0255732338c4fdd00996c0421884ea8a3651eea555c3a56b84892b66f696eb70"}, - {file = "numpy-2.2.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d2e3bdadaba0e040d1e7ab39db73e0afe2c74ae277f5614dad53eadbecbbb169"}, - {file = "numpy-2.2.5.tar.gz", hash = "sha256:a9c0d994680cd991b1cb772e8b297340085466a6fe964bc9d4e80f5e2f43c291"}, +files = [ + {file = "numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb"}, + {file = "numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90"}, + {file = "numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163"}, + {file = "numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf"}, + {file = "numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83"}, + {file = "numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915"}, + {file = "numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680"}, + {file = "numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289"}, + {file = "numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d"}, + {file = "numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae"}, + {file = 
"numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491"}, + {file = "numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a"}, + {file = "numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf"}, + {file = "numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1"}, + {file = "numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab"}, + {file = "numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47"}, + {file = "numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282"}, + {file = "numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87"}, + {file = "numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249"}, + {file = "numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49"}, + {file = "numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de"}, + {file = "numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4"}, + {file = "numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566"}, + {file = "numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f"}, + {file = "numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f"}, + {file = "numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868"}, + {file = "numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d"}, + 
{file = "numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd"}, + {file = "numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8"}, + {file = "numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f"}, + {file = "numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa"}, + {file = "numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571"}, + {file = "numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1"}, + {file = "numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff"}, + {file = "numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = 
"sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00"}, + {file = "numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd"}, ] [[package]] @@ -1770,8 +1721,6 @@ version = "12.6.4.1" description = "CUBLAS native runtime libraries" optional = true python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08ed2686e9875d01b58e3cb379c6896df8e76c75e0d4a7f7dace3d7b6d9ef8eb"}, {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:235f728d6e2a409eddf1df58d5b0921cf80cfa9e72b9f2775ccb7b4a87984668"}, @@ -1784,8 +1733,6 @@ version = "12.6.80" description = "CUDA profiling tools runtime libs." 
optional = true python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:166ee35a3ff1587f2490364f90eeeb8da06cd867bd5b701bf7f9a02b78bc63fc"}, {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.whl", hash = "sha256:358b4a1d35370353d52e12f0a7d1769fc01ff74a191689d3870b2123156184c4"}, @@ -1800,8 +1747,6 @@ version = "12.6.77" description = "NVRTC native runtime libraries" optional = true python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5847f1d6e5b757f1d2b3991a01082a44aad6f10ab3c5c0213fa3e25bddc25a13"}, {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:35b0cc6ee3a9636d5409133e79273ce1f3fd087abb0532d2d2e8fff1fe9efc53"}, @@ -1814,8 +1759,6 @@ version = "12.6.77" description = "CUDA Runtime native Libraries" optional = true python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6116fad3e049e04791c0256a9778c16237837c08b27ed8c8401e2e45de8d60cd"}, {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d461264ecb429c84c8879a7153499ddc7b19b5f8d84c204307491989a365588e"}, @@ -1830,8 +1773,6 @@ version = "9.5.1.17" description = "cuDNN runtime libraries" optional = true python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = 
"nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9fd4584468533c61873e5fda8ca41bac3a38bcb2d12350830c69b0a96a7e4def"}, {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:30ac3869f6db17d170e0e556dd6cc5eee02647abc31ca856634d5a40f82c15b2"}, @@ -1847,8 +1788,6 @@ version = "11.3.0.4" description = "CUFFT native runtime libraries" optional = true python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d16079550df460376455cba121db6564089176d9bac9e4f360493ca4741b22a6"}, {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8510990de9f96c803a051822618d42bf6cb8f069ff3f48d93a8486efdacb48fb"}, @@ -1866,8 +1805,6 @@ version = "1.11.1.6" description = "cuFile GPUDirect libraries" optional = true python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc23469d1c7e52ce6c1d55253273d32c565dd22068647f3aa59b3c6b005bf159"}, {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:8f57a0051dcf2543f6dc2b98a98cb2719c37d3cee1baba8965d57f3bbc90d4db"}, @@ -1879,8 +1816,6 @@ version = "10.3.7.77" description = "CURAND native runtime libraries" optional = true python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:6e82df077060ea28e37f48a3ec442a8f47690c7499bff392a5938614b56c98d8"}, {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", 
hash = "sha256:a42cd1344297f70b9e39a1e4f467a4e1c10f1da54ff7a85c12197f6c652c8bdf"}, @@ -1895,8 +1830,6 @@ version = "11.7.1.2" description = "CUDA solver native runtime libraries" optional = true python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0ce237ef60acde1efc457335a2ddadfd7610b892d94efee7b776c64bb1cac9e0"}, {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9e49843a7707e42022babb9bcfa33c29857a93b88020c4e4434656a655b698c"}, @@ -1916,8 +1849,6 @@ version = "12.5.4.2" description = "CUSPARSE native runtime libraries" optional = true python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d25b62fb18751758fe3c93a4a08eff08effedfe4edf1c6bb5afd0890fe88f887"}, {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7aa32fa5470cf754f72d1116c7cbc300b4e638d3ae5304cfa4a638a5b87161b1"}, @@ -1935,8 +1866,6 @@ version = "0.6.3" description = "NVIDIA cuSPARSELt" optional = true python-versions = "*" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8371549623ba601a06322af2133c4a44350575f5a3108fb75f3ef20b822ad5f1"}, {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e5c8a26c36445dd2e6812f1177978a24e2d37cacce7e090f297a688d1ec44f46"}, @@ -1949,8 +1878,6 @@ version = "2.26.2" description = "NVIDIA Collective Communication Library (NCCL) Runtime" optional = true python-versions = ">=3" -groups 
= ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c196e95e832ad30fbbb50381eb3cbd1fadd5675e587a548563993609af19522"}, {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6"}, @@ -1962,8 +1889,6 @@ version = "12.6.85" description = "Nvidia JIT LTO Library" optional = true python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a"}, {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cf4eaa7d4b6b543ffd69d6abfb11efdeb2db48270d94dfd3a452c24150829e41"}, @@ -1976,8 +1901,6 @@ version = "12.6.77" description = "NVIDIA Tools Extension" optional = true python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f44f8d86bb7d5629988d61c8d3ae61dddb2015dee142740536bc7481b022fe4b"}, {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:adcaabb9d436c9761fca2b13959a2d237c5f9fd406c8e4b723c695409ff88059"}, @@ -1988,15 +1911,13 @@ files = [ [[package]] name = "openai" -version = "1.76.0" +version = "1.82.0" description = "The official Python library for the openai API" optional = true python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ - {file = "openai-1.76.0-py3-none-any.whl", hash = 
"sha256:a712b50e78cf78e6d7b2a8f69c4978243517c2c36999756673e07a14ce37dc0a"}, - {file = "openai-1.76.0.tar.gz", hash = "sha256:fd2bfaf4608f48102d6b74f9e11c5ecaa058b60dad9c36e409c12477dfd91fb2"}, + {file = "openai-1.82.0-py3-none-any.whl", hash = "sha256:8c40647fea1816516cb3de5189775b30b5f4812777e40b8768f361f232b61b30"}, + {file = "openai-1.82.0.tar.gz", hash = "sha256:b0a009b9a58662d598d07e91e4219ab4b1e3d8ba2db3f173896a92b9b874d1a7"}, ] [package.dependencies] @@ -2020,8 +1941,6 @@ version = "4.11.0.86" description = "Wrapper package for OpenCV python bindings." optional = true python-versions = ">=3.6" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "opencv-python-headless-4.11.0.86.tar.gz", hash = "sha256:996eb282ca4b43ec6a3972414de0e2331f5d9cda2b41091a49739c19fb843798"}, {file = "opencv_python_headless-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:48128188ade4a7e517237c8e1e11a9cdf5c282761473383e77beb875bb1e61ca"}, @@ -2034,24 +1953,22 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\""}, - {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, + {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] [[package]] name = "packaging" -version = "24.2" +version = "25.0" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" -groups = ["main", "dev", "docs"] files = [ - {file = "packaging-24.2-py3-none-any.whl", hash = 
"sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, - {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, + {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, + {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, ] -markers = {main = "extra == \"pdf\""} [[package]] name = "paginate" @@ -2059,7 +1976,6 @@ version = "0.5.7" description = "Divides large result sets into pages for easier browsing" optional = false python-versions = "*" -groups = ["docs"] files = [ {file = "paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591"}, {file = "paginate-0.5.7.tar.gz", hash = "sha256:22bd083ab41e1a8b4f3690544afb2c60c25e5c9a63a30fa2f483f6c60c8e5945"}, @@ -2075,7 +1991,6 @@ version = "2.2.3" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" -groups = ["main"] files = [ {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, @@ -2162,7 +2077,6 @@ version = "2.2.3.250308" description = "Type annotations for pandas" optional = false python-versions = ">=3.10" -groups = ["dev"] files = [ {file = "pandas_stubs-2.2.3.250308-py3-none-any.whl", hash = "sha256:a377edff3b61f8b268c82499fdbe7c00fdeed13235b8b71d6a1dc347aeddc74d"}, {file = "pandas_stubs-2.2.3.250308.tar.gz", hash = "sha256:3a6e9daf161f00b85c83772ed3d5cff9522028f07a94817472c07b91f46710fd"}, @@ -2178,7 +2092,6 @@ version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." 
optional = false python-versions = ">=3.8" -groups = ["docs"] files = [ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, @@ -2190,8 +2103,6 @@ version = "0.6.2" description = "Extract structured text from pdfs quickly" optional = true python-versions = "<4.0,>=3.10" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "pdftext-0.6.2-py3-none-any.whl", hash = "sha256:905d11e62d548e307933c25865a69c8e993947bb5b40b1535b0a2aa8f07a71d4"}, {file = "pdftext-0.6.2.tar.gz", hash = "sha256:ff5b92462ac03ae63a23429384ae123d45c162dcda30e7bf2c5c92a6b208c9de"}, @@ -2209,8 +2120,6 @@ version = "10.4.0" description = "Python Imaging Library (Fork)" optional = true python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "pillow-10.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e"}, {file = "pillow-10.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:543f3dc61c18dafb755773efc89aae60d06b6596a63914107f75459cf984164d"}, @@ -2299,19 +2208,18 @@ docs = ["furo", "olefile", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-inline fpx = ["olefile"] mic = ["olefile"] tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] -typing = ["typing-extensions ; python_version < \"3.10\""] +typing = ["typing-extensions"] xmp = ["defusedxml"] [[package]] name = "platformdirs" -version = "4.3.7" +version = "4.3.8" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false python-versions = ">=3.9" -groups = ["main", "dev", "docs"] files = [ - {file = "platformdirs-4.3.7-py3-none-any.whl", hash = "sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94"}, - {file = "platformdirs-4.3.7.tar.gz", hash = "sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351"}, + {file = "platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4"}, + {file = "platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc"}, ] [package.extras] @@ -2321,19 +2229,18 @@ type = ["mypy (>=1.14.1)"] [[package]] name = "pluggy" -version = "1.5.0" +version = "1.6.0" description = "plugin and hook calling mechanisms for python" optional = false -python-versions = ">=3.8" -groups = ["dev"] +python-versions = ">=3.9" files = [ - {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, - {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, + {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, + {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, ] [package.extras] dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] +testing = ["coverage", "pytest", "pytest-benchmark"] [[package]] name = "pre-commit" @@ -2341,7 +2248,6 @@ version = "4.2.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
optional = false python-versions = ">=3.9" -groups = ["main", "dev"] files = [ {file = "pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd"}, {file = "pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146"}, @@ -2360,8 +2266,6 @@ version = "0.6.1" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" optional = true python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, @@ -2373,8 +2277,6 @@ version = "0.4.2" description = "A collection of ASN.1-based protocols modules" optional = true python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a"}, {file = "pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6"}, @@ -2385,135 +2287,131 @@ pyasn1 = ">=0.6.1,<0.7.0" [[package]] name = "pydantic" -version = "2.11.3" +version = "2.11.5" description = "Data validation using Python type hints" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ - {file = "pydantic-2.11.3-py3-none-any.whl", hash = "sha256:a082753436a07f9ba1289c6ffa01cd93db3548776088aa917cc43b63f68fa60f"}, - {file = "pydantic-2.11.3.tar.gz", hash = "sha256:7471657138c16adad9322fe3070c0116dd6c3ad8d649300e3cbdfe91f4db4ec3"}, + {file = "pydantic-2.11.5-py3-none-any.whl", hash = "sha256:f9c26ba06f9747749ca1e5c94d6a85cb84254577553c8785576fd38fa64dc0f7"}, + {file = "pydantic-2.11.5.tar.gz", hash = 
"sha256:7f853db3d0ce78ce8bbb148c401c2cdd6431b3473c0cdff2755c7690952a7b7a"}, ] [package.dependencies] annotated-types = ">=0.6.0" -pydantic-core = "2.33.1" +pydantic-core = "2.33.2" typing-extensions = ">=4.12.2" typing-inspection = ">=0.4.0" [package.extras] email = ["email-validator (>=2.0.0)"] -timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows\""] +timezone = ["tzdata"] [[package]] name = "pydantic-core" -version = "2.33.1" +version = "2.33.2" description = "Core functionality for Pydantic validation and serialization" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" -files = [ - {file = "pydantic_core-2.33.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3077cfdb6125cc8dab61b155fdd714663e401f0e6883f9632118ec12cf42df26"}, - {file = "pydantic_core-2.33.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8ffab8b2908d152e74862d276cf5017c81a2f3719f14e8e3e8d6b83fda863927"}, - {file = "pydantic_core-2.33.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5183e4f6a2d468787243ebcd70cf4098c247e60d73fb7d68d5bc1e1beaa0c4db"}, - {file = "pydantic_core-2.33.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:398a38d323f37714023be1e0285765f0a27243a8b1506b7b7de87b647b517e48"}, - {file = "pydantic_core-2.33.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:87d3776f0001b43acebfa86f8c64019c043b55cc5a6a2e313d728b5c95b46969"}, - {file = "pydantic_core-2.33.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c566dd9c5f63d22226409553531f89de0cac55397f2ab8d97d6f06cfce6d947e"}, - {file = "pydantic_core-2.33.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0d5f3acc81452c56895e90643a625302bd6be351e7010664151cc55b7b97f89"}, - {file = "pydantic_core-2.33.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:d3a07fadec2a13274a8d861d3d37c61e97a816beae717efccaa4b36dfcaadcde"}, - {file = "pydantic_core-2.33.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f99aeda58dce827f76963ee87a0ebe75e648c72ff9ba1174a253f6744f518f65"}, - {file = "pydantic_core-2.33.1-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:902dbc832141aa0ec374f4310f1e4e7febeebc3256f00dc359a9ac3f264a45dc"}, - {file = "pydantic_core-2.33.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fe44d56aa0b00d66640aa84a3cbe80b7a3ccdc6f0b1ca71090696a6d4777c091"}, - {file = "pydantic_core-2.33.1-cp310-cp310-win32.whl", hash = "sha256:ed3eb16d51257c763539bde21e011092f127a2202692afaeaccb50db55a31383"}, - {file = "pydantic_core-2.33.1-cp310-cp310-win_amd64.whl", hash = "sha256:694ad99a7f6718c1a498dc170ca430687a39894a60327f548e02a9c7ee4b6504"}, - {file = "pydantic_core-2.33.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e966fc3caaf9f1d96b349b0341c70c8d6573bf1bac7261f7b0ba88f96c56c24"}, - {file = "pydantic_core-2.33.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bfd0adeee563d59c598ceabddf2c92eec77abcb3f4a391b19aa7366170bd9e30"}, - {file = "pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91815221101ad3c6b507804178a7bb5cb7b2ead9ecd600041669c8d805ebd595"}, - {file = "pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9fea9c1869bb4742d174a57b4700c6dadea951df8b06de40c2fedb4f02931c2e"}, - {file = "pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d20eb4861329bb2484c021b9d9a977566ab16d84000a57e28061151c62b349a"}, - {file = "pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb935c5591573ae3201640579f30128ccc10739b45663f93c06796854405505"}, - {file = "pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c964fd24e6166420d18fb53996d8c9fd6eac9bf5ae3ec3d03015be4414ce497f"}, - {file = "pydantic_core-2.33.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:681d65e9011f7392db5aa002b7423cc442d6a673c635668c227c6c8d0e5a4f77"}, - {file = "pydantic_core-2.33.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e100c52f7355a48413e2999bfb4e139d2977a904495441b374f3d4fb4a170961"}, - {file = "pydantic_core-2.33.1-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:048831bd363490be79acdd3232f74a0e9951b11b2b4cc058aeb72b22fdc3abe1"}, - {file = "pydantic_core-2.33.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bdc84017d28459c00db6f918a7272a5190bec3090058334e43a76afb279eac7c"}, - {file = "pydantic_core-2.33.1-cp311-cp311-win32.whl", hash = "sha256:32cd11c5914d1179df70406427097c7dcde19fddf1418c787540f4b730289896"}, - {file = "pydantic_core-2.33.1-cp311-cp311-win_amd64.whl", hash = "sha256:2ea62419ba8c397e7da28a9170a16219d310d2cf4970dbc65c32faf20d828c83"}, - {file = "pydantic_core-2.33.1-cp311-cp311-win_arm64.whl", hash = "sha256:fc903512177361e868bc1f5b80ac8c8a6e05fcdd574a5fb5ffeac5a9982b9e89"}, - {file = "pydantic_core-2.33.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1293d7febb995e9d3ec3ea09caf1a26214eec45b0f29f6074abb004723fc1de8"}, - {file = "pydantic_core-2.33.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:99b56acd433386c8f20be5c4000786d1e7ca0523c8eefc995d14d79c7a081498"}, - {file = "pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35a5ec3fa8c2fe6c53e1b2ccc2454398f95d5393ab398478f53e1afbbeb4d939"}, - {file = "pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b172f7b9d2f3abc0efd12e3386f7e48b576ef309544ac3a63e5e9cdd2e24585d"}, - {file = "pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9097b9f17f91eea659b9ec58148c0747ec354a42f7389b9d50701610d86f812e"}, - {file = 
"pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc77ec5b7e2118b152b0d886c7514a4653bcb58c6b1d760134a9fab915f777b3"}, - {file = "pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3d15245b08fa4a84cefc6c9222e6f37c98111c8679fbd94aa145f9a0ae23d"}, - {file = "pydantic_core-2.33.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef99779001d7ac2e2461d8ab55d3373fe7315caefdbecd8ced75304ae5a6fc6b"}, - {file = "pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fc6bf8869e193855e8d91d91f6bf59699a5cdfaa47a404e278e776dd7f168b39"}, - {file = "pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:b1caa0bc2741b043db7823843e1bde8aaa58a55a58fda06083b0569f8b45693a"}, - {file = "pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ec259f62538e8bf364903a7d0d0239447059f9434b284f5536e8402b7dd198db"}, - {file = "pydantic_core-2.33.1-cp312-cp312-win32.whl", hash = "sha256:e14f369c98a7c15772b9da98987f58e2b509a93235582838bd0d1d8c08b68fda"}, - {file = "pydantic_core-2.33.1-cp312-cp312-win_amd64.whl", hash = "sha256:1c607801d85e2e123357b3893f82c97a42856192997b95b4d8325deb1cd0c5f4"}, - {file = "pydantic_core-2.33.1-cp312-cp312-win_arm64.whl", hash = "sha256:8d13f0276806ee722e70a1c93da19748594f19ac4299c7e41237fc791d1861ea"}, - {file = "pydantic_core-2.33.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:70af6a21237b53d1fe7b9325b20e65cbf2f0a848cf77bed492b029139701e66a"}, - {file = "pydantic_core-2.33.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:282b3fe1bbbe5ae35224a0dbd05aed9ccabccd241e8e6b60370484234b456266"}, - {file = "pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b315e596282bbb5822d0c7ee9d255595bd7506d1cb20c2911a4da0b970187d3"}, - {file = "pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:1dfae24cf9921875ca0ca6a8ecb4bb2f13c855794ed0d468d6abbec6e6dcd44a"}, - {file = "pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6dd8ecfde08d8bfadaea669e83c63939af76f4cf5538a72597016edfa3fad516"}, - {file = "pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f593494876eae852dc98c43c6f260f45abdbfeec9e4324e31a481d948214764"}, - {file = "pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:948b73114f47fd7016088e5186d13faf5e1b2fe83f5e320e371f035557fd264d"}, - {file = "pydantic_core-2.33.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e11f3864eb516af21b01e25fac915a82e9ddad3bb0fb9e95a246067398b435a4"}, - {file = "pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:549150be302428b56fdad0c23c2741dcdb5572413776826c965619a25d9c6bde"}, - {file = "pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:495bc156026efafd9ef2d82372bd38afce78ddd82bf28ef5276c469e57c0c83e"}, - {file = "pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ec79de2a8680b1a67a07490bddf9636d5c2fab609ba8c57597e855fa5fa4dacd"}, - {file = "pydantic_core-2.33.1-cp313-cp313-win32.whl", hash = "sha256:ee12a7be1742f81b8a65b36c6921022301d466b82d80315d215c4c691724986f"}, - {file = "pydantic_core-2.33.1-cp313-cp313-win_amd64.whl", hash = "sha256:ede9b407e39949d2afc46385ce6bd6e11588660c26f80576c11c958e6647bc40"}, - {file = "pydantic_core-2.33.1-cp313-cp313-win_arm64.whl", hash = "sha256:aa687a23d4b7871a00e03ca96a09cad0f28f443690d300500603bd0adba4b523"}, - {file = "pydantic_core-2.33.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:401d7b76e1000d0dd5538e6381d28febdcacb097c8d340dde7d7fc6e13e9f95d"}, - {file = "pydantic_core-2.33.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7aeb055a42d734c0255c9e489ac67e75397d59c6fbe60d155851e9782f276a9c"}, - {file = 
"pydantic_core-2.33.1-cp313-cp313t-win_amd64.whl", hash = "sha256:338ea9b73e6e109f15ab439e62cb3b78aa752c7fd9536794112e14bee02c8d18"}, - {file = "pydantic_core-2.33.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:5ab77f45d33d264de66e1884fca158bc920cb5e27fd0764a72f72f5756ae8bdb"}, - {file = "pydantic_core-2.33.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e7aaba1b4b03aaea7bb59e1b5856d734be011d3e6d98f5bcaa98cb30f375f2ad"}, - {file = "pydantic_core-2.33.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7fb66263e9ba8fea2aa85e1e5578980d127fb37d7f2e292773e7bc3a38fb0c7b"}, - {file = "pydantic_core-2.33.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3f2648b9262607a7fb41d782cc263b48032ff7a03a835581abbf7a3bec62bcf5"}, - {file = "pydantic_core-2.33.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:723c5630c4259400818b4ad096735a829074601805d07f8cafc366d95786d331"}, - {file = "pydantic_core-2.33.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d100e3ae783d2167782391e0c1c7a20a31f55f8015f3293647544df3f9c67824"}, - {file = "pydantic_core-2.33.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:177d50460bc976a0369920b6c744d927b0ecb8606fb56858ff542560251b19e5"}, - {file = "pydantic_core-2.33.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3edde68d1a1f9af1273b2fe798997b33f90308fb6d44d8550c89fc6a3647cf6"}, - {file = "pydantic_core-2.33.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a62c3c3ef6a7e2c45f7853b10b5bc4ddefd6ee3cd31024754a1a5842da7d598d"}, - {file = "pydantic_core-2.33.1-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:c91dbb0ab683fa0cd64a6e81907c8ff41d6497c346890e26b23de7ee55353f96"}, - {file = "pydantic_core-2.33.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9f466e8bf0a62dc43e068c12166281c2eca72121dd2adc1040f3aa1e21ef8599"}, - {file = "pydantic_core-2.33.1-cp39-cp39-win32.whl", hash = 
"sha256:ab0277cedb698749caada82e5d099dc9fed3f906a30d4c382d1a21725777a1e5"}, - {file = "pydantic_core-2.33.1-cp39-cp39-win_amd64.whl", hash = "sha256:5773da0ee2d17136b1f1c6fbde543398d452a6ad2a7b54ea1033e2daa739b8d2"}, - {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c834f54f8f4640fd7e4b193f80eb25a0602bba9e19b3cd2fc7ffe8199f5ae02"}, - {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:049e0de24cf23766f12cc5cc71d8abc07d4a9deb9061b334b62093dedc7cb068"}, - {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a28239037b3d6f16916a4c831a5a0eadf856bdd6d2e92c10a0da3a59eadcf3e"}, - {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d3da303ab5f378a268fa7d45f37d7d85c3ec19769f28d2cc0c61826a8de21fe"}, - {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:25626fb37b3c543818c14821afe0fd3830bc327a43953bc88db924b68c5723f1"}, - {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3ab2d36e20fbfcce8f02d73c33a8a7362980cff717926bbae030b93ae46b56c7"}, - {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:2f9284e11c751b003fd4215ad92d325d92c9cb19ee6729ebd87e3250072cdcde"}, - {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:048c01eee07d37cbd066fc512b9d8b5ea88ceeb4e629ab94b3e56965ad655add"}, - {file = "pydantic_core-2.33.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5ccd429694cf26af7997595d627dd2637e7932214486f55b8a357edaac9dae8c"}, - {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3a371dc00282c4b84246509a5ddc808e61b9864aa1eae9ecc92bb1268b82db4a"}, - {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:f59295ecc75a1788af8ba92f2e8c6eeaa5a94c22fc4d151e8d9638814f85c8fc"}, - {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08530b8ac922003033f399128505f513e30ca770527cc8bbacf75a84fcc2c74b"}, - {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bae370459da6a5466978c0eacf90690cb57ec9d533f8e63e564ef3822bfa04fe"}, - {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e3de2777e3b9f4d603112f78006f4ae0acb936e95f06da6cb1a45fbad6bdb4b5"}, - {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3a64e81e8cba118e108d7126362ea30e021291b7805d47e4896e52c791be2761"}, - {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:52928d8c1b6bda03cc6d811e8923dffc87a2d3c8b3bfd2ce16471c7147a24850"}, - {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:1b30d92c9412beb5ac6b10a3eb7ef92ccb14e3f2a8d7732e2d739f58b3aa7544"}, - {file = "pydantic_core-2.33.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f995719707e0e29f0f41a8aa3bcea6e761a36c9136104d3189eafb83f5cec5e5"}, - {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7edbc454a29fc6aeae1e1eecba4f07b63b8d76e76a748532233c4c167b4cb9ea"}, - {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:ad05b683963f69a1d5d2c2bdab1274a31221ca737dbbceaa32bcb67359453cdd"}, - {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df6a94bf9452c6da9b5d76ed229a5683d0306ccb91cca8e1eea883189780d568"}, - {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7965c13b3967909a09ecc91f21d09cfc4576bf78140b988904e94f130f188396"}, - {file = 
"pydantic_core-2.33.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3f1fdb790440a34f6ecf7679e1863b825cb5ffde858a9197f851168ed08371e5"}, - {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:5277aec8d879f8d05168fdd17ae811dd313b8ff894aeeaf7cd34ad28b4d77e33"}, - {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:8ab581d3530611897d863d1a649fb0644b860286b4718db919bfd51ece41f10b"}, - {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0483847fa9ad5e3412265c1bd72aad35235512d9ce9d27d81a56d935ef489672"}, - {file = "pydantic_core-2.33.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:de9e06abe3cc5ec6a2d5f75bc99b0bdca4f5c719a5b34026f8c57efbdecd2ee3"}, - {file = "pydantic_core-2.33.1.tar.gz", hash = "sha256:bcc9c6fdb0ced789245b02b7d6603e17d1563064ddcfc36f046b61c0c05dd9df"}, +files = [ + {file = "pydantic_core-2.33.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8"}, + {file = "pydantic_core-2.33.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0069c9acc3f3981b9ff4cdfaf088e98d83440a4c7ea1bc07460af3d4dc22e72d"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d53b22f2032c42eaaf025f7c40c2e3b94568ae077a606f006d206a463bc69572"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0405262705a123b7ce9f0b92f123334d67b70fd1f20a9372b907ce1080c7ba02"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b25d91e288e2c4e0662b8038a28c6a07eaac3e196cfc4ff69de4ea3db992a1b"}, + {file = 
"pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bdfe4b3789761f3bcb4b1ddf33355a71079858958e3a552f16d5af19768fef2"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:efec8db3266b76ef9607c2c4c419bdb06bf335ae433b80816089ea7585816f6a"}, + {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:031c57d67ca86902726e0fae2214ce6770bbe2f710dc33063187a68744a5ecac"}, + {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:f8de619080e944347f5f20de29a975c2d815d9ddd8be9b9b7268e2e3ef68605a"}, + {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:73662edf539e72a9440129f231ed3757faab89630d291b784ca99237fb94db2b"}, + {file = "pydantic_core-2.33.2-cp310-cp310-win32.whl", hash = "sha256:0a39979dcbb70998b0e505fb1556a1d550a0781463ce84ebf915ba293ccb7e22"}, + {file = "pydantic_core-2.33.2-cp310-cp310-win_amd64.whl", hash = "sha256:b0379a2b24882fef529ec3b4987cb5d003b9cda32256024e6fe1586ac45fc640"}, + {file = "pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7"}, + {file = "pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de"}, + {file = 
"pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e"}, + {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d"}, + {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30"}, + {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf"}, + {file = "pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51"}, + {file = "pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab"}, + {file = "pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65"}, + {file = "pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc"}, + {file = "pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b"}, + {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1"}, + {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6"}, + {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea"}, + {file = "pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290"}, + {file = "pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2"}, + {file = "pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab"}, + {file = "pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f"}, + {file = "pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6"}, + {file = 
"pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56"}, + {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5"}, + {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e"}, + {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162"}, + {file = "pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849"}, + {file = "pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9"}, + {file = "pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9"}, + {file = "pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", 
hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac"}, + {file = "pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5"}, + {file = "pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9"}, + {file = "pydantic_core-2.33.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a2b911a5b90e0374d03813674bf0a5fbbb7741570dcd4b4e85a2e48d17def29d"}, + {file = "pydantic_core-2.33.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6fa6dfc3e4d1f734a34710f391ae822e0a8eb8559a85c6979e14e65ee6ba2954"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c54c939ee22dc8e2d545da79fc5381f1c020d6d3141d3bd747eab59164dc89fb"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53a57d2ed685940a504248187d5685e49eb5eef0f696853647bf37c418c538f7"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09fb9dd6571aacd023fe6aaca316bd01cf60ab27240d7eb39ebd66a3a15293b4"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e6116757f7959a712db11f3e9c0a99ade00a5bbedae83cb801985aa154f071b"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d55ab81c57b8ff8548c3e4947f119551253f4e3787a7bbc0b6b3ca47498a9d3"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c20c462aa4434b33a2661701b861604913f912254e441ab8d78d30485736115a"}, + {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:44857c3227d3fb5e753d5fe4a3420d6376fa594b07b621e220cd93703fe21782"}, + {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_armv7l.whl", hash = 
"sha256:eb9b459ca4df0e5c87deb59d37377461a538852765293f9e6ee834f0435a93b9"}, + {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9fcd347d2cc5c23b06de6d3b7b8275be558a0c90549495c699e379a80bf8379e"}, + {file = "pydantic_core-2.33.2-cp39-cp39-win32.whl", hash = "sha256:83aa99b1285bc8f038941ddf598501a86f1536789740991d7d8756e34f1e74d9"}, + {file = "pydantic_core-2.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:f481959862f57f29601ccced557cc2e817bce7533ab8e01a797a48b49c9692b3"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c4aa4e82353f65e548c476b37e64189783aa5384903bfea4f41580f255fddfa"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d946c8bf0d5c24bf4fe333af284c59a19358aa3ec18cb3dc4370080da1e8ad29"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87b31b6846e361ef83fedb187bb5b4372d0da3f7e28d85415efa92d6125d6e6d"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa9d91b338f2df0508606f7009fde642391425189bba6d8c653afd80fd6bb64e"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2058a32994f1fde4ca0480ab9d1e75a0e8c87c22b53a3ae66554f9af78f2fe8c"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0e03262ab796d986f978f79c943fc5f620381be7287148b8010b4097f79a39ec"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1a8695a8d00c73e50bff9dfda4d540b7dee29ff9b8053e38380426a85ef10052"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa754d1850735a0b0e03bcffd9d4b4343eb417e47196e4485d9cca326073a42c"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-win_amd64.whl", hash = 
"sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:87acbfcf8e90ca885206e98359d7dca4bcbb35abdc0ff66672a293e1d7a19101"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:7f92c15cd1e97d4b12acd1cc9004fa092578acfa57b67ad5e43a197175d01a64"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:d3f26877a748dc4251cfcfda9dfb5f13fcb034f5308388066bcfe9031b63ae7d"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac89aea9af8cd672fa7b510e7b8c33b0bba9a43186680550ccf23020f32d535"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:970919794d126ba8645f3837ab6046fb4e72bbc057b3709144066204c19a455d"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3eb3fe62804e8f859c49ed20a8451342de53ed764150cb14ca71357c765dc2a6"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:3abcd9392a36025e3bd55f9bd38d908bd17962cc49bc6da8e7e96285336e2bca"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:3a1c81334778f9e3af2f8aeb7a960736e5cab1dfebfb26aabca09afd2906c039"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2807668ba86cb38c6817ad9bc66215ab8584d1d304030ce4f0887336f28a5e27"}, + {file = "pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc"}, ] [package.dependencies] @@ -2525,8 +2423,6 @@ version = "2.9.1" description = "Settings management using Pydantic" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "pydantic_settings-2.9.1-py3-none-any.whl", hash = "sha256:59b4f431b1defb26fe620c71a7d3968a710d719f5f4cdbbdb7926edeb770f6ef"}, {file = "pydantic_settings-2.9.1.tar.gz", hash = "sha256:c509bf79d27563add44e8446233359004ed85066cd096d8b510f715e6ef5d268"}, @@ -2550,7 +2446,6 @@ version = "2.19.1" description = "Pygments is a syntax highlighting package written in Python." 
optional = false python-versions = ">=3.8" -groups = ["docs"] files = [ {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"}, {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"}, @@ -2561,14 +2456,13 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pymdown-extensions" -version = "10.14.3" +version = "10.15" description = "Extension pack for Python Markdown." optional = false python-versions = ">=3.8" -groups = ["docs"] files = [ - {file = "pymdown_extensions-10.14.3-py3-none-any.whl", hash = "sha256:05e0bee73d64b9c71a4ae17c72abc2f700e8bc8403755a00580b49a4e9f189e9"}, - {file = "pymdown_extensions-10.14.3.tar.gz", hash = "sha256:41e576ce3f5d650be59e900e4ceff231e0aed2a88cf30acaee41e02f063a061b"}, + {file = "pymdown_extensions-10.15-py3-none-any.whl", hash = "sha256:46e99bb272612b0de3b7e7caf6da8dd5f4ca5212c0b273feb9304e236c484e5f"}, + {file = "pymdown_extensions-10.15.tar.gz", hash = "sha256:0e5994e32155f4b03504f939e501b981d306daf7ec2aa1cd2eb6bd300784f8f7"}, ] [package.dependencies] @@ -2584,8 +2478,6 @@ version = "4.30.0" description = "Python bindings to PDFium" optional = true python-versions = ">=3.6" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "pypdfium2-4.30.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:b33ceded0b6ff5b2b93bc1fe0ad4b71aa6b7e7bd5875f1ca0cdfb6ba6ac01aab"}, {file = "pypdfium2-4.30.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4e55689f4b06e2d2406203e771f78789bd4f190731b5d57383d05cf611d829de"}, @@ -2608,7 +2500,6 @@ version = "8.3.5" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"}, {file = "pytest-8.3.5.tar.gz", hash = 
"sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"}, @@ -2631,7 +2522,6 @@ version = "6.1.1" description = "Pytest plugin for measuring coverage." optional = false python-versions = ">=3.9" -groups = ["dev"] files = [ {file = "pytest_cov-6.1.1-py3-none-any.whl", hash = "sha256:bddf29ed2d0ab6f4df17b4c55b0a657287db8684af9c42ea546b21b1041b3dde"}, {file = "pytest_cov-6.1.1.tar.gz", hash = "sha256:46935f7aaefba760e716c2ebfbe1c216240b9592966e7da99ea8292d4d3e2a0a"}, @@ -2650,7 +2540,6 @@ version = "3.14.0" description = "Thin-wrapper around the mock package for easier use with pytest" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, @@ -2668,7 +2557,6 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["main", "docs"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -2698,8 +2586,6 @@ version = "1.1.0" description = "Read key-value pairs from a .env file and set them as environment variables" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d"}, {file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"}, @@ -2714,7 +2600,6 @@ version = "0.27.1" description = "Python extension for computing string 
edit distances and similarities." optional = false python-versions = ">=3.9" -groups = ["main"] files = [ {file = "python_levenshtein-0.27.1-py3-none-any.whl", hash = "sha256:e1a4bc2a70284b2ebc4c505646142fecd0f831e49aa04ed972995895aec57396"}, {file = "python_levenshtein-0.27.1.tar.gz", hash = "sha256:3a5314a011016d373d309a68e875fd029caaa692ad3f32e78319299648045f11"}, @@ -2729,7 +2614,6 @@ version = "2025.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" -groups = ["main"] files = [ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, @@ -2741,7 +2625,6 @@ version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" -groups = ["main", "dev", "docs"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -2800,14 +2683,13 @@ files = [ [[package]] name = "pyyaml-env-tag" -version = "0.1" -description = "A custom YAML tag for referencing environment variables in YAML files. " +version = "1.1" +description = "A custom YAML tag for referencing environment variables in YAML files." 
optional = false -python-versions = ">=3.6" -groups = ["docs"] +python-versions = ">=3.9" files = [ - {file = "pyyaml_env_tag-0.1-py3-none-any.whl", hash = "sha256:af31106dec8a4d68c60207c1886031cbf839b68aa7abccdb19868200532c2069"}, - {file = "pyyaml_env_tag-0.1.tar.gz", hash = "sha256:70092675bda14fdec33b31ba77e7543de9ddc88f2e5b99160396572d11525bdb"}, + {file = "pyyaml_env_tag-1.1-py3-none-any.whl", hash = "sha256:17109e1a528561e32f026364712fee1264bc2ea6715120891174ed1b980d2e04"}, + {file = "pyyaml_env_tag-1.1.tar.gz", hash = "sha256:2eb38b75a2d21ee0475d6d97ec19c63287a7e140231e4214969d0eac923cd7ff"}, ] [package.dependencies] @@ -2815,106 +2697,105 @@ pyyaml = "*" [[package]] name = "rapidfuzz" -version = "3.12.2" +version = "3.13.0" description = "rapid fuzzy string matching" optional = false python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "rapidfuzz-3.12.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0b9a75e0385a861178adf59e86d6616cbd0d5adca7228dc9eeabf6f62cf5b0b1"}, - {file = "rapidfuzz-3.12.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6906a7eb458731e3dd2495af1d0410e23a21a2a2b7ced535e6d5cd15cb69afc5"}, - {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4b3334a8958b689f292d5ce8a928140ac98919b51e084f04bf0c14276e4c6ba"}, - {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:85a54ce30345cff2c79cbcffa063f270ad1daedd0d0c3ff6e541d3c3ba4288cf"}, - {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:acb63c5072c08058f8995404201a52fc4e1ecac105548a4d03c6c6934bda45a3"}, - {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5385398d390c6571f0f2a7837e6ddde0c8b912dac096dc8c87208ce9aaaa7570"}, - {file = "rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:5032cbffa245b4beba0067f8ed17392ef2501b346ae3c1f1d14b950edf4b6115"}, - {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:195adbb384d89d6c55e2fd71e7fb262010f3196e459aa2f3f45f31dd7185fe72"}, - {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f43b773a4d4950606fb25568ecde5f25280daf8f97b87eb323e16ecd8177b328"}, - {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:55a43be0e0fa956a919043c19d19bd988991d15c59f179d413fe5145ed9deb43"}, - {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:71cf1ea16acdebe9e2fb62ee7a77f8f70e877bebcbb33b34e660af2eb6d341d9"}, - {file = "rapidfuzz-3.12.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a3692d4ab36d44685f61326dca539975a4eda49b2a76f0a3df177d8a2c0de9d2"}, - {file = "rapidfuzz-3.12.2-cp310-cp310-win32.whl", hash = "sha256:09227bd402caa4397ba1d6e239deea635703b042dd266a4092548661fb22b9c6"}, - {file = "rapidfuzz-3.12.2-cp310-cp310-win_amd64.whl", hash = "sha256:0f05b7b95f9f87254b53fa92048367a8232c26cee7fc8665e4337268c3919def"}, - {file = "rapidfuzz-3.12.2-cp310-cp310-win_arm64.whl", hash = "sha256:6938738e00d9eb6e04097b3f565097e20b0c398f9c58959a2bc64f7f6be3d9da"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e9c4d984621ae17404c58f8d06ed8b025e167e52c0e6a511dfec83c37e9220cd"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f9132c55d330f0a1d34ce6730a76805323a6250d97468a1ca766a883d6a9a25"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39b343b6cb4b2c3dbc8d2d4c5ee915b6088e3b144ddf8305a57eaab16cf9fc74"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24081077b571ec4ee6d5d7ea0e49bc6830bf05b50c1005028523b9cd356209f3"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:c988a4fc91856260355773bf9d32bebab2083d4c6df33fafeddf4330e5ae9139"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:780b4469ee21cf62b1b2e8ada042941fd2525e45d5fb6a6901a9798a0e41153c"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:edd84b0a323885493c893bad16098c5e3b3005d7caa995ae653da07373665d97"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efa22059c765b3d8778083805b199deaaf643db070f65426f87d274565ddf36a"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:095776b11bb45daf7c2973dd61cc472d7ea7f2eecfa454aef940b4675659b92f"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7e2574cf4aa86065600b664a1ac7b8b8499107d102ecde836aaaa403fc4f1784"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:d5a3425a6c50fd8fbd991d8f085ddb504791dae6ef9cc3ab299fea2cb5374bef"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:97fb05e1ddb7b71a054040af588b0634214ee87cea87900d309fafc16fd272a4"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-win32.whl", hash = "sha256:b4c5a0413589aef936892fbfa94b7ff6f7dd09edf19b5a7b83896cc9d4e8c184"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-win_amd64.whl", hash = "sha256:58d9ae5cf9246d102db2a2558b67fe7e73c533e5d769099747921232d88b9be2"}, - {file = "rapidfuzz-3.12.2-cp311-cp311-win_arm64.whl", hash = "sha256:7635fe34246cd241c8e35eb83084e978b01b83d5ef7e5bf72a704c637f270017"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:1d982a651253ffe8434d9934ff0c1089111d60502228464721a2a4587435e159"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:02e6466caa0222d5233b1f05640873671cd99549a5c5ba4c29151634a1e56080"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e956b3f053e474abae69ac693a52742109d860ac2375fe88e9387d3277f4c96c"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dee7d740a2d5418d4f964f39ab8d89923e6b945850db833e798a1969b19542a"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a057cdb0401e42c84b6516c9b1635f7aedd5e430c6e388bd5f6bcd1d6a0686bb"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dccf8d4fb5b86d39c581a59463c596b1d09df976da26ff04ae219604223d502f"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21d5b3793c6f5aecca595cd24164bf9d3c559e315ec684f912146fc4e769e367"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:46a616c0e13cff2de1761b011e0b14bb73b110182f009223f1453d505c9a975c"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:19fa5bc4301a1ee55400d4a38a8ecf9522b0391fc31e6da5f4d68513fe5c0026"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:544a47190a0d25971658a9365dba7095397b4ce3e897f7dd0a77ca2cf6fa984e"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:f21af27c5e001f0ba1b88c36a0936437dfe034c452548d998891c21125eb640f"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b63170d9db00629b5b3f2862114d8d6ee19127eaba0eee43762d62a25817dbe0"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-win32.whl", hash = "sha256:6c7152d77b2eb6bfac7baa11f2a9c45fd5a2d848dbb310acd0953b3b789d95c9"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-win_amd64.whl", hash = "sha256:1a314d170ee272ac87579f25a6cf8d16a031e1f7a7b07663434b41a1473bc501"}, - {file = "rapidfuzz-3.12.2-cp312-cp312-win_arm64.whl", hash = "sha256:d41e8231326e94fd07c4d8f424f6bed08fead6f5e6688d1e6e787f1443ae7631"}, - {file = "rapidfuzz-3.12.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:941f31038dba5d3dedcfcceba81d61570ad457c873a24ceb13f4f44fcb574260"}, - {file = "rapidfuzz-3.12.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fe2dfc454ee51ba168a67b1e92b72aad251e45a074972cef13340bbad2fd9438"}, - {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78fafaf7f5a48ee35ccd7928339080a0136e27cf97396de45259eca1d331b714"}, - {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0c7989ff32c077bb8fd53253fd6ca569d1bfebc80b17557e60750e6909ba4fe"}, - {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96fa00bc105caa34b6cd93dca14a29243a3a7f0c336e4dcd36348d38511e15ac"}, - {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bccfb30c668620c5bc3490f2dc7d7da1cca0ead5a9da8b755e2e02e2ef0dff14"}, - {file = "rapidfuzz-3.12.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f9b0adc3d894beb51f5022f64717b6114a6fabaca83d77e93ac7675911c8cc5"}, - {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:32691aa59577f42864d5535cb6225d0f47e2c7bff59cf4556e5171e96af68cc1"}, - {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:758b10380ad34c1f51753a070d7bb278001b5e6fcf544121c6df93170952d705"}, - {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:50a9c54c0147b468363119132d514c5024fbad1ed8af12bd8bd411b0119f9208"}, - {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e3ceb87c11d2d0fbe8559bb795b0c0604b84cfc8bb7b8720b5c16e9e31e00f41"}, - {file = "rapidfuzz-3.12.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f7c9a003002434889255ff5676ca0f8934a478065ab5e702f75dc42639505bba"}, - {file = "rapidfuzz-3.12.2-cp313-cp313-win32.whl", hash = "sha256:cf165a76870cd875567941cf861dfd361a0a6e6a56b936c5d30042ddc9def090"}, - {file = 
"rapidfuzz-3.12.2-cp313-cp313-win_amd64.whl", hash = "sha256:55bcc003541f5f16ec0a73bf6de758161973f9e8d75161954380738dd147f9f2"}, - {file = "rapidfuzz-3.12.2-cp313-cp313-win_arm64.whl", hash = "sha256:69f6ecdf1452139f2b947d0c169a605de578efdb72cbb2373cb0a94edca1fd34"}, - {file = "rapidfuzz-3.12.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c4c852cd8bed1516a64fd6e2d4c6f270d4356196ee03fda2af1e5a9e13c34643"}, - {file = "rapidfuzz-3.12.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:42e7f747b55529a6d0d1588695d71025e884ab48664dca54b840413dea4588d8"}, - {file = "rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a749fd2690f24ef256b264a781487746bbb95344364fe8fe356f0eef7ef206ba"}, - {file = "rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9a11e1d036170bbafa43a9e63d8c309273564ec5bdfc5439062f439d1a16965a"}, - {file = "rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dfb337f1832c1231e3d5621bd0ebebb854e46036aedae3e6a49c1fc08f16f249"}, - {file = "rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e88c6e68fca301722fa3ab7fd3ca46998012c14ada577bc1e2c2fc04f2067ca6"}, - {file = "rapidfuzz-3.12.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17e1a3a8b4b5125cfb63a6990459b25b87ea769bdaf90d05bb143f8febef076a"}, - {file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:b9f8177b24ccc0a843e85932b1088c5e467a7dd7a181c13f84c684b796bea815"}, - {file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6c506bdc2f304051592c0d3b0e82eed309248ec10cdf802f13220251358375ea"}, - {file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:30bf15c1ecec2798b713d551df17f23401a3e3653ad9ed4e83ad1c2b06e86100"}, - {file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:bd9a67cfc83e8453ef17ddd1c2c4ce4a74d448a197764efb54c29f29fb41f611"}, - 
{file = "rapidfuzz-3.12.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7a6eaec2ef658dd650c6eb9b36dff7a361ebd7d8bea990ce9d639b911673b2cb"}, - {file = "rapidfuzz-3.12.2-cp39-cp39-win32.whl", hash = "sha256:d7701769f110332cde45c41759cb2a497de8d2dca55e4c519a46aed5fbb19d1a"}, - {file = "rapidfuzz-3.12.2-cp39-cp39-win_amd64.whl", hash = "sha256:296bf0fd4f678488670e262c87a3e4f91900b942d73ae38caa42a417e53643b1"}, - {file = "rapidfuzz-3.12.2-cp39-cp39-win_arm64.whl", hash = "sha256:7957f5d768de14f6b2715303ccdf224b78416738ee95a028a2965c95f73afbfb"}, - {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e5fd3ce849b27d063755829cda27a9dab6dbd63be3801f2a40c60ec563a4c90f"}, - {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:54e53662d71ed660c83c5109127c8e30b9e607884b7c45d2aff7929bbbd00589"}, - {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b9e43cf2213e524f3309d329f1ad8dbf658db004ed44f6ae1cd2919aa997da5"}, - {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29ca445e320e5a8df3bd1d75b4fa4ecfa7c681942b9ac65b55168070a1a1960e"}, - {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83eb7ef732c2f8533c6b5fbe69858a722c218acc3e1fc190ab6924a8af7e7e0e"}, - {file = "rapidfuzz-3.12.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:648adc2dd2cf873efc23befcc6e75754e204a409dfa77efd0fea30d08f22ef9d"}, - {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:9b1e6f48e1ffa0749261ee23a1c6462bdd0be5eac83093f4711de17a42ae78ad"}, - {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:1ae9ded463f2ca4ba1eb762913c5f14c23d2e120739a62b7f4cc102eab32dc90"}, - {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:dda45f47b559be72ecbce45c7f71dc7c97b9772630ab0f3286d97d2c3025ab71"}, - {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3745c6443890265513a3c8777f2de4cb897aeb906a406f97741019be8ad5bcc"}, - {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36d3ef4f047ed1bc96fa29289f9e67a637ddca5e4f4d3dc7cb7f50eb33ec1664"}, - {file = "rapidfuzz-3.12.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:54bb69ebe5ca0bd7527357e348f16a4c0c52fe0c2fcc8a041010467dcb8385f7"}, - {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3f2ddd5b99b254039a8c82be5749d4d75943f62eb2c2918acf6ffd586852834f"}, - {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:8117dab9b26a1aaffab59b4e30f80ac4d55e61ad4139a637c149365960933bee"}, - {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40c0f16d62d6553527de3dab2fb69709c4383430ea44bce8fb4711ed4cbc6ae3"}, - {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f177e1eb6e4f5261a89c475e21bce7a99064a8f217d2336fb897408f46f0ceaf"}, - {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5df0cecc2852fcb078ed1b4482fac4fc2c2e7787f3edda8920d9a4c0f51b1c95"}, - {file = "rapidfuzz-3.12.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3b3c4df0321df6f8f0b61afbaa2ced9622750ee1e619128db57a18533d139820"}, - {file = "rapidfuzz-3.12.2.tar.gz", hash = "sha256:b0ba1ccc22fff782e7152a3d3d0caca44ec4e32dc48ba01c560b8593965b5aa3"}, +files = [ + {file = "rapidfuzz-3.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:aafc42a1dc5e1beeba52cd83baa41372228d6d8266f6d803c16dbabbcc156255"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:85c9a131a44a95f9cac2eb6e65531db014e09d89c4f18c7b1fa54979cb9ff1f3"}, + {file = 
"rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d7cec4242d30dd521ef91c0df872e14449d1dffc2a6990ede33943b0dae56c3"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e297c09972698c95649e89121e3550cee761ca3640cd005e24aaa2619175464e"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ef0f5f03f61b0e5a57b1df7beafd83df993fd5811a09871bad6038d08e526d0d"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d8cf5f7cd6e4d5eb272baf6a54e182b2c237548d048e2882258336533f3f02b7"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9256218ac8f1a957806ec2fb9a6ddfc6c32ea937c0429e88cf16362a20ed8602"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e1bdd2e6d0c5f9706ef7595773a81ca2b40f3b33fd7f9840b726fb00c6c4eb2e"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5280be8fd7e2bee5822e254fe0a5763aa0ad57054b85a32a3d9970e9b09bbcbf"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fd742c03885db1fce798a1cd87a20f47f144ccf26d75d52feb6f2bae3d57af05"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:5435fcac94c9ecf0504bf88a8a60c55482c32e18e108d6079a0089c47f3f8cf6"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:93a755266856599be4ab6346273f192acde3102d7aa0735e2f48b456397a041f"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-win32.whl", hash = "sha256:3abe6a4e8eb4cfc4cda04dd650a2dc6d2934cbdeda5def7e6fd1c20f6e7d2a0b"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:e8ddb58961401da7d6f55f185512c0d6bd24f529a637078d41dd8ffa5a49c107"}, + {file = "rapidfuzz-3.13.0-cp310-cp310-win_arm64.whl", hash = 
"sha256:c523620d14ebd03a8d473c89e05fa1ae152821920c3ff78b839218ff69e19ca3"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d395a5cad0c09c7f096433e5fd4224d83b53298d53499945a9b0e5a971a84f3a"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b7b3eda607a019169f7187328a8d1648fb9a90265087f6903d7ee3a8eee01805"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98e0bfa602e1942d542de077baf15d658bd9d5dcfe9b762aff791724c1c38b70"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bef86df6d59667d9655905b02770a0c776d2853971c0773767d5ef8077acd624"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fedd316c165beed6307bf754dee54d3faca2c47e1f3bcbd67595001dfa11e969"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5158da7f2ec02a930be13bac53bb5903527c073c90ee37804090614cab83c29e"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b6f913ee4618ddb6d6f3e387b76e8ec2fc5efee313a128809fbd44e65c2bbb2"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d25fdbce6459ccbbbf23b4b044f56fbd1158b97ac50994eaae2a1c0baae78301"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:25343ccc589a4579fbde832e6a1e27258bfdd7f2eb0f28cb836d6694ab8591fc"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a9ad1f37894e3ffb76bbab76256e8a8b789657183870be11aa64e306bb5228fd"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5dc71ef23845bb6b62d194c39a97bb30ff171389c9812d83030c1199f319098c"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b7f4c65facdb94f44be759bbd9b6dda1fa54d0d6169cdf1a209a5ab97d311a75"}, + {file = 
"rapidfuzz-3.13.0-cp311-cp311-win32.whl", hash = "sha256:b5104b62711565e0ff6deab2a8f5dbf1fbe333c5155abe26d2cfd6f1849b6c87"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:9093cdeb926deb32a4887ebe6910f57fbcdbc9fbfa52252c10b56ef2efb0289f"}, + {file = "rapidfuzz-3.13.0-cp311-cp311-win_arm64.whl", hash = "sha256:f70f646751b6aa9d05be1fb40372f006cc89d6aad54e9d79ae97bd1f5fce5203"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a1a6a906ba62f2556372282b1ef37b26bca67e3d2ea957277cfcefc6275cca7"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2fd0975e015b05c79a97f38883a11236f5a24cca83aa992bd2558ceaa5652b26"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d4e13593d298c50c4f94ce453f757b4b398af3fa0fd2fde693c3e51195b7f69"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed6f416bda1c9133000009d84d9409823eb2358df0950231cc936e4bf784eb97"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1dc82b6ed01acb536b94a43996a94471a218f4d89f3fdd9185ab496de4b2a981"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9d824de871daa6e443b39ff495a884931970d567eb0dfa213d234337343835f"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d18228a2390375cf45726ce1af9d36ff3dc1f11dce9775eae1f1b13ac6ec50f"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9f5fe634c9482ec5d4a6692afb8c45d370ae86755e5f57aa6c50bfe4ca2bdd87"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:694eb531889f71022b2be86f625a4209c4049e74be9ca836919b9e395d5e33b3"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = 
"sha256:11b47b40650e06147dee5e51a9c9ad73bb7b86968b6f7d30e503b9f8dd1292db"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:98b8107ff14f5af0243f27d236bcc6e1ef8e7e3b3c25df114e91e3a99572da73"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b836f486dba0aceb2551e838ff3f514a38ee72b015364f739e526d720fdb823a"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-win32.whl", hash = "sha256:4671ee300d1818d7bdfd8fa0608580d7778ba701817216f0c17fb29e6b972514"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:6e2065f68fb1d0bf65adc289c1bdc45ba7e464e406b319d67bb54441a1b9da9e"}, + {file = "rapidfuzz-3.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:65cc97c2fc2c2fe23586599686f3b1ceeedeca8e598cfcc1b7e56dc8ca7e2aa7"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:09e908064d3684c541d312bd4c7b05acb99a2c764f6231bd507d4b4b65226c23"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:57c390336cb50d5d3bfb0cfe1467478a15733703af61f6dffb14b1cd312a6fae"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0da54aa8547b3c2c188db3d1c7eb4d1bb6dd80baa8cdaeaec3d1da3346ec9caa"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df8e8c21e67afb9d7fbe18f42c6111fe155e801ab103c81109a61312927cc611"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:461fd13250a2adf8e90ca9a0e1e166515cbcaa5e9c3b1f37545cbbeff9e77f6b"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2b3dd5d206a12deca16870acc0d6e5036abeb70e3cad6549c294eff15591527"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1343d745fbf4688e412d8f398c6e6d6f269db99a54456873f232ba2e7aeb4939"}, + {file = 
"rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b1b065f370d54551dcc785c6f9eeb5bd517ae14c983d2784c064b3aa525896df"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:11b125d8edd67e767b2295eac6eb9afe0b1cdc82ea3d4b9257da4b8e06077798"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c33f9c841630b2bb7e69a3fb5c84a854075bb812c47620978bddc591f764da3d"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ae4574cb66cf1e85d32bb7e9ec45af5409c5b3970b7ceb8dea90168024127566"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e05752418b24bbd411841b256344c26f57da1148c5509e34ea39c7eb5099ab72"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-win32.whl", hash = "sha256:0e1d08cb884805a543f2de1f6744069495ef527e279e05370dd7c83416af83f8"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:9a7c6232be5f809cd39da30ee5d24e6cadd919831e6020ec6c2391f4c3bc9264"}, + {file = "rapidfuzz-3.13.0-cp313-cp313-win_arm64.whl", hash = "sha256:3f32f15bacd1838c929b35c84b43618481e1b3d7a61b5ed2db0291b70ae88b53"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cc64da907114d7a18b5e589057e3acaf2fec723d31c49e13fedf043592a3f6a7"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4d9d7f84c8e992a8dbe5a3fdbea73d733da39bf464e62c912ac3ceba9c0cff93"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a79a2f07786a2070669b4b8e45bd96a01c788e7a3c218f531f3947878e0f956"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9f338e71c45b69a482de8b11bf4a029993230760120c8c6e7c9b71760b6825a1"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:adb40ca8ddfcd4edd07b0713a860be32bdf632687f656963bcbce84cea04b8d8"}, + {file = 
"rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48719f7dcf62dfb181063b60ee2d0a39d327fa8ad81b05e3e510680c44e1c078"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9327a4577f65fc3fb712e79f78233815b8a1c94433d0c2c9f6bc5953018b3565"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:200030dfc0a1d5d6ac18e993c5097c870c97c41574e67f227300a1fb74457b1d"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:cc269e74cad6043cb8a46d0ce580031ab642b5930562c2bb79aa7fbf9c858d26"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:e62779c6371bd2b21dbd1fdce89eaec2d93fd98179d36f61130b489f62294a92"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f4797f821dc5d7c2b6fc818b89f8a3f37bcc900dd9e4369e6ebf1e525efce5db"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d21f188f6fe4fbf422e647ae9d5a68671d00218e187f91859c963d0738ccd88c"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-win32.whl", hash = "sha256:45dd4628dd9c21acc5c97627dad0bb791764feea81436fb6e0a06eef4c6dceaa"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-win_amd64.whl", hash = "sha256:624a108122039af89ddda1a2b7ab2a11abe60c1521956f142f5d11bcd42ef138"}, + {file = "rapidfuzz-3.13.0-cp39-cp39-win_arm64.whl", hash = "sha256:435071fd07a085ecbf4d28702a66fd2e676a03369ee497cc38bcb69a46bc77e2"}, + {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fe5790a36d33a5d0a6a1f802aa42ecae282bf29ac6f7506d8e12510847b82a45"}, + {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:cdb33ee9f8a8e4742c6b268fa6bd739024f34651a06b26913381b1413ebe7590"}, + {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c99b76b93f7b495eee7dcb0d6a38fb3ce91e72e99d9f78faa5664a881cb2b7d"}, + {file = 
"rapidfuzz-3.13.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6af42f2ede8b596a6aaf6d49fdee3066ca578f4856b85ab5c1e2145de367a12d"}, + {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c0efa73afbc5b265aca0d8a467ae2a3f40d6854cbe1481cb442a62b7bf23c99"}, + {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7ac21489de962a4e2fc1e8f0b0da4aa1adc6ab9512fd845563fecb4b4c52093a"}, + {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1ba007f4d35a45ee68656b2eb83b8715e11d0f90e5b9f02d615a8a321ff00c27"}, + {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d7a217310429b43be95b3b8ad7f8fc41aba341109dc91e978cd7c703f928c58f"}, + {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:558bf526bcd777de32b7885790a95a9548ffdcce68f704a81207be4a286c1095"}, + {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:202a87760f5145140d56153b193a797ae9338f7939eb16652dd7ff96f8faf64c"}, + {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfcccc08f671646ccb1e413c773bb92e7bba789e3a1796fd49d23c12539fe2e4"}, + {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:1f219f1e3c3194d7a7de222f54450ce12bc907862ff9a8962d83061c1f923c86"}, + {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ccbd0e7ea1a216315f63ffdc7cd09c55f57851afc8fe59a74184cb7316c0598b"}, + {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a50856f49a4016ef56edd10caabdaf3608993f9faf1e05c3c7f4beeac46bd12a"}, + {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fd05336db4d0b8348d7eaaf6fa3c517b11a56abaa5e89470ce1714e73e4aca7"}, + {file = 
"rapidfuzz-3.13.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:573ad267eb9b3f6e9b04febce5de55d8538a87c56c64bf8fd2599a48dc9d8b77"}, + {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30fd1451f87ccb6c2f9d18f6caa483116bbb57b5a55d04d3ddbd7b86f5b14998"}, + {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a6dd36d4916cf57ddb05286ed40b09d034ca5d4bca85c17be0cb6a21290597d9"}, + {file = "rapidfuzz-3.13.0.tar.gz", hash = "sha256:d2eaf3839e52cbcc0accbe9817a67b4b0fcf70aaeb229cfddc1c28061f9ce5d8"}, ] [package.extras] @@ -2926,7 +2807,6 @@ version = "0.36.2" description = "JSON Referencing + Python" optional = false python-versions = ">=3.9" -groups = ["dev"] files = [ {file = "referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0"}, {file = "referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa"}, @@ -2943,7 +2823,6 @@ version = "2024.11.6" description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.8" -groups = ["main"] files = [ {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, @@ -3047,12 +2926,10 @@ version = "2.32.3" description = "Python HTTP for Humans." 
optional = false python-versions = ">=3.8" -groups = ["main", "docs"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, ] -markers = {main = "extra == \"pdf\""} [package.dependencies] certifi = ">=2017.4.17" @@ -3066,115 +2943,128 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "rpds-py" -version = "0.23.1" +version = "0.25.1" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "rpds_py-0.23.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2a54027554ce9b129fc3d633c92fa33b30de9f08bc61b32c053dc9b537266fed"}, - {file = "rpds_py-0.23.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b5ef909a37e9738d146519657a1aab4584018746a18f71c692f2f22168ece40c"}, - {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ee9d6f0b38efb22ad94c3b68ffebe4c47865cdf4b17f6806d6c674e1feb4246"}, - {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f7356a6da0562190558c4fcc14f0281db191cdf4cb96e7604c06acfcee96df15"}, - {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9441af1d25aed96901f97ad83d5c3e35e6cd21a25ca5e4916c82d7dd0490a4fa"}, - {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d8abf7896a91fb97e7977d1aadfcc2c80415d6dc2f1d0fca5b8d0df247248f3"}, - {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b08027489ba8fedde72ddd233a5ea411b85a6ed78175f40285bd401bde7466d"}, - {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:fee513135b5a58f3bb6d89e48326cd5aa308e4bcdf2f7d59f67c861ada482bf8"}, - {file = "rpds_py-0.23.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:35d5631ce0af26318dba0ae0ac941c534453e42f569011585cb323b7774502a5"}, - {file = "rpds_py-0.23.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:a20cb698c4a59c534c6701b1c24a968ff2768b18ea2991f886bd8985ce17a89f"}, - {file = "rpds_py-0.23.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e9c206a1abc27e0588cf8b7c8246e51f1a16a103734f7750830a1ccb63f557a"}, - {file = "rpds_py-0.23.1-cp310-cp310-win32.whl", hash = "sha256:d9f75a06ecc68f159d5d7603b734e1ff6daa9497a929150f794013aa9f6e3f12"}, - {file = "rpds_py-0.23.1-cp310-cp310-win_amd64.whl", hash = "sha256:f35eff113ad430b5272bbfc18ba111c66ff525828f24898b4e146eb479a2cdda"}, - {file = "rpds_py-0.23.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:b79f5ced71efd70414a9a80bbbfaa7160da307723166f09b69773153bf17c590"}, - {file = "rpds_py-0.23.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c9e799dac1ffbe7b10c1fd42fe4cd51371a549c6e108249bde9cd1200e8f59b4"}, - {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721f9c4011b443b6e84505fc00cc7aadc9d1743f1c988e4c89353e19c4a968ee"}, - {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f88626e3f5e57432e6191cd0c5d6d6b319b635e70b40be2ffba713053e5147dd"}, - {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:285019078537949cecd0190f3690a0b0125ff743d6a53dfeb7a4e6787af154f5"}, - {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b92f5654157de1379c509b15acec9d12ecf6e3bc1996571b6cb82a4302060447"}, - {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e768267cbe051dd8d1c5305ba690bb153204a09bf2e3de3ae530de955f5b5580"}, - {file = 
"rpds_py-0.23.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c5334a71f7dc1160382d45997e29f2637c02f8a26af41073189d79b95d3321f1"}, - {file = "rpds_py-0.23.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d6adb81564af0cd428910f83fa7da46ce9ad47c56c0b22b50872bc4515d91966"}, - {file = "rpds_py-0.23.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:cafa48f2133d4daa028473ede7d81cd1b9f9e6925e9e4003ebdf77010ee02f35"}, - {file = "rpds_py-0.23.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0fced9fd4a07a1ded1bac7e961ddd9753dd5d8b755ba8e05acba54a21f5f1522"}, - {file = "rpds_py-0.23.1-cp311-cp311-win32.whl", hash = "sha256:243241c95174b5fb7204c04595852fe3943cc41f47aa14c3828bc18cd9d3b2d6"}, - {file = "rpds_py-0.23.1-cp311-cp311-win_amd64.whl", hash = "sha256:11dd60b2ffddba85715d8a66bb39b95ddbe389ad2cfcf42c833f1bcde0878eaf"}, - {file = "rpds_py-0.23.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3902df19540e9af4cc0c3ae75974c65d2c156b9257e91f5101a51f99136d834c"}, - {file = "rpds_py-0.23.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:66f8d2a17e5838dd6fb9be6baaba8e75ae2f5fa6b6b755d597184bfcd3cb0eba"}, - {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:112b8774b0b4ee22368fec42749b94366bd9b536f8f74c3d4175d4395f5cbd31"}, - {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e0df046f2266e8586cf09d00588302a32923eb6386ced0ca5c9deade6af9a149"}, - {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f3288930b947cbebe767f84cf618d2cbe0b13be476e749da0e6a009f986248c"}, - {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ce473a2351c018b06dd8d30d5da8ab5a0831056cc53b2006e2a8028172c37ce5"}, - {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d550d7e9e7d8676b183b37d65b5cd8de13676a738973d330b59dc8312df9c5dc"}, - {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e14f86b871ea74c3fddc9a40e947d6a5d09def5adc2076ee61fb910a9014fb35"}, - {file = "rpds_py-0.23.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1bf5be5ba34e19be579ae873da515a2836a2166d8d7ee43be6ff909eda42b72b"}, - {file = "rpds_py-0.23.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d7031d493c4465dbc8d40bd6cafefef4bd472b17db0ab94c53e7909ee781b9ef"}, - {file = "rpds_py-0.23.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:55ff4151cfd4bc635e51cfb1c59ac9f7196b256b12e3a57deb9e5742e65941ad"}, - {file = "rpds_py-0.23.1-cp312-cp312-win32.whl", hash = "sha256:a9d3b728f5a5873d84cba997b9d617c6090ca5721caaa691f3b1a78c60adc057"}, - {file = "rpds_py-0.23.1-cp312-cp312-win_amd64.whl", hash = "sha256:b03a8d50b137ee758e4c73638b10747b7c39988eb8e6cd11abb7084266455165"}, - {file = "rpds_py-0.23.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:4caafd1a22e5eaa3732acb7672a497123354bef79a9d7ceed43387d25025e935"}, - {file = "rpds_py-0.23.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:178f8a60fc24511c0eb756af741c476b87b610dba83270fce1e5a430204566a4"}, - {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c632419c3870507ca20a37c8f8f5352317aca097639e524ad129f58c125c61c6"}, - {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:698a79d295626ee292d1730bc2ef6e70a3ab135b1d79ada8fde3ed0047b65a10"}, - {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:271fa2184cf28bdded86bb6217c8e08d3a169fe0bbe9be5e8d96e8476b707122"}, - {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b91cceb5add79ee563bd1f70b30896bd63bc5f78a11c1f00a1e931729ca4f1f4"}, - {file = 
"rpds_py-0.23.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a6cb95074777f1ecda2ca4fa7717caa9ee6e534f42b7575a8f0d4cb0c24013"}, - {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:50fb62f8d8364978478b12d5f03bf028c6bc2af04082479299139dc26edf4c64"}, - {file = "rpds_py-0.23.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c8f7e90b948dc9dcfff8003f1ea3af08b29c062f681c05fd798e36daa3f7e3e8"}, - {file = "rpds_py-0.23.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5b98b6c953e5c2bda51ab4d5b4f172617d462eebc7f4bfdc7c7e6b423f6da957"}, - {file = "rpds_py-0.23.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2893d778d4671ee627bac4037a075168b2673c57186fb1a57e993465dbd79a93"}, - {file = "rpds_py-0.23.1-cp313-cp313-win32.whl", hash = "sha256:2cfa07c346a7ad07019c33fb9a63cf3acb1f5363c33bc73014e20d9fe8b01cdd"}, - {file = "rpds_py-0.23.1-cp313-cp313-win_amd64.whl", hash = "sha256:3aaf141d39f45322e44fc2c742e4b8b4098ead5317e5f884770c8df0c332da70"}, - {file = "rpds_py-0.23.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:759462b2d0aa5a04be5b3e37fb8183615f47014ae6b116e17036b131985cb731"}, - {file = "rpds_py-0.23.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3e9212f52074fc9d72cf242a84063787ab8e21e0950d4d6709886fb62bcb91d5"}, - {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9e9f3a3ac919406bc0414bbbd76c6af99253c507150191ea79fab42fdb35982a"}, - {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c04ca91dda8a61584165825907f5c967ca09e9c65fe8966ee753a3f2b019fe1e"}, - {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ab923167cfd945abb9b51a407407cf19f5bee35001221f2911dc85ffd35ff4f"}, - {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:ed6f011bedca8585787e5082cce081bac3d30f54520097b2411351b3574e1219"}, - {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6959bb9928c5c999aba4a3f5a6799d571ddc2c59ff49917ecf55be2bbb4e3722"}, - {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1ed7de3c86721b4e83ac440751329ec6a1102229aa18163f84c75b06b525ad7e"}, - {file = "rpds_py-0.23.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5fb89edee2fa237584e532fbf78f0ddd1e49a47c7c8cfa153ab4849dc72a35e6"}, - {file = "rpds_py-0.23.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7e5413d2e2d86025e73f05510ad23dad5950ab8417b7fc6beaad99be8077138b"}, - {file = "rpds_py-0.23.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d31ed4987d72aabdf521eddfb6a72988703c091cfc0064330b9e5f8d6a042ff5"}, - {file = "rpds_py-0.23.1-cp313-cp313t-win32.whl", hash = "sha256:f3429fb8e15b20961efca8c8b21432623d85db2228cc73fe22756c6637aa39e7"}, - {file = "rpds_py-0.23.1-cp313-cp313t-win_amd64.whl", hash = "sha256:d6f6512a90bd5cd9030a6237f5346f046c6f0e40af98657568fa45695d4de59d"}, - {file = "rpds_py-0.23.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:09cd7dbcb673eb60518231e02874df66ec1296c01a4fcd733875755c02014b19"}, - {file = "rpds_py-0.23.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c6760211eee3a76316cf328f5a8bd695b47b1626d21c8a27fb3b2473a884d597"}, - {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72e680c1518733b73c994361e4b06441b92e973ef7d9449feec72e8ee4f713da"}, - {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ae28144c1daa61366205d32abd8c90372790ff79fc60c1a8ad7fd3c8553a600e"}, - {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c698d123ce5d8f2d0cd17f73336615f6a2e3bdcedac07a1291bb4d8e7d82a05a"}, - {file = 
"rpds_py-0.23.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98b257ae1e83f81fb947a363a274c4eb66640212516becaff7bef09a5dceacaa"}, - {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c9ff044eb07c8468594d12602291c635da292308c8c619244e30698e7fc455a"}, - {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7938c7b0599a05246d704b3f5e01be91a93b411d0d6cc62275f025293b8a11ce"}, - {file = "rpds_py-0.23.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:e9cb79ecedfc156c0692257ac7ed415243b6c35dd969baa461a6888fc79f2f07"}, - {file = "rpds_py-0.23.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:7b77e07233925bd33fc0022b8537774423e4c6680b6436316c5075e79b6384f4"}, - {file = "rpds_py-0.23.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a970bfaf130c29a679b1d0a6e0f867483cea455ab1535fb427566a475078f27f"}, - {file = "rpds_py-0.23.1-cp39-cp39-win32.whl", hash = "sha256:4233df01a250b3984465faed12ad472f035b7cd5240ea3f7c76b7a7016084495"}, - {file = "rpds_py-0.23.1-cp39-cp39-win_amd64.whl", hash = "sha256:c617d7453a80e29d9973b926983b1e700a9377dbe021faa36041c78537d7b08c"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c1f8afa346ccd59e4e5630d5abb67aba6a9812fddf764fd7eb11f382a345f8cc"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fad784a31869747df4ac968a351e070c06ca377549e4ace94775aaa3ab33ee06"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5a96fcac2f18e5a0a23a75cd27ce2656c66c11c127b0318e508aab436b77428"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3e77febf227a1dc3220159355dba68faa13f8dca9335d97504abf428469fb18b"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:26bb3e8de93443d55e2e748e9fd87deb5f8075ca7bc0502cfc8be8687d69a2ec"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:db7707dde9143a67b8812c7e66aeb2d843fe33cc8e374170f4d2c50bd8f2472d"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1eedaaccc9bb66581d4ae7c50e15856e335e57ef2734dbc5fd8ba3e2a4ab3cb6"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28358c54fffadf0ae893f6c1050e8f8853e45df22483b7fff2f6ab6152f5d8bf"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:633462ef7e61d839171bf206551d5ab42b30b71cac8f10a64a662536e057fdef"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:a98f510d86f689fcb486dc59e6e363af04151e5260ad1bdddb5625c10f1e95f8"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:e0397dd0b3955c61ef9b22838144aa4bef6f0796ba5cc8edfc64d468b93798b4"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:75307599f0d25bf6937248e5ac4e3bde5ea72ae6618623b86146ccc7845ed00b"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3614d280bf7aab0d3721b5ce0e73434acb90a2c993121b6e81a1c15c665298ac"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e5963ea87f88bddf7edd59644a35a0feecf75f8985430124c253612d4f7d27ae"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad76f44f70aac3a54ceb1813ca630c53415da3a24fd93c570b2dfb4856591017"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2c6ae11e6e93728d86aafc51ced98b1658a0080a7dd9417d24bfb955bb09c3c2"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:fc869af5cba24d45fb0399b0cfdbcefcf6910bf4dee5d74036a57cf5264b3ff4"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c76b32eb2ab650a29e423525e84eb197c45504b1c1e6e17b6cc91fcfeb1a4b1d"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4263320ed887ed843f85beba67f8b2d1483b5947f2dc73a8b068924558bfeace"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7f9682a8f71acdf59fd554b82b1c12f517118ee72c0f3944eda461606dfe7eb9"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:754fba3084b70162a6b91efceee8a3f06b19e43dac3f71841662053c0584209a"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:a1c66e71ecfd2a4acf0e4bd75e7a3605afa8f9b28a3b497e4ba962719df2be57"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:8d67beb6002441faef8251c45e24994de32c4c8686f7356a1f601ad7c466f7c3"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a1e17d8dc8e57d8e0fd21f8f0f0a5211b3fa258b2e444c2053471ef93fe25a00"}, - {file = "rpds_py-0.23.1.tar.gz", hash = "sha256:7f3240dcfa14d198dba24b8b9cb3b108c06b68d45b7babd9eefc1038fdf7e707"}, +files = [ + {file = "rpds_py-0.25.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:f4ad628b5174d5315761b67f212774a32f5bad5e61396d38108bd801c0a8f5d9"}, + {file = "rpds_py-0.25.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c742af695f7525e559c16f1562cf2323db0e3f0fbdcabdf6865b095256b2d40"}, + {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:605ffe7769e24b1800b4d024d24034405d9404f0bc2f55b6db3362cd34145a6f"}, + {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ccc6f3ddef93243538be76f8e47045b4aad7a66a212cd3a0f23e34469473d36b"}, + {file = 
"rpds_py-0.25.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f70316f760174ca04492b5ab01be631a8ae30cadab1d1081035136ba12738cfa"}, + {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1dafef8df605fdb46edcc0bf1573dea0d6d7b01ba87f85cd04dc855b2b4479e"}, + {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0701942049095741a8aeb298a31b203e735d1c61f4423511d2b1a41dcd8a16da"}, + {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e87798852ae0b37c88babb7f7bbbb3e3fecc562a1c340195b44c7e24d403e380"}, + {file = "rpds_py-0.25.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3bcce0edc1488906c2d4c75c94c70a0417e83920dd4c88fec1078c94843a6ce9"}, + {file = "rpds_py-0.25.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e2f6a2347d3440ae789505693a02836383426249d5293541cd712e07e7aecf54"}, + {file = "rpds_py-0.25.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4fd52d3455a0aa997734f3835cbc4c9f32571345143960e7d7ebfe7b5fbfa3b2"}, + {file = "rpds_py-0.25.1-cp310-cp310-win32.whl", hash = "sha256:3f0b1798cae2bbbc9b9db44ee068c556d4737911ad53a4e5093d09d04b3bbc24"}, + {file = "rpds_py-0.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:3ebd879ab996537fc510a2be58c59915b5dd63bccb06d1ef514fee787e05984a"}, + {file = "rpds_py-0.25.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5f048bbf18b1f9120685c6d6bb70cc1a52c8cc11bdd04e643d28d3be0baf666d"}, + {file = "rpds_py-0.25.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4fbb0dbba559959fcb5d0735a0f87cdbca9e95dac87982e9b95c0f8f7ad10255"}, + {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4ca54b9cf9d80b4016a67a0193ebe0bcf29f6b0a96f09db942087e294d3d4c2"}, + {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:1ee3e26eb83d39b886d2cb6e06ea701bba82ef30a0de044d34626ede51ec98b0"}, + {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89706d0683c73a26f76a5315d893c051324d771196ae8b13e6ffa1ffaf5e574f"}, + {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2013ee878c76269c7b557a9a9c042335d732e89d482606990b70a839635feb7"}, + {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45e484db65e5380804afbec784522de84fa95e6bb92ef1bd3325d33d13efaebd"}, + {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:48d64155d02127c249695abb87d39f0faf410733428d499867606be138161d65"}, + {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:048893e902132fd6548a2e661fb38bf4896a89eea95ac5816cf443524a85556f"}, + {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0317177b1e8691ab5879f4f33f4b6dc55ad3b344399e23df2e499de7b10a548d"}, + {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bffcf57826d77a4151962bf1701374e0fc87f536e56ec46f1abdd6a903354042"}, + {file = "rpds_py-0.25.1-cp311-cp311-win32.whl", hash = "sha256:cda776f1967cb304816173b30994faaf2fd5bcb37e73118a47964a02c348e1bc"}, + {file = "rpds_py-0.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:dc3c1ff0abc91444cd20ec643d0f805df9a3661fcacf9c95000329f3ddf268a4"}, + {file = "rpds_py-0.25.1-cp311-cp311-win_arm64.whl", hash = "sha256:5a3ddb74b0985c4387719fc536faced33cadf2172769540c62e2a94b7b9be1c4"}, + {file = "rpds_py-0.25.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b5ffe453cde61f73fea9430223c81d29e2fbf412a6073951102146c84e19e34c"}, + {file = "rpds_py-0.25.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:115874ae5e2fdcfc16b2aedc95b5eef4aebe91b28e7e21951eda8a5dc0d3461b"}, + {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a714bf6e5e81b0e570d01f56e0c89c6375101b8463999ead3a93a5d2a4af91fa"}, + {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:35634369325906bcd01577da4c19e3b9541a15e99f31e91a02d010816b49bfda"}, + {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4cb2b3ddc16710548801c6fcc0cfcdeeff9dafbc983f77265877793f2660309"}, + {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9ceca1cf097ed77e1a51f1dbc8d174d10cb5931c188a4505ff9f3e119dfe519b"}, + {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c2cd1a4b0c2b8c5e31ffff50d09f39906fe351389ba143c195566056c13a7ea"}, + {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1de336a4b164c9188cb23f3703adb74a7623ab32d20090d0e9bf499a2203ad65"}, + {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9fca84a15333e925dd59ce01da0ffe2ffe0d6e5d29a9eeba2148916d1824948c"}, + {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:88ec04afe0c59fa64e2f6ea0dd9657e04fc83e38de90f6de201954b4d4eb59bd"}, + {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a8bd2f19e312ce3e1d2c635618e8a8d8132892bb746a7cf74780a489f0f6cdcb"}, + {file = "rpds_py-0.25.1-cp312-cp312-win32.whl", hash = "sha256:e5e2f7280d8d0d3ef06f3ec1b4fd598d386cc6f0721e54f09109a8132182fbfe"}, + {file = "rpds_py-0.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:db58483f71c5db67d643857404da360dce3573031586034b7d59f245144cc192"}, + {file = "rpds_py-0.25.1-cp312-cp312-win_arm64.whl", hash = "sha256:6d50841c425d16faf3206ddbba44c21aa3310a0cebc3c1cdfc3e3f4f9f6f5728"}, + {file = "rpds_py-0.25.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:659d87430a8c8c704d52d094f5ba6fa72ef13b4d385b7e542a08fc240cb4a559"}, + {file = "rpds_py-0.25.1-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:68f6f060f0bbdfb0245267da014d3a6da9be127fe3e8cc4a68c6f833f8a23bb1"}, + {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:083a9513a33e0b92cf6e7a6366036c6bb43ea595332c1ab5c8ae329e4bcc0a9c"}, + {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:816568614ecb22b18a010c7a12559c19f6fe993526af88e95a76d5a60b8b75fb"}, + {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c6564c0947a7f52e4792983f8e6cf9bac140438ebf81f527a21d944f2fd0a40"}, + {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c4a128527fe415d73cf1f70a9a688d06130d5810be69f3b553bf7b45e8acf79"}, + {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a49e1d7a4978ed554f095430b89ecc23f42014a50ac385eb0c4d163ce213c325"}, + {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d74ec9bc0e2feb81d3f16946b005748119c0f52a153f6db6a29e8cd68636f295"}, + {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3af5b4cc10fa41e5bc64e5c198a1b2d2864337f8fcbb9a67e747e34002ce812b"}, + {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:79dc317a5f1c51fd9c6a0c4f48209c6b8526d0524a6904fc1076476e79b00f98"}, + {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1521031351865e0181bc585147624d66b3b00a84109b57fcb7a779c3ec3772cd"}, + {file = "rpds_py-0.25.1-cp313-cp313-win32.whl", hash = "sha256:5d473be2b13600b93a5675d78f59e63b51b1ba2d0476893415dfbb5477e65b31"}, + {file = "rpds_py-0.25.1-cp313-cp313-win_amd64.whl", hash = "sha256:a7b74e92a3b212390bdce1d93da9f6488c3878c1d434c5e751cbc202c5e09500"}, + {file = "rpds_py-0.25.1-cp313-cp313-win_arm64.whl", hash = "sha256:dd326a81afe332ede08eb39ab75b301d5676802cdffd3a8f287a5f0b694dc3f5"}, + {file = 
"rpds_py-0.25.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:a58d1ed49a94d4183483a3ce0af22f20318d4a1434acee255d683ad90bf78129"}, + {file = "rpds_py-0.25.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f251bf23deb8332823aef1da169d5d89fa84c89f67bdfb566c49dea1fccfd50d"}, + {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dbd586bfa270c1103ece2109314dd423df1fa3d9719928b5d09e4840cec0d72"}, + {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6d273f136e912aa101a9274c3145dcbddbe4bac560e77e6d5b3c9f6e0ed06d34"}, + {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:666fa7b1bd0a3810a7f18f6d3a25ccd8866291fbbc3c9b912b917a6715874bb9"}, + {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:921954d7fbf3fccc7de8f717799304b14b6d9a45bbeec5a8d7408ccbf531faf5"}, + {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3d86373ff19ca0441ebeb696ef64cb58b8b5cbacffcda5a0ec2f3911732a194"}, + {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c8980cde3bb8575e7c956a530f2c217c1d6aac453474bf3ea0f9c89868b531b6"}, + {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8eb8c84ecea987a2523e057c0d950bcb3f789696c0499290b8d7b3107a719d78"}, + {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:e43a005671a9ed5a650f3bc39e4dbccd6d4326b24fb5ea8be5f3a43a6f576c72"}, + {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:58f77c60956501a4a627749a6dcb78dac522f249dd96b5c9f1c6af29bfacfb66"}, + {file = "rpds_py-0.25.1-cp313-cp313t-win32.whl", hash = "sha256:2cb9e5b5e26fc02c8a4345048cd9998c2aca7c2712bd1b36da0c72ee969a3523"}, + {file = "rpds_py-0.25.1-cp313-cp313t-win_amd64.whl", hash = 
"sha256:401ca1c4a20cc0510d3435d89c069fe0a9ae2ee6495135ac46bdd49ec0495763"}, + {file = "rpds_py-0.25.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ce4c8e485a3c59593f1a6f683cf0ea5ab1c1dc94d11eea5619e4fb5228b40fbd"}, + {file = "rpds_py-0.25.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d8222acdb51a22929c3b2ddb236b69c59c72af4019d2cba961e2f9add9b6e634"}, + {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4593c4eae9b27d22df41cde518b4b9e4464d139e4322e2127daa9b5b981b76be"}, + {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd035756830c712b64725a76327ce80e82ed12ebab361d3a1cdc0f51ea21acb0"}, + {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:114a07e85f32b125404f28f2ed0ba431685151c037a26032b213c882f26eb908"}, + {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dec21e02e6cc932538b5203d3a8bd6aa1480c98c4914cb88eea064ecdbc6396a"}, + {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:09eab132f41bf792c7a0ea1578e55df3f3e7f61888e340779b06050a9a3f16e9"}, + {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c98f126c4fc697b84c423e387337d5b07e4a61e9feac494362a59fd7a2d9ed80"}, + {file = "rpds_py-0.25.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0e6a327af8ebf6baba1c10fadd04964c1965d375d318f4435d5f3f9651550f4a"}, + {file = "rpds_py-0.25.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:bc120d1132cff853ff617754196d0ac0ae63befe7c8498bd67731ba368abe451"}, + {file = "rpds_py-0.25.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:140f61d9bed7839446bdd44852e30195c8e520f81329b4201ceead4d64eb3a9f"}, + {file = "rpds_py-0.25.1-cp39-cp39-win32.whl", hash = "sha256:9c006f3aadeda131b438c3092124bd196b66312f0caa5823ef09585a669cf449"}, + {file = "rpds_py-0.25.1-cp39-cp39-win_amd64.whl", hash = 
"sha256:a61d0b2c7c9a0ae45732a77844917b427ff16ad5464b4d4f5e4adb955f582890"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b24bf3cd93d5b6ecfbedec73b15f143596c88ee249fa98cefa9a9dc9d92c6f28"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:0eb90e94f43e5085623932b68840b6f379f26db7b5c2e6bcef3179bd83c9330f"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d50e4864498a9ab639d6d8854b25e80642bd362ff104312d9770b05d66e5fb13"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7c9409b47ba0650544b0bb3c188243b83654dfe55dcc173a86832314e1a6a35d"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:796ad874c89127c91970652a4ee8b00d56368b7e00d3477f4415fe78164c8000"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:85608eb70a659bf4c1142b2781083d4b7c0c4e2c90eff11856a9754e965b2540"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4feb9211d15d9160bc85fa72fed46432cdc143eb9cf6d5ca377335a921ac37b"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ccfa689b9246c48947d31dd9d8b16d89a0ecc8e0e26ea5253068efb6c542b76e"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:3c5b317ecbd8226887994852e85de562f7177add602514d4ac40f87de3ae45a8"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:454601988aab2c6e8fd49e7634c65476b2b919647626208e376afcd22019eeb8"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:1c0c434a53714358532d13539272db75a5ed9df75a4a090a753ac7173ec14e11"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-win_amd64.whl", hash = 
"sha256:f73ce1512e04fbe2bc97836e89830d6b4314c171587a99688082d090f934d20a"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ee86d81551ec68a5c25373c5643d343150cc54672b5e9a0cafc93c1870a53954"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:89c24300cd4a8e4a51e55c31a8ff3918e6651b241ee8876a42cc2b2a078533ba"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:771c16060ff4e79584dc48902a91ba79fd93eade3aa3a12d6d2a4aadaf7d542b"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:785ffacd0ee61c3e60bdfde93baa6d7c10d86f15655bd706c89da08068dc5038"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a40046a529cc15cef88ac5ab589f83f739e2d332cb4d7399072242400ed68c9"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:85fc223d9c76cabe5d0bff82214459189720dc135db45f9f66aa7cffbf9ff6c1"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0be9965f93c222fb9b4cc254235b3b2b215796c03ef5ee64f995b1b69af0762"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8378fa4a940f3fb509c081e06cb7f7f2adae8cf46ef258b0e0ed7519facd573e"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:33358883a4490287e67a2c391dfaea4d9359860281db3292b6886bf0be3d8692"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:1d1fadd539298e70cac2f2cb36f5b8a65f742b9b9f1014dd4ea1f7785e2470bf"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:9a46c2fb2545e21181445515960006e85d22025bd2fe6db23e76daec6eb689fe"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:50f2c501a89c9a5f4e454b126193c5495b9fb441a75b298c60591d8a2eb92e1b"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d779b325cc8238227c47fbc53964c8cc9a941d5dbae87aa007a1f08f2f77b23"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:036ded36bedb727beeabc16dc1dad7cb154b3fa444e936a03b67a86dc6a5066e"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:245550f5a1ac98504147cba96ffec8fabc22b610742e9150138e5d60774686d7"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ff7c23ba0a88cb7b104281a99476cccadf29de2a0ef5ce864959a52675b1ca83"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e37caa8cdb3b7cf24786451a0bdb853f6347b8b92005eeb64225ae1db54d1c2b"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f2f48ab00181600ee266a095fe815134eb456163f7d6699f525dee471f312cf"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9e5fc7484fa7dce57e25063b0ec9638ff02a908304f861d81ea49273e43838c1"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:d3c10228d6cf6fe2b63d2e7985e94f6916fa46940df46b70449e9ff9297bd3d1"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:5d9e40f32745db28c1ef7aad23f6fc458dc1e29945bd6781060f0d15628b8ddf"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:35a8d1a24b5936b35c5003313bc177403d8bdef0f8b24f28b1c4a255f94ea992"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:6099263f526efff9cf3883dfef505518730f7a7a93049b1d90d42e50a22b4793"}, + {file = "rpds_py-0.25.1.tar.gz", hash = "sha256:8960b6dac09b62dac26e75d7e2c4a22efb835d827a7278c34f72b2b84fa160e3"}, ] 
[[package]] @@ -3183,8 +3073,6 @@ version = "4.9.1" description = "Pure-Python RSA implementation" optional = true python-versions = "<4,>=3.6" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762"}, {file = "rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75"}, @@ -3195,30 +3083,29 @@ pyasn1 = ">=0.1.3" [[package]] name = "ruff" -version = "0.11.10" +version = "0.11.11" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "ruff-0.11.10-py3-none-linux_armv6l.whl", hash = "sha256:859a7bfa7bc8888abbea31ef8a2b411714e6a80f0d173c2a82f9041ed6b50f58"}, - {file = "ruff-0.11.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:968220a57e09ea5e4fd48ed1c646419961a0570727c7e069842edd018ee8afed"}, - {file = "ruff-0.11.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1067245bad978e7aa7b22f67113ecc6eb241dca0d9b696144256c3a879663bca"}, - {file = "ruff-0.11.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4854fd09c7aed5b1590e996a81aeff0c9ff51378b084eb5a0b9cd9518e6cff2"}, - {file = "ruff-0.11.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8b4564e9f99168c0f9195a0fd5fa5928004b33b377137f978055e40008a082c5"}, - {file = "ruff-0.11.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b6a9cc5b62c03cc1fea0044ed8576379dbaf751d5503d718c973d5418483641"}, - {file = "ruff-0.11.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:607ecbb6f03e44c9e0a93aedacb17b4eb4f3563d00e8b474298a201622677947"}, - {file = "ruff-0.11.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7b3a522fa389402cd2137df9ddefe848f727250535c70dafa840badffb56b7a4"}, - {file = 
"ruff-0.11.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f071b0deed7e9245d5820dac235cbdd4ef99d7b12ff04c330a241ad3534319f"}, - {file = "ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a60e3a0a617eafba1f2e4186d827759d65348fa53708ca547e384db28406a0b"}, - {file = "ruff-0.11.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:da8ec977eaa4b7bf75470fb575bea2cb41a0e07c7ea9d5a0a97d13dbca697bf2"}, - {file = "ruff-0.11.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ddf8967e08227d1bd95cc0851ef80d2ad9c7c0c5aab1eba31db49cf0a7b99523"}, - {file = "ruff-0.11.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5a94acf798a82db188f6f36575d80609072b032105d114b0f98661e1679c9125"}, - {file = "ruff-0.11.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3afead355f1d16d95630df28d4ba17fb2cb9c8dfac8d21ced14984121f639bad"}, - {file = "ruff-0.11.10-py3-none-win32.whl", hash = "sha256:dc061a98d32a97211af7e7f3fa1d4ca2fcf919fb96c28f39551f35fc55bdbc19"}, - {file = "ruff-0.11.10-py3-none-win_amd64.whl", hash = "sha256:5cc725fbb4d25b0f185cb42df07ab6b76c4489b4bfb740a175f3a59c70e8a224"}, - {file = "ruff-0.11.10-py3-none-win_arm64.whl", hash = "sha256:ef69637b35fb8b210743926778d0e45e1bffa850a7c61e428c6b971549b5f5d1"}, - {file = "ruff-0.11.10.tar.gz", hash = "sha256:d522fb204b4959909ecac47da02830daec102eeb100fb50ea9554818d47a5fa6"}, +files = [ + {file = "ruff-0.11.11-py3-none-linux_armv6l.whl", hash = "sha256:9924e5ae54125ed8958a4f7de320dab7380f6e9fa3195e3dc3b137c6842a0092"}, + {file = "ruff-0.11.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:c8a93276393d91e952f790148eb226658dd275cddfde96c6ca304873f11d2ae4"}, + {file = "ruff-0.11.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d6e333dbe2e6ae84cdedefa943dfd6434753ad321764fd937eef9d6b62022bcd"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7885d9a5e4c77b24e8c88aba8c80be9255fa22ab326019dac2356cff42089fc6"}, + {file 
= "ruff-0.11.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1b5ab797fcc09121ed82e9b12b6f27e34859e4227080a42d090881be888755d4"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e231ff3132c1119ece836487a02785f099a43992b95c2f62847d29bace3c75ac"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:a97c9babe1d4081037a90289986925726b802d180cca784ac8da2bbbc335f709"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8c4ddcbe8a19f59f57fd814b8b117d4fcea9bee7c0492e6cf5fdc22cfa563c8"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6224076c344a7694c6fbbb70d4f2a7b730f6d47d2a9dc1e7f9d9bb583faf390b"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:882821fcdf7ae8db7a951df1903d9cb032bbe838852e5fc3c2b6c3ab54e39875"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:dcec2d50756463d9df075a26a85a6affbc1b0148873da3997286caf1ce03cae1"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:99c28505ecbaeb6594701a74e395b187ee083ee26478c1a795d35084d53ebd81"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9263f9e5aa4ff1dec765e99810f1cc53f0c868c5329b69f13845f699fe74f639"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:64ac6f885e3ecb2fdbb71de2701d4e34526651f1e8503af8fb30d4915a3fe345"}, + {file = "ruff-0.11.11-py3-none-win32.whl", hash = "sha256:1adcb9a18802268aaa891ffb67b1c94cd70578f126637118e8099b8e4adcf112"}, + {file = "ruff-0.11.11-py3-none-win_amd64.whl", hash = "sha256:748b4bb245f11e91a04a4ff0f96e386711df0a30412b9fe0c74d5bdc0e4a531f"}, + {file = "ruff-0.11.11-py3-none-win_arm64.whl", hash = "sha256:6c51f136c0364ab1b774767aa8b86331bd8e9d414e2d107db7a2189f35ea1f7b"}, + {file = "ruff-0.11.11.tar.gz", hash = 
"sha256:7774173cc7c1980e6bf67569ebb7085989a78a103922fb83ef3dfe230cd0687d"}, ] [[package]] @@ -3227,8 +3114,6 @@ version = "0.5.3" description = "" optional = true python-versions = ">=3.7" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "safetensors-0.5.3-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bd20eb133db8ed15b40110b7c00c6df51655a2998132193de2f75f72d99c7073"}, {file = "safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:21d01c14ff6c415c485616b8b0bf961c46b3b343ca59110d38d744e577f9cce7"}, @@ -3266,8 +3151,6 @@ version = "1.6.1" description = "A set of python modules for machine learning and data mining" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "scikit_learn-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d056391530ccd1e501056160e3c9673b4da4805eb67eb2bdf4e983e1f9c9204e"}, {file = "scikit_learn-1.6.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36"}, @@ -3318,59 +3201,57 @@ tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc ( [[package]] name = "scipy" -version = "1.15.2" +version = "1.15.3" description = "Fundamental algorithms for scientific computing in Python" optional = true python-versions = ">=3.10" -groups = ["main"] -markers = "extra == \"pdf\"" -files = [ - {file = "scipy-1.15.2-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a2ec871edaa863e8213ea5df811cd600734f6400b4af272e1c011e69401218e9"}, - {file = "scipy-1.15.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:6f223753c6ea76983af380787611ae1291e3ceb23917393079dcc746ba60cfb5"}, - {file = "scipy-1.15.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:ecf797d2d798cf7c838c6d98321061eb3e72a74710e6c40540f0e8087e3b499e"}, - {file = "scipy-1.15.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:9b18aa747da280664642997e65aab1dd19d0c3d17068a04b3fe34e2559196cb9"}, - {file = 
"scipy-1.15.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87994da02e73549dfecaed9e09a4f9d58a045a053865679aeb8d6d43747d4df3"}, - {file = "scipy-1.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69ea6e56d00977f355c0f84eba69877b6df084516c602d93a33812aa04d90a3d"}, - {file = "scipy-1.15.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:888307125ea0c4466287191e5606a2c910963405ce9671448ff9c81c53f85f58"}, - {file = "scipy-1.15.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9412f5e408b397ff5641080ed1e798623dbe1ec0d78e72c9eca8992976fa65aa"}, - {file = "scipy-1.15.2-cp310-cp310-win_amd64.whl", hash = "sha256:b5e025e903b4f166ea03b109bb241355b9c42c279ea694d8864d033727205e65"}, - {file = "scipy-1.15.2-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:92233b2df6938147be6fa8824b8136f29a18f016ecde986666be5f4d686a91a4"}, - {file = "scipy-1.15.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:62ca1ff3eb513e09ed17a5736929429189adf16d2d740f44e53270cc800ecff1"}, - {file = "scipy-1.15.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:4c6676490ad76d1c2894d77f976144b41bd1a4052107902238047fb6a473e971"}, - {file = "scipy-1.15.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:a8bf5cb4a25046ac61d38f8d3c3426ec11ebc350246a4642f2f315fe95bda655"}, - {file = "scipy-1.15.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a8e34cf4c188b6dd004654f88586d78f95639e48a25dfae9c5e34a6dc34547e"}, - {file = "scipy-1.15.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28a0d2c2075946346e4408b211240764759e0fabaeb08d871639b5f3b1aca8a0"}, - {file = "scipy-1.15.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:42dabaaa798e987c425ed76062794e93a243be8f0f20fff6e7a89f4d61cb3d40"}, - {file = "scipy-1.15.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6f5e296ec63c5da6ba6fa0343ea73fd51b8b3e1a300b0a8cae3ed4b1122c7462"}, - {file = 
"scipy-1.15.2-cp311-cp311-win_amd64.whl", hash = "sha256:597a0c7008b21c035831c39927406c6181bcf8f60a73f36219b69d010aa04737"}, - {file = "scipy-1.15.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c4697a10da8f8765bb7c83e24a470da5797e37041edfd77fd95ba3811a47c4fd"}, - {file = "scipy-1.15.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:869269b767d5ee7ea6991ed7e22b3ca1f22de73ab9a49c44bad338b725603301"}, - {file = "scipy-1.15.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:bad78d580270a4d32470563ea86c6590b465cb98f83d760ff5b0990cb5518a93"}, - {file = "scipy-1.15.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:b09ae80010f52efddb15551025f9016c910296cf70adbf03ce2a8704f3a5ad20"}, - {file = "scipy-1.15.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a6fd6eac1ce74a9f77a7fc724080d507c5812d61e72bd5e4c489b042455865e"}, - {file = "scipy-1.15.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b871df1fe1a3ba85d90e22742b93584f8d2b8e6124f8372ab15c71b73e428b8"}, - {file = "scipy-1.15.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:03205d57a28e18dfd39f0377d5002725bf1f19a46f444108c29bdb246b6c8a11"}, - {file = "scipy-1.15.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:601881dfb761311045b03114c5fe718a12634e5608c3b403737ae463c9885d53"}, - {file = "scipy-1.15.2-cp312-cp312-win_amd64.whl", hash = "sha256:e7c68b6a43259ba0aab737237876e5c2c549a031ddb7abc28c7b47f22e202ded"}, - {file = "scipy-1.15.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:01edfac9f0798ad6b46d9c4c9ca0e0ad23dbf0b1eb70e96adb9fa7f525eff0bf"}, - {file = "scipy-1.15.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:08b57a9336b8e79b305a143c3655cc5bdbe6d5ece3378578888d2afbb51c4e37"}, - {file = "scipy-1.15.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:54c462098484e7466362a9f1672d20888f724911a74c22ae35b61f9c5919183d"}, - {file = "scipy-1.15.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = 
"sha256:cf72ff559a53a6a6d77bd8eefd12a17995ffa44ad86c77a5df96f533d4e6c6bb"}, - {file = "scipy-1.15.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9de9d1416b3d9e7df9923ab23cd2fe714244af10b763975bea9e4f2e81cebd27"}, - {file = "scipy-1.15.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb530e4794fc8ea76a4a21ccb67dea33e5e0e60f07fc38a49e821e1eae3b71a0"}, - {file = "scipy-1.15.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5ea7ed46d437fc52350b028b1d44e002646e28f3e8ddc714011aaf87330f2f32"}, - {file = "scipy-1.15.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:11e7ad32cf184b74380f43d3c0a706f49358b904fa7d5345f16ddf993609184d"}, - {file = "scipy-1.15.2-cp313-cp313-win_amd64.whl", hash = "sha256:a5080a79dfb9b78b768cebf3c9dcbc7b665c5875793569f48bf0e2b1d7f68f6f"}, - {file = "scipy-1.15.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:447ce30cee6a9d5d1379087c9e474628dab3db4a67484be1b7dc3196bfb2fac9"}, - {file = "scipy-1.15.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:c90ebe8aaa4397eaefa8455a8182b164a6cc1d59ad53f79943f266d99f68687f"}, - {file = "scipy-1.15.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:def751dd08243934c884a3221156d63e15234a3155cf25978b0a668409d45eb6"}, - {file = "scipy-1.15.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:302093e7dfb120e55515936cb55618ee0b895f8bcaf18ff81eca086c17bd80af"}, - {file = "scipy-1.15.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7cd5b77413e1855351cdde594eca99c1f4a588c2d63711388b6a1f1c01f62274"}, - {file = "scipy-1.15.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d0194c37037707b2afa7a2f2a924cf7bac3dc292d51b6a925e5fcb89bc5c776"}, - {file = "scipy-1.15.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:bae43364d600fdc3ac327db99659dcb79e6e7ecd279a75fe1266669d9a652828"}, - {file = "scipy-1.15.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:f031846580d9acccd0044efd1a90e6f4df3a6e12b4b6bd694a7bc03a89892b28"}, - {file = "scipy-1.15.2-cp313-cp313t-win_amd64.whl", hash = "sha256:fe8a9eb875d430d81755472c5ba75e84acc980e4a8f6204d402849234d3017db"}, - {file = "scipy-1.15.2.tar.gz", hash = "sha256:cd58a314d92838f7e6f755c8a2167ead4f27e1fd5c1251fd54289569ef3495ec"}, +files = [ + {file = "scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c"}, + {file = "scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253"}, + {file = "scipy-1.15.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:aef683a9ae6eb00728a542b796f52a5477b78252edede72b8327a886ab63293f"}, + {file = "scipy-1.15.3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:1c832e1bd78dea67d5c16f786681b28dd695a8cb1fb90af2e27580d3d0967e92"}, + {file = "scipy-1.15.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:263961f658ce2165bbd7b99fa5135195c3a12d9bef045345016b8b50c315cb82"}, + {file = "scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2abc762b0811e09a0d3258abee2d98e0c703eee49464ce0069590846f31d40"}, + {file = "scipy-1.15.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ed7284b21a7a0c8f1b6e5977ac05396c0d008b89e05498c8b7e8f4a1423bba0e"}, + {file = "scipy-1.15.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5380741e53df2c566f4d234b100a484b420af85deb39ea35a1cc1be84ff53a5c"}, + {file = "scipy-1.15.3-cp310-cp310-win_amd64.whl", hash = "sha256:9d61e97b186a57350f6d6fd72640f9e99d5a4a2b8fbf4b9ee9a841eab327dc13"}, + {file = "scipy-1.15.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:993439ce220d25e3696d1b23b233dd010169b62f6456488567e830654ee37a6b"}, + {file = "scipy-1.15.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:34716e281f181a02341ddeaad584205bd2fd3c242063bd3423d61ac259ca7eba"}, + {file = 
"scipy-1.15.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b0334816afb8b91dab859281b1b9786934392aa3d527cd847e41bb6f45bee65"}, + {file = "scipy-1.15.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6db907c7368e3092e24919b5e31c76998b0ce1684d51a90943cb0ed1b4ffd6c1"}, + {file = "scipy-1.15.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721d6b4ef5dc82ca8968c25b111e307083d7ca9091bc38163fb89243e85e3889"}, + {file = "scipy-1.15.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39cb9c62e471b1bb3750066ecc3a3f3052b37751c7c3dfd0fd7e48900ed52982"}, + {file = "scipy-1.15.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:795c46999bae845966368a3c013e0e00947932d68e235702b5c3f6ea799aa8c9"}, + {file = "scipy-1.15.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:18aaacb735ab38b38db42cb01f6b92a2d0d4b6aabefeb07f02849e47f8fb3594"}, + {file = "scipy-1.15.3-cp311-cp311-win_amd64.whl", hash = "sha256:ae48a786a28412d744c62fd7816a4118ef97e5be0bee968ce8f0a2fba7acf3bb"}, + {file = "scipy-1.15.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019"}, + {file = "scipy-1.15.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6"}, + {file = "scipy-1.15.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477"}, + {file = "scipy-1.15.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c"}, + {file = "scipy-1.15.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45"}, + {file = "scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49"}, + {file = 
"scipy-1.15.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e"}, + {file = "scipy-1.15.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539"}, + {file = "scipy-1.15.3-cp312-cp312-win_amd64.whl", hash = "sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed"}, + {file = "scipy-1.15.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759"}, + {file = "scipy-1.15.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62"}, + {file = "scipy-1.15.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb"}, + {file = "scipy-1.15.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730"}, + {file = "scipy-1.15.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825"}, + {file = "scipy-1.15.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7"}, + {file = "scipy-1.15.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11"}, + {file = "scipy-1.15.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126"}, + {file = "scipy-1.15.3-cp313-cp313-win_amd64.whl", hash = "sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163"}, + {file = "scipy-1.15.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8"}, + {file = "scipy-1.15.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = 
"sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5"}, + {file = "scipy-1.15.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e"}, + {file = "scipy-1.15.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb"}, + {file = "scipy-1.15.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723"}, + {file = "scipy-1.15.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb"}, + {file = "scipy-1.15.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4"}, + {file = "scipy-1.15.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5"}, + {file = "scipy-1.15.3-cp313-cp313t-win_amd64.whl", hash = "sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca"}, + {file = "scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf"}, ] [package.dependencies] @@ -3378,30 +3259,28 @@ numpy = ">=1.23.5,<2.5" [package.extras] dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"] -doc = ["intersphinx_registry", "jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.16.5)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.0.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)"] -test = ["Cython", "array-api-strict (>=2.0,<2.1.1)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja ; sys_platform != \"emscripten\"", "pooch", "pytest", "pytest-cov", "pytest-timeout", 
"pytest-xdist", "scikit-umfpack", "threadpoolctl"] +doc = ["intersphinx_registry", "jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.19.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.0.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)"] +test = ["Cython", "array-api-strict (>=2.0,<2.1.1)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] [[package]] name = "setuptools" -version = "80.0.0" +version = "80.8.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\" and platform_system == \"Linux\" and platform_machine == \"x86_64\" or extra == \"pdf\" and python_version >= \"3.12\"" files = [ - {file = "setuptools-80.0.0-py3-none-any.whl", hash = "sha256:a38f898dcd6e5380f4da4381a87ec90bd0a7eec23d204a5552e80ee3cab6bd27"}, - {file = "setuptools-80.0.0.tar.gz", hash = "sha256:c40a5b3729d58dd749c0f08f1a07d134fb8a0a3d7f87dc33e7c5e1f762138650"}, + {file = "setuptools-80.8.0-py3-none-any.whl", hash = "sha256:95a60484590d24103af13b686121328cc2736bee85de8936383111e421b9edc0"}, + {file = "setuptools-80.8.0.tar.gz", hash = "sha256:49f7af965996f26d43c8ae34539c8d99c5042fbff34302ea151eaa9c207cd257"}, ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""] -core = ["importlib_metadata (>=6) ; python_version < \"3.10\"", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.8.0)"] +core = ["importlib_metadata (>=6)", "jaraco.functools 
(>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib_metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.14.*)", "pytest-mypy"] [[package]] name = "six" @@ -3409,7 +3288,6 @@ version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["main", "docs"] 
files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -3421,8 +3299,6 @@ version = "1.3.1" description = "Sniff out which async library your code is running under" optional = true python-versions = ">=3.7" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, @@ -3430,41 +3306,40 @@ files = [ [[package]] name = "soupsieve" -version = "2.6" +version = "2.7" description = "A modern CSS selector implementation for Beautiful Soup." optional = false python-versions = ">=3.8" -groups = ["main"] files = [ - {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, - {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, + {file = "soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4"}, + {file = "soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a"}, ] [[package]] name = "surya-ocr" -version = "0.13.1" +version = "0.14.2" description = "OCR, layout, reading order, and table recognition in 90+ languages" optional = true python-versions = "<4.0,>=3.10" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ - {file = "surya_ocr-0.13.1-py3-none-any.whl", hash = "sha256:2704a97f5de625bc747eddf87874635cb8be164c4c9373207a022648325f009d"}, - {file = "surya_ocr-0.13.1.tar.gz", hash = "sha256:af4004448eb8798aeddd4aa709c2f4d3795a3ec7bf12252595b481a65f799a52"}, + {file = "surya_ocr-0.14.2-py3-none-any.whl", hash 
= "sha256:0c402705c860f8bf98fc2bf2a3b49d7f0e16fba587aed6d3f01bb53bb776d283"}, + {file = "surya_ocr-0.14.2.tar.gz", hash = "sha256:852af681073167beba9a638658c70b81318f1a8f3d558db68dead1b2c391e862"}, ] [package.dependencies] click = ">=8.1.8,<9.0.0" +einops = ">=0.8.1,<0.9.0" filetype = ">=1.2.0,<2.0.0" opencv-python-headless = ">=4.11.0.86,<5.0.0.0" pillow = ">=10.2.0,<11.0.0" platformdirs = ">=4.3.6,<5.0.0" +pre-commit = ">=4.2.0,<5.0.0" pydantic = ">=2.5.3,<3.0.0" pydantic-settings = ">=2.1.0,<3.0.0" pypdfium2 = "4.30.0" python-dotenv = ">=1.0.0,<2.0.0" -torch = ">=2.5.1,<3.0.0" -transformers = ">=4.41.0,<5.0.0" +torch = ">=2.7.0,<3.0.0" +transformers = ">=4.51.2,<5.0.0" [[package]] name = "sympy" @@ -3472,8 +3347,6 @@ version = "1.14.0" description = "Computer algebra system (CAS) in Python" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5"}, {file = "sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517"}, @@ -3491,8 +3364,6 @@ version = "3.6.0" description = "threadpoolctl" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb"}, {file = "threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e"}, @@ -3504,8 +3375,6 @@ version = "0.21.1" description = "" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "tokenizers-0.21.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e78e413e9e668ad790a29456e677d9d3aa50a9ad311a40905d6861ba7692cf41"}, {file = "tokenizers-0.21.1-cp39-abi3-macosx_11_0_arm64.whl", hash = 
"sha256:cd51cd0a91ecc801633829fcd1fda9cf8682ed3477c6243b9a095539de4aecf3"}, @@ -3538,8 +3407,6 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" -groups = ["dev"] -markers = "python_version == \"3.10\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -3581,8 +3448,6 @@ version = "2.7.0" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = true python-versions = ">=3.9.0" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "torch-2.7.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c9afea41b11e1a1ab1b258a5c31afbd646d6319042bfe4f231b408034b51128b"}, {file = "torch-2.7.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0b9960183b6e5b71239a3e6c883d8852c304e691c0b2955f7045e8a6d05b9183"}, @@ -3644,7 +3509,6 @@ version = "4.67.1" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" -groups = ["main"] files = [ {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, @@ -3662,15 +3526,13 @@ telegram = ["requests"] [[package]] name = "transformers" -version = "4.51.3" +version = "4.52.3" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = true python-versions = ">=3.9.0" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ - {file = "transformers-4.51.3-py3-none-any.whl", hash = "sha256:fd3279633ceb2b777013234bbf0b4f5c2d23c4626b05497691f00cfda55e8a83"}, - {file = "transformers-4.51.3.tar.gz", hash = 
"sha256:e292fcab3990c6defe6328f0f7d2004283ca81a7a07b2de9a46d67fd81ea1409"}, + {file = "transformers-4.52.3-py3-none-any.whl", hash = "sha256:cd04059da50e7cf2a617ce3143ba8beffbf119f8c25a0717c3454fd9d0f19609"}, + {file = "transformers-4.52.3.tar.gz", hash = "sha256:2e1de29374f27920aaf6d589d4e6339f33def2fb08809e1a1d792e040e9fbce7"}, ] [package.dependencies] @@ -3687,23 +3549,22 @@ tqdm = ">=4.27" [package.extras] accelerate = ["accelerate (>=0.26.0)"] -agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=2.0)"] -all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av", "codecarbon (>=2.8.1)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.3.2,<0.4)", "librosa", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.0)", "torchaudio", "torchvision"] +all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "codecarbon (>=2.8.1)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.4.4,<0.5)", "librosa", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1,<2.7)", "torchaudio", "torchvision"] audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] 
benchmark = ["optimum-benchmark (>=0.3.0)"] codecarbon = ["codecarbon (>=2.8.1)"] deepspeed = ["accelerate (>=0.26.0)", "deepspeed (>=0.9.3)"] -deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] -dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.3.2,<0.4)", "libcst", "librosa", "nltk (<=3.8.1)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=1.0.11)", "tokenizers 
(>=0.21,<0.22)", "torch (>=2.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] -dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.21,<0.22)", "urllib3 (<2.0.0)"] -dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "kernels (>=0.3.2,<0.4)", "libcst", "librosa", "nltk (<=3.8.1)", "num2words", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", 
"sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] +dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.4.4,<0.5)", "libcst", "librosa", "nltk (<=3.8.1)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", 
"sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1,<2.7)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev-tensorflow = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "tf2onnx", "timeout-decorator", "tokenizers (>=0.21,<0.22)", "urllib3 (<2.0.0)"] +dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "kernels 
(>=0.4.4,<0.5)", "libcst", "librosa", "nltk (<=3.8.1)", "num2words", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1,<2.7)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)"] flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"] flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] ftfy = ["ftfy"] -hf-xet = ["hf-xet"] -hub-kernels = ["kernels (>=0.3.2,<0.4)"] -integrations = ["kernels (>=0.3.2,<0.4)", "optuna", "ray[tune] (>=2.7.0)", "sigopt"] -ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] +hf-xet = ["hf_xet"] +hub-kernels = ["kernels (>=0.4.4,<0.5)"] +integrations = ["kernels (>=0.4.4,<0.5)", "optuna", "ray[tune] (>=2.7.0)", "sigopt"] +ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)"] modelcreation = ["cookiecutter (==1.7.3)"] natten = ["natten (>=0.14.6,<0.15.0)"] num2words = ["num2words"] @@ -3720,17 +3581,17 @@ serving = ["fastapi", "pydantic", "starlette", "uvicorn"] sigopt = ["sigopt"] 
sklearn = ["scikit-learn"] speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] -testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "parameterized", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] +testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "parameterized", "psutil", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] tf = ["keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"] tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<0.24)", "tensorflow-text (<2.16)", "tf2onnx"] tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] tiktoken = ["blobfile", "tiktoken"] timm = ["timm (<=1.0.11)"] tokenizers = ["tokenizers (>=0.21,<0.22)"] -torch = ["accelerate (>=0.26.0)", "torch (>=2.0)"] +torch = ["accelerate (>=0.26.0)", "torch (>=2.1,<2.7)"] torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"] -torchhub = ["filelock", "huggingface-hub 
(>=0.30.0,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.21,<0.22)", "torch (>=2.0)", "tqdm (>=4.27)"] +torchhub = ["filelock", "huggingface-hub (>=0.30.0,<1.0)", "importlib_metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1,<2.7)", "tqdm (>=4.27)"] video = ["av"] vision = ["Pillow (>=10.0.1,<=15.0)"] @@ -3740,8 +3601,6 @@ version = "3.3.0" description = "A language and compiler for custom Deep Learning operations" optional = true python-versions = "*" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"pdf\"" files = [ {file = "triton-3.3.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fad99beafc860501d7fcc1fb7045d9496cbe2c882b1674640304949165a916e7"}, {file = "triton-3.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3161a2bf073d6b22c4e2f33f951f3e5e3001462b2570e6df9cd57565bdec2984"}, @@ -3765,7 +3624,6 @@ version = "4.12.0.20250516" description = "Typing stubs for beautifulsoup4" optional = false python-versions = ">=3.9" -groups = ["dev"] files = [ {file = "types_beautifulsoup4-4.12.0.20250516-py3-none-any.whl", hash = "sha256:5923399d4a1ba9cc8f0096fe334cc732e130269541d66261bb42ab039c0376ee"}, {file = "types_beautifulsoup4-4.12.0.20250516.tar.gz", hash = "sha256:aa19dd73b33b70d6296adf92da8ab8a0c945c507e6fb7d5db553415cc77b417e"}, @@ -3776,14 +3634,13 @@ types-html5lib = "*" [[package]] name = "types-html5lib" -version = "1.1.11.20241018" +version = "1.1.11.20250516" description = "Typing stubs for html5lib" optional = false -python-versions = ">=3.8" -groups = ["dev"] +python-versions = ">=3.9" files = [ - {file = "types-html5lib-1.1.11.20241018.tar.gz", hash = 
"sha256:98042555ff78d9e3a51c77c918b1041acbb7eb6c405408d8a9e150ff5beccafa"}, - {file = "types_html5lib-1.1.11.20241018-py3-none-any.whl", hash = "sha256:3f1e064d9ed2c289001ae6392c84c93833abb0816165c6ff0abfc304a779f403"}, + {file = "types_html5lib-1.1.11.20250516-py3-none-any.whl", hash = "sha256:5e407b14b1bd2b9b1107cbd1e2e19d4a0c46d60febd231c7ab7313d7405663c1"}, + {file = "types_html5lib-1.1.11.20250516.tar.gz", hash = "sha256:65043a6718c97f7d52567cc0cdf41efbfc33b1f92c6c0c5e19f60a7ec69ae720"}, ] [[package]] @@ -3792,7 +3649,6 @@ version = "4.23.0.20250516" description = "Typing stubs for jsonschema" optional = false python-versions = ">=3.9" -groups = ["dev"] files = [ {file = "types_jsonschema-4.23.0.20250516-py3-none-any.whl", hash = "sha256:e7d0dd7db7e59e63c26e3230e26ffc64c4704cc5170dc21270b366a35ead1618"}, {file = "types_jsonschema-4.23.0.20250516.tar.gz", hash = "sha256:9ace09d9d35c4390a7251ccd7d833b92ccc189d24d1b347f26212afce361117e"}, @@ -3803,14 +3659,13 @@ referencing = "*" [[package]] name = "types-pytz" -version = "2025.2.0.20250326" +version = "2025.2.0.20250516" description = "Typing stubs for pytz" optional = false python-versions = ">=3.9" -groups = ["dev"] files = [ - {file = "types_pytz-2025.2.0.20250326-py3-none-any.whl", hash = "sha256:3c397fd1b845cd2b3adc9398607764ced9e578a98a5d1fbb4a9bc9253edfb162"}, - {file = "types_pytz-2025.2.0.20250326.tar.gz", hash = "sha256:deda02de24f527066fc8d6a19e284ab3f3ae716a42b4adb6b40e75e408c08d36"}, + {file = "types_pytz-2025.2.0.20250516-py3-none-any.whl", hash = "sha256:e0e0c8a57e2791c19f718ed99ab2ba623856b11620cb6b637e5f62ce285a7451"}, + {file = "types_pytz-2025.2.0.20250516.tar.gz", hash = "sha256:e1216306f8c0d5da6dafd6492e72eb080c9a166171fa80dd7a1990fd8be7a7b3"}, ] [[package]] @@ -3830,7 +3685,6 @@ version = "2024.11.6.20250403" description = "Typing stubs for regex" optional = false python-versions = ">=3.9" -groups = ["dev"] files = [ {file = "types_regex-2024.11.6.20250403-py3-none-any.whl", hash = 
"sha256:e22c0f67d73f4b4af6086a340f387b6f7d03bed8a0bb306224b75c51a29b0001"}, {file = "types_regex-2024.11.6.20250403.tar.gz", hash = "sha256:3fdf2a70bbf830de4b3a28e9649a52d43dabb57cdb18fbfe2252eefb53666665"}, @@ -3838,14 +3692,13 @@ files = [ [[package]] name = "types-requests" -version = "2.32.0.20250306" +version = "2.32.0.20250515" description = "Typing stubs for requests" optional = false python-versions = ">=3.9" -groups = ["dev"] files = [ - {file = "types_requests-2.32.0.20250306-py3-none-any.whl", hash = "sha256:25f2cbb5c8710b2022f8bbee7b2b66f319ef14aeea2f35d80f18c9dbf3b60a0b"}, - {file = "types_requests-2.32.0.20250306.tar.gz", hash = "sha256:0962352694ec5b2f95fda877ee60a159abdf84a0fc6fdace599f20acb41a03d1"}, + {file = "types_requests-2.32.0.20250515-py3-none-any.whl", hash = "sha256:f8eba93b3a892beee32643ff836993f15a785816acca21ea0ffa006f05ef0fb2"}, + {file = "types_requests-2.32.0.20250515.tar.gz", hash = "sha256:09c8b63c11318cb2460813871aaa48b671002e59fda67ca909e9883777787581"}, ] [package.dependencies] @@ -3857,7 +3710,6 @@ version = "4.67.0.20250516" description = "Typing stubs for tqdm" optional = false python-versions = ">=3.9" -groups = ["dev"] files = [ {file = "types_tqdm-4.67.0.20250516-py3-none-any.whl", hash = "sha256:1dd9b2c65273f2342f37e5179bc6982df86b6669b3376efc12aef0a29e35d36d"}, {file = "types_tqdm-4.67.0.20250516.tar.gz", hash = "sha256:230ccab8a332d34f193fc007eb132a6ef54b4512452e718bf21ae0a7caeb5a6b"}, @@ -3868,28 +3720,24 @@ types-requests = "*" [[package]] name = "typing-extensions" -version = "4.12.2" +version = "4.13.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" -groups = ["main", "dev", "docs"] files = [ - {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, - {file = "typing_extensions-4.12.2.tar.gz", hash = 
"sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, + {file = "typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"}, + {file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"}, ] -markers = {docs = "python_version == \"3.10\""} [[package]] name = "typing-inspection" -version = "0.4.0" +version = "0.4.1" description = "Runtime typing introspection tools" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ - {file = "typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f"}, - {file = "typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122"}, + {file = "typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51"}, + {file = "typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28"}, ] [package.dependencies] @@ -3901,7 +3749,6 @@ version = "2025.2" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" -groups = ["main"] files = [ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, @@ -3909,33 +3756,30 @@ files = [ [[package]] name = "urllib3" -version = "2.3.0" +version = "2.4.0" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" -groups = ["main", "dev", "docs"] files = [ - {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, - {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, + {file = "urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813"}, + {file = "urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466"}, ] -markers = {main = "extra == \"pdf\""} [package.extras] -brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] [[package]] name = "virtualenv" -version = "20.29.3" +version = "20.31.2" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" -groups = ["main", "dev"] files = [ - {file = "virtualenv-20.29.3-py3-none-any.whl", hash = "sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170"}, - {file = "virtualenv-20.29.3.tar.gz", hash = "sha256:95e39403fcf3940ac45bc717597dba16110b74506131845d9b687d5e73d947ac"}, + {file = "virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11"}, + {file = "virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af"}, ] [package.dependencies] @@ -3945,7 +3789,7 @@ platformdirs = ">=3.9.1,<5" [package.extras] docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", 
"flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] [[package]] name = "watchdog" @@ -3953,7 +3797,6 @@ version = "6.0.0" description = "Filesystem events monitoring" optional = false python-versions = ">=3.9" -groups = ["docs"] files = [ {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d1cdb490583ebd691c012b3d6dae011000fe42edb7a82ece80965b42abd61f26"}, {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc64ab3bdb6a04d69d4023b29422170b74681784ffb9463ed4870cf2f3e66112"}, @@ -3996,8 +3839,6 @@ version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" optional = true python-versions = "*" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, @@ -4009,8 +3850,6 @@ version = "15.0.1" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" optional = true python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b"}, {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205"}, @@ -4087,6 +3926,6 @@ files = [ pdf = ["marker-pdf"] [metadata] -lock-version = "2.1" +lock-version = "2.0" python-versions = ">=3.10,<4" -content-hash = "5a1dee7fcc054de35ca27951eb1cba6fa330a97fcb090de354c4e7cc0741d70c" +content-hash = "125f40ee56b22ffe1aba457ad85a1a8a78ce55fdee49b3b4ee7ba68dfbd903ae" From 9c53e875a40d86053624e8a6db17f20b0b22386f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 May 2025 16:24:44 +0000 Subject: [PATCH 054/125] Bump mkdocstrings-python from 1.16.10 to 1.16.11 Bumps [mkdocstrings-python](https://github.com/mkdocstrings/python) from 1.16.10 to 1.16.11. - [Release notes](https://github.com/mkdocstrings/python/releases) - [Changelog](https://github.com/mkdocstrings/python/blob/main/CHANGELOG.md) - [Commits](https://github.com/mkdocstrings/python/compare/1.16.10...1.16.11) --- updated-dependencies: - dependency-name: mkdocstrings-python dependency-version: 1.16.11 dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- poetry.lock | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 18feb3ae..0ce8984a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. 
[[package]] name = "annotated-types" @@ -424,7 +424,7 @@ files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, ] -markers = {main = "python_version == \"3.10\" and extra == \"pdf\"", dev = "python_version == \"3.10\""} +markers = {main = "extra == \"pdf\" and python_version == \"3.10\"", dev = "python_version == \"3.10\""} [package.extras] test = ["pytest (>=6)"] @@ -1539,14 +1539,14 @@ python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"] [[package]] name = "mkdocstrings-python" -version = "1.16.10" +version = "1.16.11" description = "A Python handler for mkdocstrings." optional = false python-versions = ">=3.9" groups = ["docs"] files = [ - {file = "mkdocstrings_python-1.16.10-py3-none-any.whl", hash = "sha256:63bb9f01f8848a644bdb6289e86dc38ceddeaa63ecc2e291e3b2ca52702a6643"}, - {file = "mkdocstrings_python-1.16.10.tar.gz", hash = "sha256:f9eedfd98effb612ab4d0ed6dd2b73aff6eba5215e0a65cea6d877717f75502e"}, + {file = "mkdocstrings_python-1.16.11-py3-none-any.whl", hash = "sha256:25d96cc9c1f9c272ea1bd8222c900b5f852bf46c984003e9c7c56eaa4696190f"}, + {file = "mkdocstrings_python-1.16.11.tar.gz", hash = "sha256:935f95efa887f99178e4a7becaaa1286fb35adafffd669b04fd611d97c00e5ce"}, ] [package.dependencies] From beedd68af2c60a2a3db49228a2f0f26948978e85 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 May 2025 16:36:34 +0000 Subject: [PATCH 055/125] Bump ruff from 0.11.10 to 0.11.11 Bumps [ruff](https://github.com/astral-sh/ruff) from 0.11.10 to 0.11.11. 
- [Release notes](https://github.com/astral-sh/ruff/releases) - [Changelog](https://github.com/astral-sh/ruff/blob/main/CHANGELOG.md) - [Commits](https://github.com/astral-sh/ruff/compare/0.11.10...0.11.11) --- updated-dependencies: - dependency-name: ruff dependency-version: 0.11.11 dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- poetry.lock | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0ce8984a..b3ad342f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3180,30 +3180,30 @@ pyasn1 = ">=0.1.3" [[package]] name = "ruff" -version = "0.11.10" +version = "0.11.11" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" groups = ["dev"] files = [ - {file = "ruff-0.11.10-py3-none-linux_armv6l.whl", hash = "sha256:859a7bfa7bc8888abbea31ef8a2b411714e6a80f0d173c2a82f9041ed6b50f58"}, - {file = "ruff-0.11.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:968220a57e09ea5e4fd48ed1c646419961a0570727c7e069842edd018ee8afed"}, - {file = "ruff-0.11.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1067245bad978e7aa7b22f67113ecc6eb241dca0d9b696144256c3a879663bca"}, - {file = "ruff-0.11.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4854fd09c7aed5b1590e996a81aeff0c9ff51378b084eb5a0b9cd9518e6cff2"}, - {file = "ruff-0.11.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8b4564e9f99168c0f9195a0fd5fa5928004b33b377137f978055e40008a082c5"}, - {file = "ruff-0.11.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b6a9cc5b62c03cc1fea0044ed8576379dbaf751d5503d718c973d5418483641"}, - {file = "ruff-0.11.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:607ecbb6f03e44c9e0a93aedacb17b4eb4f3563d00e8b474298a201622677947"}, - {file = 
"ruff-0.11.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7b3a522fa389402cd2137df9ddefe848f727250535c70dafa840badffb56b7a4"}, - {file = "ruff-0.11.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f071b0deed7e9245d5820dac235cbdd4ef99d7b12ff04c330a241ad3534319f"}, - {file = "ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a60e3a0a617eafba1f2e4186d827759d65348fa53708ca547e384db28406a0b"}, - {file = "ruff-0.11.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:da8ec977eaa4b7bf75470fb575bea2cb41a0e07c7ea9d5a0a97d13dbca697bf2"}, - {file = "ruff-0.11.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ddf8967e08227d1bd95cc0851ef80d2ad9c7c0c5aab1eba31db49cf0a7b99523"}, - {file = "ruff-0.11.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5a94acf798a82db188f6f36575d80609072b032105d114b0f98661e1679c9125"}, - {file = "ruff-0.11.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3afead355f1d16d95630df28d4ba17fb2cb9c8dfac8d21ced14984121f639bad"}, - {file = "ruff-0.11.10-py3-none-win32.whl", hash = "sha256:dc061a98d32a97211af7e7f3fa1d4ca2fcf919fb96c28f39551f35fc55bdbc19"}, - {file = "ruff-0.11.10-py3-none-win_amd64.whl", hash = "sha256:5cc725fbb4d25b0f185cb42df07ab6b76c4489b4bfb740a175f3a59c70e8a224"}, - {file = "ruff-0.11.10-py3-none-win_arm64.whl", hash = "sha256:ef69637b35fb8b210743926778d0e45e1bffa850a7c61e428c6b971549b5f5d1"}, - {file = "ruff-0.11.10.tar.gz", hash = "sha256:d522fb204b4959909ecac47da02830daec102eeb100fb50ea9554818d47a5fa6"}, + {file = "ruff-0.11.11-py3-none-linux_armv6l.whl", hash = "sha256:9924e5ae54125ed8958a4f7de320dab7380f6e9fa3195e3dc3b137c6842a0092"}, + {file = "ruff-0.11.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:c8a93276393d91e952f790148eb226658dd275cddfde96c6ca304873f11d2ae4"}, + {file = "ruff-0.11.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d6e333dbe2e6ae84cdedefa943dfd6434753ad321764fd937eef9d6b62022bcd"}, + {file = 
"ruff-0.11.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7885d9a5e4c77b24e8c88aba8c80be9255fa22ab326019dac2356cff42089fc6"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1b5ab797fcc09121ed82e9b12b6f27e34859e4227080a42d090881be888755d4"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e231ff3132c1119ece836487a02785f099a43992b95c2f62847d29bace3c75ac"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:a97c9babe1d4081037a90289986925726b802d180cca784ac8da2bbbc335f709"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8c4ddcbe8a19f59f57fd814b8b117d4fcea9bee7c0492e6cf5fdc22cfa563c8"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6224076c344a7694c6fbbb70d4f2a7b730f6d47d2a9dc1e7f9d9bb583faf390b"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:882821fcdf7ae8db7a951df1903d9cb032bbe838852e5fc3c2b6c3ab54e39875"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:dcec2d50756463d9df075a26a85a6affbc1b0148873da3997286caf1ce03cae1"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:99c28505ecbaeb6594701a74e395b187ee083ee26478c1a795d35084d53ebd81"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9263f9e5aa4ff1dec765e99810f1cc53f0c868c5329b69f13845f699fe74f639"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:64ac6f885e3ecb2fdbb71de2701d4e34526651f1e8503af8fb30d4915a3fe345"}, + {file = "ruff-0.11.11-py3-none-win32.whl", hash = "sha256:1adcb9a18802268aaa891ffb67b1c94cd70578f126637118e8099b8e4adcf112"}, + {file = "ruff-0.11.11-py3-none-win_amd64.whl", hash = "sha256:748b4bb245f11e91a04a4ff0f96e386711df0a30412b9fe0c74d5bdc0e4a531f"}, + {file = 
"ruff-0.11.11-py3-none-win_arm64.whl", hash = "sha256:6c51f136c0364ab1b774767aa8b86331bd8e9d414e2d107db7a2189f35ea1f7b"}, + {file = "ruff-0.11.11.tar.gz", hash = "sha256:7774173cc7c1980e6bf67569ebb7085989a78a103922fb83ef3dfe230cd0687d"}, ] [[package]] From cc5ec3ab2dcef824287698b6252dd31f79b37569 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 May 2025 16:48:14 +0000 Subject: [PATCH 056/125] Bump pytest-mock from 3.14.0 to 3.14.1 Bumps [pytest-mock](https://github.com/pytest-dev/pytest-mock) from 3.14.0 to 3.14.1. - [Release notes](https://github.com/pytest-dev/pytest-mock/releases) - [Changelog](https://github.com/pytest-dev/pytest-mock/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest-mock/compare/v3.14.0...v3.14.1) --- updated-dependencies: - dependency-name: pytest-mock dependency-version: 3.14.1 dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index b3ad342f..0c961e07 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2646,14 +2646,14 @@ testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] [[package]] name = "pytest-mock" -version = "3.14.0" +version = "3.14.1" description = "Thin-wrapper around the mock package for easier use with pytest" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, - {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, + {file = "pytest_mock-3.14.1-py3-none-any.whl", hash = "sha256:178aefcd11307d874b4cd3100344e7e2d888d9791a6a1d9bfe90fbc1b74fd1d0"}, + {file = "pytest_mock-3.14.1.tar.gz", hash = 
"sha256:159e9edac4c451ce77a5cdb9fc5d1100708d2dd4ba3c3df572f14097351af80e"}, ] [package.dependencies] From 0cf98200b277f40cdf3bc3c23327a4efbd29013d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 May 2025 16:59:32 +0000 Subject: [PATCH 057/125] Bump marker-pdf from 1.6.2 to 1.7.3 Bumps [marker-pdf](https://github.com/VikParuchuri/marker) from 1.6.2 to 1.7.3. - [Release notes](https://github.com/VikParuchuri/marker/releases) - [Commits](https://github.com/VikParuchuri/marker/compare/v1.6.2...v1.7.3) --- updated-dependencies: - dependency-name: marker-pdf dependency-version: 1.7.3 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- poetry.lock | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0c961e07..82faee69 100644 --- a/poetry.lock +++ b/poetry.lock @@ -283,14 +283,14 @@ markers = {main = "extra == \"pdf\""} [[package]] name = "click" -version = "8.1.8" +version = "8.2.1" description = "Composable command line interface toolkit" optional = false -python-versions = ">=3.7" +python-versions = ">=3.10" groups = ["main", "docs"] files = [ - {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, - {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, + {file = "click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b"}, + {file = "click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202"}, ] [package.dependencies] @@ -413,6 +413,19 @@ files = [ {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, ] +[[package]] +name = "einops" +version = "0.8.1" +description = "A 
new flavour of deep learning operations" +optional = true +python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"pdf\"" +files = [ + {file = "einops-0.8.1-py3-none-any.whl", hash = "sha256:919387eb55330f5757c6bea9165c5ff5cfe63a642682ea788a6d472576d81737"}, + {file = "einops-0.8.1.tar.gz", hash = "sha256:de5d960a7a761225532e0f1959e5315ebeafc0cd43394732f103ca44b9837e84"}, +] + [[package]] name = "exceptiongroup" version = "1.2.2" @@ -1239,20 +1252,20 @@ six = ">=1.15,<2" [[package]] name = "marker-pdf" -version = "1.6.2" +version = "1.7.3" description = "Convert documents to markdown with high speed and accuracy." optional = true python-versions = "<4.0,>=3.10" groups = ["main"] markers = "extra == \"pdf\"" files = [ - {file = "marker_pdf-1.6.2-py3-none-any.whl", hash = "sha256:48fbc6353e6fc3510d30d5682a8974fc9d6eb58a13e7c3f525ed6973b721f108"}, - {file = "marker_pdf-1.6.2.tar.gz", hash = "sha256:38725082c89c0aec5e28e4f1df8f3974ccc0742c2265f0342c20e52fbde90bf0"}, + {file = "marker_pdf-1.7.3-py3-none-any.whl", hash = "sha256:3dbb890dfe383d4d437d55ab97dbc35e2aae1b613a032b0738d1d25cabbd07f4"}, + {file = "marker_pdf-1.7.3.tar.gz", hash = "sha256:ddf297036ccd54d94e2f4a684b71bfe201c19755aa40dd8f2be757a8e631f8b9"}, ] [package.dependencies] anthropic = ">=0.46.0,<0.47.0" -click = ">=8.1.7,<9.0.0" +click = ">=8.2.0,<9.0.0" filetype = ">=1.2.0,<2.0.0" ftfy = ">=6.1.1,<7.0.0" google-genai = ">=1.0.0,<2.0.0" @@ -1268,8 +1281,8 @@ python-dotenv = ">=1.0.0,<2.0.0" rapidfuzz = ">=3.8.1,<4.0.0" regex = ">=2024.4.28,<2025.0.0" scikit-learn = ">=1.6.1,<2.0.0" -surya-ocr = ">=0.13.1,<0.14.0" -torch = ">=2.5.1,<3.0.0" +surya-ocr = ">=0.14.2,<0.15.0" +torch = ">=2.7.0,<3.0.0" tqdm = ">=4.66.1,<5.0.0" transformers = ">=4.45.2,<5.0.0" @@ -3427,29 +3440,31 @@ files = [ [[package]] name = "surya-ocr" -version = "0.13.1" +version = "0.14.2" description = "OCR, layout, reading order, and table recognition in 90+ languages" optional = true python-versions = "<4.0,>=3.10" groups = 
["main"] markers = "extra == \"pdf\"" files = [ - {file = "surya_ocr-0.13.1-py3-none-any.whl", hash = "sha256:2704a97f5de625bc747eddf87874635cb8be164c4c9373207a022648325f009d"}, - {file = "surya_ocr-0.13.1.tar.gz", hash = "sha256:af4004448eb8798aeddd4aa709c2f4d3795a3ec7bf12252595b481a65f799a52"}, + {file = "surya_ocr-0.14.2-py3-none-any.whl", hash = "sha256:0c402705c860f8bf98fc2bf2a3b49d7f0e16fba587aed6d3f01bb53bb776d283"}, + {file = "surya_ocr-0.14.2.tar.gz", hash = "sha256:852af681073167beba9a638658c70b81318f1a8f3d558db68dead1b2c391e862"}, ] [package.dependencies] click = ">=8.1.8,<9.0.0" +einops = ">=0.8.1,<0.9.0" filetype = ">=1.2.0,<2.0.0" opencv-python-headless = ">=4.11.0.86,<5.0.0.0" pillow = ">=10.2.0,<11.0.0" platformdirs = ">=4.3.6,<5.0.0" +pre-commit = ">=4.2.0,<5.0.0" pydantic = ">=2.5.3,<3.0.0" pydantic-settings = ">=2.1.0,<3.0.0" pypdfium2 = "4.30.0" python-dotenv = ">=1.0.0,<2.0.0" -torch = ">=2.5.1,<3.0.0" -transformers = ">=4.41.0,<5.0.0" +torch = ">=2.7.0,<3.0.0" +transformers = ">=4.51.2,<5.0.0" [[package]] name = "sympy" From fb77a28b90b1d4d2c903f0b818870349f5d9f257 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 26 May 2025 20:26:59 +0000 Subject: [PATCH 058/125] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.11.9 → v0.11.11](https://github.com/astral-sh/ruff-pre-commit/compare/v0.11.9...v0.11.11) - [github.com/igorshubovych/markdownlint-cli: v0.44.0 → v0.45.0](https://github.com/igorshubovych/markdownlint-cli/compare/v0.44.0...v0.45.0) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 727474dd..f76ef388 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: hooks: - id: check-github-workflows - repo: 
https://github.com/astral-sh/ruff-pre-commit - rev: v0.11.9 + rev: v0.11.11 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] @@ -33,7 +33,7 @@ repos: exclude: autocorpus/parse_xml.py additional_dependencies: [types-beautifulsoup4, types-regex, lxml-stubs, types-tqdm, types-jsonschema] - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.44.0 + rev: v0.45.0 hooks: - id: markdownlint-fix - repo: https://github.com/codespell-project/codespell From 461bb2fc6ba38050423faf6fc3047b4016c1480d Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 27 May 2025 12:20:39 +0100 Subject: [PATCH 059/125] Added linux and windows-specific dependencies for word processing --- .github/workflows/ci.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a661ff09..abd3d695 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,9 +20,15 @@ jobs: python-version: ['3.10', '3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v4 + - name: Install LibreOffice + if: runner.os == 'Linux' + run: sudo apt-get update && sudo apt-get install -y libreoffice - uses: ./.github/actions/setup with: python-version: ${{ matrix.python-version }} + - name: Install pywin32 on Windows + if: runner.os == 'Windows' + run: poetry add pywin32 - name: Run tests run: poetry run pytest --skip-ci-macos - name: Upload coverage reports to Codecov From 4ac3c39e0b9f6fffa8d818b8e55ac2318fe5d6c2 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 27 May 2025 12:43:28 +0100 Subject: [PATCH 060/125] Windows word processing needs microsoft office (for now) so this will need to be skipped for windows runners --- .github/workflows/ci.yml | 2 +- tests/test_regression.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index abd3d695..e57f7c3a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,7 +30,7 @@ jobs: if: 
runner.os == 'Windows' run: poetry add pywin32 - name: Run tests - run: poetry run pytest --skip-ci-macos + run: poetry run pytest --skip-ci-macos --skip-ci-windows - name: Upload coverage reports to Codecov if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13' && github.event.pull_request.user.login != 'dependabot[bot]' && github.event.pull_request.user.login != 'pre-commit-ci[bot]' }} uses: codecov/codecov-action@v5 diff --git a/tests/test_regression.py b/tests/test_regression.py index be1f495a..3eb67195 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -119,6 +119,7 @@ def test_pdf_to_bioc( assert new_tables == expected_tables +@pytest.mark.skip_ci_windows @pytest.mark.parametrize( "input_file, config, has_tables", [ From da33631670a5c0802ca64e4cddfa27fcc8888083 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 27 May 2025 13:37:53 +0100 Subject: [PATCH 061/125] Added windows skip flag --- tests/conftest.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 02d682ae..0e5668fb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -87,6 +87,12 @@ def pytest_addoption(parser): default=False, help="Skip tests that are unable to run in CI on macOS", ) + parser.addoption( + "--skip-ci-windows", + action="store_true", + default=False, + help="Skip tests that are unable to run in CI on Windows", + ) def pytest_configure(config): @@ -98,12 +104,19 @@ def pytest_configure(config): def pytest_collection_modifyitems(config, items): """Fixture to modify test collection based on command line options.""" - if not config.getoption("--skip-ci-macos"): - # `--skip-ci-macos` not given in cli: this is not a CI run + if not config.getoption("--skip-ci-macos") and not config.getoption( + "--skip-ci-windows" + ): + # `--skip-ci-macos` or `--skip-ci-windows` not given in cli: this is not a CI run return skip_ci_macos = pytest.mark.skipif( sys.platform == 
"darwin", reason="Uses too much memory in CI on MacOS" ) + skip_ci_windows = pytest.mark.skipif( + sys.platform == "win32", reason="Requires Microsoft Word on Windows" + ) for item in items: if "skip_ci_macos" in item.keywords: item.add_marker(skip_ci_macos) + elif "skip_ci_windows" in item.keywords: + item.add_marker(skip_ci_windows) From c8c197303e9d6bdf8ff36599d2840bcd72b9b43e Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 27 May 2025 13:50:39 +0100 Subject: [PATCH 062/125] mac runner requires Microsoft Word too --- tests/test_regression.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_regression.py b/tests/test_regression.py index 3eb67195..af0e296c 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -119,6 +119,7 @@ def test_pdf_to_bioc( assert new_tables == expected_tables +@pytest.mark.skip_ci_macos @pytest.mark.skip_ci_windows @pytest.mark.parametrize( "input_file, config, has_tables", From 2153b8a907922de3b3a642eeb442162ba87101b9 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 27 May 2025 14:45:54 +0100 Subject: [PATCH 063/125] Correction for windows skip flag --- tests/conftest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 0e5668fb..b3c31045 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -98,7 +98,9 @@ def pytest_addoption(parser): def pytest_configure(config): """Fixture to add custom markers to pytest.""" config.addinivalue_line( - "markers", "skip_ci_macos: mark test as unable to run in CI on MacOS" + "markers", + "skip_ci_macos: mark test as unable to run in CI on MacOS", + "skip_ci_windows: mark test as unable to run in CI on Windows", ) From 826ead43afd0560b8d913f3c3ff8f24252582cff Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 27 May 2025 14:57:08 +0100 Subject: [PATCH 064/125] Attempt no. 999 to push a working skip_ci_windows flag. 
--- tests/conftest.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index b3c31045..64765f7d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -98,9 +98,10 @@ def pytest_addoption(parser): def pytest_configure(config): """Fixture to add custom markers to pytest.""" config.addinivalue_line( - "markers", - "skip_ci_macos: mark test as unable to run in CI on MacOS", - "skip_ci_windows: mark test as unable to run in CI on Windows", + "markers", "skip_ci_macos: mark test as unable to run in CI on MacOS" + ) + config.addinivalue_line( + "markers", "skip_ci_windows: mark test as unable to run in CI on Windows" ) From de85be250cf37a98b18c5517c2ecccfb8ae0ae94 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 27 May 2025 15:25:40 +0100 Subject: [PATCH 065/125] altered item marker logic --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 64765f7d..d3918a49 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -121,5 +121,5 @@ def pytest_collection_modifyitems(config, items): for item in items: if "skip_ci_macos" in item.keywords: item.add_marker(skip_ci_macos) - elif "skip_ci_windows" in item.keywords: + if "skip_ci_windows" in item.keywords: item.add_marker(skip_ci_windows) From 1e9a886eba32da654b1cfce770c93017f7c4b89c Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Tue, 20 May 2025 16:59:36 +0100 Subject: [PATCH 066/125] Move read_config out of Autocorpus class --- autocorpus/__main__.py | 5 ++- autocorpus/autocorpus.py | 20 ----------- .../{configs/default_config.py => config.py} | 36 ++++++++++++++----- tests/test_regression.py | 2 +- 4 files changed, 31 insertions(+), 32 deletions(-) rename autocorpus/{configs/default_config.py => config.py} (51%) diff --git a/autocorpus/__main__.py b/autocorpus/__main__.py index 6d4a4818..9511903b 100644 --- a/autocorpus/__main__.py +++ b/autocorpus/__main__.py @@ 
-7,8 +7,7 @@ from tqdm import tqdm from . import add_file_logger, logger -from .autocorpus import Autocorpus -from .configs.default_config import DefaultConfig +from .config import DefaultConfig, read_config from .inputs import read_file_structure from .run import run_autocorpus @@ -66,7 +65,7 @@ def main(): # Load the config if args.config: - config = Autocorpus.read_config(args.config) + config = read_config(args.config) elif args.default_config: try: config = DefaultConfig[args.default_config].load_config() diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 2662fc8f..951f12a3 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -18,26 +18,6 @@ class Autocorpus: """Parent class for all Auto-CORPus functionality.""" - @staticmethod - def read_config(config_path: str) -> dict[str, Any]: - """Reads a configuration file and returns its content. - - Args: - config_path (str): The path to the configuration file. - - Returns: - dict: The content of the configuration file. - - Raises: - FileNotFoundError: If the configuration file does not exist. - json.JSONDecodeError: If the configuration file is not a valid JSON. - KeyError: If the configuration file does not contain the expected "config" key. - """ - with open(config_path, encoding="utf-8") as f: - ## TODO: validate config file here if possible - content = json.load(f) - return content["config"] - def __soupify_infile(self, fpath: Path): with fpath.open(encoding="utf-8") as fp: soup = BeautifulSoup(fp.read(), "html.parser") diff --git a/autocorpus/configs/default_config.py b/autocorpus/config.py similarity index 51% rename from autocorpus/configs/default_config.py rename to autocorpus/config.py index 0d5d52e1..e36d3171 100644 --- a/autocorpus/configs/default_config.py +++ b/autocorpus/config.py @@ -6,14 +6,34 @@ from typing import Any +def read_config(config_path: str) -> dict[str, Any]: + """Reads a configuration file and returns its content. 
+ + Args: + config_path: The path to the configuration file. + + Returns: + dict: The content of the configuration file. + + Raises: + FileNotFoundError: If the configuration file does not exist. + json.JSONDecodeError: If the configuration file is not a valid JSON. + KeyError: If the configuration file does not contain the expected "config" key. + """ + with open(config_path, encoding="utf-8") as f: + ## TODO: validate config file here if possible + content = json.load(f) + return content["config"] + + class DefaultConfig(Enum): """An enumeration representing different configuration files for various datasets. Attributes: - LEGACY_PMC (str): Configuration file for legacy PMC data (pre-October 2024). - PMC (str): Configuration file for current PMC data. - PLOS_GENETICS (str): Configuration file for PLOS Genetics data. - NATURE_GENETICS (str): Configuration file for Nature Genetics data. + LEGACY_PMC: Configuration file for legacy PMC data (pre-October 2024). + PMC: Configuration file for current PMC data. + PLOS_GENETICS: Configuration file for PLOS Genetics data. + NATURE_GENETICS: Configuration file for Nature Genetics data. Methods: load_config(): @@ -27,14 +47,14 @@ class DefaultConfig(Enum): PLOS_GENETICS = "config_plos_genetics.json" NATURE_GENETICS = "config_nature_genetics.json" - def __init__(self, filename): + def __init__(self, filename: str) -> None: """Initializes the DefaultConfig enum with the given filename. Args: - filename (str): The name of the configuration file to load. + filename: The name of the configuration file to load. """ self._filename = filename - self._config = None # Lazy-loaded cache + self._config: dict[str, Any] = {} # Lazy-loaded cache def load_config(self) -> dict[str, Any]: """Loads the configuration file when first accessed. @@ -42,7 +62,7 @@ def load_config(self) -> dict[str, Any]: Returns: The configuration file as a dictionary. 
""" - if self._config is None: + if self._config == {}: config_path = resources.files("autocorpus.configs") / self._filename with config_path.open("r", encoding="utf-8") as f_in: self._config = json.load(f_in)["config"] diff --git a/tests/test_regression.py b/tests/test_regression.py index f16a022a..cbec9703 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -6,7 +6,7 @@ import pytest -from autocorpus.configs.default_config import DefaultConfig +from autocorpus.config import DefaultConfig @pytest.mark.parametrize( From bdc02beb5b73396f70a0d99c569e538f8f160612 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Wed, 21 May 2025 10:44:50 +0100 Subject: [PATCH 067/125] Do not pass Autocorpus object to formatter --- autocorpus/autocorpus.py | 26 +++++++++++++++----------- autocorpus/bioc_documents.py | 24 ++++++++++++++---------- autocorpus/bioc_formatter.py | 12 ++++++++---- autocorpus/configs/__init__.py | 1 - tests/test_regression.py | 2 +- 5 files changed, 38 insertions(+), 27 deletions(-) delete mode 100644 autocorpus/configs/__init__.py diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 951f12a3..64c0997a 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -355,7 +355,7 @@ def process_files( def __init__( self, config: dict[str, Any], - main_text: Path | None = None, + main_text: str = "", linked_tables=None, ): """Utilises the input config file to create valid BioC versions of input HTML journal articles. @@ -374,15 +374,15 @@ def __init__( self.abbreviations = {} self.has_tables = False - def to_bioc(self): + def to_bioc(self) -> dict[str, Any]: """Get the currently loaded bioc as a dict. Returns: (dict): bioc as a dict """ - return get_formatted_bioc_collection(self) + return get_formatted_bioc_collection(self.main_text, self.file_path) - def main_text_to_bioc_json(self): + def main_text_to_bioc_json(self) -> str: """Get the currently loaded main text as BioC JSON. 
Args: @@ -392,10 +392,12 @@ def main_text_to_bioc_json(self): (str): main text as BioC JSON """ return json.dumps( - get_formatted_bioc_collection(self), indent=2, ensure_ascii=False + get_formatted_bioc_collection(self.main_text, self.file_path), + indent=2, + ensure_ascii=False, ) - def main_text_to_bioc_xml(self): + def main_text_to_bioc_xml(self) -> str: """Get the currently loaded main text as BioC XML. Returns: @@ -403,12 +405,14 @@ def main_text_to_bioc_xml(self): """ collection = BioCJSON.loads( json.dumps( - get_formatted_bioc_collection(self), indent=2, ensure_ascii=False + get_formatted_bioc_collection(self.main_text, self.file_path), + indent=2, + ensure_ascii=False, ) ) return BioCXML.dumps(collection) - def tables_to_bioc_json(self, indent=2): + def tables_to_bioc_json(self, indent: int = 2) -> str: """Get the currently loaded tables as Tables-JSON. Args: @@ -419,7 +423,7 @@ def tables_to_bioc_json(self, indent=2): """ return json.dumps(self.tables, ensure_ascii=False, indent=indent) - def abbreviations_to_bioc_json(self, indent=2): + def abbreviations_to_bioc_json(self, indent: int = 2) -> str: """Get the currently loaded abbreviations as BioC JSON. Args: @@ -430,7 +434,7 @@ def abbreviations_to_bioc_json(self, indent=2): """ return json.dumps(self.abbreviations, ensure_ascii=False, indent=indent) - def to_json(self, indent=2): + def to_json(self, indent: int = 2) -> str: """Get the currently loaded AC object as a dict. Args: @@ -441,7 +445,7 @@ def to_json(self, indent=2): """ return json.dumps(self.to_dict(), ensure_ascii=False, indent=indent) - def to_dict(self): + def to_dict(self) -> dict[str, Any]: """Get the currently loaded AC object as a dict. 
Returns: diff --git a/autocorpus/bioc_documents.py b/autocorpus/bioc_documents.py index ee9a4d7d..de830cd8 100644 --- a/autocorpus/bioc_documents.py +++ b/autocorpus/bioc_documents.py @@ -6,23 +6,27 @@ from .bioc_passage import BioCPassage -def get_formatted_bioc_document(data_store) -> dict[str, Any]: +def get_formatted_bioc_document( + main_text: dict[str, Any], + file_path: str, +) -> dict[str, Any]: # TODO: Change return type to ac_bioc.BioCDocument """Constructs the BioC document template using the provided data store. Args: - data_store (Autocorpus): Input article data store. + main_text: Input document-level data. + file_path: Path to the input file. Returns: - (dict): BioC document complete populated with passages. + BioC document complete populated with passages. """ # build document passages seen_headings = [] - passages = [BioCPassage.from_title(data_store.main_text["title"], 0).as_dict()] + passages = [BioCPassage.from_title(main_text["title"], 0).as_dict()] offset = 0 # offset for passage start position - if data_store.main_text["title"] not in seen_headings: - offset = len(data_store.main_text["title"]) - seen_headings.append(data_store.main_text["title"]) - for passage in data_store.main_text["paragraphs"]: + if main_text["title"] not in seen_headings: + offset = len(main_text["title"]) + seen_headings.append(main_text["title"]) + for passage in main_text["paragraphs"]: passage_obj = BioCPassage.from_dict(passage, offset) passages.append(passage_obj.as_dict()) offset += len(passage["body"]) @@ -34,8 +38,8 @@ def get_formatted_bioc_document(data_store) -> dict[str, Any]: seen_headings.append(passage["section_heading"]) return { - "id": Path(data_store.file_path).name.split(".")[0], - "inputfile": data_store.file_path, + "id": Path(file_path).name.split(".")[0], + "inputfile": file_path, "infons": {}, "passages": passages, "annotations": [], diff --git a/autocorpus/bioc_formatter.py b/autocorpus/bioc_formatter.py index 4ebb52b4..efe0ff5f 100644 --- 
a/autocorpus/bioc_formatter.py +++ b/autocorpus/bioc_formatter.py @@ -6,20 +6,24 @@ from autocorpus.bioc_documents import get_formatted_bioc_document -def get_formatted_bioc_collection(input_vals: object) -> dict[str, Any]: +def get_formatted_bioc_collection( + main_text: dict[str, Any], + file_path: str, +) -> dict[str, Any]: # TODO: Change return type to ac_bioc.BioCCollection """Constructs a BioC collection from input document-level data. Args: - input_vals (object): Input document-level data. + main_text: Input document-level data. + file_path: Path to the input file. Returns: - (dict): BioC collection + BioC collection """ bioc_collection = { "source": "Auto-CORPus (full-text)", "date": datetime.today().strftime("%Y%m%d"), "key": "autocorpus_fulltext.key", "infons": {}, - "documents": [get_formatted_bioc_document(input_vals)], + "documents": [get_formatted_bioc_document(main_text, file_path)], } return bioc_collection diff --git a/autocorpus/configs/__init__.py b/autocorpus/configs/__init__.py deleted file mode 100644 index a1b11403..00000000 --- a/autocorpus/configs/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Autocorpus configs package.""" diff --git a/tests/test_regression.py b/tests/test_regression.py index cbec9703..7ed2ad19 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -38,7 +38,7 @@ def test_autocorpus(data_path: Path, input_file: str, config: dict[str, Any]) -> auto_corpus = Autocorpus( config=config, - main_text=pmc_example_path, + main_text=str(pmc_example_path), ) auto_corpus.process_file() From 0c99d6c36d2f81a54b187efdcd10b6a15deab4cd Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 22 May 2025 18:11:48 +0100 Subject: [PATCH 068/125] Take all methods that don't use self out of the class --- autocorpus/autocorpus.py | 183 ++++++++++++++++++++++++++------------- autocorpus/section.py | 13 +-- autocorpus/utils.py | 24 ++--- 3 files changed, 144 insertions(+), 76 deletions(-) diff --git a/autocorpus/autocorpus.py 
b/autocorpus/autocorpus.py index 64c0997a..6184b1b2 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -1,10 +1,10 @@ -"""Auto-CORPus primary functions are called from this script, after initialisation with __main__.py.""" +"""Auto-CORPus primary functions are defined in this module.""" import json from pathlib import Path -from typing import Any +from typing import Any, TypedDict, cast -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup, Tag from . import logger from .abbreviation import get_abbreviations @@ -15,49 +15,127 @@ from .utils import handle_not_tables -class Autocorpus: - """Parent class for all Auto-CORPus functionality.""" +def soupify_infile(fpath: Path) -> BeautifulSoup: + """Convert the input file into a BeautifulSoup object. - def __soupify_infile(self, fpath: Path): - with fpath.open(encoding="utf-8") as fp: - soup = BeautifulSoup(fp.read(), "html.parser") - for e in soup.find_all( - attrs={"style": ["display:none", "visibility:hidden"]} - ): - e.extract() - return soup + Args: + fpath: Path to the input file. - def __get_keywords(self, soup, config): - if "keywords" not in config: - return {} + Returns: + BeautifulSoup object of the input file. 
+ """ + with fpath.open(encoding="utf-8") as fp: + soup = BeautifulSoup(fp.read(), "html.parser") + for e in soup.find_all(attrs={"style": ["display:none", "visibility:hidden"]}): + e.extract() + return soup - responses = handle_not_tables(config["keywords"], soup) - if not responses: - return {} - responses = " ".join(x["node"].get_text() for x in responses) - return { - "section_heading": "keywords", - "subsection_heading": "", - "body": responses, - "section_type": [{"iao_name": "keywords section", "iao_id": "IAO:0000630"}], - } +class Keywords(TypedDict, total=False): + """TypedDict for keywords section.""" + + section_heading: str + subsection_heading: str + body: str + section_type: list[dict[str, str]] + + +def get_keywords(soup: BeautifulSoup, config: dict[str, Any]) -> Keywords: + """Extract keywords from the soup object based on the provided configuration. + + Args: + soup: BeautifulSoup object of the HTML file. + config: AC config rules. + + Returns: + dict: Extracted keywords as a dictionary. + """ + if "keywords" not in config: + return {} + + responses = handle_not_tables(config["keywords"], soup) + if not responses: + return {} + + return { + "section_heading": "keywords", + "subsection_heading": "", + "body": " ".join( + x["node"].get_text() for x in responses if isinstance(x["node"], Tag) + ), + "section_type": [{"iao_name": "keywords section", "iao_id": "IAO:0000630"}], + } + + +def get_title(soup: BeautifulSoup, config: dict[str, Any]) -> str: + """Extract the title from the soup object based on the provided configuration. + + Args: + soup: BeautifulSoup object of the HTML file. + config: AC config rules. + + Returns: + Extracted title as a string. 
+ """ + if "title" not in config: + return "" + + titles = handle_not_tables(config["title"], soup) + if not titles: + return "" - def __get_title(self, soup, config): - if "title" not in config: - return "" + node = cast(Tag, titles[0]["node"]) - titles = handle_not_tables(config["title"], soup) - if not titles: - return "" + return node.get_text() - return titles[0]["node"].get_text() - def __get_sections(self, soup, config): - if "sections" not in config: - return [] +def get_sections( + soup: BeautifulSoup, config: dict[str, Any] +) -> list[dict[str, Tag | list[str]]]: + """Extract sections from the soup object based on the provided configuration. - return handle_not_tables(config["sections"], soup) + Args: + soup: Beautiful Soup object of the HTML file. + config: AC config rules. + + Returns: + A list of matches for the provided config rules. Either as a Tag or a list of + strings. + """ + if "sections" not in config: + return [] + + return handle_not_tables(config["sections"], soup) + + +def set_unknown_section_headings(unique_text: list[Keywords]) -> list[Keywords]: + """Set the heading for sections that are not specified in the config. + + Args: + unique_text: List of unique text sections. + + Returns: + A list of unique text sections with unknown headings set to "document part". + """ + paper = {} + for para in unique_text: + if para["section_heading"] != "keywords": + paper[para["section_heading"]] = [ + x["iao_name"] for x in para["section_type"] + ] + + for text in unique_text: + if not text["section_heading"]: + text["section_heading"] = "document part" + text["section_type"] = [ + {"iao_name": "document part", "iao_id": "IAO:0000314"} + ] + + return unique_text + + +class Autocorpus: + """Parent class for all Auto-CORPus functionality.""" def __extract_text(self, soup, config): """Convert beautiful soup object into a python dict object with cleaned main text body. 
@@ -72,11 +150,11 @@ def __extract_text(self, soup, config): result = {} # Tags of text body to be extracted are hard-coded as p (main text) and span (keywords and refs) - result["title"] = self.__get_title(soup, config) + result["title"] = get_title(soup, config) maintext = [] - if keywords := self.__get_keywords(soup, config): + if keywords := get_keywords(soup, config): maintext.append(keywords) - sections = self.__get_sections(soup, config) + sections = get_sections(soup, config) for sec in sections: maintext.extend(get_section(config, sec)) @@ -89,29 +167,12 @@ def __extract_text(self, soup, config): seen_text.append(text["body"]) unique_text.append(text) - result["paragraphs"] = self.__set_unknown_section_headings(unique_text) + result["paragraphs"] = set_unknown_section_headings(unique_text) return result - def __set_unknown_section_headings(self, unique_text): - paper = {} - for para in unique_text: - if para["section_heading"] != "keywords": - paper[para["section_heading"]] = [ - x["iao_name"] for x in para["section_type"] - ] - - for text in unique_text: - if not text["section_heading"]: - text["section_heading"] = "document part" - text["section_type"] = [ - {"iao_name": "document part", "iao_id": "IAO:0000314"} - ] - - return unique_text - def __process_html_article(self, file: Path): - soup = self.__soupify_infile(file) + soup = soupify_infile(file) self.__process_html_tables(file, soup, self.config) self.main_text = self.__extract_text(soup, self.config) try: @@ -296,7 +357,7 @@ def process_file(self): raise RuntimeError("A valid config file must be loaded.") # handle main_text if self.file_path: - soup = self.__soupify_infile(Path(self.file_path)) + soup = soupify_infile(Path(self.file_path)) self.__process_html_tables(self.file_path, soup, self.config) self.main_text = self.__extract_text(soup, self.config) try: @@ -307,7 +368,7 @@ def process_file(self): logger.error(e) if self.linked_tables: for table_file in self.linked_tables: - soup = 
self.__soupify_infile(table_file) + soup = soupify_infile(table_file) self.__process_html_tables(table_file, soup, self.config) self.__merge_table_data() if "documents" in self.tables and not self.tables["documents"] == []: diff --git a/autocorpus/section.py b/autocorpus/section.py index 80c75865..86c583bb 100644 --- a/autocorpus/section.py +++ b/autocorpus/section.py @@ -12,7 +12,7 @@ from functools import lru_cache from importlib import resources from itertools import chain -from typing import Any +from typing import Any, cast import nltk from bs4 import BeautifulSoup, Tag @@ -148,7 +148,7 @@ def _get_abbreviations( ) -> str: try: abbreviations_tables = handle_not_tables(abbreviations_config, soup_section) - node = abbreviations_tables[0]["node"] + node = cast(Tag, abbreviations_tables[0]["node"]) abbreviations = {} for tr in node.find_all("tr"): short_form, long_form = (td.get_text() for td in tr.find_all("td")) @@ -225,13 +225,16 @@ def _get_section( ) -> Iterable[SectionChild]: subsections = handle_not_tables(config["sub-sections"], soup_section) paragraphs = [ - para["node"] for para in handle_not_tables(config["paragraphs"], soup_section) + cast(Tag, para["node"]) + for para in handle_not_tables(config["paragraphs"], soup_section) ] tables = [ - table["node"] for table in handle_not_tables(config["tables"], soup_section) + cast(Tag, table["node"]) + for table in handle_not_tables(config["tables"], soup_section) ] figures = [ - figure["node"] for figure in handle_not_tables(config["figures"], soup_section) + cast(Tag, figure["node"]) + for figure in handle_not_tables(config["figures"], soup_section) ] unwanted_paragraphs = list( chain.from_iterable( diff --git a/autocorpus/utils.py b/autocorpus/utils.py index 8ee3b2f0..09fd06e9 100644 --- a/autocorpus/utils.py +++ b/autocorpus/utils.py @@ -6,7 +6,7 @@ from typing import Any import bs4 -from bs4 import BeautifulSoup, NavigableString +from bs4 import BeautifulSoup, NavigableString, Tag from lxml import etree 
from lxml.html.soupparser import fromstring @@ -129,7 +129,7 @@ def parse_configs(definition): return bs_attrs -def handle_defined_by(config, soup): +def handle_defined_by(config: dict[str, Any], soup: BeautifulSoup) -> list[Tag]: """Retrieve matching nodes for the 'defined-by' config rules. Args: @@ -145,9 +145,11 @@ def handle_defined_by(config, soup): } } node is a bs4 object of a single result derived from bs4.find_all() - data is an object where the results from the config "data" sections is housed. The key is the name of the data - section and the values are all matches found within any of the main matches which match the current data section - definition. The values is the response you get from get_text() on any found nodes, not the nodes themselves. + data is an object where the results from the config "data" sections is housed. + The key is the name of the data section and the values are all matches found + within any of the main matches which match the current data section definition. + The values is the response you get from get_text() on any found nodes, not the + nodes themselves. """ if "defined-by" not in config: quit(f"{config} does not contain the required 'defined-by' key.") @@ -155,7 +157,7 @@ def handle_defined_by(config, soup): seen_text = [] for definition in config["defined-by"]: bs_attrs = parse_configs(definition) - new_matches = [] + new_matches = [] # type: ignore[var-annotated] if bs_attrs["name"] or bs_attrs["attrs"]: new_matches = soup.find_all( bs_attrs["name"] if bs_attrs["name"] else None, @@ -201,8 +203,9 @@ def handle_defined_by(config, soup): def handle_not_tables( - config: dict[str, Any], soup: BeautifulSoup -) -> list[dict[str, Any]]: + config: dict[str, Any], + soup: BeautifulSoup, +) -> list[dict[str, Tag | list[str]]]: """Executes a search on non-table bs4 soup objects based on provided config rules. 
Args: @@ -210,13 +213,14 @@ def handle_not_tables( soup: BeautifulSoup object containing the input text to search Returns: - Matches for the provided config rules + A list of matches for the provided config rules. Either as a Tag or a list of + strings. """ responses = [] matches = handle_defined_by(config, soup) if "data" in config: for match in matches: - response_addition = {"node": match} + response_addition: dict[str, Tag | list[str]] = {"node": match} for ele in config["data"]: seen_text = set() for definition in config["data"][ele]: From dad61fc2be72e8272013bacddc5314b7625beff5 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Tue, 27 May 2025 18:50:05 +0100 Subject: [PATCH 069/125] take extract_text out of AC class. Create data_structures.py --- autocorpus/autocorpus.py | 120 ++++++++++++++++------------------ autocorpus/data_structures.py | 26 ++++++++ autocorpus/reference.py | 44 ++++++++++--- autocorpus/section.py | 32 ++------- tests/test_reference.py | 54 +++++++++++++-- 5 files changed, 170 insertions(+), 106 deletions(-) create mode 100644 autocorpus/data_structures.py diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 6184b1b2..5ecc5aec 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -2,7 +2,7 @@ import json from pathlib import Path -from typing import Any, TypedDict, cast +from typing import Any, cast from bs4 import BeautifulSoup, Tag @@ -10,6 +10,7 @@ from .abbreviation import get_abbreviations from .ac_bioc import BioCJSON, BioCXML from .bioc_formatter import get_formatted_bioc_collection +from .data_structures import Paragraph from .section import get_section from .table import get_table_json from .utils import handle_not_tables @@ -31,16 +32,7 @@ def soupify_infile(fpath: Path) -> BeautifulSoup: return soup -class Keywords(TypedDict, total=False): - """TypedDict for keywords section.""" - - section_heading: str - subsection_heading: str - body: str - section_type: list[dict[str, str]] - - -def 
get_keywords(soup: BeautifulSoup, config: dict[str, Any]) -> Keywords: +def get_keywords(soup: BeautifulSoup, config: dict[str, Any]) -> None | Paragraph: """Extract keywords from the soup object based on the provided configuration. Args: @@ -51,20 +43,20 @@ def get_keywords(soup: BeautifulSoup, config: dict[str, Any]) -> Keywords: dict: Extracted keywords as a dictionary. """ if "keywords" not in config: - return {} + return None responses = handle_not_tables(config["keywords"], soup) if not responses: - return {} + return None - return { - "section_heading": "keywords", - "subsection_heading": "", - "body": " ".join( + return Paragraph( + section_heading="keywords", + subsection_heading="", + body=" ".join( x["node"].get_text() for x in responses if isinstance(x["node"], Tag) ), - "section_type": [{"iao_name": "keywords section", "iao_id": "IAO:0000630"}], - } + section_type=[{"iao_name": "keywords section", "iao_id": "IAO:0000630"}], + ) def get_title(soup: BeautifulSoup, config: dict[str, Any]) -> str: @@ -108,7 +100,7 @@ def get_sections( return handle_not_tables(config["sections"], soup) -def set_unknown_section_headings(unique_text: list[Keywords]) -> list[Keywords]: +def set_unknown_section_headings(unique_text: list[Paragraph]) -> list[Paragraph]: """Set the heading for sections that are not specified in the config. 
Args: @@ -119,62 +111,62 @@ def set_unknown_section_headings(unique_text: list[Keywords]) -> list[Keywords]: """ paper = {} for para in unique_text: - if para["section_heading"] != "keywords": - paper[para["section_heading"]] = [ - x["iao_name"] for x in para["section_type"] - ] + if para.section_heading != "keywords": + paper[para.section_heading] = [x["iao_name"] for x in para.section_type] for text in unique_text: - if not text["section_heading"]: - text["section_heading"] = "document part" - text["section_type"] = [ - {"iao_name": "document part", "iao_id": "IAO:0000314"} - ] + if not text.section_heading: + text.section_heading = "document part" + text.section_type = [{"iao_name": "document part", "iao_id": "IAO:0000314"}] return unique_text -class Autocorpus: - """Parent class for all Auto-CORPus functionality.""" +def extract_text(soup: BeautifulSoup, config: dict[str, Any]) -> dict[str, Any]: + """Convert BeautifulSoup object into a Python dict with cleaned main text body. + + Args: + soup: BeautifulSoup object of html + config: AC config rules - def __extract_text(self, soup, config): - """Convert beautiful soup object into a python dict object with cleaned main text body. 
+ Return: + dict of the maintext + """ + result: dict[str, Any] = {} + + # Extract tags of text body and hard-code as: + # p (main text) and span (keywords and refs) + result["title"] = get_title(soup, config) + maintext = [] + if keywords := get_keywords(soup, config): + maintext.append(keywords) + sections = get_sections(soup, config) + for sec in sections: + maintext.extend(get_section(config, sec)) + + # filter out the sections which do not contain any info + filtered_text = [x for x in maintext if x] + unique_text = [] + seen_text = [] + for text in filtered_text: + if text.body not in seen_text: + seen_text.append(text.body) + unique_text.append(text) + + result["paragraphs"] = [ + p.as_dict() for p in set_unknown_section_headings(unique_text) + ] + + return result - Args: - soup (bs4.BeautifulSoup): BeautifulSoup object of html - config (dict): AC config rules - Return: - (dict): dict of the maintext - """ - result = {} - - # Tags of text body to be extracted are hard-coded as p (main text) and span (keywords and refs) - result["title"] = get_title(soup, config) - maintext = [] - if keywords := get_keywords(soup, config): - maintext.append(keywords) - sections = get_sections(soup, config) - for sec in sections: - maintext.extend(get_section(config, sec)) - - # filter out the sections which do not contain any info - filtered_text = [x for x in maintext if x] - unique_text = [] - seen_text = [] - for text in filtered_text: - if text["body"] not in seen_text: - seen_text.append(text["body"]) - unique_text.append(text) - - result["paragraphs"] = set_unknown_section_headings(unique_text) - - return result +class Autocorpus: + """Parent class for all Auto-CORPus functionality.""" def __process_html_article(self, file: Path): soup = soupify_infile(file) self.__process_html_tables(file, soup, self.config) - self.main_text = self.__extract_text(soup, self.config) + self.main_text = extract_text(soup, self.config) try: self.abbreviations = 
get_abbreviations(self.main_text, soup, str(file)) except Exception as e: @@ -359,7 +351,7 @@ def process_file(self): if self.file_path: soup = soupify_infile(Path(self.file_path)) self.__process_html_tables(self.file_path, soup, self.config) - self.main_text = self.__extract_text(soup, self.config) + self.main_text = extract_text(soup, self.config) try: self.abbreviations = get_abbreviations( self.main_text, soup, self.file_path diff --git a/autocorpus/data_structures.py b/autocorpus/data_structures.py new file mode 100644 index 00000000..037fa953 --- /dev/null +++ b/autocorpus/data_structures.py @@ -0,0 +1,26 @@ +"""Module to define common data structures for autocorpus.""" + +from dataclasses import asdict, dataclass +from typing import Any + + +@dataclass +class Paragraph: + """A paragraph for a section of the article.""" + + section_heading: str + subsection_heading: str + body: str + section_type: list[dict[str, str]] + + def as_dict(self) -> dict[str, Any]: + """Return the dictionary representation of the Paragraph.""" + return asdict(self) + + +@dataclass(frozen=True) +class SectionChild: + """A child node in the section.""" + + subheading: str + body: str diff --git a/autocorpus/reference.py b/autocorpus/reference.py index 688dd6ce..48f657e0 100644 --- a/autocorpus/reference.py +++ b/autocorpus/reference.py @@ -1,11 +1,33 @@ """Use regular expression for searching/replacing reference strings.""" import re +from dataclasses import dataclass from typing import Any +from .data_structures import Paragraph -def get_references(reference: dict[str, Any], section_heading: str) -> dict[str, Any]: - """Retrieve a structured reference dictionary from a BeautifulSoup object and section heading. 
+ +@dataclass +class ReferencesParagraph(Paragraph): + """A paragraph for the references section of the article.""" + + title: str = "" + journal: str = "" + volume: str = "" + + def as_dict(self) -> dict[str, Any]: + """Return the dictionary representation of the ReferencesParagraph.""" + return { + k: v + for k, v in super().as_dict().items() + if v or k not in ("title", "journal", "volume") + } + + +def get_references( + reference: dict[str, Any], section_heading: str +) -> ReferencesParagraph: + """Retrieve a structured reference dictionary from a BS4 object and section heading. Args: reference: dictionary containing the references node @@ -15,13 +37,15 @@ def get_references(reference: dict[str, Any], section_heading: str) -> dict[str, """ text = reference["node"].get_text().replace("Go to:", "").replace("\n", "") text = re.sub(r"\s{2,}", " ", text) - ref_section = { - "section_heading": section_heading, - "subsection_heading": "", - "body": text, - "section_type": [{"iao_name": "references section", "iao_id": "IAO:0000320"}], - } - - ref_section |= {k: ". ".join(v) for k, v in reference.items() if k != "node"} + ref_section = ReferencesParagraph( + section_heading, + "", + text, + [{"iao_name": "references section", "iao_id": "IAO:0000320"}], + ) + + for k, v in reference.items(): + if k != "node": + setattr(ref_section, k, ". ".join(v)) return ref_section diff --git a/autocorpus/section.py b/autocorpus/section.py index 86c583bb..9b51a762 100644 --- a/autocorpus/section.py +++ b/autocorpus/section.py @@ -8,7 +8,6 @@ import re from collections.abc import Iterable -from dataclasses import asdict, dataclass from functools import lru_cache from importlib import resources from itertools import chain @@ -19,7 +18,8 @@ from fuzzywuzzy import fuzz from . 
import logger -from .reference import get_references +from .data_structures import Paragraph, SectionChild +from .reference import ReferencesParagraph, get_references from .utils import handle_not_tables @@ -131,18 +131,6 @@ def get_iao_term_to_id_mapping(iao_term: str) -> dict[str, str]: return {"iao_name": iao_term, "iao_id": mapping_result_id_version} -@dataclass -class Paragraph: - """A paragraph for a section of the article.""" - - section_heading: str - subsection_heading: str - body: str - section_type: list[dict[str, str]] - - as_dict = asdict - - def _get_abbreviations( abbreviations_config: dict[str, Any], soup_section: BeautifulSoup ) -> str: @@ -161,7 +149,7 @@ def _get_abbreviations( def _get_references( config: dict[str, Any], section_heading: str, soup_section: BeautifulSoup -) -> Iterable[dict[str, Any]]: +) -> Iterable[ReferencesParagraph]: """Constructs the article references using the provided configuration file. Args: @@ -174,14 +162,6 @@ def _get_references( yield get_references(ref, section_heading) -@dataclass(frozen=True) -class SectionChild: - """A child node in the section.""" - - subheading: str - body: str - - def _navigate_children( subheading: str, soup_sections: list[Tag], @@ -248,7 +228,7 @@ def _get_section( def get_section( config: dict[str, dict[str, Any]], section_dict: dict[str, Any] -) -> Iterable[dict[str, Any]]: +) -> Iterable[Paragraph]: """Identifies a section using the provided configuration. 
Args: @@ -265,7 +245,7 @@ def get_section( abbreviations_config, section_dict["node"] ) for body in abbreviations: - yield Paragraph(section_heading, "", body, section_type).as_dict() + yield Paragraph(section_heading, "", body, section_type) return if { @@ -281,4 +261,4 @@ def get_section( child.subheading, child.body, section_type, - ).as_dict() + ) diff --git a/tests/test_reference.py b/tests/test_reference.py index dbfbdfd8..cbc02fbd 100644 --- a/tests/test_reference.py +++ b/tests/test_reference.py @@ -5,18 +5,60 @@ def test_references() -> None: """A regression test for the references section of the PMC example.""" - from autocorpus.reference import get_references + from autocorpus.reference import ReferencesParagraph, get_references node = MagicMock() node.get_text.return_value = "NODE TEXT\n" - ref = {"node": node, "a": ["A"], "b": ["B", "C"]} - expected = { + ref = {"node": node, "title": ["A"], "journal": ["B", "C"], "volume": ["1"]} + expected = ReferencesParagraph( + section_heading="References", + subsection_heading="", + body="NODE TEXT", + section_type=[{"iao_name": "references section", "iao_id": "IAO:0000320"}], + title="A", + journal="B. C", + volume="1", + ) + + assert expected == get_references(ref, "References") + + +def test_references_paragraph_as_dict() -> None: + """Test the ReferencesParagraph dataclass.""" + from autocorpus.reference import ReferencesParagraph + + ref = ReferencesParagraph( + section_heading="References", + subsection_heading="", + body="NODE TEXT", + section_type=[{"iao_name": "references section", "iao_id": "IAO:0000320"}], + title="A", + journal="B. C", + volume="1", + ) + + assert ref.as_dict() == { "section_heading": "References", "subsection_heading": "", "body": "NODE TEXT", "section_type": [{"iao_name": "references section", "iao_id": "IAO:0000320"}], - "a": "A", - "b": "B. C", + "title": "A", + "journal": "B. 
C", + "volume": "1", } - assert expected == get_references(ref, "References") + ref = ReferencesParagraph( + section_heading="References", + subsection_heading="", + body="NODE TEXT", + section_type=[{"iao_name": "references section", "iao_id": "IAO:0000320"}], + title="A", + ) + + assert ref.as_dict() == { + "section_heading": "References", + "subsection_heading": "", + "body": "NODE TEXT", + "section_type": [{"iao_name": "references section", "iao_id": "IAO:0000320"}], + "title": "A", + } From 4f206972928e1cc51cb7528ae8e8b3224df49e47 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Wed, 28 May 2025 17:14:02 +0100 Subject: [PATCH 070/125] Extract soup and tables. Use pathlib object --- autocorpus/abbreviation.py | 8 ++--- autocorpus/autocorpus.py | 70 ++++++++++++++++++++---------------- autocorpus/bioc_documents.py | 6 ++-- autocorpus/bioc_formatter.py | 3 +- autocorpus/table.py | 24 +++++++------ tests/test_regression.py | 6 ++-- 6 files changed, 64 insertions(+), 53 deletions(-) diff --git a/autocorpus/abbreviation.py b/autocorpus/abbreviation.py index 9b00ed01..c636ba88 100644 --- a/autocorpus/abbreviation.py +++ b/autocorpus/abbreviation.py @@ -398,7 +398,7 @@ def _extract_abbreviations( def _biocify_abbreviations( - abbreviations: _AbbreviationsDict, file_path: str + abbreviations: _AbbreviationsDict, file_path: Path ) -> dict[str, Any]: passages = [] for short, long in abbreviations.items(): @@ -416,8 +416,8 @@ def _biocify_abbreviations( "key": "autocorpus_abbreviations.key", "documents": [ { - "id": Path(file_path).name.partition(".")[0], - "inputfile": file_path, + "id": file_path.name.partition(".")[0], + "inputfile": str(file_path), "passages": passages, } ], @@ -425,7 +425,7 @@ def _biocify_abbreviations( def get_abbreviations( - main_text: dict[str, Any], soup: BeautifulSoup, file_path: str + main_text: dict[str, Any], soup: BeautifulSoup, file_path: Path ) -> dict[str, Any]: """Extract abbreviations from the input main text. 
diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 5ecc5aec..3af15cb3 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -163,28 +163,41 @@ def extract_text(soup: BeautifulSoup, config: dict[str, Any]) -> dict[str, Any]: class Autocorpus: """Parent class for all Auto-CORPus functionality.""" - def __process_html_article(self, file: Path): - soup = soupify_infile(file) - self.__process_html_tables(file, soup, self.config) + def __process_html_article(self, file_path: Path): + soup = self._extract_soup_and_tables(file_path) self.main_text = extract_text(soup, self.config) try: - self.abbreviations = get_abbreviations(self.main_text, soup, str(file)) + self.abbreviations = get_abbreviations(self.main_text, soup, file_path) except Exception as e: logger.error(e) - def __process_html_tables(self, file_path, soup, config): - """Extract data from tables in the HTML file. + def _extract_soup_and_tables(self, file_path: Path) -> BeautifulSoup: + """Extract the soup from the html file and assign tables to self.tables. Args: - file_path (str): path to the main text file - soup (bs4.BeautifulSoup): soup object - config (dict): dict of the maintext + file_path: The html file path to be processed. + + Returns: + The BeautifulSoup object of the html file. """ - if "tables" not in config: - return + soup = soupify_infile(file_path) + if "tables" in self.config: + tables, empty_tables = get_table_json(soup, self.config, file_path) + self._update_table_ids(tables, empty_tables) + return soup + def _update_table_ids( + self, tables: dict[str, Any], empty_tables: list[dict[str, Any]] + ): + """Update the table IDs in the new tables to avoid conflicts with existing ones. + + Args: + tables: New tables dictionary to be updated in line with self.tables. + empty_tables: New empty tables list to add to self.empty_tables. 
+ """ if not self.tables: - self.tables, self.empty_tables = get_table_json(soup, config, file_path) + self.tables: dict[str, Any] = tables + self.empty_tables = empty_tables return seen_ids = set() @@ -194,8 +207,7 @@ def __process_html_tables(self, file_path, soup, config): else: seen_ids.add(tab["id"]) - tmp_tables, tmp_empty = get_table_json(soup, config, file_path) - for tabl in tmp_tables["documents"]: + for tabl in tables["documents"]: if "." in tabl["id"]: tabl_id = tabl["id"].split(".")[0] tabl_pos = ".".join(tabl["id"].split(".")[1:]) @@ -209,8 +221,9 @@ def __process_html_tables(self, file_path, soup, config): else: tabl["id"] = tabl_id seen_ids.add(tabl_id) - self.tables["documents"].extend(tmp_tables["documents"]) - self.empty_tables.extend(tmp_empty) + + self.tables["documents"].extend(tables["documents"]) + self.empty_tables.extend(empty_tables) def __merge_table_data(self): if not self.empty_tables: @@ -347,21 +360,16 @@ def process_file(self): """ if not self.config: raise RuntimeError("A valid config file must be loaded.") - # handle main_text - if self.file_path: - soup = soupify_infile(Path(self.file_path)) - self.__process_html_tables(self.file_path, soup, self.config) - self.main_text = extract_text(soup, self.config) - try: - self.abbreviations = get_abbreviations( - self.main_text, soup, self.file_path - ) - except Exception as e: - logger.error(e) + + soup = self._extract_soup_and_tables(self.file_path) + self.main_text = extract_text(soup, self.config) + try: + self.abbreviations = get_abbreviations(self.main_text, soup, self.file_path) + except Exception as e: + logger.error(e) if self.linked_tables: for table_file in self.linked_tables: - soup = soupify_infile(table_file) - self.__process_html_tables(table_file, soup, self.config) + soup = self._extract_soup_and_tables(table_file) self.__merge_table_data() if "documents" in self.tables and not self.tables["documents"] == []: self.has_tables = True @@ -408,7 +416,7 @@ def process_files( 
def __init__( self, config: dict[str, Any], - main_text: str = "", + main_text: Path, linked_tables=None, ): """Utilises the input config file to create valid BioC versions of input HTML journal articles. @@ -418,7 +426,7 @@ def __init__( main_text (Path): path to the main text of the article (HTML files only) linked_tables (list): list of linked table file paths to be included in this run (HTML files only) """ - self.file_path = str(main_text) + self.file_path = main_text self.linked_tables = linked_tables self.config = config self.main_text = {} diff --git a/autocorpus/bioc_documents.py b/autocorpus/bioc_documents.py index de830cd8..2916ec3b 100644 --- a/autocorpus/bioc_documents.py +++ b/autocorpus/bioc_documents.py @@ -8,7 +8,7 @@ def get_formatted_bioc_document( main_text: dict[str, Any], - file_path: str, + file_path: Path, ) -> dict[str, Any]: # TODO: Change return type to ac_bioc.BioCDocument """Constructs the BioC document template using the provided data store. @@ -38,8 +38,8 @@ def get_formatted_bioc_document( seen_headings.append(passage["section_heading"]) return { - "id": Path(file_path).name.split(".")[0], - "inputfile": file_path, + "id": file_path.name.split(".")[0], + "inputfile": str(file_path), "infons": {}, "passages": passages, "annotations": [], diff --git a/autocorpus/bioc_formatter.py b/autocorpus/bioc_formatter.py index efe0ff5f..a3882c62 100644 --- a/autocorpus/bioc_formatter.py +++ b/autocorpus/bioc_formatter.py @@ -1,6 +1,7 @@ """Top-level BioC collection builder script.""" from datetime import datetime +from pathlib import Path from typing import Any from autocorpus.bioc_documents import get_formatted_bioc_document @@ -8,7 +9,7 @@ def get_formatted_bioc_collection( main_text: dict[str, Any], - file_path: str, + file_path: Path, ) -> dict[str, Any]: # TODO: Change return type to ac_bioc.BioCCollection """Constructs a BioC collection from input document-level data. 
diff --git a/autocorpus/table.py b/autocorpus/table.py index 35dc42d9..9945e7ac 100644 --- a/autocorpus/table.py +++ b/autocorpus/table.py @@ -101,7 +101,8 @@ def __table_to_2d(t: BeautifulSoup) -> list[list[str]]: def __check_superrow(cells: list[str]) -> bool: """Check if the current row is a superrow. - Superrows contain cells that are split and contain more values than other cells on the same row. + Superrows contain cells that are split and contain more values than other cells on + the same row. Args: cells: Cells in row @@ -372,30 +373,33 @@ def __format_table_bioc(table_json, table_identifier, file_path): def get_table_json( - soup: BeautifulSoup, config: dict[str, Any], file_name: str + soup: BeautifulSoup, config: dict[str, Any], file_path: Path ) -> tuple[dict[str, Any], list[dict[str, Any]]]: - """Extracts and processes tables from an HTML document using BeautifulSoup and a configuration dictionary. + """Extracts and processes tables from an HTML document. + + This is done using BeautifulSoup and a configuration dictionary. The function performs the following steps: 1. Extracts tables from the HTML document based on the provided configuration. 2. Removes empty tables and tables with specific classes (e.g., "table-group"). 3. Identifies and processes table headers, superrows, and subheaders. - 4. Converts tables into a 2D format and processes cell data types (e.g., numeric, text, mixed). + 4. Converts tables into a 2D format and processes cell data types (e.g., numeric, + text, mixed). 5. Converts the processed table data into a JSON-compatible format. 6. Merges headers and formats the final table data for output. Args: soup: A BeautifulSoup object representing the parsed HTML document. config: A dictionary containing configuration options for table processing. - file_name: The file name or path of the HTML document being processed. + file_path: The file name or path of the HTML document being processed. 
Returns: - A dictionary containing the processed table data in JSON format and a list of dictionaries representing empty tables. + A dictionary containing the processed table data in JSON format and a list of + dictionaries representing empty tables. """ soup_tables: list[dict[str, Any]] = handle_tables(config["tables"], soup) - file_path: str = file_name - file_name = Path(file_name).name + file_name = file_path.name table_identifier: str | None = None if re.search(r"_table_\d+\.html", file_name): table_identifier = file_name.split("/")[-1].split("_")[-1].split(".")[0] @@ -412,7 +416,7 @@ def get_table_json( pop_list.append(i) empty_tables.append(table) soup_tables = [table for i, table in enumerate(soup_tables) if i not in pop_list] - empty_tables = [] + for etable in empty_tables: # has a table element, not empty if not etable["node"].find("table"): @@ -564,5 +568,5 @@ def get_table_json( tables += cur_table table_json = {"tables": tables} - table_json = __format_table_bioc(table_json, table_identifier, file_path) + table_json = __format_table_bioc(table_json, table_identifier, str(file_path)) return table_json, empty_tables diff --git a/tests/test_regression.py b/tests/test_regression.py index 7ed2ad19..199ea3e5 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -36,10 +36,7 @@ def test_autocorpus(data_path: Path, input_file: str, config: dict[str, Any]) -> ) as f: expected_tables = json.load(f) - auto_corpus = Autocorpus( - config=config, - main_text=str(pmc_example_path), - ) + auto_corpus = Autocorpus(config=config, main_text=pmc_example_path) auto_corpus.process_file() @@ -87,6 +84,7 @@ def test_pdf_to_bioc(data_path: Path, input_file: str, config: dict[str, Any]) - ac = Autocorpus( config=config, + main_text=pdf_path, ) ac.process_files(files=[pdf_path]) From e17b419917d593ee0fe135773d2d20b8d2a88ab1 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Wed, 28 May 2025 19:32:06 +0100 Subject: [PATCH 071/125] Completely change 
entrypoint for AC class --- autocorpus/autocorpus.py | 206 ++++++++++++++++++++------------------- autocorpus/pdf.py | 23 ++--- autocorpus/run.py | 15 ++- tests/test_regression.py | 25 ++--- 4 files changed, 135 insertions(+), 134 deletions(-) diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 3af15cb3..3b10bcea 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -1,6 +1,7 @@ """Auto-CORPus primary functions are defined in this module.""" import json +from collections.abc import Iterable from pathlib import Path from typing import Any, cast @@ -163,14 +164,6 @@ def extract_text(soup: BeautifulSoup, config: dict[str, Any]) -> dict[str, Any]: class Autocorpus: """Parent class for all Auto-CORPus functionality.""" - def __process_html_article(self, file_path: Path): - soup = self._extract_soup_and_tables(file_path) - self.main_text = extract_text(soup, self.config) - try: - self.abbreviations = get_abbreviations(self.main_text, soup, file_path) - except Exception as e: - logger.error(e) - def _extract_soup_and_tables(self, file_path: Path) -> BeautifulSoup: """Extract the soup from the html file and assign tables to self.tables. @@ -225,7 +218,7 @@ def _update_table_ids( self.tables["documents"].extend(tables["documents"]) self.empty_tables.extend(empty_tables) - def __merge_table_data(self): + def _merge_table_data(self): if not self.empty_tables: return @@ -319,29 +312,16 @@ def __merge_table_data(self): } ) - def __process_supplementary_file(self, file: Path): - match file.suffix: - case ".html" | ".htm": - self.__process_html_article(file) - case ".xml": - pass - case ".pdf": - try: - from .pdf import extract_pdf_content - - extract_pdf_content(file) - except ModuleNotFoundError: - logger.error( - "Could not load necessary PDF packages. 
" - "If you installed Auto-CORPUS via pip, you can obtain these with:\n" - " pip install autocorpus[pdf]" - ) - raise - case _: - pass + def _extract_html_article(self, file_path: Path): + soup = self._extract_soup_and_tables(file_path) + self.main_text = extract_text(soup, self.config) + try: + self.abbreviations = get_abbreviations(self.main_text, soup, file_path) + except Exception as e: + logger.error(e) - def process_file(self): - """Processes the files specified in the configuration. + def process_html_article(self): + """Processes the main text file and tables specified in the configuration. This method performs the following steps: 1. Checks if a valid configuration is loaded. If not, raises a RuntimeError. @@ -353,7 +333,8 @@ def process_file(self): 3. Processes linked tables, if any: - Parses the HTML content of each linked table file. 4. Merges table data. - 5. Checks if there are any documents in the tables and sets the `has_tables` attribute accordingly. + 5. Checks if there are any documents in the tables and sets the `has_tables` + attribute accordingly. Raises: RuntimeError: If no valid configuration is loaded. @@ -361,72 +342,59 @@ def process_file(self): if not self.config: raise RuntimeError("A valid config file must be loaded.") - soup = self._extract_soup_and_tables(self.file_path) - self.main_text = extract_text(soup, self.config) - try: - self.abbreviations = get_abbreviations(self.main_text, soup, self.file_path) - except Exception as e: - logger.error(e) - if self.linked_tables: - for table_file in self.linked_tables: - soup = self._extract_soup_and_tables(table_file) - self.__merge_table_data() - if "documents" in self.tables and not self.tables["documents"] == []: - self.has_tables = True - - def process_files( - self, - files: list[Path | str] = [], - dir_path: Path | str = "", - linked_tables: list[Path | str] = [], - ): - """Processes main text files provided and nested supplementary files. 
+ self._extract_html_article(self.file_path) + for table_file in self.linked_tables: + self._extract_soup_and_tables(table_file) + self._merge_table_data() + self.has_tables = bool(self.tables.get("documents")) + + def _process_file(self): + """Process the input file based on its type. + + This method checks the file extension and processes the file accordingly. Raises: - RuntimeError: If no valid configuration is provided. + NotImplementedError: _description_ """ - # Either a list of specific files or a directory path must be provided. - if not (files or dir_path): - logger.error("No files or directory provided.") - raise FileNotFoundError("No files or directory provided.") - # - if dir_path: - # Path is the preferred type, users can also provide a string though - if isinstance(dir_path, str): - dir_path = Path(dir_path) - for file in dir_path.iterdir(): - if file.is_file() and file.suffix in [".html", ".htm"]: - self.__process_html_article(file) - elif file.is_dir(): - # recursively process all files in the subdirectory - for sub_file in file.rglob("*"): - self.__process_supplementary_file(sub_file) - - # process any specific files provided - for specific_file in files: - # Path is the preferred type, users can also provide a string though - if isinstance(specific_file, str): - specific_file = Path(specific_file) - if specific_file.is_file() and specific_file.suffix in [".html", ".htm"]: - self.__process_html_article(specific_file) - else: - # process any specific files provided - self.__process_supplementary_file(specific_file) + match self.file_path.suffix: + case ".html" | ".htm": + self.process_html_article() + case ".xml": + raise NotImplementedError("XML processing is not implemented yet.") + case ".pdf": + try: + from .pdf import extract_pdf_content + + text, tables = extract_pdf_content(self.file_path) + + self.main_text = text.to_dict() + self.tables = tables.to_dict() + + except ModuleNotFoundError: + logger.error( + "Could not load necessary PDF 
packages. If you installed " + "Auto-CORPUS via pip, you can obtain these with:\n" + " pip install autocorpus[pdf]" + ) + raise + case _: + pass def __init__( self, config: dict[str, Any], - main_text: Path, - linked_tables=None, + file_path: Path, + linked_tables: list[Path] = [], ): - """Utilises the input config file to create valid BioC versions of input HTML journal articles. + """Create valid BioC versions of input HTML journal articles based off config. Args: - config (dict): configuration file for the input HTML journal articles - main_text (Path): path to the main text of the article (HTML files only) - linked_tables (list): list of linked table file paths to be included in this run (HTML files only) + config: Configuration dictionary for the input journal articles + file_path: Path to the article file to be processed + linked_tables: list of linked table file paths to be included in this run + (HTML files only) """ - self.file_path = main_text + self.file_path = file_path self.linked_tables = linked_tables self.config = config self.main_text = {} @@ -435,11 +403,13 @@ def __init__( self.abbreviations = {} self.has_tables = False + self._process_file() + def to_bioc(self) -> dict[str, Any]: """Get the currently loaded bioc as a dict. Returns: - (dict): bioc as a dict + bioc as a dict """ return get_formatted_bioc_collection(self.main_text, self.file_path) @@ -447,10 +417,10 @@ def main_text_to_bioc_json(self) -> str: """Get the currently loaded main text as BioC JSON. Args: - indent (int): level of indentation + indent: level of indentation Returns: - (str): main text as BioC JSON + main text as BioC JSON """ return json.dumps( get_formatted_bioc_collection(self.main_text, self.file_path), @@ -462,7 +432,7 @@ def main_text_to_bioc_xml(self) -> str: """Get the currently loaded main text as BioC XML. 
Returns: - (str): main text as BioC XML + main text as BioC XML """ collection = BioCJSON.loads( json.dumps( @@ -477,10 +447,10 @@ def tables_to_bioc_json(self, indent: int = 2) -> str: """Get the currently loaded tables as Tables-JSON. Args: - indent (int): level of indentation + indent: level of indentation Returns: - (str): tables as Tables-JSON + tables as Tables-JSON """ return json.dumps(self.tables, ensure_ascii=False, indent=indent) @@ -488,10 +458,10 @@ def abbreviations_to_bioc_json(self, indent: int = 2) -> str: """Get the currently loaded abbreviations as BioC JSON. Args: - indent (int): level of indentation + indent: level of indentation Returns: - (str): abbreviations as BioC JSON + abbreviations as BioC JSON """ return json.dumps(self.abbreviations, ensure_ascii=False, indent=indent) @@ -499,10 +469,10 @@ def to_json(self, indent: int = 2) -> str: """Get the currently loaded AC object as a dict. Args: - indent (int): Level of indentation. + indent: Level of indentation. Returns: - (str): AC object as a JSON string + AC object as a JSON string """ return json.dumps(self.to_dict(), ensure_ascii=False, indent=indent) @@ -510,10 +480,50 @@ def to_dict(self) -> dict[str, Any]: """Get the currently loaded AC object as a dict. Returns: - (dict): AC object as a dict + AC object as a dict """ return { "main_text": self.main_text, "abbreviations": self.abbreviations, "tables": self.tables, } + + +def process_directory(config: dict[str, Any], dir_path: Path) -> Iterable[Autocorpus]: + """Process all files in a directory and its subdirectories. + + Args: + config: Configuration dictionary for the input HTML journal articles + dir_path: Path to the directory containing files to be processed. + + Returns: + A generator yielding Autocorpus objects for each processed file. 
+ """ + for file_path in dir_path.iterdir(): + if file_path.is_file(): + yield Autocorpus(config, file_path) + + elif file_path.is_dir(): + # recursively process all files in the subdirectory + for sub_file_path in file_path.rglob("*"): + yield Autocorpus(config, sub_file_path) + + +def process_files(config: dict[str, Any], files: list[Path]) -> Iterable[Autocorpus]: + """Process all files in a list. + + Args: + config: Configuration dictionary for the input HTML journal articles + files: list of Paths to the files to be processed. + + Returns: + A generator yielding Autocorpus objects for each processed file. + + Raises: + RuntimeError: If the list of files is invalid. + """ + if not all(file.is_file() for file in files): + raise RuntimeError("All files must be valid file paths.") + + for file_path in files: + yield Autocorpus(config, file_path) diff --git a/autocorpus/pdf.py b/autocorpus/pdf.py index b5496396..870316fb 100644 --- a/autocorpus/pdf.py +++ b/autocorpus/pdf.py @@ -12,8 +12,8 @@ from autocorpus.bioc_supplementary import BioCTableConverter, BioCTextConverter from . import logger -from .ac_bioc import BioCJSON -from .ac_bioc.bioctable.json import BioCTableJSON +from .ac_bioc import BioCCollection +from .ac_bioc.bioctable.json import BioCTableCollection _pdf_converter: PdfConverter | None = None @@ -35,7 +35,7 @@ def _get_pdf_converter() -> PdfConverter | None: def extract_pdf_content( file_path: Path, -) -> bool: +) -> tuple[BioCCollection, BioCTableCollection]: """Extracts content from a PDF file. Args: @@ -43,13 +43,17 @@ def extract_pdf_content( Returns: bool: success status of the extraction process. + + Raises: + RuntimeError: If the PDF converter is not initialized. """ bioc_text, bioc_tables = None, None pdf_converter = _get_pdf_converter() if not pdf_converter: - logger.error("PDF converter not initialized.") - return False + message = "PDF converter not initialized." 
+ logger.error(message) + raise RuntimeError(message) # extract text from PDF rendered = pdf_converter(str(file_path)) @@ -60,14 +64,7 @@ def extract_pdf_content( bioc_text = BioCTextConverter.build_bioc(text, str(file_path), "pdf") bioc_tables = BioCTableConverter.build_bioc(tables, str(file_path)) - out_filename = str(file_path).replace(".pdf", ".pdf_bioc.json") - with open(out_filename, "w", encoding="utf-8") as f: - BioCJSON.dump(bioc_text, f, indent=4) - - out_table_filename = str(file_path).replace(".pdf", ".pdf_tables.json") - with open(out_table_filename, "w", encoding="utf-8") as f: - BioCTableJSON.dump(bioc_tables, f, indent=4) - return True + return bioc_text, bioc_tables def _split_text_and_tables(text: str) -> tuple[list[str], list[list[str]]]: diff --git a/autocorpus/run.py b/autocorpus/run.py index 3d4b7649..85225a9c 100644 --- a/autocorpus/run.py +++ b/autocorpus/run.py @@ -16,12 +16,10 @@ def run_autocorpus(config, structure, key, output_format): """ ac = Autocorpus( config=config, - main_text=Path(structure[key]["main_text"]), - linked_tables=sorted(structure[key]["linked_tables"]), + file_path=Path(structure[key]["main_text"]), + linked_tables=sorted(Path(lt) for lt in structure[key]["linked_tables"]), ) - ac.process_file() - out_dir = Path(structure[key]["out_dir"]) if structure[key]["main_text"]: key = key.replace("\\", "/") @@ -46,6 +44,15 @@ def run_autocorpus(config, structure, key, output_format): ) as outfp: outfp.write(ac.abbreviations_to_bioc_json()) + ## TODO: Uncomment when SI conversion is supported + # out_filename = str(file_path).replace(".pdf", ".pdf_bioc.json") + # with open(out_filename, "w", encoding="utf-8") as f: + # BioCJSON.dump(bioc_text, f, indent=4) + + # out_table_filename = str(file_path).replace(".pdf", ".pdf_tables.json") + # with open(out_table_filename, "w", encoding="utf-8") as f: + # BioCTableJSON.dump(bioc_tables, f, indent=4) + # AC does not support the conversion of tables or abbreviations to XML if 
ac.has_tables: with open( diff --git a/tests/test_regression.py b/tests/test_regression.py index 199ea3e5..3efb3447 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -36,9 +36,7 @@ def test_autocorpus(data_path: Path, input_file: str, config: dict[str, Any]) -> ) as f: expected_tables = json.load(f) - auto_corpus = Autocorpus(config=config, main_text=pmc_example_path) - - auto_corpus.process_file() + auto_corpus = Autocorpus(config=config, file_path=pmc_example_path) abbreviations = auto_corpus.abbreviations bioc = auto_corpus.to_bioc() @@ -82,24 +80,13 @@ def test_pdf_to_bioc(data_path: Path, input_file: str, config: dict[str, Any]) - ) as f: expected_tables = json.load(f) - ac = Autocorpus( + auto_corpus = Autocorpus( config=config, - main_text=pdf_path, + file_path=pdf_path, ) - ac.process_files(files=[pdf_path]) - - with open( - str(pdf_path).replace(".pdf", ".pdf_bioc.json"), - encoding="utf-8", - ) as f: - new_bioc = json.load(f) - - with open( - str(pdf_path).replace(".pdf", ".pdf_tables.json"), - encoding="utf-8", - ) as f: - new_tables = json.load(f) + new_bioc = auto_corpus.main_text + new_tables = auto_corpus.tables _make_reproducible(new_bioc, expected_bioc, new_tables, expected_tables) @@ -112,4 +99,4 @@ def _make_reproducible(*data: dict[str, Any]) -> None: for d in data: d.pop("date") for doc in d["documents"]: - doc.pop("inputfile") + doc.pop("inputfile", None) From 5d1260579b04a4623e992a0f4f21793db09a3c11 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 10:54:51 +0100 Subject: [PATCH 072/125] Use BioCJSONEncoder to make tests pass. 
Temporary until #272 is fixed --- autocorpus/autocorpus.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 3b10bcea..d2887d63 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -363,12 +363,14 @@ def _process_file(self): raise NotImplementedError("XML processing is not implemented yet.") case ".pdf": try: + from .ac_bioc.json import BioCJSONEncoder from .pdf import extract_pdf_content text, tables = extract_pdf_content(self.file_path) - self.main_text = text.to_dict() - self.tables = tables.to_dict() + # TODO: Use text.to_dict() after bugfix in ac_bioc + self.main_text = BioCJSONEncoder().default(text) + self.tables = BioCJSONEncoder().default(tables) except ModuleNotFoundError: logger.error( From 8277b78d98633fcf937bd5eca8fff81949321d24 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 29 May 2025 11:25:01 +0100 Subject: [PATCH 073/125] Add more detail about test directory structure Suggested by @AdrianDAlessandro. --- tests/data/README.md | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/tests/data/README.md b/tests/data/README.md index 7728ad80..2876787d 100644 --- a/tests/data/README.md +++ b/tests/data/README.md @@ -42,6 +42,31 @@ git commit -m "Obtain new private data" The `public` and `private` subfolders are each structured in the same way. -Currently only data for HTML tests is provided and it is in a folder called `html`. Within that folder, there are subfolders whose names **must** correspond to a [`DefaultConfig`] (e.g. `LEGACY_PMC`). The subfolders contain the test data (i.e. HTML files) along with the expected output files (i.e. JSON files generated by Auto-CORPus). If you add new test data, you must add the corresponding output files at the same time. +Currently only data for HTML tests is provided and it is in a folder called `html`. 
Within that folder, there are subfolders whose names **must** correspond to a [`DefaultConfig`] (e.g. `LEGACY_PMC`). The subfolders contain the test data (i.e. HTML files) along with the expected output files (i.e. `*_bioc.json`, `*_abbreviations.json` and, optionally, `*_tables.json`). If you add new test data, you must add the corresponding output files at the same time. + +For example, at the time of writing, the structure of `tests/data` looks like this: + +```txt +tests/data/ +├── private +│   └── html +│   └── PMC +│   ├── PMC10071775_abbreviations.json +│   ├── PMC10071775_bioc.json +│   ├── PMC10071775.html +│ (...) +└── public + └── html + ├── LEGACY_PMC + │   ├── PMC8885717_abbreviations.json + │   ├── PMC8885717_bioc.json + │   ├── PMC8885717.html + │   └── PMC8885717_tables.json + └── PMC + ├── PMC8885717_abbreviations.json + ├── PMC8885717_bioc.json + ├── PMC8885717.html + └── PMC8885717_tables.json +``` [`DefaultConfig`]: https://omicsnlp.github.io/Auto-CORPus/reference/autocorpus/configs/default_config/#autocorpus.configs.default_config.DefaultConfig \ No newline at end of file From 25d756896ba2c160c22076549d93c2b069f13093 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 29 May 2025 11:32:09 +0100 Subject: [PATCH 074/125] Use definition of data path in `conftest.py` --- tests/__init__.py | 1 + tests/conftest.py | 4 +++- tests/test_regression.py | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 tests/__init__.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..f95b1211 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for Auto-CORPus.""" diff --git a/tests/conftest.py b/tests/conftest.py index 02d682ae..4310aced 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,11 +15,13 @@ BioCRelation, ) +DATA_PATH = Path(__file__).parent / "data" + @pytest.fixture def data_path() -> Path: """The path to the folder containing test data files.""" - return 
Path(__file__).parent / "data" + return DATA_PATH @pytest.fixture diff --git a/tests/test_regression.py b/tests/test_regression.py index b9f23f5f..d3760ca5 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -9,6 +9,8 @@ from autocorpus.configs.default_config import DefaultConfig +from .conftest import DATA_PATH + _KNOWN_FAILURES = [ "PMC10790237.html", "PMC5480070.html", @@ -24,7 +26,6 @@ def _get_html_test_data_paths(subfolder: str): """Return paths to HTML test data files with appropriate DefaultConfig.""" - DATA_PATH = Path(__file__).parent / "data" HTML_DATA_PATH = DATA_PATH / subfolder / "html" if not HTML_DATA_PATH.exists(): return From 00059bdb890b213edffdc0928af526ff970fa77e Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 12:46:42 +0100 Subject: [PATCH 075/125] Use BioCTableJSONEncoder to make tests pass --- autocorpus/autocorpus.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index d2887d63..4474a2e3 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -363,6 +363,7 @@ def _process_file(self): raise NotImplementedError("XML processing is not implemented yet.") case ".pdf": try: + from .ac_bioc.bioctable.json import BioCTableJSONEncoder from .ac_bioc.json import BioCJSONEncoder from .pdf import extract_pdf_content @@ -370,7 +371,7 @@ def _process_file(self): # TODO: Use text.to_dict() after bugfix in ac_bioc self.main_text = BioCJSONEncoder().default(text) - self.tables = BioCJSONEncoder().default(tables) + self.tables = BioCTableJSONEncoder().default(tables) except ModuleNotFoundError: logger.error( From bfe24ef3a36625b9d834e8746e9c1599c072e259 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 14:28:25 +0100 Subject: [PATCH 076/125] Include PDF files in check_file_type and use a FileType Enum and a match-case --- autocorpus/utils.py | 58 +++++++++++++++++++++++---------------------- 1 file 
changed, 30 insertions(+), 28 deletions(-) diff --git a/autocorpus/utils.py b/autocorpus/utils.py index 5c80404c..edec1856 100644 --- a/autocorpus/utils.py +++ b/autocorpus/utils.py @@ -2,6 +2,7 @@ import re import unicodedata +from enum import Enum, auto from pathlib import Path from typing import Any @@ -10,8 +11,19 @@ from lxml import etree from lxml.html.soupparser import fromstring +from . import logger -def check_file_type(file_path: Path) -> str: + +class FileType(Enum): + """Enumeration for different file types.""" + + HTML = auto() + XML = auto() + PDF = auto() + OTHER = auto() + + +def check_file_type(file_path: Path) -> FileType: """Determines the type of a file based on its content and extension. This function checks whether the given file is an HTML or XML file by @@ -19,40 +31,30 @@ def check_file_type(file_path: Path) -> str: be parsed as either HTML or XML, it is classified as "other". Args: - file_path (Path): The path to the file to be checked. + file_path: The path to the file to be checked. Returns: - str: A string indicating the file type: + A string indicating the file type: - "html" if the file is determined to be an HTML file. - "xml" if the file is determined to be an XML file. - "other" if the file type cannot be determined as HTML or XML. 
""" - is_html, is_xml = False, False file_extension = file_path.suffix.lower() - if file_extension in [".html", ".htm"]: - try: - etree.parse(file_path, etree.HTMLParser()) - is_html = True - except etree.ParseError: - etree.parse(file_path, etree.XMLParser()) - is_xml = True - except Exception as ex: - print(f"Error parsing file {file_path}: {ex}") - elif file_extension == ".xml": - try: - etree.parse(file_path, etree.XMLParser()) - is_xml = True - except etree.ParseError: - etree.parse(file_path, etree.HTMLParser()) - is_html = True - except Exception as ex: - print(f"Error parsing file {file_path}: {ex}") - if is_html: - return "html" - elif is_xml: - return "xml" - else: - return "other" + match file_extension: + case ".html" | ".htm" | ".xml": + try: + etree.parse(file_path, etree.HTMLParser()) + return FileType.HTML + except etree.ParseError: + etree.parse(file_path, etree.XMLParser()) + return FileType.XML + except Exception as ex: + logger.error(f"Error parsing file {file_path}: {ex}") + return FileType.OTHER + case ".pdf": + return FileType.PDF + case _: + return FileType.OTHER def get_files(base_dir, pattern=r"(.*).html"): From b51f260fc229cc7a412497d761e16671b792861b Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 14:29:13 +0100 Subject: [PATCH 077/125] Use check_file_type in _process_file --- autocorpus/autocorpus.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 4474a2e3..35f8dc91 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -14,7 +14,7 @@ from .data_structures import Paragraph from .section import get_section from .table import get_table_json -from .utils import handle_not_tables +from .utils import FileType, check_file_type, handle_not_tables def soupify_infile(fpath: Path) -> BeautifulSoup: @@ -351,17 +351,21 @@ def process_html_article(self): def _process_file(self): """Process the input file 
based on its type. - This method checks the file extension and processes the file accordingly. + This method checks the file type and processes the file accordingly. Raises: - NotImplementedError: _description_ + NotImplementedError: For files types with no implemented processing. + ModuleNotFoundError: For PDF processing if required packages are not found. """ - match self.file_path.suffix: - case ".html" | ".htm": + match check_file_type(self.file_path): + case FileType.HTML: self.process_html_article() - case ".xml": - raise NotImplementedError("XML processing is not implemented yet.") - case ".pdf": + case FileType.XML: + raise NotImplementedError( + f"Could not process file {self.file_path}: " + "XML processing is not implemented yet." + ) + case FileType.PDF: try: from .ac_bioc.bioctable.json import BioCTableJSONEncoder from .ac_bioc.json import BioCJSONEncoder @@ -380,8 +384,10 @@ def _process_file(self): " pip install autocorpus[pdf]" ) raise - case _: - pass + case FileType.OTHER: + raise NotImplementedError( + f"Could not identify file type for {self.file_path}" + ) def __init__( self, From 1308f92eb77dd233c3cb5aa3ef247635fe5e6bf7 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 17:16:02 +0100 Subject: [PATCH 078/125] Move file checker to file_type module and include test --- autocorpus/autocorpus.py | 3 ++- autocorpus/file_type.py | 51 ++++++++++++++++++++++++++++++++++++++++ autocorpus/utils.py | 46 ------------------------------------ tests/test_file_type.py | 26 ++++++++++++++++++++ 4 files changed, 79 insertions(+), 47 deletions(-) create mode 100644 autocorpus/file_type.py create mode 100644 tests/test_file_type.py diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 35f8dc91..0a35431b 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -12,9 +12,10 @@ from .ac_bioc import BioCJSON, BioCXML from .bioc_formatter import get_formatted_bioc_collection from .data_structures import Paragraph 
+from .file_type import FileType, check_file_type from .section import get_section from .table import get_table_json -from .utils import FileType, check_file_type, handle_not_tables +from .utils import handle_not_tables def soupify_infile(fpath: Path) -> BeautifulSoup: diff --git a/autocorpus/file_type.py b/autocorpus/file_type.py new file mode 100644 index 00000000..a9a38f4d --- /dev/null +++ b/autocorpus/file_type.py @@ -0,0 +1,51 @@ +"""Contains utilities for identifying file types based on content and extension.""" + +from enum import Enum, auto +from pathlib import Path + +from lxml import etree + +from . import logger + + +class FileType(Enum): + """Enumeration for different file types.""" + + HTML = auto() + XML = auto() + PDF = auto() + OTHER = auto() + + +def check_file_type(file_path: Path) -> FileType: + """Determines the type of a file based on its content and extension. + + This function checks whether the given file is an HTML or XML file by + attempting to parse it using appropriate parsers. If the file cannot + be parsed as either HTML or XML, it is classified as "other". + + Args: + file_path: The path to the file to be checked. + + Returns: + A string indicating the file type: + - "html" if the file is determined to be an HTML file. + - "xml" if the file is determined to be an XML file. + - "other" if the file type cannot be determined as HTML or XML. 
+ """ + file_extension = file_path.suffix.lower() + match file_extension: + case ".html" | ".htm" | ".xml": + try: + assert etree.parse(file_path, etree.XMLParser()).docinfo.xml_version + return FileType.XML + except (etree.ParseError, AssertionError): + etree.parse(file_path, etree.HTMLParser()) + return FileType.HTML + except Exception as ex: + logger.error(f"Error parsing file {file_path}: {ex}") + return FileType.OTHER + case ".pdf": + return FileType.PDF + case _: + return FileType.OTHER diff --git a/autocorpus/utils.py b/autocorpus/utils.py index edec1856..09fd06e9 100644 --- a/autocorpus/utils.py +++ b/autocorpus/utils.py @@ -2,7 +2,6 @@ import re import unicodedata -from enum import Enum, auto from pathlib import Path from typing import Any @@ -11,51 +10,6 @@ from lxml import etree from lxml.html.soupparser import fromstring -from . import logger - - -class FileType(Enum): - """Enumeration for different file types.""" - - HTML = auto() - XML = auto() - PDF = auto() - OTHER = auto() - - -def check_file_type(file_path: Path) -> FileType: - """Determines the type of a file based on its content and extension. - - This function checks whether the given file is an HTML or XML file by - attempting to parse it using appropriate parsers. If the file cannot - be parsed as either HTML or XML, it is classified as "other". - - Args: - file_path: The path to the file to be checked. - - Returns: - A string indicating the file type: - - "html" if the file is determined to be an HTML file. - - "xml" if the file is determined to be an XML file. - - "other" if the file type cannot be determined as HTML or XML. 
- """ - file_extension = file_path.suffix.lower() - match file_extension: - case ".html" | ".htm" | ".xml": - try: - etree.parse(file_path, etree.HTMLParser()) - return FileType.HTML - except etree.ParseError: - etree.parse(file_path, etree.XMLParser()) - return FileType.XML - except Exception as ex: - logger.error(f"Error parsing file {file_path}: {ex}") - return FileType.OTHER - case ".pdf": - return FileType.PDF - case _: - return FileType.OTHER - def get_files(base_dir, pattern=r"(.*).html"): """Recursively retrieve all PMC.html files from the directory. diff --git a/tests/test_file_type.py b/tests/test_file_type.py new file mode 100644 index 00000000..db0fe6bb --- /dev/null +++ b/tests/test_file_type.py @@ -0,0 +1,26 @@ +"""Test the file_type checking utilities.""" + +from pathlib import Path + +from lxml import etree + + +def test_check_file_type_html(tmp_path: Path, data_path: Path) -> None: + """Test that HTML files are correctly identified.""" + from autocorpus.file_type import FileType, check_file_type + + html_file = data_path / "PMC" / "Current" / "PMC8885717.html" + assert check_file_type(html_file) == FileType.HTML + + json_file = data_path / "PMC" / "Current" / "PMC8885717_bioc.json" + assert check_file_type(json_file) == FileType.OTHER + + pdf_file = data_path / "Supplementary" / "PDF" / "tp-10-08-2123-coif.pdf" + assert check_file_type(pdf_file) == FileType.PDF + + # Create temporary XML file + xml_file = tmp_path / "output.xml" + with xml_file.open("wb") as out: + out.write(etree.tostring(etree.XML("data"), xml_declaration=True)) + + assert check_file_type(xml_file) == FileType.XML From 3cbef8e6147e08becf76a95c928077d77e7e9ae1 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 17:23:05 +0100 Subject: [PATCH 079/125] Correct docstrings --- autocorpus/file_type.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/autocorpus/file_type.py b/autocorpus/file_type.py index a9a38f4d..2aa5e0c0 
100644 --- a/autocorpus/file_type.py +++ b/autocorpus/file_type.py @@ -9,7 +9,16 @@ class FileType(Enum): - """Enumeration for different file types.""" + """Enum for different file types. + + Access the attributes like so FileType.HTML, FileType.XML, etc. + + Attributes: + HTML: Represents an HTML file. + XML: Represents an XML file. + PDF: Represents a PDF file. + OTHER: Represents any other file type that is not recognized. + """ HTML = auto() XML = auto() @@ -20,18 +29,15 @@ class FileType(Enum): def check_file_type(file_path: Path) -> FileType: """Determines the type of a file based on its content and extension. - This function checks whether the given file is an HTML or XML file by + This function checks the given file type by checking the file extension and then attempting to parse it using appropriate parsers. If the file cannot - be parsed as either HTML or XML, it is classified as "other". + be parsed or the fileextension is not recognised, it is classified as "OTHER". Args: file_path: The path to the file to be checked. Returns: - A string indicating the file type: - - "html" if the file is determined to be an HTML file. - - "xml" if the file is determined to be an XML file. - - "other" if the file type cannot be determined as HTML or XML. + A FileType Enum value indicating the type of the file. 
""" file_extension = file_path.suffix.lower() match file_extension: From d1dbb1489aae4f7c43faeaa25a20d14fd508b8ad Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 17:43:45 +0100 Subject: [PATCH 080/125] Apply suggestions from code review Co-authored-by: Alex Dewar --- autocorpus/autocorpus.py | 2 +- autocorpus/config.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 0a35431b..15042c0d 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -34,7 +34,7 @@ def soupify_infile(fpath: Path) -> BeautifulSoup: return soup -def get_keywords(soup: BeautifulSoup, config: dict[str, Any]) -> None | Paragraph: +def get_keywords(soup: BeautifulSoup, config: dict[str, Any]) -> Paragraph | None: """Extract keywords from the soup object based on the provided configuration. Args: diff --git a/autocorpus/config.py b/autocorpus/config.py index e36d3171..dda079c3 100644 --- a/autocorpus/config.py +++ b/autocorpus/config.py @@ -7,13 +7,13 @@ def read_config(config_path: str) -> dict[str, Any]: - """Reads a configuration file and returns its content. + """Reads a configuration file and returns its contents. Args: config_path: The path to the configuration file. Returns: - dict: The content of the configuration file. + dict: The contents of the configuration file. Raises: FileNotFoundError: If the configuration file does not exist. 
From abc6f06306030530b4c6159829d99a818cb702f0 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 18:30:33 +0100 Subject: [PATCH 081/125] Make changes suggested by @alexdewar --- autocorpus/autocorpus.py | 62 ++++++++++++++++------------------------ autocorpus/reference.py | 11 +++++-- autocorpus/utils.py | 6 ++-- 3 files changed, 37 insertions(+), 42 deletions(-) diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 15042c0d..eb727777 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -18,7 +18,7 @@ from .utils import handle_not_tables -def soupify_infile(fpath: Path) -> BeautifulSoup: +def load_html_file(fpath: Path) -> BeautifulSoup: """Convert the input file into a BeautifulSoup object. Args: @@ -34,20 +34,19 @@ def soupify_infile(fpath: Path) -> BeautifulSoup: return soup -def get_keywords(soup: BeautifulSoup, config: dict[str, Any]) -> Paragraph | None: +def get_keywords( + soup: BeautifulSoup, keywords_config: dict[str, Any] +) -> Paragraph | None: """Extract keywords from the soup object based on the provided configuration. Args: soup: BeautifulSoup object of the HTML file. - config: AC config rules. + keywords_config: AC config rules for keywords. Returns: dict: Extracted keywords as a dictionary. """ - if "keywords" not in config: - return None - - responses = handle_not_tables(config["keywords"], soup) + responses = handle_not_tables(keywords_config, soup) if not responses: return None @@ -61,20 +60,17 @@ def get_keywords(soup: BeautifulSoup, config: dict[str, Any]) -> Paragraph | Non ) -def get_title(soup: BeautifulSoup, config: dict[str, Any]) -> str: +def get_title(soup: BeautifulSoup, title_config: dict[str, Any]) -> str: """Extract the title from the soup object based on the provided configuration. Args: soup: BeautifulSoup object of the HTML file. - config: AC config rules. + title_config: AC config rules for the title. Returns: Extracted title as a string. 
""" - if "title" not in config: - return "" - - titles = handle_not_tables(config["title"], soup) + titles = handle_not_tables(title_config, soup) if not titles: return "" @@ -84,22 +80,19 @@ def get_title(soup: BeautifulSoup, config: dict[str, Any]) -> str: def get_sections( - soup: BeautifulSoup, config: dict[str, Any] + soup: BeautifulSoup, sections_config: dict[str, Any] ) -> list[dict[str, Tag | list[str]]]: """Extract sections from the soup object based on the provided configuration. Args: soup: Beautiful Soup object of the HTML file. - config: AC config rules. + sections_config: AC config rules for sections. Returns: A list of matches for the provided config rules. Either as a Tag or a list of strings. """ - if "sections" not in config: - return [] - - return handle_not_tables(config["sections"], soup) + return handle_not_tables(sections_config, soup) def set_unknown_section_headings(unique_text: list[Paragraph]) -> list[Paragraph]: @@ -125,7 +118,10 @@ def set_unknown_section_headings(unique_text: list[Paragraph]) -> list[Paragraph def extract_text(soup: BeautifulSoup, config: dict[str, Any]) -> dict[str, Any]: - """Convert BeautifulSoup object into a Python dict with cleaned main text body. + """Extract the main text of the article from the soup object. + + This converts a BeautifulSoup object of a html article into a Python dict that + aligns with the BioC format defined in the provided config. 
Args: soup: BeautifulSoup object of html @@ -138,11 +134,11 @@ def extract_text(soup: BeautifulSoup, config: dict[str, Any]) -> dict[str, Any]: # Extract tags of text body and hard-code as: # p (main text) and span (keywords and refs) - result["title"] = get_title(soup, config) + result["title"] = get_title(soup, config["title"]) if "title" in config else "" maintext = [] - if keywords := get_keywords(soup, config): + if "keywords" in config and (keywords := get_keywords(soup, config["keywords"])): maintext.append(keywords) - sections = get_sections(soup, config) + sections = get_sections(soup, config["sections"]) if "sections" in config else [] for sec in sections: maintext.extend(get_section(config, sec)) @@ -174,7 +170,7 @@ def _extract_soup_and_tables(self, file_path: Path) -> BeautifulSoup: Returns: The BeautifulSoup object of the html file. """ - soup = soupify_infile(file_path) + soup = load_html_file(file_path) if "tables" in self.config: tables, empty_tables = get_table_json(soup, self.config, file_path) self._update_table_ids(tables, empty_tables) @@ -196,18 +192,10 @@ def _update_table_ids( seen_ids = set() for tab in self.tables["documents"]: - if "." in tab["id"]: - seen_ids.add(tab["id"].split(".")[0]) - else: - seen_ids.add(tab["id"]) + seen_ids.add(tab["id"].partition(".")[0]) for tabl in tables["documents"]: - if "." in tabl["id"]: - tabl_id = tabl["id"].split(".")[0] - tabl_pos = ".".join(tabl["id"].split(".")[1:]) - else: - tabl_id = tabl["id"] - tabl_pos = None + tabl_id, _, tabl_pos = tabl["id"].partition(".") if tabl_id in seen_ids: tabl_id = str(len(seen_ids) + 1) if tabl_pos: @@ -340,9 +328,6 @@ def process_html_article(self): Raises: RuntimeError: If no valid configuration is loaded. 
""" - if not self.config: - raise RuntimeError("A valid config file must be loaded.") - self._extract_html_article(self.file_path) for table_file in self.linked_tables: self._extract_soup_and_tables(table_file) @@ -404,6 +389,9 @@ def __init__( linked_tables: list of linked table file paths to be included in this run (HTML files only) """ + if config == {}: + raise RuntimeError("A valid config file must be loaded.") + self.file_path = file_path self.linked_tables = linked_tables self.config = config diff --git a/autocorpus/reference.py b/autocorpus/reference.py index 48f657e0..ad188366 100644 --- a/autocorpus/reference.py +++ b/autocorpus/reference.py @@ -16,11 +16,18 @@ class ReferencesParagraph(Paragraph): volume: str = "" def as_dict(self) -> dict[str, Any]: - """Return the dictionary representation of the ReferencesParagraph.""" + """Return the dictionary representation of the ReferencesParagraph. + + For consistency between old and new PMC specification, we only include the three + fields `title`, `journal`, and `volume` if they are not empty. + + Returns: + A dictionary representation of the ReferencesParagraph. + """ return { k: v for k, v in super().as_dict().items() - if v or k not in ("title", "journal", "volume") + if v or (k not in ("title", "journal", "volume")) } diff --git a/autocorpus/utils.py b/autocorpus/utils.py index 09fd06e9..4fd4d6e0 100644 --- a/autocorpus/utils.py +++ b/autocorpus/utils.py @@ -133,11 +133,11 @@ def handle_defined_by(config: dict[str, Any], soup: BeautifulSoup) -> list[Tag]: """Retrieve matching nodes for the 'defined-by' config rules. Args: - config (dict): config file section used to parse - soup (bs4.BeautifulSoup): soup section to parse + config: config file section used to parse + soup: soup section to parse Returns: - (list): list of objects, each object being a matching node. Object of the form: + A list of objects, each object being a matching node. 
Object of the form: { node: bs4Object, data:{ From 5059e078ceea547c76140024623a1eb6ca1eb337 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Wed, 14 May 2025 10:04:45 +0100 Subject: [PATCH 082/125] Fix `is_abbreviation` Note that this naturally changes the behaviour of the function, so it is technically a breaking change. There is one string that was previously considered as an abbreviation, which isn't anymore (CRIS_CODE). Fixes #144. --- autocorpus/abbreviation.py | 50 +++++++------------ .../PMC/Current/PMC8885717_abbreviations.json | 9 +--- tests/data/PMC/Current/PMC8885717_bioc.json | 4 +- tests/data/PMC/Current/PMC8885717_tables.json | 10 ++-- .../PMC8885717_abbreviations.json | 7 +-- .../PMC/Pre-Oct-2024/PMC8885717_bioc.json | 2 +- .../PMC/Pre-Oct-2024/PMC8885717_tables.json | 2 +- tests/test_abbreviation.py | 42 ++++++++++++++++ tests/test_regression.py | 5 +- 9 files changed, 75 insertions(+), 56 deletions(-) create mode 100644 tests/test_abbreviation.py diff --git a/autocorpus/abbreviation.py b/autocorpus/abbreviation.py index 9b00ed01..63a13c60 100644 --- a/autocorpus/abbreviation.py +++ b/autocorpus/abbreviation.py @@ -35,43 +35,27 @@ def _remove_quotes(text: str) -> str: return re2.sub(r'([(])[\'"\p{Pi}]|[\'"\p{Pf}]([);:])', r"\1\2", text) -def _is_abbreviation(candidate: str) -> bool: - r"""Check whether input string is an abbreviation. +def _is_abbreviation(s: str) -> bool: + """Check whether input string is an abbreviation. - Based on Schwartz&Hearst. + To be classified as an abbreviation, a string must be composed exclusively of + Unicode letters or digits, optionally separated by dots. This sequence must repeat + between two and ten times. We exclude strings that are *exclusively* composed of + digits or lowercase letters. - 2 <= len(str) <= 10 - len(tokens) <= 2 - re.search(r'\p{L}', str) - str[0].isalnum() - - and extra: - if it matches (\p{L}\.?\s?){2,} - it is a good candidate. + Adapted from Schwartz & Hearst. 
+ """ + # Disallow if exclusively composed of digits + if re2.match(r"\p{N}+$", s): + return False - Args: - candidate: Candidate abbreviation + # Disallow if exclusively composed of lowercase unicode chars + if re2.match(r"\p{Ll}+$", s): + return False - Returns: - True if this is a good candidate - """ - viable = True - - # Broken: See https://github.com/omicsNLP/Auto-CORPus/issues/144 - # if re2.match(r"(\p{L}\.?\s?){2,}", candidate.lstrip()): - # viable = True - if len(candidate) < 2 or len(candidate) > 10: - viable = False - if len(candidate.split()) > 2: - viable = False - if candidate.islower(): # customize function discard all lower case candidate - viable = False - if not re2.search(r"\p{L}", candidate): # \p{L} = All Unicode letter - viable = False - if not candidate[0].isalnum(): - viable = False - - return viable + # Should be a repeating sequence of unicode chars or digits, optionally separated + # by dots. The sequence must repeat between 2 and 10 times. + return bool(re2.match(r"([\p{L}\p{N}]\.?){2,10}$", s)) def _get_definition(candidate: str, preceding: str) -> str: diff --git a/tests/data/PMC/Current/PMC8885717_abbreviations.json b/tests/data/PMC/Current/PMC8885717_abbreviations.json index b04dac0d..f55d07e4 100644 --- a/tests/data/PMC/Current/PMC8885717_abbreviations.json +++ b/tests/data/PMC/Current/PMC8885717_abbreviations.json @@ -1,11 +1,11 @@ { "source": "Auto-CORPus (abbreviations)", - "date": "20250326", + "date": "20250514", "key": "autocorpus_abbreviations.key", "documents": [ { "id": "PMC8885717", - "inputfile": "tests\\data\\PMC\\Current\\PMC8885717.html", + "inputfile": "tests/data/PMC/Current/PMC8885717.html", "passages": [ { "text_short": "NLP", @@ -56,11 +56,6 @@ "text_short": "HDR", "text_long_1": "Health Data Research", "extraction_algorithm_1": "fulltext" - }, - { - "text_short": "CRIS-CODE", - "text_long_1": "Clinical Record Interactive Search Comprehensive Data Extraction", - "extraction_algorithm_1": "fulltext" } ] } diff 
--git a/tests/data/PMC/Current/PMC8885717_bioc.json b/tests/data/PMC/Current/PMC8885717_bioc.json index 4b8703f0..2de2005f 100644 --- a/tests/data/PMC/Current/PMC8885717_bioc.json +++ b/tests/data/PMC/Current/PMC8885717_bioc.json @@ -1,12 +1,12 @@ { "source": "Auto-CORPus (full-text)", - "date": "20250326", + "date": "20250514", "key": "autocorpus_fulltext.key", "infons": {}, "documents": [ { "id": "PMC8885717", - "inputfile": "tests\\data\\PMC\\Current\\PMC8885717.html", + "inputfile": "tests/data/PMC/Current/PMC8885717.html", "infons": {}, "passages": [ { diff --git a/tests/data/PMC/Current/PMC8885717_tables.json b/tests/data/PMC/Current/PMC8885717_tables.json index 219d82ec..c06319a4 100644 --- a/tests/data/PMC/Current/PMC8885717_tables.json +++ b/tests/data/PMC/Current/PMC8885717_tables.json @@ -1,11 +1,11 @@ { "source": "Auto-CORPus (tables)", - "date": "20250326", + "date": "20250514", "key": "autocorpus_tables.key", "infons": {}, "documents": [ { - "inputfile": "tests\\data\\PMC\\Current\\PMC8885717.html", + "inputfile": "tests/data/PMC/Current/PMC8885717.html", "id": "1", "infons": {}, "passages": [ @@ -782,7 +782,7 @@ ] }, { - "inputfile": "tests\\data\\PMC\\Current\\PMC8885717.html", + "inputfile": "tests/data/PMC/Current/PMC8885717.html", "id": "2", "infons": {}, "passages": [ @@ -1139,7 +1139,7 @@ ] }, { - "inputfile": "tests\\data\\PMC\\Current\\PMC8885717.html", + "inputfile": "tests/data/PMC/Current/PMC8885717.html", "id": "3", "infons": {}, "passages": [ @@ -1319,7 +1319,7 @@ ] }, { - "inputfile": "tests\\data\\PMC\\Current\\PMC8885717.html", + "inputfile": "tests/data/PMC/Current/PMC8885717.html", "id": "4", "infons": {}, "passages": [ diff --git a/tests/data/PMC/Pre-Oct-2024/PMC8885717_abbreviations.json b/tests/data/PMC/Pre-Oct-2024/PMC8885717_abbreviations.json index cf26b8d3..83183b33 100644 --- a/tests/data/PMC/Pre-Oct-2024/PMC8885717_abbreviations.json +++ b/tests/data/PMC/Pre-Oct-2024/PMC8885717_abbreviations.json @@ -1,6 +1,6 @@ { "source": 
"Auto-CORPus (abbreviations)", - "date": "20240829", + "date": "20250514", "key": "autocorpus_abbreviations.key", "documents": [ { @@ -56,11 +56,6 @@ "text_short": "HDR", "text_long_1": "Health Data Research", "extraction_algorithm_1": "fulltext" - }, - { - "text_short": "CRIS-CODE", - "text_long_1": "Clinical Record Interactive Search Comprehensive Data Extraction", - "extraction_algorithm_1": "fulltext" } ] } diff --git a/tests/data/PMC/Pre-Oct-2024/PMC8885717_bioc.json b/tests/data/PMC/Pre-Oct-2024/PMC8885717_bioc.json index 53c29fa8..ff587380 100644 --- a/tests/data/PMC/Pre-Oct-2024/PMC8885717_bioc.json +++ b/tests/data/PMC/Pre-Oct-2024/PMC8885717_bioc.json @@ -1,6 +1,6 @@ { "source": "Auto-CORPus (full-text)", - "date": "20240829", + "date": "20250514", "key": "autocorpus_fulltext.key", "infons": {}, "documents": [ diff --git a/tests/data/PMC/Pre-Oct-2024/PMC8885717_tables.json b/tests/data/PMC/Pre-Oct-2024/PMC8885717_tables.json index 04c78e3d..7c6280bf 100644 --- a/tests/data/PMC/Pre-Oct-2024/PMC8885717_tables.json +++ b/tests/data/PMC/Pre-Oct-2024/PMC8885717_tables.json @@ -1,6 +1,6 @@ { "source": "Auto-CORPus (tables)", - "date": "20240829", + "date": "20250514", "key": "autocorpus_tables.key", "infons": {}, "documents": [ diff --git a/tests/test_abbreviation.py b/tests/test_abbreviation.py new file mode 100644 index 00000000..c0860966 --- /dev/null +++ b/tests/test_abbreviation.py @@ -0,0 +1,42 @@ +"""Tests for the abbreviation module.""" + +from itertools import chain, repeat + +import pytest + +from autocorpus.abbreviation import _is_abbreviation + +_ABBREVIATIONS = ( + "ABC", + "H.P.", # can be separated by dots + "A.BC", # we don't enforce that there is a dot after every letter + "HOUSE", # allowed: all caps + "House", # allowed: at least one letter is capital (odd though) + "ÄBÇ", # we support unicode chars + "3ABC", # we allow numbers + "ABC3", + "A.B.3.", # abbreviations with numbers can still be separated by dots + "a.b.c.", # all lowercase 
strings are fine if separated by dots + "A.B." * 5, # long string, but separated by dots, so also fine +) +_NON_ABBREVIATIONS = ( + "", + "A", # too short + "AB" * 6, # too long + "3", # disallowed: exclusively composed of digits + "A!B!C!", + "H.P.!", + "abc", # disallowed: all lowercase + "house", + "äbç", # disallowed: all lowercase (unicode) + "CRIS-CODE", # hyphens not allowed +) + + +@pytest.mark.parametrize( + "s,expected", + chain(zip(_ABBREVIATIONS, repeat(True)), zip(_NON_ABBREVIATIONS, repeat(False))), +) +def test_is_abbreviation(s: str, expected: bool): + """Test the _is_abbreviation() function.""" + assert _is_abbreviation(s) == expected diff --git a/tests/test_regression.py b/tests/test_regression.py index f16a022a..eb18a088 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -17,7 +17,10 @@ ], ) def test_autocorpus(data_path: Path, input_file: str, config: dict[str, Any]) -> None: - """A regression test for the main autoCORPus class, using the each PMC config on the AutoCORPus Paper.""" + """A regression test for the main autoCORPus class. + + Uses each PMC config on the AutoCORPus Paper. 
+ """ from autocorpus.autocorpus import Autocorpus pmc_example_path = data_path / input_file From 94c48ead601967e5a66e89c95b226fc91a87a3da Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 29 May 2025 17:36:27 +0100 Subject: [PATCH 083/125] Allow abbreviations separated by hyphens --- autocorpus/abbreviation.py | 10 +++++----- tests/data/PMC/Current/PMC8885717_abbreviations.json | 12 +++++++++++- tests/data/PMC/Current/PMC8885717_bioc.json | 2 +- tests/data/PMC/Current/PMC8885717_tables.json | 2 +- .../PMC/Pre-Oct-2024/PMC8885717_abbreviations.json | 12 +++++++++++- tests/data/PMC/Pre-Oct-2024/PMC8885717_bioc.json | 2 +- tests/data/PMC/Pre-Oct-2024/PMC8885717_tables.json | 2 +- tests/test_abbreviation.py | 3 ++- 8 files changed, 33 insertions(+), 12 deletions(-) diff --git a/autocorpus/abbreviation.py b/autocorpus/abbreviation.py index 63a13c60..ea2435df 100644 --- a/autocorpus/abbreviation.py +++ b/autocorpus/abbreviation.py @@ -39,9 +39,9 @@ def _is_abbreviation(s: str) -> bool: """Check whether input string is an abbreviation. To be classified as an abbreviation, a string must be composed exclusively of - Unicode letters or digits, optionally separated by dots. This sequence must repeat - between two and ten times. We exclude strings that are *exclusively* composed of - digits or lowercase letters. + Unicode letters or digits, optionally separated by dots or hyphens. This sequence + must repeat between two and ten times. We exclude strings that are *exclusively* + composed of digits or lowercase letters. Adapted from Schwartz & Hearst. """ @@ -54,8 +54,8 @@ def _is_abbreviation(s: str) -> bool: return False # Should be a repeating sequence of unicode chars or digits, optionally separated - # by dots. The sequence must repeat between 2 and 10 times. - return bool(re2.match(r"([\p{L}\p{N}]\.?){2,10}$", s)) + # by dots or hyphens. The sequence must repeat between 2 and 10 times. 
+ return bool(re2.match(r"([\p{L}\p{N}][\.\-]?){2,10}$", s)) def _get_definition(candidate: str, preceding: str) -> str: diff --git a/tests/data/PMC/Current/PMC8885717_abbreviations.json b/tests/data/PMC/Current/PMC8885717_abbreviations.json index f55d07e4..bd1e0d7c 100644 --- a/tests/data/PMC/Current/PMC8885717_abbreviations.json +++ b/tests/data/PMC/Current/PMC8885717_abbreviations.json @@ -1,6 +1,6 @@ { "source": "Auto-CORPus (abbreviations)", - "date": "20250514", + "date": "20250529", "key": "autocorpus_abbreviations.key", "documents": [ { @@ -17,6 +17,11 @@ "text_long_1": "PubMed Central", "extraction_algorithm_1": "fulltext" }, + { + "text_short": "Auto-CORPus", + "text_long_1": "Automated pipeline for Consistent Outputs from Research Publications", + "extraction_algorithm_1": "fulltext" + }, { "text_short": "IAO", "text_long_1": "Information Artifact Ontology", @@ -56,6 +61,11 @@ "text_short": "HDR", "text_long_1": "Health Data Research", "extraction_algorithm_1": "fulltext" + }, + { + "text_short": "CRIS-CODE", + "text_long_1": "Clinical Record Interactive Search Comprehensive Data Extraction", + "extraction_algorithm_1": "fulltext" } ] } diff --git a/tests/data/PMC/Current/PMC8885717_bioc.json b/tests/data/PMC/Current/PMC8885717_bioc.json index 2de2005f..a2f2f279 100644 --- a/tests/data/PMC/Current/PMC8885717_bioc.json +++ b/tests/data/PMC/Current/PMC8885717_bioc.json @@ -1,6 +1,6 @@ { "source": "Auto-CORPus (full-text)", - "date": "20250514", + "date": "20250529", "key": "autocorpus_fulltext.key", "infons": {}, "documents": [ diff --git a/tests/data/PMC/Current/PMC8885717_tables.json b/tests/data/PMC/Current/PMC8885717_tables.json index c06319a4..df9d426b 100644 --- a/tests/data/PMC/Current/PMC8885717_tables.json +++ b/tests/data/PMC/Current/PMC8885717_tables.json @@ -1,6 +1,6 @@ { "source": "Auto-CORPus (tables)", - "date": "20250514", + "date": "20250529", "key": "autocorpus_tables.key", "infons": {}, "documents": [ diff --git 
a/tests/data/PMC/Pre-Oct-2024/PMC8885717_abbreviations.json b/tests/data/PMC/Pre-Oct-2024/PMC8885717_abbreviations.json index 83183b33..ece41782 100644 --- a/tests/data/PMC/Pre-Oct-2024/PMC8885717_abbreviations.json +++ b/tests/data/PMC/Pre-Oct-2024/PMC8885717_abbreviations.json @@ -1,6 +1,6 @@ { "source": "Auto-CORPus (abbreviations)", - "date": "20250514", + "date": "20250529", "key": "autocorpus_abbreviations.key", "documents": [ { @@ -17,6 +17,11 @@ "text_long_1": "PubMed Central", "extraction_algorithm_1": "fulltext" }, + { + "text_short": "Auto-CORPus", + "text_long_1": "Automated pipeline for Consistent Outputs from Research Publications", + "extraction_algorithm_1": "fulltext" + }, { "text_short": "IAO", "text_long_1": "Information Artifact Ontology", @@ -56,6 +61,11 @@ "text_short": "HDR", "text_long_1": "Health Data Research", "extraction_algorithm_1": "fulltext" + }, + { + "text_short": "CRIS-CODE", + "text_long_1": "Clinical Record Interactive Search Comprehensive Data Extraction", + "extraction_algorithm_1": "fulltext" } ] } diff --git a/tests/data/PMC/Pre-Oct-2024/PMC8885717_bioc.json b/tests/data/PMC/Pre-Oct-2024/PMC8885717_bioc.json index ff587380..df572974 100644 --- a/tests/data/PMC/Pre-Oct-2024/PMC8885717_bioc.json +++ b/tests/data/PMC/Pre-Oct-2024/PMC8885717_bioc.json @@ -1,6 +1,6 @@ { "source": "Auto-CORPus (full-text)", - "date": "20250514", + "date": "20250529", "key": "autocorpus_fulltext.key", "infons": {}, "documents": [ diff --git a/tests/data/PMC/Pre-Oct-2024/PMC8885717_tables.json b/tests/data/PMC/Pre-Oct-2024/PMC8885717_tables.json index 7c6280bf..db57ddfe 100644 --- a/tests/data/PMC/Pre-Oct-2024/PMC8885717_tables.json +++ b/tests/data/PMC/Pre-Oct-2024/PMC8885717_tables.json @@ -1,6 +1,6 @@ { "source": "Auto-CORPus (tables)", - "date": "20250514", + "date": "20250529", "key": "autocorpus_tables.key", "infons": {}, "documents": [ diff --git a/tests/test_abbreviation.py b/tests/test_abbreviation.py index c0860966..2eb797f6 100644 --- 
a/tests/test_abbreviation.py +++ b/tests/test_abbreviation.py @@ -10,6 +10,8 @@ "ABC", "H.P.", # can be separated by dots "A.BC", # we don't enforce that there is a dot after every letter + "CRIS-CODE", # hyphens are also allowed + "C.R.I.S-C.O.D.E", # ...as is a mix "HOUSE", # allowed: all caps "House", # allowed: at least one letter is capital (odd though) "ÄBÇ", # we support unicode chars @@ -29,7 +31,6 @@ "abc", # disallowed: all lowercase "house", "äbç", # disallowed: all lowercase (unicode) - "CRIS-CODE", # hyphens not allowed ) From 56e52b6c476f52de52b65bc623d6e0ed1b9f415e Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 19:19:56 +0100 Subject: [PATCH 084/125] Update bug_report.md --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 84952f76..0c4573fc 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -22,7 +22,7 @@ If applicable, add screenshots to help explain your problem. ## Context -Please, complete the following to better understand the system you are using to run MUSE. +Please, complete the following to better understand the system you are using to run Auto-CORPus. - Operating system (eg. Windows 10): - Auto-CORPus version (eg. 
1.0.0): From 3e281d91fe258bafbd5b00024f0b63c7aa2d57f7 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 21:27:38 +0100 Subject: [PATCH 085/125] Move the table extending and merging functions outof the AC class --- autocorpus/autocorpus.py | 294 ++++++++++++++++++++------------------- 1 file changed, 149 insertions(+), 145 deletions(-) diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index eb727777..73855322 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -158,156 +158,139 @@ def extract_text(soup: BeautifulSoup, config: dict[str, Any]) -> dict[str, Any]: return result -class Autocorpus: - """Parent class for all Auto-CORPus functionality.""" +def extend_tables_documents( + documents: list[dict[str, Any]], new_documents: list[dict[str, Any]] +) -> list[dict[str, Any]]: + """Extends the list of tables documents with new documents, ensuring unique IDs. - def _extract_soup_and_tables(self, file_path: Path) -> BeautifulSoup: - """Extract the soup from the html file and assign tables to self.tables. + Args: + documents: The original list of documents to be extended. + new_documents: New list of documents to add. - Args: - file_path: The html file path to be processed. + Returns: + A list of documents with unique IDs, combining the original and new documents. + """ + seen_ids = set() + for doc in documents: + seen_ids.add(doc["id"].partition(".")[0]) - Returns: - The BeautifulSoup object of the html file. 
- """ - soup = load_html_file(file_path) - if "tables" in self.config: - tables, empty_tables = get_table_json(soup, self.config, file_path) - self._update_table_ids(tables, empty_tables) - return soup + for doc in new_documents: + tabl_id, _, tabl_pos = doc["id"].partition(".") + if tabl_id in seen_ids: + tabl_id = str(len(seen_ids) + 1) + if tabl_pos: + doc["id"] = f"{tabl_id}.{tabl_pos}" + else: + doc["id"] = tabl_id + seen_ids.add(tabl_id) - def _update_table_ids( - self, tables: dict[str, Any], empty_tables: list[dict[str, Any]] - ): - """Update the table IDs in the new tables to avoid conflicts with existing ones. + documents.extend(new_documents) - Args: - tables: New tables dictionary to be updated in line with self.tables. - empty_tables: New empty tables list to add to self.empty_tables. - """ - if not self.tables: - self.tables: dict[str, Any] = tables - self.empty_tables = empty_tables - return + return documents - seen_ids = set() - for tab in self.tables["documents"]: - seen_ids.add(tab["id"].partition(".")[0]) - - for tabl in tables["documents"]: - tabl_id, _, tabl_pos = tabl["id"].partition(".") - if tabl_id in seen_ids: - tabl_id = str(len(seen_ids) + 1) - if tabl_pos: - tabl["id"] = f"{tabl_id}.{tabl_pos}" - else: - tabl["id"] = tabl_id - seen_ids.add(tabl_id) - - self.tables["documents"].extend(tables["documents"]) - self.empty_tables.extend(empty_tables) - - def _merge_table_data(self): - if not self.empty_tables: - return - documents = self.tables.get("documents", None) - if not documents: - return +def merge_tables_with_empty_tables( + documents: list[dict[str, Any]], empty_tables: list[dict[str, Any]] +) -> list[dict[str, Any]]: + """Extends the list of tables documents with empty tables, ensuring titles are set. - seen_ids = {} - for i, table in enumerate(documents): - if "id" in table: - seen_ids[str(i)] = f"Table {table['id']}." 
- - for table in self.empty_tables: - for seenID in seen_ids.keys(): - if not table["title"].startswith(seen_ids[seenID]): - continue - - if "title" in table and not table["title"] == "": - set_new = False - for passage in documents[int(seenID)]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_title" - ): - passage["text"] = table["title"] - set_new = True - if not set_new: - documents[int(seenID)]["passages"].append( - { - "offset": 0, - "infons": { - "section_type": [ - { - "section_name": "table_title", - "iao_name": "document title", - "iao_id": "IAO:0000305", - } - ] - }, - "text": table["title"], - } - ) - if "caption" in table and not table["caption"] == "": - set_new = False - for passage in documents[int(seenID)]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_caption" - ): - passage["text"] = table["caption"] - set_new = True - if not set_new: - documents[int(seenID)]["passages"].append( - { - "offset": 0, - "infons": { - "section_type": [ - { - "section_name": "table_caption", - "iao_name": "caption", - "iao_id": "IAO:0000304", - } - ] - }, - "text": table["caption"], - } - ) - if "footer" in table and not table["footer"] == "": - set_new = False - for passage in documents[int(seenID)]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_footer" - ): - passage["text"] = table["footer"] - set_new = True - if not set_new: - documents[int(seenID)]["passages"].append( - { - "offset": 0, - "infons": { - "section_type": [ - { - "section_name": "table_footer", - "iao_name": "caption", - "iao_id": "IAO:0000304", - } - ] - }, - "text": table["footer"], - } - ) - - def _extract_html_article(self, file_path: Path): - soup = self._extract_soup_and_tables(file_path) - self.main_text = extract_text(soup, self.config) - try: - self.abbreviations = get_abbreviations(self.main_text, soup, file_path) - except Exception as e: - logger.error(e) + Args: + documents: 
The original list of documents to be extended. + empty_tables: A list of empty tables to merge with the documents. + + Returns: + A list of documents with titles and captions from empty tables merged in. + """ + seen_ids = {} + for i, table in enumerate(documents): + if "id" in table: + seen_ids[str(i)] = f"Table {table['id']}." + + for table in empty_tables: + for seenID in seen_ids.keys(): + if not table["title"].startswith(seen_ids[seenID]): + continue + + if "title" in table and not table["title"] == "": + set_new = False + for passage in documents[int(seenID)]["passages"]: + if ( + passage["infons"]["section_type"][0]["section_name"] + == "table_title" + ): + passage["text"] = table["title"] + set_new = True + if not set_new: + documents[int(seenID)]["passages"].append( + { + "offset": 0, + "infons": { + "section_type": [ + { + "section_name": "table_title", + "iao_name": "document title", + "iao_id": "IAO:0000305", + } + ] + }, + "text": table["title"], + } + ) + if "caption" in table and not table["caption"] == "": + set_new = False + for passage in documents[int(seenID)]["passages"]: + if ( + passage["infons"]["section_type"][0]["section_name"] + == "table_caption" + ): + passage["text"] = table["caption"] + set_new = True + if not set_new: + documents[int(seenID)]["passages"].append( + { + "offset": 0, + "infons": { + "section_type": [ + { + "section_name": "table_caption", + "iao_name": "caption", + "iao_id": "IAO:0000304", + } + ] + }, + "text": table["caption"], + } + ) + if "footer" in table and not table["footer"] == "": + set_new = False + for passage in documents[int(seenID)]["passages"]: + if ( + passage["infons"]["section_type"][0]["section_name"] + == "table_footer" + ): + passage["text"] = table["footer"] + set_new = True + if not set_new: + documents[int(seenID)]["passages"].append( + { + "offset": 0, + "infons": { + "section_type": [ + { + "section_name": "table_footer", + "iao_name": "caption", + "iao_id": "IAO:0000304", + } + ] + }, + 
"text": table["footer"], + } + ) + return documents + + +class Autocorpus: + """Parent class for all Auto-CORPus functionality.""" def process_html_article(self): """Processes the main text file and tables specified in the configuration. @@ -328,10 +311,31 @@ def process_html_article(self): Raises: RuntimeError: If no valid configuration is loaded. """ - self._extract_html_article(self.file_path) + soup = load_html_file(self.file_path) + self.main_text = extract_text(soup, self.config) + try: + self.abbreviations = get_abbreviations(self.main_text, soup, self.file_path) + except Exception as e: + logger.error(e) + + if "tables" not in self.config: + return + + self.tables, self.empty_tables = get_table_json( + soup, self.config, self.file_path + ) + + new_documents = [] for table_file in self.linked_tables: - self._extract_soup_and_tables(table_file) - self._merge_table_data() + soup = load_html_file(self.file_path) + tables, empty_tables = get_table_json(soup, self.config, self.file_path) + new_documents.extend(tables.get("documents", [])) + self.empty_tables.extend(empty_tables) + self.tables["documents"] = extend_tables_documents( + self.tables.get("documents", []), new_documents + ) + if self.empty_tables: + merge_tables_with_empty_tables(self.tables["documents"], self.empty_tables) self.has_tables = bool(self.tables.get("documents")) def _process_file(self): From 72708e220903fb035ab719f203cb23ef2ebde603 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 21:37:00 +0100 Subject: [PATCH 086/125] Move process_file out of AC class --- autocorpus/autocorpus.py | 87 ++++++++++++++++++++-------------------- autocorpus/run.py | 1 + tests/test_regression.py | 11 ++--- 3 files changed, 49 insertions(+), 50 deletions(-) diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 73855322..4da0243e 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -338,47 +338,6 @@ def process_html_article(self): 
merge_tables_with_empty_tables(self.tables["documents"], self.empty_tables) self.has_tables = bool(self.tables.get("documents")) - def _process_file(self): - """Process the input file based on its type. - - This method checks the file type and processes the file accordingly. - - Raises: - NotImplementedError: For files types with no implemented processing. - ModuleNotFoundError: For PDF processing if required packages are not found. - """ - match check_file_type(self.file_path): - case FileType.HTML: - self.process_html_article() - case FileType.XML: - raise NotImplementedError( - f"Could not process file {self.file_path}: " - "XML processing is not implemented yet." - ) - case FileType.PDF: - try: - from .ac_bioc.bioctable.json import BioCTableJSONEncoder - from .ac_bioc.json import BioCJSONEncoder - from .pdf import extract_pdf_content - - text, tables = extract_pdf_content(self.file_path) - - # TODO: Use text.to_dict() after bugfix in ac_bioc - self.main_text = BioCJSONEncoder().default(text) - self.tables = BioCTableJSONEncoder().default(tables) - - except ModuleNotFoundError: - logger.error( - "Could not load necessary PDF packages. If you installed " - "Auto-CORPUS via pip, you can obtain these with:\n" - " pip install autocorpus[pdf]" - ) - raise - case FileType.OTHER: - raise NotImplementedError( - f"Could not identify file type for {self.file_path}" - ) - def __init__( self, config: dict[str, Any], @@ -405,8 +364,6 @@ def __init__( self.abbreviations = {} self.has_tables = False - self._process_file() - def to_bioc(self) -> dict[str, Any]: """Get the currently loaded bioc as a dict. @@ -491,6 +448,50 @@ def to_dict(self) -> dict[str, Any]: } +def process_file(config: dict[str, Any], file_path: Path) -> Autocorpus: + """Process the input file based on its type. + + This method checks the file type and processes the file accordingly. + + Raises: + NotImplementedError: For files types with no implemented processing. 
+ ModuleNotFoundError: For PDF processing if required packages are not found. + """ + ac = Autocorpus(config, file_path) + + match check_file_type(file_path): + case FileType.HTML: + ac.process_html_article() + case FileType.XML: + raise NotImplementedError( + f"Could not process file {file_path}: " + "XML processing is not implemented yet." + ) + case FileType.PDF: + try: + from .ac_bioc.bioctable.json import BioCTableJSONEncoder + from .ac_bioc.json import BioCJSONEncoder + from .pdf import extract_pdf_content + + text, tables = extract_pdf_content(file_path) + + # TODO: Use text.to_dict() after bugfix in ac_bioc + ac.main_text = BioCJSONEncoder().default(text) + ac.tables = BioCTableJSONEncoder().default(tables) + + except ModuleNotFoundError: + logger.error( + "Could not load necessary PDF packages. If you installed " + "Auto-CORPUS via pip, you can obtain these with:\n" + " pip install autocorpus[pdf]" + ) + raise + case FileType.OTHER: + raise NotImplementedError(f"Could not identify file type for {file_path}") + + return ac + + def process_directory(config: dict[str, Any], dir_path: Path) -> Iterable[Autocorpus]: """Process all files in a directory and its subdirectories. diff --git a/autocorpus/run.py b/autocorpus/run.py index 85225a9c..011f4433 100644 --- a/autocorpus/run.py +++ b/autocorpus/run.py @@ -19,6 +19,7 @@ def run_autocorpus(config, structure, key, output_format): file_path=Path(structure[key]["main_text"]), linked_tables=sorted(Path(lt) for lt in structure[key]["linked_tables"]), ) + ac.process_html_article() out_dir = Path(structure[key]["out_dir"]) if structure[key]["main_text"]: diff --git a/tests/test_regression.py b/tests/test_regression.py index 53753aff..7d459e9e 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -21,7 +21,7 @@ def test_autocorpus(data_path: Path, input_file: str, config: dict[str, Any]) -> Uses each PMC config on the AutoCORPus Paper. 
""" - from autocorpus.autocorpus import Autocorpus + from autocorpus.autocorpus import process_file pmc_example_path = data_path / input_file with open( @@ -39,7 +39,7 @@ def test_autocorpus(data_path: Path, input_file: str, config: dict[str, Any]) -> ) as f: expected_tables = json.load(f) - auto_corpus = Autocorpus(config=config, file_path=pmc_example_path) + auto_corpus = process_file(config=config, file_path=pmc_example_path) abbreviations = auto_corpus.abbreviations bioc = auto_corpus.to_bioc() @@ -67,7 +67,7 @@ def test_autocorpus(data_path: Path, input_file: str, config: dict[str, Any]) -> ) def test_pdf_to_bioc(data_path: Path, input_file: str, config: dict[str, Any]) -> None: """Test the conversion of a PDF file to a BioC format.""" - from autocorpus.autocorpus import Autocorpus + from autocorpus.autocorpus import process_file pdf_path = data_path / input_file expected_output = pdf_path.parent / "Expected Output" / pdf_path.name @@ -83,10 +83,7 @@ def test_pdf_to_bioc(data_path: Path, input_file: str, config: dict[str, Any]) - ) as f: expected_tables = json.load(f) - auto_corpus = Autocorpus( - config=config, - file_path=pdf_path, - ) + auto_corpus = process_file(config=config, file_path=pdf_path) new_bioc = auto_corpus.main_text new_tables = auto_corpus.tables From 75d6aa7216cabcb855e15edf2e9f104af50e252e Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 22:12:07 +0100 Subject: [PATCH 087/125] Remove process_html_article and convert Autocorpus to a dataclass --- autocorpus/autocorpus.py | 175 ++++++++++++++++++++------------------- autocorpus/run.py | 5 +- 2 files changed, 94 insertions(+), 86 deletions(-) diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 4da0243e..cac1bc3c 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -2,6 +2,7 @@ import json from collections.abc import Iterable +from dataclasses import dataclass, field from pathlib import Path from typing import Any, cast @@ -289,80 
+290,23 @@ def merge_tables_with_empty_tables( return documents +@dataclass class Autocorpus: - """Parent class for all Auto-CORPus functionality.""" - - def process_html_article(self): - """Processes the main text file and tables specified in the configuration. - - This method performs the following steps: - 1. Checks if a valid configuration is loaded. If not, raises a RuntimeError. - 2. Handles the main text file: - - Parses the HTML content of the file. - - Extracts the main text from the parsed HTML. - - Attempts to extract abbreviations from the main text and HTML content. - If an error occurs during this process, it prints the error. - 3. Processes linked tables, if any: - - Parses the HTML content of each linked table file. - 4. Merges table data. - 5. Checks if there are any documents in the tables and sets the `has_tables` - attribute accordingly. - - Raises: - RuntimeError: If no valid configuration is loaded. - """ - soup = load_html_file(self.file_path) - self.main_text = extract_text(soup, self.config) - try: - self.abbreviations = get_abbreviations(self.main_text, soup, self.file_path) - except Exception as e: - logger.error(e) - - if "tables" not in self.config: - return - - self.tables, self.empty_tables = get_table_json( - soup, self.config, self.file_path - ) + """Dataclass for a collection of BioC formatted text, tables and abbreviations.""" - new_documents = [] - for table_file in self.linked_tables: - soup = load_html_file(self.file_path) - tables, empty_tables = get_table_json(soup, self.config, self.file_path) - new_documents.extend(tables.get("documents", [])) - self.empty_tables.extend(empty_tables) - self.tables["documents"] = extend_tables_documents( - self.tables.get("documents", []), new_documents - ) - if self.empty_tables: - merge_tables_with_empty_tables(self.tables["documents"], self.empty_tables) - self.has_tables = bool(self.tables.get("documents")) - - def __init__( - self, - config: dict[str, Any], - file_path: Path, - 
linked_tables: list[Path] = [], - ): - """Create valid BioC versions of input HTML journal articles based off config. + file_path: Path + main_text: dict[str, Any] + abbreviations: dict[str, Any] + tables: dict[str, Any] = field(default_factory=dict) - Args: - config: Configuration dictionary for the input journal articles - file_path: Path to the article file to be processed - linked_tables: list of linked table file paths to be included in this run - (HTML files only) + @property + def has_tables(self) -> bool: + """Check if the Autocorpus has any tables. + + Returns: + True if there are tables, False otherwise. """ - if config == {}: - raise RuntimeError("A valid config file must be loaded.") - - self.file_path = file_path - self.linked_tables = linked_tables - self.config = config - self.main_text = {} - self.empty_tables = [] - self.tables = {} - self.abbreviations = {} - self.has_tables = False + return bool(self.tables.get("documents")) def to_bioc(self) -> dict[str, Any]: """Get the currently loaded bioc as a dict. @@ -448,20 +392,85 @@ def to_dict(self) -> dict[str, Any]: } -def process_file(config: dict[str, Any], file_path: Path) -> Autocorpus: +def process_html_article( + config: dict[str, Any], file_path: Path, linked_tables: list[Path] = [] +) -> Autocorpus: + """Create valid BioC versions of input HTML journal articles based off config. + + Processes the main text file and tables specified in the configuration. + + This method performs the following steps: + 1. Checks if a valid configuration is loaded. If not, raises a RuntimeError. + 2. Handles the main text file: + - Parses the HTML content of the file. + - Extracts the main text from the parsed HTML. + - Attempts to extract abbreviations from the main text and HTML content. + If an error occurs during this process, it prints the error. + 3. Processes linked tables, if any: + - Parses the HTML content of each linked table file. + 4. Merges table data. + 5. 
Checks if there are any documents in the tables and sets the `has_tables` + attribute accordingly. + + Args: + config: Configuration dictionary for the input journal articles + file_path: Path to the article file to be processed + linked_tables: list of linked table file paths to be included in this run + (HTML files only) + + Raises: + RuntimeError: If no valid configuration is loaded. + """ + if config == {}: + raise RuntimeError("A valid config file must be loaded.") + + soup = load_html_file(file_path) + main_text = extract_text(soup, config) + try: + abbreviations = get_abbreviations(main_text, soup, file_path) + except Exception as e: + logger.error(e) + + if "tables" not in config: + return Autocorpus(file_path, main_text, abbreviations) + + tables, empty_tables = get_table_json(soup, config, file_path) + + new_documents = [] + for table_file in linked_tables: + soup = load_html_file(file_path) + tables, empty_tables = get_table_json(soup, config, file_path) + new_documents.extend(tables.get("documents", [])) + empty_tables.extend(empty_tables) + tables["documents"] = extend_tables_documents( + tables.get("documents", []), new_documents + ) + if empty_tables: + merge_tables_with_empty_tables(tables["documents"], empty_tables) + + return Autocorpus(file_path, main_text, abbreviations, tables) + + +def process_file( + config: dict[str, Any], file_path: Path, linked_tables: list[Path] = [] +) -> Autocorpus: """Process the input file based on its type. This method checks the file type and processes the file accordingly. + Args: + config: Configuration dictionary for the input journal articles + file_path: Path to the article file to be processed + linked_tables: list of linked table file paths to be included in this run + (HTML files only) + Raises: NotImplementedError: For files types with no implemented processing. ModuleNotFoundError: For PDF processing if required packages are not found. 
""" - ac = Autocorpus(config, file_path) - match check_file_type(file_path): case FileType.HTML: - ac.process_html_article() + return process_html_article(config, file_path, linked_tables) case FileType.XML: raise NotImplementedError( f"Could not process file {file_path}: " @@ -473,11 +482,13 @@ def process_file(config: dict[str, Any], file_path: Path) -> Autocorpus: from .ac_bioc.json import BioCJSONEncoder from .pdf import extract_pdf_content - text, tables = extract_pdf_content(file_path) + text, tbls = extract_pdf_content(file_path) - # TODO: Use text.to_dict() after bugfix in ac_bioc - ac.main_text = BioCJSONEncoder().default(text) - ac.tables = BioCTableJSONEncoder().default(tables) + # TODO: Use text.to_dict() after bugfix in ac_bioc (Issue #272) + main_text = BioCJSONEncoder().default(text) + tables = BioCTableJSONEncoder().default(tbls) + + return Autocorpus(file_path, main_text, dict(), tables) except ModuleNotFoundError: logger.error( @@ -489,8 +500,6 @@ def process_file(config: dict[str, Any], file_path: Path) -> Autocorpus: case FileType.OTHER: raise NotImplementedError(f"Could not identify file type for {file_path}") - return ac - def process_directory(config: dict[str, Any], dir_path: Path) -> Iterable[Autocorpus]: """Process all files in a directory and its subdirectories. 
@@ -504,12 +513,12 @@ def process_directory(config: dict[str, Any], dir_path: Path) -> Iterable[Autoco """ for file_path in dir_path.iterdir(): if file_path.is_file(): - yield Autocorpus(config, file_path) + yield process_file(config, file_path) elif file_path.is_dir(): # recursively process all files in the subdirectory for sub_file_path in file_path.rglob("*"): - yield Autocorpus(config, sub_file_path) + yield process_file(config, sub_file_path) def process_files(config: dict[str, Any], files: list[Path]) -> Iterable[Autocorpus]: @@ -529,4 +538,4 @@ def process_files(config: dict[str, Any], files: list[Path]) -> Iterable[Autocor raise RuntimeError("All files must be valid file paths.") for file_path in files: - yield Autocorpus(config, file_path) + yield process_file(config, file_path) diff --git a/autocorpus/run.py b/autocorpus/run.py index 011f4433..606bdf5e 100644 --- a/autocorpus/run.py +++ b/autocorpus/run.py @@ -2,7 +2,7 @@ from pathlib import Path -from .autocorpus import Autocorpus +from .autocorpus import process_file def run_autocorpus(config, structure, key, output_format): @@ -14,12 +14,11 @@ def run_autocorpus(config, structure, key, output_format): key: The key in the structure dict for the current file. output_format: The output format to use (JSON or XML). 
""" - ac = Autocorpus( + ac = process_file( config=config, file_path=Path(structure[key]["main_text"]), linked_tables=sorted(Path(lt) for lt in structure[key]["linked_tables"]), ) - ac.process_html_article() out_dir = Path(structure[key]["out_dir"]) if structure[key]["main_text"]: From 562dfa53b96b1960f45678b20fb1cc9c614a2ca5 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 22:34:04 +0100 Subject: [PATCH 088/125] Move functions into new html and file_processins modules --- autocorpus/autocorpus.py | 434 +--------------------------------- autocorpus/file_processing.py | 102 ++++++++ autocorpus/html.py | 349 +++++++++++++++++++++++++++ autocorpus/py.typed | 0 autocorpus/run.py | 2 +- tests/test_regression.py | 5 +- 6 files changed, 455 insertions(+), 437 deletions(-) create mode 100644 autocorpus/file_processing.py create mode 100644 autocorpus/html.py create mode 100644 autocorpus/py.typed diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index cac1bc3c..40f8c121 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -1,293 +1,12 @@ -"""Auto-CORPus primary functions are defined in this module.""" +"""The Auto-CORPus primary dataclass is defined in this module.""" import json -from collections.abc import Iterable from dataclasses import dataclass, field from pathlib import Path -from typing import Any, cast +from typing import Any -from bs4 import BeautifulSoup, Tag - -from . import logger -from .abbreviation import get_abbreviations from .ac_bioc import BioCJSON, BioCXML from .bioc_formatter import get_formatted_bioc_collection -from .data_structures import Paragraph -from .file_type import FileType, check_file_type -from .section import get_section -from .table import get_table_json -from .utils import handle_not_tables - - -def load_html_file(fpath: Path) -> BeautifulSoup: - """Convert the input file into a BeautifulSoup object. - - Args: - fpath: Path to the input file. 
- - Returns: - BeautifulSoup object of the input file. - """ - with fpath.open(encoding="utf-8") as fp: - soup = BeautifulSoup(fp.read(), "html.parser") - for e in soup.find_all(attrs={"style": ["display:none", "visibility:hidden"]}): - e.extract() - return soup - - -def get_keywords( - soup: BeautifulSoup, keywords_config: dict[str, Any] -) -> Paragraph | None: - """Extract keywords from the soup object based on the provided configuration. - - Args: - soup: BeautifulSoup object of the HTML file. - keywords_config: AC config rules for keywords. - - Returns: - dict: Extracted keywords as a dictionary. - """ - responses = handle_not_tables(keywords_config, soup) - if not responses: - return None - - return Paragraph( - section_heading="keywords", - subsection_heading="", - body=" ".join( - x["node"].get_text() for x in responses if isinstance(x["node"], Tag) - ), - section_type=[{"iao_name": "keywords section", "iao_id": "IAO:0000630"}], - ) - - -def get_title(soup: BeautifulSoup, title_config: dict[str, Any]) -> str: - """Extract the title from the soup object based on the provided configuration. - - Args: - soup: BeautifulSoup object of the HTML file. - title_config: AC config rules for the title. - - Returns: - Extracted title as a string. - """ - titles = handle_not_tables(title_config, soup) - if not titles: - return "" - - node = cast(Tag, titles[0]["node"]) - - return node.get_text() - - -def get_sections( - soup: BeautifulSoup, sections_config: dict[str, Any] -) -> list[dict[str, Tag | list[str]]]: - """Extract sections from the soup object based on the provided configuration. - - Args: - soup: Beautiful Soup object of the HTML file. - sections_config: AC config rules for sections. - - Returns: - A list of matches for the provided config rules. Either as a Tag or a list of - strings. 
- """ - return handle_not_tables(sections_config, soup) - - -def set_unknown_section_headings(unique_text: list[Paragraph]) -> list[Paragraph]: - """Set the heading for sections that are not specified in the config. - - Args: - unique_text: List of unique text sections. - - Returns: - A list of unique text sections with unknown headings set to "document part". - """ - paper = {} - for para in unique_text: - if para.section_heading != "keywords": - paper[para.section_heading] = [x["iao_name"] for x in para.section_type] - - for text in unique_text: - if not text.section_heading: - text.section_heading = "document part" - text.section_type = [{"iao_name": "document part", "iao_id": "IAO:0000314"}] - - return unique_text - - -def extract_text(soup: BeautifulSoup, config: dict[str, Any]) -> dict[str, Any]: - """Extract the main text of the article from the soup object. - - This converts a BeautifulSoup object of a html article into a Python dict that - aligns with the BioC format defined in the provided config. 
- - Args: - soup: BeautifulSoup object of html - config: AC config rules - - Return: - dict of the maintext - """ - result: dict[str, Any] = {} - - # Extract tags of text body and hard-code as: - # p (main text) and span (keywords and refs) - result["title"] = get_title(soup, config["title"]) if "title" in config else "" - maintext = [] - if "keywords" in config and (keywords := get_keywords(soup, config["keywords"])): - maintext.append(keywords) - sections = get_sections(soup, config["sections"]) if "sections" in config else [] - for sec in sections: - maintext.extend(get_section(config, sec)) - - # filter out the sections which do not contain any info - filtered_text = [x for x in maintext if x] - unique_text = [] - seen_text = [] - for text in filtered_text: - if text.body not in seen_text: - seen_text.append(text.body) - unique_text.append(text) - - result["paragraphs"] = [ - p.as_dict() for p in set_unknown_section_headings(unique_text) - ] - - return result - - -def extend_tables_documents( - documents: list[dict[str, Any]], new_documents: list[dict[str, Any]] -) -> list[dict[str, Any]]: - """Extends the list of tables documents with new documents, ensuring unique IDs. - - Args: - documents: The original list of documents to be extended. - new_documents: New list of documents to add. - - Returns: - A list of documents with unique IDs, combining the original and new documents. 
- """ - seen_ids = set() - for doc in documents: - seen_ids.add(doc["id"].partition(".")[0]) - - for doc in new_documents: - tabl_id, _, tabl_pos = doc["id"].partition(".") - if tabl_id in seen_ids: - tabl_id = str(len(seen_ids) + 1) - if tabl_pos: - doc["id"] = f"{tabl_id}.{tabl_pos}" - else: - doc["id"] = tabl_id - seen_ids.add(tabl_id) - - documents.extend(new_documents) - - return documents - - -def merge_tables_with_empty_tables( - documents: list[dict[str, Any]], empty_tables: list[dict[str, Any]] -) -> list[dict[str, Any]]: - """Extends the list of tables documents with empty tables, ensuring titles are set. - - Args: - documents: The original list of documents to be extended. - empty_tables: A list of empty tables to merge with the documents. - - Returns: - A list of documents with titles and captions from empty tables merged in. - """ - seen_ids = {} - for i, table in enumerate(documents): - if "id" in table: - seen_ids[str(i)] = f"Table {table['id']}." - - for table in empty_tables: - for seenID in seen_ids.keys(): - if not table["title"].startswith(seen_ids[seenID]): - continue - - if "title" in table and not table["title"] == "": - set_new = False - for passage in documents[int(seenID)]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_title" - ): - passage["text"] = table["title"] - set_new = True - if not set_new: - documents[int(seenID)]["passages"].append( - { - "offset": 0, - "infons": { - "section_type": [ - { - "section_name": "table_title", - "iao_name": "document title", - "iao_id": "IAO:0000305", - } - ] - }, - "text": table["title"], - } - ) - if "caption" in table and not table["caption"] == "": - set_new = False - for passage in documents[int(seenID)]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_caption" - ): - passage["text"] = table["caption"] - set_new = True - if not set_new: - documents[int(seenID)]["passages"].append( - { - "offset": 0, - "infons": { - 
"section_type": [ - { - "section_name": "table_caption", - "iao_name": "caption", - "iao_id": "IAO:0000304", - } - ] - }, - "text": table["caption"], - } - ) - if "footer" in table and not table["footer"] == "": - set_new = False - for passage in documents[int(seenID)]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_footer" - ): - passage["text"] = table["footer"] - set_new = True - if not set_new: - documents[int(seenID)]["passages"].append( - { - "offset": 0, - "infons": { - "section_type": [ - { - "section_name": "table_footer", - "iao_name": "caption", - "iao_id": "IAO:0000304", - } - ] - }, - "text": table["footer"], - } - ) - return documents @dataclass @@ -390,152 +109,3 @@ def to_dict(self) -> dict[str, Any]: "abbreviations": self.abbreviations, "tables": self.tables, } - - -def process_html_article( - config: dict[str, Any], file_path: Path, linked_tables: list[Path] = [] -) -> Autocorpus: - """Create valid BioC versions of input HTML journal articles based off config. - - Processes the main text file and tables specified in the configuration. - - This method performs the following steps: - 1. Checks if a valid configuration is loaded. If not, raises a RuntimeError. - 2. Handles the main text file: - - Parses the HTML content of the file. - - Extracts the main text from the parsed HTML. - - Attempts to extract abbreviations from the main text and HTML content. - If an error occurs during this process, it prints the error. - 3. Processes linked tables, if any: - - Parses the HTML content of each linked table file. - 4. Merges table data. - 5. Checks if there are any documents in the tables and sets the `has_tables` - attribute accordingly. 
- - Args: - config: Configuration dictionary for the input journal articles - file_path: Path to the article file to be processed - linked_tables: list of linked table file paths to be included in this run - (HTML files only) - - Raises: - RuntimeError: If no valid configuration is loaded. - """ - if config == {}: - raise RuntimeError("A valid config file must be loaded.") - - soup = load_html_file(file_path) - main_text = extract_text(soup, config) - try: - abbreviations = get_abbreviations(main_text, soup, file_path) - except Exception as e: - logger.error(e) - - if "tables" not in config: - return Autocorpus(file_path, main_text, abbreviations) - - tables, empty_tables = get_table_json(soup, config, file_path) - - new_documents = [] - for table_file in linked_tables: - soup = load_html_file(file_path) - tables, empty_tables = get_table_json(soup, config, file_path) - new_documents.extend(tables.get("documents", [])) - empty_tables.extend(empty_tables) - tables["documents"] = extend_tables_documents( - tables.get("documents", []), new_documents - ) - if empty_tables: - merge_tables_with_empty_tables(tables["documents"], empty_tables) - - return Autocorpus(file_path, main_text, abbreviations, tables) - - -def process_file( - config: dict[str, Any], file_path: Path, linked_tables: list[Path] = [] -) -> Autocorpus: - """Process the input file based on its type. - - This method checks the file type and processes the file accordingly. - - Args: - config: Configuration dictionary for the input journal articles - file_path: Path to the article file to be processed - linked_tables: list of linked table file paths to be included in this run - (HTML files only) - - Raises: - NotImplementedError: For files types with no implemented processing. - ModuleNotFoundError: For PDF processing if required packages are not found. 
- """ - match check_file_type(file_path): - case FileType.HTML: - return process_html_article(config, file_path, linked_tables) - case FileType.XML: - raise NotImplementedError( - f"Could not process file {file_path}: " - "XML processing is not implemented yet." - ) - case FileType.PDF: - try: - from .ac_bioc.bioctable.json import BioCTableJSONEncoder - from .ac_bioc.json import BioCJSONEncoder - from .pdf import extract_pdf_content - - text, tbls = extract_pdf_content(file_path) - - # TODO: Use text.to_dict() after bugfix in ac_bioc (Issue #272) - main_text = BioCJSONEncoder().default(text) - tables = BioCTableJSONEncoder().default(tbls) - - return Autocorpus(file_path, main_text, dict(), tables) - - except ModuleNotFoundError: - logger.error( - "Could not load necessary PDF packages. If you installed " - "Auto-CORPUS via pip, you can obtain these with:\n" - " pip install autocorpus[pdf]" - ) - raise - case FileType.OTHER: - raise NotImplementedError(f"Could not identify file type for {file_path}") - - -def process_directory(config: dict[str, Any], dir_path: Path) -> Iterable[Autocorpus]: - """Process all files in a directory and its subdirectories. - - Args: - config: Configuration dictionary for the input HTML journal articles - dir_path: Path to the directory containing files to be processed. - - Returns: - A generator yielding Autocorpus objects for each processed file. - """ - for file_path in dir_path.iterdir(): - if file_path.is_file(): - yield process_file(config, file_path) - - elif file_path.is_dir(): - # recursively process all files in the subdirectory - for sub_file_path in file_path.rglob("*"): - yield process_file(config, sub_file_path) - - -def process_files(config: dict[str, Any], files: list[Path]) -> Iterable[Autocorpus]: - """Process all files in a list. - - Args: - config: Configuration dictionary for the input HTML journal articles - files: list of Paths to the files to be processed. 
- - Returns: - A generator yielding Autocorpus objects for each processed file. - - Raises: - RuntimeError: If the list of files is invalid. - """ - if not all(file.is_file() for file in files): - raise RuntimeError("All files must be valid file paths.") - - for file_path in files: - yield process_file(config, file_path) diff --git a/autocorpus/file_processing.py b/autocorpus/file_processing.py new file mode 100644 index 00000000..d8136939 --- /dev/null +++ b/autocorpus/file_processing.py @@ -0,0 +1,102 @@ +"""Module providing functions for processing files with Auto-CORPus.""" + +from collections.abc import Iterable +from pathlib import Path +from typing import Any + +from . import logger +from .autocorpus import Autocorpus +from .file_type import FileType, check_file_type +from .html import process_html_article + + +def process_file( + config: dict[str, Any], file_path: Path, linked_tables: list[Path] = [] +) -> Autocorpus: + """Process the input file based on its type. + + This method checks the file type and processes the file accordingly. + + Args: + config: Configuration dictionary for the input journal articles + file_path: Path to the article file to be processed + linked_tables: list of linked table file paths to be included in this run + (HTML files only) + + Raises: + NotImplementedError: For files types with no implemented processing. + ModuleNotFoundError: For PDF processing if required packages are not found. + """ + match check_file_type(file_path): + case FileType.HTML: + return Autocorpus( + file_path, *process_html_article(config, file_path, linked_tables) + ) + case FileType.XML: + raise NotImplementedError( + f"Could not process file {file_path}: " + "XML processing is not implemented yet." 
+ ) + case FileType.PDF: + try: + from .ac_bioc.bioctable.json import BioCTableJSONEncoder + from .ac_bioc.json import BioCJSONEncoder + from .pdf import extract_pdf_content + + text, tbls = extract_pdf_content(file_path) + + # TODO: Use text.to_dict() after bugfix in ac_bioc (Issue #272) + main_text = BioCJSONEncoder().default(text) + tables = BioCTableJSONEncoder().default(tbls) + + return Autocorpus(file_path, main_text, dict(), tables) + + except ModuleNotFoundError: + logger.error( + "Could not load necessary PDF packages. If you installed " + "Auto-CORPUS via pip, you can obtain these with:\n" + " pip install autocorpus[pdf]" + ) + raise + case FileType.OTHER: + raise NotImplementedError(f"Could not identify file type for {file_path}") + + +def process_directory(config: dict[str, Any], dir_path: Path) -> Iterable[Autocorpus]: + """Process all files in a directory and its subdirectories. + + Args: + config: Configuration dictionary for the input HTML journal articles + dir_path: Path to the directory containing files to be processed. + + Returns: + A generator yielding Autocorpus objects for each processed file. + """ + for file_path in dir_path.iterdir(): + if file_path.is_file(): + yield process_file(config, file_path) + + elif file_path.is_dir(): + # recursively process all files in the subdirectory + for sub_file_path in file_path.rglob("*"): + yield process_file(config, sub_file_path) + + +def process_files(config: dict[str, Any], files: list[Path]) -> Iterable[Autocorpus]: + """Process all files in a list. + + Args: + config: Configuration dictionary for the input HTML journal articles + files: list of Paths to the files to be processed. + + Returns: + A generator yielding Autocorpus objects for each processed file. + + Raises: + RuntimeError: If the list of files is invalid. 
+ """ + if not all(file.is_file() for file in files): + raise RuntimeError("All files must be valid file paths.") + + for file_path in files: + yield process_file(config, file_path) diff --git a/autocorpus/html.py b/autocorpus/html.py new file mode 100644 index 00000000..31fd8ee5 --- /dev/null +++ b/autocorpus/html.py @@ -0,0 +1,349 @@ +"""The Auto-CORPus HTML processing module.""" + +from pathlib import Path +from typing import Any, cast + +from bs4 import BeautifulSoup, Tag + +from . import logger +from .abbreviation import get_abbreviations +from .data_structures import Paragraph +from .section import get_section +from .table import get_table_json +from .utils import handle_not_tables + + +def load_html_file(fpath: Path) -> BeautifulSoup: + """Convert the input file into a BeautifulSoup object. + + Args: + fpath: Path to the input file. + + Returns: + BeautifulSoup object of the input file. + """ + with fpath.open(encoding="utf-8") as fp: + soup = BeautifulSoup(fp.read(), "html.parser") + for e in soup.find_all(attrs={"style": ["display:none", "visibility:hidden"]}): + e.extract() + return soup + + +def _get_keywords( + soup: BeautifulSoup, keywords_config: dict[str, Any] +) -> Paragraph | None: + """Extract keywords from the soup object based on the provided configuration. + + Args: + soup: BeautifulSoup object of the HTML file. + keywords_config: AC config rules for keywords. + + Returns: + dict: Extracted keywords as a dictionary. + """ + responses = handle_not_tables(keywords_config, soup) + if not responses: + return None + + return Paragraph( + section_heading="keywords", + subsection_heading="", + body=" ".join( + x["node"].get_text() for x in responses if isinstance(x["node"], Tag) + ), + section_type=[{"iao_name": "keywords section", "iao_id": "IAO:0000630"}], + ) + + +def _get_title(soup: BeautifulSoup, title_config: dict[str, Any]) -> str: + """Extract the title from the soup object based on the provided configuration. 
+ + Args: + soup: BeautifulSoup object of the HTML file. + title_config: AC config rules for the title. + + Returns: + Extracted title as a string. + """ + titles = handle_not_tables(title_config, soup) + if not titles: + return "" + + node = cast(Tag, titles[0]["node"]) + + return node.get_text() + + +def _get_sections( + soup: BeautifulSoup, sections_config: dict[str, Any] +) -> list[dict[str, Tag | list[str]]]: + """Extract sections from the soup object based on the provided configuration. + + Args: + soup: Beautiful Soup object of the HTML file. + sections_config: AC config rules for sections. + + Returns: + A list of matches for the provided config rules. Either as a Tag or a list of + strings. + """ + return handle_not_tables(sections_config, soup) + + +def _set_unknown_section_headings(unique_text: list[Paragraph]) -> list[Paragraph]: + """Set the heading for sections that are not specified in the config. + + Args: + unique_text: List of unique text sections. + + Returns: + A list of unique text sections with unknown headings set to "document part". + """ + paper = {} + for para in unique_text: + if para.section_heading != "keywords": + paper[para.section_heading] = [x["iao_name"] for x in para.section_type] + + for text in unique_text: + if not text.section_heading: + text.section_heading = "document part" + text.section_type = [{"iao_name": "document part", "iao_id": "IAO:0000314"}] + + return unique_text + + +def _extract_text(soup: BeautifulSoup, config: dict[str, Any]) -> dict[str, Any]: + """Extract the main text of the article from the soup object. + + This converts a BeautifulSoup object of a html article into a Python dict that + aligns with the BioC format defined in the provided config. 
+ + Args: + soup: BeautifulSoup object of html + config: AC config rules + + Return: + dict of the maintext + """ + result: dict[str, Any] = {} + + # Extract tags of text body and hard-code as: + # p (main text) and span (keywords and refs) + result["title"] = _get_title(soup, config["title"]) if "title" in config else "" + maintext = [] + if "keywords" in config and (keywords := _get_keywords(soup, config["keywords"])): + maintext.append(keywords) + sections = _get_sections(soup, config["sections"]) if "sections" in config else [] + for sec in sections: + maintext.extend(get_section(config, sec)) + + # filter out the sections which do not contain any info + filtered_text = [x for x in maintext if x] + unique_text = [] + seen_text = [] + for text in filtered_text: + if text.body not in seen_text: + seen_text.append(text.body) + unique_text.append(text) + + result["paragraphs"] = [ + p.as_dict() for p in _set_unknown_section_headings(unique_text) + ] + + return result + + +def _extend_tables_documents( + documents: list[dict[str, Any]], new_documents: list[dict[str, Any]] +) -> list[dict[str, Any]]: + """Extends the list of tables documents with new documents, ensuring unique IDs. + + Args: + documents: The original list of documents to be extended. + new_documents: New list of documents to add. + + Returns: + A list of documents with unique IDs, combining the original and new documents. 
+ """ + seen_ids = set() + for doc in documents: + seen_ids.add(doc["id"].partition(".")[0]) + + for doc in new_documents: + tabl_id, _, tabl_pos = doc["id"].partition(".") + if tabl_id in seen_ids: + tabl_id = str(len(seen_ids) + 1) + if tabl_pos: + doc["id"] = f"{tabl_id}.{tabl_pos}" + else: + doc["id"] = tabl_id + seen_ids.add(tabl_id) + + documents.extend(new_documents) + + return documents + + +def _merge_tables_with_empty_tables( + documents: list[dict[str, Any]], empty_tables: list[dict[str, Any]] +) -> list[dict[str, Any]]: + """Extends the list of tables documents with empty tables, ensuring titles are set. + + Args: + documents: The original list of documents to be extended. + empty_tables: A list of empty tables to merge with the documents. + + Returns: + A list of documents with titles and captions from empty tables merged in. + """ + seen_ids = {} + for i, table in enumerate(documents): + if "id" in table: + seen_ids[str(i)] = f"Table {table['id']}." + + for table in empty_tables: + for seenID in seen_ids.keys(): + if not table["title"].startswith(seen_ids[seenID]): + continue + + if "title" in table and not table["title"] == "": + set_new = False + for passage in documents[int(seenID)]["passages"]: + if ( + passage["infons"]["section_type"][0]["section_name"] + == "table_title" + ): + passage["text"] = table["title"] + set_new = True + if not set_new: + documents[int(seenID)]["passages"].append( + { + "offset": 0, + "infons": { + "section_type": [ + { + "section_name": "table_title", + "iao_name": "document title", + "iao_id": "IAO:0000305", + } + ] + }, + "text": table["title"], + } + ) + if "caption" in table and not table["caption"] == "": + set_new = False + for passage in documents[int(seenID)]["passages"]: + if ( + passage["infons"]["section_type"][0]["section_name"] + == "table_caption" + ): + passage["text"] = table["caption"] + set_new = True + if not set_new: + documents[int(seenID)]["passages"].append( + { + "offset": 0, + "infons": { + 
"section_type": [ + { + "section_name": "table_caption", + "iao_name": "caption", + "iao_id": "IAO:0000304", + } + ] + }, + "text": table["caption"], + } + ) + if "footer" in table and not table["footer"] == "": + set_new = False + for passage in documents[int(seenID)]["passages"]: + if ( + passage["infons"]["section_type"][0]["section_name"] + == "table_footer" + ): + passage["text"] = table["footer"] + set_new = True + if not set_new: + documents[int(seenID)]["passages"].append( + { + "offset": 0, + "infons": { + "section_type": [ + { + "section_name": "table_footer", + "iao_name": "caption", + "iao_id": "IAO:0000304", + } + ] + }, + "text": table["footer"], + } + ) + return documents + + +def process_html_article( + config: dict[str, Any], file_path: Path, linked_tables: list[Path] = [] +) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]: + """Create valid BioC versions of input HTML journal articles based off config. + + Processes the main text file and tables specified in the configuration. + + This method performs the following steps: + 1. Checks if a valid configuration is loaded. If not, raises a RuntimeError. + 2. Handles the main text file: + - Parses the HTML content of the file. + - Extracts the main text from the parsed HTML. + - Attempts to extract abbreviations from the main text and HTML content. + If an error occurs during this process, it prints the error. + 3. Processes linked tables, if any: + - Parses the HTML content of each linked table file. + 4. Merges table data. + 5. Checks if there are any documents in the tables and sets the `has_tables` + attribute accordingly. + + Args: + config: Configuration dictionary for the input journal articles + file_path: Path to the article file to be processed + linked_tables: list of linked table file paths to be included in this run + (HTML files only) + + Returns: + A tuple containing: + - main_text: Extracted main text as a dictionary. + - abbreviations: Extracted abbreviations as a dictionary. 
+ - tables: Extracted tables as a dictionary (possibly empty). + + Raises: + RuntimeError: If no valid configuration is loaded. + """ + if config == {}: + raise RuntimeError("A valid config file must be loaded.") + + soup = load_html_file(file_path) + main_text = _extract_text(soup, config) + try: + abbreviations = get_abbreviations(main_text, soup, file_path) + except Exception as e: + logger.error(e) + + if "tables" not in config: + return main_text, abbreviations, dict() + + tables, empty_tables = get_table_json(soup, config, file_path) + + new_documents = [] + for table_file in linked_tables: + soup = load_html_file(file_path) + tables, empty_tables = get_table_json(soup, config, file_path) + new_documents.extend(tables.get("documents", [])) + empty_tables.extend(empty_tables) + tables["documents"] = _extend_tables_documents( + tables.get("documents", []), new_documents + ) + if empty_tables: + _merge_tables_with_empty_tables(tables["documents"], empty_tables) + + return main_text, abbreviations, tables diff --git a/autocorpus/py.typed b/autocorpus/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/autocorpus/run.py b/autocorpus/run.py index 606bdf5e..fd44c2e7 100644 --- a/autocorpus/run.py +++ b/autocorpus/run.py @@ -2,7 +2,7 @@ from pathlib import Path -from .autocorpus import process_file +from .file_processing import process_file def run_autocorpus(config, structure, key, output_format): diff --git a/tests/test_regression.py b/tests/test_regression.py index 7d459e9e..ad61eb29 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -7,6 +7,7 @@ import pytest from autocorpus.config import DefaultConfig +from autocorpus.file_processing import process_file @pytest.mark.parametrize( @@ -21,8 +22,6 @@ def test_autocorpus(data_path: Path, input_file: str, config: dict[str, Any]) -> Uses each PMC config on the AutoCORPus Paper. 
""" - from autocorpus.autocorpus import process_file - pmc_example_path = data_path / input_file with open( str(pmc_example_path).replace(".html", "_abbreviations.json"), encoding="utf-8" @@ -67,8 +66,6 @@ def test_autocorpus(data_path: Path, input_file: str, config: dict[str, Any]) -> ) def test_pdf_to_bioc(data_path: Path, input_file: str, config: dict[str, Any]) -> None: """Test the conversion of a PDF file to a BioC format.""" - from autocorpus.autocorpus import process_file - pdf_path = data_path / input_file expected_output = pdf_path.parent / "Expected Output" / pdf_path.name with open( From f6cebf4a3528365ed67f64b363a090641cb8972f Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 22:54:52 +0100 Subject: [PATCH 089/125] Use the table file to extract linked_tables --- autocorpus/html.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autocorpus/html.py b/autocorpus/html.py index 31fd8ee5..a6e25e16 100644 --- a/autocorpus/html.py +++ b/autocorpus/html.py @@ -336,8 +336,8 @@ def process_html_article( new_documents = [] for table_file in linked_tables: - soup = load_html_file(file_path) - tables, empty_tables = get_table_json(soup, config, file_path) + soup = load_html_file(table_file) + tables, empty_tables = get_table_json(soup, config, table_file) new_documents.extend(tables.get("documents", [])) empty_tables.extend(empty_tables) tables["documents"] = _extend_tables_documents( From 21f5f3136ed1f2fbcd822a257ac49ce3a478498b Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 23:07:27 +0100 Subject: [PATCH 090/125] implement some copilot suggestions --- autocorpus/configs/__init__.py | 1 + autocorpus/pdf.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 autocorpus/configs/__init__.py diff --git a/autocorpus/configs/__init__.py b/autocorpus/configs/__init__.py new file mode 100644 index 00000000..a1b11403 --- /dev/null +++ b/autocorpus/configs/__init__.py @@ -0,0 +1 @@ 
+"""Autocorpus configs package.""" diff --git a/autocorpus/pdf.py b/autocorpus/pdf.py index 870316fb..476c92fb 100644 --- a/autocorpus/pdf.py +++ b/autocorpus/pdf.py @@ -42,7 +42,8 @@ def extract_pdf_content( file_path (Path): Path to the PDF file. Returns: - bool: success status of the extraction process. + A tuple of BioCTextConverter and BioCTableConverter objects containing + the extracted text and tables. Raises: RuntimeError: If the PDF converter is not initialized. From bdd0b782a7b2dd35881602529bea181295835be2 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 30 May 2025 07:50:05 +0100 Subject: [PATCH 091/125] Update private test data for changes to abbreviation code --- tests/data/private | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data/private b/tests/data/private index ebc5c7cd..f43d4cd4 160000 --- a/tests/data/private +++ b/tests/data/private @@ -1 +1 @@ -Subproject commit ebc5c7cd9d70664aeb75565603075e70f046e9e0 +Subproject commit f43d4cd44ee36b84994963f98a041931548e2212 From 1080dd0424253ee5ca1785b39a945e0467c38b5c Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 30 May 2025 08:48:28 +0100 Subject: [PATCH 092/125] get_table_json(): Make output deterministic It currently uses a set to filter out duplicate values, but then iterates over the contents. As sets have non-deterministic ordering, this means you get different results on different runs of AC. Fix by replacing with a `dict` (which does have deterministic ordering). 
--- autocorpus/table.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/autocorpus/table.py b/autocorpus/table.py index 35dc42d9..015b8983 100644 --- a/autocorpus/table.py +++ b/autocorpus/table.py @@ -449,7 +449,9 @@ def get_table_json( first_col_vals = [ i for i in first_col if first_col.index(i) not in header_idx ] - unique_vals = set(i for i in first_col_vals if i not in ("", "None")) + unique_vals = dict.fromkeys( + i for i in first_col_vals if i not in ("", "None") + ) if len(unique_vals) <= len(first_col_vals) / 2: section_names = list(unique_vals) for i in section_names: From 48116439f156fea94f3ad47b92560cfbc36bbb31 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Thu, 29 May 2025 12:02:47 +0100 Subject: [PATCH 093/125] Add Inputs and Outputs to documentation --- docs/io.md | 128 +++++++++++++++++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 2 files changed, 129 insertions(+) create mode 100644 docs/io.md diff --git a/docs/io.md b/docs/io.md new file mode 100644 index 00000000..c931092a --- /dev/null +++ b/docs/io.md @@ -0,0 +1,128 @@ +# Inputs and Outputs + +Auto-CORPus can be used directly on one file, or a list of files. However, to use it on +a directory containing multiple files and nested directories, there is an assumed +structure. This is useful for analysing an article along with its associated tables +and supplementary information. + +## Input + +Auto-CORPus processes two categories of biomedical literature: **full text with tables** +and **supplementary information**. + +Full text input files can be in HTML, XML or PDF formats, with an option to process +standalone HTML files which describe a single table. + +Supplementary information files can be in PDF, word processor (doc, docx), spreadsheet +(xlsx, xls, csv, tsv) and presentation (pptx, ppt, pptm, odp) formats. 
+ +Auto-CORPus relies on a prescriptive directory structure and naming convention to +distinguish and group full text, table and supplementary information files. + +The input directory passed to Auto-CORPus should contain article full text files and +optional subdirectories for tables and supplementary information for each article. + +The article full text file is named: + +- `{article_name}.html` or `{article_name}.xml` or `{article_name}.pdf` + +Article table HTML files are placed in a subdirectory named: + +- `{article_name}_tbl` + +The table files in the subdirectory must contain _X at the end of the file name, where X +is the table number. + +- `{any_name_you_want}_X.html` + +Article supplementary information files are placed in a subdirectory named: + +- `{article_name}_si` + +The supplementary information files in the subdirectory can have any name, but must +contain a `.pdf`, `.docx`, `.doc`, `.xlsx`, `.xls`, `.csv`, `.tsv`, `.pptx`, `.ppt`, +`.pptm` or `.odp` file extension. + +## Output + +Auto-CORPus will write output to the “root” location passed to it, replicating the input +directory structure. + + + + + + + + + + + + + + + +
InputOutput
+ +```text +inputs +├── PMC1.html +├── PMC1_tbl +│ ├── results_tbl_1.html +│ └── results_tbl_2.html +└── PMC1_si + └── suppl_methods.docx +``` + + + +```text +outputs +├── PMC1_bioc.json +├── PMC1_abbreviations.json +├── PMC1_tbl +│ └── PMC1_tables.json +└── PMC1_si + └── suppl_methods.docx_bioc.json +``` + +
+ +```text +inputs +├── article1.pdf +└── article1_si + ├── extended_results.xlsx + └── file3454.pdf +``` + + + +```text +outputs +├── article1_bioc.json +├── article1_abbreviations.json +├── article1_tables.json +└── article1_si + ├── extended_results.xlsx_tables.json + └── file3454.pdf_bioc.json +``` + +
+ + +For each full text file, a bioc and abbreviations json file is output to the root. + +If a tables subdirectory was not given for the article, the tables json file is output +to the root. + +If a tables subdirectory was given, the tables json file is output to the subdirectory. +The tables json contains all tables from the separate HTML files and any tables +described within the main text. + +The processed supplementary files are output to the supplementary subdirectory. If +Auto-CORPus detects text in the input file, a bioc file is output with \_bioc.json +appended to the end of the original filename. If Auto-CORPus detects one or more tables +in the input file, a tables json file is output with \_tables.json appended to the end +of the original filename. If both text and tables are detected, then both formats will +be output. diff --git a/mkdocs.yml b/mkdocs.yml index 451b314f..b8e5d897 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -43,6 +43,7 @@ plugins: nav: - Auto-CORPus documentation: index.md + - io.md - How to create/edit a config file: config_tutorial.md - Use of data elements: data_elements.md - Package workflows: package_workflows.md From 3de57ddb08c123c3f75e8f84c3ff2fe3223ed096 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 30 May 2025 13:32:50 +0100 Subject: [PATCH 094/125] Update private test data for tables fix --- tests/data/private | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data/private b/tests/data/private index f43d4cd4..8e9f45e4 160000 --- a/tests/data/private +++ b/tests/data/private @@ -1 +1 @@ -Subproject commit f43d4cd44ee36b84994963f98a041931548e2212 +Subproject commit 8e9f45e4d6c3f4ebdc1bc612674b1da6d0ff94dd From a38850aae526ef874801e0010a93d42828a5ce04 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 30 May 2025 13:33:04 +0100 Subject: [PATCH 095/125] Reinclude excluded private test files --- tests/test_regression.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git 
a/tests/test_regression.py b/tests/test_regression.py index d3760ca5..a0b0bc49 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -11,18 +11,6 @@ from .conftest import DATA_PATH -_KNOWN_FAILURES = [ - "PMC10790237.html", - "PMC5480070.html", - "PMC8853865.html", - "PMC9477686.html", -] -"""These files are known to fail the regression test, even though they shouldn't. - -The problem is in the `*_tables.json` files. You get different results on different runs -for reasons unknown. -""" - def _get_html_test_data_paths(subfolder: str): """Return paths to HTML test data files with appropriate DefaultConfig.""" @@ -62,9 +50,6 @@ def test_regression_html_private( data_path: Path, input_file: str, config: dict[str, Any] ) -> None: """Regression test for private HTML data.""" - if Path(input_file).name in _KNOWN_FAILURES: - pytest.xfail("Known problematic file") - _run_html_regression_test(data_path, input_file, config) From e3e3b115420efa66524a42f0eafaf480c639caee Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 1 Jun 2025 20:01:02 +0000 Subject: [PATCH 096/125] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_regression.py b/tests/test_regression.py index 0d4bb3ac..59b9e1c4 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -1,8 +1,8 @@ """Primary build test script used for regression testing between AC output versions.""" import json -import shutil import os +import shutil from pathlib import Path from typing import Any From f6b31e2bfd50d703587aff59c25228bbc1d4f9b8 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Sun, 1 Jun 2025 21:03:09 +0100 Subject: [PATCH 097/125] Updated lock file --- poetry.lock | 216 +++++++++++++++++++++++++--------------------------- 1 file changed, 103 insertions(+), 113 
deletions(-) diff --git a/poetry.lock b/poetry.lock index 1679e610..11d696bb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "annotated-types" @@ -270,8 +270,6 @@ version = "8.2.1" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.10" - -groups = ["main", "docs"] files = [ {file = "click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b"}, {file = "click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202"}, @@ -401,9 +399,6 @@ version = "0.8.1" description = "A new flavour of deep learning operations" optional = true python-versions = ">=3.8" - -groups = ["main"] -markers = "extra == \"pdf\"" files = [ {file = "einops-0.8.1-py3-none-any.whl", hash = "sha256:919387eb55330f5757c6bea9165c5ff5cfe63a642682ea788a6d472576d81737"}, {file = "einops-0.8.1.tar.gz", hash = "sha256:de5d960a7a761225532e0f1959e5315ebeafc0cd43394732f103ca44b9837e84"}, @@ -419,7 +414,6 @@ files = [ {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, {file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"}, ] -markers = {main = "extra == \"pdf\" and python_version == \"3.10\"", dev = "python_version == \"3.10\""} [package.dependencies] typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""} @@ -433,7 +427,6 @@ version = "2.1.1" description = "execnet: rapid multi-Python deployment" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"}, {file 
= "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"}, @@ -471,13 +464,13 @@ files = [ [[package]] name = "fsspec" -version = "2025.5.0" +version = "2025.5.1" description = "File-system specification" optional = true python-versions = ">=3.9" files = [ - {file = "fsspec-2025.5.0-py3-none-any.whl", hash = "sha256:0ca253eca6b5333d8a2b8bd98c7326fe821f1f0fdbd34e1b445bddde8e804c95"}, - {file = "fsspec-2025.5.0.tar.gz", hash = "sha256:e4f4623bb6221f7407fd695cc535d1f857a077eb247580f4ada34f5dc25fd5c8"}, + {file = "fsspec-2025.5.1-py3-none-any.whl", hash = "sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462"}, + {file = "fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475"}, ] [package.extras] @@ -584,13 +577,13 @@ urllib3 = ["packaging", "urllib3"] [[package]] name = "google-genai" -version = "1.16.1" +version = "1.18.0" description = "GenAI Python SDK" optional = true python-versions = ">=3.9" files = [ - {file = "google_genai-1.16.1-py3-none-any.whl", hash = "sha256:6ae5d24282244f577ca4f0d95c09f75ab29e556602c9d3531b70161e34cd2a39"}, - {file = "google_genai-1.16.1.tar.gz", hash = "sha256:4b4ed4ed781a9d61e5ce0fef1486dd3a5d7ff0a73bd76b9633d21e687ab998df"}, + {file = "google_genai-1.18.0-py3-none-any.whl", hash = "sha256:3527bb93c8306e725401aca0a0a684610bbf1ef9aa61c2ed3333a695f43dc9af"}, + {file = "google_genai-1.18.0.tar.gz", hash = "sha256:242a02df3248e291f03e37019ce5a1c8a21a14ec245b59668c9f2b4d8965295e"}, ] [package.dependencies] @@ -694,13 +687,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "huggingface-hub" -version = "0.32.0" +version = "0.32.3" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = true python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.32.0-py3-none-any.whl", hash = 
"sha256:e56e94109649ce6ebdb59b4e393ee3543ec0eca2eab4f41b269e1d885c88d08c"}, - {file = "huggingface_hub-0.32.0.tar.gz", hash = "sha256:dd66c9365ea43049ec9b939bdcdb21a0051e1bd70026fc50304e4fb1bb6a15ba"}, + {file = "huggingface_hub-0.32.3-py3-none-any.whl", hash = "sha256:e46f7ea7fe2b5e5f67cc4e37eb201140091946a314d7c2b134a9673dadd80b6a"}, + {file = "huggingface_hub-0.32.3.tar.gz", hash = "sha256:752c889ebf3a63cbd39803f6d87ccc135a463bbcb36abfa2faff0ccbf1cec087"}, ] [package.dependencies] @@ -732,13 +725,13 @@ typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "t [[package]] name = "identify" -version = "2.6.10" +version = "2.6.12" description = "File identification library for Python" optional = false python-versions = ">=3.9" files = [ - {file = "identify-2.6.10-py2.py3-none-any.whl", hash = "sha256:5f34248f54136beed1a7ba6a6b5c4b6cf21ff495aac7c359e1ef831ae3b8ab25"}, - {file = "identify-2.6.10.tar.gz", hash = "sha256:45e92fd704f3da71cc3880036633f48b4b7265fd4de2b57627cb157216eb7eb8"}, + {file = "identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2"}, + {file = "identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6"}, ] [package.extras] @@ -885,13 +878,13 @@ files = [ [[package]] name = "jsonschema" -version = "4.23.0" +version = "4.24.0" description = "An implementation of JSON Schema validation for Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"}, - {file = "jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"}, + {file = "jsonschema-4.24.0-py3-none-any.whl", hash = "sha256:a462455f19f5faf404a7902952b6f0e3ce868f3ee09a359b05eca6673bd8412d"}, + {file = "jsonschema-4.24.0.tar.gz", hash = 
"sha256:0b4e8069eb12aedfa881333004bccaec24ecef5a8a6a4b6df142b2cc9599d196"}, ] [package.dependencies] @@ -1555,47 +1548,48 @@ tests = ["pytest (>=4.6)"] [[package]] name = "mypy" -version = "1.15.0" +version = "1.16.0" description = "Optional static typing for Python" optional = false python-versions = ">=3.9" files = [ - {file = "mypy-1.15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:979e4e1a006511dacf628e36fadfecbcc0160a8af6ca7dad2f5025529e082c13"}, - {file = "mypy-1.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c4bb0e1bd29f7d34efcccd71cf733580191e9a264a2202b0239da95984c5b559"}, - {file = "mypy-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be68172e9fd9ad8fb876c6389f16d1c1b5f100ffa779f77b1fb2176fcc9ab95b"}, - {file = "mypy-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c7be1e46525adfa0d97681432ee9fcd61a3964c2446795714699a998d193f1a3"}, - {file = "mypy-1.15.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2e2c2e6d3593f6451b18588848e66260ff62ccca522dd231cd4dd59b0160668b"}, - {file = "mypy-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:6983aae8b2f653e098edb77f893f7b6aca69f6cffb19b2cc7443f23cce5f4828"}, - {file = "mypy-1.15.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2922d42e16d6de288022e5ca321cd0618b238cfc5570e0263e5ba0a77dbef56f"}, - {file = "mypy-1.15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2ee2d57e01a7c35de00f4634ba1bbf015185b219e4dc5909e281016df43f5ee5"}, - {file = "mypy-1.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:973500e0774b85d9689715feeffcc980193086551110fd678ebe1f4342fb7c5e"}, - {file = "mypy-1.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a95fb17c13e29d2d5195869262f8125dfdb5c134dc8d9a9d0aecf7525b10c2c"}, - {file = "mypy-1.15.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash 
= "sha256:1905f494bfd7d85a23a88c5d97840888a7bd516545fc5aaedff0267e0bb54e2f"}, - {file = "mypy-1.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:c9817fa23833ff189db061e6d2eff49b2f3b6ed9856b4a0a73046e41932d744f"}, - {file = "mypy-1.15.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:aea39e0583d05124836ea645f412e88a5c7d0fd77a6d694b60d9b6b2d9f184fd"}, - {file = "mypy-1.15.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2f2147ab812b75e5b5499b01ade1f4a81489a147c01585cda36019102538615f"}, - {file = "mypy-1.15.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ce436f4c6d218a070048ed6a44c0bbb10cd2cc5e272b29e7845f6a2f57ee4464"}, - {file = "mypy-1.15.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8023ff13985661b50a5928fc7a5ca15f3d1affb41e5f0a9952cb68ef090b31ee"}, - {file = "mypy-1.15.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1124a18bc11a6a62887e3e137f37f53fbae476dc36c185d549d4f837a2a6a14e"}, - {file = "mypy-1.15.0-cp312-cp312-win_amd64.whl", hash = "sha256:171a9ca9a40cd1843abeca0e405bc1940cd9b305eaeea2dda769ba096932bb22"}, - {file = "mypy-1.15.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:93faf3fdb04768d44bf28693293f3904bbb555d076b781ad2530214ee53e3445"}, - {file = "mypy-1.15.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:811aeccadfb730024c5d3e326b2fbe9249bb7413553f15499a4050f7c30e801d"}, - {file = "mypy-1.15.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:98b7b9b9aedb65fe628c62a6dc57f6d5088ef2dfca37903a7d9ee374d03acca5"}, - {file = "mypy-1.15.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c43a7682e24b4f576d93072216bf56eeff70d9140241f9edec0c104d0c515036"}, - {file = "mypy-1.15.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:baefc32840a9f00babd83251560e0ae1573e2f9d1b067719479bfb0e987c6357"}, - {file = 
"mypy-1.15.0-cp313-cp313-win_amd64.whl", hash = "sha256:b9378e2c00146c44793c98b8d5a61039a048e31f429fb0eb546d93f4b000bedf"}, - {file = "mypy-1.15.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e601a7fa172c2131bff456bb3ee08a88360760d0d2f8cbd7a75a65497e2df078"}, - {file = "mypy-1.15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:712e962a6357634fef20412699a3655c610110e01cdaa6180acec7fc9f8513ba"}, - {file = "mypy-1.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f95579473af29ab73a10bada2f9722856792a36ec5af5399b653aa28360290a5"}, - {file = "mypy-1.15.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8f8722560a14cde92fdb1e31597760dc35f9f5524cce17836c0d22841830fd5b"}, - {file = "mypy-1.15.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1fbb8da62dc352133d7d7ca90ed2fb0e9d42bb1a32724c287d3c76c58cbaa9c2"}, - {file = "mypy-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:d10d994b41fb3497719bbf866f227b3489048ea4bbbb5015357db306249f7980"}, - {file = "mypy-1.15.0-py3-none-any.whl", hash = "sha256:5469affef548bd1895d86d3bf10ce2b44e33d86923c29e4d675b3e323437ea3e"}, - {file = "mypy-1.15.0.tar.gz", hash = "sha256:404534629d51d3efea5c800ee7c42b72a6554d6c400e6a79eafe15d11341fd43"}, + {file = "mypy-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7909541fef256527e5ee9c0a7e2aeed78b6cda72ba44298d1334fe7881b05c5c"}, + {file = "mypy-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e71d6f0090c2256c713ed3d52711d01859c82608b5d68d4fa01a3fe30df95571"}, + {file = "mypy-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:936ccfdd749af4766be824268bfe22d1db9eb2f34a3ea1d00ffbe5b5265f5491"}, + {file = "mypy-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4086883a73166631307fdd330c4a9080ce24913d4f4c5ec596c601b3a4bdd777"}, + {file = 
"mypy-1.16.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:feec38097f71797da0231997e0de3a58108c51845399669ebc532c815f93866b"}, + {file = "mypy-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:09a8da6a0ee9a9770b8ff61b39c0bb07971cda90e7297f4213741b48a0cc8d93"}, + {file = "mypy-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9f826aaa7ff8443bac6a494cf743f591488ea940dd360e7dd330e30dd772a5ab"}, + {file = "mypy-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:82d056e6faa508501af333a6af192c700b33e15865bda49611e3d7d8358ebea2"}, + {file = "mypy-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:089bedc02307c2548eb51f426e085546db1fa7dd87fbb7c9fa561575cf6eb1ff"}, + {file = "mypy-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6a2322896003ba66bbd1318c10d3afdfe24e78ef12ea10e2acd985e9d684a666"}, + {file = "mypy-1.16.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:021a68568082c5b36e977d54e8f1de978baf401a33884ffcea09bd8e88a98f4c"}, + {file = "mypy-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:54066fed302d83bf5128632d05b4ec68412e1f03ef2c300434057d66866cea4b"}, + {file = "mypy-1.16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c5436d11e89a3ad16ce8afe752f0f373ae9620841c50883dc96f8b8805620b13"}, + {file = "mypy-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f2622af30bf01d8fc36466231bdd203d120d7a599a6d88fb22bdcb9dbff84090"}, + {file = "mypy-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d045d33c284e10a038f5e29faca055b90eee87da3fc63b8889085744ebabb5a1"}, + {file = "mypy-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b4968f14f44c62e2ec4a038c8797a87315be8df7740dc3ee8d3bfe1c6bf5dba8"}, + {file = "mypy-1.16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:eb14a4a871bb8efb1e4a50360d4e3c8d6c601e7a31028a2c79f9bb659b63d730"}, + {file = "mypy-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:bd4e1ebe126152a7bbaa4daedd781c90c8f9643c79b9748caa270ad542f12bec"}, + {file = "mypy-1.16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a9e056237c89f1587a3be1a3a70a06a698d25e2479b9a2f57325ddaaffc3567b"}, + {file = "mypy-1.16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0b07e107affb9ee6ce1f342c07f51552d126c32cd62955f59a7db94a51ad12c0"}, + {file = "mypy-1.16.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c6fb60cbd85dc65d4d63d37cb5c86f4e3a301ec605f606ae3a9173e5cf34997b"}, + {file = "mypy-1.16.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a7e32297a437cc915599e0578fa6bc68ae6a8dc059c9e009c628e1c47f91495d"}, + {file = "mypy-1.16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:afe420c9380ccec31e744e8baff0d406c846683681025db3531b32db56962d52"}, + {file = "mypy-1.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:55f9076c6ce55dd3f8cd0c6fff26a008ca8e5131b89d5ba6d86bd3f47e736eeb"}, + {file = "mypy-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f56236114c425620875c7cf71700e3d60004858da856c6fc78998ffe767b73d3"}, + {file = "mypy-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:15486beea80be24ff067d7d0ede673b001d0d684d0095803b3e6e17a886a2a92"}, + {file = "mypy-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f2ed0e0847a80655afa2c121835b848ed101cc7b8d8d6ecc5205aedc732b1436"}, + {file = "mypy-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eb5fbc8063cb4fde7787e4c0406aa63094a34a2daf4673f359a1fb64050e9cb2"}, + {file = "mypy-1.16.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a5fcfdb7318c6a8dd127b14b1052743b83e97a970f0edb6c913211507a255e20"}, + {file = 
"mypy-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:2e7e0ad35275e02797323a5aa1be0b14a4d03ffdb2e5f2b0489fa07b89c67b21"}, + {file = "mypy-1.16.0-py3-none-any.whl", hash = "sha256:29e1499864a3888bca5c1542f2d7232c6e586295183320caa95758fc84034031"}, + {file = "mypy-1.16.0.tar.gz", hash = "sha256:84b94283f817e2aa6350a14b4a8fb2a35a53c286f97c9d30f53b63620e7af8ab"}, ] [package.dependencies] mypy_extensions = ">=1.0.0" +pathspec = ">=0.9.0" tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} typing_extensions = ">=4.6.0" @@ -1932,13 +1926,13 @@ files = [ [[package]] name = "openai" -version = "1.82.0" +version = "1.82.1" description = "The official Python library for the openai API" optional = true python-versions = ">=3.8" files = [ - {file = "openai-1.82.0-py3-none-any.whl", hash = "sha256:8c40647fea1816516cb3de5189775b30b5f4812777e40b8768f361f232b61b30"}, - {file = "openai-1.82.0.tar.gz", hash = "sha256:b0a009b9a58662d598d07e91e4219ab4b1e3d8ba2db3f173896a92b9b874d1a7"}, + {file = "openai-1.82.1-py3-none-any.whl", hash = "sha256:334eb5006edf59aa464c9e932b9d137468d810b2659e5daea9b3a8c39d052395"}, + {file = "openai-1.82.1.tar.gz", hash = "sha256:ffc529680018e0417acac85f926f92aa0bbcbc26e82e2621087303c66bc7f95d"}, ] [package.dependencies] @@ -2094,13 +2088,13 @@ xml = ["lxml (>=4.9.2)"] [[package]] name = "pandas-stubs" -version = "2.2.3.250308" +version = "2.2.3.250527" description = "Type annotations for pandas" optional = false python-versions = ">=3.10" files = [ - {file = "pandas_stubs-2.2.3.250308-py3-none-any.whl", hash = "sha256:a377edff3b61f8b268c82499fdbe7c00fdeed13235b8b71d6a1dc347aeddc74d"}, - {file = "pandas_stubs-2.2.3.250308.tar.gz", hash = "sha256:3a6e9daf161f00b85c83772ed3d5cff9522028f07a94817472c07b91f46710fd"}, + {file = "pandas_stubs-2.2.3.250527-py3-none-any.whl", hash = "sha256:cd0a49a95b8c5f944e605be711042a4dd8550e2c559b43d70ba2c4b524b66163"}, + {file = "pandas_stubs-2.2.3.250527.tar.gz", hash = 
"sha256:e2d694c4e72106055295ad143664e5c99e5815b07190d1ff85b73b13ff019e63"}, ] [package.dependencies] @@ -2287,7 +2281,6 @@ version = "7.0.0" description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." optional = false python-versions = ">=3.6" -groups = ["dev"] files = [ {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, @@ -2598,14 +2591,13 @@ dev = ["pre-commit", "pytest-asyncio", "tox"] [[package]] name = "pytest-xdist" -version = "3.6.1" +version = "3.7.0" description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" optional = false -python-versions = ">=3.8" -groups = ["dev"] +python-versions = ">=3.9" files = [ - {file = "pytest_xdist-3.6.1-py3-none-any.whl", hash = "sha256:9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7"}, - {file = "pytest_xdist-3.6.1.tar.gz", hash = "sha256:ead156a4db231eec769737f57668ef58a2084a34b2e55c4a8fa20d861107300d"}, + {file = "pytest_xdist-3.7.0-py3-none-any.whl", hash = "sha256:7d3fbd255998265052435eb9daa4e99b62e6fb9cfb6efd1f858d4d8c0c7f0ca0"}, + {file = "pytest_xdist-3.7.0.tar.gz", hash = "sha256:f9248c99a7c15b7d2f90715df93610353a485827bc06eefb6566d23f6400f126"}, ] [package.dependencies] @@ -3150,31 +3142,29 @@ pyasn1 = ">=0.1.3" [[package]] name = "ruff" -version = "0.11.11" +version = "0.11.12" description = "An extremely fast Python linter and code formatter, written in Rust." 
optional = false python-versions = ">=3.7" - -groups = ["dev"] -files = [ - {file = "ruff-0.11.11-py3-none-linux_armv6l.whl", hash = "sha256:9924e5ae54125ed8958a4f7de320dab7380f6e9fa3195e3dc3b137c6842a0092"}, - {file = "ruff-0.11.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:c8a93276393d91e952f790148eb226658dd275cddfde96c6ca304873f11d2ae4"}, - {file = "ruff-0.11.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d6e333dbe2e6ae84cdedefa943dfd6434753ad321764fd937eef9d6b62022bcd"}, - {file = "ruff-0.11.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7885d9a5e4c77b24e8c88aba8c80be9255fa22ab326019dac2356cff42089fc6"}, - {file = "ruff-0.11.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1b5ab797fcc09121ed82e9b12b6f27e34859e4227080a42d090881be888755d4"}, - {file = "ruff-0.11.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e231ff3132c1119ece836487a02785f099a43992b95c2f62847d29bace3c75ac"}, - {file = "ruff-0.11.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:a97c9babe1d4081037a90289986925726b802d180cca784ac8da2bbbc335f709"}, - {file = "ruff-0.11.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8c4ddcbe8a19f59f57fd814b8b117d4fcea9bee7c0492e6cf5fdc22cfa563c8"}, - {file = "ruff-0.11.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6224076c344a7694c6fbbb70d4f2a7b730f6d47d2a9dc1e7f9d9bb583faf390b"}, - {file = "ruff-0.11.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:882821fcdf7ae8db7a951df1903d9cb032bbe838852e5fc3c2b6c3ab54e39875"}, - {file = "ruff-0.11.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:dcec2d50756463d9df075a26a85a6affbc1b0148873da3997286caf1ce03cae1"}, - {file = "ruff-0.11.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:99c28505ecbaeb6594701a74e395b187ee083ee26478c1a795d35084d53ebd81"}, - {file = "ruff-0.11.11-py3-none-musllinux_1_2_i686.whl", hash = 
"sha256:9263f9e5aa4ff1dec765e99810f1cc53f0c868c5329b69f13845f699fe74f639"}, - {file = "ruff-0.11.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:64ac6f885e3ecb2fdbb71de2701d4e34526651f1e8503af8fb30d4915a3fe345"}, - {file = "ruff-0.11.11-py3-none-win32.whl", hash = "sha256:1adcb9a18802268aaa891ffb67b1c94cd70578f126637118e8099b8e4adcf112"}, - {file = "ruff-0.11.11-py3-none-win_amd64.whl", hash = "sha256:748b4bb245f11e91a04a4ff0f96e386711df0a30412b9fe0c74d5bdc0e4a531f"}, - {file = "ruff-0.11.11-py3-none-win_arm64.whl", hash = "sha256:6c51f136c0364ab1b774767aa8b86331bd8e9d414e2d107db7a2189f35ea1f7b"}, - {file = "ruff-0.11.11.tar.gz", hash = "sha256:7774173cc7c1980e6bf67569ebb7085989a78a103922fb83ef3dfe230cd0687d"}, +files = [ + {file = "ruff-0.11.12-py3-none-linux_armv6l.whl", hash = "sha256:c7680aa2f0d4c4f43353d1e72123955c7a2159b8646cd43402de6d4a3a25d7cc"}, + {file = "ruff-0.11.12-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:2cad64843da9f134565c20bcc430642de897b8ea02e2e79e6e02a76b8dcad7c3"}, + {file = "ruff-0.11.12-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9b6886b524a1c659cee1758140138455d3c029783d1b9e643f3624a5ee0cb0aa"}, + {file = "ruff-0.11.12-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cc3a3690aad6e86c1958d3ec3c38c4594b6ecec75c1f531e84160bd827b2012"}, + {file = "ruff-0.11.12-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f97fdbc2549f456c65b3b0048560d44ddd540db1f27c778a938371424b49fe4a"}, + {file = "ruff-0.11.12-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74adf84960236961090e2d1348c1a67d940fd12e811a33fb3d107df61eef8fc7"}, + {file = "ruff-0.11.12-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:b56697e5b8bcf1d61293ccfe63873aba08fdbcbbba839fc046ec5926bdb25a3a"}, + {file = "ruff-0.11.12-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4d47afa45e7b0eaf5e5969c6b39cbd108be83910b5c74626247e366fd7a36a13"}, + {file = 
"ruff-0.11.12-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bf9603fe1bf949de8b09a2da896f05c01ed7a187f4a386cdba6760e7f61be"}, + {file = "ruff-0.11.12-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08033320e979df3b20dba567c62f69c45e01df708b0f9c83912d7abd3e0801cd"}, + {file = "ruff-0.11.12-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:929b7706584f5bfd61d67d5070f399057d07c70585fa8c4491d78ada452d3bef"}, + {file = "ruff-0.11.12-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:7de4a73205dc5756b8e09ee3ed67c38312dce1aa28972b93150f5751199981b5"}, + {file = "ruff-0.11.12-py3-none-musllinux_1_2_i686.whl", hash = "sha256:2635c2a90ac1b8ca9e93b70af59dfd1dd2026a40e2d6eebaa3efb0465dd9cf02"}, + {file = "ruff-0.11.12-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d05d6a78a89166f03f03a198ecc9d18779076ad0eec476819467acb401028c0c"}, + {file = "ruff-0.11.12-py3-none-win32.whl", hash = "sha256:f5a07f49767c4be4772d161bfc049c1f242db0cfe1bd976e0f0886732a4765d6"}, + {file = "ruff-0.11.12-py3-none-win_amd64.whl", hash = "sha256:5a4d9f8030d8c3a45df201d7fb3ed38d0219bccd7955268e863ee4a115fa0832"}, + {file = "ruff-0.11.12-py3-none-win_arm64.whl", hash = "sha256:65194e37853158d368e333ba282217941029a28ea90913c67e558c611d04daa5"}, + {file = "ruff-0.11.12.tar.gz", hash = "sha256:43cf7f69c7d7c7d7513b9d59c5d8cafd704e05944f978614aa9faff6ac202603"}, ] [[package]] @@ -3333,13 +3323,13 @@ test = ["Cython", "array-api-strict (>=2.0,<2.1.1)", "asv", "gmpy2", "hypothesis [[package]] name = "setuptools" -version = "80.8.0" +version = "80.9.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = true python-versions = ">=3.9" files = [ - {file = "setuptools-80.8.0-py3-none-any.whl", hash = "sha256:95a60484590d24103af13b686121328cc2736bee85de8936383111e421b9edc0"}, - {file = "setuptools-80.8.0.tar.gz", hash = "sha256:49f7af965996f26d43c8ae34539c8d99c5042fbff34302ea151eaa9c207cd257"}, + {file = 
"setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"}, + {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"}, ] [package.extras] @@ -3386,13 +3376,13 @@ files = [ [[package]] name = "surya-ocr" -version = "0.14.2" +version = "0.14.3" description = "OCR, layout, reading order, and table recognition in 90+ languages" optional = true python-versions = "<4.0,>=3.10" files = [ - {file = "surya_ocr-0.14.2-py3-none-any.whl", hash = "sha256:0c402705c860f8bf98fc2bf2a3b49d7f0e16fba587aed6d3f01bb53bb776d283"}, - {file = "surya_ocr-0.14.2.tar.gz", hash = "sha256:852af681073167beba9a638658c70b81318f1a8f3d558db68dead1b2c391e862"}, + {file = "surya_ocr-0.14.3-py3-none-any.whl", hash = "sha256:2055e84a839b95536b6d957b5ecf5680ee46ecfa201f504f14b92a09fea41247"}, + {file = "surya_ocr-0.14.3.tar.gz", hash = "sha256:ec7e48bd9f2167913b329e46d27cf15cd4497554b5a30f3ada249b819482a4c6"}, ] [package.dependencies] @@ -3595,13 +3585,13 @@ telegram = ["requests"] [[package]] name = "transformers" -version = "4.52.3" +version = "4.52.4" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = true python-versions = ">=3.9.0" files = [ - {file = "transformers-4.52.3-py3-none-any.whl", hash = "sha256:cd04059da50e7cf2a617ce3143ba8beffbf119f8c25a0717c3454fd9d0f19609"}, - {file = "transformers-4.52.3.tar.gz", hash = "sha256:2e1de29374f27920aaf6d589d4e6339f33def2fb08809e1a1d792e040e9fbce7"}, + {file = "transformers-4.52.4-py3-none-any.whl", hash = "sha256:203f5c19416d5877e36e88633943761719538a25d9775977a24fe77a1e5adfc7"}, + {file = "transformers-4.52.4.tar.gz", hash = "sha256:aff3764441c1adc192a08dba49740d3cbbcb72d850586075aed6bd89b98203e6"}, ] [package.dependencies] @@ -3618,22 +3608,22 @@ tqdm = ">=4.27" [package.extras] accelerate = ["accelerate (>=0.26.0)"] -all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate 
(>=0.26.0)", "accelerate (>=0.26.0)", "av", "codecarbon (>=2.8.1)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.4.4,<0.5)", "librosa", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1,<2.7)", "torchaudio", "torchvision"] +all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av", "codecarbon (>=2.8.1)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.4.4,<0.5)", "librosa", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1,<2.7)", "torchaudio", "torchvision"] audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] benchmark = ["optimum-benchmark (>=0.3.0)"] codecarbon = ["codecarbon (>=2.8.1)"] deepspeed = ["accelerate (>=0.26.0)", "deepspeed (>=0.9.3)"] -deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", 
"sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] -dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.4.4,<0.5)", "libcst", "librosa", "nltk (<=3.8.1)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1,<2.7)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)"] -dev-tensorflow = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp 
(>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "tf2onnx", "timeout-decorator", "tokenizers (>=0.21,<0.22)", "urllib3 (<2.0.0)"] -dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "kernels (>=0.4.4,<0.5)", "libcst", "librosa", "nltk (<=3.8.1)", "num2words", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (<=1.0.11)", "tokenizers 
(>=0.21,<0.22)", "torch (>=2.1,<2.7)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] +dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.4.4,<0.5)", "libcst", "librosa", "nltk (<=3.8.1)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch 
(>=2.1,<2.7)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.21,<0.22)", "urllib3 (<2.0.0)"] +dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "kernels (>=0.4.4,<0.5)", "libcst", "librosa", "nltk (<=3.8.1)", "num2words", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", 
"tensorboard", "timeout-decorator", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1,<2.7)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"] flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] ftfy = ["ftfy"] -hf-xet = ["hf_xet"] +hf-xet = ["hf-xet"] hub-kernels = ["kernels (>=0.4.4,<0.5)"] integrations = ["kernels (>=0.4.4,<0.5)", "optuna", "ray[tune] (>=2.7.0)", "sigopt"] -ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)"] +ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] modelcreation = ["cookiecutter (==1.7.3)"] natten = ["natten (>=0.14.6,<0.15.0)"] num2words = ["num2words"] @@ -3650,7 +3640,7 @@ serving = ["fastapi", "pydantic", "starlette", "uvicorn"] sigopt = ["sigopt"] sklearn = ["scikit-learn"] speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] -testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "parameterized", "psutil", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] +testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", 
"parameterized", "psutil", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] tf = ["keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"] tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<0.24)", "tensorflow-text (<2.16)", "tf2onnx"] tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] @@ -3660,7 +3650,7 @@ tokenizers = ["tokenizers (>=0.21,<0.22)"] torch = ["accelerate (>=0.26.0)", "torch (>=2.1,<2.7)"] torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"] -torchhub = ["filelock", "huggingface-hub (>=0.30.0,<1.0)", "importlib_metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1,<2.7)", "tqdm (>=4.27)"] +torchhub = ["filelock", "huggingface-hub (>=0.30.0,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1,<2.7)", "tqdm (>=4.27)"] video = ["av"] vision = ["Pillow (>=10.0.1,<=15.0)"] @@ -3714,13 +3704,13 @@ files = [ [[package]] name = "types-jsonschema" -version = "4.23.0.20250516" +version = "4.24.0.20250528" description = "Typing stubs for jsonschema" optional = false python-versions = ">=3.9" files = [ - {file = "types_jsonschema-4.23.0.20250516-py3-none-any.whl", hash = "sha256:e7d0dd7db7e59e63c26e3230e26ffc64c4704cc5170dc21270b366a35ead1618"}, - 
{file = "types_jsonschema-4.23.0.20250516.tar.gz", hash = "sha256:9ace09d9d35c4390a7251ccd7d833b92ccc189d24d1b347f26212afce361117e"}, + {file = "types_jsonschema-4.24.0.20250528-py3-none-any.whl", hash = "sha256:6a906b5ff73ac11c8d1e0b6c30a9693e1e4e1ab56c56c932b3a7e081b86d187b"}, + {file = "types_jsonschema-4.24.0.20250528.tar.gz", hash = "sha256:7e28c64e0ae7980eeb158105b20663fc6a6b8f81d5f86ea6614aa0014417bd1e"}, ] [package.dependencies] @@ -3997,4 +3987,4 @@ pdf = ["marker-pdf"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<4" -content-hash = "6109db165c5237d16faef528db7295ae835d60cadbfc064a19903598334436ed" +content-hash = "7caff65c39c3b097d0bb29dd6c516a7d5574191140a9356ca60b41d75f0c83e4" From c9e4b6b0fbadee49992427d2fbc870eeb475a691 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 1 Jun 2025 20:54:37 +0000 Subject: [PATCH 098/125] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_regression.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_regression.py b/tests/test_regression.py index 3e736a18..14d806d4 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -54,12 +54,9 @@ def test_regression_html_private( _run_html_regression_test(data_path, input_file, config) - def _run_html_regression_test( data_path: Path, input_file: str, config: dict[str, Any] ) -> None: - from autocorpus.autocorpus import Autocorpus - file_path = data_path / input_file with open( str(file_path).replace(".html", "_abbreviations.json"), encoding="utf-8" @@ -79,7 +76,6 @@ def _run_html_regression_test( except FileNotFoundError: expected_tables = {} - auto_corpus = process_file(config=config, file_path=pmc_example_path) abbreviations = auto_corpus.abbreviations bioc = auto_corpus.to_bioc() From ac35017eb98c6c6274417e74785d44792561ec07 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands 
<41603761+Thomas-Rowlands@users.noreply.github.com> Date: Sun, 1 Jun 2025 22:43:28 +0100 Subject: [PATCH 099/125] Update test_regression.py Correcting merge error which had two different references to the input path --- tests/test_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_regression.py b/tests/test_regression.py index 14d806d4..8dbf181c 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -76,7 +76,7 @@ def _run_html_regression_test( except FileNotFoundError: expected_tables = {} - auto_corpus = process_file(config=config, file_path=pmc_example_path) + auto_corpus = process_file(config=config, file_path=file_path) abbreviations = auto_corpus.abbreviations bioc = auto_corpus.to_bioc() tables = auto_corpus.tables From 4bda422dc8f3209ead6e8a3471669500cea2b15e Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 30 May 2025 20:22:52 +0100 Subject: [PATCH 100/125] README.md: Use default config flag for example usages --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5eb6636e..949a677b 100644 --- a/README.md +++ b/README.md @@ -38,16 +38,16 @@ pip install autocorpus[pdf] ## Usage -Run the below command for a single file example +You can run Auto-CORPus on a single file like so: ```sh -auto-corpus -c "autocorpus/configs/config_pmc.json" -t "output" -f "path/to/html/file" -o JSON +auto-corpus -b PMC -t "output" -f "path/to/html/file" -o JSON ``` -Run the main app for a directory of files example +Auto-CORPus can also process whole directories: ```sh -auto-corpus -c "autocorpus/configs/config_pmc.json" -t "output" -f "path/to/directory/of/html/files" -o JSON +auto-corpus -b PMC -t "output" -f "path/to/directory/of/html/files" -o JSON ``` ### Available arguments From 0848c2f8e18dac78d8e733258e4ee8c780435ddd Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 30 May 2025 20:24:20 +0100 Subject: [PATCH 101/125] VS Code: Use default config flag 
for launch configs --- .vscode/launch.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 41a7047c..01c9715e 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -10,8 +10,8 @@ "request": "launch", "module": "autocorpus", "args": [ - "-c", - "${workspaceFolder}/autocorpus/configs/config_pmc.json", + "-b", + "PMC", "-t", "output", "-f", @@ -24,8 +24,8 @@ "request": "launch", "module": "autocorpus", "args": [ - "-c", - "${workspaceFolder}/autocorpus/configs/config_pmc_pre_oct_2024.json", + "-b", + "LEGACY_PMC", "-t", "output", "-f", From 0be8e24a15e8bce8ed1c0e15ed52d77ec869fc7c Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Mon, 2 Jun 2025 12:20:29 +0100 Subject: [PATCH 102/125] Fix file path in test_file_type --- tests/test_file_type.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_file_type.py b/tests/test_file_type.py index db0fe6bb..aa21dc8f 100644 --- a/tests/test_file_type.py +++ b/tests/test_file_type.py @@ -9,10 +9,10 @@ def test_check_file_type_html(tmp_path: Path, data_path: Path) -> None: """Test that HTML files are correctly identified.""" from autocorpus.file_type import FileType, check_file_type - html_file = data_path / "PMC" / "Current" / "PMC8885717.html" + html_file = data_path / "public" / "html" / "PMC" / "PMC8885717.html" assert check_file_type(html_file) == FileType.HTML - json_file = data_path / "PMC" / "Current" / "PMC8885717_bioc.json" + json_file = data_path / "public" / "html" / "PMC" / "PMC8885717_bioc.json" assert check_file_type(json_file) == FileType.OTHER pdf_file = data_path / "Supplementary" / "PDF" / "tp-10-08-2123-coif.pdf" From e8a7de50349eab05666c325cb714d7e8e8687422 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Mon, 2 Jun 2025 12:21:14 +0100 Subject: [PATCH 103/125] Use UNKNOWN instead of OTHER and check if file exists --- autocorpus/file_processing.py | 2 +- autocorpus/file_type.py | 15 
+++++++++++---- tests/test_file_type.py | 6 +++++- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/autocorpus/file_processing.py b/autocorpus/file_processing.py index d8136939..db3d8303 100644 --- a/autocorpus/file_processing.py +++ b/autocorpus/file_processing.py @@ -58,7 +58,7 @@ def process_file( " pip install autocorpus[pdf]" ) raise - case FileType.OTHER: + case FileType.UNKNOWN: raise NotImplementedError(f"Could not identify file type for {file_path}") diff --git a/autocorpus/file_type.py b/autocorpus/file_type.py index 2aa5e0c0..34a892b9 100644 --- a/autocorpus/file_type.py +++ b/autocorpus/file_type.py @@ -17,13 +17,13 @@ class FileType(Enum): HTML: Represents an HTML file. XML: Represents an XML file. PDF: Represents a PDF file. - OTHER: Represents any other file type that is not recognized. + UNKNOWN: Represents any other file type that is not recognized. """ HTML = auto() XML = auto() PDF = auto() - OTHER = auto() + UNKNOWN = auto() def check_file_type(file_path: Path) -> FileType: @@ -38,7 +38,14 @@ def check_file_type(file_path: Path) -> FileType: Returns: A FileType Enum value indicating the type of the file. + + Raises: + FileNotFoundError: If the provided path does not point to a file. """ + if not file_path.is_file(): + message = f"File {file_path} is not a file." 
+ logger.error(message) + raise FileNotFoundError(message) file_extension = file_path.suffix.lower() match file_extension: case ".html" | ".htm" | ".xml": @@ -50,8 +57,8 @@ def check_file_type(file_path: Path) -> FileType: return FileType.HTML except Exception as ex: logger.error(f"Error parsing file {file_path}: {ex}") - return FileType.OTHER + return FileType.UNKNOWN case ".pdf": return FileType.PDF case _: - return FileType.OTHER + return FileType.UNKNOWN diff --git a/tests/test_file_type.py b/tests/test_file_type.py index aa21dc8f..ccad64c5 100644 --- a/tests/test_file_type.py +++ b/tests/test_file_type.py @@ -2,6 +2,7 @@ from pathlib import Path +import pytest from lxml import etree @@ -13,7 +14,7 @@ def test_check_file_type_html(tmp_path: Path, data_path: Path) -> None: assert check_file_type(html_file) == FileType.HTML json_file = data_path / "public" / "html" / "PMC" / "PMC8885717_bioc.json" - assert check_file_type(json_file) == FileType.OTHER + assert check_file_type(json_file) == FileType.UNKNOWN pdf_file = data_path / "Supplementary" / "PDF" / "tp-10-08-2123-coif.pdf" assert check_file_type(pdf_file) == FileType.PDF @@ -24,3 +25,6 @@ def test_check_file_type_html(tmp_path: Path, data_path: Path) -> None: out.write(etree.tostring(etree.XML("data"), xml_declaration=True)) assert check_file_type(xml_file) == FileType.XML + + with pytest.raises(FileNotFoundError): + check_file_type(tmp_path / "non_existent_file.txt") From b74d75979694bebbaa4ff14783743e2923ca46d4 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Mon, 2 Jun 2025 13:08:29 +0100 Subject: [PATCH 104/125] Do not use asserts for error checking --- autocorpus/file_type.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/autocorpus/file_type.py b/autocorpus/file_type.py index 34a892b9..ae12c0c4 100644 --- a/autocorpus/file_type.py +++ b/autocorpus/file_type.py @@ -50,10 +50,13 @@ def check_file_type(file_path: Path) -> FileType: match file_extension: case ".html" | 
".htm" | ".xml": try: - assert etree.parse(file_path, etree.XMLParser()).docinfo.xml_version + if not etree.parse(file_path, etree.XMLParser()).docinfo.xml_version: + raise etree.ParseError("Not a valid XML file") return FileType.XML - except (etree.ParseError, AssertionError): - etree.parse(file_path, etree.HTMLParser()) + except etree.ParseError: + docinfo = etree.parse(file_path, etree.HTMLParser()).docinfo + if not isinstance(docinfo, etree.DocInfo) and not docinfo.doctype: + raise etree.ParseError("Not a valid HTML file") return FileType.HTML except Exception as ex: logger.error(f"Error parsing file {file_path}: {ex}") From d1b4d74e7d71e9c3ff2b17be0b263ca8f5fa58e6 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Mon, 2 Jun 2025 13:10:19 +0100 Subject: [PATCH 105/125] Review suggestions for file_processing --- autocorpus/file_processing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/autocorpus/file_processing.py b/autocorpus/file_processing.py index db3d8303..374a7c99 100644 --- a/autocorpus/file_processing.py +++ b/autocorpus/file_processing.py @@ -5,6 +5,8 @@ from typing import Any from . import logger +from .ac_bioc.bioctable.json import BioCTableJSONEncoder +from .ac_bioc.json import BioCJSONEncoder from .autocorpus import Autocorpus from .file_type import FileType, check_file_type from .html import process_html_article @@ -34,13 +36,11 @@ def process_file( ) case FileType.XML: raise NotImplementedError( - f"Could not process file {file_path}: " - "XML processing is not implemented yet." + f"Could not process file {file_path}. 
Process XML files by running:\n\t" + f"python -m autocorpus.parse_xml {file_path}" ) case FileType.PDF: try: - from .ac_bioc.bioctable.json import BioCTableJSONEncoder - from .ac_bioc.json import BioCJSONEncoder from .pdf import extract_pdf_content text, tbls = extract_pdf_content(file_path) From 606a14b120bc947686e813e66dc5813388a95e68 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Mon, 2 Jun 2025 13:12:01 +0100 Subject: [PATCH 106/125] Update autocorpus/html.py Co-authored-by: Alex Dewar --- autocorpus/html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autocorpus/html.py b/autocorpus/html.py index a6e25e16..797822ff 100644 --- a/autocorpus/html.py +++ b/autocorpus/html.py @@ -202,7 +202,7 @@ def _merge_tables_with_empty_tables( seen_ids[str(i)] = f"Table {table['id']}." for table in empty_tables: - for seenID in seen_ids.keys(): + for seenID in seen_ids: if not table["title"].startswith(seen_ids[seenID]): continue From 4330527fe910f6f65067d99b23002c1d71c51274 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Mon, 2 Jun 2025 13:23:44 +0100 Subject: [PATCH 107/125] merge_tables stylistic suggested changes --- autocorpus/html.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/autocorpus/html.py b/autocorpus/html.py index 797822ff..1da2b62c 100644 --- a/autocorpus/html.py +++ b/autocorpus/html.py @@ -196,27 +196,27 @@ def _merge_tables_with_empty_tables( Returns: A list of documents with titles and captions from empty tables merged in. """ - seen_ids = {} + seen_ids: dict[int, str] = {} for i, table in enumerate(documents): if "id" in table: - seen_ids[str(i)] = f"Table {table['id']}." + seen_ids[i] = f"Table {table['id']}." 
for table in empty_tables: - for seenID in seen_ids: - if not table["title"].startswith(seen_ids[seenID]): + for seen_id in seen_ids: + if not table["title"].startswith(seen_ids[seen_id]): continue - if "title" in table and not table["title"] == "": + if title := table.get("title"): set_new = False - for passage in documents[int(seenID)]["passages"]: + for passage in documents[seen_id]["passages"]: if ( passage["infons"]["section_type"][0]["section_name"] == "table_title" ): - passage["text"] = table["title"] + passage["text"] = title set_new = True if not set_new: - documents[int(seenID)]["passages"].append( + documents[seen_id]["passages"].append( { "offset": 0, "infons": { @@ -228,20 +228,20 @@ def _merge_tables_with_empty_tables( } ] }, - "text": table["title"], + "text": title, } ) - if "caption" in table and not table["caption"] == "": + if caption := table.get("caption"): set_new = False - for passage in documents[int(seenID)]["passages"]: + for passage in documents[seen_id]["passages"]: if ( passage["infons"]["section_type"][0]["section_name"] == "table_caption" ): - passage["text"] = table["caption"] + passage["text"] = caption set_new = True if not set_new: - documents[int(seenID)]["passages"].append( + documents[seen_id]["passages"].append( { "offset": 0, "infons": { @@ -253,20 +253,20 @@ def _merge_tables_with_empty_tables( } ] }, - "text": table["caption"], + "text": caption, } ) - if "footer" in table and not table["footer"] == "": + if footer := table.get("footer"): set_new = False - for passage in documents[int(seenID)]["passages"]: + for passage in documents[seen_id]["passages"]: if ( passage["infons"]["section_type"][0]["section_name"] == "table_footer" ): - passage["text"] = table["footer"] + passage["text"] = footer set_new = True if not set_new: - documents[int(seenID)]["passages"].append( + documents[seen_id]["passages"].append( { "offset": 0, "infons": { @@ -278,7 +278,7 @@ def _merge_tables_with_empty_tables( } ] }, - "text": 
table["footer"], + "text": footer, } ) return documents From 5a0869b790b66c02a9a0f97bc0300979326ab478 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Mon, 2 Jun 2025 13:37:39 +0100 Subject: [PATCH 108/125] Use _set_table_passage helper function in _merge_tables --- autocorpus/html.py | 116 +++++++++++++++++---------------------------- 1 file changed, 44 insertions(+), 72 deletions(-) diff --git a/autocorpus/html.py b/autocorpus/html.py index 1da2b62c..816b6f09 100644 --- a/autocorpus/html.py +++ b/autocorpus/html.py @@ -196,6 +196,29 @@ def _merge_tables_with_empty_tables( Returns: A list of documents with titles and captions from empty tables merged in. """ + + def _set_table_passage(passages, section_name, iao_name, iao_id, text): + set_new = False + for passage in passages: + if passage["infons"]["section_type"][0]["section_name"] == section_name: + passage["text"] = text + set_new = True + if set_new: + return passages + return { + "offset": 0, + "infons": { + "section_type": [ + { + "section_name": section_name, + "iao_name": iao_name, + "iao_id": iao_id, + } + ] + }, + "text": text, + } + seen_ids: dict[int, str] = {} for i, table in enumerate(documents): if "id" in table: @@ -207,80 +230,29 @@ def _merge_tables_with_empty_tables( continue if title := table.get("title"): - set_new = False - for passage in documents[seen_id]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_title" - ): - passage["text"] = title - set_new = True - if not set_new: - documents[seen_id]["passages"].append( - { - "offset": 0, - "infons": { - "section_type": [ - { - "section_name": "table_title", - "iao_name": "document title", - "iao_id": "IAO:0000305", - } - ] - }, - "text": title, - } - ) + documents[seen_id]["passages"] = _set_table_passage( + documents[seen_id]["passages"], + "table_title", + "document title", + "IAO:0000305", + title, + ) if caption := table.get("caption"): - set_new = False - for passage in 
documents[seen_id]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_caption" - ): - passage["text"] = caption - set_new = True - if not set_new: - documents[seen_id]["passages"].append( - { - "offset": 0, - "infons": { - "section_type": [ - { - "section_name": "table_caption", - "iao_name": "caption", - "iao_id": "IAO:0000304", - } - ] - }, - "text": caption, - } - ) + documents[seen_id]["passages"] = _set_table_passage( + documents[seen_id], + "table_caption", + "caption", + "IAO:0000304", + caption, + ) if footer := table.get("footer"): - set_new = False - for passage in documents[seen_id]["passages"]: - if ( - passage["infons"]["section_type"][0]["section_name"] - == "table_footer" - ): - passage["text"] = footer - set_new = True - if not set_new: - documents[seen_id]["passages"].append( - { - "offset": 0, - "infons": { - "section_type": [ - { - "section_name": "table_footer", - "iao_name": "caption", - "iao_id": "IAO:0000304", - } - ] - }, - "text": footer, - } - ) + documents[seen_id]["passages"] = _set_table_passage( + documents[seen_id], + "table_footer", + "caption", + "IAO:0000304", + footer, + ) return documents From 094c2443d24f4a211d6ec3fad30dea236e4241e0 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Mon, 2 Jun 2025 14:59:38 +0100 Subject: [PATCH 109/125] Fix error in empty_tables processing --- autocorpus/html.py | 10 ++++++---- autocorpus/table.py | 10 ---------- tests/test_regression.py | 2 +- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/autocorpus/html.py b/autocorpus/html.py index 816b6f09..edc127c3 100644 --- a/autocorpus/html.py +++ b/autocorpus/html.py @@ -309,13 +309,15 @@ def process_html_article( new_documents = [] for table_file in linked_tables: soup = load_html_file(table_file) - tables, empty_tables = get_table_json(soup, config, table_file) - new_documents.extend(tables.get("documents", [])) - empty_tables.extend(empty_tables) + new_tables, new_empty_tables = 
get_table_json(soup, config, table_file) + new_documents.extend(new_tables.get("documents", [])) + empty_tables.extend(new_empty_tables) tables["documents"] = _extend_tables_documents( tables.get("documents", []), new_documents ) if empty_tables: - _merge_tables_with_empty_tables(tables["documents"], empty_tables) + tables["documents"] = _merge_tables_with_empty_tables( + tables["documents"], empty_tables + ) return main_text, abbreviations, tables diff --git a/autocorpus/table.py b/autocorpus/table.py index 75215f51..a4745373 100644 --- a/autocorpus/table.py +++ b/autocorpus/table.py @@ -417,16 +417,6 @@ def get_table_json( empty_tables.append(table) soup_tables = [table for i, table in enumerate(soup_tables) if i not in pop_list] - for etable in empty_tables: - # has a table element, not empty - if not etable["node"].find("table"): - et_dict = { - "title": " ".join(etable["title"]), - "caption": " ".join(etable["caption"]), - "footer": " ".join(etable["footer"]), - } - empty_tables.append(et_dict) - # One table tables = [] for table_num, table in enumerate(soup_tables): diff --git a/tests/test_regression.py b/tests/test_regression.py index 8dbf181c..5f3e59da 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -135,6 +135,6 @@ def _make_reproducible(*data: dict[str, Any]) -> None: """Make output files reproducible by stripping dates and file paths.""" for d in data: d.pop("date", None) - if docs := d.get("documents", None): + if docs := d.get("documents", []): for doc in docs: doc.pop("inputfile", None) From 7ee9cd9f5549405c578ee905a909b43da532e879 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 2 Jun 2025 14:17:03 +0000 Subject: [PATCH 110/125] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_regression.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_regression.py b/tests/test_regression.py 
index 567dff7f..68cb89df 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -121,7 +121,6 @@ def test_pdf_to_bioc(data_path: Path, input_file: str, config: dict[str, Any]) - ) as f: expected_tables = json.load(f) - auto_corpus = process_file(config=config, file_path=pdf_path) new_bioc = auto_corpus.main_text From ef28bd695150dde9850c9643845a799bd8b9041d Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Mon, 2 Jun 2025 15:33:38 +0100 Subject: [PATCH 111/125] Post-merge fixes for Word regression test --- tests/test_regression.py | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/tests/test_regression.py b/tests/test_regression.py index 68cb89df..c890f152 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -108,7 +108,7 @@ def _run_html_regression_test( def test_pdf_to_bioc(data_path: Path, input_file: str, config: dict[str, Any]) -> None: """Test the conversion of a PDF file to a BioC format.""" pdf_path = data_path / input_file - expected_output = pdf_path.parent / "Expected Output" / pdf_path.name + expected_output_path = pdf_path.parent / "Expected Output" / pdf_path.name with open( str(expected_output_path).replace(".pdf", ".pdf_bioc.json"), encoding="utf-8", @@ -167,33 +167,11 @@ def test_word_to_bioc( ) as f: expected_bioc = json.load(f) - ac = Autocorpus(config=config) - ac.process_files(files=[temp_doc_path]) # Run on temp file + auto_corpus = process_file(config=config, file_path=temp_doc_path) - # Load generated BioC output from temp dir - with open( - str(temp_doc_path).replace(".doc", ".doc_bioc.json"), - encoding="utf-8", - ) as f: - new_bioc = json.load(f) - - if has_tables: - with open( - str(expected_output_path).replace(".doc", ".doc_tables.json"), - encoding="utf-8", - ) as f: - expected_tables = json.load(f) - - with open( - str(temp_doc_path).replace(".doc", ".doc_tables.json"), - encoding="utf-8", - ) as f: - new_tables = json.load(f) + new_bioc = 
auto_corpus.main_text - _make_reproducible(new_bioc, expected_bioc, new_tables, expected_tables) - assert new_tables == expected_tables - else: - _make_reproducible(new_bioc, expected_bioc) + _make_reproducible(new_bioc, expected_bioc) assert new_bioc == expected_bioc From f87fed4796e9e0dc01ab32de52147e424a645e71 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 2 Jun 2025 20:40:29 +0000 Subject: [PATCH 112/125] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.11.11 → v0.11.12](https://github.com/astral-sh/ruff-pre-commit/compare/v0.11.11...v0.11.12) - [github.com/pre-commit/mirrors-mypy: v1.15.0 → v1.16.0](https://github.com/pre-commit/mirrors-mypy/compare/v1.15.0...v1.16.0) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f76ef388..588cb6e1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,13 +21,13 @@ repos: hooks: - id: check-github-workflows - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.11.11 + rev: v0.11.12 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.15.0 + rev: v1.16.0 hooks: - id: mypy exclude: autocorpus/parse_xml.py From 29287e5332f4d8f668a0230dfaf33b9b74b83f29 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 3 Jun 2025 10:43:03 +0100 Subject: [PATCH 113/125] Refactored for the new autocorpus class. Implemented dataclass_json decorator for cleaner code and adjusted tests for their use. 
--- autocorpus/ac_bioc/annotation.py | 16 +--- autocorpus/ac_bioc/collection.py | 46 +--------- autocorpus/ac_bioc/document.py | 32 +++---- autocorpus/ac_bioc/json.py | 74 ++------------- autocorpus/ac_bioc/location.py | 30 +++--- autocorpus/ac_bioc/node.py | 12 +-- autocorpus/ac_bioc/passage.py | 54 ++++++++--- autocorpus/ac_bioc/relation.py | 14 +-- autocorpus/ac_bioc/sentence.py | 26 +----- autocorpus/autocorpus.py | 6 +- autocorpus/bioc_documents.py | 47 ---------- autocorpus/bioc_formatter.py | 54 +++++++++-- autocorpus/file_processing.py | 26 +++++- autocorpus/file_type.py | 4 + autocorpus/{word_extractor.py => word.py} | 17 +--- poetry.lock | 106 ++++++++++++++++------ pyproject.toml | 1 + tests/bioc/test_collection.py | 7 +- tests/bioc/test_document.py | 6 +- tests/conftest.py | 5 + tests/test_regression.py | 2 +- 21 files changed, 260 insertions(+), 325 deletions(-) delete mode 100644 autocorpus/bioc_documents.py rename autocorpus/{word_extractor.py => word.py} (91%) diff --git a/autocorpus/ac_bioc/annotation.py b/autocorpus/ac_bioc/annotation.py index 1a055fe5..ba5d64e9 100644 --- a/autocorpus/ac_bioc/annotation.py +++ b/autocorpus/ac_bioc/annotation.py @@ -3,7 +3,7 @@ from __future__ import annotations import xml.etree.ElementTree as ET -from dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field from typing import Any from .location import BioCLocation @@ -20,19 +20,7 @@ class BioCAnnotation: infons: dict[str, str] = field(default_factory=dict) locations: list[BioCLocation] = field(default_factory=list) - def to_dict(self): - """Convert the annotation to a dictionary representation. - - Returns: - dict: A dictionary containing the annotation's id, text, offset, length, and infons. 
- """ - return { - "id": self.id, - "text": self.text, - "offset": self.offset, - "length": self.length, - "infons": self.infons, - } + to_dict = asdict def to_json(self) -> dict[str, Any]: """Convert the annotation to a JSON-serializable dictionary. diff --git a/autocorpus/ac_bioc/collection.py b/autocorpus/ac_bioc/collection.py index f4e69d67..9f68db55 100644 --- a/autocorpus/ac_bioc/collection.py +++ b/autocorpus/ac_bioc/collection.py @@ -7,12 +7,15 @@ from __future__ import annotations import xml.etree.ElementTree as ET -from dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field from typing import Any +from dataclasses_json import dataclass_json + from .document import BioCDocument +@dataclass_json @dataclass class BioCCollection: """A class representing a BioC collection.""" @@ -23,46 +26,7 @@ class BioCCollection: documents: list[BioCDocument] = field(default_factory=list) infons: dict[str, str] = field(default_factory=dict) - def to_dict(self): - """Convert the BioCCollection instance to a dictionary. - - Returns: - dict: A dictionary representation of the BioCCollection instance. - """ - return { - "source": self.source, - "date": self.date, - "key": self.key, - "infons": self.infons, - "documents": [d.to_dict() for d in self.documents], - } - - def to_json(self) -> dict[str, Any]: - """Convert the BioCCollection instance to a JSON-compatible dictionary. - - Returns: - dict[str, Any]: A dictionary representation of the BioCCollection instance. - """ - return self.to_dict() - - @classmethod - def from_json(cls, data: dict[str, Any]) -> BioCCollection: - """Create a BioCCollection instance from a JSON dictionary. - - Args: - data (dict[str, Any]): A dictionary containing the JSON representation of a BioCCollection. - - Returns: - BioCCollection: An instance of BioCCollection created from the JSON dictionary. 
- """ - documents = [BioCDocument.from_dict(d) for d in data.get("documents", [])] - return cls( - source=data.get("source", ""), - date=data.get("date", ""), - key=data.get("key", ""), - infons=data.get("infons", {}), - documents=documents, - ) + to_dict = asdict def to_xml(self) -> ET.Element: """Convert the BioCCollection instance to an XML element. diff --git a/autocorpus/ac_bioc/document.py b/autocorpus/ac_bioc/document.py index e87ee76c..5dccc4cf 100644 --- a/autocorpus/ac_bioc/document.py +++ b/autocorpus/ac_bioc/document.py @@ -7,13 +7,17 @@ from __future__ import annotations import xml.etree.ElementTree as ET -from dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field from typing import Any +from dataclasses_json import dataclass_json + +from .annotation import BioCAnnotation from .passage import BioCPassage from .relation import BioCRelation +@dataclass_json @dataclass class BioCDocument: """Represents a BioC document containing passages, annotations, and relations.""" @@ -23,18 +27,11 @@ class BioCDocument: infons: dict[str, str] = field(default_factory=dict) passages: list[BioCPassage] = field(default_factory=list) relations: list[BioCRelation] = field(default_factory=list) + annotations: list[BioCAnnotation] = field( + default_factory=list + ) # TODO: discuss why this is here in legacy outputs, should it be removed? - def to_dict(self): - """Convert the BioCDocument instance to a dictionary representation. - - Returns: - dict: A dictionary containing the document's ID, infons, and passages. - """ - return { - "id": self.id, - "infons": self.infons, - "passages": [p.to_dict() for p in self.passages], - } + to_dict = asdict @classmethod def from_dict(cls, data: dict[str, Any]) -> BioCDocument: @@ -46,22 +43,15 @@ def from_dict(cls, data: dict[str, Any]) -> BioCDocument: Returns: BioCDocument: An instance of BioCDocument created from the dictionary. 
""" - passages = [BioCPassage.from_dict(p) for p in data.get("passages", [])] + passages = [BioCPassage().from_ac_dict(p) for p in data.get("passages", [])] return cls( id=data["id"], infons=data.get("infons", {}), passages=passages, relations=data.get("relations", []), + annotations=data.get("annotations", []), ) - def to_json(self) -> dict[str, Any]: - """Convert the BioCDocument instance to a JSON-compatible dictionary. - - Returns: - dict[str, Any]: A dictionary representation of the document. - """ - return self.to_dict() - def to_xml(self) -> ET.Element: """Convert the BioCDocument instance to an XML element. diff --git a/autocorpus/ac_bioc/json.py b/autocorpus/ac_bioc/json.py index 64c159ff..13b3908a 100644 --- a/autocorpus/ac_bioc/json.py +++ b/autocorpus/ac_bioc/json.py @@ -9,81 +9,19 @@ import json from typing import Any -from .annotation import BioCAnnotation from .collection import BioCCollection -from .document import BioCDocument -from .location import BioCLocation -from .node import BioCNode -from .passage import BioCPassage -from .relation import BioCRelation -from .sentence import BioCSentence class BioCJSONEncoder(json.JSONEncoder): """Custom JSON encoder for BioC-related objects.""" def default(self, o: Any) -> Any: - """Override the default method to handle BioC-related objects.""" - match o: - case BioCLocation(): - return { - "offset": o.offset, - "length": o.length, - } - - case BioCNode(): - return { - "refid": o.refid, - "role": o.role, - } - case BioCSentence(): - return { - "offset": o.offset, - "infons": o.infons, - "text": o.text, - "annotations": [self.default(a) for a in o.annotations], - "relations": [self.default(r) for r in o.relations], - } - case BioCPassage(): - return { - "offset": o.offset, - "infons": o.infons, - "text": o.text, - "annotations": [self.default(a) for a in o.annotations], - "relations": [self.default(r) for r in o.relations], - } - case BioCDocument(): - return { - "id": o.id, - "infons": o.infons, - 
"inputfile": o.inputfile, - "passages": [self.default(p) for p in o.passages], - "relations": [self.default(r) for r in o.relations], - } - case BioCAnnotation(): - return { - "id": o.id, - "infons": o.infons, - "text": o.text, - "locations": [self.default(loc) for loc in o.locations], - } - case BioCRelation(): - return { - "id": o.id, - "infons": o.infons, - "nodes": [self.default(n) for n in o.nodes], - } - case BioCCollection(): - return { - "source": o.source, - "date": o.date, - "key": o.key, - "infons": o.infons, - "documents": [self.default(d) for d in o.documents], - } - case _: - # Let the base class default method raise the TypeError - return super().default(o) + """Return a serializable object for JSON encoding, using to_dict if available.""" + if hasattr(o, "to_dict") and callable(o.to_dict): + return o.to_dict() + if o is None: + return None + return super().default(o) class BioCJSON: diff --git a/autocorpus/ac_bioc/location.py b/autocorpus/ac_bioc/location.py index ab968d46..9bba4388 100644 --- a/autocorpus/ac_bioc/location.py +++ b/autocorpus/ac_bioc/location.py @@ -6,31 +6,23 @@ from __future__ import annotations import xml.etree.ElementTree as ET +from dataclasses import asdict, dataclass, field +@dataclass class BioCLocation: """Represents a location in BioC format.""" - def __init__(self, offset: int, length: int): - """Initialize a BioCLocation instance. + offset: int = field( + default_factory=int, + metadata={"description": "The offset of the location in the text."}, + ) + length: int = field( + default_factory=int, + metadata={"description": "The length of the location in the text."}, + ) - Args: - offset (int): The starting offset of the location. - length (int): The length of the location. - """ - self.offset = offset - self.length = length - - def to_dict(self) -> dict[str, int]: - """Convert the BioCLocation instance to a dictionary. - - Returns: - dict[str, int]: A dictionary representation of the BioCLocation instance. 
- """ - return { - "offset": self.offset, - "length": self.length, - } + to_dict = asdict @classmethod def from_dict(cls, data: dict[str, int]) -> BioCLocation: diff --git a/autocorpus/ac_bioc/node.py b/autocorpus/ac_bioc/node.py index cc3c2e26..94da67cd 100644 --- a/autocorpus/ac_bioc/node.py +++ b/autocorpus/ac_bioc/node.py @@ -3,9 +3,12 @@ from __future__ import annotations import xml.etree.ElementTree as ET -from dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field +from dataclasses_json import dataclass_json + +@dataclass_json @dataclass class BioCNode: """Represents a node in a BioC graph with a reference ID and a role.""" @@ -13,12 +16,7 @@ class BioCNode: refid: str = field(default_factory=str) role: str = field(default_factory=str) - def to_dict(self) -> dict[str, str]: - """Convert the BioCNode instance to a dictionary representation.""" - return { - "refid": self.refid, - "role": self.role, - } + to_dict = asdict @classmethod def from_dict(cls, data: dict[str, str]) -> BioCNode: diff --git a/autocorpus/ac_bioc/passage.py b/autocorpus/ac_bioc/passage.py index fbfa3cbb..e66a3b58 100644 --- a/autocorpus/ac_bioc/passage.py +++ b/autocorpus/ac_bioc/passage.py @@ -7,14 +7,21 @@ from __future__ import annotations import xml.etree.ElementTree as ET -from dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field from typing import Any +from dataclasses_json import dataclass_json + from .annotation import BioCAnnotation from .relation import BioCRelation from .sentence import BioCSentence +_DEFAULT_KEYS = set( + ("section_heading", "subsection_heading", "body", "section_type", "offset") +) + +@dataclass_json @dataclass class BioCPassage: """Represents a passage in a BioC document.""" @@ -26,14 +33,7 @@ class BioCPassage: annotations: list[BioCAnnotation] = field(default_factory=list) relations: list[BioCRelation] = field(default_factory=list) - def to_dict(self): - """Convert the BioCPassage instance to 
a dictionary representation.""" - return { - "text": self.text, - "offset": self.offset, - "infons": self.infons, - "sentences": [s.to_dict() for s in self.sentences], - } + to_dict = asdict @classmethod def from_dict(cls, data: dict[str, Any]) -> BioCPassage: @@ -53,13 +53,41 @@ def from_dict(cls, data: dict[str, Any]) -> BioCPassage: sentences=sentences, ) - def to_json(self) -> dict[str, Any]: - """Convert the BioCPassage instance to a JSON-compatible dictionary. + @classmethod + def from_ac_dict(cls, passage: dict[str, Any]) -> BioCPassage: + """Create a BioCPassage from a passage dict and an offset. + + Args: + passage: dict containing info about passage + + Returns: + BioCPassage object + """ + infons = {k: v for k, v in passage.items() if k not in _DEFAULT_KEYS} + # TODO: Doesn't account for subsubsection headings which might exist + if heading := passage.get("section_heading", None): + infons["section_title_1"] = heading + if subheading := passage.get("subsection_heading", None): + infons["section_title_2"] = subheading + for i, section_type in enumerate(passage["section_type"]): + infons[f"iao_name_{i + 1}"] = section_type["iao_name"] + infons[f"iao_id_{i + 1}"] = section_type["iao_id"] + + return cls(offset=passage.get("offset", 0), infons=infons, text=passage["body"]) + + @classmethod + def from_title(cls, title: str, offset: int) -> BioCPassage: + """Create a BioCPassage from a title and offset. + + Args: + title: Passage title + offset: Passage offset Returns: - dict[str, Any]: A dictionary representation of the BioCPassage instance. + BioCPassage object """ - return self.to_dict() + infons = {"iao_name_1": "document title", "iao_id_1": "IAO:0000305"} + return cls(offset=offset, infons=infons, text=title) def to_xml(self) -> ET.Element: """Convert the BioCPassage instance to an XML element. 
diff --git a/autocorpus/ac_bioc/relation.py b/autocorpus/ac_bioc/relation.py index cae19c13..a458e568 100644 --- a/autocorpus/ac_bioc/relation.py +++ b/autocorpus/ac_bioc/relation.py @@ -3,7 +3,7 @@ from __future__ import annotations import xml.etree.ElementTree as ET -from dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field from typing import Any from .node import BioCNode @@ -17,17 +17,7 @@ class BioCRelation: infons: dict[str, str] = field(default_factory=dict) nodes: list[BioCNode] = field(default_factory=list) - def to_dict(self) -> dict[str, Any]: - """Convert the BioCRelation instance to a dictionary. - - Returns: - dict[str, Any]: A dictionary representation of the BioCRelation instance. - """ - return { - "id": self.id, - "infons": self.infons, - "nodes": [n.to_dict() for n in self.nodes], - } + to_dict = asdict @classmethod def from_dict(cls, data: dict[str, Any]) -> BioCRelation: diff --git a/autocorpus/ac_bioc/sentence.py b/autocorpus/ac_bioc/sentence.py index 3a3a6b56..32ff35da 100644 --- a/autocorpus/ac_bioc/sentence.py +++ b/autocorpus/ac_bioc/sentence.py @@ -3,13 +3,16 @@ from __future__ import annotations import xml.etree.ElementTree as ET -from dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field from typing import Any +from dataclasses_json import dataclass_json + from .annotation import BioCAnnotation from .relation import BioCRelation +@dataclass_json @dataclass class BioCSentence: """Represents a sentence in the BioC format.""" @@ -20,18 +23,7 @@ class BioCSentence: annotations: list[BioCAnnotation] = field(default_factory=list) relations: list[BioCRelation] = field(default_factory=list) - def to_dict(self): - """Convert the BioCSentence instance to a dictionary representation. - - Returns: - dict: A dictionary containing the sentence's text, offset, infons, and annotations. 
- """ - return { - "text": self.text, - "offset": self.offset, - "infons": self.infons, - "annotations": [a.to_dict() for a in self.annotations], - } + to_dict = asdict @classmethod def from_dict(cls, data: dict[str, Any]) -> BioCSentence: @@ -51,14 +43,6 @@ def from_dict(cls, data: dict[str, Any]) -> BioCSentence: annotations=annotations, ) - def to_json(self) -> dict[str, Any]: - """Convert the BioCSentence instance to a JSON-compatible dictionary. - - Returns: - dict[str, Any]: A dictionary representation of the sentence. - """ - return self.to_dict() - def to_xml(self) -> ET.Element: """Convert the BioCSentence instance to an XML element. diff --git a/autocorpus/autocorpus.py b/autocorpus/autocorpus.py index 40f8c121..5314c115 100644 --- a/autocorpus/autocorpus.py +++ b/autocorpus/autocorpus.py @@ -5,6 +5,8 @@ from pathlib import Path from typing import Any +from autocorpus.ac_bioc.collection import BioCCollection + from .ac_bioc import BioCJSON, BioCXML from .bioc_formatter import get_formatted_bioc_collection @@ -27,11 +29,11 @@ def has_tables(self) -> bool: """ return bool(self.tables.get("documents")) - def to_bioc(self) -> dict[str, Any]: + def to_bioc(self) -> BioCCollection: """Get the currently loaded bioc as a dict. Returns: - bioc as a dict + bioc as a BioCCollection object """ return get_formatted_bioc_collection(self.main_text, self.file_path) diff --git a/autocorpus/bioc_documents.py b/autocorpus/bioc_documents.py deleted file mode 100644 index 2916ec3b..00000000 --- a/autocorpus/bioc_documents.py +++ /dev/null @@ -1,47 +0,0 @@ -"""Script for handling construction of BioC documents.""" - -from pathlib import Path -from typing import Any - -from .bioc_passage import BioCPassage - - -def get_formatted_bioc_document( - main_text: dict[str, Any], - file_path: Path, -) -> dict[str, Any]: # TODO: Change return type to ac_bioc.BioCDocument - """Constructs the BioC document template using the provided data store. 
- - Args: - main_text: Input document-level data. - file_path: Path to the input file. - - Returns: - BioC document complete populated with passages. - """ - # build document passages - seen_headings = [] - passages = [BioCPassage.from_title(main_text["title"], 0).as_dict()] - offset = 0 # offset for passage start position - if main_text["title"] not in seen_headings: - offset = len(main_text["title"]) - seen_headings.append(main_text["title"]) - for passage in main_text["paragraphs"]: - passage_obj = BioCPassage.from_dict(passage, offset) - passages.append(passage_obj.as_dict()) - offset += len(passage["body"]) - if passage["subsection_heading"] not in seen_headings: - offset += len(passage["subsection_heading"]) - seen_headings.append(passage["subsection_heading"]) - if passage["section_heading"] not in seen_headings: - offset += len(passage["section_heading"]) - seen_headings.append(passage["section_heading"]) - - return { - "id": file_path.name.split(".")[0], - "inputfile": str(file_path), - "infons": {}, - "passages": passages, - "annotations": [], - "relations": [], - } diff --git a/autocorpus/bioc_formatter.py b/autocorpus/bioc_formatter.py index a3882c62..de84526f 100644 --- a/autocorpus/bioc_formatter.py +++ b/autocorpus/bioc_formatter.py @@ -4,13 +4,13 @@ from pathlib import Path from typing import Any -from autocorpus.bioc_documents import get_formatted_bioc_document +from .ac_bioc import BioCCollection, BioCDocument, BioCPassage def get_formatted_bioc_collection( main_text: dict[str, Any], file_path: Path, -) -> dict[str, Any]: # TODO: Change return type to ac_bioc.BioCCollection +) -> BioCCollection: # TODO: Change return type to ac_bioc.BioCCollection """Constructs a BioC collection from input document-level data. 
Args: @@ -20,11 +20,47 @@ def get_formatted_bioc_collection( Returns: BioC collection """ - bioc_collection = { - "source": "Auto-CORPus (full-text)", - "date": datetime.today().strftime("%Y%m%d"), - "key": "autocorpus_fulltext.key", - "infons": {}, - "documents": [get_formatted_bioc_document(main_text, file_path)], - } + bioc_collection = BioCCollection( + date=datetime.today().strftime("%Y%m%d"), + documents=[get_formatted_bioc_document(main_text, file_path)], + source="Auto-CORPus (full-text)", + key="autocorpus_fulltext.key", + ) return bioc_collection + + +def get_formatted_bioc_document( + main_text: dict[str, Any], + file_path: Path, +) -> BioCDocument: # TODO: Change return type to ac_bioc.BioCDocument + """Constructs the BioC document template using the provided data store. + + Args: + main_text: Input document-level data. + file_path: Path to the input file. + + Returns: + BioC document complete populated with passages. + """ + # build document passages + seen_headings = [] + passages = [BioCPassage().from_title(main_text["title"], 0)] + offset = 0 # offset for passage start position + if main_text["title"] not in seen_headings: + offset = len(main_text["title"]) + seen_headings.append(main_text["title"]) + for passage in main_text["paragraphs"]: + passage["offset"] = offset + passage_obj = BioCPassage().from_ac_dict(passage) + passages.append(passage_obj) + offset += len(passage["body"]) + if passage["subsection_heading"] not in seen_headings: + offset += len(passage["subsection_heading"]) + seen_headings.append(passage["subsection_heading"]) + if passage["section_heading"] not in seen_headings: + offset += len(passage["section_heading"]) + seen_headings.append(passage["section_heading"]) + + return BioCDocument( + id=file_path.name.split(".")[0], inputfile=str(file_path), passages=passages + ) diff --git a/autocorpus/file_processing.py b/autocorpus/file_processing.py index 374a7c99..9e9cac07 100644 --- a/autocorpus/file_processing.py +++ 
b/autocorpus/file_processing.py @@ -43,11 +43,14 @@ def process_file( try: from .pdf import extract_pdf_content - text, tbls = extract_pdf_content(file_path) + text, tables = extract_pdf_content(file_path) # TODO: Use text.to_dict() after bugfix in ac_bioc (Issue #272) - main_text = BioCJSONEncoder().default(text) - tables = BioCTableJSONEncoder().default(tbls) + if text: + main_text = BioCJSONEncoder().default(text) + + if tables: + tables = BioCTableJSONEncoder().default(tables) return Autocorpus(file_path, main_text, dict(), tables) @@ -58,6 +61,23 @@ def process_file( " pip install autocorpus[pdf]" ) raise + case FileType.WORD: + try: + from .word import extract_word_content + + text, tbls = extract_word_content(file_path) + + # TODO: Use text.to_dict() after bugfix in ac_bioc (Issue #272) + main_text = BioCJSONEncoder().default(text) + tables = BioCTableJSONEncoder().default(tbls) + + return Autocorpus(file_path, main_text, dict(), tables) + except ModuleNotFoundError: + logger.error( + "Could not load necessary Word packages. Microsoft Word is required to process Word documents on Windows & MAC OS, or alternatively LibreOffice can be used on Linux.\n" + ) + raise + case FileType.UNKNOWN: raise NotImplementedError(f"Could not identify file type for {file_path}") diff --git a/autocorpus/file_type.py b/autocorpus/file_type.py index ae12c0c4..66966030 100644 --- a/autocorpus/file_type.py +++ b/autocorpus/file_type.py @@ -17,12 +17,14 @@ class FileType(Enum): HTML: Represents an HTML file. XML: Represents an XML file. PDF: Represents a PDF file. + WORD: Represents a Word document (DOCX or DOC). UNKNOWN: Represents any other file type that is not recognized. 
""" HTML = auto() XML = auto() PDF = auto() + WORD = auto() UNKNOWN = auto() @@ -63,5 +65,7 @@ def check_file_type(file_path: Path) -> FileType: return FileType.UNKNOWN case ".pdf": return FileType.PDF + case ".docx" | ".doc": + return FileType.WORD case _: return FileType.UNKNOWN diff --git a/autocorpus/word_extractor.py b/autocorpus/word.py similarity index 91% rename from autocorpus/word_extractor.py rename to autocorpus/word.py index c7c7a0be..f7eca2db 100644 --- a/autocorpus/word_extractor.py +++ b/autocorpus/word.py @@ -14,9 +14,7 @@ from . import logger from .ac_bioc.bioctable.collection import BioCTableCollection -from .ac_bioc.bioctable.json import BioCTableJSON from .ac_bioc.collection import BioCCollection -from .ac_bioc.json import BioCJSON from .bioc_supplementary import ( BioCTableConverter, BioCTextConverter, @@ -197,21 +195,8 @@ def extract_word_content(file_path: Path): if tables: bioc_tables = BioCTableConverter.build_bioc(tables, str(file_path)) - if bioc_text: - out_filename = str(file_path).replace( - file_path.suffix, f"{file_path.suffix}_bioc.json" - ) - with open(out_filename, "w", encoding="utf-8") as f: - BioCJSON.dump(bioc_text, f, indent=4) - - if bioc_tables: - out_table_filename = str(file_path).replace( - file_path.suffix, f"{file_path.suffix}_tables.json" - ) - with open(out_table_filename, "w", encoding="utf-8") as f: - BioCTableJSON.dump(bioc_tables, f, indent=4) - os.unlink(str(docx_path)) + return bioc_text, bioc_tables except FileNotFoundError: logger.error( "LibreOffice 'soffice' command not found. Ensure it is installed and in your PATH." diff --git a/poetry.lock b/poetry.lock index 11d696bb..ece8ed73 100644 --- a/poetry.lock +++ b/poetry.lock @@ -371,6 +371,21 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli"] +[[package]] +name = "dataclasses-json" +version = "0.6.7" +description = "Easily serialize dataclasses to and from JSON." 
+optional = false +python-versions = "<4.0,>=3.7" +files = [ + {file = "dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a"}, + {file = "dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0"}, +] + +[package.dependencies] +marshmallow = ">=3.18.0,<4.0.0" +typing-inspect = ">=0.4.0,<1" + [[package]] name = "distlib" version = "0.3.9" @@ -1228,13 +1243,13 @@ six = ">=1.15,<2" [[package]] name = "marker-pdf" -version = "1.7.3" +version = "1.7.4" description = "Convert documents to markdown with high speed and accuracy." optional = true python-versions = "<4.0,>=3.10" files = [ - {file = "marker_pdf-1.7.3-py3-none-any.whl", hash = "sha256:3dbb890dfe383d4d437d55ab97dbc35e2aae1b613a032b0738d1d25cabbd07f4"}, - {file = "marker_pdf-1.7.3.tar.gz", hash = "sha256:ddf297036ccd54d94e2f4a684b71bfe201c19755aa40dd8f2be757a8e631f8b9"}, + {file = "marker_pdf-1.7.4-py3-none-any.whl", hash = "sha256:c27d4657a366140b871dd3aebb6421bd8351e5e3be46bc8ea6ad2242d4e3f935"}, + {file = "marker_pdf-1.7.4.tar.gz", hash = "sha256:292e9e42eb8f426e713b1dc147edbfdc5608731b0f00646e34be1f03dec0d7df"}, ] [package.dependencies] @@ -1255,7 +1270,7 @@ python-dotenv = ">=1.0.0,<2.0.0" rapidfuzz = ">=3.8.1,<4.0.0" regex = ">=2024.4.28,<2025.0.0" scikit-learn = ">=1.6.1,<2.0.0" -surya-ocr = ">=0.14.2,<0.15.0" +surya-ocr = ">=0.14.5,<0.15.0" torch = ">=2.7.0,<3.0.0" tqdm = ">=4.66.1,<5.0.0" transformers = ">=4.45.2,<5.0.0" @@ -1333,6 +1348,25 @@ files = [ {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, ] +[[package]] +name = "marshmallow" +version = "3.26.1" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "marshmallow-3.26.1-py3-none-any.whl", hash = "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c"}, + {file = "marshmallow-3.26.1.tar.gz", hash = "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6"}, +] + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["marshmallow[tests]", "pre-commit (>=3.5,<5.0)", "tox"] +docs = ["autodocsumm (==0.2.14)", "furo (==2024.8.6)", "sphinx (==8.1.3)", "sphinx-copybutton (==0.5.2)", "sphinx-issues (==5.0.0)", "sphinxext-opengraph (==0.9.1)"] +tests = ["pytest", "simplejson"] + [[package]] name = "mergedeep" version = "1.3.4" @@ -1926,13 +1960,13 @@ files = [ [[package]] name = "openai" -version = "1.82.1" +version = "1.83.0" description = "The official Python library for the openai API" optional = true python-versions = ">=3.8" files = [ - {file = "openai-1.82.1-py3-none-any.whl", hash = "sha256:334eb5006edf59aa464c9e932b9d137468d810b2659e5daea9b3a8c39d052395"}, - {file = "openai-1.82.1.tar.gz", hash = "sha256:ffc529680018e0417acac85f926f92aa0bbcbc26e82e2621087303c66bc7f95d"}, + {file = "openai-1.83.0-py3-none-any.whl", hash = "sha256:d15ec58ba52537d4abc7b744890ecc4ab3cffb0fdaa8e5389830f6e1a2f7f128"}, + {file = "openai-1.83.0.tar.gz", hash = "sha256:dfb421837962d9e8078929d8fc7e36e51c2a110b23a777a14e27f579d1afd6b6"}, ] [package.dependencies] @@ -2534,25 +2568,26 @@ files = [ [[package]] name = "pytest" -version = "8.3.5" +version = "8.4.0" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"}, - {file = "pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"}, + {file = "pytest-8.4.0-py3-none-any.whl", hash = 
"sha256:f40f825768ad76c0977cbacdf1fd37c6f7a468e460ea6a0636078f8972d4517e"}, + {file = "pytest-8.4.0.tar.gz", hash = "sha256:14d920b48472ea0dbf68e45b96cd1ffda4705f33307dcc86c676c1b5104838a6"}, ] [package.dependencies] -colorama = {version = "*", markers = "sys_platform == \"win32\""} -exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} -iniconfig = "*" -packaging = "*" +colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""} +iniconfig = ">=1" +packaging = ">=20" pluggy = ">=1.5,<2" +pygments = ">=2.7.2" tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] -dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] [[package]] name = "pytest-cov" @@ -3376,13 +3411,13 @@ files = [ [[package]] name = "surya-ocr" -version = "0.14.3" +version = "0.14.5" description = "OCR, layout, reading order, and table recognition in 90+ languages" optional = true python-versions = "<4.0,>=3.10" files = [ - {file = "surya_ocr-0.14.3-py3-none-any.whl", hash = "sha256:2055e84a839b95536b6d957b5ecf5680ee46ecfa201f504f14b92a09fea41247"}, - {file = "surya_ocr-0.14.3.tar.gz", hash = "sha256:ec7e48bd9f2167913b329e46d27cf15cd4497554b5a30f3ada249b819482a4c6"}, + {file = "surya_ocr-0.14.5-py3-none-any.whl", hash = "sha256:3293fdcc1f1a6e83b177479d9d71d51359e3028a4b7ada47094069b08fe652be"}, + {file = "surya_ocr-0.14.5.tar.gz", hash = "sha256:1609ec6d9ab84003e7cde93f0cba4edefb052439faffddeff0832b0f36e49b00"}, ] [package.dependencies] @@ -3751,13 +3786,13 @@ files = [ [[package]] name = "types-requests" -version = "2.32.0.20250515" +version = "2.32.0.20250602" description = "Typing stubs for requests" optional = false python-versions = ">=3.9" files = [ - {file = 
"types_requests-2.32.0.20250515-py3-none-any.whl", hash = "sha256:f8eba93b3a892beee32643ff836993f15a785816acca21ea0ffa006f05ef0fb2"}, - {file = "types_requests-2.32.0.20250515.tar.gz", hash = "sha256:09c8b63c11318cb2460813871aaa48b671002e59fda67ca909e9883777787581"}, + {file = "types_requests-2.32.0.20250602-py3-none-any.whl", hash = "sha256:f4f335f87779b47ce10b8b8597b409130299f6971ead27fead4fe7ba6ea3e726"}, + {file = "types_requests-2.32.0.20250602.tar.gz", hash = "sha256:ee603aeefec42051195ae62ca7667cd909a2f8128fdf8aad9e8a5219ecfab3bf"}, ] [package.dependencies] @@ -3779,15 +3814,30 @@ types-requests = "*" [[package]] name = "typing-extensions" -version = "4.13.2" -description = "Backported and Experimental Type Hints for Python 3.8+" +version = "4.14.0" +description = "Backported and Experimental Type Hints for Python 3.9+" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"}, - {file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"}, + {file = "typing_extensions-4.14.0-py3-none-any.whl", hash = "sha256:a1514509136dd0b477638fc68d6a91497af5076466ad0fa6c338e44e359944af"}, + {file = "typing_extensions-4.14.0.tar.gz", hash = "sha256:8676b788e32f02ab42d9e7c61324048ae4c6d844a399eebace3d4979d75ceef4"}, ] +[[package]] +name = "typing-inspect" +version = "0.9.0" +description = "Runtime inspection utilities for typing module." 
+optional = false +python-versions = "*" +files = [ + {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, + {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, +] + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + [[package]] name = "typing-inspection" version = "0.4.1" @@ -3987,4 +4037,4 @@ pdf = ["marker-pdf"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<4" -content-hash = "7caff65c39c3b097d0bb29dd6c516a7d5574191140a9356ca60b41d75f0c83e4" +content-hash = "f4976b8b0abcf0b958b2e4014a3642ae4c7e550214dbcf74fa8ab68443d895a9" diff --git a/pyproject.toml b/pyproject.toml index 21d7176d..8599a014 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ lxml = "^5.3.0" pandas = "^2.2.3" python-docx = "^1.1.2" marker-pdf = { version = "^1.6.2", optional = true } +dataclasses-json = "^0.6.7" [tool.poetry.extras] pdf = ["marker-pdf"] diff --git a/tests/bioc/test_collection.py b/tests/bioc/test_collection.py index 052c67bd..ed1c59e7 100644 --- a/tests/bioc/test_collection.py +++ b/tests/bioc/test_collection.py @@ -1,5 +1,6 @@ """Unit tests for the BioCCollection class and its methods for serialization and deserialization.""" +import json import xml.etree.ElementTree as ET from autocorpus.ac_bioc import ( @@ -21,12 +22,14 @@ def test_to_dict(sample_collection): def test_to_json_matches_to_dict(sample_collection): """Test that the JSON representation of the collection matches its dictionary representation.""" collection = sample_collection - assert collection.to_json() == collection.to_dict() + json_as_dict = json.loads(collection.to_json()) + dict_repr = collection.to_dict() + assert json_as_dict == dict_repr def test_from_json(sample_collection): """Test creating a BioCCollection from JSON data.""" - json_data = sample_collection.to_dict() + json_data = 
sample_collection.to_json() c = BioCCollection.from_json(json_data) assert c.source == "test_source" diff --git a/tests/bioc/test_document.py b/tests/bioc/test_document.py index e6002b2d..5d662d3e 100644 --- a/tests/bioc/test_document.py +++ b/tests/bioc/test_document.py @@ -3,6 +3,7 @@ Including serialization to/from dictionaries, JSON, and XML. """ +import json import xml.etree.ElementTree as ET import pytest @@ -46,7 +47,10 @@ def test_to_dict(sample_document): def test_to_json_matches_to_dict(sample_document): """Test that the to_json method produces the same output as to_dict.""" - assert sample_document.to_json() == sample_document.to_dict() + document = sample_document + json_as_dict = json.loads(document.to_json()) + dict_repr = document.to_dict() + assert json_as_dict == dict_repr def test_from_dict(): diff --git a/tests/conftest.py b/tests/conftest.py index 5ee2c663..26b3568e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -46,6 +46,11 @@ def sample_collection() -> BioCCollection: BioCPassage( text="Hello", offset=0, + infons={ + "section_title_1": "Abstract", + "iao_name_1": "textual abstract section", + "iao_id_1": "IAO:0000315", + }, annotations=[ BioCAnnotation( id="a1", diff --git a/tests/test_regression.py b/tests/test_regression.py index c890f152..86969947 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -79,7 +79,7 @@ def _run_html_regression_test( auto_corpus = process_file(config=config, file_path=file_path) abbreviations = auto_corpus.abbreviations - bioc = auto_corpus.to_bioc() + bioc = auto_corpus.to_bioc().to_dict() tables = auto_corpus.tables _make_reproducible( From 3a13f197193834d938f09e6d72504eff995dae77 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Jun 2025 09:43:17 +0000 Subject: [PATCH 114/125] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- autocorpus/ac_bioc/collection.py 
| 1 - 1 file changed, 1 deletion(-) diff --git a/autocorpus/ac_bioc/collection.py b/autocorpus/ac_bioc/collection.py index 9f68db55..5aae6a75 100644 --- a/autocorpus/ac_bioc/collection.py +++ b/autocorpus/ac_bioc/collection.py @@ -8,7 +8,6 @@ import xml.etree.ElementTree as ET from dataclasses import asdict, dataclass, field -from typing import Any from dataclasses_json import dataclass_json From 2f314faa21cb4cd5dfeb888db56e4039705b6937 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 3 Jun 2025 10:44:16 +0100 Subject: [PATCH 115/125] Ruff fix --- autocorpus/ac_bioc/collection.py | 1 - 1 file changed, 1 deletion(-) diff --git a/autocorpus/ac_bioc/collection.py b/autocorpus/ac_bioc/collection.py index 9f68db55..5aae6a75 100644 --- a/autocorpus/ac_bioc/collection.py +++ b/autocorpus/ac_bioc/collection.py @@ -8,7 +8,6 @@ import xml.etree.ElementTree as ET from dataclasses import asdict, dataclass, field -from typing import Any from dataclasses_json import dataclass_json From 1db882c776ba2289312f0df561b3882eaccebb3f Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 3 Jun 2025 11:09:47 +0100 Subject: [PATCH 116/125] Fixes for mypy typing errors --- autocorpus/ac_bioc/bioctable/cell.py | 9 ------- autocorpus/ac_bioc/bioctable/collection.py | 11 --------- autocorpus/ac_bioc/bioctable/document.py | 10 -------- autocorpus/ac_bioc/bioctable/passage.py | 24 ------------------- autocorpus/ac_bioc/collection.py | 8 +++---- autocorpus/ac_bioc/document.py | 28 +++------------------- autocorpus/ac_bioc/passage.py | 26 +++----------------- autocorpus/ac_bioc/relation.py | 27 ++++----------------- autocorpus/ac_bioc/sentence.py | 27 +++------------------ autocorpus/file_processing.py | 8 +++---- 10 files changed, 20 insertions(+), 158 deletions(-) diff --git a/autocorpus/ac_bioc/bioctable/cell.py b/autocorpus/ac_bioc/bioctable/cell.py index 7f241f80..472da42d 100644 --- a/autocorpus/ac_bioc/bioctable/cell.py +++ b/autocorpus/ac_bioc/bioctable/cell.py @@ -9,12 +9,3 
@@ class BioCTableCell: cell_id: str = field(default_factory=str) cell_text: str = field(default_factory=str) - - def to_dict(self) -> dict[str, str]: - """Convert the cell's attributes to a dictionary. - - Returns: - dict[str, str] - A dictionary containing the cell's ID and text content. - """ - return {"cell_id": self.cell_id, "cell_text": self.cell_text} diff --git a/autocorpus/ac_bioc/bioctable/collection.py b/autocorpus/ac_bioc/bioctable/collection.py index 2f7b87e5..f981777f 100644 --- a/autocorpus/ac_bioc/bioctable/collection.py +++ b/autocorpus/ac_bioc/bioctable/collection.py @@ -1,7 +1,6 @@ """This module defines the BioCTableCollection class.""" from dataclasses import dataclass, field -from typing import Any from ...ac_bioc import BioCCollection, BioCDocument @@ -11,13 +10,3 @@ class BioCTableCollection(BioCCollection): """A collection of BioCTableDocument objects extending BioCCollection.""" documents: list[BioCDocument] = field(default_factory=list) - - def to_dict(self) -> dict[str, Any]: - """Convert the BioCTableCollection to a dictionary representation. - - Returns: - dict[str, Any]: A dictionary containing the collection's data, including its documents. 
- """ - base = super().to_dict() - base["documents"] = [doc.to_dict() for doc in self.documents] - return base diff --git a/autocorpus/ac_bioc/bioctable/document.py b/autocorpus/ac_bioc/bioctable/document.py index 45a292d1..e307819f 100644 --- a/autocorpus/ac_bioc/bioctable/document.py +++ b/autocorpus/ac_bioc/bioctable/document.py @@ -5,10 +5,8 @@ """ from dataclasses import dataclass, field -from typing import Any from ...ac_bioc import BioCAnnotation, BioCDocument, BioCPassage -from ...ac_bioc.bioctable.passage import BioCTablePassage @dataclass @@ -17,11 +15,3 @@ class BioCTableDocument(BioCDocument): passages: list[BioCPassage] = field(default_factory=list) annotations: list[BioCAnnotation] = field(default_factory=list) - - def to_dict(self) -> dict[str, Any]: - """Convert the BioCTableDocument to a dictionary representation.""" - base = super().to_dict() - base["passages"] = [ - p.to_dict() for p in self.passages if isinstance(p, BioCTablePassage) - ] - return base diff --git a/autocorpus/ac_bioc/bioctable/passage.py b/autocorpus/ac_bioc/bioctable/passage.py index ba53266a..51de0de4 100644 --- a/autocorpus/ac_bioc/bioctable/passage.py +++ b/autocorpus/ac_bioc/bioctable/passage.py @@ -13,27 +13,3 @@ class BioCTablePassage(BioCPassage): column_headings: list[BioCTableCell] = field(default_factory=list) data_section: list[dict[str, Any]] = field(default_factory=list) - - def to_dict(self) -> dict[str, Any]: - """Convert the BioCTablePassage instance to a dictionary. - - Returns: - dict[str, Any]: A dictionary representation of the BioCTablePassage instance, - including column headings and data sections. 
- """ - base = super().to_dict() - base["column_headings"] = [cell.to_dict() for cell in self.column_headings] - - # Convert data_section cells too - data_section_serialized = [] - for section in self.data_section: - serialized_section = { - "table_section_title_1": section.get("table_section_title_1", ""), - "data_rows": [], - } - for row in section["data_rows"]: - serialized_row = [cell.to_dict() for cell in row] - serialized_section["data_rows"].append(serialized_row) - data_section_serialized.append(serialized_section) - base["data_section"] = data_section_serialized - return base diff --git a/autocorpus/ac_bioc/collection.py b/autocorpus/ac_bioc/collection.py index 5aae6a75..63f4a7d1 100644 --- a/autocorpus/ac_bioc/collection.py +++ b/autocorpus/ac_bioc/collection.py @@ -7,16 +7,16 @@ from __future__ import annotations import xml.etree.ElementTree as ET -from dataclasses import asdict, dataclass, field +from dataclasses import dataclass, field -from dataclasses_json import dataclass_json +from dataclasses_json import DataClassJsonMixin, dataclass_json from .document import BioCDocument @dataclass_json @dataclass -class BioCCollection: +class BioCCollection(DataClassJsonMixin): """A class representing a BioC collection.""" source: str = field(default_factory=str) @@ -25,8 +25,6 @@ class BioCCollection: documents: list[BioCDocument] = field(default_factory=list) infons: dict[str, str] = field(default_factory=dict) - to_dict = asdict - def to_xml(self) -> ET.Element: """Convert the BioCCollection instance to an XML element. 
diff --git a/autocorpus/ac_bioc/document.py b/autocorpus/ac_bioc/document.py index 5dccc4cf..091087e7 100644 --- a/autocorpus/ac_bioc/document.py +++ b/autocorpus/ac_bioc/document.py @@ -7,10 +7,9 @@ from __future__ import annotations import xml.etree.ElementTree as ET -from dataclasses import asdict, dataclass, field -from typing import Any +from dataclasses import dataclass, field -from dataclasses_json import dataclass_json +from dataclasses_json import DataClassJsonMixin, dataclass_json from .annotation import BioCAnnotation from .passage import BioCPassage @@ -19,7 +18,7 @@ @dataclass_json @dataclass -class BioCDocument: +class BioCDocument(DataClassJsonMixin): """Represents a BioC document containing passages, annotations, and relations.""" id: str = field(default_factory=str) @@ -31,27 +30,6 @@ class BioCDocument: default_factory=list ) # TODO: discuss why this is here in legacy outputs, should it be removed? - to_dict = asdict - - @classmethod - def from_dict(cls, data: dict[str, Any]) -> BioCDocument: - """Create a BioCDocument instance from a dictionary. - - Args: - data (dict[str, Any]): A dictionary containing the document's data. - - Returns: - BioCDocument: An instance of BioCDocument created from the dictionary. - """ - passages = [BioCPassage().from_ac_dict(p) for p in data.get("passages", [])] - return cls( - id=data["id"], - infons=data.get("infons", {}), - passages=passages, - relations=data.get("relations", []), - annotations=data.get("annotations", []), - ) - def to_xml(self) -> ET.Element: """Convert the BioCDocument instance to an XML element. 
diff --git a/autocorpus/ac_bioc/passage.py b/autocorpus/ac_bioc/passage.py index e66a3b58..c8b23de3 100644 --- a/autocorpus/ac_bioc/passage.py +++ b/autocorpus/ac_bioc/passage.py @@ -7,10 +7,10 @@ from __future__ import annotations import xml.etree.ElementTree as ET -from dataclasses import asdict, dataclass, field +from dataclasses import dataclass, field from typing import Any -from dataclasses_json import dataclass_json +from dataclasses_json import DataClassJsonMixin, dataclass_json from .annotation import BioCAnnotation from .relation import BioCRelation @@ -23,7 +23,7 @@ @dataclass_json @dataclass -class BioCPassage: +class BioCPassage(DataClassJsonMixin): """Represents a passage in a BioC document.""" text: str = field(default_factory=str) @@ -33,26 +33,6 @@ class BioCPassage: annotations: list[BioCAnnotation] = field(default_factory=list) relations: list[BioCRelation] = field(default_factory=list) - to_dict = asdict - - @classmethod - def from_dict(cls, data: dict[str, Any]) -> BioCPassage: - """Create a BioCPassage instance from a dictionary. - - Args: - data (dict[str, Any]): A dictionary containing passage data. - - Returns: - BioCPassage: An instance of BioCPassage populated with the provided data. - """ - sentences = [BioCSentence.from_dict(s) for s in data.get("sentences", [])] - return cls( - text=data.get("text", ""), - offset=data.get("offset", 0), - infons=data.get("infons", {}), - sentences=sentences, - ) - @classmethod def from_ac_dict(cls, passage: dict[str, Any]) -> BioCPassage: """Create a BioCPassage from a passage dict and an offset. 
diff --git a/autocorpus/ac_bioc/relation.py b/autocorpus/ac_bioc/relation.py index a458e568..8b8d0d43 100644 --- a/autocorpus/ac_bioc/relation.py +++ b/autocorpus/ac_bioc/relation.py @@ -3,40 +3,21 @@ from __future__ import annotations import xml.etree.ElementTree as ET -from dataclasses import asdict, dataclass, field -from typing import Any +from dataclasses import dataclass, field + +from dataclasses_json import DataClassJsonMixin from .node import BioCNode @dataclass -class BioCRelation: +class BioCRelation(DataClassJsonMixin): """A class representing a BioC relation.""" id: str = field(default_factory=str) infons: dict[str, str] = field(default_factory=dict) nodes: list[BioCNode] = field(default_factory=list) - to_dict = asdict - - @classmethod - def from_dict(cls, data: dict[str, Any]) -> BioCRelation: - """Create a BioCRelation instance from a dictionary. - - Args: - data (dict[str, Any]): A dictionary containing the relation data. - - Returns: - BioCRelation: An instance of BioCRelation created from the dictionary. - """ - from .node import BioCNode # import inside to avoid circular import issues - - return cls( - id=data.get("id", ""), - infons=data.get("infons", {}), - nodes=[BioCNode.from_dict(n) for n in data.get("nodes", [])], - ) - def to_xml(self) -> ET.Element: """Convert the BioCRelation instance to an XML element. 
diff --git a/autocorpus/ac_bioc/sentence.py b/autocorpus/ac_bioc/sentence.py index 32ff35da..29e2d6fd 100644 --- a/autocorpus/ac_bioc/sentence.py +++ b/autocorpus/ac_bioc/sentence.py @@ -3,10 +3,9 @@ from __future__ import annotations import xml.etree.ElementTree as ET -from dataclasses import asdict, dataclass, field -from typing import Any +from dataclasses import dataclass, field -from dataclasses_json import dataclass_json +from dataclasses_json import DataClassJsonMixin, dataclass_json from .annotation import BioCAnnotation from .relation import BioCRelation @@ -14,7 +13,7 @@ @dataclass_json @dataclass -class BioCSentence: +class BioCSentence(DataClassJsonMixin): """Represents a sentence in the BioC format.""" text: str @@ -23,26 +22,6 @@ class BioCSentence: annotations: list[BioCAnnotation] = field(default_factory=list) relations: list[BioCRelation] = field(default_factory=list) - to_dict = asdict - - @classmethod - def from_dict(cls, data: dict[str, Any]) -> BioCSentence: - """Create a BioCSentence instance from a dictionary. - - Args: - data (dict[str, Any]): A dictionary containing sentence data. - - Returns: - BioCSentence: An instance of BioCSentence created from the dictionary. - """ - annotations = [BioCAnnotation.from_dict(a) for a in data.get("annotations", [])] - return cls( - text=data.get("text", ""), - offset=data.get("offset", 0), - infons=data.get("infons", {}), - annotations=annotations, - ) - def to_xml(self) -> ET.Element: """Convert the BioCSentence instance to an XML element. 
diff --git a/autocorpus/file_processing.py b/autocorpus/file_processing.py index 9e9cac07..d4eb5d7c 100644 --- a/autocorpus/file_processing.py +++ b/autocorpus/file_processing.py @@ -50,9 +50,9 @@ def process_file( main_text = BioCJSONEncoder().default(text) if tables: - tables = BioCTableJSONEncoder().default(tables) + tables_dict = BioCTableJSONEncoder().default(tables).to_dict() - return Autocorpus(file_path, main_text, dict(), tables) + return Autocorpus(file_path, main_text, dict(), tables_dict) except ModuleNotFoundError: logger.error( @@ -69,9 +69,9 @@ def process_file( # TODO: Use text.to_dict() after bugfix in ac_bioc (Issue #272) main_text = BioCJSONEncoder().default(text) - tables = BioCTableJSONEncoder().default(tbls) + tables_dict = BioCTableJSONEncoder().default(tbls).to_dict() - return Autocorpus(file_path, main_text, dict(), tables) + return Autocorpus(file_path, main_text, dict(), tables_dict) except ModuleNotFoundError: logger.error( "Could not load necessary Word packages. 
Microsoft Word is required to process Word documents on Windows & MAC OS, or alternatively LibreOffice can be used on Linux.\n" From acbf5b1875b4d0a8ad7041a1254b94674eff03d1 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 3 Jun 2025 11:36:01 +0100 Subject: [PATCH 117/125] Fixed unbound values referenced --- autocorpus/file_processing.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/autocorpus/file_processing.py b/autocorpus/file_processing.py index d4eb5d7c..974811a6 100644 --- a/autocorpus/file_processing.py +++ b/autocorpus/file_processing.py @@ -45,12 +45,13 @@ def process_file( text, tables = extract_pdf_content(file_path) - # TODO: Use text.to_dict() after bugfix in ac_bioc (Issue #272) + main_text: dict[str, Any] = {} if text: - main_text = BioCJSONEncoder().default(text) + main_text = text.to_dict() + tables_dict: dict[str, Any] = {} if tables: - tables_dict = BioCTableJSONEncoder().default(tables).to_dict() + tables_dict = tables.to_dict() return Autocorpus(file_path, main_text, dict(), tables_dict) @@ -68,8 +69,13 @@ def process_file( text, tbls = extract_word_content(file_path) # TODO: Use text.to_dict() after bugfix in ac_bioc (Issue #272) - main_text = BioCJSONEncoder().default(text) - tables_dict = BioCTableJSONEncoder().default(tbls).to_dict() + main_text: dict[str, Any] = {} + if text: + main_text = text.to_dict() + + tables_dict: dict[str, Any] = {} + if tbls: + tables_dict = tbls.to_dict() return Autocorpus(file_path, main_text, dict(), tables_dict) except ModuleNotFoundError: From b23909e726a55103374734a678f454698c9122de Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Jun 2025 10:36:16 +0000 Subject: [PATCH 118/125] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- autocorpus/file_processing.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/autocorpus/file_processing.py 
b/autocorpus/file_processing.py index 974811a6..bc091c63 100644 --- a/autocorpus/file_processing.py +++ b/autocorpus/file_processing.py @@ -5,8 +5,6 @@ from typing import Any from . import logger -from .ac_bioc.bioctable.json import BioCTableJSONEncoder -from .ac_bioc.json import BioCJSONEncoder from .autocorpus import Autocorpus from .file_type import FileType, check_file_type from .html import process_html_article From 9407bbe59f7dd8b3c420d278174e33f3c6c4551e Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 3 Jun 2025 11:39:18 +0100 Subject: [PATCH 119/125] mypy fixes --- autocorpus/file_processing.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/autocorpus/file_processing.py b/autocorpus/file_processing.py index 974811a6..fe13e26e 100644 --- a/autocorpus/file_processing.py +++ b/autocorpus/file_processing.py @@ -5,8 +5,6 @@ from typing import Any from . import logger -from .ac_bioc.bioctable.json import BioCTableJSONEncoder -from .ac_bioc.json import BioCJSONEncoder from .autocorpus import Autocorpus from .file_type import FileType, check_file_type from .html import process_html_article @@ -29,6 +27,8 @@ def process_file( NotImplementedError: For files types with no implemented processing. ModuleNotFoundError: For PDF processing if required packages are not found. 
""" + main_text: dict[str, Any] = {} + tables_dict: dict[str, Any] = {} match check_file_type(file_path): case FileType.HTML: return Autocorpus( @@ -45,11 +45,9 @@ def process_file( text, tables = extract_pdf_content(file_path) - main_text: dict[str, Any] = {} if text: main_text = text.to_dict() - tables_dict: dict[str, Any] = {} if tables: tables_dict = tables.to_dict() @@ -68,12 +66,9 @@ def process_file( text, tbls = extract_word_content(file_path) - # TODO: Use text.to_dict() after bugfix in ac_bioc (Issue #272) - main_text: dict[str, Any] = {} if text: main_text = text.to_dict() - tables_dict: dict[str, Any] = {} if tbls: tables_dict = tbls.to_dict() From 7eb4b830ca73ae84d6d173241d87b77ee99e6acb Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 3 Jun 2025 12:10:26 +0100 Subject: [PATCH 120/125] Added new fields to expected output, matching the HTML bioc structure --- .../tp-10-08-2123-coif.pdf_bioc.json | 1110 +++++++++++------ .../Word/Expected Output/mmc1.doc_bioc.json | 252 ++-- 2 files changed, 908 insertions(+), 454 deletions(-) diff --git a/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_bioc.json b/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_bioc.json index 46bac838..9659ed66 100644 --- a/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_bioc.json +++ b/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_bioc.json @@ -17,7 +17,8 @@ }, "text": "![](_page_0_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 27, @@ -27,7 +28,8 @@ }, "text": "### **Instructions**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 47, @@ -37,7 +39,8 @@ }, "text": " ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 48, @@ -47,7 +50,8 @@ }, "text": "> **The purpose of this form is to provide readers of your manuscript with information about your other 
interests that could influence how they receive and understand your work. The form is designed to be completed electronically and stored electronically. It contains programming that allows appropriate data display. Each author should submit a separate form and is responsible for the accuracy and completeness of the submitted information. The form is in six parts.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 519, @@ -57,7 +61,8 @@ }, "text": "#### **Identifying information. 1.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 555, @@ -67,7 +72,8 @@ }, "text": "#### **The work under consideration for publication. 2.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 612, @@ -77,7 +83,8 @@ }, "text": "This section asks for information about the work that you have submitted for publication. The time frame for this reporting is that of the work itself, from the initial conception and planning to the present. The requested information is about resources that you received, either directly or indirectly (via your institution), to enable you to complete the work. Checking \"No\" means that you did the work without receiving any financial support from any third party -- that is, the work was supported by funds from the same institution that pays your salary and that institution did not receive third-party funds with which to pay you. If you or your institution received funds from a third party to support the work, such as a government granting agency, charitable foundation or commercial sponsor, check \"Yes\".", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1425, @@ -87,7 +94,8 @@ }, "text": "#### **Relevant financial activities outside the submitted work. 
3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1494, @@ -97,7 +105,8 @@ }, "text": "This section asks about your financial relationships with entities in the bio-medical arena that could be perceived to influence, or that give the appearance of potentially influencing, what you wrote in the submitted work. You should disclose interactions with ANY entity that could be considered broadly relevant to the work. For example, if your article is about testing an epidermal growth factor receptor (EGFR) antagonist in lung cancer, you should report all associations with entities pursuing diagnostic or therapeutic strategies in cancer in general, not just in the area of EGFR or lung cancer.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 2099, @@ -107,7 +116,8 @@ }, "text": "Report all sources of revenue paid (or promised to be paid) directly to you or your institution on your behalf over the 36 months prior to submission of the work. This should include all monies from sources with relevance to the submitted work, not just monies from the entity that sponsored the research. Please note that your interactions with the work's sponsor that are outside the submitted work should also be listed here. If there is any question, it is usually better to disclose a relationship than not to do so.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 2620, @@ -117,7 +127,8 @@ }, "text": " For grants you have received for work outside the submitted work, you should disclose support ONLY from entities that could be perceived to be affected financially by the published work, such as drug companies, or foundations supported by entities that could be perceived to have a financial stake in the outcome. Public funding sources, such as government agencies, charitable foundations or academic institutions, need not be disclosed. 
For example, if a government agency sponsored a study in which you have been involved and drugs were provided by a pharmaceutical company, you need only list the pharmaceutical company.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 3245, @@ -127,7 +138,8 @@ }, "text": "#### **Intellectual Property. 4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 3279, @@ -137,7 +149,8 @@ }, "text": "This section asks about patents and copyrights, whether pending, issued, licensed and/or receiving royalties.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 3388, @@ -147,7 +160,8 @@ }, "text": "#### **Relationships not covered above. 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 3432, @@ -157,7 +171,8 @@ }, "text": "Use this section to report other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 3634, @@ -167,7 +182,8 @@ }, "text": "### **Definitions.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 3654, @@ -177,7 +193,8 @@ }, "text": "**Entity:** government agency, foundation, commercial sponsor, academic institution, etc.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 3743, @@ -187,7 +204,8 @@ }, "text": "**Grant:** A grant from an entity, generally [but not always] paid to your organization", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 3830, @@ -197,7 +215,8 @@ }, "text": "**Personal Fees:** Monies paid to you for services rendered, generally honoraria, royalties, or fees for consulting , lectures, speakers bureaus, expert testimony, employment, or other 
affiliations", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 4027, @@ -207,7 +226,8 @@ }, "text": "**Non-Financial Support:** Examples include drugs/equipment supplied by the entity, travel paid by the entity, writing assistance, administrative support, etc**.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 4190, @@ -217,7 +237,8 @@ }, "text": "**Other:** Anything not covered under the previous three boxes **Pending:** The patent has been filed but not issued **Issued:** The patent has been issued by the agency **Licensed:** The patent has been licensed to an entity, whether earning royalties or not", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 4449, @@ -227,7 +248,8 @@ }, "text": "![](_page_1_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 4476, @@ -237,7 +259,8 @@ }, "text": "Did you or your institution **at any time** receive payment or services from a third party (government, commercial, private foundation, etc.) for any aspect of the submitted work (including but not limited to grants, data monitoring board, study design, manuscript preparation, statistical analysis, etc.)?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 4782, @@ -247,7 +270,8 @@ }, "text": "# **Relevant financial activities outside the submitted work. Section 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 4856, @@ -257,7 +281,8 @@ }, "text": "Place a check in the appropriate boxes in the table to indicate whether you have financial relationships (regardless of amount of compensation) with entities as described in the instructions. Use one line for each entity; add as many lines as you need by clicking the \"Add +\" box. 
You should report relationships that were **present during the 36 months prior to publication**.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5233, @@ -267,7 +292,8 @@ }, "text": "# **Intellectual Property -- Patents & Copyrights Section 4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5295, @@ -277,7 +303,8 @@ }, "text": "Do you have any patents, whether planned, pending or issued, broadly relevant to the work? Yes \u2714 No", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5394, @@ -287,7 +314,8 @@ }, "text": "![](_page_2_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5421, @@ -297,7 +325,8 @@ }, "text": "## **Relationships not covered above Section 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5470, @@ -307,7 +336,8 @@ }, "text": "Are there other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5655, @@ -317,7 +347,8 @@ }, "text": "Yes, the following relationships/conditions/circumstances are present (explain below):", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5741, @@ -327,7 +358,8 @@ }, "text": "\u2714 No other relationships/conditions/circumstances that present a potential conflict of interest", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5836, @@ -337,7 +369,8 @@ }, "text": "At the time of manuscript acceptance, journals will ask authors to confirm and, if necessary, update their disclosure statements. 
On occasion, journals may ask authors to disclose further information about reported relationships.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 6065, @@ -347,7 +380,8 @@ }, "text": "## **Disclosure Statement Section 6.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 6103, @@ -357,7 +391,8 @@ }, "text": "Based on the above disclosures, this form will automatically generate a disclosure statement, which will appear in the box below.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 6232, @@ -367,7 +402,8 @@ }, "text": "Dr. Zong has nothing to disclose.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 6265, @@ -377,7 +413,8 @@ }, "text": "### **Evaluation and Feedback**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 6296, @@ -387,7 +424,8 @@ }, "text": "![](_page_3_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 6323, @@ -397,7 +435,8 @@ }, "text": "### **Instructions**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 6343, @@ -407,7 +446,8 @@ }, "text": " ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 6344, @@ -417,7 +457,8 @@ }, "text": "> **The purpose of this form is to provide readers of your manuscript with information about your other interests that could influence how they receive and understand your work. The form is designed to be completed electronically and stored electronically. It contains programming that allows appropriate data display. Each author should submit a separate form and is responsible for the accuracy and completeness of the submitted information. 
The form is in six parts.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 6815, @@ -427,7 +468,8 @@ }, "text": "#### **Identifying information. 1.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 6851, @@ -437,7 +479,8 @@ }, "text": "#### **The work under consideration for publication. 2.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 6908, @@ -447,7 +490,8 @@ }, "text": "This section asks for information about the work that you have submitted for publication. The time frame for this reporting is that of the work itself, from the initial conception and planning to the present. The requested information is about resources that you received, either directly or indirectly (via your institution), to enable you to complete the work. Checking \"No\" means that you did the work without receiving any financial support from any third party -- that is, the work was supported by funds from the same institution that pays your salary and that institution did not receive third-party funds with which to pay you. If you or your institution received funds from a third party to support the work, such as a government granting agency, charitable foundation or commercial sponsor, check \"Yes\".", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 7721, @@ -457,7 +501,8 @@ }, "text": "#### **Relevant financial activities outside the submitted work. 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 7790, @@ -467,7 +512,8 @@ }, "text": "This section asks about your financial relationships with entities in the bio-medical arena that could be perceived to influence, or that give the appearance of potentially influencing, what you wrote in the submitted work. You should disclose interactions with ANY entity that could be considered broadly relevant to the work. 
For example, if your article is about testing an epidermal growth factor receptor (EGFR) antagonist in lung cancer, you should report all associations with entities pursuing diagnostic or therapeutic strategies in cancer in general, not just in the area of EGFR or lung cancer.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 8395, @@ -477,7 +523,8 @@ }, "text": "Report all sources of revenue paid (or promised to be paid) directly to you or your institution on your behalf over the 36 months prior to submission of the work. This should include all monies from sources with relevance to the submitted work, not just monies from the entity that sponsored the research. Please note that your interactions with the work's sponsor that are outside the submitted work should also be listed here. If there is any question, it is usually better to disclose a relationship than not to do so.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 8916, @@ -487,7 +534,8 @@ }, "text": " For grants you have received for work outside the submitted work, you should disclose support ONLY from entities that could be perceived to be affected financially by the published work, such as drug companies, or foundations supported by entities that could be perceived to have a financial stake in the outcome. Public funding sources, such as government agencies, charitable foundations or academic institutions, need not be disclosed. For example, if a government agency sponsored a study in which you have been involved and drugs were provided by a pharmaceutical company, you need only list the pharmaceutical company.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 9541, @@ -497,7 +545,8 @@ }, "text": "#### **Intellectual Property. 
4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 9575, @@ -507,7 +556,8 @@ }, "text": "This section asks about patents and copyrights, whether pending, issued, licensed and/or receiving royalties.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 9684, @@ -517,7 +567,8 @@ }, "text": "#### **Relationships not covered above. 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 9728, @@ -527,7 +578,8 @@ }, "text": "Use this section to report other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 9930, @@ -537,7 +589,8 @@ }, "text": "### **Definitions.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 9950, @@ -547,7 +600,8 @@ }, "text": "**Entity:** government agency, foundation, commercial sponsor, academic institution, etc.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 10039, @@ -557,7 +611,8 @@ }, "text": "**Grant:** A grant from an entity, generally [but not always] paid to your organization", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 10126, @@ -567,7 +622,8 @@ }, "text": "**Personal Fees:** Monies paid to you for services rendered, generally honoraria, royalties, or fees for consulting , lectures, speakers bureaus, expert testimony, employment, or other affiliations", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 10323, @@ -577,7 +633,8 @@ }, "text": "**Non-Financial Support:** Examples include drugs/equipment supplied by the entity, travel paid by the entity, writing assistance, administrative support, etc**.**", "annotations": [], - "relations": [] + 
"relations": [], + "sentences": [] }, { "offset": 10486, @@ -587,7 +644,8 @@ }, "text": "**Other:** Anything not covered under the previous three boxes **Pending:** The patent has been filed but not issued **Issued:** The patent has been issued by the agency **Licensed:** The patent has been licensed to an entity, whether earning royalties or not", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 10745, @@ -597,7 +655,8 @@ }, "text": "![](_page_4_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 10772, @@ -607,7 +666,8 @@ }, "text": "## **The Work Under Consideration for Publication Section 2.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 10834, @@ -617,7 +677,8 @@ }, "text": "Did you or your institution **at any time** receive payment or services from a third party (government, commercial, private foundation, etc.) for any aspect of the submitted work (including but not limited to grants, data monitoring board, study design, manuscript preparation, statistical analysis, etc.)?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 11140, @@ -627,7 +688,8 @@ }, "text": "## **Relevant financial activities outside the submitted work. Section 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 11215, @@ -637,7 +699,8 @@ }, "text": "Place a check in the appropriate boxes in the table to indicate whether you have financial relationships (regardless of amount of compensation) with entities as described in the instructions. Use one line for each entity; add as many lines as you need by clicking the \"Add +\" box. 
You should report relationships that were **present during the 36 months prior to publication**.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 11592, @@ -647,7 +710,8 @@ }, "text": "# **Intellectual Property -- Patents & Copyrights Section 4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 11654, @@ -657,7 +721,8 @@ }, "text": "![](_page_5_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 11681, @@ -667,7 +732,8 @@ }, "text": "## **Relationships not covered above Section 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 11730, @@ -677,7 +743,8 @@ }, "text": "Are there other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 11915, @@ -687,7 +754,8 @@ }, "text": "Yes, the following relationships/conditions/circumstances are present (explain below):", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 12001, @@ -697,7 +765,8 @@ }, "text": "\u2714 No other relationships/conditions/circumstances that present a potential conflict of interest", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 12096, @@ -707,7 +776,8 @@ }, "text": "At the time of manuscript acceptance, journals will ask authors to confirm and, if necessary, update their disclosure statements. 
On occasion, journals may ask authors to disclose further information about reported relationships.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 12325, @@ -717,7 +787,8 @@ }, "text": "## **Disclosure Statement Section 6.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 12363, @@ -727,7 +798,8 @@ }, "text": "Based on the above disclosures, this form will automatically generate a disclosure statement, which will appear in the box below.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 12492, @@ -737,7 +809,8 @@ }, "text": "Dr. Liu has nothing to disclose.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 12524, @@ -747,7 +820,8 @@ }, "text": "### **Evaluation and Feedback**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 12555, @@ -757,7 +831,8 @@ }, "text": "![](_page_6_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 12582, @@ -767,7 +842,8 @@ }, "text": "### **Instructions**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 12602, @@ -777,7 +853,8 @@ }, "text": " ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 12603, @@ -787,7 +864,8 @@ }, "text": "> **The purpose of this form is to provide readers of your manuscript with information about your other interests that could influence how they receive and understand your work. The form is designed to be completed electronically and stored electronically. It contains programming that allows appropriate data display. Each author should submit a separate form and is responsible for the accuracy and completeness of the submitted information. 
The form is in six parts.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 13074, @@ -797,7 +875,8 @@ }, "text": "#### **Identifying information. 1.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 13110, @@ -807,7 +886,8 @@ }, "text": "#### **The work under consideration for publication. 2.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 13167, @@ -817,7 +897,8 @@ }, "text": "This section asks for information about the work that you have submitted for publication. The time frame for this reporting is that of the work itself, from the initial conception and planning to the present. The requested information is about resources that you received, either directly or indirectly (via your institution), to enable you to complete the work. Checking \"No\" means that you did the work without receiving any financial support from any third party -- that is, the work was supported by funds from the same institution that pays your salary and that institution did not receive third-party funds with which to pay you. If you or your institution received funds from a third party to support the work, such as a government granting agency, charitable foundation or commercial sponsor, check \"Yes\".", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 13980, @@ -827,7 +908,8 @@ }, "text": "#### **Relevant financial activities outside the submitted work. 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 14049, @@ -837,7 +919,8 @@ }, "text": "This section asks about your financial relationships with entities in the bio-medical arena that could be perceived to influence, or that give the appearance of potentially influencing, what you wrote in the submitted work. You should disclose interactions with ANY entity that could be considered broadly relevant to the work. 
For example, if your article is about testing an epidermal growth factor receptor (EGFR) antagonist in lung cancer, you should report all associations with entities pursuing diagnostic or therapeutic strategies in cancer in general, not just in the area of EGFR or lung cancer.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 14654, @@ -847,7 +930,8 @@ }, "text": "Report all sources of revenue paid (or promised to be paid) directly to you or your institution on your behalf over the 36 months prior to submission of the work. This should include all monies from sources with relevance to the submitted work, not just monies from the entity that sponsored the research. Please note that your interactions with the work's sponsor that are outside the submitted work should also be listed here. If there is any question, it is usually better to disclose a relationship than not to do so.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 15175, @@ -857,7 +941,8 @@ }, "text": " For grants you have received for work outside the submitted work, you should disclose support ONLY from entities that could be perceived to be affected financially by the published work, such as drug companies, or foundations supported by entities that could be perceived to have a financial stake in the outcome. Public funding sources, such as government agencies, charitable foundations or academic institutions, need not be disclosed. For example, if a government agency sponsored a study in which you have been involved and drugs were provided by a pharmaceutical company, you need only list the pharmaceutical company.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 15800, @@ -867,7 +952,8 @@ }, "text": "#### **Intellectual Property. 
4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 15834, @@ -877,7 +963,8 @@ }, "text": "This section asks about patents and copyrights, whether pending, issued, licensed and/or receiving royalties.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 15943, @@ -887,7 +974,8 @@ }, "text": "#### **Relationships not covered above. 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 15987, @@ -897,7 +985,8 @@ }, "text": "Use this section to report other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 16189, @@ -907,7 +996,8 @@ }, "text": "### **Definitions.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 16209, @@ -917,7 +1007,8 @@ }, "text": "**Entity:** government agency, foundation, commercial sponsor, academic institution, etc.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 16298, @@ -927,7 +1018,8 @@ }, "text": "**Grant:** A grant from an entity, generally [but not always] paid to your organization", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 16385, @@ -937,7 +1029,8 @@ }, "text": "**Personal Fees:** Monies paid to you for services rendered, generally honoraria, royalties, or fees for consulting , lectures, speakers bureaus, expert testimony, employment, or other affiliations", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 16582, @@ -947,7 +1040,8 @@ }, "text": "**Non-Financial Support:** Examples include drugs/equipment supplied by the entity, travel paid by the entity, writing assistance, administrative support, etc**.**", "annotations": [], - 
"relations": [] + "relations": [], + "sentences": [] }, { "offset": 16745, @@ -957,7 +1051,8 @@ }, "text": "**Other:** Anything not covered under the previous three boxes **Pending:** The patent has been filed but not issued **Issued:** The patent has been issued by the agency **Licensed:** The patent has been licensed to an entity, whether earning royalties or not", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17004, @@ -967,7 +1062,8 @@ }, "text": "![](_page_7_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17031, @@ -977,7 +1073,8 @@ }, "text": "Did you or your institution **at any time** receive payment or services from a third party (government, commercial, private foundation, etc.) for any aspect of the submitted work (including but not limited to grants, data monitoring board, study design, manuscript preparation, statistical analysis, etc.)?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17337, @@ -987,7 +1084,8 @@ }, "text": "# **Relevant financial activities outside the submitted work. Section 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17411, @@ -997,7 +1095,8 @@ }, "text": "Place a check in the appropriate boxes in the table to indicate whether you have financial relationships (regardless of amount of compensation) with entities as described in the instructions. Use one line for each entity; add as many lines as you need by clicking the \"Add +\" box. 
You should report relationships that were **present during the 36 months prior to publication**.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17788, @@ -1007,7 +1106,8 @@ }, "text": "# **Intellectual Property -- Patents & Copyrights Section 4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17850, @@ -1017,7 +1117,8 @@ }, "text": "![](_page_8_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17877, @@ -1027,7 +1128,8 @@ }, "text": "## **Relationships not covered above Section 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17926, @@ -1037,7 +1139,8 @@ }, "text": "Are there other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 18111, @@ -1047,7 +1150,8 @@ }, "text": "Yes, the following relationships/conditions/circumstances are present (explain below):", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 18197, @@ -1057,7 +1161,8 @@ }, "text": "\u2714 No other relationships/conditions/circumstances that present a potential conflict of interest", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 18292, @@ -1067,7 +1172,8 @@ }, "text": "At the time of manuscript acceptance, journals will ask authors to confirm and, if necessary, update their disclosure statements. 
On occasion, journals may ask authors to disclose further information about reported relationships.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 18521, @@ -1077,7 +1183,8 @@ }, "text": "## **Disclosure Statement Section 6.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 18559, @@ -1087,7 +1194,8 @@ }, "text": "Based on the above disclosures, this form will automatically generate a disclosure statement, which will appear in the box below.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 18688, @@ -1097,7 +1205,8 @@ }, "text": "Dr. Hou has nothing to disclose.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 18720, @@ -1107,7 +1216,8 @@ }, "text": "### **Evaluation and Feedback**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 18751, @@ -1117,7 +1227,8 @@ }, "text": "![](_page_9_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 18778, @@ -1127,7 +1238,8 @@ }, "text": "### **Instructions**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 18798, @@ -1137,7 +1249,8 @@ }, "text": " ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 18799, @@ -1147,7 +1260,8 @@ }, "text": "> **The purpose of this form is to provide readers of your manuscript with information about your other interests that could influence how they receive and understand your work. The form is designed to be completed electronically and stored electronically. It contains programming that allows appropriate data display. Each author should submit a separate form and is responsible for the accuracy and completeness of the submitted information. 
The form is in six parts.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 19270, @@ -1157,7 +1271,8 @@ }, "text": "#### **Identifying information. 1.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 19306, @@ -1167,7 +1282,8 @@ }, "text": "#### **The work under consideration for publication. 2.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 19363, @@ -1177,7 +1293,8 @@ }, "text": "This section asks for information about the work that you have submitted for publication. The time frame for this reporting is that of the work itself, from the initial conception and planning to the present. The requested information is about resources that you received, either directly or indirectly (via your institution), to enable you to complete the work. Checking \"No\" means that you did the work without receiving any financial support from any third party -- that is, the work was supported by funds from the same institution that pays your salary and that institution did not receive third-party funds with which to pay you. If you or your institution received funds from a third party to support the work, such as a government granting agency, charitable foundation or commercial sponsor, check \"Yes\".", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 20176, @@ -1187,7 +1304,8 @@ }, "text": "#### **Relevant financial activities outside the submitted work. 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 20245, @@ -1197,7 +1315,8 @@ }, "text": "This section asks about your financial relationships with entities in the bio-medical arena that could be perceived to influence, or that give the appearance of potentially influencing, what you wrote in the submitted work. You should disclose interactions with ANY entity that could be considered broadly relevant to the work. 
For example, if your article is about testing an epidermal growth factor receptor (EGFR) antagonist in lung cancer, you should report all associations with entities pursuing diagnostic or therapeutic strategies in cancer in general, not just in the area of EGFR or lung cancer.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 20850, @@ -1207,7 +1326,8 @@ }, "text": "Report all sources of revenue paid (or promised to be paid) directly to you or your institution on your behalf over the 36 months prior to submission of the work. This should include all monies from sources with relevance to the submitted work, not just monies from the entity that sponsored the research. Please note that your interactions with the work's sponsor that are outside the submitted work should also be listed here. If there is any question, it is usually better to disclose a relationship than not to do so.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 21371, @@ -1217,7 +1337,8 @@ }, "text": " For grants you have received for work outside the submitted work, you should disclose support ONLY from entities that could be perceived to be affected financially by the published work, such as drug companies, or foundations supported by entities that could be perceived to have a financial stake in the outcome. Public funding sources, such as government agencies, charitable foundations or academic institutions, need not be disclosed. For example, if a government agency sponsored a study in which you have been involved and drugs were provided by a pharmaceutical company, you need only list the pharmaceutical company.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 21996, @@ -1227,7 +1348,8 @@ }, "text": "#### **Intellectual Property. 
4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 22030, @@ -1237,7 +1359,8 @@ }, "text": "This section asks about patents and copyrights, whether pending, issued, licensed and/or receiving royalties.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 22139, @@ -1247,7 +1370,8 @@ }, "text": "#### **Relationships not covered above. 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 22183, @@ -1257,7 +1381,8 @@ }, "text": "Use this section to report other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 22385, @@ -1267,7 +1392,8 @@ }, "text": "### **Definitions.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 22405, @@ -1277,7 +1403,8 @@ }, "text": "**Entity:** government agency, foundation, commercial sponsor, academic institution, etc.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 22494, @@ -1287,7 +1414,8 @@ }, "text": "**Grant:** A grant from an entity, generally [but not always] paid to your organization", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 22581, @@ -1297,7 +1425,8 @@ }, "text": "**Personal Fees:** Monies paid to you for services rendered, generally honoraria, royalties, or fees for consulting , lectures, speakers bureaus, expert testimony, employment, or other affiliations", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 22778, @@ -1307,7 +1436,8 @@ }, "text": "**Non-Financial Support:** Examples include drugs/equipment supplied by the entity, travel paid by the entity, writing assistance, administrative support, etc**.**", "annotations": 
[], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 22941, @@ -1317,7 +1447,8 @@ }, "text": "**Other:** Anything not covered under the previous three boxes **Pending:** The patent has been filed but not issued **Issued:** The patent has been issued by the agency **Licensed:** The patent has been licensed to an entity, whether earning royalties or not", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 23200, @@ -1327,7 +1458,8 @@ }, "text": "![](_page_10_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 23228, @@ -1337,7 +1469,8 @@ }, "text": "Did you or your institution **at any time** receive payment or services from a third party (government, commercial, private foundation, etc.) for any aspect of the submitted work (including but not limited to grants, data monitoring board, study design, manuscript preparation, statistical analysis, etc.)?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 23534, @@ -1347,7 +1480,8 @@ }, "text": "# **Relevant financial activities outside the submitted work. Section 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 23608, @@ -1357,7 +1491,8 @@ }, "text": "Place a check in the appropriate boxes in the table to indicate whether you have financial relationships (regardless of amount of compensation) with entities as described in the instructions. Use one line for each entity; add as many lines as you need by clicking the \"Add +\" box. 
You should report relationships that were **present during the 36 months prior to publication**.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 23985, @@ -1367,7 +1502,8 @@ }, "text": "# **Intellectual Property -- Patents & Copyrights Section 4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 24047, @@ -1377,7 +1513,8 @@ }, "text": "Do you have any patents, whether planned, pending or issued, broadly relevant to the work? Yes \u2714 No", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 24146, @@ -1387,7 +1524,8 @@ }, "text": "![](_page_11_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 24174, @@ -1397,7 +1535,8 @@ }, "text": "## **Relationships not covered above Section 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 24223, @@ -1407,7 +1546,8 @@ }, "text": "Are there other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 24408, @@ -1417,7 +1557,8 @@ }, "text": "Yes, the following relationships/conditions/circumstances are present (explain below):", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 24494, @@ -1427,7 +1568,8 @@ }, "text": "\u2714 No other relationships/conditions/circumstances that present a potential conflict of interest", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 24589, @@ -1437,7 +1579,8 @@ }, "text": "At the time of manuscript acceptance, journals will ask authors to confirm and, if necessary, update their disclosure statements. 
On occasion, journals may ask authors to disclose further information about reported relationships.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 24818, @@ -1447,7 +1590,8 @@ }, "text": "## **Disclosure Statement Section 6.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 24856, @@ -1457,7 +1601,8 @@ }, "text": "Based on the above disclosures, this form will automatically generate a disclosure statement, which will appear in the box below.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 24985, @@ -1467,7 +1612,8 @@ }, "text": "Dr. Zhang has nothing to disclose.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 25019, @@ -1477,7 +1623,8 @@ }, "text": "### **Evaluation and Feedback**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 25050, @@ -1487,7 +1634,8 @@ }, "text": "![](_page_12_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 25078, @@ -1497,7 +1645,8 @@ }, "text": "### **Instructions**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 25098, @@ -1507,7 +1656,8 @@ }, "text": " ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 25099, @@ -1517,7 +1667,8 @@ }, "text": "> **The purpose of this form is to provide readers of your manuscript with information about your other interests that could influence how they receive and understand your work. The form is designed to be completed electronically and stored electronically. It contains programming that allows appropriate data display. Each author should submit a separate form and is responsible for the accuracy and completeness of the submitted information. 
The form is in six parts.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 25570, @@ -1527,7 +1678,8 @@ }, "text": "#### **Identifying information. 1.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 25606, @@ -1537,7 +1689,8 @@ }, "text": "#### **The work under consideration for publication. 2.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 25663, @@ -1547,7 +1700,8 @@ }, "text": "This section asks for information about the work that you have submitted for publication. The time frame for this reporting is that of the work itself, from the initial conception and planning to the present. The requested information is about resources that you received, either directly or indirectly (via your institution), to enable you to complete the work. Checking \"No\" means that you did the work without receiving any financial support from any third party -- that is, the work was supported by funds from the same institution that pays your salary and that institution did not receive third-party funds with which to pay you. If you or your institution received funds from a third party to support the work, such as a government granting agency, charitable foundation or commercial sponsor, check \"Yes\".", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 26476, @@ -1557,7 +1711,8 @@ }, "text": "#### **Relevant financial activities outside the submitted work. 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 26545, @@ -1567,7 +1722,8 @@ }, "text": "This section asks about your financial relationships with entities in the bio-medical arena that could be perceived to influence, or that give the appearance of potentially influencing, what you wrote in the submitted work. You should disclose interactions with ANY entity that could be considered broadly relevant to the work. 
For example, if your article is about testing an epidermal growth factor receptor (EGFR) antagonist in lung cancer, you should report all associations with entities pursuing diagnostic or therapeutic strategies in cancer in general, not just in the area of EGFR or lung cancer.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 27150, @@ -1577,7 +1733,8 @@ }, "text": "Report all sources of revenue paid (or promised to be paid) directly to you or your institution on your behalf over the 36 months prior to submission of the work. This should include all monies from sources with relevance to the submitted work, not just monies from the entity that sponsored the research. Please note that your interactions with the work's sponsor that are outside the submitted work should also be listed here. If there is any question, it is usually better to disclose a relationship than not to do so.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 27671, @@ -1587,7 +1744,8 @@ }, "text": " For grants you have received for work outside the submitted work, you should disclose support ONLY from entities that could be perceived to be affected financially by the published work, such as drug companies, or foundations supported by entities that could be perceived to have a financial stake in the outcome. Public funding sources, such as government agencies, charitable foundations or academic institutions, need not be disclosed. For example, if a government agency sponsored a study in which you have been involved and drugs were provided by a pharmaceutical company, you need only list the pharmaceutical company.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 28296, @@ -1597,7 +1755,8 @@ }, "text": "#### **Intellectual Property. 
4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 28330, @@ -1607,7 +1766,8 @@ }, "text": "This section asks about patents and copyrights, whether pending, issued, licensed and/or receiving royalties.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 28439, @@ -1617,7 +1777,8 @@ }, "text": "#### **Relationships not covered above. 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 28483, @@ -1627,7 +1788,8 @@ }, "text": "Use this section to report other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 28685, @@ -1637,7 +1799,8 @@ }, "text": "### **Definitions.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 28705, @@ -1647,7 +1810,8 @@ }, "text": "**Entity:** government agency, foundation, commercial sponsor, academic institution, etc.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 28794, @@ -1657,7 +1821,8 @@ }, "text": "**Grant:** A grant from an entity, generally [but not always] paid to your organization", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 28881, @@ -1667,7 +1832,8 @@ }, "text": "**Personal Fees:** Monies paid to you for services rendered, generally honoraria, royalties, or fees for consulting , lectures, speakers bureaus, expert testimony, employment, or other affiliations", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 29078, @@ -1677,7 +1843,8 @@ }, "text": "**Non-Financial Support:** Examples include drugs/equipment supplied by the entity, travel paid by the entity, writing assistance, administrative support, etc**.**", "annotations": 
[], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 29241, @@ -1687,7 +1854,8 @@ }, "text": "**Other:** Anything not covered under the previous three boxes **Pending:** The patent has been filed but not issued **Issued:** The patent has been issued by the agency **Licensed:** The patent has been licensed to an entity, whether earning royalties or not", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 29500, @@ -1697,7 +1865,8 @@ }, "text": "![](_page_13_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 29528, @@ -1707,7 +1876,8 @@ }, "text": "Did you or your institution **at any time** receive payment or services from a third party (government, commercial, private foundation, etc.) for any aspect of the submitted work (including but not limited to grants, data monitoring board, study design, manuscript preparation, statistical analysis, etc.)?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 29834, @@ -1717,7 +1887,8 @@ }, "text": "# **Relevant financial activities outside the submitted work. Section 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 29908, @@ -1727,7 +1898,8 @@ }, "text": "Place a check in the appropriate boxes in the table to indicate whether you have financial relationships (regardless of amount of compensation) with entities as described in the instructions. Use one line for each entity; add as many lines as you need by clicking the \"Add +\" box. 
You should report relationships that were **present during the 36 months prior to publication**.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 30285, @@ -1737,7 +1909,8 @@ }, "text": "# **Intellectual Property -- Patents & Copyrights Section 4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 30347, @@ -1747,7 +1920,8 @@ }, "text": "Do you have any patents, whether planned, pending or issued, broadly relevant to the work? Yes \u2714 No", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 30446, @@ -1757,7 +1931,8 @@ }, "text": "![](_page_14_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 30474, @@ -1767,7 +1942,8 @@ }, "text": "## **Relationships not covered above Section 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 30523, @@ -1777,7 +1953,8 @@ }, "text": "Are there other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 30708, @@ -1787,7 +1964,8 @@ }, "text": "Yes, the following relationships/conditions/circumstances are present (explain below):", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 30794, @@ -1797,7 +1975,8 @@ }, "text": "\u2714 No other relationships/conditions/circumstances that present a potential conflict of interest", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 30889, @@ -1807,7 +1986,8 @@ }, "text": "At the time of manuscript acceptance, journals will ask authors to confirm and, if necessary, update their disclosure statements. 
On occasion, journals may ask authors to disclose further information about reported relationships.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 31118, @@ -1817,7 +1997,8 @@ }, "text": "## **Disclosure Statement Section 6.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 31156, @@ -1827,7 +2008,8 @@ }, "text": "Based on the above disclosures, this form will automatically generate a disclosure statement, which will appear in the box below.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 31285, @@ -1837,7 +2019,8 @@ }, "text": "Dr. Jiang has nothing to disclose.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 31319, @@ -1847,7 +2030,8 @@ }, "text": "### **Evaluation and Feedback**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 31350, @@ -1857,7 +2041,8 @@ }, "text": "![](_page_15_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 31378, @@ -1867,7 +2052,8 @@ }, "text": "### **Instructions**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 31398, @@ -1877,7 +2063,8 @@ }, "text": " ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 31399, @@ -1887,7 +2074,8 @@ }, "text": "> **The purpose of this form is to provide readers of your manuscript with information about your other interests that could influence how they receive and understand your work. The form is designed to be completed electronically and stored electronically. It contains programming that allows appropriate data display. Each author should submit a separate form and is responsible for the accuracy and completeness of the submitted information. 
The form is in six parts.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 31870, @@ -1897,7 +2085,8 @@ }, "text": "#### **Identifying information. 1.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 31906, @@ -1907,7 +2096,8 @@ }, "text": "#### **The work under consideration for publication. 2.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 31963, @@ -1917,7 +2107,8 @@ }, "text": "This section asks for information about the work that you have submitted for publication. The time frame for this reporting is that of the work itself, from the initial conception and planning to the present. The requested information is about resources that you received, either directly or indirectly (via your institution), to enable you to complete the work. Checking \"No\" means that you did the work without receiving any financial support from any third party -- that is, the work was supported by funds from the same institution that pays your salary and that institution did not receive third-party funds with which to pay you. If you or your institution received funds from a third party to support the work, such as a government granting agency, charitable foundation or commercial sponsor, check \"Yes\".", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 32776, @@ -1927,7 +2118,8 @@ }, "text": "#### **Relevant financial activities outside the submitted work. 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 32845, @@ -1937,7 +2129,8 @@ }, "text": "This section asks about your financial relationships with entities in the bio-medical arena that could be perceived to influence, or that give the appearance of potentially influencing, what you wrote in the submitted work. You should disclose interactions with ANY entity that could be considered broadly relevant to the work. 
For example, if your article is about testing an epidermal growth factor receptor (EGFR) antagonist in lung cancer, you should report all associations with entities pursuing diagnostic or therapeutic strategies in cancer in general, not just in the area of EGFR or lung cancer.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 33450, @@ -1947,7 +2140,8 @@ }, "text": "Report all sources of revenue paid (or promised to be paid) directly to you or your institution on your behalf over the 36 months prior to submission of the work. This should include all monies from sources with relevance to the submitted work, not just monies from the entity that sponsored the research. Please note that your interactions with the work's sponsor that are outside the submitted work should also be listed here. If there is any question, it is usually better to disclose a relationship than not to do so.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 33971, @@ -1957,7 +2151,8 @@ }, "text": " For grants you have received for work outside the submitted work, you should disclose support ONLY from entities that could be perceived to be affected financially by the published work, such as drug companies, or foundations supported by entities that could be perceived to have a financial stake in the outcome. Public funding sources, such as government agencies, charitable foundations or academic institutions, need not be disclosed. For example, if a government agency sponsored a study in which you have been involved and drugs were provided by a pharmaceutical company, you need only list the pharmaceutical company.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 34596, @@ -1967,7 +2162,8 @@ }, "text": "#### **Intellectual Property. 
4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 34630, @@ -1977,7 +2173,8 @@ }, "text": "This section asks about patents and copyrights, whether pending, issued, licensed and/or receiving royalties.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 34739, @@ -1987,7 +2184,8 @@ }, "text": "#### **Relationships not covered above. 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 34783, @@ -1997,7 +2195,8 @@ }, "text": "Use this section to report other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 34985, @@ -2007,7 +2206,8 @@ }, "text": "### **Definitions.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 35005, @@ -2017,7 +2217,8 @@ }, "text": "**Entity:** government agency, foundation, commercial sponsor, academic institution, etc.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 35094, @@ -2027,7 +2228,8 @@ }, "text": "**Grant:** A grant from an entity, generally [but not always] paid to your organization", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 35181, @@ -2037,7 +2239,8 @@ }, "text": "**Personal Fees:** Monies paid to you for services rendered, generally honoraria, royalties, or fees for consulting , lectures, speakers bureaus, expert testimony, employment, or other affiliations", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 35378, @@ -2047,7 +2250,8 @@ }, "text": "**Non-Financial Support:** Examples include drugs/equipment supplied by the entity, travel paid by the entity, writing assistance, administrative support, etc**.**", "annotations": 
[], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 35541, @@ -2057,7 +2261,8 @@ }, "text": "**Other:** Anything not covered under the previous three boxes **Pending:** The patent has been filed but not issued **Issued:** The patent has been issued by the agency **Licensed:** The patent has been licensed to an entity, whether earning royalties or not", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 35800, @@ -2067,7 +2272,8 @@ }, "text": "![](_page_16_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 35828, @@ -2077,7 +2283,8 @@ }, "text": "Did you or your institution **at any time** receive payment or services from a third party (government, commercial, private foundation, etc.) for any aspect of the submitted work (including but not limited to grants, data monitoring board, study design, manuscript preparation, statistical analysis, etc.)?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 36134, @@ -2087,7 +2294,8 @@ }, "text": "# **Relevant financial activities outside the submitted work. Section 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 36208, @@ -2097,7 +2305,8 @@ }, "text": "Place a check in the appropriate boxes in the table to indicate whether you have financial relationships (regardless of amount of compensation) with entities as described in the instructions. Use one line for each entity; add as many lines as you need by clicking the \"Add +\" box. 
You should report relationships that were **present during the 36 months prior to publication**.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 36585, @@ -2107,7 +2316,8 @@ }, "text": "# **Intellectual Property -- Patents & Copyrights Section 4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 36647, @@ -2117,7 +2327,8 @@ }, "text": "![](_page_17_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 36675, @@ -2127,7 +2338,8 @@ }, "text": "## **Relationships not covered above Section 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 36724, @@ -2137,7 +2349,8 @@ }, "text": "Are there other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 36909, @@ -2147,7 +2360,8 @@ }, "text": "Yes, the following relationships/conditions/circumstances are present (explain below):", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 36995, @@ -2157,7 +2371,8 @@ }, "text": "\u2714 No other relationships/conditions/circumstances that present a potential conflict of interest", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 37090, @@ -2167,7 +2382,8 @@ }, "text": "At the time of manuscript acceptance, journals will ask authors to confirm and, if necessary, update their disclosure statements. 
On occasion, journals may ask authors to disclose further information about reported relationships.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 37319, @@ -2177,7 +2393,8 @@ }, "text": "## **Disclosure Statement Section 6.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 37357, @@ -2187,7 +2404,8 @@ }, "text": "Based on the above disclosures, this form will automatically generate a disclosure statement, which will appear in the box below.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 37486, @@ -2197,7 +2415,8 @@ }, "text": "Dr. Sun has nothing to disclose.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 37518, @@ -2207,7 +2426,8 @@ }, "text": "### **Evaluation and Feedback**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 37549, @@ -2217,7 +2437,8 @@ }, "text": "![](_page_18_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 37577, @@ -2227,7 +2448,8 @@ }, "text": "### **Instructions**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 37597, @@ -2237,7 +2459,8 @@ }, "text": " ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 37598, @@ -2247,7 +2470,8 @@ }, "text": "> **The purpose of this form is to provide readers of your manuscript with information about your other interests that could influence how they receive and understand your work. The form is designed to be completed electronically and stored electronically. It contains programming that allows appropriate data display. Each author should submit a separate form and is responsible for the accuracy and completeness of the submitted information. 
The form is in six parts.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 38069, @@ -2257,7 +2481,8 @@ }, "text": "#### **Identifying information. 1.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 38105, @@ -2267,7 +2492,8 @@ }, "text": "#### **The work under consideration for publication. 2.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 38162, @@ -2277,7 +2503,8 @@ }, "text": "This section asks for information about the work that you have submitted for publication. The time frame for this reporting is that of the work itself, from the initial conception and planning to the present. The requested information is about resources that you received, either directly or indirectly (via your institution), to enable you to complete the work. Checking \"No\" means that you did the work without receiving any financial support from any third party -- that is, the work was supported by funds from the same institution that pays your salary and that institution did not receive third-party funds with which to pay you. If you or your institution received funds from a third party to support the work, such as a government granting agency, charitable foundation or commercial sponsor, check \"Yes\".", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 38975, @@ -2287,7 +2514,8 @@ }, "text": "#### **Relevant financial activities outside the submitted work. 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 39044, @@ -2297,7 +2525,8 @@ }, "text": "This section asks about your financial relationships with entities in the bio-medical arena that could be perceived to influence, or that give the appearance of potentially influencing, what you wrote in the submitted work. You should disclose interactions with ANY entity that could be considered broadly relevant to the work. 
For example, if your article is about testing an epidermal growth factor receptor (EGFR) antagonist in lung cancer, you should report all associations with entities pursuing diagnostic or therapeutic strategies in cancer in general, not just in the area of EGFR or lung cancer.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 39649, @@ -2307,7 +2536,8 @@ }, "text": "Report all sources of revenue paid (or promised to be paid) directly to you or your institution on your behalf over the 36 months prior to submission of the work. This should include all monies from sources with relevance to the submitted work, not just monies from the entity that sponsored the research. Please note that your interactions with the work's sponsor that are outside the submitted work should also be listed here. If there is any question, it is usually better to disclose a relationship than not to do so.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 40170, @@ -2317,7 +2547,8 @@ }, "text": " For grants you have received for work outside the submitted work, you should disclose support ONLY from entities that could be perceived to be affected financially by the published work, such as drug companies, or foundations supported by entities that could be perceived to have a financial stake in the outcome. Public funding sources, such as government agencies, charitable foundations or academic institutions, need not be disclosed. For example, if a government agency sponsored a study in which you have been involved and drugs were provided by a pharmaceutical company, you need only list the pharmaceutical company.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 40795, @@ -2327,7 +2558,8 @@ }, "text": "#### **Intellectual Property. 
4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 40829, @@ -2337,7 +2569,8 @@ }, "text": "This section asks about patents and copyrights, whether pending, issued, licensed and/or receiving royalties.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 40938, @@ -2347,7 +2580,8 @@ }, "text": "#### **Relationships not covered above. 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 40982, @@ -2357,7 +2591,8 @@ }, "text": "Use this section to report other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 41184, @@ -2367,7 +2602,8 @@ }, "text": "### **Definitions.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 41204, @@ -2377,7 +2613,8 @@ }, "text": "**Entity:** government agency, foundation, commercial sponsor, academic institution, etc.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 41293, @@ -2387,7 +2624,8 @@ }, "text": "**Grant:** A grant from an entity, generally [but not always] paid to your organization", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 41380, @@ -2397,7 +2635,8 @@ }, "text": "**Personal Fees:** Monies paid to you for services rendered, generally honoraria, royalties, or fees for consulting , lectures, speakers bureaus, expert testimony, employment, or other affiliations", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 41577, @@ -2407,7 +2646,8 @@ }, "text": "**Non-Financial Support:** Examples include drugs/equipment supplied by the entity, travel paid by the entity, writing assistance, administrative support, etc**.**", "annotations": 
[], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 41740, @@ -2417,7 +2657,8 @@ }, "text": "**Other:** Anything not covered under the previous three boxes **Pending:** The patent has been filed but not issued **Issued:** The patent has been issued by the agency **Licensed:** The patent has been licensed to an entity, whether earning royalties or not", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 41999, @@ -2427,7 +2668,8 @@ }, "text": "![](_page_19_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 42027, @@ -2437,7 +2679,8 @@ }, "text": "Did you or your institution **at any time** receive payment or services from a third party (government, commercial, private foundation, etc.) for any aspect of the submitted work (including but not limited to grants, data monitoring board, study design, manuscript preparation,", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 42304, @@ -2447,7 +2690,8 @@ }, "text": "statistical analysis, etc.)? Are there any relevant conflicts of interest? Yes \u2714 No", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 42387, @@ -2457,7 +2701,8 @@ }, "text": "# **Relevant financial activities outside the submitted work. Section 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 42461, @@ -2467,7 +2712,8 @@ }, "text": "Place a check in the appropriate boxes in the table to indicate whether you have financial relationships (regardless of amount of compensation) with entities as described in the instructions. Use one line for each entity; add as many lines as you need by clicking the \"Add +\" box. 
You should report relationships that were **present during the 36 months prior to publication**.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 42838, @@ -2477,7 +2723,8 @@ }, "text": "Are there any relevant conflicts of interest? Yes \u2714 No", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 42892, @@ -2487,7 +2734,8 @@ }, "text": "# **Intellectual Property -- Patents & Copyrights Section 4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 42954, @@ -2497,7 +2745,8 @@ }, "text": "![](_page_20_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 42982, @@ -2507,7 +2756,8 @@ }, "text": "## **Relationships not covered above Section 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 43031, @@ -2517,7 +2767,8 @@ }, "text": "Are there other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 43216, @@ -2527,7 +2778,8 @@ }, "text": "Yes, the following relationships/conditions/circumstances are present (explain below):", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 43302, @@ -2537,7 +2789,8 @@ }, "text": "\u2714 No other relationships/conditions/circumstances that present a potential conflict of interest", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 43397, @@ -2547,7 +2800,8 @@ }, "text": "At the time of manuscript acceptance, journals will ask authors to confirm and, if necessary, update their disclosure statements. 
On occasion, journals may ask authors to disclose further information about reported relationships.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 43626, @@ -2557,7 +2811,8 @@ }, "text": "## **Disclosure Statement Section 6.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 43664, @@ -2567,7 +2822,8 @@ }, "text": "Based on the above disclosures, this form will automatically generate a disclosure statement, which will appear in the box below.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 43793, @@ -2577,7 +2833,8 @@ }, "text": "Dr. Xie has nothing to disclose.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 43825, @@ -2587,7 +2844,8 @@ }, "text": "### **Evaluation and Feedback**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 43856, @@ -2597,7 +2855,8 @@ }, "text": "![](_page_21_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 43884, @@ -2607,7 +2866,8 @@ }, "text": "### **Instructions**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 43904, @@ -2617,7 +2877,8 @@ }, "text": " ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 43905, @@ -2627,7 +2888,8 @@ }, "text": "> **The purpose of this form is to provide readers of your manuscript with information about your other interests that could influence how they receive and understand your work. The form is designed to be completed electronically and stored electronically. It contains programming that allows appropriate data display. Each author should submit a separate form and is responsible for the accuracy and completeness of the submitted information. 
The form is in six parts.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 44376, @@ -2637,7 +2899,8 @@ }, "text": "#### **Identifying information. 1.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 44412, @@ -2647,7 +2910,8 @@ }, "text": "#### **The work under consideration for publication. 2.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 44469, @@ -2657,7 +2921,8 @@ }, "text": "This section asks for information about the work that you have submitted for publication. The time frame for this reporting is that of the work itself, from the initial conception and planning to the present. The requested information is about resources that you received, either directly or indirectly (via your institution), to enable you to complete the work. Checking \"No\" means that you did the work without receiving any financial support from any third party -- that is, the work was supported by funds from the same institution that pays your salary and that institution did not receive third-party funds with which to pay you. If you or your institution received funds from a third party to support the work, such as a government granting agency, charitable foundation or commercial sponsor, check \"Yes\".", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 45282, @@ -2667,7 +2932,8 @@ }, "text": "#### **Relevant financial activities outside the submitted work. 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 45351, @@ -2677,7 +2943,8 @@ }, "text": "This section asks about your financial relationships with entities in the bio-medical arena that could be perceived to influence, or that give the appearance of potentially influencing, what you wrote in the submitted work. You should disclose interactions with ANY entity that could be considered broadly relevant to the work. 
For example, if your article is about testing an epidermal growth factor receptor (EGFR) antagonist in lung cancer, you should report all associations with entities pursuing diagnostic or therapeutic strategies in cancer in general, not just in the area of EGFR or lung cancer.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 45956, @@ -2687,7 +2954,8 @@ }, "text": "Report all sources of revenue paid (or promised to be paid) directly to you or your institution on your behalf over the 36 months prior to submission of the work. This should include all monies from sources with relevance to the submitted work, not just monies from the entity that sponsored the research. Please note that your interactions with the work's sponsor that are outside the submitted work should also be listed here. If there is any question, it is usually better to disclose a relationship than not to do so.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 46477, @@ -2697,7 +2965,8 @@ }, "text": " For grants you have received for work outside the submitted work, you should disclose support ONLY from entities that could be perceived to be affected financially by the published work, such as drug companies, or foundations supported by entities that could be perceived to have a financial stake in the outcome. Public funding sources, such as government agencies, charitable foundations or academic institutions, need not be disclosed. For example, if a government agency sponsored a study in which you have been involved and drugs were provided by a pharmaceutical company, you need only list the pharmaceutical company.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 47102, @@ -2707,7 +2976,8 @@ }, "text": "#### **Intellectual Property. 
4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 47136, @@ -2717,7 +2987,8 @@ }, "text": "This section asks about patents and copyrights, whether pending, issued, licensed and/or receiving royalties.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 47245, @@ -2727,7 +2998,8 @@ }, "text": "#### **Relationships not covered above. 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 47289, @@ -2737,7 +3009,8 @@ }, "text": "Use this section to report other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 47491, @@ -2747,7 +3020,8 @@ }, "text": "### **Definitions.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 47511, @@ -2757,7 +3031,8 @@ }, "text": "**Entity:** government agency, foundation, commercial sponsor, academic institution, etc.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 47600, @@ -2767,7 +3042,8 @@ }, "text": "**Grant:** A grant from an entity, generally [but not always] paid to your organization", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 47687, @@ -2777,7 +3053,8 @@ }, "text": "**Personal Fees:** Monies paid to you for services rendered, generally honoraria, royalties, or fees for consulting , lectures, speakers bureaus, expert testimony, employment, or other affiliations", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 47884, @@ -2787,7 +3064,8 @@ }, "text": "**Non-Financial Support:** Examples include drugs/equipment supplied by the entity, travel paid by the entity, writing assistance, administrative support, etc**.**", "annotations": 
[], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 48047, @@ -2797,7 +3075,8 @@ }, "text": "**Other:** Anything not covered under the previous three boxes **Pending:** The patent has been filed but not issued **Issued:** The patent has been issued by the agency **Licensed:** The patent has been licensed to an entity, whether earning royalties or not", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 48306, @@ -2807,7 +3086,8 @@ }, "text": "![](_page_22_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 48334, @@ -2817,7 +3097,8 @@ }, "text": "Did you or your institution **at any time** receive payment or services from a third party (government, commercial, private foundation, etc.) for any aspect of the submitted work (including but not limited to grants, data monitoring board, study design, manuscript preparation,", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 48611, @@ -2827,7 +3108,8 @@ }, "text": "statistical analysis, etc.)? Are there any relevant conflicts of interest? Yes \u2714 No", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 48694, @@ -2837,7 +3119,8 @@ }, "text": "# **Relevant financial activities outside the submitted work. Section 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 48768, @@ -2847,7 +3130,8 @@ }, "text": "Place a check in the appropriate boxes in the table to indicate whether you have financial relationships (regardless of amount of compensation) with entities as described in the instructions. Use one line for each entity; add as many lines as you need by clicking the \"Add +\" box. 
You should report relationships that were **present during the 36 months prior to publication**.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 49145, @@ -2857,7 +3141,8 @@ }, "text": "Are there any relevant conflicts of interest? Yes \u2714 No", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 49199, @@ -2867,7 +3152,8 @@ }, "text": "# **Intellectual Property -- Patents & Copyrights Section 4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 49261, @@ -2877,7 +3163,8 @@ }, "text": "Do you have any patents, whether planned, pending or issued, broadly relevant to the work? Yes \u2714 No", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 49360, @@ -2887,7 +3174,8 @@ }, "text": "![](_page_23_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 49388, @@ -2897,7 +3185,8 @@ }, "text": "## **Relationships not covered above Section 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 49437, @@ -2907,7 +3196,8 @@ }, "text": "Are there other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 49622, @@ -2917,7 +3207,8 @@ }, "text": "Yes, the following relationships/conditions/circumstances are present (explain below):", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 49708, @@ -2927,7 +3218,8 @@ }, "text": "\u2714 No other relationships/conditions/circumstances that present a potential conflict of interest", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 49803, @@ -2937,7 +3229,8 @@ }, "text": "At the time of manuscript 
acceptance, journals will ask authors to confirm and, if necessary, update their disclosure statements. On occasion, journals may ask authors to disclose further information about reported relationships.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 50032, @@ -2947,7 +3240,8 @@ }, "text": "## **Disclosure Statement Section 6.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 50070, @@ -2957,7 +3251,8 @@ }, "text": "Based on the above disclosures, this form will automatically generate a disclosure statement, which will appear in the box below.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 50199, @@ -2967,7 +3262,8 @@ }, "text": "Dr. Xiao has nothing to disclose.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 50232, @@ -2977,7 +3273,8 @@ }, "text": "### **Evaluation and Feedback**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 50263, @@ -2987,7 +3284,8 @@ }, "text": "![](_page_24_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 50291, @@ -2997,7 +3295,8 @@ }, "text": "### **Instructions**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 50311, @@ -3007,7 +3306,8 @@ }, "text": " ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 50312, @@ -3017,7 +3317,8 @@ }, "text": "> **The purpose of this form is to provide readers of your manuscript with information about your other interests that could influence how they receive and understand your work. The form is designed to be completed electronically and stored electronically. It contains programming that allows appropriate data display. Each author should submit a separate form and is responsible for the accuracy and completeness of the submitted information. 
The form is in six parts.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 50783, @@ -3027,7 +3328,8 @@ }, "text": "#### **Identifying information. 1.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 50819, @@ -3037,7 +3339,8 @@ }, "text": "#### **The work under consideration for publication. 2.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 50876, @@ -3047,7 +3350,8 @@ }, "text": "This section asks for information about the work that you have submitted for publication. The time frame for this reporting is that of the work itself, from the initial conception and planning to the present. The requested information is about resources that you received, either directly or indirectly (via your institution), to enable you to complete the work. Checking \"No\" means that you did the work without receiving any financial support from any third party -- that is, the work was supported by funds from the same institution that pays your salary and that institution did not receive third-party funds with which to pay you. If you or your institution received funds from a third party to support the work, such as a government granting agency, charitable foundation or commercial sponsor, check \"Yes\".", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 51689, @@ -3057,7 +3361,8 @@ }, "text": "#### **Relevant financial activities outside the submitted work. 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 51758, @@ -3067,7 +3372,8 @@ }, "text": "This section asks about your financial relationships with entities in the bio-medical arena that could be perceived to influence, or that give the appearance of potentially influencing, what you wrote in the submitted work. You should disclose interactions with ANY entity that could be considered broadly relevant to the work. 
For example, if your article is about testing an epidermal growth factor receptor (EGFR) antagonist in lung cancer, you should report all associations with entities pursuing diagnostic or therapeutic strategies in cancer in general, not just in the area of EGFR or lung cancer.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 52363, @@ -3077,7 +3383,8 @@ }, "text": "Report all sources of revenue paid (or promised to be paid) directly to you or your institution on your behalf over the 36 months prior to submission of the work. This should include all monies from sources with relevance to the submitted work, not just monies from the entity that sponsored the research. Please note that your interactions with the work's sponsor that are outside the submitted work should also be listed here. If there is any question, it is usually better to disclose a relationship than not to do so.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 52884, @@ -3087,7 +3394,8 @@ }, "text": " For grants you have received for work outside the submitted work, you should disclose support ONLY from entities that could be perceived to be affected financially by the published work, such as drug companies, or foundations supported by entities that could be perceived to have a financial stake in the outcome. Public funding sources, such as government agencies, charitable foundations or academic institutions, need not be disclosed. For example, if a government agency sponsored a study in which you have been involved and drugs were provided by a pharmaceutical company, you need only list the pharmaceutical company.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 53509, @@ -3097,7 +3405,8 @@ }, "text": "#### **Intellectual Property. 
4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 53543, @@ -3107,7 +3416,8 @@ }, "text": "This section asks about patents and copyrights, whether pending, issued, licensed and/or receiving royalties.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 53652, @@ -3117,7 +3427,8 @@ }, "text": "#### **Relationships not covered above. 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 53696, @@ -3127,7 +3438,8 @@ }, "text": "Use this section to report other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 53898, @@ -3137,7 +3449,8 @@ }, "text": "### **Definitions.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 53918, @@ -3147,7 +3460,8 @@ }, "text": "**Entity:** government agency, foundation, commercial sponsor, academic institution, etc.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 54007, @@ -3157,7 +3471,8 @@ }, "text": "**Grant:** A grant from an entity, generally [but not always] paid to your organization", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 54094, @@ -3167,7 +3482,8 @@ }, "text": "**Personal Fees:** Monies paid to you for services rendered, generally honoraria, royalties, or fees for consulting , lectures, speakers bureaus, expert testimony, employment, or other affiliations", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 54291, @@ -3177,7 +3493,8 @@ }, "text": "**Non-Financial Support:** Examples include drugs/equipment supplied by the entity, travel paid by the entity, writing assistance, administrative support, etc**.**", "annotations": 
[], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 54454, @@ -3187,7 +3504,8 @@ }, "text": "**Other:** Anything not covered under the previous three boxes **Pending:** The patent has been filed but not issued **Issued:** The patent has been issued by the agency **Licensed:** The patent has been licensed to an entity, whether earning royalties or not", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 54713, @@ -3197,7 +3515,8 @@ }, "text": "![](_page_25_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 54741, @@ -3207,7 +3526,8 @@ }, "text": "Did you or your institution **at any time** receive payment or services from a third party (government, commercial, private foundation, etc.) for any aspect of the submitted work (including but not limited to grants, data monitoring board, study design, manuscript preparation, statistical analysis, etc.)?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 55047, @@ -3217,7 +3537,8 @@ }, "text": "# **Relevant financial activities outside the submitted work. Section 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 55121, @@ -3227,7 +3548,8 @@ }, "text": "Place a check in the appropriate boxes in the table to indicate whether you have financial relationships (regardless of amount of compensation) with entities as described in the instructions. Use one line for each entity; add as many lines as you need by clicking the \"Add +\" box. 
You should report relationships that were **present during the 36 months prior to publication**.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 55498, @@ -3237,7 +3559,8 @@ }, "text": "# **Intellectual Property -- Patents & Copyrights Section 4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 55560, @@ -3247,7 +3570,8 @@ }, "text": "Do you have any patents, whether planned, pending or issued, broadly relevant to the work? Yes \u2714 No", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 55659, @@ -3257,7 +3581,8 @@ }, "text": "![](_page_26_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 55687, @@ -3267,7 +3592,8 @@ }, "text": "## **Relationships not covered above Section 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 55736, @@ -3277,7 +3603,8 @@ }, "text": "Are there other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 55921, @@ -3287,7 +3614,8 @@ }, "text": "Yes, the following relationships/conditions/circumstances are present (explain below):", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 56007, @@ -3297,7 +3625,8 @@ }, "text": "\u2714 No other relationships/conditions/circumstances that present a potential conflict of interest", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 56102, @@ -3307,7 +3636,8 @@ }, "text": "At the time of manuscript acceptance, journals will ask authors to confirm and, if necessary, update their disclosure statements. 
On occasion, journals may ask authors to disclose further information about reported relationships.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 56331, @@ -3317,7 +3647,8 @@ }, "text": "## **Disclosure Statement Section 6.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 56369, @@ -3327,7 +3658,8 @@ }, "text": "Based on the above disclosures, this form will automatically generate a disclosure statement, which will appear in the box below.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 56498, @@ -3337,7 +3669,8 @@ }, "text": "Dr. Zhang has nothing to disclose.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 56532, @@ -3347,7 +3680,8 @@ }, "text": "### **Evaluation and Feedback**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 56563, @@ -3357,7 +3691,8 @@ }, "text": "![](_page_27_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 56591, @@ -3367,7 +3702,8 @@ }, "text": "### **Instructions**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 56611, @@ -3377,7 +3713,8 @@ }, "text": " ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 56612, @@ -3387,7 +3724,8 @@ }, "text": "> **The purpose of this form is to provide readers of your manuscript with information about your other interests that could influence how they receive and understand your work. The form is designed to be completed electronically and stored electronically. It contains programming that allows appropriate data display. Each author should submit a separate form and is responsible for the accuracy and completeness of the submitted information. 
The form is in six parts.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 57083, @@ -3397,7 +3735,8 @@ }, "text": "#### **Identifying information. 1.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 57119, @@ -3407,7 +3746,8 @@ }, "text": "#### **The work under consideration for publication. 2.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 57176, @@ -3417,7 +3757,8 @@ }, "text": "This section asks for information about the work that you have submitted for publication. The time frame for this reporting is that of the work itself, from the initial conception and planning to the present. The requested information is about resources that you received, either directly or indirectly (via your institution), to enable you to complete the work. Checking \"No\" means that you did the work without receiving any financial support from any third party -- that is, the work was supported by funds from the same institution that pays your salary and that institution did not receive third-party funds with which to pay you. If you or your institution received funds from a third party to support the work, such as a government granting agency, charitable foundation or commercial sponsor, check \"Yes\".", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 57989, @@ -3427,7 +3768,8 @@ }, "text": "#### **Relevant financial activities outside the submitted work. 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 58058, @@ -3437,7 +3779,8 @@ }, "text": "This section asks about your financial relationships with entities in the bio-medical arena that could be perceived to influence, or that give the appearance of potentially influencing, what you wrote in the submitted work. You should disclose interactions with ANY entity that could be considered broadly relevant to the work. 
For example, if your article is about testing an epidermal growth factor receptor (EGFR) antagonist in lung cancer, you should report all associations with entities pursuing diagnostic or therapeutic strategies in cancer in general, not just in the area of EGFR or lung cancer.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 58663, @@ -3447,7 +3790,8 @@ }, "text": "Report all sources of revenue paid (or promised to be paid) directly to you or your institution on your behalf over the 36 months prior to submission of the work. This should include all monies from sources with relevance to the submitted work, not just monies from the entity that sponsored the research. Please note that your interactions with the work's sponsor that are outside the submitted work should also be listed here. If there is any question, it is usually better to disclose a relationship than not to do so.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 59184, @@ -3457,7 +3801,8 @@ }, "text": " For grants you have received for work outside the submitted work, you should disclose support ONLY from entities that could be perceived to be affected financially by the published work, such as drug companies, or foundations supported by entities that could be perceived to have a financial stake in the outcome. Public funding sources, such as government agencies, charitable foundations or academic institutions, need not be disclosed. For example, if a government agency sponsored a study in which you have been involved and drugs were provided by a pharmaceutical company, you need only list the pharmaceutical company.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 59809, @@ -3467,7 +3812,8 @@ }, "text": "#### **Intellectual Property. 
4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 59843, @@ -3477,7 +3823,8 @@ }, "text": "This section asks about patents and copyrights, whether pending, issued, licensed and/or receiving royalties.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 59952, @@ -3487,7 +3834,8 @@ }, "text": "#### **Relationships not covered above. 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 59996, @@ -3497,7 +3845,8 @@ }, "text": "Use this section to report other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 60198, @@ -3507,7 +3856,8 @@ }, "text": "### **Definitions.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 60218, @@ -3517,7 +3867,8 @@ }, "text": "**Entity:** government agency, foundation, commercial sponsor, academic institution, etc.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 60307, @@ -3527,7 +3878,8 @@ }, "text": "**Grant:** A grant from an entity, generally [but not always] paid to your organization", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 60394, @@ -3537,7 +3889,8 @@ }, "text": "**Personal Fees:** Monies paid to you for services rendered, generally honoraria, royalties, or fees for consulting , lectures, speakers bureaus, expert testimony, employment, or other affiliations", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 60591, @@ -3547,7 +3900,8 @@ }, "text": "**Non-Financial Support:** Examples include drugs/equipment supplied by the entity, travel paid by the entity, writing assistance, administrative support, etc**.**", "annotations": 
[], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 60754, @@ -3557,7 +3911,8 @@ }, "text": "**Other:** Anything not covered under the previous three boxes **Pending:** The patent has been filed but not issued **Issued:** The patent has been issued by the agency **Licensed:** The patent has been licensed to an entity, whether earning royalties or not", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 61013, @@ -3567,7 +3922,8 @@ }, "text": "![](_page_28_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 61041, @@ -3577,7 +3933,8 @@ }, "text": "Did you or your institution **at any time** receive payment or services from a third party (government, commercial, private foundation, etc.) for any aspect of the submitted work (including but not limited to grants, data monitoring board, study design, manuscript preparation, statistical analysis, etc.)?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 61347, @@ -3587,7 +3944,8 @@ }, "text": "# **Relevant financial activities outside the submitted work. Section 3.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 61421, @@ -3597,7 +3955,8 @@ }, "text": "Place a check in the appropriate boxes in the table to indicate whether you have financial relationships (regardless of amount of compensation) with entities as described in the instructions. Use one line for each entity; add as many lines as you need by clicking the \"Add +\" box. 
You should report relationships that were **present during the 36 months prior to publication**.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 61798, @@ -3607,7 +3966,8 @@ }, "text": "# **Intellectual Property -- Patents & Copyrights Section 4.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 61860, @@ -3617,7 +3977,8 @@ }, "text": "![](_page_29_Picture_0.jpeg)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 61888, @@ -3627,7 +3988,8 @@ }, "text": "## **Relationships not covered above Section 5.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 61937, @@ -3637,7 +3999,8 @@ }, "text": "Are there other relationships or activities that readers could perceive to have influenced, or that give the appearance of potentially influencing, what you wrote in the submitted work?", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 62122, @@ -3647,7 +4010,8 @@ }, "text": "Yes, the following relationships/conditions/circumstances are present (explain below):", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 62208, @@ -3657,7 +4021,8 @@ }, "text": "\u2714 No other relationships/conditions/circumstances that present a potential conflict of interest", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 62303, @@ -3667,7 +4032,8 @@ }, "text": "At the time of manuscript acceptance, journals will ask authors to confirm and, if necessary, update their disclosure statements. 
On occasion, journals may ask authors to disclose further information about reported relationships.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 62532, @@ -3677,7 +4043,8 @@ }, "text": "## **Disclosure Statement Section 6.**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 62570, @@ -3687,7 +4054,8 @@ }, "text": "Based on the above disclosures, this form will automatically generate a disclosure statement, which will appear in the box below.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 62699, @@ -3697,7 +4065,8 @@ }, "text": "Dr. Li has nothing to disclose.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 62730, @@ -3710,7 +4079,8 @@ "relations": [] } ], - "relations": [] + "relations": [], + "annotations": [] } ] } \ No newline at end of file diff --git a/tests/data/Supplementary/Word/Expected Output/mmc1.doc_bioc.json b/tests/data/Supplementary/Word/Expected Output/mmc1.doc_bioc.json index 9908a236..793320fa 100644 --- a/tests/data/Supplementary/Word/Expected Output/mmc1.doc_bioc.json +++ b/tests/data/Supplementary/Word/Expected Output/mmc1.doc_bioc.json @@ -17,7 +17,8 @@ }, "text": "The occurrence of a multidrug-resistant tuberculous retropharyngeal abscess in an immunocompetent patient: a case report", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 120, @@ -27,7 +28,8 @@ }, "text": "Tiresse N4* , Oucharqui S2*,Benaissa E1, 2, Badri B4 Bssaibis F2, Maleb A3, Elouennass M1,2", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 212, @@ -37,7 +39,8 @@ }, "text": "1Epidemiology and bacterial resistance research team/BIO-INOVA Centre, Faculty of Medicine and Pharmacy (University Mohammed V), Rabat, Morocco. 
", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 357, @@ -47,7 +50,8 @@ }, "text": "2Department of Bacteriology, Mohammed V Military Teaching Hospital / Faculty of Medicine and Pharmacy (University Mohammed V), Rabat, Morocco. ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 500, @@ -57,7 +61,8 @@ }, "text": "3Laboratory of Microbiology, Mohammed VI University Hospital / Faculty of Medicine and Pharmacy (University Mohammed the first), Oujda, Morocco.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 644, @@ -67,7 +72,8 @@ }, "text": "4Department of pneumology, Mohammed V Military Teaching Hospital / Faculty of Medicine and Pharmacy (University Mohammed V), Rabat, Morocco.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 784, @@ -77,7 +83,8 @@ }, "text": "* Oucharqui sara and Tiresse nabil have contributed equally in the elaboration of the work.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 875, @@ -87,7 +94,8 @@ }, "text": "Corresponding author: Elmostafa Benaissa : benaissaelmostafa2@gmail.com", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 947, @@ -97,7 +105,8 @@ }, "text": "Tiresse Nabil: nabil.tiresse1@gmail.com", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 986, @@ -107,7 +116,8 @@ }, "text": "Oucharqui Sara: oucharqui@gmail.com", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1021, @@ -117,7 +127,8 @@ }, "text": "Elmostafa Benaissa : benaissaelmostafa2@gmail.com", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1071, @@ -127,7 +138,8 @@ }, "text": "Badri bouchra: bouchra.ba04@gmail.com", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1108, 
@@ -137,7 +149,8 @@ }, "text": "Bssaibis fatna: bssaibisfatna@yahoo.fr", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1146, @@ -147,7 +160,8 @@ }, "text": "Adil Maleb: maleb.adil@gmail.com", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1178, @@ -157,7 +171,8 @@ }, "text": "Mostafa Elouennass: elouennassm@yahoo.fr", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1218, @@ -167,7 +182,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1218, @@ -177,7 +193,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1218, @@ -187,7 +204,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1218, @@ -197,7 +215,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1218, @@ -207,7 +226,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1218, @@ -217,7 +237,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1218, @@ -227,7 +248,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1218, @@ -237,7 +259,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1218, @@ -247,7 +270,8 @@ }, "text": "Abstract:", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 1227, @@ -257,7 +281,8 @@ }, "text": "Retropharyngeal abscess is an uncommon location of tuberculosis (TB). In this report, we describe a multidrug-resistant tuberculous retropharyngeal abscess in a 21-year-old female patient who was treated for lymph node TB for one year. 
CT scan revealed a large retropharyngeal abscess that was aspirated intraorally under local anesthesia. The diagnosis of TB was retained by molecular and histological study. GeneXpert MTB/ RIF (Cepheid, Sunnyvale, CA, USA),performed on the pus, showed rifampicin resistance and a first- and second-line drug resistance test using Genotype MTBDRplus VER.2 and MTBDRsl VER.1 (Hain Lifescience GmbH, Nehren, Germany) showed TB highly resistant to rifampicin, isoniazid, and aminoglycosides. Treatment is primarily medical as it combines specific antituberculous antibiotics, and aspiration for drainage of the abscess. Our patient was put on long-term 2nd line anti-TB treatment. ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 2142, @@ -267,7 +292,8 @@ }, "text": "Key words: Multidrug-resistant, Retropharyngeal abscess, GeneXpert MTB/RIF, GenoType MTBDRplus, GenoType MTBDRsl", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 2254, @@ -277,7 +303,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 2254, @@ -287,7 +314,8 @@ }, "text": "Introduction", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 2266, @@ -297,7 +325,8 @@ }, "text": "Tuberculous retropharyngeal abscess is not frequently reported in the literature and pre-extensive tuberculous retropharyngeal abscess is even less frequently reported [1]. Early recognition of this condition is essential to prevent serious complications. The diagnosis is difficult and relies on a combination of clinical, radiological and biological arguments. 
We report a case of multidrug resistant (MDR) tuberculous retropharyngeal abscess in a 21-year-old female patient treated for lymph node tuberculosis (TB) for one year and discuss the different diagnostic and therapeutic elements of this pathology, highlighting the contribution of molecular biology in the effective management of MDR extra-pulmonary TB.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 2983, @@ -307,7 +336,8 @@ }, "text": "Case report", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 2994, @@ -317,7 +347,8 @@ }, "text": "This is a 21-year-old female with a history of chronic headache for several years with Chiari decompression surgery performed in 2017 and latero-cervical adenopathy diagnosed as lymph node TB on bacteriological, molecular and histological arguments in 2019. GeneXpert MTB/RIF performed on the cervical lymph node came back positive for TB, without resistance to rifampicin. She was then treated at another institution according to the national protocol which includes quadritherapy with isoniazid, rifampicin, ethambutol and pyrazinamide for 2 months followed by bitherapy with isoniazid and rifampicin for 10 months (2RHZE/10RH). The evolution was then marked by the disappearance of the lymph nodes after one year of treatment. Six months after the end of treatment, the patient presented to the emergency room with severe headaches.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 3830, @@ -327,7 +358,8 @@ }, "text": "Otherwise, no cough, chest pain, fever, or loss of appetite was reported. The patient noted no signs of trismus or difficulty breathing. She reported no known allergies and had no history of smoking or drinking alcohol. On admission, physical examination revealed a body temperature of 36.6\u00b0C, a heart rate of 90 beats/min, and a blood pressure of 117/75 mmHg. 
Palpation of both sides of the neck revealed no tenderness and no lymph nodes were noted. Examination of the oral cavity revealed no pathologic findings, and no posterior pharyngeal wall projections were observed. The lungs were clear on auscultation and no neurologic deficits were noted on initial clinical examination. ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 4514, @@ -337,7 +369,8 @@ }, "text": "The biological workup showed hemoglobin at 12.6 g/l; white blood cell count at 4.8 G/l; and C-reactive protein at 0.8 mg/l. In addition, serologies for human immunodeficiency virus (HIV), hepatitis B, and hepatitis C were negative.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 4745, @@ -347,7 +380,8 @@ }, "text": "A cerebral CT scan performed as part of the etiological diagnosis fortuitously revealed a peripherally enhanced collection in the retropharyngeal area after injection of contrast medium measuring 19x21 mm, associated with an adjacent necrotic adenopathy measuring 10x06 mm. (figure 1).", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5030, @@ -357,7 +391,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5030, @@ -367,7 +402,8 @@ }, "text": "Figure 1: Sagittal CT scan revealed a peripherally enhanced collection in the retropharyngeal area after injection of contrast medium.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5164, @@ -377,7 +413,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5164, @@ -387,7 +424,8 @@ }, "text": "A cervical MRI was realized later and confirmed the presence of the retropharyngeal collection. 
(figure 2)", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5270, @@ -397,7 +435,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5270, @@ -407,7 +446,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5270, @@ -417,7 +457,8 @@ }, "text": "Figure 2: Sagittal MRI revealed the presence of the retropharyngeal collection", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5348, @@ -427,7 +468,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 5348, @@ -437,7 +479,8 @@ }, "text": "The abscess was drained under local anesthesia. 02 milliliters of pus were aspirated. The specimen was sent for bacteriological analysis for Mycobacterium tuberculosis complex (MTC) and banal germs as well as for pathological study. A molecular study using GeneXpert MTB/RIF (Cepheid, Sunnyvale, CA, USA) resulted in detection of MTC with detection of rifampicin resistance in less than 2 hours. In response to this rifampicin resistance, we performed other molecular tests, including GenoType MTBDRplus VER. 2 and GenoType MTBDRsl VER.1 (Hain Lifescience GmbH, Nehren, Germany) on the pus to confirm rifampicin resistance and also to investigate resistance to other anti-TB drugs. It should be noted that this technique is not validated on extrapulmonary specimens directly, although many studies have showed a good correlation with the usual resistance screening methods. The MTBDRplus VER. 2 showed resistance to both rifampicin and isoniazid, while MTBDRsl VER.1 showed resistance only to aminoglycosides. 
", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 6360, @@ -447,7 +490,8 @@ }, "text": "Direct examination after special Ziehl-Nielsen staining was positive and cultures on Lowenstein-Jensen\u00ae (LJ) solid medium and Mycobacteria Growth Indicator Tube (MGIT\u00ae) liquid medium were positive after 32 days and 12 days respectively, thus confirming the molecular diagnosis.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 6637, @@ -457,7 +501,8 @@ }, "text": "A treatment was initiated on the basis of molecular data. The histopathological study confirmed the molecular diagnosis by showing epithelioid and gigantocellular granulomas with caseous necrosis, without histological evidence of malignancy.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 6878, @@ -467,7 +512,8 @@ }, "text": "Subsequently, the patient was put on a long-term protocol consisting of 6 months of bedaquiline, levofloxacin, linezolid, clofazimine, and cycloserine and 12 to 14 months of levofloxacin, linezolid, clofazimine, and cycloserine.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 7106, @@ -477,7 +523,8 @@ }, "text": "After 1 month of treatment, the antibacillary drugs appear to be well tolerated, and the patient is still being monitored.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 7228, @@ -487,7 +534,8 @@ }, "text": "Discussion", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 7238, @@ -497,7 +545,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 7238, @@ -507,7 +556,8 @@ }, "text": "TB remains a major public health problem in the world, mainly affecting developing countries [2]. 
Its incidence has also increased in developed countries, partly due to co-infection with HIV [2], the latter being more frequent in extra-pulmonary forms [3].", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 7494, @@ -517,7 +567,8 @@ }, "text": " The 2019 WHO report estimates the number of new cases at 10 million and the number of deaths at 1.5 million [4]. TB usually affects the lungs (pulmonary) or sometimes other organs (extrapulmonary). Excluding laryngeal TB, TB of the head and neck is rare and constitutes 2-6% of extrapulmonary TB and 0.1-1% of all forms of TB [5]. Retropharyngeal localization is rare [1].", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 7868, @@ -527,7 +578,8 @@ }, "text": "Infection of the retropharyngeal space and subsequent abscess formation are mainly due to acute bacterial infections of the head and neck region, especially in children, injury to the posterior pharyngeal wall, and forward spread of spinal TB [6].", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 8115, @@ -537,7 +589,8 @@ }, "text": "Spread to the retropharyngeal space occurs via lymphatics involving persistent retropharyngeal nodes or by hematogenous spread from pulmonary or extrapulmonary sites [5]. In our patient, the retropharyngeal abscess was probably due to lymphatic dissemination from lymph node TB because radiological exploration revealed a centimetric adenopathy with a necrotic center adjacent to the retropharyngeal abscess and there was no evidence of any distant involvement that could support hematogenous, pulmonary, or other dissemination. Tuberculous retropharyngeal abscess in an immunocompetent adult is rare [6]. 
", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 8721, @@ -547,7 +600,8 @@ }, "text": "Drug-resistant TB represents a major challenge to national, regional and global TB control programs. Some MDR strains have developed additional resistance mechanisms to second-line antibacillaries, namely fluoroquinolones and aminoglycosides [7]. Each year, 500,000 cases of MDR-TB or rifampicin-resistant TB (RR-TB) and nearly 200,000 deaths are reported worldwide. ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 9088, @@ -557,7 +611,8 @@ }, "text": "In 2019, the reported treatment success rate was 56% for MDR and extensively drug-resistant (XDR) TB cases and 39% for XDR-TB [4]. In Morocco, where TB remains endemic, the 2014 National TB Drug Resistance Survey found a low prevalence of MDR/XDR-TB (1% MDR-TB among new cases and 8.7% among previously treated cases) [4]. ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 9412, @@ -567,7 +622,8 @@ }, "text": "In 2019, 235 cases of drug-resistant TB were treated in Morocco, and 1500 cumulative cases have been reported since 2012 [4]. MDR extrapulmonary localizations have rarely been described in the literature [3,7,8]. An Indian study published in 2014 reported 3 cases, including 2 lymph node localizations and 1 cervical cold abscess [3]. ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 9747, @@ -577,7 +633,8 @@ }, "text": "MDR extrapulmonary forms are more frequent in young female subjects with a history of TB [8]. This is in accordance with our case. Another Moroccan study published in 2018 presented 7 cases of MDR extrapulmonary TB, of which 6 patients had a history of TB and 1 patient had a therapeutic failure [7]. 
4 of these 7 patients had additional resistance to second-line anti-TB drugs [7].", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 10129, @@ -587,7 +644,8 @@ }, "text": " The diagnosis of MDR in extrapulmonary forms should be made by tissue or biological fluid sampling, but this is sometimes difficult [3]. Tuberculous retropharyngeal abscess can present with variable manifestations, ranging from asymptomatic to subtle features such as odynophagia alone and neck pain, due to early stage and lesser severity of the disease, to life-threatening respiratory obstruction [6]. Our patient had only chronic headache that can be attributed to her Chiari malformation. In addition, the general condition was preserved. On throat examination, swelling due to tuberculous retropharyngeal abscess is usually located in the midline [6].", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 10787, @@ -597,7 +655,8 @@ }, "text": "Radiologic imaging plays an important role in demonstrating the extent of the abscess and the involvement of surrounding structures [2,5]. CT has an accuracy of 89% and MRI is even more accurate, as it allows for better soft tissue analysis and allows for the assessment of vascular complications, including internal jugular vein thrombosis [2,5]. Both CT and MRI in our patient showed the retropharyngeal abscess. ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 11202, @@ -607,7 +666,8 @@ }, "text": "TB was first diagnosed by direct microscopic examination and the discovery of acid-fast bacilli in the abscess aspirate using Ziehl-Neelsen stain, and then confirmed by culture, which remains the gold standard method [2]. 
", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 11424, @@ -617,7 +677,8 @@ }, "text": "Molecular biology has demonstrated its effectiveness even on pauci-bacillary specimens by allowing the identification and detection of resistance to anti-TB drugs through several studies. GeneXpert MTB/RIF is a rapid, automated, World Health Organization (WHO)-recommended nucleic acid amplification test that is widely used for the simultaneous detection of MTC and rifampicin resistance in pulmonary and extrapulmonary specimens. It has a sensitivity of more than 80% in cerebral spine fluid, pus and biopsy fragments [7]. In our study, GeneXpert MTB/RIF (Cepheid, Sunnyvale, CA, USA) allowed identification of MTC and detection of rifampicin resistance. ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 12081, @@ -627,7 +688,8 @@ }, "text": "In addition to the GeneXpert MTB/RIF, there are the MTBDRplus and MTBDRsl genotype tests which allow the identification of MTC from pulmonary clinical specimens or cultivated samples. The MTBDRplus test is used to identify resistance to rifampicin and isoniazid [7]. The MTBDRsl test is designed to detect resistance to the second-line antibacillary drugs, namely aminoglycosides on the gyrA gene, fluoroquinolones on the rrs gene, and ethambutol on the embB gene [7]. The MTBDRplus test and the MTBDRsl test have a sensitivity greater than 80% for the detection of resistance to rifampicin, isoniazid, fluoroquinolones, and aminoglycosides [7]. The discovery of an additional aminoglycoside resistance makes the choice of treatment even more difficult. These tests have been shown to be effective in detecting resistance to anti-TB drugs from extrapulmonary samples, even though they are not validated on these samples. 
This has been reported in some studies [9, 10].", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 13049, @@ -637,7 +699,8 @@ }, "text": "In our case, the aspiration was positive by GeneXpert MTB/RIF with a detection of rifampicin resistance. The MTBDRplus test confirmed resistance to rifampicin and isoniazid and the MTBDRsl test showed additional resistance to aminoglycosides. Later on, mycobacterial culture on solid and liquid media both became positive after 32 days and 12 days respectively. Pre-ultraresistant TB (pre-XDR TB) is defined as MDR/RR-TB in addition to resistance to any fluoroquinolones (levofloxacin and moxifloxacin). ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 13553, @@ -647,7 +710,8 @@ }, "text": "Antibacillary drug resistance can be primary or secondary, primary drug resistance is defined as resistance in a patient who has never been treated for TB. Treatment with anti-TB drugs exerts selective pressure on the Mycobacterium tuberculosis population, resulting in a decrease in susceptible bacilli, an increase in drug-resistant mutants, and the emergence of drug resistance (acquired resistance). Given her previously treated lymph node TB, it seems safe to assume that our patient has acquired drug resistance. ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 14072, @@ -657,7 +721,8 @@ }, "text": "In recent years, significant progress has been made in the rapid diagnosis of TB and drug resistance, as well as in treatment: new drugs, reduction of the age of indication for certain drugs as well as modification of the classification of drugs used to treat MDR-TB. 
", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 14340, @@ -667,7 +732,8 @@ }, "text": "For MDR-TB of all forms, the WHO recommends a short regimen of 9 to 11 months, which includes a 4 to 6 month loading phase with high dose amikacin, moxifloxacin, etionamide, clofazimine, pyrazinamide, ethambutol and high dose of isoniazid. In the maintenance phase, patients are put on moxifloxacin, clofazimine, pyrazinamide and ethambutol [11]. Another recent WHO review in 2020 updated the recommendations eliminating short regimens containing injectables, replacing them with a short regimen containing bedaquiline [4]. Another WHO trial approved by the FDA in 2019 recommends the combination of bedaquiline, linezolid, and pretomanide for ultraresistant TB or XDR-TB for 9 months if the three molecules have not been taken previously [4,11].", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 15087, @@ -677,7 +743,8 @@ }, "text": "In Morocco, the short regimen has been adapted for some cases, but the old long regimen is still widely prescribed. This long regimen is based on 6 months of initial treatment with bedaquiline combined with levofloxacin, linezolid, clofazimine and cycloserine, followed by cessation of bedaquiline and maintenance of the remainder for 12 to 14 months if there is no resistance to group A and B molecules [4]. Our patient was put on a standard regimen by replacing aminoglycosides with bedaquiline. The simultaneous medical and surgical approach seems to be the best strategy for the management of tuberculous retropharyngeal abscess [3,5].", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 15726, @@ -687,7 +754,8 @@ }, "text": "As with any abscess, the mainstay of management of retro-pharyngeal tubercular abscess is drainage of the pus. Therapeutic aspiration only has been used successfully and can be repeated if necessary [2]. 
Anti-TB drug therapy and conservative neck stabilization should be the initial treatment if a retropharyngeal abscess is due to an extension from cervical spine TB, with a stable spine and without any neurological deficit or with minimal neurological signs [6]. If left untreated, internal jugular vein thrombosis, mediastinitis and airway obstruction are potential complications [1,2]. ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 16317, @@ -697,7 +765,8 @@ }, "text": "Clinical, bacteriological and radiological surveillance is recommended, as well as monitoring of treatment tolerance [7,11]. The prognosis of MDR pulmonary and extrapulmonary TB has been improved thanks in part to the prescription of new anti-TB drugs such as linezolid and bedaquiline. The success of the treatment is related to the number of effective molecules still available [7]. However, high mortality has been observed in patients with XDR-TB and HIV infection. This could be explained by its synergistic relationship with TB and the emergence of MDR and XDR strains [7]. The HIV serology of our patient is negative which could further improve the prognosis of her disease.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 16998, @@ -707,7 +776,8 @@ }, "text": "Conclusion", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17008, @@ -717,7 +787,8 @@ }, "text": "Retropharyngeal abscess is a recognized but rare presentation of TB. Unspecified symptoms and unusual location often lead to delayed diagnosis and treatment. 
Through this case, we highlight the importance of gene amplification tests in the effective and rapid management of this disease.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17295, @@ -727,7 +798,8 @@ }, "text": "Competing interests:", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17315, @@ -737,7 +809,8 @@ }, "text": "The authors declare no competing interest.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17357, @@ -747,7 +820,8 @@ }, "text": "Author contributions:", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17378, @@ -757,7 +831,8 @@ }, "text": "OS,TN and BE have been involved in drafting in the manuscript, BF, BY, CM, AM have revising the manuscript and ELM have given final approval of the version to be published.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17550, @@ -767,7 +842,8 @@ }, "text": "References", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17560, @@ -777,7 +853,8 @@ }, "text": "Supplemental file: ", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17579, @@ -787,7 +864,8 @@ }, "text": "", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17579, @@ -797,7 +875,8 @@ }, "text": "Definitions:", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17591, @@ -807,7 +886,8 @@ }, "text": "RR: is defined as isolated resistance to rifampicin.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17643, @@ -817,7 +897,8 @@ }, "text": "MDR: is defined as resistance to both rifampicin and isoniazid.", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17706, @@ -827,7 +908,8 @@ }, "text": "Pre-XDR: is 
defined as multidrug resistance (MDR/RR-TB) in addition to resistance to any fluoroquinolones (levofloxacin and moxifloxacin).", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] }, { "offset": 17844, @@ -837,10 +919,12 @@ }, "text": "Ultraresistant TB or extensively drug-resistant TB (XDR-TB): is defined as multidrug resistance (MDR/RR-TB) in addition to resistance to a fluoroquinolone (levofloxacin or moxifloxacin) and at least one of bedaquiline or linezolid (or both).", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], - "relations": [] + "relations": [], + "annotations": [] } ] } \ No newline at end of file From 7b48d20007f869cad54853c0774f80396a9112df Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 3 Jun 2025 12:23:43 +0100 Subject: [PATCH 121/125] Added missing sentence field --- .../PDF/Expected Output/tp-10-08-2123-coif.pdf_bioc.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_bioc.json b/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_bioc.json index 9659ed66..e283b9e9 100644 --- a/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_bioc.json +++ b/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_bioc.json @@ -4076,7 +4076,8 @@ }, "text": "### **Evaluation and Feedback**", "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "relations": [], From 794372f9719b6ce35333d8a8ac11aede09ffdbe5 Mon Sep 17 00:00:00 2001 From: Thomas Rowlands Date: Tue, 3 Jun 2025 12:38:28 +0100 Subject: [PATCH 122/125] Added missing sentences field --- .../tp-10-08-2123-coif.pdf_tables.json | 93 ++++++++++++------- 1 file changed, 62 insertions(+), 31 deletions(-) diff --git a/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_tables.json b/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_tables.json index 
f09df8fe..7c655c0c 100644 --- a/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_tables.json +++ b/tests/data/Supplementary/PDF/Expected Output/tp-10-08-2123-coif.pdf_tables.json @@ -91,7 +91,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -146,7 +147,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -194,7 +196,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -319,7 +322,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -367,7 +371,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -407,7 +412,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -470,7 +476,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -587,7 +594,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -627,7 +635,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -667,7 +676,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -730,7 +740,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -829,7 +840,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -884,7 +896,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -924,7 +937,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -1023,7 +1037,8 @@ } ], "annotations": [], - "relations": [] + "relations": 
[], + "sentences": [] } ], "annotations": [], @@ -1078,7 +1093,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -1126,7 +1142,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -1225,7 +1242,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -1273,7 +1291,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -1313,7 +1332,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -1361,7 +1381,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -1478,7 +1499,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -1541,7 +1563,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -1658,7 +1681,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -1811,7 +1835,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -1859,7 +1884,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -1899,7 +1925,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -2052,7 +2079,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -2107,7 +2135,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -2155,7 +2184,8 @@ } ], "annotations": [], - "relations": [] + "relations": [], + "sentences": [] } ], "annotations": [], @@ -2218,7 +2248,8 @@ } ], "annotations": [], - "relations": [] + "relations": 
[], + "sentences": [] } ], "annotations": [], From d7e69726ba9f5211c76e580fe080de6a25d3def3 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Tue, 3 Jun 2025 18:15:49 +0100 Subject: [PATCH 123/125] Use secret PAT on release workflow --- .github/workflows/release.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 701af14f..71cff289 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -13,6 +13,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + # Use a custom PAT so the runners can access the private submodule + token: ${{ secrets.PAT }} + submodules: true - uses: ./.github/actions/setup with: python-version: '3.13' From 387da46e3a51675aaf2322c564496fdb98dee063 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Tue, 3 Jun 2025 18:23:16 +0100 Subject: [PATCH 124/125] Revert "Use secret PAT on release workflow" This reverts commit d7e69726ba9f5211c76e580fe080de6a25d3def3. 
--- .github/workflows/release.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 71cff289..701af14f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -13,10 +13,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - with: - # Use a custom PAT so the runners can access the private submodule - token: ${{ secrets.PAT }} - submodules: true - uses: ./.github/actions/setup with: python-version: '3.13' From bd28aad9db5c30a812e68e6b4fd7a1275b1f69df Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Tue, 3 Jun 2025 18:33:21 +0100 Subject: [PATCH 125/125] Inherit secrets from release to test in CI --- .github/workflows/release.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 701af14f..9339b9ef 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,6 +7,7 @@ on: jobs: test: uses: ./.github/workflows/ci.yml + secrets: inherit build-wheel: needs: test