From 9682d78baf91012c0c10ff9f60ddc5554aa18ec3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 23 Feb 2026 21:43:42 +0000 Subject: [PATCH 1/2] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/pre-commit-hooks: v4.6.0 → v6.0.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.6.0...v6.0.0) - [github.com/abravalheri/validate-pyproject: v0.18 → v0.25](https://github.com/abravalheri/validate-pyproject/compare/v0.18...v0.25) - [github.com/jackdewinter/pymarkdown: v0.9.20 → v0.9.35](https://github.com/jackdewinter/pymarkdown/compare/v0.9.20...v0.9.35) - [github.com/sphinx-contrib/sphinx-lint: v0.9.1 → v1.0.2](https://github.com/sphinx-contrib/sphinx-lint/compare/v0.9.1...v1.0.2) - [github.com/astral-sh/ruff-pre-commit: v0.4.8 → v0.15.2](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.8...v0.15.2) - [github.com/pre-commit/mirrors-mypy: v1.10.0 → v1.19.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.10.0...v1.19.1) --- .pre-commit-config.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ea3a62b..1ba4afb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,7 +12,7 @@ repos: language: fail files: '(?i)((^|/)(CON|PRN|AUX|NUL|COM[\d¹²³]|LPT[\d¹²³])(\.|/|$)|[<>:\"\\|?*\x00-\x1F]|/[^/]*[\.\s]/|[^/]*[\.\s]$)' - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v6.0.0 hooks: - id: check-added-large-files - id: check-builtin-literals @@ -36,7 +36,7 @@ repos: args: [ "--pytest-test-first" ] - id: trailing-whitespace - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.18 + rev: v0.25 hooks: - id: validate-pyproject - repo: https://github.com/pre-commit/pygrep-hooks @@ -48,7 +48,7 @@ repos: - id: python-no-log-warn - id: python-use-type-annotations - repo: https://github.com/jackdewinter/pymarkdown - rev: v0.9.20 + rev: v0.9.35 hooks: - id: pymarkdown args: @@ -56,7 +56,7 @@ repos: - "md013, MD041" # Line length, First line in file should be a top level heading - "scan" - repo: https://github.com/sphinx-contrib/sphinx-lint - rev: v0.9.1 + rev: v1.0.2 hooks: - id: sphinx-lint args: [ --enable=default-role ] @@ -70,7 +70,7 @@ repos: args: [ --without-hashes, --with=test, --with=docs, --format, requirements.txt, --output, requirements.txt ] - id: poetry-install - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.8 + rev: v0.15.2 hooks: - id: ruff-format - id: ruff @@ -81,7 +81,7 @@ repos: # - id: pydoclint # args: [ "--quiet" ] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.10.0 + rev: v1.19.1 hooks: - id: mypy additional_dependencies: From a02840e2b74ef6c2b25d866b5e6e8efe69bb5b91 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 23 Feb 2026 21:44:28 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- Google Colab Preview.ipynb | 600 +++++++++--------- dashapp/generate_graph_elements.py | 2 +- src/preprocessing/compute_layout.py | 6 +- .../compute_topical_distributions.py | 2 +- src/preprocessing/create_events.py | 2 +- src/preprocessing/impute_dates.py | 4 +- 6 files changed, 309 insertions(+), 307 deletions(-) diff --git a/Google Colab Preview.ipynb b/Google Colab Preview.ipynb index 54eab8e..ac0e3f9 100644 --- a/Google Colab Preview.ipynb +++ b/Google Colab Preview.ipynb @@ -1,316 +1,318 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "gyMBkEWo8nT-" - }, - "source": [ - "# Google Colab Notebook\n", - "Use this notebook to run Woogle Maps in Google Colab. Simply run all cells sequentially. It will take a few minutes to set up.\n", - "\n", - "## Step 1: Get the correct python version\n", - "By default, Google Colab uses Python 3.10, but we need Python 3.12 for Woogle Maps. The following cell install the correct Python version." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "nuowyK2s8bvd", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "503b488e-e0ed-494d-edef-28d7673c05ea" - }, - "source": [ - "!apt-get update\n", - "!apt-get install python3.12 python3.12-distutils\n", - "\n", - "# Change 'python' to point at the newly installed version\n", - "!sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1\n", - "!sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 2\n", - "\n", - "# Check that it prints \"Python 3.12.2\"\n", - "!python --version" - ], - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ok30y7IO5fey" - }, - "source": [ - "## Step 2: Clone the repository from GitHub\n", - "\n", - "Download the source code and checkout a branch that has been tested to work in this notebook. Ignore any warnings that mention `detached HEAD`." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "yrhEA6Wg5i2m", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "7636d427-5ec9-4f2c-c86d-8acf96d9fcad" - }, - "source": "!git clone https://github.com/TeamEpochGithub/woogle-maps", - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "M1o5CBdP6Xw9" - }, - "source": [ - "## Step 3: Install all dependencies\n", - "\n", - "Since dependencies are managed with Poetry, we need to install pip and Poetry first before we can install any dependencies." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true, - "id": "JEYTYiI2_yVK", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "c0a471ce-3a06-4a17-b7a0-5877c0f1e891" - }, - "source": [ - "# Install pip using get-pip\n", - "!wget https://bootstrap.pypa.io/get-pip.py\n", - "!python get-pip.py\n", - "!python3.12 -m pip install --upgrade pip --user\n", - "!pip -V" - ], - "outputs": [] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true, - "id": "9FUKlWk9CIjp", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "52c2a182-b6e3-44d5-d180-30a1504e2d36" - }, - "source": [ - "# Use pip to install Poetry\n", - "!python -m pip install poetry" - ], - "outputs": [] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": true, - "id": "0VjAtnXvEWR6", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "d2e71341-1d97-4a66-8816-c012a184c6c9" - }, - "source": [ - "# Install python devtools, necessary for building some dependencies\n", - "!apt-get install python3.12-dev" - ], - "outputs": [] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "gyMBkEWo8nT-" + }, + "source": [ + "# Google Colab Notebook\n", + "Use this notebook to run Woogle Maps in Google Colab. Simply run all cells sequentially. It will take a few minutes to set up.\n", + "\n", + "## Step 1: Get the correct python version\n", + "By default, Google Colab uses Python 3.10, but we need Python 3.12 for Woogle Maps. The following cell install the correct Python version." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": true, - "id": "6krIga0rCSJG", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "f66a6334-a985-4101-e9be-0d8430d069b8" - }, - "source": [ - "# Use poetry to install or build all dependencies\n", - "!cd woogle-maps; python -m poetry -q install && echo 'Finished!' || 'Failed!'" - ], - "outputs": [] + "id": "nuowyK2s8bvd", + "outputId": "503b488e-e0ed-494d-edef-28d7673c05ea" + }, + "outputs": [], + "source": [ + "!apt-get update\n", + "!apt-get install python3.12 python3.12-distutils\n", + "\n", + "# Change 'python' to point at the newly installed version\n", + "!sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1\n", + "!sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 2\n", + "\n", + "# Check that it prints \"Python 3.12.2\"\n", + "!python --version" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ok30y7IO5fey" + }, + "source": [ + "## Step 2: Clone the repository from GitHub\n", + "\n", + "Download the source code and checkout a branch that has been tested to work in this notebook. Ignore any warnings that mention `detached HEAD`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "X0oQ7rTtBDBX", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "112982bd-30d5-49e7-c3d7-bfabc5e5927d" - }, - "source": [ - "# Sanity check\n", - "!echo Missing dependencies:\n", - "!cd woogle-maps; python3.12 -m poetry install --dry-run | grep \"\\- Installing\" | grep -v \"Already installed\" || echo \"No missing dependencies found, sucess!\"" - ], - "outputs": [] + "id": "yrhEA6Wg5i2m", + "outputId": "7636d427-5ec9-4f2c-c86d-8acf96d9fcad" + }, + "outputs": [], + "source": [ + "!git clone https://github.com/TeamEpochGithub/woogle-maps" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M1o5CBdP6Xw9" + }, + "source": [ + "## Step 3: Install all dependencies\n", + "\n", + "Since dependencies are managed with Poetry, we need to install pip and Poetry first before we can install any dependencies." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "source": [ - "# Last things to set up\n", - "!cd woogle-maps; poetry run python -c \"import nltk; nltk.download('punkt')\"" - ], - "metadata": { - "id": "phBw62nKviu0", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "53a30537-af3e-497e-9a06-68ca9cae97a4" - }, - "execution_count": 8, - "outputs": [] + "collapsed": true, + "id": "JEYTYiI2_yVK", + "outputId": "c0a471ce-3a06-4a17-b7a0-5877c0f1e891" + }, + "outputs": [], + "source": [ + "# Install pip using get-pip\n", + "!wget https://bootstrap.pypa.io/get-pip.py\n", + "!python get-pip.py\n", + "!python3.12 -m pip install --upgrade pip --user\n", + "!pip -V" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "markdown", - "metadata": { - "id": "3Mdaf6lBGZjh" - }, - "source": [ - "## Step 4: Setup a browser window to access the app\n", - "\n", - "Since localhost is not available on Google Colab, we need ngrok to access the application instead. At the end of this step, you will find the link you need." - ] + "collapsed": true, + "id": "9FUKlWk9CIjp", + "outputId": "52c2a182-b6e3-44d5-d180-30a1504e2d36" + }, + "outputs": [], + "source": [ + "# Use pip to install Poetry\n", + "!python -m pip install poetry" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "id": "rHRDQNe_GYqh", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "e43cc6d5-d61b-4455-a0a3-e0081803edc2" - }, - "source": [ - "# Install ngrok, used to open a website server from the notebook\n", - "!curl -s https://ngrok-agent.s3.amazonaws.com/ngrok.asc \\\n", - "\t| sudo tee /etc/apt/trusted.gpg.d/ngrok.asc >/dev/null \\\n", - "\t&& echo \"deb https://ngrok-agent.s3.amazonaws.com buster main\" \\\n", - "\t| sudo tee /etc/apt/sources.list.d/ngrok.list \\\n", - "\t&& sudo apt update \\\n", - "\t&& sudo apt install ngrok" - ], - "outputs": [] + "collapsed": true, + "id": "0VjAtnXvEWR6", + "outputId": "d2e71341-1d97-4a66-8816-c012a184c6c9" + }, + "outputs": [], + "source": [ + "# Install python devtools, necessary for building some dependencies\n", + "!apt-get install python3.12-dev" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "ueh_NDqCKpRk", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "9134645a-923e-4348-981c-dd74bf51959e" - }, - "source": [ - "# Sign in (using throwaway ngrok account made for this purpose)\n", - "!ngrok config add-authtoken 2fe2QHsdRZYuNSJjKWEu0oIME8M_4PPSbvpQceV7d3RLepLea; sleep 3" - ], - "outputs": [] + "collapsed": true, + "id": "6krIga0rCSJG", + "outputId": "f66a6334-a985-4101-e9be-0d8430d069b8" + }, + "outputs": [], + "source": [ + "# Use poetry to install or build all dependencies\n", + "!cd woogle-maps; python -m poetry -q install && echo 'Finished!' || 'Failed!'" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "id": "omkcJpB4G7Ca" - }, - "source": [ - "# Run ngrok to tunnel Dash app port 8050 to the outside world.\n", - "# This command runs in the background.\n", - "get_ipython().system_raw('ngrok http 8060 &')\n", - "!sleep 3" - ], - "outputs": [] + "id": "X0oQ7rTtBDBX", + "outputId": "112982bd-30d5-49e7-c3d7-bfabc5e5927d" + }, + "outputs": [], + "source": [ + "# Sanity check\n", + "!echo Missing dependencies:\n", + "!cd woogle-maps; python3.12 -m poetry install --dry-run | grep \"\\- Installing\" | grep -v \"Already installed\" || echo \"No missing dependencies found, sucess!\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "id": "RsKMUPCVLaRQ", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "7e0d0703-feae-4eb8-bcc7-05ec73e2e90b" - }, - "source": [ - "# Check if its running, should print at least one line\n", - "!sudo lsof -PiTCP -sTCP:LISTEN | grep ngrok && echo \"Sucess!\" || echo \"Failed! nothing running. Run the cell above again, then the one below to get the link\"" - ], - "outputs": [] + "id": "phBw62nKviu0", + "outputId": "53a30537-af3e-497e-9a06-68ca9cae97a4" + }, + "outputs": [], + "source": [ + "# Last things to set up\n", + "!cd woogle-maps; poetry run python -c \"import nltk; nltk.download('punkt')\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3Mdaf6lBGZjh" + }, + "source": [ + "## Step 4: Setup a browser window to access the app\n", + "\n", + "Since localhost is not available on Google Colab, we need ngrok to access the application instead. At the end of this step, you will find the link you need." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "source": [ - "# Get the public URL where you can access the Dash app. Copy this URL.\n", - "!curl -s http://localhost:4040/api/tunnels | python -c \\\n", - " \"import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])\"" - ], - "metadata": { - "id": "UceE0Nb6tblj", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "d6e291c3-1650-494d-8522-3b695d22a963" - }, - "execution_count": 13, - "outputs": [] + "id": "rHRDQNe_GYqh", + "outputId": "e43cc6d5-d61b-4455-a0a3-e0081803edc2" + }, + "outputs": [], + "source": [ + "# Install ngrok, used to open a website server from the notebook\n", + "!curl -s https://ngrok-agent.s3.amazonaws.com/ngrok.asc \\\n", + "\t| sudo tee /etc/apt/trusted.gpg.d/ngrok.asc >/dev/null \\\n", + "\t&& echo \"deb https://ngrok-agent.s3.amazonaws.com buster main\" \\\n", + "\t| sudo tee /etc/apt/sources.list.d/ngrok.list \\\n", + "\t&& sudo apt update \\\n", + "\t&& sudo apt install ngrok" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "markdown", - "metadata": { - "id": "UR5lAgjpL8vt" - }, - "source": [ - "## Step 5: Start the app, then open link above\n", - "\n", - "Run this cell untill it says 'Dash is running on ...'.\n", - "Don't use the link starting with 127.0.0.1.\n", - "Instead, use the link above, ending with 'ngrok-free.app'. Click 'Visit Site'." - ] + "id": "ueh_NDqCKpRk", + "outputId": "9134645a-923e-4348-981c-dd74bf51959e" + }, + "outputs": [], + "source": [ + "# Sign in (using throwaway ngrok account made for this purpose)\n", + "!ngrok config add-authtoken 2fe2QHsdRZYuNSJjKWEu0oIME8M_4PPSbvpQceV7d3RLepLea; sleep 3" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "omkcJpB4G7Ca" + }, + "outputs": [], + "source": [ + "# Run ngrok to tunnel Dash app port 8050 to the outside world.\n", + "# This command runs in the background.\n", + "get_ipython().system_raw(\"ngrok http 8060 &\")\n", + "!sleep 3" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "mA8JquWtMN2F", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "84060503-36d0-4aba-c338-fc97e4a63f2b" - }, - "source": [ - "!cd woogle-maps; poetry run python dashapp/app.py" - ], - "outputs": [] - } - ], - "metadata": { + "id": "RsKMUPCVLaRQ", + "outputId": "7e0d0703-feae-4eb8-bcc7-05ec73e2e90b" + }, + "outputs": [], + "source": [ + "# Check if its running, should print at least one line\n", + "!sudo lsof -PiTCP -sTCP:LISTEN | grep ngrok && echo \"Sucess!\" || echo \"Failed! nothing running. Run the cell above again, then the one below to get the link\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { "colab": { - "provenance": [] + "base_uri": "https://localhost:8080/" }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" + "id": "UceE0Nb6tblj", + "outputId": "d6e291c3-1650-494d-8522-3b695d22a963" + }, + "outputs": [], + "source": [ + "# Get the public URL where you can access the Dash app. Copy this URL.\n", + "!curl -s http://localhost:4040/api/tunnels | python -c \\\n", + " \"import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UR5lAgjpL8vt" + }, + "source": [ + "## Step 5: Start the app, then open link above\n", + "\n", + "Run this cell untill it says 'Dash is running on ...'.\n", + "Don't use the link starting with 127.0.0.1.\n", + "Instead, use the link above, ending with 'ngrok-free.app'. Click 'Visit Site'." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - "language_info": { - "name": "python" - } + "id": "mA8JquWtMN2F", + "outputId": "84060503-36d0-4aba-c338-fc97e4a63f2b" + }, + "outputs": [], + "source": [ + "!cd woogle-maps; poetry run python dashapp/app.py" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/dashapp/generate_graph_elements.py b/dashapp/generate_graph_elements.py index 4e7769a..fd63172 100644 --- a/dashapp/generate_graph_elements.py +++ b/dashapp/generate_graph_elements.py @@ -206,7 +206,7 @@ def generate_node_elements(data: pd.DataFrame) -> list[NodeElement]: for i in range(len(node_elements)): cluster_data = data.query(f"clusters == {i}") if "storyline" in cluster_data.columns: - parent_id = f"story_{cluster_data.iloc[0]["storyline"]}_1I" + parent_id = f"story_{cluster_data.iloc[0]['storyline']}_1I" else: parent_id = None diff --git a/src/preprocessing/compute_layout.py b/src/preprocessing/compute_layout.py index 76f08f0..efdd1d2 100644 --- a/src/preprocessing/compute_layout.py +++ b/src/preprocessing/compute_layout.py @@ -68,15 +68,15 @@ def custom_transform(self, data: pd.DataFrame, **transform_args: Never) -> pd.Da self.log_to_warning("The data does not contain a valid 'date' column. Uniform node spacing is forced.") self.spacing_within_story = "uniform" else: - min_date = cast(datetime, pl_data["date"].min()) - max_date = cast(datetime, pl_data["date"].max()) + min_date = cast("datetime", pl_data["date"].min()) + max_date = cast("datetime", pl_data["date"].max()) total_range_seconds = (max_date - min_date).total_seconds() node_dates = pl_data["date"].to_list() x_coords = [0.0] * pl_data.height y_coords = [0.0] * pl_data.height - max_story_length: int = cast(int, storylines["index"].list.len().max()) + max_story_length: int = cast("int", storylines["index"].list.len().max()) for story_id, node_indices in storylines.iter_rows(): y = math.ceil(story_id / 2) * ((-1) ** story_id) * 200 # Make storylines alternate between top and bottom of the main storyline diff --git a/src/preprocessing/compute_topical_distributions.py b/src/preprocessing/compute_topical_distributions.py index d9684a6..f9b443c 100644 --- a/src/preprocessing/compute_topical_distributions.py +++ b/src/preprocessing/compute_topical_distributions.py @@ -40,7 +40,7 @@ def __post_init__(self) -> None: super().__post_init__() self._pretrained_lda = LdaModel.load(self.pretrained_model_name_or_path) self._lemmatizer = WordNetLemmatizer() - self._dict = cast(Dictionary, Dictionary.load(self.dictionary_name_or_path)) + self._dict = cast("Dictionary", Dictionary.load(self.dictionary_name_or_path)) def custom_transform(self, data: pd.DataFrame, **transform_args: Never) -> pd.DataFrame: # noqa: DOC103 # type: ignore[misc] """Ensure the input Dataframe has the relevant columns. diff --git a/src/preprocessing/create_events.py b/src/preprocessing/create_events.py index a087168..fa42ee9 100644 --- a/src/preprocessing/create_events.py +++ b/src/preprocessing/create_events.py @@ -65,7 +65,7 @@ def custom_transform(self, data: pd.DataFrame, **transform_args: Never) -> pd.Da data["clusters"] = range(data.shape[0]) for idx, row in data.iterrows(): - idx = cast(int, idx) + idx = cast("int", idx) if row["discarded"]: # Extract the closest not-discarded documents data["distance"] = (data["clusters"] - idx).abs() diff --git a/src/preprocessing/impute_dates.py b/src/preprocessing/impute_dates.py index b454890..adb5c2c 100644 --- a/src/preprocessing/impute_dates.py +++ b/src/preprocessing/impute_dates.py @@ -33,7 +33,7 @@ def custom_transform(self, data: pd.DataFrame, **transform_args: Never) -> pd.Da self.log_to_warning("The input data does not have an 'embed' column. Unable to impute dates.") return data - self.log_to_terminal(f"Imputing {data["date"].isna().sum()} missing dates...") + self.log_to_terminal(f"Imputing {data['date'].isna().sum()} missing dates...") all_embeddings = np.stack(data["embed"].to_list()) not_na_rows = data[data["date"].notna()] @@ -53,6 +53,6 @@ def custom_transform(self, data: pd.DataFrame, **transform_args: Never) -> pd.Da data["date"] = data["date"].apply(lambda x: x.replace(tzinfo=ZoneInfo("Europe/Amsterdam")) if pd.notna(x) else x) if data["date"].isna().sum() != 0: # Sanity check - self.log_to_warning(f"There are still {data["date"].isna().sum()} missing dates in the data! This may cause problems later on. Continuing anyway...") + self.log_to_warning(f"There are still {data['date'].isna().sum()} missing dates in the data! This may cause problems later on. Continuing anyway...") return data.sort_values("date").reset_index().drop("index", errors="ignore")