diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index bbbe17f..66e7acf 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7] + python-version: ["3.9"] env: PYTHON_PACKAGE: data_pipelines_cli steps: diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 91db971..6d64e00 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v3 @@ -40,12 +40,3 @@ jobs: - name: Test with tox run: | tox - - - name: Report coverage - uses: paambaati/codeclimate-action@v5.0.0 - env: - CC_TEST_REPORTER_ID: ${{ secrets.CODE_CLIMATE }} - with: - coverageCommand: coverage xml - debug: true - coverageLocations: coverage.xml:coverage.py diff --git a/.gitignore b/.gitignore index d3b6707..5eb2eba 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ out/ .idea_modules/ +# VSCode +.vscode/ + ### macOS *.DS_Store .AppleDouble @@ -125,4 +128,4 @@ venv.bak/ docs/_build -dp-testing \ No newline at end of file +dp-testing diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 89fcf4b..b161c23 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: check-yaml - repo: https://github.com/pycqa/flake8 - rev: 4.0.1 + rev: 6.1.0 hooks: - id: flake8 additional_dependencies: [ @@ -33,7 +33,7 @@ repos: ] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.961 + rev: v1.7.1 hooks: - id: mypy additional_dependencies: diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 0000000..79fed25 --- /dev/null +++ b/.tool-versions @@ -0,0 +1 @@ +python 3.12.12 3.9.24 3.10.19 3.11.14 diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 
0000000..5b82f58 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,351 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +**data-pipelines-cli** (`dp`) is a CLI tool for managing data platform workflows. It orchestrates dbt projects, cloud deployments, Docker builds, and multi-service integrations (Airbyte, DataHub, Looker). Projects are created from templates using copier, compiled with environment-specific configs, and deployed to cloud storage (GCS, S3). + +**Version:** 0.31.0 | **Python:** 3.9-3.12 | **License:** Apache 2.0 + +## Quick Command Reference + +### Development + +Requires Python 3.9-3.12. + +```bash +# Setup +pip install -e .[tests,bigquery,docker,datahub,gcs,s3] +pip install -r requirements-dev.txt +pre-commit install + +# Testing +pytest --cov data_pipelines_cli --cov-report term-missing --ignore=venv +pytest tests/test_dbt_utils.py::test_specific_function +tox # Test all Python versions +tox -e py310 # Test specific version + +# Linting +pre-commit run --all-files +black data_pipelines_cli tests +flake8 data_pipelines_cli tests +mypy data_pipelines_cli +``` + +### CLI Workflow +```bash +# Initialize global config +dp init https://github.com/org/dp-config.git + +# Create project from template +dp create ./my_pipeline my-template-name --vcs-ref develop + +# Local development +dp prepare-env --env local # Setup IDE integration +dp compile --env local # Compile dbt project +dp run --env local # Run dbt models +dp test --env local # Run dbt tests +dp seed --env local # Load seed data +dp docs-serve --env local # Serve docs on port 8080 + +# Code generation +dp generate source-yaml --env local --source-path models/sources schema1 schema2 +dp generate model-yaml --env local --model-paths models/marts --overwrite +dp generate databricks-job --env prod --python-code-path jobs/script.py + +# Production deployment +dp compile --env prod --docker-build --docker-tag v1.0.0 
+dp deploy --env prod \ + --docker-push \ + --datahub-ingest \ + --bi-git-key-path ~/.ssh/looker_key \ + --dags-path gs://airflow-bucket/dags \ + --blob-args gcs_creds.json + +# Cleanup +dp clean +``` + +## Architecture + +### Directory Structure +``` +data_pipelines_cli/ +├── cli.py # Entry point, command registration, global error handler +├── cli_commands/ # Command implementations +│ ├── init.py # Initialize ~/.dp.yml config +│ ├── create.py # Create project from template (copier) +│ ├── update.py # Update existing project +│ ├── compile.py # Compile: copy files, merge configs, dbt compile, docker build +│ ├── run.py # Execute dbt models +│ ├── test.py # Run dbt tests +│ ├── seed.py # Load dbt seed data +│ ├── docs.py # Serve dbt documentation +│ ├── deploy.py # Deploy: docker push, datahub, airbyte, looker, cloud sync +│ ├── publish.py # Publish dbt package to Git +│ ├── prepare_env.py # Generate ~/.dbt/profiles.yml for local dev +│ ├── clean.py # Remove build/ directory +│ ├── template.py # List available templates +│ └── generate/ # Code generation subcommands +│ ├── generate.py # Command group entry point +│ ├── source_yaml.py # Generate dbt source schemas from DB +│ ├── model_yaml.py # Generate dbt model schemas +│ ├── source_sql.py # Generate dbt source SQL +│ └── databricks_job.py # Generate Databricks job configs +├── config_generation.py # Config merging, profiles.yml generation +├── dbt_utils.py # dbt command execution with variable management +├── filesystem_utils.py # Cloud storage sync (LocalRemoteSync class) +├── jinja.py # Jinja2 variable substitution in configs +├── airbyte_utils.py # Airbyte API client (AirbyteFactory) +├── bi_utils.py # BI platform orchestration +├── looker_utils.py # LookML generation and Git deployment +├── docker_response_reader.py # Docker API response parser +├── cli_utils.py # Echo functions, subprocess wrapper +├── cli_constants.py # BUILD_DIR, ENV_CONFIGURATION_PATH, defaults +├── data_structures.py # TypedDict PODs 
(DataPipelinesConfig, DbtModel, etc.) +├── errors.py # Custom exception hierarchy +├── io_utils.py # File operations, git hash detection +└── vcs_utils.py # Git URL normalization +``` + +### Configuration System + +**Layered merging** with precedence (highest to lowest): +``` +CLI arguments + ↓ +config/{env}/*.yml (environment-specific) + ↓ +config/base/*.yml (defaults) + ↓ +~/.dp.yml (global vars and templates) +``` + +**Implementation:** `config_generation.read_dictionary_from_config_directory(path, env, file)` merges base + env configs using `dict(base, **env)`. + +**Variable resolution** for dbt: +```python +# dbt_utils.read_dbt_vars_from_configs(env) merges: +{ + **config/base/dbt.yml['vars'], + **config/{env}/dbt.yml['vars'], + **~/.dp.yml['vars'] +} +``` + +### Key Workflows + +#### Compile Flow +``` +dp compile --env prod --docker-build + ├─ Copy dag/ → build/dag/ + ├─ Copy config/ → build/dag/config/ + ├─ Merge configs: base + prod + ├─ Replace Jinja vars in datahub.yml + ├─ Generate profiles.yml from dbt.yml + bigquery.yml + ├─ Run: dbt deps → dbt compile → dbt docs generate → dbt source freshness + ├─ Copy target/manifest.json → build/dag/manifest.json + ├─ docker build -t repo:tag [if --docker-build] + └─ Generate Looker LookML [if bi.yml configured] +``` + +#### Deploy Flow +``` +dp deploy --env prod --docker-push --datahub-ingest + ├─ docker push repo:tag + ├─ datahub ingest -c config/prod/datahub.yml + ├─ Airbyte: create/update connections via REST API + ├─ Looker: clone repo → generate LookML → commit/push + └─ Cloud sync: LocalRemoteSync(build/dag, gs://bucket) + ├─ List local files + ├─ Push each to GCS/S3 via fsspec + └─ Delete remote files not in local +``` + +#### dbt Execution +```python +# All dbt commands use: +run_dbt_command(("run",), env, profiles_path) + → dbt run --profile bigquery --profiles-dir build/profiles/prod + --target env_execution --vars '{var1: val1, ...}' +``` + +### Important Files + +| File | Lines | Purpose | 
+|------|-------|---------| +| **cli_commands/compile.py** | 160+ | Orchestrates compilation: file copying, config merging, dbt compile, Docker build | +| **cli_commands/deploy.py** | 240+ | Orchestrates deployment: Docker, DataHub, Airbyte, Looker, cloud storage | +| **config_generation.py** | 175+ | Config merging logic, profiles.yml generation | +| **dbt_utils.py** | 95+ | dbt subprocess execution with variable aggregation | +| **filesystem_utils.py** | 75+ | LocalRemoteSync class for cloud storage (uses fsspec) | +| **data_structures.py** | 153+ | TypedDict definitions for all config PODs | +| **airbyte_utils.py** | 150+ | AirbyteFactory for connection management via REST API | +| **looker_utils.py** | 100+ | LookML generation (dbt2looker) and Git deployment | +| **jinja.py** | 60+ | replace_vars_with_values() for config template rendering | + +## Dependencies + +### Core (always installed) +- **click** (8.1.3): CLI framework +- **copier** (7.0.1): Project templating +- **dbt-core** (1.7.3): Data build tool +- **fsspec** (>=2024.6.0,<2025.0.0): Cloud filesystem abstraction +- **jinja2** (3.1.2): Template rendering +- **pyyaml** (6.0.1): Config parsing +- **pydantic** (<2): Validation (copier 7.0.1 requires v1.x) +- **pyyaml-include** (<2): Config includes (copier 7.0.1 requires v1.x) +- **packaging** (>=23.0): Version handling + +### Optional Extras +```bash +# dbt adapters +pip install data-pipelines-cli[bigquery] # dbt-bigquery==1.7.2 +pip install data-pipelines-cli[snowflake] # dbt-snowflake==1.7.1 +pip install data-pipelines-cli[postgres] # dbt-postgres==1.7.3 +pip install data-pipelines-cli[databricks] # dbt-databricks-factory +pip install data-pipelines-cli[dbt-all] # All adapters + +# Cloud/integrations +pip install data-pipelines-cli[docker] # docker==6.0.1 +pip install data-pipelines-cli[datahub] # acryl-datahub[dbt] +pip install data-pipelines-cli[looker] # dbt2looker==0.11.0 +pip install data-pipelines-cli[gcs] # gcsfs>=2024.6.0,<2025.0.0 +pip 
install data-pipelines-cli[s3] # s3fs>=2024.6.0,<2025.0.0 + +# Development +pip install data-pipelines-cli[tests] # pytest, moto, coverage, tox +pip install data-pipelines-cli[docs] # sphinx, sphinx-click +``` + +## Development Patterns + +### Error Handling +```python +# All exceptions inherit from DataPipelinesError +# Global handler in cli.py catches and formats errors +try: + command_logic() +except DataPipelinesError as err: + echo_error(f"CLI Error: {err.message}") + if err.submessage: + echo_suberror(err.submessage) + sys.exit(1) +``` + +### Optional Dependencies +```python +# Check at function start, raise clear error +try: + import docker +except ModuleNotFoundError: + raise DockerNotInstalledError() +# Error message tells user: "pip install data-pipelines-cli[docker]" +``` + +### Subprocess Execution +```python +# All subprocess calls use wrapper: +subprocess_run(["dbt", "run"], capture_output=False) +# Automatically raises SubprocessNonZeroExitError on failure +``` + +### Config Reading +```python +# Standard pattern for all config reads: +config = read_dictionary_from_config_directory( + BUILD_DIR.joinpath("dag"), + env, + "filename.yml" +) +# Returns merged dict: {...base, **env} +``` + +## Important Constants + +```python +BUILD_DIR = pathlib.Path.cwd().joinpath("build") +ENV_CONFIGURATION_PATH = pathlib.Path.home().joinpath(".dp.yml") +PROFILE_NAME_LOCAL_ENVIRONMENT = "local" +PROFILE_NAME_ENV_EXECUTION = "env_execution" +IMAGE_TAG_TO_REPLACE = "" +``` + +## PR Workflow + +1. Fork from `develop` branch +2. Install dev dependencies and pre-commit hooks +3. Write unit tests (tests mirror source structure) +4. Update CHANGELOG.md (keep-a-changelog format) +5. Ensure pre-commit passes (isort, black, flake8, mypy) +6. Squash commits with verbose PR name +7. 
Open PR against `develop` + +**Code Quality:** +- Max line length: 100 chars +- Type hints required (mypy checked) +- Test naming: `test_*` prefix +- Mock external services: moto (S3), gcp-storage-emulator (GCS) +- Pre-commit hooks: isort, black, flake8 6.1.0, mypy 1.7.1 + +## Release Process + +1. Run [Prepare Release](https://github.com/getindata/data-pipelines-cli/actions?query=workflow%3A%22Prepare+release%22) action +2. Review auto-generated PR for version bump and changelog +3. Merge PR to `main` +4. [Publish](https://github.com/getindata/data-pipelines-cli/actions?query=workflow%3APublish) workflow auto-publishes to PyPI and merges back to `develop` + +## Project Structure (User Projects) + +``` +my_pipeline/ # Created by dp create +├── .copier-answers.yml # Template metadata +├── dbt_project.yml # dbt configuration +├── config/ +│ ├── base/ +│ │ ├── dbt.yml # target_type, vars +│ │ ├── bigquery.yml # Warehouse credentials/settings +│ │ ├── airflow.yml # dags_path for deployment +│ │ ├── datahub.yml # Metadata ingestion config +│ │ ├── airbyte.yml # Connection definitions +│ │ └── bi.yml # Looker/BI settings +│ └── {env}/ # Environment overrides (prod, dev, staging) +│ └── *.yml # Same files as base/, merged on top +├── dag/ # Airflow/orchestration code +├── models/ +│ ├── sources/ +│ ├── staging/ +│ └── marts/ +└── build/ # Generated by dp compile (git ignored) + ├── dag/ # Copy of dag/ with configs + └── profiles/ # Generated dbt profiles.yml +``` + +## Tips + +- **BUILD_DIR** is the working directory for all compilation/execution +- **Always run `dp clean`** between environment switches to avoid stale artifacts +- **Environment names** map to dbt targets: `local` → `local`, everything else → `env_execution` +- **Jinja variables** in configs support `{{ var('key') }}` and `{{ env_var('KEY') }}` +- **Cloud storage sync** uses fsspec, so any fsspec backend works (gs://, s3://, az://, etc.) 
+- **Code generation** requires compilation first (needs manifest.json) +- **Test mocking:** S3 uses moto, GCS uses gcp-storage-emulator + +## Recent Changes (v0.31.0) + +**Python 3.11/3.12 Support** +- Python 3.9-3.12 fully supported (3.9 EOL Oct 2025, support ends Apr 30, 2026) +- Kept `pydantic<2` pin (copier 7.0.1 requires pydantic 1.x) +- Updated packaging >=23.0 (setuptools compatibility) +- Updated fsspec/gcsfs/s3fs >=2024.6.0,<2025.0.0 + +**Testing** +- Tox: py39, py310, py311, py312 (112 tests, 96% coverage) +- Setuptools <75.0.0 constraint (fixes canonicalize_version) +- Pre-commit: flake8 6.1.0, mypy 1.7.1 + +**Fixes** +- Exception handling: ClientError vs bare Exception +- Removed unnecessary type: ignore +- Line length violations fixed diff --git a/CHANGELOG.md b/CHANGELOG.md index ed915d9..8c04386 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,22 @@ # Changelog -## [Unreleased] +## [0.31.0] - 2025-11-03 + +### Added + +- Python 3.11 and 3.12 support +- Deprecation warning for Python 3.9 users (support ends April 2026) +- Comprehensive local testing guide in CONTRIBUTING.md with pyenv/asdf setup instructions + +### Changed + +- Updated GitHub Actions CI to test on Python 3.9, 3.10, 3.11, and 3.12 +- Updated Python version badge in README to reflect 3.9-3.12 support +- Added `pyyaml-include<2` dependency constraint for copier 7.0.1 compatibility + +### Fixed + +- Resolved pyyaml-include version compatibility issue with copier 7.0.1 ## [0.30.0] - 2023-12-08 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..43c994c --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1 @@ +@AGENTS.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index df91641..6a0a0ed 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,11 +1,41 @@ +# Contribution guide + +## Development Setup + +Requires Python 3.9-3.12. 
+ +```bash +pip install -e .[tests,bigquery,docker,datahub,gcs,s3] +pip install -r requirements-dev.txt +pre-commit install +``` + +## Running Tests + +```bash +# Run all tests +pytest --cov data_pipelines_cli --cov-report term-missing --ignore=venv + +# Run specific test +pytest tests/test_dbt_utils.py::test_specific_function + +# Test all Python versions with tox +tox + +# Test specific Python version +tox -e py310 +``` + ## PR Guidelines + 1. Fork branch from `develop`. 2. Ensure to provide unit tests for new functionality. 3. Install dev requirements: `pip install -r requirements-dev.txt` and setup a hook: `pre-commit install`. -4. Update documentation accordingly. -5. Update [changelog](CHANGELOG.md) according to ["Keep a changelog"](https://keepachangelog.com/en/1.0.0/) guidelines. -6. Squash changes with a single commit as much as possible and ensure verbose PR name. -7. Open a PR against the `develop` branch. +4. Run `tox` to verify all Python versions pass. +5. Update documentation accordingly. +6. Update [changelog](CHANGELOG.md) according to ["Keep a changelog"](https://keepachangelog.com/en/1.0.0/) guidelines. +7. Squash changes with a single commit as much as possible and ensure verbose PR name. +8. Open a PR against the `develop` branch. 
*We reserve the right to take over and modify or abandon PRs that do not match the workflow or are abandoned.* diff --git a/README.md b/README.md index 3693d58..953d2c8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # data-pipelines-cli -[![Python Version](https://img.shields.io/badge/python-3.9%20%7C%203.10-blue.svg)](https://github.com/getindata/data-pipelines-cli) +[![Python Version](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue.svg)](https://github.com/getindata/data-pipelines-cli) [![PyPI Version](https://badge.fury.io/py/data-pipelines-cli.svg)](https://pypi.org/project/data-pipelines-cli/) [![Downloads](https://pepy.tech/badge/data-pipelines-cli)](https://pepy.tech/project/data-pipelines-cli) [![Maintainability](https://api.codeclimate.com/v1/badges/e44ed9383a42b59984f6/maintainability)](https://codeclimate.com/github/getindata/data-pipelines-cli/maintainability) diff --git a/data_pipelines_cli/__init__.py b/data_pipelines_cli/__init__.py index aaf427c..c8527a2 100644 --- a/data_pipelines_cli/__init__.py +++ b/data_pipelines_cli/__init__.py @@ -5,4 +5,4 @@ pipelines. 
""" -version = "0.30.0" +version = "0.31.0" diff --git a/data_pipelines_cli/airbyte_utils.py b/data_pipelines_cli/airbyte_utils.py index d50f2f3..714211d 100644 --- a/data_pipelines_cli/airbyte_utils.py +++ b/data_pipelines_cli/airbyte_utils.py @@ -145,5 +145,5 @@ def request_handler( data = response.json() return data except requests.exceptions.HTTPError as e: - echo_error(e.response.text) # type: ignore + echo_error(e.response.text) return None diff --git a/data_pipelines_cli/cli.py b/data_pipelines_cli/cli.py index c6845e9..8ec6f90 100644 --- a/data_pipelines_cli/cli.py +++ b/data_pipelines_cli/cli.py @@ -1,4 +1,5 @@ import sys +import warnings import click @@ -27,6 +28,15 @@ def _cli() -> None: def cli() -> None: + # Warn users about Python 3.9 deprecation + if sys.version_info[:2] == (3, 9): + warnings.warn( + "Python 3.9 support will be removed in a future release after April 2026. " + "Please upgrade to Python 3.10 or later.", + DeprecationWarning, + stacklevel=2, + ) + try: _cli() except DataPipelinesError as err: diff --git a/data_pipelines_cli/looker_utils.py b/data_pipelines_cli/looker_utils.py index f535a4a..ce26371 100644 --- a/data_pipelines_cli/looker_utils.py +++ b/data_pipelines_cli/looker_utils.py @@ -88,8 +88,8 @@ def _prepare_repo_changes(src: pathlib.Path, local_repo_gen_path: pathlib.Path) with open(f"{local_repo_gen_path}/readme.txt", "w") as readme: readme.write( - """models and views with extention '.dp.[view|model].lkml' are generated by data-pipelines-cli. - Do not edit manually! Your changes could be overwrite! + """models and views with extention '.dp.[view|model].lkml' are generated by + data-pipelines-cli. Do not edit manually! Your changes could be overwrite! 
""" ) diff --git a/requirements-dev.txt b/requirements-dev.txt index 45fa05c..287a6fb 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1 +1,3 @@ pre-commit==2.15.0 +setuptools>=68.0.0,<75.0.0 +wheel>=0.37.0 diff --git a/setup.py b/setup.py index c3593af..f7c6a5a 100644 --- a/setup.py +++ b/setup.py @@ -12,17 +12,18 @@ "pyyaml==6.0.1", "types-PyYAML==6.0.12.2", "copier==7.0.1", + "pyyaml-include<2", # copier 7.0.1 requires pyyaml-include 1.x + "pydantic<2", # copier 7.0.1 requires pydantic 1.x "Jinja2==3.1.2", - "fsspec==2023.12.1", - "packaging==21.3", + "fsspec>=2024.6.0,<2025.0.0", + "packaging>=23.0", "colorama==0.4.5", "dbt-core==1.7.3", - "pydantic<2", ] EXTRA_FILESYSTEMS_REQUIRE = { - "gcs": ["gcsfs==2023.12.1"], - "s3": ["s3fs==2023.12.1"], + "gcs": ["gcsfs>=2024.6.0,<2025.0.0"], + "s3": ["s3fs>=2024.6.0,<2025.0.0"], } EXTRA_REQUIRE = { @@ -51,12 +52,12 @@ "pre-commit==2.20.0", "tox==3.27.1", "tox-gh-actions==2.12.0", - "moto[s3]==4.0.11", + "moto[server,s3]>=4.2.0,<5.0.0", "gcp-storage-emulator==2022.6.11", "GitPython==3.1.29", "types-requests==2.28.11.5", - "gcsfs==2023.12.1", - "s3fs==2023.12.1", + "gcsfs>=2024.6.0,<2025.0.0", + "s3fs>=2024.6.0,<2025.0.0", ], "docs": [ "sphinx==5.3.0", @@ -72,7 +73,7 @@ setup( name="data_pipelines_cli", - version="0.30.0", + version="0.31.0", description="CLI for data platform", long_description=README, long_description_content_type="text/markdown", @@ -83,6 +84,8 @@ "Development Status :: 1 - Planning", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ], keywords="dbt airflow cli", author="Andrzej Swatowski", diff --git a/tests/test_filesystem_utils.py b/tests/test_filesystem_utils.py index 1161007..13951ed 100644 --- a/tests/test_filesystem_utils.py +++ b/tests/test_filesystem_utils.py @@ -3,48 +3,16 @@ import string import unittest -import aiobotocore -import aiobotocore.endpoint 
import boto3 -import botocore import fsspec -from botocore.awsrequest import AWSResponse -from moto import mock_s3 +from botocore.exceptions import ClientError +from moto.server import ThreadedMotoServer from data_pipelines_cli.errors import DataPipelinesError MY_BUCKET = "my_bucket" -# According to -# https://github.com/aio-libs/aiobotocore/issues/755#issuecomment-844273191 -# aiobotocore problems can be fixed by creating an AWSResponse with fixed -# `raw_headers` field -# Patch `aiobotocore.endpoint.convert_to_response_dict` to work with moto. -class MockedAWSResponse: - def __init__(self, response: AWSResponse): - self._response = response - self.status_code = response.status_code - self.raw = response.raw - self.raw.raw_headers = {} - - @property - async def content(self): - return self._response.content - - -def factory(original): - def patched_convert_to_response_dict(http_response, operation_model): - return original(MockedAWSResponse(http_response), operation_model) - - return patched_convert_to_response_dict - - -aiobotocore.endpoint.convert_to_response_dict = factory( - aiobotocore.endpoint.convert_to_response_dict -) - - class TestError(unittest.TestCase): def test_wrong_local_path(self): from data_pipelines_cli.filesystem_utils import LocalRemoteSync @@ -109,51 +77,74 @@ def _test_synchronize_with_delete(self, protocol: str, **remote_kwargs): ) -@mock_s3 class TestS3Synchronize(TestSynchronize): + """ + S3 tests using moto server mode for aiobotocore compatibility. + The @mock_s3 decorator doesn't work with aiobotocore's async operations. 
+ """ + + @classmethod + def setUpClass(cls): + # Start moto server on localhost for aiobotocore compatibility + cls.server = ThreadedMotoServer(port="5555", verbose=False) + cls.server.start() + + @classmethod + def tearDownClass(cls): + cls.server.stop() + def setUp(self) -> None: + # Create S3 client pointing to moto server + self.endpoint_url = "http://127.0.0.1:5555" client = boto3.client( "s3", - region_name="eu-west-1", + region_name="us-east-1", aws_access_key_id="testing", aws_secret_access_key="testing", + endpoint_url=self.endpoint_url, ) + + # Create bucket try: - s3 = boto3.resource( - "s3", - region_name="eu-west-1", - aws_access_key_id="testing", - aws_secret_access_key="testing", - ) - s3.meta.client.head_bucket(Bucket=MY_BUCKET) - except botocore.exceptions.ClientError: + client.create_bucket(Bucket=MY_BUCKET) + except client.exceptions.BucketAlreadyExists: pass - else: - err = "{bucket} should not exist.".format(bucket=MY_BUCKET) - raise EnvironmentError(err) - - client.create_bucket( - Bucket=MY_BUCKET, - CreateBucketConfiguration={"LocationConstraint": "eu-west-1"}, - ) def tearDown(self): - s3 = boto3.resource( + # Clean up bucket contents + client = boto3.client( "s3", - region_name="eu-west-1", + region_name="us-east-1", aws_access_key_id="testing", aws_secret_access_key="testing", + endpoint_url=self.endpoint_url, ) - bucket = s3.Bucket(MY_BUCKET) - for key in bucket.objects.all(): - key.delete() - bucket.delete() + try: + # Delete all objects + response = client.list_objects_v2(Bucket=MY_BUCKET) + if "Contents" in response: + for obj in response["Contents"]: + client.delete_object(Bucket=MY_BUCKET, Key=obj["Key"]) + # Delete bucket + client.delete_bucket(Bucket=MY_BUCKET) + except ClientError: + pass def test_synchronize(self): - self._test_synchronize("s3", key="testing", password="testing") + self._test_synchronize( + "s3", + key="testing", + secret="testing", + client_kwargs={"endpoint_url": self.endpoint_url}, + ) def 
test_synchronize_with_delete(self): - self._test_synchronize_with_delete("s3", key="testing", password="testing") + self._test_synchronize_with_delete( + "s3", + key="testing", + secret="testing", + client_kwargs={"endpoint_url": self.endpoint_url}, + ) class TestGoogleStorageSynchronize(TestSynchronize): diff --git a/tox.ini b/tox.ini index f35d5c0..2d03e28 100644 --- a/tox.ini +++ b/tox.ini @@ -1,13 +1,21 @@ [tox] -envlist = py39, py310 +envlist = py39, py310, py311, py312 +requires = + setuptools>=68.0.0,<75.0.0 + wheel>=0.37.0 [gh-actions] python = 3.9: py39 3.10: py310 + 3.11: py311 + 3.12: py312 [testenv] +deps = + setuptools>=68.0.0,<75.0.0 + wheel>=0.37.0 extras = tests databricks