diff --git a/docs/user-guide/loading-data/index.md b/docs/user-guide/loading-data/index.md index 8f114984b..9b468e35e 100644 --- a/docs/user-guide/loading-data/index.md +++ b/docs/user-guide/loading-data/index.md @@ -94,26 +94,27 @@ To load data from a web server, you must provide a string representing a URL. It from kloppy import statsbomb dataset = statsbomb.load( - event_data=Path("http://someurl.com/match_3788741/events.json"), - lineup_data=Path("htpps://someurl.com/match_3788741/lineups.json"), + event_data="http://someurl.com/match_3788741/events.json", + lineup_data="https://someurl.com/match_3788741/lineups.json", ) ``` -You can pass credentials for authentication via [`set_config`][kloppy.config.set_config]. +To fetch data from an API protected with [HTTP Basic Auth](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Authentication), you can provide your credentials by setting the `adapters.http.basic_authentication` configuration variable. ```python -from kloppy import statsbomb -from kloppy.config import set_config - -set_config( - 'adapters.http.basic_authentication', - { 'user': 'JohnDoe', 'pass': 'asecretkey' } -) +from kloppy import wyscout +from kloppy.config import config_context + +# Use a dictionary or tuple/list: ("JohnDoe", "asecretkey") +with config_context( + "adapters.http.basic_authentication", + {"login": "JohnDoe", "password": "asecretkey"} +): + dataset = wyscout.load( + event_data="https://apirest.wyscout.com/v3/matches/3788741/events", + data_version="V3" + ) -dataset = statsbomb.load( - event_data="http://someurl.com/match_3788741/events.json", - lineup_data="htpps://someurl.com/match_3788741/lineups.json", -) ``` #### S3 diff --git a/kloppy/infra/io/adapters/http.py b/kloppy/infra/io/adapters/http.py index e4bd156d4..3e689feb1 100644 --- a/kloppy/infra/io/adapters/http.py +++ b/kloppy/infra/io/adapters/http.py @@ -1,7 +1,7 @@ import fsspec from kloppy.config import get_config -from kloppy.exceptions import AdapterError +from 
kloppy.exceptions import AdapterError, KloppyError from .fsspec import FSSpecAdapter @@ -47,7 +47,22 @@ def _get_filesystem( client_kwargs = {} if basic_authentication: - client_kwargs["auth"] = aiohttp.BasicAuth(*basic_authentication) + try: + if isinstance(basic_authentication, dict): + # Handle dictionary: unpack as keyword arguments (login=..., password=...) + client_kwargs["auth"] = aiohttp.BasicAuth( + **basic_authentication + ) + else: + # Handle list/tuple: unpack as positional arguments (login, password) + client_kwargs["auth"] = aiohttp.BasicAuth( + *basic_authentication + ) + except TypeError as e: + raise KloppyError( + "Invalid basic authentication configuration. " + "Provide a dictionary with 'login' and 'password' keys, or tuple." + ) from e if no_cache: return fsspec.filesystem("http", client_kwargs=client_kwargs) diff --git a/kloppy/tests/test_io.py b/kloppy/tests/test_io.py index 87574a4a6..79abaddf1 100644 --- a/kloppy/tests/test_io.py +++ b/kloppy/tests/test_io.py @@ -1,3 +1,4 @@ +import base64 import bz2 import gzip from io import BytesIO @@ -13,8 +14,8 @@ from moto.moto_server.threaded_moto_server import ThreadedMotoServer import pytest -from kloppy.config import set_config -from kloppy.exceptions import InputNotFoundError +from kloppy.config import config_context +from kloppy.exceptions import InputNotFoundError, KloppyError from kloppy.infra.io import adapters from kloppy.infra.io.adapters import Adapter from kloppy.infra.io.buffered_stream import BufferedStream @@ -418,16 +419,29 @@ def httpserver_content(self, httpserver, tmp_path): gz_content, headers={"Content-Type": "application/x-gzip"} ) + # Serve protected file with basic auth + encoded = base64.b64encode(b"Aladdin:OpenSesame").decode("utf-8") + httpserver.expect_request( + "/auth.txt", headers={"Authorization": f"Basic {encoded}"} + ).respond_with_data(txt_content) + httpserver.expect_request("/auth.txt").respond_with_data( + "Unauthorized", status=401 + ) + index = f"""""" 
httpserver.expect_request("/").respond_with_data( index, headers={"Content-Type": "text/html"} ) - return httpserver + + # make sure cache is reset for each test + with config_context("cache", str(tmp_path / "http_cache")): + yield httpserver def test_expand_inputs(self, httpserver): """It should be able to list the contents of an HTTP server.""" @@ -436,6 +450,7 @@ def test_expand_inputs(self, httpserver): httpserver.url_for("/testfile.txt"), httpserver.url_for("/compressed_endpoint"), httpserver.url_for("/testfile.txt.gz"), + httpserver.url_for("/auth.txt"), } assert set(expand_inputs(url)) == expected @@ -460,6 +475,47 @@ def test_write_unsupported(self, httpserver): with open_as_file(httpserver.url_for("/new.txt"), mode="wb") as fp: fp.write(b"Fail") + def test_read_with_basic_auth(self, httpserver): + """It should read a file protected with basic authentication.""" + # It should support a dict + with config_context( + "adapters.http.basic_authentication", + {"login": "Aladdin", "password": "OpenSesame"}, + ): + with open_as_file(httpserver.url_for("/auth.txt")) as fp: + assert fp.read() == b"Hello, world!" + + # It should also support a tuple + with config_context( + "adapters.http.basic_authentication", + ("Aladdin", "OpenSesame"), + ): + with open_as_file(httpserver.url_for("/auth.txt")) as fp: + assert fp.read() == b"Hello, world!" 
+ + def test_read_with_basic_auth_wrong_credentials(self, httpserver): + """It should raise an error with incorrect basic authentication.""" + from aiohttp.client_exceptions import ClientResponseError + + with config_context( + "adapters.http.basic_authentication", + {"login": "Aladdin", "password": "CloseSesame"}, + ): + with pytest.raises(ClientResponseError): + with open_as_file(httpserver.url_for("/auth.txt")) as fp: + fp.read() + + def test_read_with_basic_auth_wrong_config(self, httpserver): + """It should raise an error with malformed basic authentication config.""" + with config_context( + "adapters.http.basic_authentication", + {"user": "Aladdin", "pass": "OpenSesame"}, # Wrong keys + ): + with pytest.raises( + KloppyError, match="Invalid basic authentication configuration" + ): + open_as_file(httpserver.url_for("/auth.txt")) + class TestZipAdapter: """Tests for ZipAdapter.""" @@ -476,10 +532,8 @@ def zip_config(self, tmp_path): z.write(tmp_path / "testfile.txt", arcname="other.txt") # Set config for test - set_config("adapters.zip.fo", str(zip_path)) - yield - # Reset config to avoid side effects on other tests - set_config("adapters.zip.fo", None) + with config_context("adapters.zip.fo", str(zip_path)): + yield def test_expand_inputs(self): """It should be able to list the contents of a zip archive.""" @@ -547,9 +601,8 @@ def configure_kloppy_s3(self): s3 = S3FileSystem( anon=False, client_kwargs={"endpoint_url": self.endpoint_uri} ) - set_config("adapters.s3.s3fs", s3) - yield - set_config("adapters.s3.s3fs", None) + with config_context("adapters.s3.s3fs", s3): + yield def test_expand_inputs(self): """It should be able to list the contents of an S3 bucket."""