Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 15 additions & 14 deletions docs/user-guide/loading-data/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,26 +94,27 @@ To load data from a web server, you must provide a string representing a URL. It
from kloppy import statsbomb

dataset = statsbomb.load(
event_data=Path("http://someurl.com/match_3788741/events.json"),
lineup_data=Path("https://someurl.com/match_3788741/lineups.json"),
event_data="http://someurl.com/match_3788741/events.json",
lineup_data="https://someurl.com/match_3788741/lineups.json",
)
```

You can pass credentials for authentication via [`set_config`][kloppy.config.set_config].
To fetch data from an API protected with [HTTP Basic Auth](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Authentication), you can provide your credentials by setting the `adapters.http.basic_authentication` configuration variable.

```python
from kloppy import statsbomb
from kloppy.config import set_config

set_config(
'adapters.http.basic_authentication',
{ 'user': 'JohnDoe', 'pass': 'asecretkey' }
)
from kloppy import wyscout
from kloppy.config import config_context

# Credentials can be a dictionary (as below) or a tuple/list: ("JohnDoe", "asecretkey")
with config_context(
"adapters.http.basic_authentication",
{"login": "JohnDoe", "password": "asecretkey"}
):
dataset = wyscout.load(
event_data="https://apirest.wyscout.com/v3/matches/3788741/events",
data_version="V3"
)

dataset = statsbomb.load(
event_data="http://someurl.com/match_3788741/events.json",
lineup_data="https://someurl.com/match_3788741/lineups.json",
)
```

#### S3
Expand Down
19 changes: 17 additions & 2 deletions kloppy/infra/io/adapters/http.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import fsspec

from kloppy.config import get_config
from kloppy.exceptions import AdapterError
from kloppy.exceptions import AdapterError, KloppyError

from .fsspec import FSSpecAdapter

Expand Down Expand Up @@ -47,7 +47,22 @@ def _get_filesystem(

client_kwargs = {}
if basic_authentication:
client_kwargs["auth"] = aiohttp.BasicAuth(*basic_authentication)
try:
if isinstance(basic_authentication, dict):
# Handle dictionary: unpack as keyword arguments (login=..., password=...)
client_kwargs["auth"] = aiohttp.BasicAuth(
**basic_authentication
)
else:
# Handle list/tuple: unpack as positional arguments (login, password)
client_kwargs["auth"] = aiohttp.BasicAuth(
*basic_authentication
)
except TypeError as e:
raise KloppyError(
"Invalid basic authentication configuration. "
"Provide a dictionary with 'login' and 'password' keys, or tuple."
) from e

if no_cache:
return fsspec.filesystem("http", client_kwargs=client_kwargs)
Expand Down
73 changes: 63 additions & 10 deletions kloppy/tests/test_io.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import bz2
import gzip
from io import BytesIO
Expand All @@ -13,8 +14,8 @@
from moto.moto_server.threaded_moto_server import ThreadedMotoServer
import pytest

from kloppy.config import set_config
from kloppy.exceptions import InputNotFoundError
from kloppy.config import config_context
from kloppy.exceptions import InputNotFoundError, KloppyError
from kloppy.infra.io import adapters
from kloppy.infra.io.adapters import Adapter
from kloppy.infra.io.buffered_stream import BufferedStream
Expand Down Expand Up @@ -418,16 +419,29 @@ def httpserver_content(self, httpserver, tmp_path):
gz_content, headers={"Content-Type": "application/x-gzip"}
)

# Serve protected file with basic auth
encoded = base64.b64encode(b"Aladdin:OpenSesame").decode("utf-8")
httpserver.expect_request(
"/auth.txt", headers={"Authorization": f"Basic {encoded}"}
).respond_with_data(txt_content)
httpserver.expect_request("/auth.txt").respond_with_data(
"Unauthorized", status=401
)

index = f"""<html><body><ul>
<li><a href="/testfile.txt">Txt</a></li>
<li><a href="/compressed_endpoint">Comp</a></li>
<li><a href="{httpserver.url_for("/testfile.txt.gz")}">Gz</a></li>
<li><a href="/auth.txt">Auth</a></li>
</ul></body></html>"""

httpserver.expect_request("/").respond_with_data(
index, headers={"Content-Type": "text/html"}
)
return httpserver

# make sure cache is reset for each test
with config_context("cache", str(tmp_path / "http_cache")):
yield httpserver

def test_expand_inputs(self, httpserver):
"""It should be able to list the contents of an HTTP server."""
Expand All @@ -436,6 +450,7 @@ def test_expand_inputs(self, httpserver):
httpserver.url_for("/testfile.txt"),
httpserver.url_for("/compressed_endpoint"),
httpserver.url_for("/testfile.txt.gz"),
httpserver.url_for("/auth.txt"),
}
assert set(expand_inputs(url)) == expected

Expand All @@ -460,6 +475,47 @@ def test_write_unsupported(self, httpserver):
with open_as_file(httpserver.url_for("/new.txt"), mode="wb") as fp:
fp.write(b"Fail")

def test_read_with_basic_auth(self, httpserver):
    """Read a basic-auth protected file with each supported credential format.

    The credentials are supplied through the
    ``adapters.http.basic_authentication`` config key, first as a
    ``login``/``password`` dictionary and then as a ``(login, password)``
    tuple. Both must give access to ``/auth.txt``.
    """
    protected_url = httpserver.url_for("/auth.txt")
    credential_formats = (
        {"login": "Aladdin", "password": "OpenSesame"},  # keyword form
        ("Aladdin", "OpenSesame"),  # positional form
    )
    for credentials in credential_formats:
        with config_context(
            "adapters.http.basic_authentication", credentials
        ), open_as_file(protected_url) as fp:
            assert fp.read() == b"Hello, world!"

def test_read_with_basic_auth_wrong_credentials(self, httpserver):
    """Reading ``/auth.txt`` with a wrong password must raise ClientResponseError."""
    # Imported locally, as in the original test, so aiohttp is only needed here.
    from aiohttp.client_exceptions import ClientResponseError

    bad_credentials = {"login": "Aladdin", "password": "CloseSesame"}
    with config_context("adapters.http.basic_authentication", bad_credentials):
        with pytest.raises(ClientResponseError):
            with open_as_file(httpserver.url_for("/auth.txt")) as fp:
                fp.read()

def test_read_with_basic_auth_wrong_config(self, httpserver):
    """A credentials dict with unexpected keys must be rejected with KloppyError."""
    # 'user'/'pass' are deliberately wrong; the test name and error text
    # indicate this config is expected to be treated as malformed.
    malformed_config = {"user": "Aladdin", "pass": "OpenSesame"}
    expected_message = "Invalid basic authentication configuration"
    with config_context("adapters.http.basic_authentication", malformed_config):
        with pytest.raises(KloppyError, match=expected_message):
            # NOTE(review): the context manager is never entered here, so this
            # relies on open_as_file raising eagerly when called — confirm.
            open_as_file(httpserver.url_for("/auth.txt"))


class TestZipAdapter:
"""Tests for ZipAdapter."""
Expand All @@ -476,10 +532,8 @@ def zip_config(self, tmp_path):
z.write(tmp_path / "testfile.txt", arcname="other.txt")

# Set config for test
set_config("adapters.zip.fo", str(zip_path))
yield
# Reset config to avoid side effects on other tests
set_config("adapters.zip.fo", None)
with config_context("adapters.zip.fo", str(zip_path)):
yield

def test_expand_inputs(self):
"""It should be able to list the contents of a zip archive."""
Expand Down Expand Up @@ -547,9 +601,8 @@ def configure_kloppy_s3(self):
s3 = S3FileSystem(
anon=False, client_kwargs={"endpoint_url": self.endpoint_uri}
)
set_config("adapters.s3.s3fs", s3)
yield
set_config("adapters.s3.s3fs", None)
with config_context("adapters.s3.s3fs", s3):
yield

def test_expand_inputs(self):
"""It should be able to list the contents of an S3 bucket."""
Expand Down
Loading