Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 46 additions & 16 deletions rain_api_core/bucket_map.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,38 @@
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Generator, Iterable, Optional, Sequence, Tuple
from typing import (
Dict,
Generator,
Iterable,
List,
Literal,
Optional,
Sequence,
Set,
Tuple,
Union,
)

# By default, buckets are accessible to any logged in users. This is
# represented by an empty set.
_DEFAULT_PERMISSION_FACTORY = set

AccessPermission = Union[Set[str], None, Literal[False]]
PublicAccess = None
NoAccess: Literal[False] = False


def _is_accessible(
required_groups: Optional[set],
required_groups: AccessPermission,
groups: Optional[Iterable[str]]
) -> bool:
# Check for public access
if required_groups is None:
if required_groups is PublicAccess:
return True

if required_groups is NoAccess:
return False

# At this point public access is not allowed
if groups is None:
return False
Expand All @@ -28,7 +46,7 @@ class BucketMapEntry():
bucket_path: str
object_key: str
headers: dict = field(default_factory=dict)
_access_control: Optional[dict] = None
_access_control: Optional[Dict[str, AccessPermission]] = None

def is_accessible(self, groups: Iterable[str] = None) -> bool:
"""Check if the object is accessible with the given permissions.
Expand All @@ -41,12 +59,16 @@ def is_accessible(self, groups: Iterable[str] = None) -> bool:
required_groups = self.get_required_groups()
return _is_accessible(required_groups, groups)

def get_required_groups(self) -> Optional[set]:
def get_required_groups(self) -> AccessPermission:
"""Get a set of permissions protecting this object.

It is sufficient to have one of the permissions in the set in order to
access the object. Returns None if the object has public access. An
empty set means any logged in user can access the object.
access the object. An empty set means any logged in user can access the
object.

:returns: Set[str] -- Can access the object if any permission matches
:returns: None -- The object has public access
:returns: False -- The object cannot be accessed by anyone
"""
if not self._access_control:
return _DEFAULT_PERMISSION_FACTORY()
Expand Down Expand Up @@ -168,7 +190,7 @@ def _walk_entries(node: dict, path=()) -> Generator[Tuple[str, tuple, Optional[d
"""A generator to recursively yield all leaves of a bucket map"""

for key, val in node.items():
if key in ("PUBLIC_BUCKETS", "PRIVATE_BUCKETS"):
if key in ("PUBLIC_BUCKETS", "PRIVATE_BUCKETS", "NOACCESS_BUCKETS"):
continue

path_parts = (*path, key)
Expand Down Expand Up @@ -207,23 +229,27 @@ def _parse_access_control(bucket_map: dict) -> dict:
"""
public_buckets = bucket_map.get("PUBLIC_BUCKETS", ())
private_buckets = bucket_map.get("PRIVATE_BUCKETS", {})
no_access_buckets = bucket_map.get("NOACCESS_BUCKETS", ())

try:
access_list = [(rule, None) for rule in public_buckets]
access_list: List[Tuple[str, AccessPermission]] = [
(rule, PublicAccess) for rule in public_buckets
]
except TypeError:
access_list = []
access_list.extend((rule, set(groups)) for rule, groups in private_buckets.items())
access_list.extend((rule, NoAccess) for rule in no_access_buckets)

# Relying on the fact that `sort` is stable. The order in which we add
# public/private rules to `access_list` is therefore important.
access_list.sort(key=lambda item: len(item[0]), reverse=True)

# Convert to dictionary for easier lookup on individual buckets
# We're relying on python's dictionary keys being insertion ordered
access = defaultdict(dict)
for (rule, obj) in access_list:
access: Dict[str, Dict[str, AccessPermission]] = defaultdict(dict)
for (rule, permission) in access_list:
bucket, *prefix = rule.split("/", 1)
access[bucket]["".join(prefix)] = obj
access[bucket]["".join(prefix)] = permission

# Set default permissions. We do this after the other rules have been added
# so that the default permission rule always comes last.
Expand Down Expand Up @@ -253,11 +279,13 @@ def _check_iam_compatible(access_control: dict):
continue

parent_access = access_rules[longest_prefix]
if access is None:
# Public access is always allowed
if access is PublicAccess or parent_access is NoAccess:
# Public access is always allowed.
# If the parent has no access, then it is impossible for the
# child to be more restrictive, so any permission is allowed.
continue

if parent_access is not None:
if parent_access is not PublicAccess:
if not access or parent_access and parent_access <= access:
continue

Expand Down Expand Up @@ -285,8 +313,10 @@ def _get_longest_prefix(key: str, prefixes: Iterable[str]) -> Optional[str]:


def _access_text(access) -> str:
if access is None:
if access is PublicAccess:
return "public access"
if access is NoAccess:
return "no access"
if access == set():
return "protected access"

Expand Down
134 changes: 128 additions & 6 deletions tests/test_bucket_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,26 @@ def sample_bucket_map():
"productX": "bucket",
"nested": {
"nested2a": {
"nested3": "nested-bucket-public"
"nested3": "nested-bucket-public",
},
"nested2b": "nested-bucket-private"
}
"nested2b": "nested-bucket-private",
},
"COMBINED": "combined-bucket",
},
"PUBLIC_BUCKETS": {
"browse-bucket": "General browse Imagery",
"bucket/browse": "ProductX Browse Imagery"
"bucket/browse": "ProductX Browse Imagery",
},
"PRIVATE_BUCKETS": {
"bucket/2020/12": ["science_team"],
"nested-bucket-private": []
}
"nested-bucket-private": [],
"combined-bucket/COLLECTION_1/": ["collection_1"],
"combined-bucket/COLLECTION_2/": ["collection_2"],
"combined-bucket/COLLECTION_3/": ["collection_3"],
},
"NOACCESS_BUCKETS": {
"combined-bucket": "Bucket with many collections",
},
}


Expand Down Expand Up @@ -335,6 +342,17 @@ def test_entries(sample_bucket_map):
object_key="",
_access_control={"": set()}
),
BucketMapEntry(
bucket="combined-bucket",
bucket_path="COMBINED",
object_key="",
_access_control={
"COLLECTION_1/": {"collection_1"},
"COLLECTION_2/": {"collection_2"},
"COLLECTION_3/": {"collection_3"},
"": False,
},
),
]


Expand Down Expand Up @@ -376,6 +394,17 @@ def test_check_bucket_access(sample_bucket_map):
assert b_map.get("productX/2020/12/obj1").is_accessible() is False
assert b_map.get("nested/nested2b/obj1").is_accessible() is False
assert b_map.get("nested/nested2b/obj1").is_accessible(groups=[]) is True
assert b_map.get("COMBINED/obj").is_accessible() is False
assert b_map.get("COMBINED/obj").is_accessible(groups=[]) is False
assert b_map.get("COMBINED/obj").is_accessible(
groups=["collection_1", "collection_2", "collection_3"],
) is False
assert b_map.get("COMBINED/COLLECTION_1/obj").is_accessible() is False
assert b_map.get("COMBINED/COLLECTION_1/obj").is_accessible(groups=["collection_1"]) is True
assert b_map.get("COMBINED/COLLECTION_2/obj").is_accessible() is False
assert b_map.get("COMBINED/COLLECTION_2/obj").is_accessible(groups=["collection_2"]) is True
assert b_map.get("COMBINED/COLLECTION_3/obj").is_accessible() is False
assert b_map.get("COMBINED/COLLECTION_3/obj").is_accessible(groups=["collection_3"]) is True


def test_check_bucket_access_conflicting():
Expand Down Expand Up @@ -558,6 +587,19 @@ def test_check_iam_compatible_nested_private_first():
},
iam_compatible=True
)
_ = BucketMap(
{
"PRIVATE_BUCKETS": {
"bucket/group_1/": ["group_1"],
"bucket/group_2/": ["group_2"],
"bucket/group_3/": ["group_3"],
},
"NOACCESS_BUCKETS": {
"bucket": "Bucket",
},
},
iam_compatible=True,
)


def test_check_iam_compatible_nested_protected_then_private():
Expand Down Expand Up @@ -620,6 +662,8 @@ def test_get_required_groups(sample_bucket_map):
assert b_map.get("productX/browse/obj1").get_required_groups() is None
assert b_map.get("productX/2020/12/obj1").get_required_groups() == {"science_team"}
assert b_map.get("productX/2020/23/obj2").get_required_groups() == set()
assert b_map.get("COMBINED/obj1").get_required_groups() is False
assert b_map.get("COMBINED/COLLECTION_1/obj1").get_required_groups() == {"collection_1"}


def test_to_iam_policy_empty():
Expand Down Expand Up @@ -837,6 +881,84 @@ def test_to_iam_policy_private_with_multiple_nested_private():
}


def test_to_iam_policy_noaccess_with_multiple_nested_private():
bucket_map = {
"PATH": "bucket-name",
"PRIVATE_BUCKETS": {
"bucket-name/closed1/": ["group_1"],
"bucket-name/closed2/": ["group_2"],
"bucket-name/closed3/": ["group_1", "group_2"],
"bucket-name/closed1/open1/": [],
"bucket-name/closed1/open2/": [],
"bucket-name/closed1/open3/": [],
},
"NOACCESS_BUCKETS": {
"bucket-name": "Private bucket",
},
}
b_map = BucketMap(bucket_map)

assert b_map.to_iam_policy(groups=()) == {
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": ["s3:GetObject"],
"Resource": [
"arn:aws:s3:::bucket-name/closed1/open1/*",
"arn:aws:s3:::bucket-name/closed1/open2/*",
"arn:aws:s3:::bucket-name/closed1/open3/*",
],
},
{
"Effect": "Allow",
"Action": ["s3:ListBucket"],
"Resource": [
"arn:aws:s3:::bucket-name",
],
"Condition": {
"StringLike": {
"s3:prefix": [
"closed1/open1/*",
"closed1/open2/*",
"closed1/open3/*",
],
},
},
},
],
}

assert b_map.to_iam_policy(groups=("group_1",)) == {
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": ["s3:GetObject"],
"Resource": [
"arn:aws:s3:::bucket-name/closed1/*",
"arn:aws:s3:::bucket-name/closed3/*",
],
},
{
"Effect": "Allow",
"Action": ["s3:ListBucket"],
"Resource": [
"arn:aws:s3:::bucket-name",
],
"Condition": {
"StringLike": {
"s3:prefix": [
"closed1/*",
"closed3/*",
],
},
},
},
],
}


def test_to_iam_policy_private_with_multiple_nested_protected_multiple_buckets():
bucket_map = {
"PATH1": "bucket1",
Expand Down