From 30b06dd70dbc0b9aa84d1fd554cbb83bad5eb2e9 Mon Sep 17 00:00:00 2001 From: Rohan Weeden Date: Tue, 8 Oct 2024 12:21:58 -0400 Subject: [PATCH 1/2] Refactor access control typing --- rain_api_core/bucket_map.py | 48 +++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/rain_api_core/bucket_map.py b/rain_api_core/bucket_map.py index 811d5d4..2af96ee 100644 --- a/rain_api_core/bucket_map.py +++ b/rain_api_core/bucket_map.py @@ -1,18 +1,31 @@ from collections import defaultdict from dataclasses import dataclass, field -from typing import Generator, Iterable, Optional, Sequence, Tuple +from typing import ( + Dict, + Generator, + Iterable, + List, + Optional, + Sequence, + Set, + Tuple, + Union, +) # By default, buckets are accessible to any logged in users. This is # represented by an empty set. _DEFAULT_PERMISSION_FACTORY = set +AccessPermission = Union[Set[str], None] +PublicAccess = None + def _is_accessible( - required_groups: Optional[set], + required_groups: AccessPermission, groups: Optional[Iterable[str]] ) -> bool: # Check for public access - if required_groups is None: + if required_groups is PublicAccess: return True # At this point public access is not allowed @@ -28,7 +41,7 @@ class BucketMapEntry(): bucket_path: str object_key: str headers: dict = field(default_factory=dict) - _access_control: Optional[dict] = None + _access_control: Optional[Dict[str, AccessPermission]] = None def is_accessible(self, groups: Iterable[str] = None) -> bool: """Check if the object is accessible with the given permissions. @@ -41,12 +54,15 @@ def is_accessible(self, groups: Iterable[str] = None) -> bool: required_groups = self.get_required_groups() return _is_accessible(required_groups, groups) - def get_required_groups(self) -> Optional[set]: + def get_required_groups(self) -> AccessPermission: """Get a set of permissions protecting this object. It is sufficient to have one of the permissions in the set in order to - access the object. Returns None if the object has public access. An - empty set means any logged in user can access the object. + access the object. An empty set means any logged in user can access the + object. + + :returns: Set[str] -- Can access the object if any permission matches + :returns: None -- The object has public access """ if not self._access_control: return _DEFAULT_PERMISSION_FACTORY() @@ -209,7 +225,9 @@ def _parse_access_control(bucket_map: dict) -> dict: private_buckets = bucket_map.get("PRIVATE_BUCKETS", {}) try: - access_list = [(rule, None) for rule in public_buckets] + access_list: List[Tuple[str, AccessPermission]] = [ + (rule, PublicAccess) for rule in public_buckets + ] except TypeError: access_list = [] access_list.extend((rule, set(groups)) for rule, groups in private_buckets.items()) @@ -220,10 +238,10 @@ def _parse_access_control(bucket_map: dict) -> dict: # Convert to dictionary for easier lookup on individual buckets # We're relying on python's dictionary keys being insertion ordered - access = defaultdict(dict) - for (rule, obj) in access_list: + access: Dict[str, Dict[str, AccessPermission]] = defaultdict(dict) + for (rule, permission) in access_list: bucket, *prefix = rule.split("/", 1) - access[bucket]["".join(prefix)] = obj + access[bucket]["".join(prefix)] = permission # Set default permissions. We do this after the other rules have been added # so that the default permission rule always comes last. @@ -253,11 +271,11 @@ def _check_iam_compatible(access_control: dict): continue parent_access = access_rules[longest_prefix] - if access is None: - # Public access is always allowed + if access is PublicAccess: + # Public access is always allowed. continue - if parent_access is not None: + if parent_access is not PublicAccess: if not access or parent_access and parent_access <= access: continue @@ -285,7 +303,7 @@ def _get_longest_prefix(key: str, prefixes: Iterable[str]) -> Optional[str]: def _access_text(access) -> str: - if access is None: + if access is PublicAccess: return "public access" if access == set(): return "protected access" From d8ed6b17c706555177c7d32898a0ffb35e04f33d Mon Sep 17 00:00:00 2001 From: Rohan Weeden Date: Tue, 8 Oct 2024 12:25:54 -0400 Subject: [PATCH 2/2] Implement NOACCESS_BUCKETS key This allows you to completely block access for some prefix which was previously impossible to do. This is useful for setting up an IAM compatible bucket with access for different prefixes, since the parent always needs to have equally or more restrictive permissions as the child prefixes. Setting the '' prefix to have no access, allows you to easily set whatever permission you want on any other prefix without causing IAM compatibility issues. --- rain_api_core/bucket_map.py | 18 ++++- tests/test_bucket_map.py | 134 ++++++++++++++++++++++++++++++++++-- 2 files changed, 143 insertions(+), 9 deletions(-) diff --git a/rain_api_core/bucket_map.py b/rain_api_core/bucket_map.py index 2af96ee..cea8995 100644 --- a/rain_api_core/bucket_map.py +++ b/rain_api_core/bucket_map.py @@ -5,6 +5,7 @@ Generator, Iterable, List, + Literal, Optional, Sequence, Set, @@ -16,8 +17,9 @@ # represented by an empty set. _DEFAULT_PERMISSION_FACTORY = set -AccessPermission = Union[Set[str], None] +AccessPermission = Union[Set[str], None, Literal[False]] PublicAccess = None +NoAccess: Literal[False] = False def _is_accessible( @@ -28,6 +30,9 @@ def _is_accessible( if required_groups is PublicAccess: return True + if required_groups is NoAccess: + return False + # At this point public access is not allowed if groups is None: return False @@ -63,6 +68,7 @@ def get_required_groups(self) -> AccessPermission: :returns: Set[str] -- Can access the object if any permission matches :returns: None -- The object has public access + :returns: False -- The object cannot be accessed by anyone """ if not self._access_control: return _DEFAULT_PERMISSION_FACTORY() @@ -184,7 +190,7 @@ def _walk_entries(node: dict, path=()) -> Generator[Tuple[str, tuple, Optional[d """A generator to recursively yield all leaves of a bucket map""" for key, val in node.items(): - if key in ("PUBLIC_BUCKETS", "PRIVATE_BUCKETS"): + if key in ("PUBLIC_BUCKETS", "PRIVATE_BUCKETS", "NOACCESS_BUCKETS"): continue path_parts = (*path, key) @@ -223,6 +229,7 @@ def _parse_access_control(bucket_map: dict) -> dict: """ public_buckets = bucket_map.get("PUBLIC_BUCKETS", ()) private_buckets = bucket_map.get("PRIVATE_BUCKETS", {}) + no_access_buckets = bucket_map.get("NOACCESS_BUCKETS", ()) try: access_list: List[Tuple[str, AccessPermission]] = [ @@ -231,6 +238,7 @@ def _parse_access_control(bucket_map: dict) -> dict: except TypeError: access_list = [] access_list.extend((rule, set(groups)) for rule, groups in private_buckets.items()) + access_list.extend((rule, NoAccess) for rule in no_access_buckets) # Relying on the fact that `sort` is stable. The order in which we add # public/private rules to `access_list` is therefore important. @@ -271,8 +279,10 @@ def _check_iam_compatible(access_control: dict): continue parent_access = access_rules[longest_prefix] - if access is PublicAccess: + if access is PublicAccess or parent_access is NoAccess: # Public access is always allowed. + # If the parent has no access, then it is impossible for the + # child to be more restrictive, so any permission is allowed. continue if parent_access is not PublicAccess: @@ -305,6 +315,8 @@ def _get_longest_prefix(key: str, prefixes: Iterable[str]) -> Optional[str]: def _access_text(access) -> str: if access is PublicAccess: return "public access" + if access is NoAccess: + return "no access" if access == set(): return "protected access" diff --git a/tests/test_bucket_map.py b/tests/test_bucket_map.py index c53a021..8554cbd 100644 --- a/tests/test_bucket_map.py +++ b/tests/test_bucket_map.py @@ -15,19 +15,26 @@ def sample_bucket_map(): "productX": "bucket", "nested": { "nested2a": { - "nested3": "nested-bucket-public" + "nested3": "nested-bucket-public", }, - "nested2b": "nested-bucket-private" - } + "nested2b": "nested-bucket-private", + }, + "COMBINED": "combined-bucket", }, "PUBLIC_BUCKETS": { "browse-bucket": "General browse Imagery", - "bucket/browse": "ProductX Browse Imagery" + "bucket/browse": "ProductX Browse Imagery", }, "PRIVATE_BUCKETS": { "bucket/2020/12": ["science_team"], - "nested-bucket-private": [] - } + "nested-bucket-private": [], + "combined-bucket/COLLECTION_1/": ["collection_1"], + "combined-bucket/COLLECTION_2/": ["collection_2"], + "combined-bucket/COLLECTION_3/": ["collection_3"], + }, + "NOACCESS_BUCKETS": { + "combined-bucket": "Bucket with many collections", + }, } @@ -335,6 +342,17 @@ def test_entries(sample_bucket_map): object_key="", _access_control={"": set()} ), + BucketMapEntry( + bucket="combined-bucket", + bucket_path="COMBINED", + object_key="", + _access_control={ + "COLLECTION_1/": {"collection_1"}, + "COLLECTION_2/": {"collection_2"}, + "COLLECTION_3/": {"collection_3"}, + "": False, + }, + ), ] @@ -376,6 +394,17 @@ def test_check_bucket_access(sample_bucket_map): assert b_map.get("productX/2020/12/obj1").is_accessible() is False assert b_map.get("nested/nested2b/obj1").is_accessible() is False assert b_map.get("nested/nested2b/obj1").is_accessible(groups=[]) is True + assert b_map.get("COMBINED/obj").is_accessible() is False + assert b_map.get("COMBINED/obj").is_accessible(groups=[]) is False + assert b_map.get("COMBINED/obj").is_accessible( + groups=["collection_1", "collection_2", "collection_3"], + ) is False + assert b_map.get("COMBINED/COLLECTION_1/obj").is_accessible() is False + assert b_map.get("COMBINED/COLLECTION_1/obj").is_accessible(groups=["collection_1"]) is True + assert b_map.get("COMBINED/COLLECTION_2/obj").is_accessible() is False + assert b_map.get("COMBINED/COLLECTION_2/obj").is_accessible(groups=["collection_2"]) is True + assert b_map.get("COMBINED/COLLECTION_3/obj").is_accessible() is False + assert b_map.get("COMBINED/COLLECTION_3/obj").is_accessible(groups=["collection_3"]) is True def test_check_bucket_access_conflicting(): @@ -558,6 +587,19 @@ def test_check_iam_compatible_nested_private_first(): }, iam_compatible=True ) + _ = BucketMap( + { + "PRIVATE_BUCKETS": { + "bucket/group_1/": ["group_1"], + "bucket/group_2/": ["group_2"], + "bucket/group_3/": ["group_3"], + }, + "NOACCESS_BUCKETS": { + "bucket": "Bucket", + }, + }, + iam_compatible=True, + ) def test_check_iam_compatible_nested_protected_then_private(): @@ -620,6 +662,8 @@ def test_get_required_groups(sample_bucket_map): assert b_map.get("productX/browse/obj1").get_required_groups() is None assert b_map.get("productX/2020/12/obj1").get_required_groups() == {"science_team"} assert b_map.get("productX/2020/23/obj2").get_required_groups() == set() + assert b_map.get("COMBINED/obj1").get_required_groups() is False + assert b_map.get("COMBINED/COLLECTION_1/obj1").get_required_groups() == {"collection_1"} def test_to_iam_policy_empty(): @@ -837,6 +881,84 @@ def test_to_iam_policy_private_with_multiple_nested_private(): } +def test_to_iam_policy_noaccess_with_multiple_nested_private(): + bucket_map = { + "PATH": "bucket-name", + "PRIVATE_BUCKETS": { + "bucket-name/closed1/": ["group_1"], + "bucket-name/closed2/": ["group_2"], + "bucket-name/closed3/": ["group_1", "group_2"], + "bucket-name/closed1/open1/": [], + "bucket-name/closed1/open2/": [], + "bucket-name/closed1/open3/": [], + }, + "NOACCESS_BUCKETS": { + "bucket-name": "Private bucket", + }, + } + b_map = BucketMap(bucket_map) + + assert b_map.to_iam_policy(groups=()) == { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": ["s3:GetObject"], + "Resource": [ + "arn:aws:s3:::bucket-name/closed1/open1/*", + "arn:aws:s3:::bucket-name/closed1/open2/*", + "arn:aws:s3:::bucket-name/closed1/open3/*", + ], + }, + { + "Effect": "Allow", + "Action": ["s3:ListBucket"], + "Resource": [ + "arn:aws:s3:::bucket-name", + ], + "Condition": { + "StringLike": { + "s3:prefix": [ + "closed1/open1/*", + "closed1/open2/*", + "closed1/open3/*", + ], + }, + }, + }, + ], + } + + assert b_map.to_iam_policy(groups=("group_1",)) == { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": ["s3:GetObject"], + "Resource": [ + "arn:aws:s3:::bucket-name/closed1/*", + "arn:aws:s3:::bucket-name/closed3/*", + ], + }, + { + "Effect": "Allow", + "Action": ["s3:ListBucket"], + "Resource": [ + "arn:aws:s3:::bucket-name", + ], + "Condition": { + "StringLike": { + "s3:prefix": [ + "closed1/*", + "closed3/*", + ], + }, + }, + }, + ], + } + + def test_to_iam_policy_private_with_multiple_nested_protected_multiple_buckets(): bucket_map = { "PATH1": "bucket1",