From 8750f6323c67390cd6a19f23f9b2df6f8e057054 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Thu, 26 Dec 2024 15:21:03 -0600 Subject: [PATCH 01/26] Notification Improvements Stage 1, 70% --- .../catalog/tasks/catalog_indexer_task.py | 6 + .../services/admin_notifications.py | 22 ++++ .../ses_email_notification_service.py | 29 +++++ .../omics/tasks/omics_workflows_fetcher.py | 92 +++++++------- .../redshift_table_share_processor.py | 7 +- .../s3_datasets/tasks/tables_syncer.py | 106 +++++++++------- .../glue_table_share_processor.py | 6 +- .../s3_access_point_share_processor.py | 6 +- .../s3_bucket_share_processor.py | 6 +- .../services/share_notification_service.py | 118 +++++++++++++----- .../services/share_processor_manager.py | 2 +- .../shares_base/services/sharing_service.py | 52 +++++++- .../tasks/persistent_email_reminders_task.py | 37 +++--- .../tasks/share_expiration_task.py | 106 +++++++++------- .../shares_base/tasks/share_manager_task.py | 6 + .../shares_base/tasks/share_reapplier_task.py | 96 ++++++++------ .../shares_base/tasks/share_verifier_task.py | 44 +++++-- deploy/stacks/container.py | 16 +-- .../test_redshift_table_processor.py | 20 +-- 19 files changed, 518 insertions(+), 259 deletions(-) create mode 100644 backend/dataall/modules/notifications/services/admin_notifications.py diff --git a/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py b/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py index 807b712a3..4183c4bc5 100644 --- a/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py +++ b/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py @@ -8,6 +8,7 @@ from dataall.base.db import get_engine from dataall.base.loader import load_modules, ImportMode from dataall.base.utils.alarm_service import AlarmService +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService log = logging.getLogger(__name__) @@ -32,6 +33,11 @@ def index_objects(cls, engine, 
with_deletes='False'): return len(indexed_object_uris) except Exception as e: AlarmService().trigger_catalog_indexing_failure_alarm(error=str(e)) + AdminNotificationService().notify_admins_with_error_log( + process_error='Exception occurred during cataloging task', + error_logs=[str(e)], + process_name='Catalog Task' + ) raise e @classmethod diff --git a/backend/dataall/modules/notifications/services/admin_notifications.py b/backend/dataall/modules/notifications/services/admin_notifications.py new file mode 100644 index 000000000..02b15e179 --- /dev/null +++ b/backend/dataall/modules/notifications/services/admin_notifications.py @@ -0,0 +1,22 @@ +from typing import List + +from dataall.modules.notifications.services.ses_email_notification_service import SESEmailNotificationService + + +class AdminNotificationService: + admin_group = 'DAAdministrators' + + def notify_admins_with_error_log(self, process_error: str, error_logs: List[str], process_name:str = ''): + + subject = f'Data.all alert | Attention Required | Failure in : {process_name}' + email_message = f""" + Following error occurred when , {process_error}

+ """ + for error_log in error_logs: + email_message += error_log + "

" + + SESEmailNotificationService.create_and_send_email_notifications( + subject=subject, + msg=email_message, + recipient_groups_list=[AdminNotificationService.admin_group] + ) \ No newline at end of file diff --git a/backend/dataall/modules/notifications/services/ses_email_notification_service.py b/backend/dataall/modules/notifications/services/ses_email_notification_service.py index 29edfce64..000c0d2da 100644 --- a/backend/dataall/modules/notifications/services/ses_email_notification_service.py +++ b/backend/dataall/modules/notifications/services/ses_email_notification_service.py @@ -3,6 +3,7 @@ from dataall.base.aws.cognito import Cognito from dataall.base.aws.ses import Ses +from dataall.base.config import config from dataall.base.services.service_provider_factory import ServiceProviderFactory from dataall.modules.notifications.services.base_email_notification_service import BaseEmailNotificationService @@ -60,3 +61,31 @@ def send_email_to_users(email_list, email_provider, message, subject): # https://aws.amazon.com/blogs/messaging-and-targeting/how-to-send-messages-to-multiple-recipients-with-amazon-simple-email-service-ses/ for emailId in email_list: email_provider.send_email([emailId], message, subject) + + @staticmethod + def create_and_send_email_notifications(subject, msg, recipient_groups_list=None, recipient_email_ids=None): + """ + Method to directly send email notification instead of creating an SQS Task + This approach is used while sending email notifications in an ECS task ( e.g. 
persistent email reminder task, share expiration task, etc ) + Emails send to groups mentioned in recipient_groups_list and / or emails mentioned in recipient_email_ids + """ + if recipient_groups_list is None: + recipient_groups_list = [] + if recipient_email_ids is None: + recipient_email_ids = [] + + share_notification_config = config.get_property( + 'modules.datasets_base.features.share_notifications', default=None + ) + if share_notification_config: + for share_notification_config_type in share_notification_config.keys(): + n_config = share_notification_config[share_notification_config_type] + if n_config.get('active', False) == True: + if share_notification_config_type == 'email': + SESEmailNotificationService.send_email_task( + subject, msg, recipient_groups_list, recipient_email_ids + ) + else: + log.info(f'Notification type : {share_notification_config_type} is not active') + else: + log.info('Notifications are not active') \ No newline at end of file diff --git a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py index d20d69cfc..40c52acd4 100644 --- a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py +++ b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py @@ -5,6 +5,7 @@ from dataall.core.environment.db.environment_models import Environment from dataall.base.db import get_engine +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.omics.aws.omics_client import OmicsClient from dataall.modules.omics.db.omics_models import OmicsWorkflow from dataall.modules.omics.api.enums import OmicsWorkflowType @@ -16,49 +17,56 @@ def fetch_omics_workflows(engine): """List Omics workflows.""" - log.info('Starting omics workflows fetcher') - with engine.scoped_session() as session: - environments = OmicsRepository(session).list_environments_with_omics_enabled() - # designed for ready2run and private 
workflows; when private workflow support is - # introduced, we will need go over all environments - if len(environments) == 0: - log.info('No environments found. Nothing to do.') - return True - env = environments[0] - ready_workflows = OmicsClient(awsAccountId=env.AwsAccountId, region=env.region).list_workflows( - type=OmicsWorkflowType.READY2RUN.value - ) - # Removing private workflows until fully supported after initial launch - # private_workflows = OmicsClient.list_workflows(awsAccountId=env.AwsAccountId, region=env.region, type=OmicsWorkflowType.PRIVATE.value) - workflows = ready_workflows # + private_workflows - log.info(f'Found workflows {str(workflows)} in environment {env.environmentUri}') - for workflow in workflows: - log.info(f"Processing workflow name={workflow['name']}, id={workflow['id']}...") - existing_workflow = OmicsRepository(session).get_workflow_by_id(workflow['id']) - if existing_workflow is not None: - log.info( - f"Workflow name={workflow['name']}, id={workflow['id']} has already been registered in database. Updating information..." - ) - existing_workflow.name = workflow['name'] - existing_workflow.label = workflow['name'] - session.commit() - - else: - log.info( - f"Workflow name={workflow['name']} , id={workflow['id']} in environment {env.environmentUri} is new. Registering..." 
- ) - omicsWorkflow = OmicsWorkflow( - id=workflow['id'], - name=workflow['name'], - arn=workflow['arn'], - type=workflow['type'], - environmentUri=env.environmentUri, - label=workflow['name'], - owner=env.environmentUri, - ) - OmicsRepository(session).save_omics_workflow(omicsWorkflow) - return True + try: + log.info('Starting omics workflows fetcher') + with engine.scoped_session() as session: + environments = OmicsRepository(session).list_environments_with_omics_enabled() + # designed for ready2run and private workflows; when private workflow support is + # introduced, we will need go over all environments + if len(environments) == 0: + log.info('No environments found. Nothing to do.') + return True + env = environments[0] + ready_workflows = OmicsClient(awsAccountId=env.AwsAccountId, region=env.region).list_workflows( + type=OmicsWorkflowType.READY2RUN.value + ) + # Removing private workflows until fully supported after initial launch + # private_workflows = OmicsClient.list_workflows(awsAccountId=env.AwsAccountId, region=env.region, type=OmicsWorkflowType.PRIVATE.value) + workflows = ready_workflows # + private_workflows + log.info(f'Found workflows {str(workflows)} in environment {env.environmentUri}') + for workflow in workflows: + log.info(f"Processing workflow name={workflow['name']}, id={workflow['id']}...") + existing_workflow = OmicsRepository(session).get_workflow_by_id(workflow['id']) + if existing_workflow is not None: + log.info( + f"Workflow name={workflow['name']}, id={workflow['id']} has already been registered in database. Updating information..." + ) + existing_workflow.name = workflow['name'] + existing_workflow.label = workflow['name'] + session.commit() + else: + log.info( + f"Workflow name={workflow['name']} , id={workflow['id']} in environment {env.environmentUri} is new. Registering..." 
+ ) + omicsWorkflow = OmicsWorkflow( + id=workflow['id'], + name=workflow['name'], + arn=workflow['arn'], + type=workflow['type'], + environmentUri=env.environmentUri, + label=workflow['name'], + owner=env.environmentUri, + ) + OmicsRepository(session).save_omics_workflow(omicsWorkflow) + return True + except Exception as e: + log.error(f'Error occured while processing omics workflow task due to: {e}') + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occured while processing omics workflow task', + error_logs=[str(e)], + process_name='Omics Workflow' + ) if __name__ == '__main__': ENVNAME = os.environ.get('envname', 'local') diff --git a/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py b/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py index ec9a05932..6c49c3dee 100644 --- a/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py +++ b/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py @@ -441,7 +441,7 @@ def process_revoked_shares(self) -> bool: return False return success - def verify_shares(self) -> bool: + def verify_shares_health_status(self) -> bool: """ 1) (in source namespace) Check the datashare exists 2) (in source namespace) Check that schema is added to datashare @@ -459,7 +459,7 @@ def verify_shares(self) -> bool: 9) (in target namespace) Check that the redshift role has select access to the requested table in the local db. 10) (in target namespace) Check that the redshift role has select access to the requested table in the external schema. """ - + share_object_item_health_status = True log.info('##### Verifying Redshift tables #######') if not self.tables: log.info('No tables to verify. 
Skipping...') @@ -599,11 +599,12 @@ def verify_shares(self) -> bool: ' | '.join(ds_level_errors) + ' | ' + ' | '.join(tbl_level_errors), datetime.now(), ) + share_object_item_health_status = False else: ShareStatusRepository.update_share_item_health_status( self.session, share_item, ShareItemHealthStatus.Healthy.value, None, datetime.now() ) - return True + return share_object_item_health_status def cleanup_shares(self) -> bool: """ diff --git a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py index 9bd47dd3c..fea3edcde 100644 --- a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py +++ b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py @@ -7,6 +7,7 @@ from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup from dataall.core.environment.services.environment_service import EnvironmentService from dataall.base.db import get_engine +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.s3_datasets.aws.glue_dataset_client import DatasetCrawler from dataall.modules.s3_datasets.aws.lf_table_client import LakeFormationTableClient from dataall.modules.s3_datasets.services.dataset_table_service import DatasetTableService @@ -20,60 +21,75 @@ def sync_tables(engine): - with engine.scoped_session() as session: - processed_tables = [] - all_datasets: [S3Dataset] = DatasetRepository.list_all_active_datasets(session) - log.info(f'Found {len(all_datasets)} datasets for tables sync') - dataset: S3Dataset - for dataset in all_datasets: - log.info(f'Synchronizing dataset {dataset.name}|{dataset.datasetUri} tables') - env: Environment = ( - session.query(Environment) - .filter( - and_( - Environment.environmentUri == dataset.environmentUri, - Environment.deleted.is_(None), + task_exceptions = [] + try: + with engine.scoped_session() as session: + processed_tables = [] + all_datasets: [S3Dataset] = 
DatasetRepository.list_all_active_datasets(session) + log.info(f'Found {len(all_datasets)} datasets for tables sync') + dataset: S3Dataset + for dataset in all_datasets: + log.info(f'Synchronizing dataset {dataset.name}|{dataset.datasetUri} tables') + env: Environment = ( + session.query(Environment) + .filter( + and_( + Environment.environmentUri == dataset.environmentUri, + Environment.deleted.is_(None), + ) ) + .first() + ) + env_group: EnvironmentGroup = EnvironmentService.get_environment_group( + session, dataset.SamlAdminGroupName, env.environmentUri ) - .first() - ) - env_group: EnvironmentGroup = EnvironmentService.get_environment_group( - session, dataset.SamlAdminGroupName, env.environmentUri - ) - try: - if not env or not is_assumable_pivot_role(env): - log.info(f'Dataset {dataset.GlueDatabaseName} has an invalid environment') - else: - tables = DatasetCrawler(dataset).list_glue_database_tables(dataset.S3BucketName) + try: + if not env or not is_assumable_pivot_role(env): + log.info(f'Dataset {dataset.GlueDatabaseName} has an invalid environment') + else: + tables = DatasetCrawler(dataset).list_glue_database_tables(dataset.S3BucketName) - log.info(f'Found {len(tables)} tables on Glue database {dataset.GlueDatabaseName}') + log.info(f'Found {len(tables)} tables on Glue database {dataset.GlueDatabaseName}') - DatasetTableService.sync_existing_tables(session, uri=dataset.datasetUri, glue_tables=tables) + DatasetTableService.sync_existing_tables(session, uri=dataset.datasetUri, glue_tables=tables) - tables = session.query(DatasetTable).filter(DatasetTable.datasetUri == dataset.datasetUri).all() + tables = session.query(DatasetTable).filter(DatasetTable.datasetUri == dataset.datasetUri).all() - log.info('Updating tables permissions on Lake Formation...') + log.info('Updating tables permissions on Lake Formation...') - for table in tables: - LakeFormationTableClient(table).grant_principals_all_table_permissions( - principals=[ - 
SessionHelper.get_delegation_role_arn(env.AwsAccountId, env.region), - env_group.environmentIAMRoleArn, - ], - ) + for table in tables: + LakeFormationTableClient(table).grant_principals_all_table_permissions( + principals=[ + SessionHelper.get_delegation_role_arn(env.AwsAccountId, env.region), + env_group.environmentIAMRoleArn, + ], + ) - processed_tables.extend(tables) + processed_tables.extend(tables) - DatasetTableIndexer.upsert_all(session, dataset_uri=dataset.datasetUri) - DatasetIndexer.upsert(session=session, dataset_uri=dataset.datasetUri) - except Exception as e: - log.error( - f'Failed to sync tables for dataset ' - f'{dataset.AwsAccountId}/{dataset.GlueDatabaseName} ' - f'due to: {e}' - ) - DatasetAlarmService().trigger_dataset_sync_failure_alarm(dataset, str(e)) - return processed_tables + DatasetTableIndexer.upsert_all(session, dataset_uri=dataset.datasetUri) + DatasetIndexer.upsert(session=session, dataset_uri=dataset.datasetUri) + except Exception as e: + log.error( + f'Failed to sync tables for dataset ' + f'{dataset.AwsAccountId}/{dataset.GlueDatabaseName} ' + f'due to: {e}' + ) + DatasetAlarmService().trigger_dataset_sync_failure_alarm(dataset, str(e)) + task_exceptions.append(str(e)) + return processed_tables + except Exception as e: + log.error( + f'Error while running table syncer task due to: {e}' + ) + task_exceptions.append(str(e)) + finally: + if len(task_exceptions) > 0: + AdminNotificationService().notify_admins_with_error_log( + process_name='Table Syncer', + error_logs=task_exceptions, + process_error='Error while running table syncer task' + ) def is_assumable_pivot_role(env: Environment): diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py index 97282b574..359666d82 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py 
+++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py @@ -353,8 +353,9 @@ def process_revoked_shares(self) -> bool: success = False return success - def verify_shares(self) -> bool: + def verify_shares_health_status(self) -> bool: log.info('##### Verifying tables #######') + share_object_item_health_status = True if not self.tables: log.info('No tables to verify. Skipping...') else: @@ -430,11 +431,12 @@ def verify_shares(self) -> bool: ' | '.join(manager.db_level_errors) + ' | ' + ' | '.join(manager.tbl_level_errors), datetime.now(), ) + share_object_item_health_status = False else: ShareStatusRepository.update_share_item_health_status( self.session, share_item, ShareItemHealthStatus.Healthy.value, None, datetime.now() ) - return True + return share_object_item_health_status def cleanup_shares(self) -> bool: """ diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py index d522bb79d..41b214490 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py @@ -172,7 +172,8 @@ def process_revoked_shares(self) -> bool: return success - def verify_shares(self) -> bool: + def verify_shares_health_status(self) -> bool: + share_object_item_health_status = True log.info('##### Verifying folders shares #######') if not self.folders: log.info('No Folders to verify. 
Skipping...') @@ -208,11 +209,12 @@ def verify_shares(self) -> bool: ' | '.join(manager.folder_errors), datetime.now(), ) + share_object_item_health_status = False else: ShareStatusRepository.update_share_item_health_status( self.session, sharing_item, ShareItemHealthStatus.Healthy.value, None, datetime.now() ) - return True + return share_object_item_health_status def cleanup_shares(self) -> bool: """ diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py index 447988156..ef968ed5d 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py @@ -158,7 +158,8 @@ def process_revoked_shares(self) -> bool: return success - def verify_shares(self) -> bool: + def verify_shares_health_status(self) -> bool: + share_object_item_health_status = True log.info('##### Verifying S3 bucket share #######') if not self.buckets: log.info('No Buckets to verify. 
Skipping...') @@ -192,11 +193,12 @@ def verify_shares(self) -> bool: ' | '.join(manager.bucket_errors), datetime.now(), ) + share_object_item_health_status = False else: ShareStatusRepository.update_share_item_health_status( self.session, sharing_item, ShareItemHealthStatus.Healthy.value, None, datetime.now() ) - return True + return share_object_item_health_status def cleanup_shares(self) -> bool: """ diff --git a/backend/dataall/modules/shares_base/services/share_notification_service.py b/backend/dataall/modules/shares_base/services/share_notification_service.py index f8e664883..f03317bb9 100644 --- a/backend/dataall/modules/shares_base/services/share_notification_service.py +++ b/backend/dataall/modules/shares_base/services/share_notification_service.py @@ -1,6 +1,7 @@ import logging import enum import os +from typing import List from dataall.base.config import config from dataall.core.tasks.db.task_models import Task @@ -24,6 +25,9 @@ class DataSharingNotificationType(enum.Enum): SHARE_OBJECT_EXTENSION_REJECTED = 'SHARE_OBJECT_EXTENSION_REJECTED' SHARE_OBJECT_REJECTED = 'SHARE_OBJECT_REJECTED' SHARE_OBJECT_PENDING_APPROVAL = 'SHARE_OBJECT_PENDING_APPROVAL' + SHARE_OBJECT_FAILED = 'SHARE_OBJECT_FAILED' + SHARE_OBJECT_UNHEALTHY = 'SHARE_OBJECT_UNHEALTHY' + SHARE_OBJECT_HEALTHY = 'SHARE_OBJECT_HEALTHY' DATASET_VERSION = 'DATASET_VERSION' @@ -102,7 +106,7 @@ def notify_persistent_email_reminder(self, email_id: str): msg=msg_intro.replace('
', '').replace('', '').replace('', ''), ) - self._create_and_send_email_notifications( + SESEmailNotificationService.create_and_send_email_notifications( subject=subject, msg=email_notification_msg, recipient_groups_list=[self.dataset.SamlAdminGroupName, self.dataset.stewards], @@ -219,7 +223,7 @@ def notify_share_expiration_to_owners(self): msg=msg_intro.replace('
', '').replace('', '').replace('', ''), ) - self._create_and_send_email_notifications( + SESEmailNotificationService.create_and_send_email_notifications( subject=subject, msg=email_notification_msg, recipient_groups_list=[self.dataset.SamlAdminGroupName, self.dataset.stewards], @@ -253,11 +257,84 @@ def notify_share_expiration_to_requesters(self): msg=msg_intro.replace('
', '').replace('', '').replace('', ''), ) - self._create_and_send_email_notifications( + SESEmailNotificationService.create_and_send_email_notifications( subject=subject, msg=email_notification_msg, recipient_groups_list=[self.share.groupUri] ) return notifications + def notify_share_object_failed(self): + share_link_text = '' + if os.environ.get('frontend_domain_url'): + share_link_text = ( + f'

Please visit data.all share link ' + f'to take action or view more details' + ) + msg = ( + f'Share request made for dataset: {self.dataset.label} with requestor principal: {self.share.principalRoleName} failed.

' + f'You can delete and resubmit the failed items in the share. If your share item still remains in the Share_Failed state then please get in touch with data.all admins.' + ) + subject = f'Data.all | Attention Required | Share failed for {self.dataset.label}' + email_notification_msg = msg + share_link_text + + notifications = self.register_notifications( + notification_type=DataSharingNotificationType.SHARE_OBJECT_FAILED.value, msg=msg + ) + + SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=email_notification_msg, recipient_groups_list=[self.share.groupUri, self.dataset.SamlAdminGroupName, self.dataset.stewards]) + + return notifications + + # Send notification when any of the share item gets into unhealthy state + def notify_share_object_items_unhealthy(self): + share_link_text = '' + if os.environ.get('frontend_domain_url'): + share_link_text = ( + f'

Please visit data.all share link ' + f'to take action or view more details' + ) + msg = ( + f'Hello Team,
' + f'Your share with share uri: {self.share.shareUri} has one or more unhealthy share items.

' + f'Once you visit your share link you can click on the Reapply button and this should correct your share and get it into an healthy state. If this doesn\'t get your share in healthy state then please get in touch with data.all admins for your share.' + f'
If you are using any terraform / cloudformation or any other IaC to also manage your bucket policy, kms policy and requestor IAM role, please make them aware of the data.all changes so that they don\'t wipe off data.all related policies' + ) + subject = f'Data.all | Attention Required | Share for {self.dataset.label} dataset in unhealthy state' + email_notification_msg = msg + share_link_text + + notifications = self.register_notifications( + notification_type=DataSharingNotificationType.SHARE_OBJECT_UNHEALTHY.value, msg=msg, to_recipients=[self.share.groupUri] + ) + + SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=email_notification_msg, recipient_groups_list=[self.share.groupUri]) + + return notifications + + # Send notifications when a share gets into healthy state. + # These notifications are sent when a share which was initially unhealthy goes into healthy state + def notify_share_object_items_healthy(self): + share_link_text = '' + if os.environ.get('frontend_domain_url'): + share_link_text = ( + f'

Please visit data.all share link ' + f'to take action or view more details' + ) + msg = ( + f'Hello Team,
' + f'Your share with share uri: {self.share.shareUri} is in healthy state
' + ) + subject = f'Data.all | Share for {self.dataset.label} dataset now in healthy state' + email_notification_msg = msg + share_link_text + + notifications = self.register_notifications( + notification_type=DataSharingNotificationType.SHARE_OBJECT_HEALTHY.value, msg=msg, to_recipients=[self.share.groupUri] + ) + + SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=email_notification_msg, recipient_groups_list=[self.share.groupUri]) + return notifications + def _get_share_object_targeted_users(self): targeted_users = list() targeted_users.append(self.dataset.SamlAdminGroupName) @@ -266,15 +343,20 @@ def _get_share_object_targeted_users(self): targeted_users.append(self.share.groupUri) return targeted_users - def register_notifications(self, notification_type, msg): + def register_notifications(self, notification_type, msg, to_recipients: List[any] = None): """ Notifications sent to: + if to_recipients is None - dataset.SamlAdminGroupName - dataset.stewards - share.groupUri + else + - to_recipients """ + if not to_recipients: + to_recipients = self.notification_target_users notifications = [] - for recipient in self.notification_target_users: + for recipient in to_recipients: log.info(f'Creating notification for {recipient}, msg {msg}') notifications.append( NotificationRepository.create_notification( @@ -333,29 +415,3 @@ def _create_notification_task(self, subject, msg): else: log.info('Notifications are not active') - def _create_and_send_email_notifications(self, subject, msg, recipient_groups_list=None, recipient_email_ids=None): - """ - Method to directly send email notification instead of creating an SQS Task - This approach is used while sending email notifications in an ECS task ( e.g. 
persistent email reminder task, share expiration task, etc ) - Emails send to groups mentioned in recipient_groups_list and / or emails mentioned in recipient_email_ids - """ - if recipient_groups_list is None: - recipient_groups_list = [] - if recipient_email_ids is None: - recipient_email_ids = [] - - share_notification_config = config.get_property( - 'modules.datasets_base.features.share_notifications', default=None - ) - if share_notification_config: - for share_notification_config_type in share_notification_config.keys(): - n_config = share_notification_config[share_notification_config_type] - if n_config.get('active', False) == True: - if share_notification_config_type == 'email': - SESEmailNotificationService.send_email_task( - subject, msg, recipient_groups_list, recipient_email_ids - ) - else: - log.info(f'Notification type : {share_notification_config_type} is not active') - else: - log.info('Notifications are not active') diff --git a/backend/dataall/modules/shares_base/services/share_processor_manager.py b/backend/dataall/modules/shares_base/services/share_processor_manager.py index e16637e56..4c989c9aa 100644 --- a/backend/dataall/modules/shares_base/services/share_processor_manager.py +++ b/backend/dataall/modules/shares_base/services/share_processor_manager.py @@ -19,7 +19,7 @@ def process_revoked_shares(self) -> bool: ... @abstractmethod - def verify_shares(self) -> bool: + def verify_shares_health_status(self) -> bool: """Executes a series of actions to verify share items using the share manager. Returns True if the verifying was successful""" ... 
diff --git a/backend/dataall/modules/shares_base/services/sharing_service.py b/backend/dataall/modules/shares_base/services/sharing_service.py index dd8749bb5..dc1711a21 100644 --- a/backend/dataall/modules/shares_base/services/sharing_service.py +++ b/backend/dataall/modules/shares_base/services/sharing_service.py @@ -1,14 +1,17 @@ import logging from dataclasses import dataclass -from typing import Any +from typing import Any, List + from dataall.core.resource_lock.db.resource_lock_repositories import ResourceLockRepository from dataall.base.db import Engine from dataall.core.environment.db.environment_models import ConsumptionRole, Environment, EnvironmentGroup +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.shares_base.db.share_object_state_machines import ( ShareObjectSM, ShareItemSM, ) +from dataall.modules.shares_base.services.share_notification_service import ShareNotificationService from dataall.modules.shares_base.services.shares_enums import ( ShareItemHealthStatus, ShareObjectActions, @@ -127,11 +130,16 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: log.exception('Error occurred during share approval') new_share_item_state = share_item_sm.run_transition(ShareItemActions.Failure.value) share_item_sm.update_state(session, share_data.share.shareUri, new_share_item_state) - return False + share_successful = False finally: new_share_state = share_object_sm.run_transition(ShareObjectActions.Finish.value) share_object_sm.update_state(session, share_data.share, new_share_state) + if not share_successful: + # Create UI and email notifications + ShareNotificationService(session=session, dataset=share_data.dataset, + share=share_data.share).notify_share_object_failed() + return share_successful @classmethod def revoke_share(cls, engine: Engine, share_uri: str) -> bool: @@ -224,7 +232,7 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: log.error(f'Error 
occurred during share revoking: {e}') new_share_item_state = share_item_sm.run_transition(ShareItemActions.Failure.value) share_item_sm.update_state(session, share_data.share.shareUri, new_share_item_state) - return False + revoke_successful = False finally: existing_pending_items = ShareStatusRepository.check_pending_share_items(session, share_uri) @@ -233,6 +241,11 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: else: new_share_state = share_sm.run_transition(ShareObjectActions.Finish.value) share_sm.update_state(session, share_data.share, new_share_state) + if not revoke_successful: + # Create UI and email notifications + ShareNotificationService(session=session, dataset=share_data.dataset, + share=share_data.share).notify_share_object_failed() + return revoke_successful @classmethod def verify_share( @@ -255,6 +268,7 @@ def verify_share( ------- """ with engine.scoped_session() as session: + share_object_item_health_status: List = [] share_data, share_items = cls._get_share_data_and_items(session, share_uri, status, healthStatus) for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): try: @@ -268,12 +282,19 @@ def verify_share( healthStatus=healthStatus, ) if shareable_items: - processor.Processor(session, share_data, shareable_items).verify_shares() + health_status = processor.Processor(session, share_data, shareable_items).verify_shares_health_status() + share_object_item_health_status.append(health_status) else: log.info(f'There are no items to verify of type {type.value}') except Exception as e: log.error(f'Error occurred during share verifying of {type.value}: {e}') + AdminNotificationService().notify_admins_with_error_log( + process_error=f'Error occurred during verification of share with uri: {share_data.share.shareUri} for processor type: {type.value} due to an unknown exception', + error_logs=[str(e)], process_name='Sharing Service') + if False in share_object_item_health_status: + log.info(f'Sending notifications 
since share object item(s) for share: {share_data.share.shareUri} are in unhealthy state after verifying shares') + ShareNotificationService(session=session, dataset=share_data.dataset, share=share_data.share).notify_share_object_items_unhealthy() return True @classmethod @@ -295,6 +316,7 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: False if any re-apply of share item(s) failed """ reapply_successful = True + code_exception_list = [] with engine.scoped_session() as session: share_data, share_items = cls._get_share_data_and_items( session, share_uri, None, ShareItemHealthStatus.PendingReApply.value @@ -334,11 +356,23 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: log.info(f'Reapplying {type.value} succeeded = {success}') if not success: reapply_successful = False + if success: + log.info(f'Sending notifications to the share owner to inform that the share with uri: {share_data.share.shareUri} is now in healthy state') + ShareNotificationService(session=session, dataset=share_data.dataset, + share=share_data.share).notify_share_object_items_healthy() else: log.info(f'There are no items to reapply of type {type.value}') except Exception as e: log.error(f'Error occurred during share reapplying of {type.value}: {e}') - + AdminNotificationService().notify_admins_with_error_log( + process_error=f'Error occurred during reapplying of share with uri: {share_data.share.shareUri} for processor type: {type.value} due to an unknown exception', + error_logs=[str(e)], process_name='Sharing Service') + + if not reapply_successful: + log.info( + f'Sending notifications since share object item(s) for share: {share_data.share.shareUri} are in unhealthy state after reapplying shares') + ShareNotificationService(session=session, dataset=share_data.dataset, + share=share_data.share).notify_share_object_items_unhealthy() return reapply_successful except ResourceLockTimeout as e: @@ -349,11 +383,19 @@ def reapply_share(cls, engine: Engine, 
share_uri: str) -> bool: new_status=ShareItemHealthStatus.Unhealthy.value, message=str(e), ) + code_exception_list.append(str(e)) except Exception as e: log.exception('Error occurred during share approval') + code_exception_list.append(str(e)) return False + finally: + if len(code_exception_list) > 0: + AdminNotificationService().notify_admins_with_error_log( + process_error=f'Error occurred during reapplying of share with uri: {share_data.share.shareUri}', + error_logs=[str(e)], process_name='Sharing Service') + @classmethod def cleanup_share( cls, diff --git a/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py b/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py index 42635f6f9..54d2574b8 100644 --- a/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py +++ b/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py @@ -2,6 +2,7 @@ import os import sys from dataall.base.loader import load_modules, ImportMode +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.shares_base.db.share_object_models import ShareObject from dataall.base.db import get_engine from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository @@ -17,20 +18,28 @@ def persistent_email_reminders(engine): A method used by the scheduled ECS Task to run persistent_email_reminder() process against ALL active share objects within data.all and send emails to all pending shares. 
""" - with engine.scoped_session() as session: - log.info('Running Persistent Email Reminders Task') - pending_shares = ShareObjectRepository.fetch_submitted_shares_with_notifications(session=session) - log.info(f'Found {len(pending_shares)} pending shares') - pending_share: ShareObject - for pending_share in pending_shares: - log.info(f'Sending Email Reminder for Share: {pending_share.shareUri}') - share = ShareObjectRepository.get_share_by_uri(session, pending_share.shareUri) - dataset = DatasetBaseRepository.get_dataset_by_uri(session, share.datasetUri) - ShareNotificationService(session=session, dataset=dataset, share=share).notify_persistent_email_reminder( - email_id=share.owner - ) - log.info(f'Email reminder sent for share {share.shareUri}') - log.info('Completed Persistent Email Reminders Task') + try: + with engine.scoped_session() as session: + log.info('Running Persistent Email Reminders Task') + pending_shares = ShareObjectRepository.fetch_submitted_shares_with_notifications(session=session) + log.info(f'Found {len(pending_shares)} pending shares') + pending_share: ShareObject + for pending_share in pending_shares: + log.info(f'Sending Email Reminder for Share: {pending_share.shareUri}') + share = ShareObjectRepository.get_share_by_uri(session, pending_share.shareUri) + dataset = DatasetBaseRepository.get_dataset_by_uri(session, share.datasetUri) + ShareNotificationService(session=session, dataset=dataset, share=share).notify_persistent_email_reminder( + email_id=share.owner + ) + log.info(f'Email reminder sent for share {share.shareUri}') + log.info('Completed Persistent Email Reminders Task') + except Exception as e: + log.error(f'Error while running persistent email reminder task: {e}') + AdminNotificationService().notify_admins_with_error_log( + process_name='Persistent Email Service', + error_logs=[str(e)], + process_error='Error while running persistent email reminder task' + ) if __name__ == '__main__': diff --git 
a/backend/dataall/modules/shares_base/tasks/share_expiration_task.py b/backend/dataall/modules/shares_base/tasks/share_expiration_task.py index 51328e55a..b2ddde75d 100644 --- a/backend/dataall/modules/shares_base/tasks/share_expiration_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_expiration_task.py @@ -4,6 +4,7 @@ from datetime import datetime from dataall.base.loader import load_modules, ImportMode from dataall.base.db import get_engine +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository from dataall.modules.shares_base.db.share_object_state_machines import ShareObjectSM, ShareItemSM from dataall.modules.shares_base.db.share_state_machines_repositories import ShareStatusRepository @@ -19,56 +20,69 @@ def share_expiration_checker(engine): """ Checks all the share objects which have expiryDate on them and then revokes or notifies users based on if its expired or not """ - with engine.scoped_session() as session: - log.info('Starting share expiration task') - shares = ShareObjectRepository.get_all_active_shares_with_expiration(session) - log.info(f'Fetched {len(shares)} active shares with expiration') - for share in shares: - try: - if share.expiryDate.date() < datetime.today().date(): - log.info(f'Revoking share with uri: {share.shareUri} as it is expired') - # Put all share items in revoke state and then revoke - share_items_to_revoke = ShareObjectRepository.get_all_share_items_in_share( - session, share.shareUri, ['Share_Succeeded'] - ) - item_uris = [share_item.shareItemUri for share_item in share_items_to_revoke] - revoked_items_states = ShareStatusRepository.get_share_items_states( - session, share.shareUri, item_uris - ) + task_exceptions = [] + try: + with engine.scoped_session() as session: + log.info('Starting share expiration task') + shares = 
ShareObjectRepository.get_all_active_shares_with_expiration(session) + log.info(f'Fetched {len(shares)} active shares with expiration') + for share in shares: + try: + if share.expiryDate.date() < datetime.today().date(): + log.info(f'Revoking share with uri: {share.shareUri} as it is expired') + # Put all share items in revoke state and then revoke + share_items_to_revoke = ShareObjectRepository.get_all_share_items_in_share( + session, share.shareUri, ['Share_Succeeded'] + ) + item_uris = [share_item.shareItemUri for share_item in share_items_to_revoke] + revoked_items_states = ShareStatusRepository.get_share_items_states( + session, share.shareUri, item_uris + ) - share_sm = ShareObjectSM(share.status) - new_share_state = share_sm.run_transition(ShareObjectActions.RevokeItems.value) + share_sm = ShareObjectSM(share.status) + new_share_state = share_sm.run_transition(ShareObjectActions.RevokeItems.value) - for item_state in revoked_items_states: - item_sm = ShareItemSM(item_state) - new_state = item_sm.run_transition(ShareObjectActions.RevokeItems.value) - for item in share_items_to_revoke: - if item.status == item_state: - item_sm.update_state_single_item(session, item, new_state) + for item_state in revoked_items_states: + item_sm = ShareItemSM(item_state) + new_state = item_sm.run_transition(ShareObjectActions.RevokeItems.value) + for item in share_items_to_revoke: + if item.status == item_state: + item_sm.update_state_single_item(session, item, new_state) - share_sm.update_state(session, share, new_share_state) - SharingService.revoke_share(engine=engine, share_uri=share.shareUri) - else: - log.info(f'Share with share uri: {share.shareUri} has not yet expired') - dataset = DatasetBaseRepository.get_dataset_by_uri(session, share.datasetUri) - if share.submittedForExtension: - log.info( - f'Sending notifications to the owners: {dataset.SamlAdminGroupName}, {dataset.stewards} as share extension requested for share with uri: {share.shareUri}' - ) - 
ShareNotificationService( - session=session, dataset=dataset, share=share - ).notify_share_expiration_to_owners() + share_sm.update_state(session, share, new_share_state) + SharingService.revoke_share(engine=engine, share_uri=share.shareUri) else: - log.info( - f'Sending notifications to the requesters with group: {share.groupUri} as share extension is not requested for share with uri: {share.shareUri}' - ) - ShareNotificationService( - session=session, dataset=dataset, share=share - ).notify_share_expiration_to_requesters() - except Exception as e: - log.error( - f'Error occured while processing share expiration processing for share with URI: {share.shareUri} due to: {e}' - ) + log.info(f'Share with share uri: {share.shareUri} has not yet expired') + dataset = DatasetBaseRepository.get_dataset_by_uri(session, share.datasetUri) + if share.submittedForExtension: + log.info( + f'Sending notifications to the owners: {dataset.SamlAdminGroupName}, {dataset.stewards} as share extension requested for share with uri: {share.shareUri}' + ) + ShareNotificationService( + session=session, dataset=dataset, share=share + ).notify_share_expiration_to_owners() + else: + log.info( + f'Sending notifications to the requesters with group: {share.groupUri} as share extension is not requested for share with uri: {share.shareUri}' + ) + ShareNotificationService( + session=session, dataset=dataset, share=share + ).notify_share_expiration_to_requesters() + except Exception as e: + log.error( + f'Error occurred while processing share expiration processing for share with URI: {share.shareUri} due to: {e}' + ) + task_exceptions.append(f'Error occurred while processing share expiration processing for share with URI: {share.shareUri} due to: {e}') + except Exception as e: + log.error(f'Error occurred while processing share expiration due to : {e}') + task_exceptions.append(f'Error occurred while processing share expiration due to: {e}') + finally: + if len(task_exceptions) > 0: + 
AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while processing share expiration task', + error_logs=task_exceptions, + process_name='Share Expiration Task' + ) if __name__ == '__main__': diff --git a/backend/dataall/modules/shares_base/tasks/share_manager_task.py b/backend/dataall/modules/shares_base/tasks/share_manager_task.py index d120251c9..585b76397 100644 --- a/backend/dataall/modules/shares_base/tasks/share_manager_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_manager_task.py @@ -2,6 +2,7 @@ import os import sys +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.shares_base.services.sharing_service import SharingService from dataall.base.db import get_engine from dataall.base.loader import load_modules, ImportMode @@ -25,4 +26,9 @@ except Exception as e: log.error(f'Sharing task failed due to: {e}') + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while running Sharing task', + error_logs=[str(e)], + process_name='Sharing Service' + ) raise e diff --git a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py index 225f069bd..3c337e04d 100644 --- a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py @@ -1,7 +1,9 @@ import logging import os import sys +from typing import List +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository from dataall.modules.shares_base.db.share_object_models import ShareObject from dataall.modules.shares_base.db.share_state_machines_repositories import ShareStatusRepository @@ -17,47 +19,71 @@ class EcsBulkShareRepplyService: @classmethod def 
process_reapply_shares_for_dataset(cls, engine, dataset_uri): - with engine.scoped_session() as session: - processed_share_objects = [] - share_objects_for_dataset = ShareObjectRepository.list_active_share_object_for_dataset( - session=session, dataset_uri=dataset_uri - ) - log.info(f'Found {len(share_objects_for_dataset)} active share objects on dataset with uri: {dataset_uri}') - share_object: ShareObject - for share_object in share_objects_for_dataset: - log.info( - f'Re-applying Share Items for Share Object (Share URI: {share_object.shareUri} ) with Requestor: {share_object.principalId} on Target Dataset: {share_object.datasetUri}' - ) - processed_share_objects.append(share_object.shareUri) - ShareStatusRepository.update_share_item_health_status_batch( - session=session, - share_uri=share_object.shareUri, - old_status=ShareItemHealthStatus.Unhealthy.value, - new_status=ShareItemHealthStatus.PendingReApply.value, + task_exceptions = [] + try: + with engine.scoped_session() as session: + share_objects_for_dataset = ShareObjectRepository.list_active_share_object_for_dataset( + session=session, dataset_uri=dataset_uri ) - SharingService.reapply_share(engine, share_uri=share_object.shareUri) + log.info(f'Found {len(share_objects_for_dataset)} active share objects on dataset with uri: {dataset_uri}') + processed_share_objects, task_exceptions = cls._reapply_share_objects(engine=engine, session=session, share_objects=share_objects_for_dataset) return processed_share_objects + except Exception as e: + log.error(f'Error occurred while reapplying share task due to: {e}') + task_exceptions.append(f'Error occurred while reapplying share task due to: {e}') + finally: + if len(task_exceptions) > 0: + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while processing share during reapplying task', + error_logs=task_exceptions, + process_name='Share Reapplier Task' + ) @classmethod - def process_reapply_shares(cls, engine): - with 
engine.scoped_session() as session: - processed_share_objects = [] - all_share_objects: [ShareObject] = ShareObjectRepository.list_all_active_share_objects(session) - log.info(f'Found {len(all_share_objects)} share objects ') - share_object: ShareObject - for share_object in all_share_objects: - log.info( - f'Re-applying Share Items for Share Object with Requestor: {share_object.principalId} on Target Dataset: {share_object.datasetUri}' - ) - processed_share_objects.append(share_object.shareUri) - ShareStatusRepository.update_share_item_health_status_batch( - session=session, - share_uri=share_object.shareUri, - old_status=ShareItemHealthStatus.Unhealthy.value, - new_status=ShareItemHealthStatus.PendingReApply.value, - ) + def _reapply_share_objects(cls, engine, session, share_objects: List[ShareObject]): + share_object: ShareObject + processed_share_objects = [] + task_exceptions = [] + for share_object in share_objects: + log.info( + f'Re-applying Share Items for Share Object, Share URI: {share_object.shareUri} ) with Requestor: {share_object.principalId} on Target Dataset: {share_object.datasetUri}' + ) + processed_share_objects.append(share_object.shareUri) + ShareStatusRepository.update_share_item_health_status_batch( + session=session, + share_uri=share_object.shareUri, + old_status=ShareItemHealthStatus.Unhealthy.value, + new_status=ShareItemHealthStatus.PendingReApply.value, + ) + try: SharingService.reapply_share(engine, share_uri=share_object.shareUri) + except Exception as e: + log.error( + f'Error occurred while reapplying share for share with uri:{share_object.shareUri} due to: {e}') + task_exceptions.append( + f'Error occurred while reapplying share for share with uri:{share_object.shareUri} due to: {e}') + return (processed_share_objects, task_exceptions) + @classmethod + def process_reapply_shares(cls, engine): + task_exceptions = [] + try: + with engine.scoped_session() as session: + all_share_objects: [ShareObject] = 
ShareObjectRepository.list_all_active_share_objects(session) + log.info(f'Found {len(all_share_objects)} share objects ') + share_object: ShareObject + processed_share_objects, task_exceptions = cls._reapply_share_objects(engine=engine, session=session, + share_objects=all_share_objects) return processed_share_objects + except Exception as e: + log.error(f'Error occurred while reapplying share task due to: {e}') + task_exceptions.append(f'Error occurred while reapplying share task due to: {e}') + finally: + if len(task_exceptions) > 0: + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while processing share during reapplying task', + error_logs=task_exceptions, + process_name='Share Reapplier Task' + ) def reapply_shares(engine, dataset_uri): diff --git a/backend/dataall/modules/shares_base/tasks/share_verifier_task.py b/backend/dataall/modules/shares_base/tasks/share_verifier_task.py index a731a5756..1114844d1 100644 --- a/backend/dataall/modules/shares_base/tasks/share_verifier_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_verifier_task.py @@ -1,6 +1,8 @@ import logging import os import sys + +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository from dataall.modules.shares_base.db.share_object_models import ShareObject from dataall.modules.shares_base.services.shares_enums import ShareItemStatus @@ -19,20 +21,36 @@ def verify_shares(engine): A method used by the scheduled ECS Task to run verify_shares() process against ALL shared items in ALL active share objects within data.all and update the health status of those shared items. 
""" - with engine.scoped_session() as session: - processed_share_objects = [] - all_share_objects: [ShareObject] = ShareObjectRepository.list_all_active_share_objects(session) - log.info(f'Found {len(all_share_objects)} share objects verify ') - share_object: ShareObject - for share_object in all_share_objects: - log.info( - f'Verifying Share Items for Share Object with Requestor: {share_object.principalId} on Target Dataset: {share_object.datasetUri}' - ) - processed_share_objects.append(share_object.shareUri) - SharingService.verify_share( - engine, share_uri=share_object.shareUri, status=ShareItemStatus.Share_Succeeded.value, healthStatus=None + task_exceptions = [] + try: + with engine.scoped_session() as session: + processed_share_objects = [] + all_share_objects: [ShareObject] = ShareObjectRepository.list_all_active_share_objects(session) + log.info(f'Found {len(all_share_objects)} share objects verify ') + share_object: ShareObject + for share_object in all_share_objects: + log.info( + f'Verifying Share Items for Share Object with Requestor: {share_object.principalId} on Target Dataset: {share_object.datasetUri}' + ) + processed_share_objects.append(share_object.shareUri) + try: + SharingService.verify_share( + engine, share_uri=share_object.shareUri, status=ShareItemStatus.Share_Succeeded.value, healthStatus=None + ) + except Exception as e: + log.error(f'Error occurred while verifying share with uri: {share_object.shareUri} due to: {e}') + task_exceptions.append(f'Error occurred while verifying share with uri: {share_object.shareUri} due to: {e}') + return processed_share_objects + except Exception as e: + log.error(f'Error occurred while verifying shares task due to: {e}') + task_exceptions.append(f'Error occurred while verifying shares task due to: {e}') + finally: + if len(task_exceptions) > 0: + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while verifying shares task', + error_logs=task_exceptions, + 
process_name='Share Verifier' ) - return processed_share_objects def trigger_reapply_task(): diff --git a/deploy/stacks/container.py b/deploy/stacks/container.py index a65a609e6..e555e8c7c 100644 --- a/deploy/stacks/container.py +++ b/deploy/stacks/container.py @@ -161,7 +161,7 @@ def __init__( command=['python3.9', '-m', 'dataall.core.environment.tasks.env_stacks_updater'], container_id='container', ecr_repository=ecr_repository, - environment=self._create_env(), + environment=self.env_vars, image_tag=self._cdkproxy_image_tag, log_group=self.create_log_group(envname, resource_prefix, log_group_name='stacks-updater'), schedule_expression=Schedule.expression('cron(0 1 * * ? *)'), @@ -217,7 +217,7 @@ def add_catalog_indexer_task(self): command=['python3.9', '-m', 'dataall.modules.catalog.tasks.catalog_indexer_task'], container_id=container_id, ecr_repository=self._ecr_repository, - environment=self._create_env(), + environment=self.env_vars, image_tag=self._cdkproxy_image_tag, log_group=self.create_log_group(self._envname, self._resource_prefix, log_group_name='catalog-indexer'), schedule_expression=Schedule.expression('rate(6 hours)'), @@ -261,7 +261,7 @@ def add_share_management_task(self): f'ShareManagementTaskContainer{self._envname}', container_name='container', image=ecs.ContainerImage.from_ecr_repository(repository=self._ecr_repository, tag=self._cdkproxy_image_tag), - environment=self._create_env(), + environment=self.env_vars, command=['python3.9', '-m', 'dataall.modules.shares_base.tasks.share_manager_task'], logging=ecs.LogDriver.aws_logs( stream_prefix='task', @@ -292,7 +292,7 @@ def add_share_verifier_task(self): command=['python3.9', '-m', 'dataall.modules.shares_base.tasks.share_verifier_task'], container_id='container', ecr_repository=self._ecr_repository, - environment=self._create_env(), + environment=self.env_vars, image_tag=self._cdkproxy_image_tag, log_group=self.create_log_group(self._envname, self._resource_prefix, 
log_group_name='share-verifier'), schedule_expression=Schedule.expression('rate(7 days)'), @@ -321,7 +321,7 @@ def add_share_reapplier_task(self): f'ShareReapplierTaskContainer{self._envname}', container_name='container', image=ecs.ContainerImage.from_ecr_repository(repository=self._ecr_repository, tag=self._cdkproxy_image_tag), - environment=self._create_env(), + environment=self.env_vars, command=['python3.9', '-m', 'dataall.modules.shares_base.tasks.share_reapplier_task'], logging=ecs.LogDriver.aws_logs( stream_prefix='task', @@ -383,7 +383,7 @@ def add_subscription_task(self): ], container_id='container', ecr_repository=self._ecr_repository, - environment=self._create_env(), + environment=self.env_vars, image_tag=self._cdkproxy_image_tag, log_group=self.create_log_group(self._envname, self._resource_prefix, log_group_name='subscriptions'), schedule_expression=Schedule.expression('rate(15 minutes)'), @@ -403,7 +403,7 @@ def add_sync_dataset_table_task(self): command=['python3.9', '-m', 'dataall.modules.s3_datasets.tasks.tables_syncer'], container_id='container', ecr_repository=self._ecr_repository, - environment=self._create_env(), + environment=self.env_vars, image_tag=self._cdkproxy_image_tag, log_group=self.create_log_group(self._envname, self._resource_prefix, log_group_name='tables-syncer'), schedule_expression=Schedule.expression('rate(15 minutes)'), @@ -423,7 +423,7 @@ def add_omics_fetch_workflows_task(self): command=['python3.9', '-m', 'dataall.modules.omics.tasks.omics_workflows_fetcher'], container_id='container', ecr_repository=self._ecr_repository, - environment=self._create_env(), + environment=self.env_vars, image_tag=self._cdkproxy_image_tag, log_group=self.create_log_group( self._envname, self._resource_prefix, log_group_name='omics-workflows-fetcher' diff --git a/tests/modules/redshift_datasets_shares/test_redshift_table_processor.py b/tests/modules/redshift_datasets_shares/test_redshift_table_processor.py index d49009ec8..96dac2f96 100644 --- 
a/tests/modules/redshift_datasets_shares/test_redshift_table_processor.py +++ b/tests/modules/redshift_datasets_shares/test_redshift_table_processor.py @@ -226,7 +226,7 @@ def test_verify_redshift_cross_account_share_all_successful( mock_redshift_shares, ): # When - response = redshift_processor_cross_account.verify_shares() + response = redshift_processor_cross_account.verify_shares_health_status() # Then assert_that(response).is_true() mock_redshift_data_shares.return_value.check_datashare_exists.assert_called_with( @@ -276,7 +276,7 @@ def test_verify_redshift_share_datashare_does_not_exist( # Given mock_redshift_data_shares.return_value.check_datashare_exists.return_value = False # When - response = redshift_processor_cross_account.verify_shares() + response = redshift_processor_cross_account.verify_shares_health_status() # Then with db.scoped_session() as session: item = ShareObjectRepository.get_share_item_by_uri(session, redshift_requested_table.shareItemUri) @@ -290,7 +290,7 @@ def test_verify_redshift_schema_not_added_to_datashare( # Given mock_redshift_data_shares.return_value.check_schema_in_datashare.return_value = False # When - response = redshift_processor_cross_account.verify_shares() + response = redshift_processor_cross_account.verify_shares_health_status() # Then with db.scoped_session() as session: item = ShareObjectRepository.get_share_item_by_uri(session, redshift_requested_table.shareItemUri) @@ -304,7 +304,7 @@ def test_verify_consumer_permissions_to_datashare_wrong_status_cross_account( # Given mock_redshift_shares.return_value.get_datashare_status.return_value = 'UNAUTHORIZED' # When - response = redshift_processor_cross_account.verify_shares() + response = redshift_processor_cross_account.verify_shares_health_status() with db.scoped_session() as session: item = ShareObjectRepository.get_share_item_by_uri(session, redshift_requested_table.shareItemUri) assert_that(item.healthStatus).is_equal_to(ShareItemHealthStatus.Unhealthy.value) @@ 
-317,7 +317,7 @@ def test_verify_consumer_permissions_to_datashare_same_account( # Given mock_redshift_data_shares.return_value.check_consumer_permissions_to_datashare.return_value = False # When - response = redshift_processor_same_account.verify_shares() + response = redshift_processor_same_account.verify_shares_health_status() with db.scoped_session() as session: item = ShareObjectRepository.get_share_item_by_uri(session, redshift_requested_table_2.shareItemUri) assert_that(item.healthStatus).is_equal_to(ShareItemHealthStatus.Unhealthy.value) @@ -331,7 +331,7 @@ def test_verify_redshift_share_database_does_not_exist( # Given mock_redshift_data_shares.return_value.check_database_exists.return_value = False # When - response = redshift_processor_cross_account.verify_shares() + response = redshift_processor_cross_account.verify_shares_health_status() # Then with db.scoped_session() as session: item = ShareObjectRepository.get_share_item_by_uri(session, redshift_requested_table.shareItemUri) @@ -345,7 +345,7 @@ def test_verify_role_permissions_to_database( # Given mock_redshift_data_shares.return_value.check_role_permissions_in_database.return_value = False # When - response = redshift_processor_cross_account.verify_shares() + response = redshift_processor_cross_account.verify_shares_health_status() # Then with db.scoped_session() as session: item = ShareObjectRepository.get_share_item_by_uri(session, redshift_requested_table.shareItemUri) @@ -361,7 +361,7 @@ def test_verify_external_schema_exists( # Given mock_redshift_data_shares.return_value.check_schema_exists.return_value = False # When - response = redshift_processor_cross_account.verify_shares() + response = redshift_processor_cross_account.verify_shares_health_status() # Then with db.scoped_session() as session: item = ShareObjectRepository.get_share_item_by_uri(session, redshift_requested_table.shareItemUri) @@ -375,7 +375,7 @@ def test_verify_role_permissions_to_schema( # Given 
mock_redshift_data_shares.return_value.check_role_permissions_in_schema.return_value = False # When - response = redshift_processor_cross_account.verify_shares() + response = redshift_processor_cross_account.verify_shares_health_status() # Then with db.scoped_session() as session: item = ShareObjectRepository.get_share_item_by_uri(session, redshift_requested_table.shareItemUri) @@ -389,7 +389,7 @@ def test_verify_table_not_added_to_datashare( # Given mock_redshift_data_shares.return_value.check_table_in_datashare.return_value = False # When - response = redshift_processor_cross_account.verify_shares() + response = redshift_processor_cross_account.verify_shares_health_status() # Then with db.scoped_session() as session: item = ShareObjectRepository.get_share_item_by_uri(session, redshift_requested_table.shareItemUri) From 1cd2224c8f72428d43f04fe8b463c40e06165ac8 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Thu, 26 Dec 2024 20:17:47 -0600 Subject: [PATCH 02/26] Changes in the principal name --- .../services/share_notification_service.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/backend/dataall/modules/shares_base/services/share_notification_service.py b/backend/dataall/modules/shares_base/services/share_notification_service.py index f03317bb9..d0e93b755 100644 --- a/backend/dataall/modules/shares_base/services/share_notification_service.py +++ b/backend/dataall/modules/shares_base/services/share_notification_service.py @@ -53,7 +53,7 @@ def notify_share_object_submission(self, email_id: str): share_link_text = '' if os.environ.get('frontend_domain_url'): share_link_text = f'

Please visit data.all share link to take action or view more details' - msg = f'User {email_id} SUBMITTED share request for dataset {self.dataset.label} for principal {self.share.principalId}' + msg = f'User {email_id} SUBMITTED share request for dataset {self.dataset.label} for principal {self.share.principalRoleName}' subject = f'Data.all | Share Request Submitted for {self.dataset.label}' email_notification_msg = msg + share_link_text @@ -68,7 +68,7 @@ def notify_share_object_extension_submission(self, email_id: str): share_link_text = '' if os.environ.get('frontend_domain_url'): share_link_text = f'

Please visit data.all share link to take action or view more details' - msg = f'User {email_id} SUBMITTED share extension request for dataset {self.dataset.label} for principal {self.share.principalId}' + msg = f'User {email_id} SUBMITTED share extension request for dataset {self.dataset.label} for principal {self.share.principalRoleName}' subject = f'Data.all | Share Extension Request Submitted for {self.dataset.label}' email_notification_msg = msg + share_link_text @@ -90,7 +90,7 @@ def notify_persistent_email_reminder(self, email_id: str): msg_intro = f"""Dear User, This is a reminder that a share request for the dataset "{self.dataset.label}" submitted by {email_id} - on behalf of principal "{self.share.principalId}" is still pending and has not been addressed. + on behalf of principal "{self.share.principalRoleName}" is still pending and has not been addressed. """ msg_end = """Your prompt attention in this matter is greatly appreciated. @@ -123,7 +123,7 @@ def notify_share_object_approval(self, email_id: str): ) msg = ( f'User {email_id} APPROVED share request for dataset {self.dataset.label} ' - f'for principal {self.share.principalId}' + f'for principal {self.share.principalRoleName}' ) subject = f'Data.all | Share Request Approved for {self.dataset.label}' email_notification_msg = msg + share_link_text @@ -145,7 +145,7 @@ def notify_share_object_extension_approval(self, email_id: str): ) msg = ( f'User {email_id} APPROVED share extension request for dataset {self.dataset.label} ' - f'for principal {self.share.principalId}' + f'for principal {self.share.principalRoleName}' ) subject = f'Data.all | Share Extension Request Approved for {self.dataset.label}' email_notification_msg = msg + share_link_text @@ -162,13 +162,13 @@ def notify_share_object_rejection(self, email_id: str): if os.environ.get('frontend_domain_url'): share_link_text = f'

Please visit data.all share link to take action or view more details' if self.share.status == ShareObjectStatus.Rejected.value: - msg = f'User {email_id} REJECTED share request for dataset {self.dataset.label} for principal {self.share.principalId}' + msg = f'User {email_id} REJECTED share request for dataset {self.dataset.label} for principal {self.share.principalRoleName}' subject = f'Data.all | Share Request Rejected for {self.dataset.label}' elif self.share.status == ShareObjectStatus.Revoked.value: - msg = f'User {email_id} REVOKED share request for dataset {self.dataset.label} for principal {self.share.principalId}' + msg = f'User {email_id} REVOKED share request for dataset {self.dataset.label} for principal {self.share.principalRoleName}' subject = f'Data.all | Share Request Revoked for {self.dataset.label}' else: - msg = f'User {email_id} REJECTED/REVOKED share request for dataset {self.dataset.label} for principal {self.share.principalId}' + msg = f'User {email_id} REJECTED/REVOKED share request for dataset {self.dataset.label} for principal {self.share.principalRoleName}' subject = f'Data.all | Share Request Rejected / Revoked for {self.dataset.label}' email_notification_msg = msg + share_link_text @@ -183,7 +183,7 @@ def notify_share_object_extension_rejection(self, email_id: str): share_link_text = '' if os.environ.get('frontend_domain_url'): share_link_text = f'

Please visit data.all share link to take action or view more details' - msg = f'User {email_id} REJECTED share extension request for dataset {self.dataset.label} on principal {self.share.principalId}' + msg = f'User {email_id} REJECTED share extension request for dataset {self.dataset.label} on principal {self.share.principalRoleName}' subject = f'Data.all | Share Extension Request Rejected for {self.dataset.label}' email_notification_msg = msg + share_link_text From 1728d528d1f4283bfca1e6098f0ed2eb4e0da143 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Mon, 30 Dec 2024 11:51:56 -0600 Subject: [PATCH 03/26] Weekly email task --- .../modules/notifications/task/__init__.py | 0 .../task/weekly_digest_reminder.py | 156 ++++++++++++++++++ .../db/share_object_repositories.py | 15 ++ 3 files changed, 171 insertions(+) create mode 100644 backend/dataall/modules/notifications/task/__init__.py create mode 100644 backend/dataall/modules/notifications/task/weekly_digest_reminder.py diff --git a/backend/dataall/modules/notifications/task/__init__.py b/backend/dataall/modules/notifications/task/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/dataall/modules/notifications/task/weekly_digest_reminder.py b/backend/dataall/modules/notifications/task/weekly_digest_reminder.py new file mode 100644 index 000000000..d36a7689b --- /dev/null +++ b/backend/dataall/modules/notifications/task/weekly_digest_reminder.py @@ -0,0 +1,156 @@ +import logging +import os +from typing import List, Dict + +from dataall.base.db import get_engine +from dataall.base.loader import load_modules, ImportMode +from dataall.core.environment.db.environment_models import Environment +from dataall.core.stacks.api.enums import StackStatus +from dataall.core.stacks.db.stack_repositories import StackRepository +from dataall.modules.datasets_base.db.dataset_models import DatasetBase +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService 
+from dataall.modules.notifications.services.ses_email_notification_service import SESEmailNotificationService +from dataall.modules.shares_base.db.share_object_models import ShareObject +from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository +from dataall.modules.shares_base.services.shares_enums import ShareItemHealthStatus + +log = logging.getLogger(__name__) + + +class ResourceBundle: + def __init__(self): + self.share_object_list = [] + self.dataset_object_list = [] + self.environment_object_list = [] +def send_reminder_email(engine): + task_exceptions = [] + try: + with engine.scoped_session() as session: + # Get all the shares which are in unhealthy state + + # Todo : Check if distinct needed for the shareobject repository + unhealthy_share_objects: List[ShareObject] = ShareObjectRepository.get_share_object_with_health_status(session=session, health_status_list=[ShareItemHealthStatus.Unhealthy.value]) + + # Get all the dataset which are in unhealthy state + all_datasets: List[DatasetBase] = session.query(DatasetBase).all() + unhealthy_datasets: List[DatasetBase] = [] + unhealthy_stack_status: List[StackStatus] = [ + StackStatus.CREATE_FAILED.value, + StackStatus.DELETE_FAILED.value, + StackStatus.UPDATE_FAILED.value, + StackStatus.UPDATE_ROLLBACK_FAILED.value, + StackStatus.ROLLBACK_FAILED.value + ] + for dataset in all_datasets: + if StackRepository.find_stack_by_target_uri(session=session, target_uri=dataset.datasetUri, statuses=unhealthy_stack_status) is not None: + unhealthy_datasets.append(dataset) + + # Get all the environments which are in unhealthy state + all_environments: List[Environment] = session.query(Environment).all() + unhealthy_environments: List[Environment] = [] + for environment in all_environments: + if StackRepository.find_stack_by_target_uri(session=session, target_uri=environment.environmentUri, statuses=unhealthy_stack_status) is not None: + unhealthy_environments.append(environment) + + # {team: 
ResourceBundle} + group_name_to_resource_map: Dict[str, ResourceBundle] = {} + def _map_teams_to_resources(list_of_resources, group_attr, resource_type): + for resource in list_of_resources: + group_name = resource.__getattribute__(group_attr) + if group_name not in group_name_to_resource_map: + resource_bundle = ResourceBundle() + resource_bundle.__getattribute__(resource_type).append(resource) + group_name_to_resource_map[group_name] = resource_bundle + else: + resource_bundle = group_name_to_resource_map.get(group_name) + resource_bundle.__getattribute__(resource_type).append(resource) + + _map_teams_to_resources(list_of_resources=unhealthy_share_objects, group_attr="groupUri", resource_type="share_object_list") + _map_teams_to_resources(list_of_resources=unhealthy_environments, group_attr="SamlGroupName", resource_type="environment_object_list") + _map_teams_to_resources(list_of_resources=unhealthy_datasets, group_attr="SamlAdminGroupName", resource_type="dataset_object_list") + + for group, resource_bundle in group_name_to_resource_map.items(): + email_body = _construct_email_body(resource_bundle) + subject = 'Attention Required | Data.all weekly digest' + try: + SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=email_body, recipient_groups_list=[group]) + except Exception as e: + log.error(f"Error occurred in sending email while weekly reminder task due to: {e}") + task_exceptions.append(f"Error occurred in sending email while weekly reminder task due to: {e}") + except Exception as e: + log.error(f"Error occurred while running the weekly reminder task: {e}") + task_exceptions.append(f"Error occurred while running the weekly reminder task: {e}") + finally: + if len(task_exceptions) > 0: + AdminNotificationService().notify_admins_with_error_log( + process_error="Error occured while running the weekly reminder task", + error_logs=task_exceptions, + process_name="Weekly reminder task" + ) + + +def 
_construct_email_body(resource_bundle: ResourceBundle): + msg_heading = """ + Dear Team,
+ You have following data.all resources in unhealthy state. Please click on the links provided to get to the affected resource. + Please correct affected resources ASAP.

+ + For shares which are in unhealthy state, you can re-apply share by clicking on the "Reapply share" button
+ For environments and datasets which are in unhealthy state, you can go to the AWS account and check the stack associated with that environment and check the root cause of the stack. Once you address the root cause issue, you can click on "Update Stack" on the Stack Page.
+ """ + msg_content = """""" + share_object_table_content = _create_table_for_resource(resource_bundle.share_object_list, "shareUri", "/console/shares/", "Share Object") + dataset_object_table_content = _create_table_for_resource(resource_bundle.dataset_object_list, "datasetUri", "/console/s3-datasets/", "Dataset") + environment_object_table_content = _create_table_for_resource(resource_bundle.environment_object_list, "environmentUri", "/console/environments/", "Environment") + + msg_content += share_object_table_content + "

" + dataset_object_table_content + "

" + environment_object_table_content + "

" + + msg_footer = """ + In case your stack(s) or share object is still in unhealthy state after applying remedial measures, please contact data.all team.

+ Regards,
+ data.all Team + """ + + return msg_heading + msg_content + msg_footer + +def _create_table_for_resource(list_of_resources, uri_attr, link_uri, object_type): + table_heading = """ + + + Type + + + Link + + + """ + table_body = """""" + for resource in list_of_resources: + table_body += f""" + + + {object_type} + + + {os.environ.get('frontend_domain_url', '') + link_uri + resource.__getattribute__(uri_attr)} + + + <> + """ + table = f""" + + {table_heading} + {table_body} +
+ """ + + return table + + + +if __name__ == '__main__': + log.info("Starting weekly reminders task") + load_modules(modes={ImportMode.SHARES_TASK}) + ENVNAME = os.environ.get('envname', 'local') + ENGINE = get_engine(envname=ENVNAME) + send_reminder_email(engine=ENGINE) diff --git a/backend/dataall/modules/shares_base/db/share_object_repositories.py b/backend/dataall/modules/shares_base/db/share_object_repositories.py index 5d1f0ff8e..99b358c39 100644 --- a/backend/dataall/modules/shares_base/db/share_object_repositories.py +++ b/backend/dataall/modules/shares_base/db/share_object_repositories.py @@ -194,6 +194,21 @@ def get_share_data_items_by_type(session, share, share_type_model, share_type_ur query = query.filter(ShareObjectItem.healthStatus == healthStatus) return query.all() + @staticmethod + def get_share_object_with_health_status(session, health_status_list: List[str] = None): + query = ( + session.query(ShareObject) + .join( + ShareObjectItem, + ShareObjectItem.shareUri == ShareObject.shareUri + ).filter( + ShareObjectItem.healthStatus.in_(health_status_list) + ) + ) + + return query.all() + + @staticmethod def get_all_share_items_in_share(session, share_uri, status=None, healthStatus=None): query = ( From daa709e542c7d12eece4d0fe2f897053b40a320f Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Tue, 31 Dec 2024 14:37:38 -0600 Subject: [PATCH 04/26] Weekly notifications code and minor changes --- .../services/admin_notifications.py | 4 +- .../task/weekly_digest_reminder.py | 156 ------------- .../notifications/{task => tasks}/__init__.py | 0 .../tasks/weekly_digest_reminder.py | 216 ++++++++++++++++++ .../omics/tasks/omics_workflows_fetcher.py | 2 +- .../db/share_object_repositories.py | 5 + .../services/share_notification_service.py | 28 ++- .../shares_base/services/sharing_service.py | 15 +- .../shares_base/tasks/share_manager_task.py | 2 +- .../shares_base/tasks/share_reapplier_task.py | 27 ++- 10 files changed, 266 insertions(+), 189 deletions(-) 
delete mode 100644 backend/dataall/modules/notifications/task/weekly_digest_reminder.py rename backend/dataall/modules/notifications/{task => tasks}/__init__.py (100%) create mode 100644 backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py diff --git a/backend/dataall/modules/notifications/services/admin_notifications.py b/backend/dataall/modules/notifications/services/admin_notifications.py index 02b15e179..e2eb6cbec 100644 --- a/backend/dataall/modules/notifications/services/admin_notifications.py +++ b/backend/dataall/modules/notifications/services/admin_notifications.py @@ -10,11 +10,13 @@ def notify_admins_with_error_log(self, process_error: str, error_logs: List[str] subject = f'Data.all alert | Attention Required | Failure in : {process_name}' email_message = f""" - Following error occurred when , {process_error}

+ Following error occurred -

{process_error}

""" for error_log in error_logs: email_message += error_log + "

" + email_message += "Please check the logs in cloudwatch for more details" + SESEmailNotificationService.create_and_send_email_notifications( subject=subject, msg=email_message, diff --git a/backend/dataall/modules/notifications/task/weekly_digest_reminder.py b/backend/dataall/modules/notifications/task/weekly_digest_reminder.py deleted file mode 100644 index d36a7689b..000000000 --- a/backend/dataall/modules/notifications/task/weekly_digest_reminder.py +++ /dev/null @@ -1,156 +0,0 @@ -import logging -import os -from typing import List, Dict - -from dataall.base.db import get_engine -from dataall.base.loader import load_modules, ImportMode -from dataall.core.environment.db.environment_models import Environment -from dataall.core.stacks.api.enums import StackStatus -from dataall.core.stacks.db.stack_repositories import StackRepository -from dataall.modules.datasets_base.db.dataset_models import DatasetBase -from dataall.modules.notifications.services.admin_notifications import AdminNotificationService -from dataall.modules.notifications.services.ses_email_notification_service import SESEmailNotificationService -from dataall.modules.shares_base.db.share_object_models import ShareObject -from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository -from dataall.modules.shares_base.services.shares_enums import ShareItemHealthStatus - -log = logging.getLogger(__name__) - - -class ResourceBundle: - def __init__(self): - self.share_object_list = [] - self.dataset_object_list = [] - self.environment_object_list = [] -def send_reminder_email(engine): - task_exceptions = [] - try: - with engine.scoped_session() as session: - # Get all the shares which are in unhealthy state - - # Todo : Check if distinct needed for the shareobject repository - unhealthy_share_objects: List[ShareObject] = ShareObjectRepository.get_share_object_with_health_status(session=session, health_status_list=[ShareItemHealthStatus.Unhealthy.value]) - - # Get all the 
dataset which are in unhealthy state - all_datasets: List[DatasetBase] = session.query(DatasetBase).all() - unhealthy_datasets: List[DatasetBase] = [] - unhealthy_stack_status: List[StackStatus] = [ - StackStatus.CREATE_FAILED.value, - StackStatus.DELETE_FAILED.value, - StackStatus.UPDATE_FAILED.value, - StackStatus.UPDATE_ROLLBACK_FAILED.value, - StackStatus.ROLLBACK_FAILED.value - ] - for dataset in all_datasets: - if StackRepository.find_stack_by_target_uri(session=session, target_uri=dataset.datasetUri, statuses=unhealthy_stack_status) is not None: - unhealthy_datasets.append(dataset) - - # Get all the environments which are in unhealthy state - all_environments: List[Environment] = session.query(Environment).all() - unhealthy_environments: List[Environment] = [] - for environment in all_environments: - if StackRepository.find_stack_by_target_uri(session=session, target_uri=environment.environmentUri, statuses=unhealthy_stack_status) is not None: - unhealthy_environments.append(environment) - - # {team: ResourceBundle} - group_name_to_resource_map: Dict[str, ResourceBundle] = {} - def _map_teams_to_resources(list_of_resources, group_attr, resource_type): - for resource in list_of_resources: - group_name = resource.__getattribute__(group_attr) - if group_name not in group_name_to_resource_map: - resource_bundle = ResourceBundle() - resource_bundle.__getattribute__(resource_type).append(resource) - group_name_to_resource_map[group_name] = resource_bundle - else: - resource_bundle = group_name_to_resource_map.get(group_name) - resource_bundle.__getattribute__(resource_type).append(resource) - - _map_teams_to_resources(list_of_resources=unhealthy_share_objects, group_attr="groupUri", resource_type="share_object_list") - _map_teams_to_resources(list_of_resources=unhealthy_environments, group_attr="SamlGroupName", resource_type="environment_object_list") - _map_teams_to_resources(list_of_resources=unhealthy_datasets, group_attr="SamlAdminGroupName", 
resource_type="dataset_object_list") - - for group, resource_bundle in group_name_to_resource_map.items(): - email_body = _construct_email_body(resource_bundle) - subject = 'Attention Required | Data.all weekly digest' - try: - SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=email_body, recipient_groups_list=[group]) - except Exception as e: - log.error(f"Error occurred in sending email while weekly reminder task due to: {e}") - task_exceptions.append(f"Error occurred in sending email while weekly reminder task due to: {e}") - except Exception as e: - log.error(f"Error occurred while running the weekly reminder task: {e}") - task_exceptions.append(f"Error occurred while running the weekly reminder task: {e}") - finally: - if len(task_exceptions) > 0: - AdminNotificationService().notify_admins_with_error_log( - process_error="Error occured while running the weekly reminder task", - error_logs=task_exceptions, - process_name="Weekly reminder task" - ) - - -def _construct_email_body(resource_bundle: ResourceBundle): - msg_heading = """ - Dear Team,
- You have following data.all resources in unhealthy state. Please click on the links provided to get to the affected resource. - Please correct affected resources ASAP.

- - For shares which are in unhealthy state, you can re-apply share by clicking on the "Reapply share" button
- For environments and datasets which are in unhealthy state, you can go to the AWS account and check the stack associated with that environment and check the root cause of the stack. Once you address the root cause issue, you can click on "Update Stack" on the Stack Page.
- """ - msg_content = """""" - share_object_table_content = _create_table_for_resource(resource_bundle.share_object_list, "shareUri", "/console/shares/", "Share Object") - dataset_object_table_content = _create_table_for_resource(resource_bundle.dataset_object_list, "datasetUri", "/console/s3-datasets/", "Dataset") - environment_object_table_content = _create_table_for_resource(resource_bundle.environment_object_list, "environmentUri", "/console/environments/", "Environment") - - msg_content += share_object_table_content + "

" + dataset_object_table_content + "

" + environment_object_table_content + "

" - - msg_footer = """ - In case your stack(s) or share object is still in unhealthy state after applying remedial measures, please contact data.all team.

- Regards,
- data.all Team - """ - - return msg_heading + msg_content + msg_footer - -def _create_table_for_resource(list_of_resources, uri_attr, link_uri, object_type): - table_heading = """ - - - Type - - - Link - - - """ - table_body = """""" - for resource in list_of_resources: - table_body += f""" - - - {object_type} - - - {os.environ.get('frontend_domain_url', '') + link_uri + resource.__getattribute__(uri_attr)} - - - <> - """ - table = f""" - - {table_heading} - {table_body} -
- """ - - return table - - - -if __name__ == '__main__': - log.info("Starting weekly reminders task") - load_modules(modes={ImportMode.SHARES_TASK}) - ENVNAME = os.environ.get('envname', 'local') - ENGINE = get_engine(envname=ENVNAME) - send_reminder_email(engine=ENGINE) diff --git a/backend/dataall/modules/notifications/task/__init__.py b/backend/dataall/modules/notifications/tasks/__init__.py similarity index 100% rename from backend/dataall/modules/notifications/task/__init__.py rename to backend/dataall/modules/notifications/tasks/__init__.py diff --git a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py new file mode 100644 index 000000000..13450a6c0 --- /dev/null +++ b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py @@ -0,0 +1,216 @@ +import logging +import os +from typing import List, Dict, Any + +from dataall.base.db import get_engine +from dataall.base.loader import load_modules, ImportMode +from dataall.core.environment.db.environment_models import Environment +from dataall.core.stacks.api.enums import StackStatus +from dataall.core.stacks.db.stack_repositories import StackRepository +from dataall.modules.datasets_base.db.dataset_models import DatasetBase +from dataall.modules.datasets_base.db.dataset_repositories import DatasetBaseRepository +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService +from dataall.modules.notifications.services.ses_email_notification_service import SESEmailNotificationService +from dataall.modules.shares_base.db.share_object_models import ShareObject +from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository +from dataall.modules.shares_base.services.shares_enums import ShareItemHealthStatus + +log = logging.getLogger(__name__) + +class NotificationResource: + def __init__(self, resource, resource_type: str, resource_status: str, receivers: 
List[str] = None ): + self.resource = resource + self.resource_type = resource_type + self.resource_status = resource_status + self.receivers_list = set(receivers) + + +class NotificationResourceBundle: + def __init__(self): + self.share_object_notifications: List[NotificationResource] = [] + self.dataset_object_notifications: List[NotificationResource] = [] + self.environment_object_notifications: List[NotificationResource] = [] + + +def _get_pending_share_notifications(session): + pending_shares = ShareObjectRepository.get_shares_with_statuses(session=session, status_list=['Submitted']) + share_dataset_map: Dict[ShareObject, DatasetBase] = { + share: DatasetBaseRepository.get_dataset_by_uri(session=session, dataset_uri=share.datasetUri) for share in + pending_shares} + return [ + NotificationResource( + resource=share, + resource_type='Share Object', + resource_status=f'{share.status} - Pending Approval', + receivers=[share_dataset_map[share].SamlAdminGroupName, share_dataset_map[share].stewards]) + for share in share_dataset_map + ] + + +def _get_unhealthy_share_notification(session): + unhealthy_share_objects: List[ShareObject] = ShareObjectRepository.get_share_object_with_health_status( + session=session, health_status_list=[ShareItemHealthStatus.Unhealthy.value]) + return [ + NotificationResource(resource=share, resource_type='Share_object', resource_status='Unhealthy', + receivers=[share.groupUri]) for share in unhealthy_share_objects] + +def _get_unhealthy_stack_by_type(session, target_uri: str, target_type: Any): + unhealthy_stack_status: List[StackStatus] = [ + StackStatus.CREATE_FAILED.value, + StackStatus.DELETE_FAILED.value, + StackStatus.UPDATE_FAILED.value, + StackStatus.UPDATE_ROLLBACK_FAILED.value, + StackStatus.ROLLBACK_FAILED.value + ] + resource_objects = session.query(target_type).all() + unhealthy_datasets_notification_resources: List[NotificationResource] = [] + + for resource in resource_objects: + stack = 
StackRepository.find_stack_by_target_uri(session=session, target_uri=resource.__getattribute__(target_uri), + statuses=unhealthy_stack_status) + if stack is not None: + notification_resource = NotificationResource(resource=resource, resource_type=target_type.__name__, resource_status=stack.status, receivers=_get_receivers_for_stack(resource=resource, target_type=target_type)) + unhealthy_datasets_notification_resources.append(notification_resource) + + return unhealthy_datasets_notification_resources + +def _get_receivers_for_stack(resource, target_type): + if target_type.__name__ == 'Dataset': + return [resource.SamlAdminGroupName, resource.stewards] + if target_type.__name__ == 'Environment': + return [resource.SamlGroupName] + +def _map_groups_to_resource_bundles(list_of_notifications: List[NotificationResource], resource_bundle_type: str): + for notification in list_of_notifications: + # Get all the receivers groups + notification_receiver_groups = notification.receivers_list + for receiver_group_name in notification_receiver_groups: + if receiver_group_name in group_name_to_resource_map: + resource_bundle = group_name_to_resource_map.get(receiver_group_name) + resource_bundle.__getattribute__(resource_bundle_type).append(notification) + else: + resource_bundle = NotificationResourceBundle() + resource_bundle.__getattribute__(resource_bundle_type).append(notification) + group_name_to_resource_map[receiver_group_name] = resource_bundle + +def send_reminder_email(engine): + task_exceptions = [] + try: + with engine.scoped_session() as session: + # Get all shares in submitted state + pending_share_notification_resources = _get_pending_share_notifications(session=session) + + # Todo : Check if distinct needed for the share object repository + unhealthy_share_objects_notification_resources = _get_unhealthy_share_notification(session=session) + + # Get all the dataset which are in unhealthy state + unhealthy_datasets_notification_resources = 
_get_unhealthy_stack_by_type(session=session, target_uri='datasetUri', target_type=DatasetBase) + + # Get all the environments which are in unhealthy state + unhealthy_environment_notification_resources = _get_unhealthy_stack_by_type(session=session, target_uri='environmentUri', target_type=Environment) + + _map_groups_to_resource_bundles(list_of_notifications=pending_share_notification_resources, resource_bundle_type="share_object_notifications") + _map_groups_to_resource_bundles(list_of_notifications=unhealthy_share_objects_notification_resources, resource_bundle_type="share_object_notifications") + _map_groups_to_resource_bundles(list_of_notifications=unhealthy_datasets_notification_resources, resource_bundle_type="dataset_object_notifications") + _map_groups_to_resource_bundles(list_of_notifications=unhealthy_environment_notification_resources, resource_bundle_type="environment_object_notifications") + + for group, resource_bundle in group_name_to_resource_map.items(): + email_body = _construct_email_body(resource_bundle) + subject = 'Attention Required | Data.all weekly digest' + try: + SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=email_body, recipient_groups_list=[group]) + except Exception as e: + log.error(f"Error occurred in sending email while weekly reminder task due to: {e}") + task_exceptions.append(f"Error occurred in sending email while weekly reminder task due to: {e}") + except Exception as e: + log.error(f"Error occurred while running the weekly reminder task: {e}") + task_exceptions.append(f"Error occurred while running the weekly reminder task: {e}") + finally: + if len(task_exceptions) > 0: + AdminNotificationService().notify_admins_with_error_log( + process_error="Error occurred while running the weekly reminder task", + error_logs=task_exceptions, + process_name="Weekly reminder task" + ) + + + +def _construct_email_body(resource_bundle: NotificationResourceBundle): + msg_heading = """ + Dear Team,
+ You have following data.all resources in unhealthy state. Please click on the links provided to get to the affected resource. + Please correct affected resources ASAP.

+ + For shares which are in unhealthy state, you can re-apply share by clicking on the "Reapply share" button
+ For environments and datasets which are in unhealthy state, you can go to the AWS account and check the stack associated with that environment and check the root cause of the stack. Once you address the root cause issue, you can click on "Update Stack" on the Stack Page.
+ """ + msg_content = """""" + share_object_table_content = _create_table_for_resource(resource_bundle.share_object_notifications, "shareUri", + "/console/shares/") if len(resource_bundle.share_object_notifications) > 0 else "" + dataset_object_table_content = _create_table_for_resource(resource_bundle.dataset_object_notifications, + "datasetUri", + "/console/s3-datasets/") if len(resource_bundle.dataset_object_notifications) > 0 else "" + environment_object_table_content = _create_table_for_resource(resource_bundle.environment_object_notifications, + "environmentUri", + "/console/environments/") if len(resource_bundle.environment_object_notifications) > 0 else "" + + msg_content += share_object_table_content + dataset_object_table_content + environment_object_table_content + "

" + + msg_footer = """ + In case your stack(s) or share object is still in unhealthy state after applying remedial measures, please contact data.all team.

+ Regards,
+ data.all Team + """ + + return msg_heading + msg_content + msg_footer + +def _create_table_for_resource(list_of_resources, uri_attr, link_uri): + table_heading = """ + + + Type + + + Link + + + Status + + + """ + table_body = """""" + for resource in list_of_resources: + table_body += f""" + + + {resource.resource_type} + + + {os.environ.get('frontend_domain_url', '') + link_uri + resource.resource.__getattribute__(uri_attr)} + + + {resource.resource_status} + + + <> + """ + table = f""" + + {table_heading} + {table_body} +
+
+
+ """ + + return table + + + +if __name__ == '__main__': + log.info("Starting weekly reminders task") + load_modules(modes={ImportMode.SHARES_TASK}) + ENVNAME = os.environ.get('envname', 'dkrcompose') + ENGINE = get_engine(envname=ENVNAME) + group_name_to_resource_map: Dict[str, NotificationResourceBundle] = {} + send_reminder_email(engine=ENGINE) diff --git a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py index 40c52acd4..b648e8573 100644 --- a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py +++ b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py @@ -63,7 +63,7 @@ def fetch_omics_workflows(engine): except Exception as e: log.error(f'Error occured while processing omics workflow task due to: {e}') AdminNotificationService().notify_admins_with_error_log( - process_error='Error occured while processing omics workflow task', + process_error='Error occurred while processing omics workflow task', error_logs=[str(e)], process_name='Omics Workflow' ) diff --git a/backend/dataall/modules/shares_base/db/share_object_repositories.py b/backend/dataall/modules/shares_base/db/share_object_repositories.py index 99b358c39..8d2da4e47 100644 --- a/backend/dataall/modules/shares_base/db/share_object_repositories.py +++ b/backend/dataall/modules/shares_base/db/share_object_repositories.py @@ -490,6 +490,11 @@ def fetch_submitted_shares_with_notifications(session): ) return pending_shares + @staticmethod + def get_shares_with_statuses(session, status_list: List[str]): + query = session.query(ShareObject).filter(ShareObject.status.in_(status_list)) + return query.all() + @staticmethod def get_all_active_shares_with_expiration(session): return ( diff --git a/backend/dataall/modules/shares_base/services/share_notification_service.py b/backend/dataall/modules/shares_base/services/share_notification_service.py index d0e93b755..896deafd4 100644 --- 
a/backend/dataall/modules/shares_base/services/share_notification_service.py +++ b/backend/dataall/modules/shares_base/services/share_notification_service.py @@ -271,11 +271,15 @@ def notify_share_object_failed(self): f'to take action or view more details' ) msg = ( - f'Share request made for dataset: {self.dataset.label} with requestor principal: {self.share.principalRoleName} failed.

' + f'Share request made for dataset: {self.dataset.label} with requestor principal: {self.share.principalRoleName} failed.

' f'You can delete and resubmit the failed items in the share. If your share item still remains in the Share_Failed state then please get in touch with data.all admins.' ) + msg_footer = """ + Regards,
+ data.all team + """ subject = f'Data.all | Attention Required | Share failed for {self.dataset.label}' - email_notification_msg = msg + share_link_text + email_notification_msg = msg + share_link_text + "

" + msg_footer notifications = self.register_notifications( notification_type=DataSharingNotificationType.SHARE_OBJECT_FAILED.value, msg=msg @@ -296,12 +300,16 @@ def notify_share_object_items_unhealthy(self): ) msg = ( f'Hello Team,
' - f'Your share with share uri: {self.share.shareUri} has one or more unhealthy share items.

' - f'Once you visit your share link you can click on the Reapply button and this should correct your share and get it into an healthy state. If this doesn\'t get your share in healthy state then please get in touch with data.all admins for your share.' - f'
If you are using any terraform / cloudformation or any other IaC to also manage your bucket policy, kms policy and requestor IAM role, please make them aware of the data.all changes so that they don\'t wipe off data.all related policies' + f'Your share with share uri: {self.share.shareUri} has one or more unhealthy share items.

' + f'Once you visit your share link you can click on the Reapply button and this should correct your share and get it into an healthy state. If this doesn\'t get your share in healthy state then please get in touch with data.all admins for your share.' + f'

Please note: If you are using any terraform / cloudformation or any other IaC to also manage your bucket policy, kms policy and requestor IAM role, please make them aware of the data.all changes so that they don\'t wipe off data.all related policies' ) + msg_footer = """ + Regards,
+ data.all team + """ subject = f'Data.all | Attention Required | Share for {self.dataset.label} dataset in unhealthy state' - email_notification_msg = msg + share_link_text + email_notification_msg = msg + share_link_text + "

" + msg_footer notifications = self.register_notifications( notification_type=DataSharingNotificationType.SHARE_OBJECT_UNHEALTHY.value, msg=msg, to_recipients=[self.share.groupUri] @@ -323,10 +331,14 @@ def notify_share_object_items_healthy(self): ) msg = ( f'Hello Team,
' - f'Your share with share uri: {self.share.shareUri} is in healthy state
' + f'Your share with share uri: {self.share.shareUri} is now in healthy state after reapplying the share.
' ) + msg_footer = """ + Regards,
+ data.all team + """ subject = f'Data.all | Share for {self.dataset.label} dataset now in healthy state' - email_notification_msg = msg + share_link_text + email_notification_msg = msg + share_link_text + "

" + msg_footer notifications = self.register_notifications( notification_type=DataSharingNotificationType.SHARE_OBJECT_HEALTHY.value, msg=msg, to_recipients=[self.share.groupUri] diff --git a/backend/dataall/modules/shares_base/services/sharing_service.py b/backend/dataall/modules/shares_base/services/sharing_service.py index dc1711a21..9dee9c7c3 100644 --- a/backend/dataall/modules/shares_base/services/sharing_service.py +++ b/backend/dataall/modules/shares_base/services/sharing_service.py @@ -364,9 +364,7 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: log.info(f'There are no items to reapply of type {type.value}') except Exception as e: log.error(f'Error occurred during share reapplying of {type.value}: {e}') - AdminNotificationService().notify_admins_with_error_log( - process_error=f'Error occurred during reapplying of share with uri: {share_data.share.shareUri} for processor type: {type.value} due to an unknown exception', - error_logs=[str(e)], process_name='Sharing Service') + code_exception_list.append(f'Error occurred during reapplying of share with uri: {share_data.share.shareUri} for processor type: {type.value} due to an unknown exception: {e}') if not reapply_successful: log.info( @@ -375,18 +373,19 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: share=share_data.share).notify_share_object_items_unhealthy() return reapply_successful - except ResourceLockTimeout as e: + except ResourceLockTimeout as timeout_exception: ShareStatusRepository.update_share_item_health_status_batch( session, share_uri, old_status=ShareItemHealthStatus.PendingReApply.value, new_status=ShareItemHealthStatus.Unhealthy.value, - message=str(e), + message=str(timeout_exception), ) - code_exception_list.append(str(e)) + code_exception_list.append(str(timeout_exception)) + return False except Exception as e: - log.exception('Error occurred during share approval') + log.exception(f'Error occurred during share reapply: {e}') 
code_exception_list.append(str(e)) return False @@ -394,7 +393,7 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: if len(code_exception_list) > 0: AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred during reapplying of share with uri: {share_data.share.shareUri}', - error_logs=[str(e)], process_name='Sharing Service') + error_logs=code_exception_list, process_name='Sharing Service') @classmethod def cleanup_share( diff --git a/backend/dataall/modules/shares_base/tasks/share_manager_task.py b/backend/dataall/modules/shares_base/tasks/share_manager_task.py index 585b76397..d6c611214 100644 --- a/backend/dataall/modules/shares_base/tasks/share_manager_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_manager_task.py @@ -27,7 +27,7 @@ except Exception as e: log.error(f'Sharing task failed due to: {e}') AdminNotificationService().notify_admins_with_error_log( - process_error='Error occurred while running Sharing task', + process_error=f'Error occurred while running sharing task for share with uri: {os.getenv("shareUri", "Share URI not available")}', error_logs=[str(e)], process_name='Sharing Service' ) diff --git a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py index 3c337e04d..368a1732c 100644 --- a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py @@ -45,23 +45,22 @@ def _reapply_share_objects(cls, engine, session, share_objects: List[ShareObject processed_share_objects = [] task_exceptions = [] for share_object in share_objects: - log.info( - f'Re-applying Share Items for Share Object, Share URI: {share_object.shareUri} ) with Requestor: {share_object.principalId} on Target Dataset: {share_object.datasetUri}' - ) - processed_share_objects.append(share_object.shareUri) - ShareStatusRepository.update_share_item_health_status_batch( 
- session=session, - share_uri=share_object.shareUri, - old_status=ShareItemHealthStatus.Unhealthy.value, - new_status=ShareItemHealthStatus.PendingReApply.value, - ) try: + log.info( + f'Re-applying Share Items for Share Object, Share URI: {share_object.shareUri} ) with Requestor: {share_object.principalId} on Target Dataset: {share_object.datasetUri}' + ) + processed_share_objects.append(share_object.shareUri) + ShareStatusRepository.update_share_item_health_status_batch( + session=session, + share_uri=share_object.shareUri, + old_status=ShareItemHealthStatus.Unhealthy.value, + new_status=ShareItemHealthStatus.PendingReApply.value, + ) SharingService.reapply_share(engine, share_uri=share_object.shareUri) except Exception as e: - log.error( - f'Error occurred while reapplying share for share with uri:{share_object.shareUri} due to: {e}') - task_exceptions.append( - f'Error occurred while reapplying share for share with uri:{share_object.shareUri} due to: {e}') + error_formatted = f'Error occurred while reapplying share in the reapplie task for share with uri:{share_object.shareUri} due to: {e}' + log.error(error_formatted) + task_exceptions.append(error_formatted) return (processed_share_objects, task_exceptions) @classmethod def process_reapply_shares(cls, engine): From 84bf9febc0b3f88d49482ccbca9e541d1dbb6014 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Thu, 2 Jan 2025 15:06:53 -0600 Subject: [PATCH 05/26] Glue Table change notifications --- .../tasks/weekly_digest_reminder.py | 12 ++-- .../db/dataset_table_repositories.py | 5 ++ .../services/dataset_table_notifications.py | 67 +++++++++++++++++++ .../services/dataset_table_service.py | 7 +- .../s3_datasets/tasks/tables_syncer.py | 12 +++- .../db/share_object_repositories.py | 13 +++- 6 files changed, 104 insertions(+), 12 deletions(-) create mode 100644 backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py diff --git 
a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py index 13450a6c0..a84ec592d 100644 --- a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py +++ b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py @@ -137,12 +137,13 @@ def send_reminder_email(engine): def _construct_email_body(resource_bundle: NotificationResourceBundle): msg_heading = """ - Dear Team,
- You have following data.all resources in unhealthy state. Please click on the links provided to get to the affected resource. - Please correct affected resources ASAP.

+ Dear Team,

+ This email contains data.all resources that require action from you. For resources which are in an unhealthy state, we request you to take action ASAP so as to minimize any disruptions.

+ + Helpful Tips:

For shares which are in unhealthy state, you can re-apply share by clicking on the "Reapply share" button
- For environments and datasets which are in unhealthy state, you can go to the AWS account and check the stack associated with that environment and check the root cause of the stack. Once you address the root cause issue, you can click on "Update Stack" on the Stack Page.
+ For environments and datasets which are in unhealthy state, you can go to the AWS account and check the stack associated with that environment/dataset to investigate the root cause of the failure. Once you address the root cause issue, you can click on "Update Stack" on the Stack Page.


""" msg_content = """""" share_object_table_content = _create_table_for_resource(resource_bundle.share_object_notifications, "shareUri", @@ -157,7 +158,7 @@ def _construct_email_body(resource_bundle: NotificationResourceBundle): msg_content += share_object_table_content + dataset_object_table_content + environment_object_table_content + "

" msg_footer = """ - In case your stack(s) or share object is still in unhealthy state after applying remedial measures, please contact data.all team.

+ In case your stack(s) or share object(s) are still in unhealthy state after applying remedial measures, please contact data.all team.

Regards,
data.all Team """ @@ -192,7 +193,6 @@ def _create_table_for_resource(list_of_resources, uri_attr, link_uri): {resource.resource_status} - <> """ table = f""" diff --git a/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py b/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py index 6fd86610b..f46c19567 100644 --- a/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py +++ b/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py @@ -1,5 +1,6 @@ import logging from datetime import datetime +from typing import Dict from sqlalchemy.sql import and_ @@ -62,18 +63,22 @@ def get_dataset_table_by_uri(session, table_uri): @staticmethod def update_existing_tables_status(existing_tables, glue_tables): + updated_tables_status_map: Dict[str: str] = {} for existing_table in existing_tables: if existing_table.GlueTableName not in [t['Name'] for t in glue_tables]: existing_table.LastGlueTableStatus = 'Deleted' + updated_tables_status_map[existing_table.GlueTableName] = 'Deleted' logger.info(f'Existing Table {existing_table.GlueTableName} status set to Deleted from Glue') elif ( existing_table.GlueTableName in [t['Name'] for t in glue_tables] and existing_table.LastGlueTableStatus == 'Deleted' ): existing_table.LastGlueTableStatus = 'InSync' + updated_tables_status_map[existing_table.GlueTableName] = 'InSync: Updated to InSync from Deleted' logger.info( f'Updating Existing Table {existing_table.GlueTableName} status set to InSync from Deleted after found in Glue' ) + return updated_tables_status_map @staticmethod def find_all_active_tables(session, dataset_uri): diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py new file mode 100644 index 000000000..9ba608534 --- /dev/null +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py @@ -0,0 +1,67 @@ +from typing import Dict +import 
logging +from dataall.modules.notifications.services.ses_email_notification_service import SESEmailNotificationService +from dataall.modules.s3_datasets.db.dataset_models import S3Dataset +from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository + +log = logging.getLogger(__name__) + +class DatasetTableNotifications: + + def __init__(self, dataset: S3Dataset): + self.dataset: S3Dataset = dataset + + def notify_dataset_table_updates(self, session, table_status_map: Dict[str, str]): + subject = f"Data.all Update | Glue tables updated for dataset: {self.dataset.name}" + table_content = self._construct_html_table_from_glue_status_map(table_status_map) + msg_body = f""" + Dear Team,

+ The following tables have been updated for dataset: {self.dataset.name}

+ + {table_content}

+ """ + msg_footer = """ + Regards,
+ data.all team + """ + msg = msg_body + msg_footer + + SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=msg, recipient_groups_list=[self.dataset.SamlAdminGroupName, self.dataset.stewards]) + + # Find all the shares made on this dataset + shares = ShareObjectRepository.find_dataset_shares(session=session, dataset_uri=self.dataset.datasetUri, share_statues=['Processed']) + if shares: + subject = f"Alert: Data.all Update | Glue table updated for dataset: {self.dataset.name}" + for share in shares: + msg_footer = f""" + You have an active share with uri: {share.shareUri}. If there is any table requested by you on the dataset: {self.dataset.name} for that share it may have been affected in case if the tables are deleted.
+ Note: Please check with the dataset owner if any table is missing from your share - it has likely been deleted from the dataset.
If the table exists in the dataset and is successfully shared but you are unable to access the table, then please reach out to the data.all team

+ Regards, + data.all team + """ + msg = msg_body + msg_footer + SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=msg, recipient_groups_list=[share.groupUri]) + + @classmethod + def _construct_html_table_from_glue_status_map(cls, table_status_map): + table_heading = """ + + + + + """ + table_body = """""" + for table_name, table_status in table_status_map.items(): + table_body += f""" + + + + + """ + table_content = f""" +
Glue Table NameStatus
{table_name}{table_status}
+ {table_heading} + {table_body} +
+ """ + return table_content diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py index 21336c58b..bf95beda7 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py @@ -136,18 +136,21 @@ def sync_tables_for_dataset(cls, uri): @staticmethod def sync_existing_tables(session, uri, glue_tables=None): dataset: S3Dataset = DatasetRepository.get_dataset_by_uri(session, uri) + updated_table_status_map = {} if dataset: existing_tables = DatasetTableRepository.find_dataset_tables(session, uri) existing_table_names = [e.GlueTableName for e in existing_tables] existing_dataset_tables_map = {t.GlueTableName: t for t in existing_tables} - DatasetTableRepository.update_existing_tables_status(existing_tables, glue_tables) + updated_table_status_map = DatasetTableRepository.update_existing_tables_status(existing_tables, glue_tables) log.info(f'existing_tables={glue_tables}') + for table in glue_tables: if table['Name'] not in existing_table_names: log.info(f'Storing new table: {table} for dataset db {dataset.GlueDatabaseName}') updated_table = DatasetTableRepository.create_synced_table(session, dataset, table) DatasetTableService._attach_dataset_table_permission(session, dataset, updated_table.tableUri) + updated_table_status_map[updated_table.GlueTableName] = 'Newly Added' else: log.info(f'Updating table: {table} for dataset db {dataset.GlueDatabaseName}') updated_table: DatasetTable = existing_dataset_tables_map.get(table['Name']) @@ -155,7 +158,7 @@ def sync_existing_tables(session, uri, glue_tables=None): DatasetTableRepository.sync_table_columns(session, updated_table, table) - return True + return updated_table_status_map @staticmethod def _attach_dataset_table_permission(session, dataset: S3Dataset, table_uri): diff --git 
a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py index fea3edcde..d95c71a5d 100644 --- a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py +++ b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py @@ -10,6 +10,7 @@ from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.s3_datasets.aws.glue_dataset_client import DatasetCrawler from dataall.modules.s3_datasets.aws.lf_table_client import LakeFormationTableClient +from dataall.modules.s3_datasets.services.dataset_table_notifications import DatasetTableNotifications from dataall.modules.s3_datasets.services.dataset_table_service import DatasetTableService from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository from dataall.modules.s3_datasets.db.dataset_models import DatasetTable, S3Dataset @@ -51,7 +52,16 @@ def sync_tables(engine): log.info(f'Found {len(tables)} tables on Glue database {dataset.GlueDatabaseName}') - DatasetTableService.sync_existing_tables(session, uri=dataset.datasetUri, glue_tables=tables) + table_status_map = DatasetTableService.sync_existing_tables(session, uri=dataset.datasetUri, glue_tables=tables) + + # Send notifications + if table_status_map: + try: + DatasetTableNotifications(dataset=dataset).notify_dataset_table_updates(session=session, table_status_map=table_status_map) + except Exception as e: + error_log = f"Error occurred while sending email to notify about changes to the glue tables for dataset with uri: {dataset.datasetUri} due to: {e}" + task_exceptions.append(error_log) + tables = session.query(DatasetTable).filter(DatasetTable.datasetUri == dataset.datasetUri).all() diff --git a/backend/dataall/modules/shares_base/db/share_object_repositories.py b/backend/dataall/modules/shares_base/db/share_object_repositories.py index 8d2da4e47..b04b9751d 100644 --- 
a/backend/dataall/modules/shares_base/db/share_object_repositories.py +++ b/backend/dataall/modules/shares_base/db/share_object_repositories.py @@ -42,11 +42,18 @@ def find_share(session, dataset: DatasetBase, env, principal_id, principal_role_ ) @staticmethod - def find_dataset_shares(session, dataset_uri): - return session.query(ShareObject).filter(ShareObject.datasetUri == dataset_uri).all() + def find_dataset_shares(session, dataset_uri: str, share_statues: List[str] = None): + query = session.query(ShareObject).filter(ShareObject.datasetUri == dataset_uri) + + if share_statues: + query = query.filter(ShareObject.status.in_(share_statues)) + + return query.all() @staticmethod - def find_share_by_dataset_attributes(session, dataset_uri, dataset_owner, groups=[]): + def find_share_by_dataset_attributes(session, dataset_uri, dataset_owner, groups = None): + if groups is None: + groups = [] share: ShareObject = ( session.query(ShareObject) .filter(ShareObject.datasetUri == dataset_uri) From 8c861fff2f5c9e50e9756a3982f1726e1715dddd Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Thu, 2 Jan 2025 18:26:35 -0600 Subject: [PATCH 06/26] Additional comments and logs plus weekly reminder task --- .../catalog/tasks/catalog_indexer_task.py | 4 +- .../services/admin_notifications.py | 12 ++- .../tasks/weekly_digest_reminder.py | 99 ++++++++++++++----- .../omics/tasks/omics_workflows_fetcher.py | 2 +- .../services/dataset_table_notifications.py | 56 +++++++---- .../services/dataset_table_service.py | 1 + .../s3_datasets/tasks/tables_syncer.py | 2 +- .../shares_base/services/sharing_service.py | 2 - .../tasks/persistent_email_reminders_task.py | 14 ++- .../shares_base/tasks/share_reapplier_task.py | 1 - deploy/stacks/container.py | 29 ++++++ 11 files changed, 165 insertions(+), 57 deletions(-) diff --git a/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py b/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py index 4183c4bc5..222cb1954 100644 --- 
a/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py +++ b/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py @@ -32,10 +32,12 @@ def index_objects(cls, engine, with_deletes='False'): CatalogIndexerTask._delete_old_objects(indexed_object_uris) return len(indexed_object_uris) except Exception as e: + error_log = f'Error occurred while indexing objects during the cataloging task. Exception: {e}' + log.error(error_log) AlarmService().trigger_catalog_indexing_failure_alarm(error=str(e)) AdminNotificationService().notify_admins_with_error_log( process_error='Exception occurred during cataloging task', - error_logs=[str(e)], + error_logs=[error_log], process_name='Catalog Task' ) raise e diff --git a/backend/dataall/modules/notifications/services/admin_notifications.py b/backend/dataall/modules/notifications/services/admin_notifications.py index e2eb6cbec..8990a27b4 100644 --- a/backend/dataall/modules/notifications/services/admin_notifications.py +++ b/backend/dataall/modules/notifications/services/admin_notifications.py @@ -6,7 +6,15 @@ class AdminNotificationService: admin_group = 'DAAdministrators' - def notify_admins_with_error_log(self, process_error: str, error_logs: List[str], process_name:str = ''): + """ + Send email notifications to Admin Group i.e. DAAdministrators in data.all + Args - + 1. process_error - string describing in short the error / exception details + 2. error_logs - List of all the exception error logs + 3. process_name - Code where the exception occurred. 
Example, inside an ECS task like cataloging task, etc or inside a graphql service + """ + @classmethod + def notify_admins_with_error_log(cls, process_error: str, error_logs: List[str], process_name:str = ''): subject = f'Data.all alert | Attention Required | Failure in : {process_name}' email_message = f""" @@ -20,5 +28,5 @@ def notify_admins_with_error_log(self, process_error: str, error_logs: List[str] SESEmailNotificationService.create_and_send_email_notifications( subject=subject, msg=email_message, - recipient_groups_list=[AdminNotificationService.admin_group] + recipient_groups_list=[cls.admin_group] ) \ No newline at end of file diff --git a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py index a84ec592d..d40d597fe 100644 --- a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py +++ b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py @@ -17,14 +17,24 @@ log = logging.getLogger(__name__) +""" +A container for holding the resource ( e.g. 
dataset, share object, environment, etc ), receivers and health status ( resource_status ) for sending notifications +""" + + class NotificationResource: - def __init__(self, resource, resource_type: str, resource_status: str, receivers: List[str] = None ): + def __init__(self, resource, resource_type: str, resource_status: str, receivers: List[str] = None): self.resource = resource self.resource_type = resource_type self.resource_status = resource_status self.receivers_list = set(receivers) +""" +Notification Bundle - Contains list of notification events for different types of resources ( dataset, shares, environment ) +""" + + class NotificationResourceBundle: def __init__(self): self.share_object_notifications: List[NotificationResource] = [] @@ -34,6 +44,7 @@ def __init__(self): def _get_pending_share_notifications(session): pending_shares = ShareObjectRepository.get_shares_with_statuses(session=session, status_list=['Submitted']) + log.info(f"Found {len(pending_shares)} pending shares with share object status - Submitted") share_dataset_map: Dict[ShareObject, DatasetBase] = { share: DatasetBaseRepository.get_dataset_by_uri(session=session, dataset_uri=share.datasetUri) for share in pending_shares} @@ -43,17 +54,19 @@ def _get_pending_share_notifications(session): resource_type='Share Object', resource_status=f'{share.status} - Pending Approval', receivers=[share_dataset_map[share].SamlAdminGroupName, share_dataset_map[share].stewards]) - for share in share_dataset_map + for share in share_dataset_map ] def _get_unhealthy_share_notification(session): unhealthy_share_objects: List[ShareObject] = ShareObjectRepository.get_share_object_with_health_status( session=session, health_status_list=[ShareItemHealthStatus.Unhealthy.value]) + log.info(f"Found {len(unhealthy_share_objects)} unhealthy share objects") return [ NotificationResource(resource=share, resource_type='Share_object', resource_status='Unhealthy', receivers=[share.groupUri]) for share in 
unhealthy_share_objects] + def _get_unhealthy_stack_by_type(session, target_uri: str, target_type: Any): unhealthy_stack_status: List[StackStatus] = [ StackStatus.CREATE_FAILED.value, @@ -63,16 +76,24 @@ def _get_unhealthy_stack_by_type(session, target_uri: str, target_type: Any): StackStatus.ROLLBACK_FAILED.value ] resource_objects = session.query(target_type).all() - unhealthy_datasets_notification_resources: List[NotificationResource] = [] + unhealthy_stack_notification_resources: List[NotificationResource] = [] + log.info(f"Found {len(unhealthy_stack_notification_resources)} unhealthy {target_type}") + # Check if stack associated with these datasets / environment exists + # If yes, create a notification resource for resource in resource_objects: - stack = StackRepository.find_stack_by_target_uri(session=session, target_uri=resource.__getattribute__(target_uri), + stack = StackRepository.find_stack_by_target_uri(session=session, + target_uri=resource.__getattribute__(target_uri), statuses=unhealthy_stack_status) if stack is not None: - notification_resource = NotificationResource(resource=resource, resource_type=target_type.__name__, resource_status=stack.status, receivers=_get_receivers_for_stack(resource=resource, target_type=target_type)) - unhealthy_datasets_notification_resources.append(notification_resource) + notification_resource = NotificationResource(resource=resource, resource_type=target_type.__name__, + resource_status=stack.status, + receivers=_get_receivers_for_stack(resource=resource, + target_type=target_type)) + unhealthy_stack_notification_resources.append(notification_resource) + + return unhealthy_stack_notification_resources - return unhealthy_datasets_notification_resources def _get_receivers_for_stack(resource, target_type): if target_type.__name__ == 'Dataset': @@ -80,45 +101,66 @@ def _get_receivers_for_stack(resource, target_type): if target_type.__name__ == 'Environment': return [resource.SamlGroupName] +""" +Function to create a map 
of group name : resource bundle, where each resource bundle contains dataset, share and environment notification lists. +Iterated over all the notification ( NotificationResources ) and then segregate based on the dataset, shares & environment notifications and map the bundle to a team. +""" def _map_groups_to_resource_bundles(list_of_notifications: List[NotificationResource], resource_bundle_type: str): for notification in list_of_notifications: # Get all the receivers groups notification_receiver_groups = notification.receivers_list for receiver_group_name in notification_receiver_groups: - if receiver_group_name in group_name_to_resource_map: - resource_bundle = group_name_to_resource_map.get(receiver_group_name) + if receiver_group_name in group_name_to_resource_bundle_map: + resource_bundle = group_name_to_resource_bundle_map.get(receiver_group_name) resource_bundle.__getattribute__(resource_bundle_type).append(notification) else: resource_bundle = NotificationResourceBundle() resource_bundle.__getattribute__(resource_bundle_type).append(notification) - group_name_to_resource_map[receiver_group_name] = resource_bundle + group_name_to_resource_bundle_map[receiver_group_name] = resource_bundle + def send_reminder_email(engine): task_exceptions = [] + resources_type_tuple = () try: with engine.scoped_session() as session: # Get all shares in submitted state pending_share_notification_resources = _get_pending_share_notifications(session=session) - + resources_type_tuple.append((pending_share_notification_resources, "share_object_notifications")) # Todo : Check if distinct needed for the share object repository + # Get all shares in unhealthy state unhealthy_share_objects_notification_resources = _get_unhealthy_share_notification(session=session) - + resources_type_tuple.append((unhealthy_share_objects_notification_resources, "share_object_notifications")) # Get all the dataset which are in unhealthy state - unhealthy_datasets_notification_resources = 
_get_unhealthy_stack_by_type(session=session, target_uri='datasetUri', target_type=DatasetBase) - + unhealthy_datasets_notification_resources = _get_unhealthy_stack_by_type(session=session, + target_uri='datasetUri', + target_type=DatasetBase) + resources_type_tuple.append((unhealthy_share_objects_notification_resources, "dataset_object_notifications")) # Get all the environments which are in unhealthy state - unhealthy_environment_notification_resources = _get_unhealthy_stack_by_type(session=session, target_uri='environmentUri', target_type=Environment) + unhealthy_environment_notification_resources = _get_unhealthy_stack_by_type(session=session, + target_uri='environmentUri', + target_type=Environment) + resources_type_tuple.append( + (unhealthy_environment_notification_resources, "environment_object_notifications")) + + # For each notification resource ( i.e. share notification, dataset notification, etc ), + # function _map_groups_to_resource_bundles maps each team name : resource bundle + # Equivalent to calling + # _map_groups_to_resource_bundles(list_of_notifications=pending_share_notification_resources, + # resource_bundle_type="share_object_notifications") + # _map_groups_to_resource_bundles(list_of_notifications=unhealthy_share_objects_notification_resources, + # resource_bundle_type="share_object_notifications") .... 
- _map_groups_to_resource_bundles(list_of_notifications=pending_share_notification_resources, resource_bundle_type="share_object_notifications") - _map_groups_to_resource_bundles(list_of_notifications=unhealthy_share_objects_notification_resources, resource_bundle_type="share_object_notifications") - _map_groups_to_resource_bundles(list_of_notifications=unhealthy_datasets_notification_resources, resource_bundle_type="dataset_object_notifications") - _map_groups_to_resource_bundles(list_of_notifications=unhealthy_environment_notification_resources, resource_bundle_type="environment_object_notifications") + for notification_resources, resource_bundle_type in resources_type_tuple: + _map_groups_to_resource_bundles(list_of_notifications=notification_resources, resource_bundle_type=resource_bundle_type) - for group, resource_bundle in group_name_to_resource_map.items(): + for group, resource_bundle in group_name_to_resource_bundle_map.items(): email_body = _construct_email_body(resource_bundle) + log.debug(email_body) subject = 'Attention Required | Data.all weekly digest' try: - SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=email_body, recipient_groups_list=[group]) + SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=email_body, + recipient_groups_list=[group]) except Exception as e: log.error(f"Error occurred in sending email while weekly reminder task due to: {e}") task_exceptions.append(f"Error occurred in sending email while weekly reminder task due to: {e}") @@ -127,6 +169,7 @@ def send_reminder_email(engine): task_exceptions.append(f"Error occurred while running the weekly reminder task: {e}") finally: if len(task_exceptions) > 0: + log.info("Sending email notifications to the admin team") AdminNotificationService().notify_admins_with_error_log( process_error="Error occurred while running the weekly reminder task", error_logs=task_exceptions, @@ -134,7 +177,6 @@ def 
send_reminder_email(engine): ) - def _construct_email_body(resource_bundle: NotificationResourceBundle): msg_heading = """ Dear Team,

@@ -147,13 +189,16 @@ def _construct_email_body(resource_bundle: NotificationResourceBundle): """ msg_content = """""" share_object_table_content = _create_table_for_resource(resource_bundle.share_object_notifications, "shareUri", - "/console/shares/") if len(resource_bundle.share_object_notifications) > 0 else "" + "/console/shares/") if len( + resource_bundle.share_object_notifications) > 0 else "" dataset_object_table_content = _create_table_for_resource(resource_bundle.dataset_object_notifications, "datasetUri", - "/console/s3-datasets/") if len(resource_bundle.dataset_object_notifications) > 0 else "" + "/console/s3-datasets/") if len( + resource_bundle.dataset_object_notifications) > 0 else "" environment_object_table_content = _create_table_for_resource(resource_bundle.environment_object_notifications, "environmentUri", - "/console/environments/") if len(resource_bundle.environment_object_notifications) > 0 else "" + "/console/environments/") if len( + resource_bundle.environment_object_notifications) > 0 else "" msg_content += share_object_table_content + dataset_object_table_content + environment_object_table_content + "

" @@ -165,6 +210,7 @@ def _construct_email_body(resource_bundle: NotificationResourceBundle): return msg_heading + msg_content + msg_footer + def _create_table_for_resource(list_of_resources, uri_attr, link_uri): table_heading = """ @@ -206,11 +252,10 @@ def _create_table_for_resource(list_of_resources, uri_attr, link_uri): return table - if __name__ == '__main__': log.info("Starting weekly reminders task") load_modules(modes={ImportMode.SHARES_TASK}) ENVNAME = os.environ.get('envname', 'dkrcompose') ENGINE = get_engine(envname=ENVNAME) - group_name_to_resource_map: Dict[str, NotificationResourceBundle] = {} + group_name_to_resource_bundle_map: Dict[str, NotificationResourceBundle] = {} send_reminder_email(engine=ENGINE) diff --git a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py index b648e8573..fe285564b 100644 --- a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py +++ b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py @@ -61,7 +61,7 @@ def fetch_omics_workflows(engine): OmicsRepository(session).save_omics_workflow(omicsWorkflow) return True except Exception as e: - log.error(f'Error occured while processing omics workflow task due to: {e}') + log.error(f'Error occurred while processing omics workflow task due to: {e}') AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while processing omics workflow task', error_logs=[str(e)], diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py index 9ba608534..cb120b437 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py @@ -11,12 +11,46 @@ class DatasetTableNotifications: def __init__(self, dataset: S3Dataset): self.dataset: S3Dataset = dataset + """ + 
Sends email notification on glue table updates to the dataset owners. + Also, if there exists shares on that dataset, then send email notifications to the requestors informing updates + table_status_map - Dictionary of GlueTableName and table status ( InSync, Deleted, etc ) + """ def notify_dataset_table_updates(self, session, table_status_map: Dict[str, str]): + # Construct and send email reminders for datasets + self._send_email_reminders_for_dataset(table_status_map) + + # Find all the shares made on this dataset + shares = ShareObjectRepository.find_dataset_shares(session=session, dataset_uri=self.dataset.datasetUri, share_statues=['Processed']) + if shares: + for share in shares: + self._send_email_notification_for_share(share, table_status_map) + + def _send_email_notification_for_share(self, share, table_status_map): + subject = f"Alert: Data.all Update | Glue table updated for dataset: {self.dataset.name}" + msg_footer = f""" + You have an active share with uri: {share.shareUri}. If there is any table requested by you on the dataset: {self.dataset.name} for that share it may have been affected in case if the tables are deleted.
+ Note: Please check with the dataset owner if there is any missing table from your share - as it is likely deleted from the dataset.
If the table exists in the dataset and is successfully shared but you are unable to access the table, then please reach out to the data.all team

+ Regards, + data.all team + """ + table_content = self._construct_html_table_from_glue_status_map(table_status_map) + msg_body = f""" + Dear Team,

+ Following tables have been updated for dataset: {self.dataset.name}

+ + {table_content}

+ """ + msg = msg_body + msg_footer + SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=msg, + recipient_groups_list=[share.groupUri]) + + def _send_email_reminders_for_dataset(self, table_status_map): subject = f"Data.all Update | Glue tables updated for dataset: {self.dataset.name}" table_content = self._construct_html_table_from_glue_status_map(table_status_map) msg_body = f""" Dear Team,

- Following tables have been updated for dataset: {self.dataset.name}

+ Following tables have been updated for dataset: {self.dataset.name}.

{table_content}

""" @@ -25,22 +59,10 @@ def notify_dataset_table_updates(self, session, table_status_map: Dict[str, str data.all team """ msg = msg_body + msg_footer - - SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=msg, recipient_groups_list=[self.dataset.SamlAdminGroupName, self.dataset.stewards]) - - # Find all the shares made on this dataset - shares = ShareObjectRepository.find_dataset_shares(session=session, dataset_uri=self.dataset.datasetUri, share_statues=['Processed']) - if shares: - subject = f"Alert: Data.all Update | Glue table updated for dataset: {self.dataset.name}" - for share in shares: - msg_footer = f""" - You have an active share with uri: {share.shareUri}. If there is any table requested by you on the dataset: {self.dataset.name} for that share it may have been affected in case if the tables are deleted.
- Note: Please check with the dataset owner if there is any missing table from your share - as it is likely deleted from the dataset.
If the table exists in the dataset and is successfully shared but you are unable to access the table, then please reach out to the data.all team

- Regards, - data.all team - """ - msg = msg_body + msg_footer - SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=msg, recipient_groups_list=[share.groupUri]) + SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=msg, + recipient_groups_list=[ + self.dataset.SamlAdminGroupName, + self.dataset.stewards]) @classmethod def _construct_html_table_from_glue_status_map(cls, table_status_map): diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py index bf95beda7..f909a3da9 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py @@ -158,6 +158,7 @@ def sync_existing_tables(session, uri, glue_tables=None): DatasetTableRepository.sync_table_columns(session, updated_table, table) + log.debug(f'Updated tables map: {updated_table_status_map}') return updated_table_status_map @staticmethod diff --git a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py index d95c71a5d..e8ea7cb90 100644 --- a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py +++ b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py @@ -54,8 +54,8 @@ def sync_tables(engine): table_status_map = DatasetTableService.sync_existing_tables(session, uri=dataset.datasetUri, glue_tables=tables) - # Send notifications if table_status_map: + log.info('Sending email notification after dataset table updates were found') try: DatasetTableNotifications(dataset=dataset).notify_dataset_table_updates(session=session, table_status_map=table_status_map) except Exception as e: diff --git a/backend/dataall/modules/shares_base/services/sharing_service.py b/backend/dataall/modules/shares_base/services/sharing_service.py index 9dee9c7c3..cc95b79cc 100644 --- 
a/backend/dataall/modules/shares_base/services/sharing_service.py +++ b/backend/dataall/modules/shares_base/services/sharing_service.py @@ -136,7 +136,6 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: new_share_state = share_object_sm.run_transition(ShareObjectActions.Finish.value) share_object_sm.update_state(session, share_data.share, new_share_state) if not share_successful: - # Create UI and email notifications ShareNotificationService(session=session, dataset=share_data.dataset, share=share_data.share).notify_share_object_failed() return share_successful @@ -242,7 +241,6 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: new_share_state = share_sm.run_transition(ShareObjectActions.Finish.value) share_sm.update_state(session, share_data.share, new_share_state) if not revoke_successful: - # Create UI and email notifications ShareNotificationService(session=session, dataset=share_data.dataset, share=share_data.share).notify_share_object_failed() return revoke_successful diff --git a/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py b/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py index 54d2574b8..6aadc9deb 100644 --- a/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py +++ b/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py @@ -18,6 +18,7 @@ def persistent_email_reminders(engine): A method used by the scheduled ECS Task to run persistent_email_reminder() process against ALL active share objects within data.all and send emails to all pending shares. 
""" + task_exceptions = [] try: with engine.scoped_session() as session: log.info('Running Persistent Email Reminders Task') @@ -35,11 +36,14 @@ def persistent_email_reminders(engine): log.info('Completed Persistent Email Reminders Task') except Exception as e: log.error(f'Error while running persistent email reminder task: {e}') - AdminNotificationService().notify_admins_with_error_log( - process_name='Persistent Email Service', - error_logs=[str(e)], - process_error='Error while running persistent email reminder task' - ) + task_exceptions.append(f'Error while running persistent email reminder task: {e}') + finally: + if len(task_exceptions) > 0: + AdminNotificationService().notify_admins_with_error_log( + process_name='Persistent Email Task', + error_logs=task_exceptions, + process_error='Error while running persistent email reminder task' + ) if __name__ == '__main__': diff --git a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py index 368a1732c..ed6e04576 100644 --- a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py @@ -69,7 +69,6 @@ def process_reapply_shares(cls, engine): with engine.scoped_session() as session: all_share_objects: [ShareObject] = ShareObjectRepository.list_all_active_share_objects(session) log.info(f'Found {len(all_share_objects)} share objects ') - share_object: ShareObject processed_share_objects, task_exceptions = cls._reapply_share_objects(engine=engine, session=session, share_objects=all_share_objects) return processed_share_objects diff --git a/deploy/stacks/container.py b/deploy/stacks/container.py index e555e8c7c..9f7606536 100644 --- a/deploy/stacks/container.py +++ b/deploy/stacks/container.py @@ -207,6 +207,7 @@ def __init__( self.add_share_reapplier_task() self.add_omics_fetch_workflows_task() self.add_persistent_email_reminders_task() + 
self.add_weekly_reminder_task() self.add_share_expiration_task() @run_if(['modules.s3_datasets.active', 'modules.dashboards.active']) @@ -372,6 +373,34 @@ def add_persistent_email_reminders_task(self): ) self.ecs_task_definitions_families.append(persistent_email_reminders_task.task_definition.family) + # Config for persistent reminder will be updated in the Stage 2 for GH - 1420 + @run_if(['modules.datasets_base.features.share_notifications.email.persistent_reminders']) + def add_weekly_reminder_task(self): + weekly_email_reminders_task, weekly_email_reminders_task_def = self.set_scheduled_task( + cluster=self.ecs_cluster, + command=[ + 'python3.9', + '-m', + 'dataall.modules.notifications.tasks.weekly_digest_reminder', + ], + container_id='container', + ecr_repository=self._ecr_repository, + environment=self.env_vars, + image_tag=self._cdkproxy_image_tag, + log_group=self.create_log_group( + self._envname, self._resource_prefix, log_group_name='weekly-email-reminders' + ), + schedule_expression=Schedule.expression('cron(0 9 ? 
* 2 *)'), # Run at 9:00 AM UTC every Monday + scheduled_task_id=f'{self._resource_prefix}-{self._envname}-weekly-email-reminders-schedule', + task_id=f'{self._resource_prefix}-{self._envname}-weekly-email-reminders', + task_role=self.task_role, + vpc=self._vpc, + security_group=self.scheduled_tasks_sg, + prod_sizing=self._prod_sizing, + ) + self.ecs_task_definitions_families.append(weekly_email_reminders_task.task_definition.family) + + @run_if(['modules.s3_datasets.active']) def add_subscription_task(self): subscriptions_task, subscription_task_def = self.set_scheduled_task( From 99ed2ece21c841d0bcd9d7a3d66d8461eb39d9c8 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Fri, 10 Jan 2025 14:01:33 -0600 Subject: [PATCH 07/26] Syncing changes from local deploy --- .../services/ses_email_notification_service.py | 6 +++--- .../modules/s3_datasets/db/dataset_table_repositories.py | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/backend/dataall/modules/notifications/services/ses_email_notification_service.py b/backend/dataall/modules/notifications/services/ses_email_notification_service.py index 000c0d2da..795580b7a 100644 --- a/backend/dataall/modules/notifications/services/ses_email_notification_service.py +++ b/backend/dataall/modules/notifications/services/ses_email_notification_service.py @@ -1,7 +1,6 @@ # Email Notification Provider implements the email notification service abstract method import logging -from dataall.base.aws.cognito import Cognito from dataall.base.aws.ses import Ses from dataall.base.config import config from dataall.base.services.service_provider_factory import ServiceProviderFactory @@ -38,9 +37,8 @@ def send_email_task(subject, message, recipient_groups_list, recipient_email_lis email_provider = SESEmailNotificationService.get_email_provider_instance( recipient_groups_list, recipient_email_list ) + identityProvider = ServiceProviderFactory.get_service_provider_instance() try: - identityProvider = 
ServiceProviderFactory.get_service_provider_instance() - email_ids_to_send_emails = email_provider.get_email_ids_from_groupList( email_provider.recipient_group_list, identityProvider ) @@ -51,6 +49,8 @@ def send_email_task(subject, message, recipient_groups_list, recipient_email_lis SESEmailNotificationService.send_email_to_users(email_ids_to_send_emails, email_provider, message, subject) except Exception as e: + email_ids_to_send_emails = email_provider.get_email_ids_from_groupList(['DAAdministrators'] , identityProvider) + SESEmailNotificationService.send_email_to_users(email_ids_to_send_emails, email_provider, f'Error sending email due to: {e}', 'Data.all alert | Attention Required | Failure in: Email Notification Service') raise e else: return True diff --git a/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py b/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py index f46c19567..1c325feca 100644 --- a/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py +++ b/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py @@ -66,9 +66,12 @@ def update_existing_tables_status(existing_tables, glue_tables): updated_tables_status_map: Dict[str: str] = {} for existing_table in existing_tables: if existing_table.GlueTableName not in [t['Name'] for t in glue_tables]: - existing_table.LastGlueTableStatus = 'Deleted' - updated_tables_status_map[existing_table.GlueTableName] = 'Deleted' - logger.info(f'Existing Table {existing_table.GlueTableName} status set to Deleted from Glue') + if existing_table.LastGlueTableStatus != 'Deleted': + existing_table.LastGlueTableStatus = 'Deleted' + updated_tables_status_map[existing_table.GlueTableName] = 'Deleted' + logger.info(f'Existing Table {existing_table.GlueTableName} status set to Deleted from Glue') + else: + logger.info(f'Existing Table {existing_table.GlueTableName} status already set Deleted') elif ( existing_table.GlueTableName in [t['Name'] for t in glue_tables] 
and existing_table.LastGlueTableStatus == 'Deleted' From c119c556b38d3867f3a98775f1f63435abf5e992 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Tue, 14 Jan 2025 13:10:54 -0600 Subject: [PATCH 08/26] Adding new changes --- .../ses_email_notification_service.py | 5 +- .../tasks/weekly_digest_reminder.py | 18 +- .../services/share_notification_service.py | 2 +- .../shares_base/services/sharing_service.py | 581 ++++++++++-------- 4 files changed, 322 insertions(+), 284 deletions(-) diff --git a/backend/dataall/modules/notifications/services/ses_email_notification_service.py b/backend/dataall/modules/notifications/services/ses_email_notification_service.py index 795580b7a..b75890b11 100644 --- a/backend/dataall/modules/notifications/services/ses_email_notification_service.py +++ b/backend/dataall/modules/notifications/services/ses_email_notification_service.py @@ -74,10 +74,9 @@ def create_and_send_email_notifications(subject, msg, recipient_groups_list=None if recipient_email_ids is None: recipient_email_ids = [] - share_notification_config = config.get_property( + if share_notification_config := config.get_property( 'modules.datasets_base.features.share_notifications', default=None - ) - if share_notification_config: + ): for share_notification_config_type in share_notification_config.keys(): n_config = share_notification_config[share_notification_config_type] if n_config.get('active', False) == True: diff --git a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py index d40d597fe..7320389e9 100644 --- a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py +++ b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py @@ -1,6 +1,6 @@ import logging import os -from typing import List, Dict, Any +from typing import List, Dict, Any, Tuple from dataall.base.db import get_engine from dataall.base.loader import load_modules, ImportMode @@ -63,7 
+63,7 @@ def _get_unhealthy_share_notification(session): session=session, health_status_list=[ShareItemHealthStatus.Unhealthy.value]) log.info(f"Found {len(unhealthy_share_objects)} unhealthy share objects") return [ - NotificationResource(resource=share, resource_type='Share_object', resource_status='Unhealthy', + NotificationResource(resource=share, resource_type='Share Object', resource_status='Unhealthy', receivers=[share.groupUri]) for share in unhealthy_share_objects] @@ -102,7 +102,7 @@ def _get_receivers_for_stack(resource, target_type): return [resource.SamlGroupName] """ -Function to create a map of group name : resource bundle, where each resource bundle contains dataset, share and environment notification lists. +Function to create a map of {group name : resource bundle}, where each resource bundle contains dataset, share and environment notification lists. Iterated over all the notification ( NotificationResources ) and then segregate based on the dataset, shares & environment notifications and map the bundle to a team. """ def _map_groups_to_resource_bundles(list_of_notifications: List[NotificationResource], resource_bundle_type: str): @@ -121,7 +121,7 @@ def _map_groups_to_resource_bundles(list_of_notifications: List[NotificationReso def send_reminder_email(engine): task_exceptions = [] - resources_type_tuple = () + resources_type_tuple: List[Tuple] = [] try: with engine.scoped_session() as session: # Get all shares in submitted state @@ -145,12 +145,6 @@ def send_reminder_email(engine): # For each notification resource ( i.e. 
share notification, dataset notification, etc ), # function _map_groups_to_resource_bundles maps each team name : resource bundle - # Equivalent to calling - # _map_groups_to_resource_bundles(list_of_notifications=pending_share_notification_resources, - # resource_bundle_type="share_object_notifications") - # _map_groups_to_resource_bundles(list_of_notifications=unhealthy_share_objects_notification_resources, - # resource_bundle_type="share_object_notifications") .... - for notification_resources, resource_bundle_type in resources_type_tuple: _map_groups_to_resource_bundles(list_of_notifications=notification_resources, resource_bundle_type=resource_bundle_type) @@ -181,11 +175,11 @@ def _construct_email_body(resource_bundle: NotificationResourceBundle): msg_heading = """ Dear Team,

- This email contains data.al resources where you need to take some actions. For resources which are in unhealthy state we request you to take actions ASAP so as to minimize any disruptions.

+ This email contains data.all resources where you need to take some actions. For resources which are in unhealthy state we request you to take actions ASAP so as to minimize any disruptions.

Helpful Tips:

For shares which are in unhealthy state, you can re-apply share by clicking on the "Reapply share" button
- For environments and datasets which are in unhealthy state, you can go to the AWS account and check the stack associated with that environment/dataset and check the root cause of the stack. Once you address the root cause issue, you can click on "Update Stack" on the Stack Page.


+ For environments and datasets which are in unhealthy state, you can go to the AWS account and check the stack associated with that environment/dataset and check the root cause of the stack. Once you address the root cause issue, you can click on "Update Stack" on the stack page of the data.all resource in the data.all UI


""" msg_content = """""" share_object_table_content = _create_table_for_resource(resource_bundle.share_object_notifications, "shareUri", diff --git a/backend/dataall/modules/shares_base/services/share_notification_service.py b/backend/dataall/modules/shares_base/services/share_notification_service.py index 896deafd4..a48ba6c66 100644 --- a/backend/dataall/modules/shares_base/services/share_notification_service.py +++ b/backend/dataall/modules/shares_base/services/share_notification_service.py @@ -272,7 +272,7 @@ def notify_share_object_failed(self): ) msg = ( f'Share request made for dataset: {self.dataset.label} with requestor principal: {self.share.principalRoleName} failed.

' - f'You can delete and resubmit the failed items in the share. If your share item still remains in the Share_Failed state then please get in touch with data.all admins.' + f'Please reach out to the data.all team for troubleshooting problems with this share' ) msg_footer = """ Regards,
diff --git a/backend/dataall/modules/shares_base/services/sharing_service.py b/backend/dataall/modules/shares_base/services/sharing_service.py index cc95b79cc..50ca4d59b 100644 --- a/backend/dataall/modules/shares_base/services/sharing_service.py +++ b/backend/dataall/modules/shares_base/services/sharing_service.py @@ -59,86 +59,100 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: True if sharing succeeds, False if sharing fails """ - with engine.scoped_session() as session: - share_data, share_items = cls._get_share_data_and_items( - session, share_uri, ShareItemStatus.Share_Approved.value - ) - share_object_sm = ShareObjectSM(share_data.share.status) - share_item_sm = ShareItemSM(ShareItemStatus.Share_Approved.value) - - log.info(f'Starting share {share_data.share.shareUri}') - new_share_state = share_object_sm.run_transition(ShareObjectActions.Start.value) - share_object_sm.update_state(session, share_data.share, new_share_state) - - resources = [(share_data.dataset.datasetUri, share_data.dataset.__tablename__)] - resources.append( - (share_data.share.principalId, ConsumptionRole.__tablename__) - if share_data.share.principalType == PrincipalType.ConsumptionRole.value - else ( - f'{share_data.share.principalId}-{share_data.share.environmentUri}', - EnvironmentGroup.__tablename__, + task_exceptions = [] + share_successful = True + try: + with engine.scoped_session() as session: + share_data, share_items = cls._get_share_data_and_items( + session, share_uri, ShareItemStatus.Share_Approved.value ) - ) + share_object_sm = ShareObjectSM(share_data.share.status) + share_item_sm = ShareItemSM(ShareItemStatus.Share_Approved.value) - share_successful = True - try: - with ResourceLockRepository.acquire_lock_with_retry( - resources=resources, - session=session, - acquired_by_uri=share_data.share.shareUri, - acquired_by_type=share_data.share.__tablename__, - ): - for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): - try: - 
log.info(f'Granting permissions of {type.value}') - shareable_items = ShareObjectRepository.get_share_data_items_by_type( - session, - share_data.share, - processor.shareable_type, - processor.shareable_uri, - status=ShareItemStatus.Share_Approved.value, - ) - if shareable_items: - success = processor.Processor( - session, share_data, shareable_items - ).process_approved_shares() - log.info(f'Sharing {type.value} succeeded = {success}') - if not success: - share_successful = False - else: - log.info(f'There are no items to share of type {type.value}') - except Exception as e: - log.exception(f'Error occurred during sharing of {type.value}') - ShareStatusRepository.update_share_item_status_batch( - session, - share_uri, - old_status=ShareItemStatus.Share_Approved.value, - new_status=ShareItemStatus.Share_Failed.value, - share_item_type=processor.type, - ) - ShareStatusRepository.update_share_item_status_batch( - session, - share_uri, - old_status=ShareItemStatus.Share_In_Progress.value, - new_status=ShareItemStatus.Share_Failed.value, - share_item_type=processor.type, - ) - share_successful = False - return share_successful - - except Exception as e: - log.exception('Error occurred during share approval') - new_share_item_state = share_item_sm.run_transition(ShareItemActions.Failure.value) - share_item_sm.update_state(session, share_data.share.shareUri, new_share_item_state) - share_successful = False - - finally: - new_share_state = share_object_sm.run_transition(ShareObjectActions.Finish.value) + log.info(f'Starting share {share_data.share.shareUri}') + new_share_state = share_object_sm.run_transition(ShareObjectActions.Start.value) share_object_sm.update_state(session, share_data.share, new_share_state) - if not share_successful: - ShareNotificationService(session=session, dataset=share_data.dataset, - share=share_data.share).notify_share_object_failed() - return share_successful + + resources = [(share_data.dataset.datasetUri, share_data.dataset.__tablename__)] 
+ resources.append( + (share_data.share.principalId, ConsumptionRole.__tablename__) + if share_data.share.principalType == PrincipalType.ConsumptionRole.value + else ( + f'{share_data.share.principalId}-{share_data.share.environmentUri}', + EnvironmentGroup.__tablename__, + ) + ) + + try: + with ResourceLockRepository.acquire_lock_with_retry( + resources=resources, + session=session, + acquired_by_uri=share_data.share.shareUri, + acquired_by_type=share_data.share.__tablename__, + ): + for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): + try: + log.info(f'Granting permissions of {type.value}') + shareable_items = ShareObjectRepository.get_share_data_items_by_type( + session, + share_data.share, + processor.shareable_type, + processor.shareable_uri, + status=ShareItemStatus.Share_Approved.value, + ) + if shareable_items: + success = processor.Processor( + session, share_data, shareable_items + ).process_approved_shares() + log.info(f'Sharing {type.value} succeeded = {success}') + if not success: + share_successful = False + else: + log.info(f'There are no items to share of type {type.value}') + except Exception as e: + log.exception(f'Error occurred during sharing of {type.value}') + ShareStatusRepository.update_share_item_status_batch( + session, + share_uri, + old_status=ShareItemStatus.Share_Approved.value, + new_status=ShareItemStatus.Share_Failed.value, + share_item_type=processor.type, + ) + ShareStatusRepository.update_share_item_status_batch( + session, + share_uri, + old_status=ShareItemStatus.Share_In_Progress.value, + new_status=ShareItemStatus.Share_Failed.value, + share_item_type=processor.type, + ) + task_exceptions.append(str(e)) + share_successful = False + except Exception as e: + log.exception(f'Error occurred during share approval: {e}') + new_share_item_state = share_item_sm.run_transition(ShareItemActions.Failure.value) + share_item_sm.update_state(session, share_data.share.shareUri, new_share_item_state) + 
task_exceptions.append(str(e)) + share_successful = False + finally: + new_share_state = share_object_sm.run_transition(ShareObjectActions.Finish.value) + share_object_sm.update_state(session, share_data.share, new_share_state) + + except Exception as e: + log.error(f'Unexpected error occurred while processing share with uri: {share_uri} due to: {e}') + share_successful = False + task_exceptions.append(str(e)) + finally: + if not share_successful: + ShareNotificationService(session=session, dataset=share_data.dataset, + share=share_data.share).notify_share_object_failed() + if len(task_exceptions) > 0: + AdminNotificationService().notify_admins_with_error_log( + process_error=f'Error occurred while processing share with uri: {share_uri}', + process_name='Sharing Service', + error_logs=task_exceptions + ) + + return share_successful @classmethod def revoke_share(cls, engine: Engine, share_uri: str) -> bool: @@ -159,99 +173,113 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: True if revoking succeeds False if revoking failed """ - with engine.scoped_session() as session: - share_data, share_items = cls._get_share_data_and_items( - session, share_uri, ShareItemStatus.Revoke_Approved.value - ) + task_exceptions = [] + revoke_successful = True + try: + with engine.scoped_session() as session: + share_data, share_items = cls._get_share_data_and_items( + session, share_uri, ShareItemStatus.Revoke_Approved.value + ) - share_sm = ShareObjectSM(share_data.share.status) - share_item_sm = ShareItemSM(ShareItemStatus.Revoke_Approved.value) + share_sm = ShareObjectSM(share_data.share.status) + share_item_sm = ShareItemSM(ShareItemStatus.Revoke_Approved.value) - log.info(f'Starting revoke {share_data.share.shareUri}') - new_share_state = share_sm.run_transition(ShareObjectActions.Start.value) - share_sm.update_state(session, share_data.share, new_share_state) + log.info(f'Starting revoke {share_data.share.shareUri}') + new_share_state = 
share_sm.run_transition(ShareObjectActions.Start.value) + share_sm.update_state(session, share_data.share, new_share_state) - resources = [(share_data.dataset.datasetUri, share_data.dataset.__tablename__)] - resources.append( - (share_data.share.principalId, ConsumptionRole.__tablename__) - if share_data.share.principalType == PrincipalType.ConsumptionRole.value - else ( - f'{share_data.share.principalId}-{share_data.share.environmentUri}', - EnvironmentGroup.__tablename__, + resources = [(share_data.dataset.datasetUri, share_data.dataset.__tablename__)] + resources.append( + (share_data.share.principalId, ConsumptionRole.__tablename__) + if share_data.share.principalType == PrincipalType.ConsumptionRole.value + else ( + f'{share_data.share.principalId}-{share_data.share.environmentUri}', + EnvironmentGroup.__tablename__, + ) ) - ) - revoke_successful = True - try: - with ResourceLockRepository.acquire_lock_with_retry( - resources=resources, - session=session, - acquired_by_uri=share_data.share.shareUri, - acquired_by_type=share_data.share.__tablename__, - ): - for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): - try: - log.info(f'Revoking permissions with {type.value}') - shareable_items = ShareObjectRepository.get_share_data_items_by_type( - session, - share_data.share, - processor.shareable_type, - processor.shareable_uri, - status=ShareItemStatus.Revoke_Approved.value, - ) - if shareable_items: - success = processor.Processor( - session, share_data, shareable_items - ).process_revoked_shares() - log.info(f'Revoking {type.value} succeeded = {success}') - if not success: - revoke_successful = False - else: - log.info(f'There are no items to revoke of type {type.value}') - except Exception as e: - log.error(f'Error occurred during share revoking of {type.value}: {e}') - ShareStatusRepository.update_share_item_status_batch( - session, - share_uri, - old_status=ShareItemStatus.Revoke_Approved.value, - 
new_status=ShareItemStatus.Revoke_Failed.value, - share_item_type=processor.type, - ) - ShareStatusRepository.update_share_item_status_batch( - session, - share_uri, - old_status=ShareItemStatus.Revoke_In_Progress.value, - new_status=ShareItemStatus.Revoke_Failed.value, - share_item_type=processor.type, - ) - revoke_successful = False - - return revoke_successful - except Exception as e: - log.error(f'Error occurred during share revoking: {e}') - new_share_item_state = share_item_sm.run_transition(ShareItemActions.Failure.value) - share_item_sm.update_state(session, share_data.share.shareUri, new_share_item_state) - revoke_successful = False - - finally: - existing_pending_items = ShareStatusRepository.check_pending_share_items(session, share_uri) - if existing_pending_items: - new_share_state = share_sm.run_transition(ShareObjectActions.FinishPending.value) - else: - new_share_state = share_sm.run_transition(ShareObjectActions.Finish.value) - share_sm.update_state(session, share_data.share, new_share_state) - if not revoke_successful: - ShareNotificationService(session=session, dataset=share_data.dataset, - share=share_data.share).notify_share_object_failed() - return revoke_successful + try: + with ResourceLockRepository.acquire_lock_with_retry( + resources=resources, + session=session, + acquired_by_uri=share_data.share.shareUri, + acquired_by_type=share_data.share.__tablename__, + ): + for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): + try: + log.info(f'Revoking permissions with {type.value}') + shareable_items = ShareObjectRepository.get_share_data_items_by_type( + session, + share_data.share, + processor.shareable_type, + processor.shareable_uri, + status=ShareItemStatus.Revoke_Approved.value, + ) + if shareable_items: + success = processor.Processor( + session, share_data, shareable_items + ).process_revoked_shares() + log.info(f'Revoking {type.value} succeeded = {success}') + if not success: + revoke_successful = False + else: + 
log.info(f'There are no items to revoke of type {type.value}') + except Exception as e: + log.error(f'Error occurred during share revoking of {type.value}: {e}') + ShareStatusRepository.update_share_item_status_batch( + session, + share_uri, + old_status=ShareItemStatus.Revoke_Approved.value, + new_status=ShareItemStatus.Revoke_Failed.value, + share_item_type=processor.type, + ) + ShareStatusRepository.update_share_item_status_batch( + session, + share_uri, + old_status=ShareItemStatus.Revoke_In_Progress.value, + new_status=ShareItemStatus.Revoke_Failed.value, + share_item_type=processor.type, + ) + task_exceptions.append(str(e)) + revoke_successful = False + except Exception as e: + log.error(f'Error occurred during share revoking: {e}') + new_share_item_state = share_item_sm.run_transition(ShareItemActions.Failure.value) + share_item_sm.update_state(session, share_data.share.shareUri, new_share_item_state) + revoke_successful = False + task_exceptions.append(str(e)) + finally: + existing_pending_items = ShareStatusRepository.check_pending_share_items(session, share_uri) + if existing_pending_items: + new_share_state = share_sm.run_transition(ShareObjectActions.FinishPending.value) + else: + new_share_state = share_sm.run_transition(ShareObjectActions.Finish.value) + share_sm.update_state(session, share_data.share, new_share_state) + + except Exception as e: + log.error(f'Unexpected error occurred while revoking a share with uri: {share_uri} due to: {e}') + revoke_successful = False + task_exceptions.append(str(e)) + finally: + if not revoke_successful: + ShareNotificationService(session=session, dataset=share_data.dataset, + share=share_data.share).notify_share_object_failed() + if len(task_exceptions) > 0: + AdminNotificationService().notify_admins_with_error_log( + process_error=f'Error occurred while revoking share with uri: {share_uri}', + process_name='Sharing Service', + error_logs=task_exceptions + ) + + return revoke_successful @classmethod def 
verify_share( - cls, - engine: Engine, - share_uri: str, - status: str = None, - healthStatus: str = ShareItemHealthStatus.PendingVerify.value, + cls, + engine: Engine, + share_uri: str, + status: str = None, + healthStatus: str = ShareItemHealthStatus.PendingVerify.value, ) -> bool: """ 1) Retrieves share data and items in specified status and health state (by default - PendingVerify) @@ -265,35 +293,44 @@ def verify_share( Returns True when completed ------- """ - with engine.scoped_session() as session: - share_object_item_health_status: List = [] - share_data, share_items = cls._get_share_data_and_items(session, share_uri, status, healthStatus) - for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): - try: - log.info(f'Verifying permissions with {type.value}') - shareable_items = ShareObjectRepository.get_share_data_items_by_type( - session, - share_data.share, - processor.shareable_type, - processor.shareable_uri, - status=status, - healthStatus=healthStatus, - ) - if shareable_items: - health_status = processor.Processor(session, share_data, shareable_items).verify_shares_health_status() - share_object_item_health_status.append(health_status) - else: - log.info(f'There are no items to verify of type {type.value}') - except Exception as e: - log.error(f'Error occurred during share verifying of {type.value}: {e}') - AdminNotificationService().notify_admins_with_error_log( - process_error=f'Error occurred during verification of share with uri: {share_data.share.shareUri} for processor type: {type.value} due to an unknown exception', - error_logs=[str(e)], process_name='Sharing Service') - - if False in share_object_item_health_status: - log.info(f'Sending notifications since share object item(s) for share: {share_data.share.shareUri} are in unhealthy state after verifying shares') - ShareNotificationService(session=session, dataset=share_data.dataset, share=share_data.share).notify_share_object_items_unhealthy() - return True + task_exceptions 
= [] + try: + with engine.scoped_session() as session: + health_status_list: List[bool] = [] + share_data, share_items = cls._get_share_data_and_items(session, share_uri, status, healthStatus) + for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): + try: + log.info(f'Verifying permissions with {type.value}') + shareable_items = ShareObjectRepository.get_share_data_items_by_type( + session, + share_data.share, + processor.shareable_type, + processor.shareable_uri, + status=status, + healthStatus=healthStatus, + ) + if shareable_items: + health_status = processor.Processor(session, share_data, shareable_items).verify_shares_health_status() + health_status_list.append(health_status) + else: + log.info(f'There are no items to verify of type {type.value}') + except Exception as e: + log.error(f'Error occurred during share verifying of {type.value}: {e}') + task_exceptions.append(str(e)) + if False in health_status_list: + log.info(f'Sending notifications since share object item(s) for share: {share_data.share.shareUri} are in unhealthy state after verifying shares') + ShareNotificationService(session=session, dataset=share_data.dataset, share=share_data.share).notify_share_object_items_unhealthy() + except Exception as e: + log.error(f'Unexpected error occurred while verifying share with uri: {share_uri} due to: {e}') + task_exceptions.append(str(e)) + finally: + if len(task_exceptions) > 0: + AdminNotificationService().notify_admins_with_error_log( + process_error=f'Error occurred during verification of share with uri: {share_data.share.shareUri} ', + error_logs=task_exceptions, + process_name='Sharing Service' + ) + return True @classmethod def reapply_share(cls, engine: Engine, share_uri: str) -> bool: @@ -315,89 +352,97 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: """ reapply_successful = True code_exception_list = [] - with engine.scoped_session() as session: - share_data, share_items = cls._get_share_data_and_items( - 
session, share_uri, None, ShareItemHealthStatus.PendingReApply.value - ) - resources = [(share_data.dataset.datasetUri, share_data.dataset.__tablename__)] - resources.append( - (share_data.share.principalId, ConsumptionRole.__tablename__) - if share_data.share.principalType == PrincipalType.ConsumptionRole.value - else ( - f'{share_data.share.principalId}-{share_data.share.environmentUri}', - EnvironmentGroup.__tablename__, + try: + with engine.scoped_session() as session: + share_data, share_items = cls._get_share_data_and_items( + session, share_uri, None, ShareItemHealthStatus.PendingReApply.value ) - ) - - try: - with ResourceLockRepository.acquire_lock_with_retry( - resources=resources, - session=session, - acquired_by_uri=share_data.share.shareUri, - acquired_by_type=share_data.share.__tablename__, - ): - for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): - try: - log.info(f'Reapplying permissions to {type.value}') - shareable_items = ShareObjectRepository.get_share_data_items_by_type( - session, - share_data.share, - processor.shareable_type, - processor.shareable_uri, - None, - ShareItemHealthStatus.PendingReApply.value, - ) - if shareable_items: - success = processor.Processor( - session, share_data, shareable_items - ).process_approved_shares() - log.info(f'Reapplying {type.value} succeeded = {success}') - if not success: - reapply_successful = False - if success: - log.info(f'Sending notifications to the share owner to inform that the share with uri: {share_data.share.shareUri} is now in healthy state') - ShareNotificationService(session=session, dataset=share_data.dataset, - share=share_data.share).notify_share_object_items_healthy() - else: - log.info(f'There are no items to reapply of type {type.value}') - except Exception as e: - log.error(f'Error occurred during share reapplying of {type.value}: {e}') - code_exception_list.append(f'Error occurred during reapplying of share with uri: {share_data.share.shareUri} for processor 
type: {type.value} due to an unknown exception: {e}') - - if not reapply_successful: - log.info( - f'Sending notifications since share object item(s) for share: {share_data.share.shareUri} are in unhealthy state after reapplying shares') - ShareNotificationService(session=session, dataset=share_data.dataset, - share=share_data.share).notify_share_object_items_unhealthy() - return reapply_successful - - except ResourceLockTimeout as timeout_exception: - ShareStatusRepository.update_share_item_health_status_batch( - session, - share_uri, - old_status=ShareItemHealthStatus.PendingReApply.value, - new_status=ShareItemHealthStatus.Unhealthy.value, - message=str(timeout_exception), + resources = [(share_data.dataset.datasetUri, share_data.dataset.__tablename__)] + resources.append( + (share_data.share.principalId, ConsumptionRole.__tablename__) + if share_data.share.principalType == PrincipalType.ConsumptionRole.value + else ( + f'{share_data.share.principalId}-{share_data.share.environmentUri}', + EnvironmentGroup.__tablename__, + ) ) - code_exception_list.append(str(timeout_exception)) - return False - - except Exception as e: - log.exception(f'Error occurred during share reapply: {e}') - code_exception_list.append(str(e)) - return False - finally: - if len(code_exception_list) > 0: - AdminNotificationService().notify_admins_with_error_log( - process_error=f'Error occurred during reapplying of share with uri: {share_data.share.shareUri}', - error_logs=code_exception_list, process_name='Sharing Service') + try: + with ResourceLockRepository.acquire_lock_with_retry( + resources=resources, + session=session, + acquired_by_uri=share_data.share.shareUri, + acquired_by_type=share_data.share.__tablename__, + ): + for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): + try: + log.info(f'Reapplying permissions to {type.value}') + shareable_items = ShareObjectRepository.get_share_data_items_by_type( + session, + share_data.share, + processor.shareable_type, + 
processor.shareable_uri, + None, + ShareItemHealthStatus.PendingReApply.value, + ) + if shareable_items: + success = processor.Processor( + session, share_data, shareable_items + ).process_approved_shares() + log.info(f'Reapplying {type.value} succeeded = {success}') + if not success: + reapply_successful = False + else: + log.info(f'There are no items to reapply of type {type.value}') + except Exception as e: + log.error(f'Error occurred during share reapplying of {type.value}: {e}') + code_exception_list.append(f'Error occurred during reapplying of share with uri: {share_data.share.shareUri} for processor type: {type.value} due to an unknown exception: {e}') + + if not reapply_successful: + log.info( + f'Sending notifications since share object item(s) for share: {share_data.share.shareUri} are in unhealthy state after reapplying shares') + ShareNotificationService(session=session, dataset=share_data.dataset, + share=share_data.share).notify_share_object_items_unhealthy() + else: + if len(code_exception_list) == 0: + log.info( + f'Sending notifications to the share owner to inform that the share with uri: {share_data.share.shareUri} is now in healthy state') + ShareNotificationService(session=session, dataset=share_data.dataset, + share=share_data.share).notify_share_object_items_healthy() + + except ResourceLockTimeout as timeout_exception: + ShareStatusRepository.update_share_item_health_status_batch( + session, + share_uri, + old_status=ShareItemHealthStatus.PendingReApply.value, + new_status=ShareItemHealthStatus.Unhealthy.value, + message=str(timeout_exception), + ) + code_exception_list.append(str(timeout_exception)) + except Exception as e: + log.error(f'Unexpected error occurred while reapplying share with uri: {share_uri} due to: {e}') + ShareStatusRepository.update_share_item_health_status_batch( + session, + share_uri, + old_status=ShareItemHealthStatus.PendingReApply.value, + new_status=ShareItemHealthStatus.Unhealthy.value, + message='Unexpected 
error occurred while reapplying share', + ) + code_exception_list.append(str(e)) + finally: + if len(code_exception_list) > 0: + AdminNotificationService().notify_admins_with_error_log( + process_error=f'Error occurred during reapplying of share with uri: {share_data.share.shareUri}', + error_logs=code_exception_list, + process_name='Sharing Service' + ) + return reapply_successful @classmethod def cleanup_share( - cls, - engine: Engine, - share_uri: str, + cls, + engine: Engine, + share_uri: str, ) -> bool: """ 1) Retrieves share data and items in share From f1e4079c77e7eb83e23e83b651223d659cbb0b27 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Tue, 14 Jan 2025 15:26:33 -0600 Subject: [PATCH 09/26] Minor correction in the create bucket policy code when access to bucket policu is removed --- backend/dataall/modules/s3_datasets_shares/aws/s3_client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/dataall/modules/s3_datasets_shares/aws/s3_client.py b/backend/dataall/modules/s3_datasets_shares/aws/s3_client.py index ff596c36a..77238fd26 100755 --- a/backend/dataall/modules/s3_datasets_shares/aws/s3_client.py +++ b/backend/dataall/modules/s3_datasets_shares/aws/s3_client.py @@ -173,6 +173,8 @@ def create_bucket_policy(self, bucket_name: str, policy: str, fix_malformed_prin else: log.error(f'Failed to create bucket policy. 
MalformedPolicy: {policy}') raise e + else: + raise e except Exception as e: log.error(f'Bucket policy created failed on bucket {bucket_name} of {self._account_id} : {e}') raise e From 870a835a5bfda2b0d213cb48d04cef39bd9bbde6 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Tue, 14 Jan 2025 18:04:41 -0600 Subject: [PATCH 10/26] New changes --- .../modules/notifications/tasks/weekly_digest_reminder.py | 2 +- .../modules/s3_datasets/services/dataset_table_notifications.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py index 7320389e9..2452ee084 100644 --- a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py +++ b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py @@ -135,7 +135,7 @@ def send_reminder_email(engine): unhealthy_datasets_notification_resources = _get_unhealthy_stack_by_type(session=session, target_uri='datasetUri', target_type=DatasetBase) - resources_type_tuple.append((unhealthy_share_objects_notification_resources, "dataset_object_notifications")) + resources_type_tuple.append((unhealthy_datasets_notification_resources, "dataset_object_notifications")) # Get all the environments which are in unhealthy state unhealthy_environment_notification_resources = _get_unhealthy_stack_by_type(session=session, target_uri='environmentUri', diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py index cb120b437..32af8239a 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py @@ -31,7 +31,7 @@ def _send_email_notification_for_share(self, share, table_status_map): msg_footer = f""" You have an active share with uri: {share.shareUri}. 
If there is any table requested by you on the dataset: {self.dataset.name} for that share it may have been affected in case if the tables are deleted.
Note: Please check with the dataset owner if there is any missing table from your share - as it is likely deleted from the dataset.
If the table exists in the dataset and is successfully shared but you are unable to access the table, then please reach out to the data.all team

- Regards, + Regards,
data.all team """ table_content = self._construct_html_table_from_glue_status_map(table_status_map) From 5b8ec01fca667bf40bf5c65da1d45c9393fab02d Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Tue, 14 Jan 2025 18:49:02 -0600 Subject: [PATCH 11/26] Adding new changes for admin notifications --- .../services/admin_notifications.py | 13 ++++++++++--- .../services/redshift_table_share_processor.py | 16 ++++++++++++++++ .../glue_table_share_processor.py | 16 ++++++++++++++++ .../s3_access_point_share_processor.py | 11 +++++++++++ .../s3_bucket_share_processor.py | 11 +++++++++++ .../services/share_notification_service.py | 1 - config.json | 3 ++- 7 files changed, 66 insertions(+), 5 deletions(-) diff --git a/backend/dataall/modules/notifications/services/admin_notifications.py b/backend/dataall/modules/notifications/services/admin_notifications.py index 8990a27b4..d97c60b07 100644 --- a/backend/dataall/modules/notifications/services/admin_notifications.py +++ b/backend/dataall/modules/notifications/services/admin_notifications.py @@ -1,11 +1,13 @@ +import logging from typing import List +from dataall.core.groups.db.constants import DataallGroups from dataall.modules.notifications.services.ses_email_notification_service import SESEmailNotificationService +from dataall.base.config import config +log = logging.getLogger(__name__) class AdminNotificationService: - admin_group = 'DAAdministrators' - """ Send email notifications to Admin Group i.e. 
DAAdministrators in data.all Args - @@ -15,6 +17,11 @@ class AdminNotificationService: """ @classmethod def notify_admins_with_error_log(cls, process_error: str, error_logs: List[str], process_name:str = ''): + if config.get_property( + 'modules.datasets_base.features.share_notifications.email.parameters.admin_notifications', default=False + ) is False: + log.info("Admin notifications are switched off") + return subject = f'Data.all alert | Attention Required | Failure in : {process_name}' email_message = f""" @@ -28,5 +35,5 @@ def notify_admins_with_error_log(cls, process_error: str, error_logs: List[str], SESEmailNotificationService.create_and_send_email_notifications( subject=subject, msg=email_message, - recipient_groups_list=[cls.admin_group] + recipient_groups_list=[DataallGroups.admin] ) \ No newline at end of file diff --git a/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py b/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py index 6c49c3dee..6c066df55 100644 --- a/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py +++ b/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py @@ -2,6 +2,7 @@ from datetime import datetime from typing import List from dataall.base.utils.naming_convention import NamingConventionService, NamingConventionPattern +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.shares_base.services.sharing_service import ShareData from dataall.modules.shares_base.services.share_processor_manager import SharesProcessorInterface from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository @@ -260,6 +261,11 @@ def process_approved_shares(self) -> bool: ShareStatusRepository.update_share_item_health_status( self.session, share_item, ShareItemHealthStatus.Unhealthy.value, str(e), datetime.now() 
) + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while processing redshift table share request', + process_name='redshift table share processor', + error_logs=[str(e)] + ) return False return success @@ -364,6 +370,11 @@ def process_revoked_shares(self) -> bool: ShareStatusRepository.update_share_item_health_status( self.session, share_item, ShareItemHealthStatus.Unhealthy.value, str(e), datetime.now() ) + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while revoking redshift table share request', + process_name='redshift tables share processor', + error_logs=[str(e)] + ) self.session.commit() try: if success: @@ -438,6 +449,11 @@ def process_revoked_shares(self) -> bool: ShareStatusRepository.update_share_item_health_status( self.session, share_item, ShareItemHealthStatus.Unhealthy.value, str(e), datetime.now() ) + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while revoking redshift table share request', + process_name='redshift tables share processor', + error_logs=[str(e)] + ) return False return success diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py index 359666d82..4ea1b554f 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py @@ -3,6 +3,7 @@ from warnings import warn from datetime import datetime from dataall.core.environment.services.environment_service import EnvironmentService +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.shares_base.services.shares_enums import ( ShareItemHealthStatus, ShareItemStatus, @@ -197,6 +198,11 @@ def 
process_approved_shares(self) -> bool: ) success = False manager.handle_share_failure(table=table, error=e) + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while processing glue table share request', + process_name='s3 glue table share processor', + error_logs=[str(e)] + ) return success def process_revoked_shares(self) -> bool: @@ -321,6 +327,11 @@ def process_revoked_shares(self) -> bool: success = False manager.handle_revoke_failure(table=table, error=e) + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while revoking glue tables share request', + process_name='glue tables share processor', + error_logs=[str(e)] + ) try: if self.tables: @@ -350,6 +361,11 @@ def process_revoked_shares(self) -> bool: f'Failed to clean-up database permissions or delete shared database {manager.shared_db_name} ' f'due to: {e}' ) + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while revoking glue tables share request', + process_name='glue tables share processor', + error_logs=[str(e)] + ) success = False return success diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py index 41b214490..ab1527770 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py @@ -2,6 +2,7 @@ from datetime import datetime from typing import List +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.shares_base.services.share_exceptions import PrincipalRoleNotFound from dataall.modules.s3_datasets_shares.services.share_managers import S3AccessPointShareManager from 
dataall.modules.s3_datasets_shares.services.s3_share_service import S3ShareService @@ -99,6 +100,11 @@ def process_approved_shares(self) -> bool: ) success = False manager.handle_share_failure(e) + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while processing access point share request', + process_name='s3 access point share processor', + error_logs=[str(e)] + ) return success def process_revoked_shares(self) -> bool: @@ -169,6 +175,11 @@ def process_revoked_shares(self) -> bool: # statements which can throw exceptions but are not critical manager.handle_revoke_failure(e) + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while revoking access point share request', + process_name='s3 access point share processor', + error_logs=[str(e)] + ) return success diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py index ef968ed5d..f2fcba4db 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py @@ -3,6 +3,7 @@ from logging import exception from typing import List +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.shares_base.services.share_exceptions import PrincipalRoleNotFound from dataall.modules.s3_datasets_shares.services.share_managers import S3BucketShareManager from dataall.modules.s3_datasets_shares.services.s3_share_service import S3ShareService @@ -93,6 +94,11 @@ def process_approved_shares(self) -> bool: ) success = False manager.handle_share_failure(e) + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while processing s3 bucket share request', + 
process_name='s3 bucket share processor', + error_logs=[str(e)] + ) return success def process_revoked_shares(self) -> bool: @@ -155,6 +161,11 @@ def process_revoked_shares(self) -> bool: # statements which can throw exceptions but are not critical manager.handle_revoke_failure(e) + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while revoking s3 bucket manager', + process_name='s3 bucket share processor', + error_logs=[str(e)] + ) return success diff --git a/backend/dataall/modules/shares_base/services/share_notification_service.py b/backend/dataall/modules/shares_base/services/share_notification_service.py index a48ba6c66..7079dca29 100644 --- a/backend/dataall/modules/shares_base/services/share_notification_service.py +++ b/backend/dataall/modules/shares_base/services/share_notification_service.py @@ -302,7 +302,6 @@ def notify_share_object_items_unhealthy(self): f'Hello Team,
' f'Your share with share uri: {self.share.shareUri} has one or more unhealthy share items.

' f'Once you visit your share link you can click on the Reapply button and this should correct your share and get it into an healthy state. If this doesn\'t get your share in healthy state then please get in touch with data.all admins for your share.' - f'

Please note: If you are using any terraform / cloudformation or any other IaC to also manage your bucket policy, kms policy and requestor IAM role, please make them aware of the data.all changes so that they don\'t wipe off data.all related policies' ) msg_footer = """ Regards,
diff --git a/config.json b/config.json index e3af66063..411bd3436 100644 --- a/config.json +++ b/config.json @@ -32,7 +32,8 @@ "active": false, "persistent_reminders": false, "parameters": { - "group_notifications": true + "group_notifications": true, + "admin_notifications": true } } }, From 17158e273a0ec345c928e2afce9a9d4c58069777 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Tue, 14 Jan 2025 18:49:53 -0600 Subject: [PATCH 12/26] Linting update --- .../catalog/tasks/catalog_indexer_task.py | 2 +- .../services/admin_notifications.py | 27 ++-- .../ses_email_notification_service.py | 13 +- .../tasks/weekly_digest_reminder.py | 125 +++++++++++------- .../omics/tasks/omics_workflows_fetcher.py | 3 +- .../redshift_table_share_processor.py | 6 +- .../db/dataset_table_repositories.py | 2 +- .../services/dataset_table_notifications.py | 25 ++-- .../services/dataset_table_service.py | 4 +- .../s3_datasets/tasks/tables_syncer.py | 27 ++-- .../glue_table_share_processor.py | 6 +- .../s3_access_point_share_processor.py | 4 +- .../s3_bucket_share_processor.py | 4 +- .../db/share_object_repositories.py | 11 +- .../services/share_notification_service.py | 31 +++-- .../shares_base/services/sharing_service.py | 90 +++++++------ .../tasks/persistent_email_reminders_task.py | 8 +- .../tasks/share_expiration_task.py | 6 +- .../shares_base/tasks/share_manager_task.py | 2 +- .../shares_base/tasks/share_reapplier_task.py | 18 ++- .../shares_base/tasks/share_verifier_task.py | 11 +- deploy/stacks/container.py | 1 - 22 files changed, 249 insertions(+), 177 deletions(-) diff --git a/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py b/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py index 222cb1954..82aeafab9 100644 --- a/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py +++ b/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py @@ -38,7 +38,7 @@ def index_objects(cls, engine, with_deletes='False'): 
AdminNotificationService().notify_admins_with_error_log( process_error='Exception occurred during cataloging task', error_logs=[error_log], - process_name='Catalog Task' + process_name='Catalog Task', ) raise e diff --git a/backend/dataall/modules/notifications/services/admin_notifications.py b/backend/dataall/modules/notifications/services/admin_notifications.py index d97c60b07..18f60b748 100644 --- a/backend/dataall/modules/notifications/services/admin_notifications.py +++ b/backend/dataall/modules/notifications/services/admin_notifications.py @@ -7,20 +7,25 @@ log = logging.getLogger(__name__) + class AdminNotificationService: """ Send email notifications to Admin Group i.e. DAAdministrators in data.all Args - 1. process_error - string describing in short the error / exception details - 2. error_logs - List of all the exception error logs + 2. error_logs - List of all the exception error logs 3. process_name - Code where the exception occurred. Example, inside an ECS task like cataloging task, etc or inside a graphql service """ + @classmethod - def notify_admins_with_error_log(cls, process_error: str, error_logs: List[str], process_name:str = ''): - if config.get_property( - 'modules.datasets_base.features.share_notifications.email.parameters.admin_notifications', default=False - ) is False: - log.info("Admin notifications are switched off") + def notify_admins_with_error_log(cls, process_error: str, error_logs: List[str], process_name: str = ''): + if ( + config.get_property( + 'modules.datasets_base.features.share_notifications.email.parameters.admin_notifications', default=False + ) + is False + ): + log.info('Admin notifications are switched off') return subject = f'Data.all alert | Attention Required | Failure in : {process_name}' @@ -28,12 +33,10 @@ def notify_admins_with_error_log(cls, process_error: str, error_logs: List[str], Following error occurred -

{process_error}

""" for error_log in error_logs: - email_message += error_log + "

" + email_message += error_log + '

' - email_message += "Please check the logs in cloudwatch for more details" + email_message += 'Please check the logs in cloudwatch for more details' SESEmailNotificationService.create_and_send_email_notifications( - subject=subject, - msg=email_message, - recipient_groups_list=[DataallGroups.admin] - ) \ No newline at end of file + subject=subject, msg=email_message, recipient_groups_list=[DataallGroups.admin] + ) diff --git a/backend/dataall/modules/notifications/services/ses_email_notification_service.py b/backend/dataall/modules/notifications/services/ses_email_notification_service.py index b75890b11..c0a08214d 100644 --- a/backend/dataall/modules/notifications/services/ses_email_notification_service.py +++ b/backend/dataall/modules/notifications/services/ses_email_notification_service.py @@ -49,8 +49,15 @@ def send_email_task(subject, message, recipient_groups_list, recipient_email_lis SESEmailNotificationService.send_email_to_users(email_ids_to_send_emails, email_provider, message, subject) except Exception as e: - email_ids_to_send_emails = email_provider.get_email_ids_from_groupList(['DAAdministrators'] , identityProvider) - SESEmailNotificationService.send_email_to_users(email_ids_to_send_emails, email_provider, f'Error sending email due to: {e}', 'Data.all alert | Attention Required | Failure in: Email Notification Service') + email_ids_to_send_emails = email_provider.get_email_ids_from_groupList( + ['DAAdministrators'], identityProvider + ) + SESEmailNotificationService.send_email_to_users( + email_ids_to_send_emails, + email_provider, + f'Error sending email due to: {e}', + 'Data.all alert | Attention Required | Failure in: Email Notification Service', + ) raise e else: return True @@ -87,4 +94,4 @@ def create_and_send_email_notifications(subject, msg, recipient_groups_list=None else: log.info(f'Notification type : {share_notification_config_type} is not active') else: - log.info('Notifications are not active') \ No newline at end of file + 
log.info('Notifications are not active') diff --git a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py index 2452ee084..029398700 100644 --- a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py +++ b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py @@ -44,27 +44,33 @@ def __init__(self): def _get_pending_share_notifications(session): pending_shares = ShareObjectRepository.get_shares_with_statuses(session=session, status_list=['Submitted']) - log.info(f"Found {len(pending_shares)} pending shares with share object status - Submitted") + log.info(f'Found {len(pending_shares)} pending shares with share object status - Submitted') share_dataset_map: Dict[ShareObject, DatasetBase] = { - share: DatasetBaseRepository.get_dataset_by_uri(session=session, dataset_uri=share.datasetUri) for share in - pending_shares} + share: DatasetBaseRepository.get_dataset_by_uri(session=session, dataset_uri=share.datasetUri) + for share in pending_shares + } return [ NotificationResource( resource=share, resource_type='Share Object', resource_status=f'{share.status} - Pending Approval', - receivers=[share_dataset_map[share].SamlAdminGroupName, share_dataset_map[share].stewards]) + receivers=[share_dataset_map[share].SamlAdminGroupName, share_dataset_map[share].stewards], + ) for share in share_dataset_map ] def _get_unhealthy_share_notification(session): unhealthy_share_objects: List[ShareObject] = ShareObjectRepository.get_share_object_with_health_status( - session=session, health_status_list=[ShareItemHealthStatus.Unhealthy.value]) - log.info(f"Found {len(unhealthy_share_objects)} unhealthy share objects") + session=session, health_status_list=[ShareItemHealthStatus.Unhealthy.value] + ) + log.info(f'Found {len(unhealthy_share_objects)} unhealthy share objects') return [ - NotificationResource(resource=share, resource_type='Share Object', 
resource_status='Unhealthy', - receivers=[share.groupUri]) for share in unhealthy_share_objects] + NotificationResource( + resource=share, resource_type='Share Object', resource_status='Unhealthy', receivers=[share.groupUri] + ) + for share in unhealthy_share_objects + ] def _get_unhealthy_stack_by_type(session, target_uri: str, target_type: Any): @@ -73,23 +79,25 @@ def _get_unhealthy_stack_by_type(session, target_uri: str, target_type: Any): StackStatus.DELETE_FAILED.value, StackStatus.UPDATE_FAILED.value, StackStatus.UPDATE_ROLLBACK_FAILED.value, - StackStatus.ROLLBACK_FAILED.value + StackStatus.ROLLBACK_FAILED.value, ] resource_objects = session.query(target_type).all() unhealthy_stack_notification_resources: List[NotificationResource] = [] - log.info(f"Found {len(unhealthy_stack_notification_resources)} unhealthy {target_type}") + log.info(f'Found {len(unhealthy_stack_notification_resources)} unhealthy {target_type}') # Check if stack associated with these datasets / environment exists # If yes, create a notification resource for resource in resource_objects: - stack = StackRepository.find_stack_by_target_uri(session=session, - target_uri=resource.__getattribute__(target_uri), - statuses=unhealthy_stack_status) + stack = StackRepository.find_stack_by_target_uri( + session=session, target_uri=resource.__getattribute__(target_uri), statuses=unhealthy_stack_status + ) if stack is not None: - notification_resource = NotificationResource(resource=resource, resource_type=target_type.__name__, - resource_status=stack.status, - receivers=_get_receivers_for_stack(resource=resource, - target_type=target_type)) + notification_resource = NotificationResource( + resource=resource, + resource_type=target_type.__name__, + resource_status=stack.status, + receivers=_get_receivers_for_stack(resource=resource, target_type=target_type), + ) unhealthy_stack_notification_resources.append(notification_resource) return unhealthy_stack_notification_resources @@ -101,10 +109,13 @@ def 
_get_receivers_for_stack(resource, target_type): if target_type.__name__ == 'Environment': return [resource.SamlGroupName] + """ Function to create a map of {group name : resource bundle}, where each resource bundle contains dataset, share and environment notification lists. Iterated over all the notification ( NotificationResources ) and then segregate based on the dataset, shares & environment notifications and map the bundle to a team. """ + + def _map_groups_to_resource_bundles(list_of_notifications: List[NotificationResource], resource_bundle_type: str): for notification in list_of_notifications: # Get all the receivers groups @@ -126,48 +137,52 @@ def send_reminder_email(engine): with engine.scoped_session() as session: # Get all shares in submitted state pending_share_notification_resources = _get_pending_share_notifications(session=session) - resources_type_tuple.append((pending_share_notification_resources, "share_object_notifications")) + resources_type_tuple.append((pending_share_notification_resources, 'share_object_notifications')) # Todo : Check if distinct needed for the share object repository # Get all shares in unhealthy state unhealthy_share_objects_notification_resources = _get_unhealthy_share_notification(session=session) - resources_type_tuple.append((unhealthy_share_objects_notification_resources, "share_object_notifications")) + resources_type_tuple.append((unhealthy_share_objects_notification_resources, 'share_object_notifications')) # Get all the dataset which are in unhealthy state - unhealthy_datasets_notification_resources = _get_unhealthy_stack_by_type(session=session, - target_uri='datasetUri', - target_type=DatasetBase) - resources_type_tuple.append((unhealthy_datasets_notification_resources, "dataset_object_notifications")) + unhealthy_datasets_notification_resources = _get_unhealthy_stack_by_type( + session=session, target_uri='datasetUri', target_type=DatasetBase + ) + 
resources_type_tuple.append((unhealthy_datasets_notification_resources, 'dataset_object_notifications')) # Get all the environments which are in unhealthy state - unhealthy_environment_notification_resources = _get_unhealthy_stack_by_type(session=session, - target_uri='environmentUri', - target_type=Environment) + unhealthy_environment_notification_resources = _get_unhealthy_stack_by_type( + session=session, target_uri='environmentUri', target_type=Environment + ) resources_type_tuple.append( - (unhealthy_environment_notification_resources, "environment_object_notifications")) + (unhealthy_environment_notification_resources, 'environment_object_notifications') + ) # For each notification resource ( i.e. share notification, dataset notification, etc ), # function _map_groups_to_resource_bundles maps each team name : resource bundle for notification_resources, resource_bundle_type in resources_type_tuple: - _map_groups_to_resource_bundles(list_of_notifications=notification_resources, resource_bundle_type=resource_bundle_type) + _map_groups_to_resource_bundles( + list_of_notifications=notification_resources, resource_bundle_type=resource_bundle_type + ) for group, resource_bundle in group_name_to_resource_bundle_map.items(): email_body = _construct_email_body(resource_bundle) log.debug(email_body) subject = 'Attention Required | Data.all weekly digest' try: - SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=email_body, - recipient_groups_list=[group]) + SESEmailNotificationService.create_and_send_email_notifications( + subject=subject, msg=email_body, recipient_groups_list=[group] + ) except Exception as e: - log.error(f"Error occurred in sending email while weekly reminder task due to: {e}") - task_exceptions.append(f"Error occurred in sending email while weekly reminder task due to: {e}") + log.error(f'Error occurred in sending email while weekly reminder task due to: {e}') + task_exceptions.append(f'Error occurred in sending 
email while weekly reminder task due to: {e}') except Exception as e: - log.error(f"Error occurred while running the weekly reminder task: {e}") - task_exceptions.append(f"Error occurred while running the weekly reminder task: {e}") + log.error(f'Error occurred while running the weekly reminder task: {e}') + task_exceptions.append(f'Error occurred while running the weekly reminder task: {e}') finally: if len(task_exceptions) > 0: - log.info("Sending email notifications to the admin team") + log.info('Sending email notifications to the admin team') AdminNotificationService().notify_admins_with_error_log( - process_error="Error occurred while running the weekly reminder task", + process_error='Error occurred while running the weekly reminder task', error_logs=task_exceptions, - process_name="Weekly reminder task" + process_name='Weekly reminder task', ) @@ -182,19 +197,27 @@ def _construct_email_body(resource_bundle: NotificationResourceBundle): For environments and datasets which are in unhealthy state, you can go to the AWS account and check the stack associated with that environment/dataset and check the root cause of the stack. Once you address the root cause issue, you can click on "Update Stack" on the stack page of the data.all resource in the data.all UI


""" msg_content = """""" - share_object_table_content = _create_table_for_resource(resource_bundle.share_object_notifications, "shareUri", - "/console/shares/") if len( - resource_bundle.share_object_notifications) > 0 else "" - dataset_object_table_content = _create_table_for_resource(resource_bundle.dataset_object_notifications, - "datasetUri", - "/console/s3-datasets/") if len( - resource_bundle.dataset_object_notifications) > 0 else "" - environment_object_table_content = _create_table_for_resource(resource_bundle.environment_object_notifications, - "environmentUri", - "/console/environments/") if len( - resource_bundle.environment_object_notifications) > 0 else "" - - msg_content += share_object_table_content + dataset_object_table_content + environment_object_table_content + "

" + share_object_table_content = ( + _create_table_for_resource(resource_bundle.share_object_notifications, 'shareUri', '/console/shares/') + if len(resource_bundle.share_object_notifications) > 0 + else '' + ) + dataset_object_table_content = ( + _create_table_for_resource(resource_bundle.dataset_object_notifications, 'datasetUri', '/console/s3-datasets/') + if len(resource_bundle.dataset_object_notifications) > 0 + else '' + ) + environment_object_table_content = ( + _create_table_for_resource( + resource_bundle.environment_object_notifications, 'environmentUri', '/console/environments/' + ) + if len(resource_bundle.environment_object_notifications) > 0 + else '' + ) + + msg_content += ( + share_object_table_content + dataset_object_table_content + environment_object_table_content + '

' + ) msg_footer = """ In case your stack(s) or share object(s) are still in unhealthy state after applying remedial measures, please contact data.all team.

@@ -247,7 +270,7 @@ def _create_table_for_resource(list_of_resources, uri_attr, link_uri): if __name__ == '__main__': - log.info("Starting weekly reminders task") + log.info('Starting weekly reminders task') load_modules(modes={ImportMode.SHARES_TASK}) ENVNAME = os.environ.get('envname', 'dkrcompose') ENGINE = get_engine(envname=ENVNAME) diff --git a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py index fe285564b..7f8253dac 100644 --- a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py +++ b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py @@ -65,9 +65,10 @@ def fetch_omics_workflows(engine): AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while processing omics workflow task', error_logs=[str(e)], - process_name='Omics Workflow' + process_name='Omics Workflow', ) + if __name__ == '__main__': ENVNAME = os.environ.get('envname', 'local') ENGINE = get_engine(envname=ENVNAME) diff --git a/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py b/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py index 6c066df55..c90280f82 100644 --- a/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py +++ b/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py @@ -264,7 +264,7 @@ def process_approved_shares(self) -> bool: AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while processing redshift table share request', process_name='redshift table share processor', - error_logs=[str(e)] + error_logs=[str(e)], ) return False return success @@ -373,7 +373,7 @@ def process_revoked_shares(self) -> bool: AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while revoking redshift table share request', process_name='redshift 
tables share processor', - error_logs=[str(e)] + error_logs=[str(e)], ) self.session.commit() try: @@ -452,7 +452,7 @@ def process_revoked_shares(self) -> bool: AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while revoking redshift table share request', process_name='redshift tables share processor', - error_logs=[str(e)] + error_logs=[str(e)], ) return False return success diff --git a/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py b/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py index 1c325feca..52c5ca587 100644 --- a/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py +++ b/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py @@ -63,7 +63,7 @@ def get_dataset_table_by_uri(session, table_uri): @staticmethod def update_existing_tables_status(existing_tables, glue_tables): - updated_tables_status_map: Dict[str: str] = {} + updated_tables_status_map: Dict[str:str] = {} for existing_table in existing_tables: if existing_table.GlueTableName not in [t['Name'] for t in glue_tables]: if existing_table.LastGlueTableStatus != 'Deleted': diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py index 32af8239a..825e4f08a 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py @@ -6,8 +6,8 @@ log = logging.getLogger(__name__) -class DatasetTableNotifications: +class DatasetTableNotifications: def __init__(self, dataset: S3Dataset): self.dataset: S3Dataset = dataset @@ -16,18 +16,21 @@ def __init__(self, dataset: S3Dataset): Also, if there exists shares on that dataset, then send email notifications to the requestors informing updates table_status_map - Dictionary of GlueTableName and table status ( InSync, Deleted, etc ) """ - def 
notify_dataset_table_updates(self, session, table_status_map: Dict[str, str]): + + def notify_dataset_table_updates(self, session, table_status_map: Dict[str, str]): # Construct and send email reminders for datasets self._send_email_reminders_for_dataset(table_status_map) # Find all the shares made on this dataset - shares = ShareObjectRepository.find_dataset_shares(session=session, dataset_uri=self.dataset.datasetUri, share_statues=['Processed']) + shares = ShareObjectRepository.find_dataset_shares( + session=session, dataset_uri=self.dataset.datasetUri, share_statues=['Processed'] + ) if shares: for share in shares: self._send_email_notification_for_share(share, table_status_map) def _send_email_notification_for_share(self, share, table_status_map): - subject = f"Alert: Data.all Update | Glue table updated for dataset: {self.dataset.name}" + subject = f'Alert: Data.all Update | Glue table updated for dataset: {self.dataset.name}' msg_footer = f""" You have an active share with uri: {share.shareUri}. If there is any table requested by you on the dataset: {self.dataset.name} for that share it may have been affected in case if the tables are deleted.
Note: Please check with the dataset owner if there is any missing table from your share - as it is likely deleted from the dataset.
If the table exists in the dataset and is successfully shared but you are unable to access the table, then please reach out to the data.all team

@@ -42,11 +45,12 @@ def _send_email_notification_for_share(self, share, table_status_map): {table_content}

""" msg = msg_body + msg_footer - SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=msg, - recipient_groups_list=[share.groupUri]) + SESEmailNotificationService.create_and_send_email_notifications( + subject=subject, msg=msg, recipient_groups_list=[share.groupUri] + ) def _send_email_reminders_for_dataset(self, table_status_map): - subject = f"Data.all Update | Glue tables updated for dataset: {self.dataset.name}" + subject = f'Data.all Update | Glue tables updated for dataset: {self.dataset.name}' table_content = self._construct_html_table_from_glue_status_map(table_status_map) msg_body = f""" Dear Team,

@@ -59,10 +63,9 @@ def _send_email_reminders_for_dataset(self, table_status_map): data.all team """ msg = msg_body + msg_footer - SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=msg, - recipient_groups_list=[ - self.dataset.SamlAdminGroupName, - self.dataset.stewards]) + SESEmailNotificationService.create_and_send_email_notifications( + subject=subject, msg=msg, recipient_groups_list=[self.dataset.SamlAdminGroupName, self.dataset.stewards] + ) @classmethod def _construct_html_table_from_glue_status_map(cls, table_status_map): diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py index f909a3da9..d67fa45c5 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py @@ -142,7 +142,9 @@ def sync_existing_tables(session, uri, glue_tables=None): existing_table_names = [e.GlueTableName for e in existing_tables] existing_dataset_tables_map = {t.GlueTableName: t for t in existing_tables} - updated_table_status_map = DatasetTableRepository.update_existing_tables_status(existing_tables, glue_tables) + updated_table_status_map = DatasetTableRepository.update_existing_tables_status( + existing_tables, glue_tables + ) log.info(f'existing_tables={glue_tables}') for table in glue_tables: diff --git a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py index e8ea7cb90..3872af988 100644 --- a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py +++ b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py @@ -52,17 +52,20 @@ def sync_tables(engine): log.info(f'Found {len(tables)} tables on Glue database {dataset.GlueDatabaseName}') - table_status_map = DatasetTableService.sync_existing_tables(session, uri=dataset.datasetUri, glue_tables=tables) + table_status_map = 
DatasetTableService.sync_existing_tables( + session, uri=dataset.datasetUri, glue_tables=tables + ) if table_status_map: log.info('Sending email notification after dataset table updates were found') try: - DatasetTableNotifications(dataset=dataset).notify_dataset_table_updates(session=session, table_status_map=table_status_map) + DatasetTableNotifications(dataset=dataset).notify_dataset_table_updates( + session=session, table_status_map=table_status_map + ) except Exception as e: - error_log = f"Error occurred while sending email to notify about changes to the glue tables for dataset with uri: {dataset.datasetUri} due to: {e}" + error_log = f'Error occurred while sending email to notify about changes to the glue tables for dataset with uri: {dataset.datasetUri} due to: {e}' task_exceptions.append(error_log) - tables = session.query(DatasetTable).filter(DatasetTable.datasetUri == dataset.datasetUri).all() log.info('Updating tables permissions on Lake Formation...') @@ -89,17 +92,15 @@ def sync_tables(engine): task_exceptions.append(str(e)) return processed_tables except Exception as e: - log.error( - f'Error while running table syncer task due to: {e}' - ) + log.error(f'Error while running table syncer task due to: {e}') task_exceptions.append(str(e)) finally: - if len(task_exceptions) > 0: - AdminNotificationService().notify_admins_with_error_log( - process_name='Table Syncer', - error_logs=task_exceptions, - process_error='Error while running table syncer task' - ) + if len(task_exceptions) > 0: + AdminNotificationService().notify_admins_with_error_log( + process_name='Table Syncer', + error_logs=task_exceptions, + process_error='Error while running table syncer task', + ) def is_assumable_pivot_role(env: Environment): diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py index 4ea1b554f..e282dab34 100644 
--- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py @@ -201,7 +201,7 @@ def process_approved_shares(self) -> bool: AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while processing glue table share request', process_name='s3 glue table share processor', - error_logs=[str(e)] + error_logs=[str(e)], ) return success @@ -330,7 +330,7 @@ def process_revoked_shares(self) -> bool: AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while revoking glue tables share request', process_name='glue tables share processor', - error_logs=[str(e)] + error_logs=[str(e)], ) try: @@ -364,7 +364,7 @@ def process_revoked_shares(self) -> bool: AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while revoking glue tables share request', process_name='glue tables share processor', - error_logs=[str(e)] + error_logs=[str(e)], ) success = False return success diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py index ab1527770..aaa4096f3 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py @@ -103,7 +103,7 @@ def process_approved_shares(self) -> bool: AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while processing access point share request', process_name='s3 access point share processor', - error_logs=[str(e)] + error_logs=[str(e)], ) return success @@ -178,7 +178,7 @@ def process_revoked_shares(self) -> bool: 
AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while revoking access point share request', process_name='s3 access point share processor', - error_logs=[str(e)] + error_logs=[str(e)], ) return success diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py index f2fcba4db..c97c39795 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py @@ -97,7 +97,7 @@ def process_approved_shares(self) -> bool: AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while processing s3 bucket share request', process_name='s3 bucket share processor', - error_logs=[str(e)] + error_logs=[str(e)], ) return success @@ -164,7 +164,7 @@ def process_revoked_shares(self) -> bool: AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while revoking s3 bucket manager', process_name='s3 bucket share processor', - error_logs=[str(e)] + error_logs=[str(e)], ) return success diff --git a/backend/dataall/modules/shares_base/db/share_object_repositories.py b/backend/dataall/modules/shares_base/db/share_object_repositories.py index b04b9751d..c6d1c70b3 100644 --- a/backend/dataall/modules/shares_base/db/share_object_repositories.py +++ b/backend/dataall/modules/shares_base/db/share_object_repositories.py @@ -51,7 +51,7 @@ def find_dataset_shares(session, dataset_uri: str, share_statues: List[str] = No return query.all() @staticmethod - def find_share_by_dataset_attributes(session, dataset_uri, dataset_owner, groups = None): + def find_share_by_dataset_attributes(session, dataset_uri, dataset_owner, groups=None): if groups is None: groups = [] share: ShareObject = ( @@ -205,17 +205,12 @@ 
def get_share_data_items_by_type(session, share, share_type_model, share_type_ur def get_share_object_with_health_status(session, health_status_list: List[str] = None): query = ( session.query(ShareObject) - .join( - ShareObjectItem, - ShareObjectItem.shareUri == ShareObject.shareUri - ).filter( - ShareObjectItem.healthStatus.in_(health_status_list) - ) + .join(ShareObjectItem, ShareObjectItem.shareUri == ShareObject.shareUri) + .filter(ShareObjectItem.healthStatus.in_(health_status_list)) ) return query.all() - @staticmethod def get_all_share_items_in_share(session, share_uri, status=None, healthStatus=None): query = ( diff --git a/backend/dataall/modules/shares_base/services/share_notification_service.py b/backend/dataall/modules/shares_base/services/share_notification_service.py index 7079dca29..901273fad 100644 --- a/backend/dataall/modules/shares_base/services/share_notification_service.py +++ b/backend/dataall/modules/shares_base/services/share_notification_service.py @@ -279,13 +279,17 @@ def notify_share_object_failed(self): data.all team """ subject = f'Data.all | Attention Required | Share failed for {self.dataset.label}' - email_notification_msg = msg + share_link_text + "

" + msg_footer + email_notification_msg = msg + share_link_text + '

' + msg_footer notifications = self.register_notifications( notification_type=DataSharingNotificationType.SHARE_OBJECT_FAILED.value, msg=msg ) - SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=email_notification_msg, recipient_groups_list=[self.share.groupUri, self.dataset.SamlAdminGroupName, self.dataset.stewards]) + SESEmailNotificationService.create_and_send_email_notifications( + subject=subject, + msg=email_notification_msg, + recipient_groups_list=[self.share.groupUri, self.dataset.SamlAdminGroupName, self.dataset.stewards], + ) return notifications @@ -301,20 +305,24 @@ def notify_share_object_items_unhealthy(self): msg = ( f'Hello Team,
' f'Your share with share uri: {self.share.shareUri} has one or more unhealthy share items.

' - f'Once you visit your share link you can click on the Reapply button and this should correct your share and get it into an healthy state. If this doesn\'t get your share in healthy state then please get in touch with data.all admins for your share.' + f"Once you visit your share link you can click on the Reapply button and this should correct your share and get it into an healthy state. If this doesn't get your share in healthy state then please get in touch with data.all admins for your share." ) msg_footer = """ Regards,
data.all team """ subject = f'Data.all | Attention Required | Share for {self.dataset.label} dataset in unhealthy state' - email_notification_msg = msg + share_link_text + "

" + msg_footer + email_notification_msg = msg + share_link_text + '

' + msg_footer notifications = self.register_notifications( - notification_type=DataSharingNotificationType.SHARE_OBJECT_UNHEALTHY.value, msg=msg, to_recipients=[self.share.groupUri] + notification_type=DataSharingNotificationType.SHARE_OBJECT_UNHEALTHY.value, + msg=msg, + to_recipients=[self.share.groupUri], ) - SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=email_notification_msg, recipient_groups_list=[self.share.groupUri]) + SESEmailNotificationService.create_and_send_email_notifications( + subject=subject, msg=email_notification_msg, recipient_groups_list=[self.share.groupUri] + ) return notifications @@ -337,13 +345,17 @@ def notify_share_object_items_healthy(self): data.all team """ subject = f'Data.all | Share for {self.dataset.label} dataset now in healthy state' - email_notification_msg = msg + share_link_text + "

" + msg_footer + email_notification_msg = msg + share_link_text + '

' + msg_footer notifications = self.register_notifications( - notification_type=DataSharingNotificationType.SHARE_OBJECT_HEALTHY.value, msg=msg, to_recipients=[self.share.groupUri] + notification_type=DataSharingNotificationType.SHARE_OBJECT_HEALTHY.value, + msg=msg, + to_recipients=[self.share.groupUri], ) - SESEmailNotificationService.create_and_send_email_notifications(subject=subject, msg=email_notification_msg, recipient_groups_list=[self.share.groupUri]) + SESEmailNotificationService.create_and_send_email_notifications( + subject=subject, msg=email_notification_msg, recipient_groups_list=[self.share.groupUri] + ) return notifications def _get_share_object_targeted_users(self): @@ -425,4 +437,3 @@ def _create_notification_task(self, subject, msg): log.info(f'Notification type : {share_notification_config_type} is not active') else: log.info('Notifications are not active') - diff --git a/backend/dataall/modules/shares_base/services/sharing_service.py b/backend/dataall/modules/shares_base/services/sharing_service.py index 50ca4d59b..2575ba0b2 100644 --- a/backend/dataall/modules/shares_base/services/sharing_service.py +++ b/backend/dataall/modules/shares_base/services/sharing_service.py @@ -85,10 +85,10 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: try: with ResourceLockRepository.acquire_lock_with_retry( - resources=resources, - session=session, - acquired_by_uri=share_data.share.shareUri, - acquired_by_type=share_data.share.__tablename__, + resources=resources, + session=session, + acquired_by_uri=share_data.share.shareUri, + acquired_by_type=share_data.share.__tablename__, ): for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): try: @@ -143,13 +143,14 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: task_exceptions.append(str(e)) finally: if not share_successful: - ShareNotificationService(session=session, dataset=share_data.dataset, - share=share_data.share).notify_share_object_failed() + 
ShareNotificationService( + session=session, dataset=share_data.dataset, share=share_data.share + ).notify_share_object_failed() if len(task_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred while processing share with uri: {share_uri}', process_name='Sharing Service', - error_logs=task_exceptions + error_logs=task_exceptions, ) return share_successful @@ -200,10 +201,10 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: try: with ResourceLockRepository.acquire_lock_with_retry( - resources=resources, - session=session, - acquired_by_uri=share_data.share.shareUri, - acquired_by_type=share_data.share.__tablename__, + resources=resources, + session=session, + acquired_by_uri=share_data.share.shareUri, + acquired_by_type=share_data.share.__tablename__, ): for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): try: @@ -262,24 +263,25 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: task_exceptions.append(str(e)) finally: if not revoke_successful: - ShareNotificationService(session=session, dataset=share_data.dataset, - share=share_data.share).notify_share_object_failed() + ShareNotificationService( + session=session, dataset=share_data.dataset, share=share_data.share + ).notify_share_object_failed() if len(task_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred while revoking share with uri: {share_uri}', process_name='Sharing Service', - error_logs=task_exceptions + error_logs=task_exceptions, ) return revoke_successful @classmethod def verify_share( - cls, - engine: Engine, - share_uri: str, - status: str = None, - healthStatus: str = ShareItemHealthStatus.PendingVerify.value, + cls, + engine: Engine, + share_uri: str, + status: str = None, + healthStatus: str = ShareItemHealthStatus.PendingVerify.value, ) -> bool: """ 1) Retrieves share data and items in specified status and health state (by default - 
PendingVerify) @@ -310,7 +312,9 @@ def verify_share( healthStatus=healthStatus, ) if shareable_items: - health_status = processor.Processor(session, share_data, shareable_items).verify_shares_health_status() + health_status = processor.Processor( + session, share_data, shareable_items + ).verify_shares_health_status() health_status_list.append(health_status) else: log.info(f'There are no items to verify of type {type.value}') @@ -318,8 +322,12 @@ def verify_share( log.error(f'Error occurred during share verifying of {type.value}: {e}') task_exceptions.append(str(e)) if False in health_status_list: - log.info(f'Sending notifications since share object item(s) for share: {share_data.share.shareUri} are in unhealthy state after verifying shares') - ShareNotificationService(session=session, dataset=share_data.dataset, share=share_data.share).notify_share_object_items_unhealthy() + log.info( + f'Sending notifications since share object item(s) for share: {share_data.share.shareUri} are in unhealthy state after verifying shares' + ) + ShareNotificationService( + session=session, dataset=share_data.dataset, share=share_data.share + ).notify_share_object_items_unhealthy() except Exception as e: log.error(f'Unexpected error occurred while verifying share with uri: {share_uri} due to: {e}') task_exceptions.append(str(e)) @@ -328,7 +336,7 @@ def verify_share( AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred during verification of share with uri: {share_data.share.shareUri} ', error_logs=task_exceptions, - process_name='Sharing Service' + process_name='Sharing Service', ) return True @@ -369,10 +377,10 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: try: with ResourceLockRepository.acquire_lock_with_retry( - resources=resources, - session=session, - acquired_by_uri=share_data.share.shareUri, - acquired_by_type=share_data.share.__tablename__, + resources=resources, + session=session, + 
acquired_by_uri=share_data.share.shareUri, + acquired_by_type=share_data.share.__tablename__, ): for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): try: @@ -396,19 +404,25 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: log.info(f'There are no items to reapply of type {type.value}') except Exception as e: log.error(f'Error occurred during share reapplying of {type.value}: {e}') - code_exception_list.append(f'Error occurred during reapplying of share with uri: {share_data.share.shareUri} for processor type: {type.value} due to an unknown exception: {e}') + code_exception_list.append( + f'Error occurred during reapplying of share with uri: {share_data.share.shareUri} for processor type: {type.value} due to an unknown exception: {e}' + ) if not reapply_successful: log.info( - f'Sending notifications since share object item(s) for share: {share_data.share.shareUri} are in unhealthy state after reapplying shares') - ShareNotificationService(session=session, dataset=share_data.dataset, - share=share_data.share).notify_share_object_items_unhealthy() + f'Sending notifications since share object item(s) for share: {share_data.share.shareUri} are in unhealthy state after reapplying shares' + ) + ShareNotificationService( + session=session, dataset=share_data.dataset, share=share_data.share + ).notify_share_object_items_unhealthy() else: if len(code_exception_list) == 0: log.info( - f'Sending notifications to the share owner to inform that the share with uri: {share_data.share.shareUri} is now in healthy state') - ShareNotificationService(session=session, dataset=share_data.dataset, - share=share_data.share).notify_share_object_items_healthy() + f'Sending notifications to the share owner to inform that the share with uri: {share_data.share.shareUri} is now in healthy state' + ) + ShareNotificationService( + session=session, dataset=share_data.dataset, share=share_data.share + ).notify_share_object_items_healthy() except 
ResourceLockTimeout as timeout_exception: ShareStatusRepository.update_share_item_health_status_batch( @@ -434,15 +448,15 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred during reapplying of share with uri: {share_data.share.shareUri}', error_logs=code_exception_list, - process_name='Sharing Service' + process_name='Sharing Service', ) return reapply_successful @classmethod def cleanup_share( - cls, - engine: Engine, - share_uri: str, + cls, + engine: Engine, + share_uri: str, ) -> bool: """ 1) Retrieves share data and items in share diff --git a/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py b/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py index 6aadc9deb..b9e3f9b28 100644 --- a/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py +++ b/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py @@ -29,9 +29,9 @@ def persistent_email_reminders(engine): log.info(f'Sending Email Reminder for Share: {pending_share.shareUri}') share = ShareObjectRepository.get_share_by_uri(session, pending_share.shareUri) dataset = DatasetBaseRepository.get_dataset_by_uri(session, share.datasetUri) - ShareNotificationService(session=session, dataset=dataset, share=share).notify_persistent_email_reminder( - email_id=share.owner - ) + ShareNotificationService( + session=session, dataset=dataset, share=share + ).notify_persistent_email_reminder(email_id=share.owner) log.info(f'Email reminder sent for share {share.shareUri}') log.info('Completed Persistent Email Reminders Task') except Exception as e: @@ -42,7 +42,7 @@ def persistent_email_reminders(engine): AdminNotificationService().notify_admins_with_error_log( process_name='Persistent Email Task', error_logs=task_exceptions, - process_error='Error while running persistent email reminder task' + process_error='Error while running 
persistent email reminder task', ) diff --git a/backend/dataall/modules/shares_base/tasks/share_expiration_task.py b/backend/dataall/modules/shares_base/tasks/share_expiration_task.py index b2ddde75d..14349dcbd 100644 --- a/backend/dataall/modules/shares_base/tasks/share_expiration_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_expiration_task.py @@ -72,7 +72,9 @@ def share_expiration_checker(engine): log.error( f'Error occurred while processing share expiration processing for share with URI: {share.shareUri} due to: {e}' ) - task_exceptions.append(f'Error occurred while processing share expiration processing for share with URI: {share.shareUri} due to: {e}') + task_exceptions.append( + f'Error occurred while processing share expiration processing for share with URI: {share.shareUri} due to: {e}' + ) except Exception as e: log.error(f'Error occurred while processing share expiration due to : {e}') task_exceptions.append(f'Error occurred while processing share expiration due to: {e}') @@ -81,7 +83,7 @@ def share_expiration_checker(engine): AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while processing share expiration task', error_logs=task_exceptions, - process_name='Share Expiration Task' + process_name='Share Expiration Task', ) diff --git a/backend/dataall/modules/shares_base/tasks/share_manager_task.py b/backend/dataall/modules/shares_base/tasks/share_manager_task.py index d6c611214..9c55d9b50 100644 --- a/backend/dataall/modules/shares_base/tasks/share_manager_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_manager_task.py @@ -29,6 +29,6 @@ AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred while running sharing task for share with uri: {os.getenv("shareUri", "Share URI not available")}', error_logs=[str(e)], - process_name='Sharing Service' + process_name='Sharing Service', ) raise e diff --git 
a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py index ed6e04576..86f63e528 100644 --- a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py @@ -25,8 +25,12 @@ def process_reapply_shares_for_dataset(cls, engine, dataset_uri): share_objects_for_dataset = ShareObjectRepository.list_active_share_object_for_dataset( session=session, dataset_uri=dataset_uri ) - log.info(f'Found {len(share_objects_for_dataset)} active share objects on dataset with uri: {dataset_uri}') - processed_share_objects, task_exceptions = cls._reapply_share_objects(engine=engine, session=session, share_objects=share_objects_for_dataset) + log.info( + f'Found {len(share_objects_for_dataset)} active share objects on dataset with uri: {dataset_uri}' + ) + processed_share_objects, task_exceptions = cls._reapply_share_objects( + engine=engine, session=session, share_objects=share_objects_for_dataset + ) return processed_share_objects except Exception as e: log.error(f'Error occurred while reapplying share task due to: {e}') @@ -36,7 +40,7 @@ def process_reapply_shares_for_dataset(cls, engine, dataset_uri): AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while processing share during reapplying task', error_logs=task_exceptions, - process_name='Share Reapplier Task' + process_name='Share Reapplier Task', ) @classmethod @@ -62,6 +66,7 @@ def _reapply_share_objects(cls, engine, session, share_objects: List[ShareObject log.error(error_formatted) task_exceptions.append(error_formatted) return (processed_share_objects, task_exceptions) + @classmethod def process_reapply_shares(cls, engine): task_exceptions = [] @@ -69,8 +74,9 @@ def process_reapply_shares(cls, engine): with engine.scoped_session() as session: all_share_objects: [ShareObject] = 
ShareObjectRepository.list_all_active_share_objects(session) log.info(f'Found {len(all_share_objects)} share objects ') - processed_share_objects, task_exceptions = cls._reapply_share_objects(engine=engine, session=session, - share_objects=all_share_objects) + processed_share_objects, task_exceptions = cls._reapply_share_objects( + engine=engine, session=session, share_objects=all_share_objects + ) return processed_share_objects except Exception as e: log.error(f'Error occurred while reapplying share task due to: {e}') @@ -80,7 +86,7 @@ def process_reapply_shares(cls, engine): AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while processing share during reapplying task', error_logs=task_exceptions, - process_name='Share Reapplier Task' + process_name='Share Reapplier Task', ) diff --git a/backend/dataall/modules/shares_base/tasks/share_verifier_task.py b/backend/dataall/modules/shares_base/tasks/share_verifier_task.py index 1114844d1..c2fef9786 100644 --- a/backend/dataall/modules/shares_base/tasks/share_verifier_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_verifier_task.py @@ -35,11 +35,16 @@ def verify_shares(engine): processed_share_objects.append(share_object.shareUri) try: SharingService.verify_share( - engine, share_uri=share_object.shareUri, status=ShareItemStatus.Share_Succeeded.value, healthStatus=None + engine, + share_uri=share_object.shareUri, + status=ShareItemStatus.Share_Succeeded.value, + healthStatus=None, ) except Exception as e: log.error(f'Error occurred while verifying share with uri: {share_object.shareUri} due to: {e}') - task_exceptions.append(f'Error occurred while verifying share with uri: {share_object.shareUri} due to: {e}') + task_exceptions.append( + f'Error occurred while verifying share with uri: {share_object.shareUri} due to: {e}' + ) return processed_share_objects except Exception as e: log.error(f'Error occurred while verifying shares task due to: {e}') @@ -49,7 +54,7 @@ def 
verify_shares(engine): AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while verifying shares task', error_logs=task_exceptions, - process_name='Share Verifier' + process_name='Share Verifier', ) diff --git a/deploy/stacks/container.py b/deploy/stacks/container.py index 9f7606536..a6cc16591 100644 --- a/deploy/stacks/container.py +++ b/deploy/stacks/container.py @@ -400,7 +400,6 @@ def add_weekly_reminder_task(self): ) self.ecs_task_definitions_families.append(weekly_email_reminders_task.task_definition.family) - @run_if(['modules.s3_datasets.active']) def add_subscription_task(self): subscriptions_task, subscription_task_def = self.set_scheduled_task( From fd32987c3e3544e525474ea2e72effe8aa2ce26d Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Wed, 15 Jan 2025 11:45:31 -0600 Subject: [PATCH 13/26] Admin notification improvements --- backend/dataall/core/groups/db/constants.py | 5 +++ .../share_managers/lf_share_manager.py | 34 +++++++++++++++++++ .../s3_access_point_share_manager.py | 11 ++++++ .../share_managers/s3_bucket_share_manager.py | 11 ++++++ .../glue_table_share_processor.py | 16 +-------- .../s3_access_point_share_processor.py | 10 ------ .../s3_bucket_share_processor.py | 10 ------ .../shares_base/services/sharing_service.py | 25 +++++++------- 8 files changed, 74 insertions(+), 48 deletions(-) create mode 100644 backend/dataall/core/groups/db/constants.py diff --git a/backend/dataall/core/groups/db/constants.py b/backend/dataall/core/groups/db/constants.py new file mode 100644 index 000000000..5f8ca0a6d --- /dev/null +++ b/backend/dataall/core/groups/db/constants.py @@ -0,0 +1,5 @@ +from enum import Enum + + +class DataallGroups: + admin = 'DAAdminstrators' diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_managers/lf_share_manager.py b/backend/dataall/modules/s3_datasets_shares/services/share_managers/lf_share_manager.py index e2951c3a3..5962b8827 100644 --- 
a/backend/dataall/modules/s3_datasets_shares/services/share_managers/lf_share_manager.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_managers/lf_share_manager.py @@ -9,6 +9,7 @@ from dataall.base.aws.sts import SessionHelper from dataall.base.db import exceptions from dataall.core.environment.services.environment_service import EnvironmentService +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.s3_datasets.db.dataset_models import DatasetTable from dataall.modules.s3_datasets_shares.aws.glue_client import GlueClient from dataall.modules.s3_datasets_shares.aws.lakeformation_client import LakeFormationClient @@ -634,6 +635,11 @@ def handle_share_failure( ) S3ShareAlarmService().trigger_table_sharing_failure_alarm(table, self.share, self.target_environment) + AdminNotificationService().notify_admins_with_error_log( + process_error=f'Error occurred while processing glue tables share request for share with uri: {self.share.shareUri}', + process_name='Glue tables share processor', + error_logs=[str(error)], + ) return True def handle_revoke_failure( @@ -654,6 +660,34 @@ def handle_revoke_failure( f'due to: {error}' ) S3ShareAlarmService().trigger_revoke_table_sharing_failure_alarm(table, self.share, self.target_environment) + AdminNotificationService().notify_admins_with_error_log( + process_error=f'Error occurred while revoking glue tables share request for share with uri: {self.share.shareUri}', + process_name='Glue tables share processor', + error_logs=[str(error)], + ) + return True + + def handle_revoke_clean_up_failure( + self, + error: Exception, + ) -> True: + """ + Handles revoke clean-up failure by notifying data.all admins + (clean-up of database permissions or deletion of the shared database) + :param error: clean-up error + :return: True if admin notification published successfully + """ + logger.error( + f'Failed to clean up database permission or delete database ' + f'from source account 
{self.source_environment.AwsAccountId}/{self.source_environment.region} ' + f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region} ' + f'due to: {error}' + ) + AdminNotificationService().notify_admins_with_error_log( + process_error=f'Error occurred while revoking glue tables share request for share with uri: {self.share.shareUri} when cleaning database permissions', + process_name='Glue tables share processor', + error_logs=[str(error)], + ) return True def handle_share_failure_for_all_tables(self, tables, error, share_item_status, reapply=False): diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_access_point_share_manager.py b/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_access_point_share_manager.py index e32d6c3a7..cf7023a30 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_access_point_share_manager.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_access_point_share_manager.py @@ -6,6 +6,7 @@ from dataall.core.environment.services.environment_service import EnvironmentService from dataall.base.db import utils from dataall.base.aws.sts import SessionHelper +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.s3_datasets_shares.aws.s3_client import ( S3ControlClient, S3Client, @@ -744,6 +745,11 @@ def handle_share_failure(self, error: Exception) -> None: S3ShareAlarmService().trigger_folder_sharing_failure_alarm( self.target_folder, self.share, self.target_environment ) + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while processing access point share request', + process_name='s3 access point share processor', + error_logs=[str(error)], + ) def handle_revoke_failure(self, error: Exception) -> bool: """ @@ -761,6 +767,11 @@ def handle_revoke_failure(self, error: Exception) -> bool: 
S3ShareAlarmService().trigger_revoke_folder_sharing_failure_alarm( self.target_folder, self.share, self.target_environment ) + AdminNotificationService().notify_admins_with_error_log( + process_error='Error occurred while revoking access point share request', + process_name='s3 access point share processor', + error_logs=[str(error)], + ) return True @staticmethod diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_bucket_share_manager.py b/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_bucket_share_manager.py index ca9de13d3..38956099d 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_bucket_share_manager.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_bucket_share_manager.py @@ -6,6 +6,7 @@ from dataall.base.aws.sts import SessionHelper from dataall.core.environment.db.environment_models import Environment from dataall.core.environment.services.environment_service import EnvironmentService +from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.s3_datasets.db.dataset_models import DatasetBucket from dataall.modules.s3_datasets_shares.aws.kms_client import ( KmsClient, @@ -598,6 +599,11 @@ def handle_share_failure(self, error: Exception) -> bool: S3ShareAlarmService().trigger_s3_bucket_sharing_failure_alarm( self.target_bucket, self.share, self.target_environment ) + AdminNotificationService().notify_admins_with_error_log( + process_error=f'Error occurred while processing s3 bucket share request for share with uri: {self.share.shareUri}', + process_name='s3 bucket share processor', + error_logs=[str(error)], + ) return True def handle_revoke_failure(self, error: Exception) -> bool: @@ -616,6 +622,11 @@ def handle_revoke_failure(self, error: Exception) -> bool: S3ShareAlarmService().trigger_revoke_s3_bucket_sharing_failure_alarm( self.target_bucket, self.share, self.target_environment ) + 
AdminNotificationService().notify_admins_with_error_log( + process_error=f'Error occurred while revoking s3 bucket manager for share with uri: {self.share.shareUri}', + process_name='s3 bucket share processor', + error_logs=[str(error)], + ) return True @staticmethod diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py index e282dab34..0a7fdb93c 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py @@ -198,11 +198,6 @@ def process_approved_shares(self) -> bool: ) success = False manager.handle_share_failure(table=table, error=e) - AdminNotificationService().notify_admins_with_error_log( - process_error='Error occurred while processing glue table share request', - process_name='s3 glue table share processor', - error_logs=[str(e)], - ) return success def process_revoked_shares(self) -> bool: @@ -327,11 +322,6 @@ def process_revoked_shares(self) -> bool: success = False manager.handle_revoke_failure(table=table, error=e) - AdminNotificationService().notify_admins_with_error_log( - process_error='Error occurred while revoking glue tables share request', - process_name='glue tables share processor', - error_logs=[str(e)], - ) try: if self.tables: @@ -361,11 +351,7 @@ def process_revoked_shares(self) -> bool: f'Failed to clean-up database permissions or delete shared database {manager.shared_db_name} ' f'due to: {e}' ) - AdminNotificationService().notify_admins_with_error_log( - process_error='Error occurred while revoking glue tables share request', - process_name='glue tables share processor', - error_logs=[str(e)], - ) + manager.handle_revoke_clean_up_failure(error=e) success = False return success diff --git 
a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py index aaa4096f3..c9637a279 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py @@ -100,11 +100,6 @@ def process_approved_shares(self) -> bool: ) success = False manager.handle_share_failure(e) - AdminNotificationService().notify_admins_with_error_log( - process_error='Error occurred while processing access point share request', - process_name='s3 access point share processor', - error_logs=[str(e)], - ) return success def process_revoked_shares(self) -> bool: @@ -175,11 +170,6 @@ def process_revoked_shares(self) -> bool: # statements which can throw exceptions but are not critical manager.handle_revoke_failure(e) - AdminNotificationService().notify_admins_with_error_log( - process_error='Error occurred while revoking access point share request', - process_name='s3 access point share processor', - error_logs=[str(e)], - ) return success diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py index c97c39795..c1d61f0bc 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py @@ -94,11 +94,6 @@ def process_approved_shares(self) -> bool: ) success = False manager.handle_share_failure(e) - AdminNotificationService().notify_admins_with_error_log( - process_error='Error occurred while processing s3 bucket share request', - process_name='s3 bucket share processor', - 
error_logs=[str(e)], - ) return success def process_revoked_shares(self) -> bool: @@ -161,11 +156,6 @@ def process_revoked_shares(self) -> bool: # statements which can throw exceptions but are not critical manager.handle_revoke_failure(e) - AdminNotificationService().notify_admins_with_error_log( - process_error='Error occurred while revoking s3 bucket manager', - process_name='s3 bucket share processor', - error_logs=[str(e)], - ) return success diff --git a/backend/dataall/modules/shares_base/services/sharing_service.py b/backend/dataall/modules/shares_base/services/sharing_service.py index 2575ba0b2..40c754277 100644 --- a/backend/dataall/modules/shares_base/services/sharing_service.py +++ b/backend/dataall/modules/shares_base/services/sharing_service.py @@ -59,7 +59,7 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: True if sharing succeeds, False if sharing fails """ - task_exceptions = [] + service_exceptions = [] share_successful = True try: with engine.scoped_session() as session: @@ -125,32 +125,31 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: new_status=ShareItemStatus.Share_Failed.value, share_item_type=processor.type, ) - task_exceptions.append(str(e)) + service_exceptions.append(str(e)) share_successful = False except Exception as e: log.exception(f'Error occurred during share approval: {e}') new_share_item_state = share_item_sm.run_transition(ShareItemActions.Failure.value) share_item_sm.update_state(session, share_data.share.shareUri, new_share_item_state) - task_exceptions.append(str(e)) + service_exceptions.append(str(e)) share_successful = False finally: new_share_state = share_object_sm.run_transition(ShareObjectActions.Finish.value) share_object_sm.update_state(session, share_data.share, new_share_state) - except Exception as e: log.error(f'Unexpected error occurred while processing share with uri: {share_uri} due to: {e}') share_successful = False - task_exceptions.append(str(e)) + 
service_exceptions.append(str(e)) finally: if not share_successful: ShareNotificationService( session=session, dataset=share_data.dataset, share=share_data.share ).notify_share_object_failed() - if len(task_exceptions) > 0: + if len(service_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred while processing share with uri: {share_uri}', process_name='Sharing Service', - error_logs=task_exceptions, + error_logs=service_exceptions, ) return share_successful @@ -174,7 +173,7 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: True if revoking succeeds False if revoking failed """ - task_exceptions = [] + service_exceptions = [] revoke_successful = True try: with engine.scoped_session() as session: @@ -241,14 +240,14 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: new_status=ShareItemStatus.Revoke_Failed.value, share_item_type=processor.type, ) - task_exceptions.append(str(e)) + service_exceptions.append(str(e)) revoke_successful = False except Exception as e: log.error(f'Error occurred during share revoking: {e}') new_share_item_state = share_item_sm.run_transition(ShareItemActions.Failure.value) share_item_sm.update_state(session, share_data.share.shareUri, new_share_item_state) revoke_successful = False - task_exceptions.append(str(e)) + service_exceptions.append(str(e)) finally: existing_pending_items = ShareStatusRepository.check_pending_share_items(session, share_uri) if existing_pending_items: @@ -260,17 +259,17 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: except Exception as e: log.error(f'Unexpected error occurred while revoking a share with uri: {share_uri} due to: {e}') revoke_successful = False - task_exceptions.append(str(e)) + service_exceptions.append(str(e)) finally: if not revoke_successful: ShareNotificationService( session=session, dataset=share_data.dataset, share=share_data.share ).notify_share_object_failed() - if len(task_exceptions) > 0: + 
if len(service_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred while revoking share with uri: {share_uri}', process_name='Sharing Service', - error_logs=task_exceptions, + error_logs=service_exceptions, ) return revoke_successful From 457ce8290c27b6ccbe4f37fe31dde1abbce00671 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Wed, 15 Jan 2025 16:36:39 -0600 Subject: [PATCH 14/26] Adding more refactoring changes --- backend/dataall/core/groups/db/constants.py | 2 +- .../notifications/services/ses_email_notification_service.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/dataall/core/groups/db/constants.py b/backend/dataall/core/groups/db/constants.py index 5f8ca0a6d..930669163 100644 --- a/backend/dataall/core/groups/db/constants.py +++ b/backend/dataall/core/groups/db/constants.py @@ -2,4 +2,4 @@ class DataallGroups: - admin = 'DAAdminstrators' + admin = 'DAAdministrators' diff --git a/backend/dataall/modules/notifications/services/ses_email_notification_service.py b/backend/dataall/modules/notifications/services/ses_email_notification_service.py index c0a08214d..49d1813e9 100644 --- a/backend/dataall/modules/notifications/services/ses_email_notification_service.py +++ b/backend/dataall/modules/notifications/services/ses_email_notification_service.py @@ -4,6 +4,7 @@ from dataall.base.aws.ses import Ses from dataall.base.config import config from dataall.base.services.service_provider_factory import ServiceProviderFactory +from dataall.core.groups.db.constants import DataallGroups from dataall.modules.notifications.services.base_email_notification_service import BaseEmailNotificationService log = logging.getLogger(__name__) @@ -50,7 +51,7 @@ def send_email_task(subject, message, recipient_groups_list, recipient_email_lis except Exception as e: email_ids_to_send_emails = email_provider.get_email_ids_from_groupList( - ['DAAdministrators'], identityProvider + 
[DataallGroups.admin], identityProvider ) SESEmailNotificationService.send_email_to_users( email_ids_to_send_emails, From 878082a694542f7550a090ae9a973d33b2b200e7 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Tue, 21 Jan 2025 14:15:04 -0600 Subject: [PATCH 15/26] Refactoring and corrections --- .../dataall/base/feature_toggle_checker.py | 20 +++++ backend/dataall/core/groups/db/constants.py | 3 - .../catalog/tasks/catalog_indexer_task.py | 2 +- .../services/admin_notifications.py | 25 +++---- .../ses_email_notification_service.py | 5 +- .../tasks/weekly_digest_reminder.py | 46 +++++++----- .../redshift_table_share_processor.py | 11 +-- .../services/dataset_table_notifications.py | 2 +- .../s3_datasets/tasks/tables_syncer.py | 74 +++++++++++-------- .../s3_datasets_shares/aws/s3_client.py | 17 ++--- .../share_managers/lf_share_manager.py | 6 +- .../s3_access_point_share_manager.py | 4 +- .../share_managers/s3_bucket_share_manager.py | 4 +- .../glue_table_share_processor.py | 4 - .../db/share_object_repositories.py | 11 ++- .../shares_base/services/sharing_service.py | 41 +++++----- .../shares_base/tasks/share_reapplier_task.py | 14 ++-- .../shares_base/tasks/share_verifier_task.py | 7 +- 18 files changed, 165 insertions(+), 131 deletions(-) diff --git a/backend/dataall/base/feature_toggle_checker.py b/backend/dataall/base/feature_toggle_checker.py index 5e945beab..12f8fe747 100644 --- a/backend/dataall/base/feature_toggle_checker.py +++ b/backend/dataall/base/feature_toggle_checker.py @@ -3,11 +3,14 @@ """ import functools +import logging from typing import List, Any, Optional, Callable from dataall.base.config import config from dataall.base.utils.decorator_utls import process_func +log = logging.getLogger(__name__) + def is_feature_enabled(config_property: str): def decorator(f): @@ -56,3 +59,20 @@ def decorated(*args, **kwargs): return fn_decorator(decorated) return decorator + + +def is_config_active(config_property: str, default_value: Any): + def 
decorator(f): + fn, fn_decorator = process_func(f) + + @functools.wraps(fn) + def decorated(*args, **kwargs): + value = config.get_property(config_property, default_value) + if not value: + log.info(f'Config - {config_property} is inactive') + return + return fn(*args, **kwargs) + + return fn_decorator(decorated) + + return decorator diff --git a/backend/dataall/core/groups/db/constants.py b/backend/dataall/core/groups/db/constants.py index 930669163..391b186a2 100644 --- a/backend/dataall/core/groups/db/constants.py +++ b/backend/dataall/core/groups/db/constants.py @@ -1,5 +1,2 @@ -from enum import Enum - - class DataallGroups: admin = 'DAAdministrators' diff --git a/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py b/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py index 82aeafab9..94c0aba03 100644 --- a/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py +++ b/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py @@ -38,7 +38,7 @@ def index_objects(cls, engine, with_deletes='False'): AdminNotificationService().notify_admins_with_error_log( process_error='Exception occurred during cataloging task', error_logs=[error_log], - process_name='Catalog Task', + process_name=cls.__name__, ) raise e diff --git a/backend/dataall/modules/notifications/services/admin_notifications.py b/backend/dataall/modules/notifications/services/admin_notifications.py index 18f60b748..2a9a85171 100644 --- a/backend/dataall/modules/notifications/services/admin_notifications.py +++ b/backend/dataall/modules/notifications/services/admin_notifications.py @@ -1,9 +1,9 @@ import logging from typing import List +from dataall.base.feature_toggle_checker import is_config_active from dataall.core.groups.db.constants import DataallGroups from dataall.modules.notifications.services.ses_email_notification_service import SESEmailNotificationService -from dataall.base.config import config log = logging.getLogger(__name__) @@ -17,25 +17,20 @@ class 
AdminNotificationService: 3. process_name - Code where the exception occurred. Example, inside an ECS task like cataloging task, etc or inside a graphql service """ - @classmethod - def notify_admins_with_error_log(cls, process_error: str, error_logs: List[str], process_name: str = ''): - if ( - config.get_property( - 'modules.datasets_base.features.share_notifications.email.parameters.admin_notifications', default=False - ) - is False - ): - log.info('Admin notifications are switched off') - return - + @staticmethod + @is_config_active( + config_property='modules.datasets_base.features.share_notifications.email.parameters.admin_notifications', + default_value=False, + ) + def notify_admins_with_error_log(process_error: str, error_logs: List[str], process_name: str = ''): subject = f'Data.all alert | Attention Required | Failure in : {process_name}' email_message = f""" - Following error occurred -

{process_error}

+ Following error occurred -

{process_error} """ for error_log in error_logs: - email_message += error_log + '

' + email_message += '

'.join(error_log) - email_message += 'Please check the logs in cloudwatch for more details' + email_message += '

Please check the logs in cloudwatch for more details' SESEmailNotificationService.create_and_send_email_notifications( subject=subject, msg=email_message, recipient_groups_list=[DataallGroups.admin] diff --git a/backend/dataall/modules/notifications/services/ses_email_notification_service.py b/backend/dataall/modules/notifications/services/ses_email_notification_service.py index 49d1813e9..ec074f079 100644 --- a/backend/dataall/modules/notifications/services/ses_email_notification_service.py +++ b/backend/dataall/modules/notifications/services/ses_email_notification_service.py @@ -85,9 +85,8 @@ def create_and_send_email_notifications(subject, msg, recipient_groups_list=None if share_notification_config := config.get_property( 'modules.datasets_base.features.share_notifications', default=None ): - for share_notification_config_type in share_notification_config.keys(): - n_config = share_notification_config[share_notification_config_type] - if n_config.get('active', False) == True: + for share_notification_config_type, n_config in share_notification_config.items(): + if n_config.get('active', False): if share_notification_config_type == 'email': SESEmailNotificationService.send_email_task( subject, msg, recipient_groups_list, recipient_email_ids diff --git a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py index 029398700..dc178c479 100644 --- a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py +++ b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py @@ -1,6 +1,7 @@ import logging import os -from typing import List, Dict, Any, Tuple +from dataclasses import dataclass, field +from typing import List, Dict, Any, Tuple, Set from dataall.base.db import get_engine from dataall.base.loader import load_modules, ImportMode @@ -22,12 +23,12 @@ """ +@dataclass class NotificationResource: - def __init__(self, resource, resource_type: str, 
resource_status: str, receivers: List[str] = None): - self.resource = resource - self.resource_type = resource_type - self.resource_status = resource_status - self.receivers_list = set(receivers) + resource: any + resource_type: str + resource_status: str + receivers: Set[str] = field(default_factory=set) """ @@ -35,11 +36,21 @@ def __init__(self, resource, resource_type: str, resource_status: str, receivers """ +@dataclass class NotificationResourceBundle: - def __init__(self): - self.share_object_notifications: List[NotificationResource] = [] - self.dataset_object_notifications: List[NotificationResource] = [] - self.environment_object_notifications: List[NotificationResource] = [] + """ + A collection of notification resources, categorized by object type. + """ + + share_object_notifications: List[NotificationResource] = field(default_factory=list) + dataset_object_notifications: List[NotificationResource] = field(default_factory=list) + environment_object_notifications: List[NotificationResource] = field(default_factory=list) + + +""" +Mapping between the group / team name and the associated notification events ( in the form of NotificationResourceBundle ) +""" +group_name_to_resource_bundle_map: Dict[str, NotificationResourceBundle] = {} def _get_pending_share_notifications(session): @@ -54,20 +65,20 @@ def _get_pending_share_notifications(session): resource=share, resource_type='Share Object', resource_status=f'{share.status} - Pending Approval', - receivers=[share_dataset_map[share].SamlAdminGroupName, share_dataset_map[share].stewards], + receivers={share_dataset_map[share].SamlAdminGroupName, share_dataset_map[share].stewards}, ) for share in share_dataset_map ] def _get_unhealthy_share_notification(session): - unhealthy_share_objects: List[ShareObject] = ShareObjectRepository.get_share_object_with_health_status( + unhealthy_share_objects: List[ShareObject] = ShareObjectRepository.get_share_objects_with_item_health_status( session=session, 
health_status_list=[ShareItemHealthStatus.Unhealthy.value] ) log.info(f'Found {len(unhealthy_share_objects)} unhealthy share objects') return [ NotificationResource( - resource=share, resource_type='Share Object', resource_status='Unhealthy', receivers=[share.groupUri] + resource=share, resource_type='Share Object', resource_status='Unhealthy', receivers={share.groupUri} ) for share in unhealthy_share_objects ] @@ -105,9 +116,9 @@ def _get_unhealthy_stack_by_type(session, target_uri: str, target_type: Any): def _get_receivers_for_stack(resource, target_type): if target_type.__name__ == 'Dataset': - return [resource.SamlAdminGroupName, resource.stewards] + return {resource.SamlAdminGroupName, resource.stewards} if target_type.__name__ == 'Environment': - return [resource.SamlGroupName] + return {resource.SamlGroupName} """ @@ -119,7 +130,7 @@ def _get_receivers_for_stack(resource, target_type): def _map_groups_to_resource_bundles(list_of_notifications: List[NotificationResource], resource_bundle_type: str): for notification in list_of_notifications: # Get all the receivers groups - notification_receiver_groups = notification.receivers_list + notification_receiver_groups = notification.receivers for receiver_group_name in notification_receiver_groups: if receiver_group_name in group_name_to_resource_bundle_map: resource_bundle = group_name_to_resource_bundle_map.get(receiver_group_name) @@ -164,7 +175,7 @@ def send_reminder_email(engine): for group, resource_bundle in group_name_to_resource_bundle_map.items(): email_body = _construct_email_body(resource_bundle) - log.debug(email_body) + log.debug(f' Sending email to group: {group} with email content: {email_body}') subject = 'Attention Required | Data.all weekly digest' try: SESEmailNotificationService.create_and_send_email_notifications( @@ -274,5 +285,4 @@ def _create_table_for_resource(list_of_resources, uri_attr, link_uri): load_modules(modes={ImportMode.SHARES_TASK}) ENVNAME = os.environ.get('envname', 
'dkrcompose') ENGINE = get_engine(envname=ENVNAME) - group_name_to_resource_bundle_map: Dict[str, NotificationResourceBundle] = {} send_reminder_email(engine=ENGINE) diff --git a/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py b/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py index c90280f82..91eaffc5a 100644 --- a/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py +++ b/backend/dataall/modules/redshift_datasets_shares/services/redshift_table_share_processor.py @@ -263,7 +263,7 @@ def process_approved_shares(self) -> bool: ) AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while processing redshift table share request', - process_name='redshift table share processor', + process_name=self.__class__.__name__, error_logs=[str(e)], ) return False @@ -354,13 +354,14 @@ def process_revoked_shares(self) -> bool: except Exception as e: success = False - log.error( + error_msg = ( f'Failed to process revoked redshift dataset {self.dataset.name} ' f'table {table.name} ' f'from source {self.source_connection.name} in namespace {self.source_connection.nameSpaceId} ' f'with target {self.target_connection.name} in namespace {self.target_connection.nameSpaceId} ' f'due to: {e}' ) + log.error(error_msg) share_item = ShareObjectRepository.find_sharable_item( self.session, self.share.shareUri, table.rsTableUri ) @@ -372,8 +373,8 @@ def process_revoked_shares(self) -> bool: ) AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while revoking redshift table share request', - process_name='redshift tables share processor', - error_logs=[str(e)], + process_name=self.__class__.__name__, + error_logs=[error_msg], ) self.session.commit() try: @@ -451,7 +452,7 @@ def process_revoked_shares(self) -> bool: ) AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred 
while revoking redshift table share request', - process_name='redshift tables share processor', + process_name=self.__class__.__name__, error_logs=[str(e)], ) return False diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py index 825e4f08a..49bf4884f 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py @@ -23,7 +23,7 @@ def notify_dataset_table_updates(self, session, table_status_map: Dict[str, str] # Find all the shares made on this dataset shares = ShareObjectRepository.find_dataset_shares( - session=session, dataset_uri=self.dataset.datasetUri, share_statues=['Processed'] + session=session, dataset_uri=self.dataset.datasetUri, share_statuses=['Processed'] ) if shares: for share in shares: diff --git a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py index 3872af988..f5e77be54 100644 --- a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py +++ b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py @@ -31,16 +31,7 @@ def sync_tables(engine): dataset: S3Dataset for dataset in all_datasets: log.info(f'Synchronizing dataset {dataset.name}|{dataset.datasetUri} tables') - env: Environment = ( - session.query(Environment) - .filter( - and_( - Environment.environmentUri == dataset.environmentUri, - Environment.deleted.is_(None), - ) - ) - .first() - ) + env: Environment = _get_environment_for_dataset(dataset, session) env_group: EnvironmentGroup = EnvironmentService.get_environment_group( session, dataset.SamlAdminGroupName, env.environmentUri ) @@ -49,34 +40,20 @@ def sync_tables(engine): log.info(f'Dataset {dataset.GlueDatabaseName} has an invalid environment') else: tables = DatasetCrawler(dataset).list_glue_database_tables(dataset.S3BucketName) - 
log.info(f'Found {len(tables)} tables on Glue database {dataset.GlueDatabaseName}') table_status_map = DatasetTableService.sync_existing_tables( session, uri=dataset.datasetUri, glue_tables=tables ) - if table_status_map: - log.info('Sending email notification after dataset table updates were found') - try: - DatasetTableNotifications(dataset=dataset).notify_dataset_table_updates( - session=session, table_status_map=table_status_map - ) - except Exception as e: - error_log = f'Error occurred while sending email to notify about changes to the glue tables for dataset with uri: {dataset.datasetUri} due to: {e}' - task_exceptions.append(error_log) - - tables = session.query(DatasetTable).filter(DatasetTable.datasetUri == dataset.datasetUri).all() + # Send email notification if there are any table additions/ deletions + _send_notification_after_table_updates(dataset, session, table_status_map, task_exceptions) + # For all tables in dataset, grant lake formation permission to all principals on the tables + tables = _get_tables_for_dataset(dataset, session) log.info('Updating tables permissions on Lake Formation...') - for table in tables: - LakeFormationTableClient(table).grant_principals_all_table_permissions( - principals=[ - SessionHelper.get_delegation_role_arn(env.AwsAccountId, env.region), - env_group.environmentIAMRoleArn, - ], - ) + _grant_lf_table_permissions_to_all_principals(env, env_group, table) processed_tables.extend(tables) @@ -94,6 +71,7 @@ def sync_tables(engine): except Exception as e: log.error(f'Error while running table syncer task due to: {e}') task_exceptions.append(str(e)) + raise e finally: if len(task_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( @@ -103,6 +81,44 @@ def sync_tables(engine): ) +def _send_notification_after_table_updates(dataset, session, table_status_map, task_exceptions): + if table_status_map: + log.info('Sending email notification after dataset table updates were found') + try: + 
DatasetTableNotifications(dataset=dataset).notify_dataset_table_updates( + session=session, table_status_map=table_status_map + ) + except Exception as e: + error_log = f'Error occurred while sending email to notify about changes to the glue tables for dataset with uri: {dataset.datasetUri} due to: {e}' + task_exceptions.append(error_log) + + +def _get_tables_for_dataset(dataset, session): + return session.query(DatasetTable).filter(DatasetTable.datasetUri == dataset.datasetUri).all() + + +def _grant_lf_table_permissions_to_all_principals(env, env_group, table): + LakeFormationTableClient(table).grant_principals_all_table_permissions( + principals=[ + SessionHelper.get_delegation_role_arn(env.AwsAccountId, env.region), + env_group.environmentIAMRoleArn, + ], + ) + + +def _get_environment_for_dataset(dataset, session): + return ( + session.query(Environment) + .filter( + and_( + Environment.environmentUri == dataset.environmentUri, + Environment.deleted.is_(None), + ) + ) + .first() + ) + + def is_assumable_pivot_role(env: Environment): aws_session = SessionHelper.remote_session(accountid=env.AwsAccountId, region=env.region) if not aws_session: diff --git a/backend/dataall/modules/s3_datasets_shares/aws/s3_client.py b/backend/dataall/modules/s3_datasets_shares/aws/s3_client.py index 77238fd26..ce9c2e183 100755 --- a/backend/dataall/modules/s3_datasets_shares/aws/s3_client.py +++ b/backend/dataall/modules/s3_datasets_shares/aws/s3_client.py @@ -163,17 +163,14 @@ def create_bucket_policy(self, bucket_name: str, policy: str, fix_malformed_prin ) log.info(f'Created bucket policy of {bucket_name} on {self._account_id} successfully') except ClientError as e: - if e.response['Error']['Code'] == 'MalformedPolicy': - if fix_malformed_principals: - log.info('MalformedPolicy. 
Lets try again') - fixed_policy = SharePolicyVerifier.remove_malformed_principal( - policy, DATAALL_BUCKET_SIDS, self._account_id, self.region - ) - self.create_bucket_policy(bucket_name, fixed_policy, False) - else: - log.error(f'Failed to create bucket policy. MalformedPolicy: {policy}') - raise e + if e.response['Error']['Code'] == 'MalformedPolicy' and fix_malformed_principals: + log.info('MalformedPolicy. Lets try again') + fixed_policy = SharePolicyVerifier.remove_malformed_principal( + policy, DATAALL_BUCKET_SIDS, self._account_id, self.region + ) + self.create_bucket_policy(bucket_name, fixed_policy, False) else: + log.error(f'Failed to create bucket policy: {policy} due to: {e}') raise e except Exception as e: log.error(f'Bucket policy created failed on bucket {bucket_name} of {self._account_id} : {e}') diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_managers/lf_share_manager.py b/backend/dataall/modules/s3_datasets_shares/services/share_managers/lf_share_manager.py index 5962b8827..a7d0c6034 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_managers/lf_share_manager.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_managers/lf_share_manager.py @@ -637,7 +637,7 @@ def handle_share_failure( S3ShareAlarmService().trigger_table_sharing_failure_alarm(table, self.share, self.target_environment) AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred while processing glue tables share request for share with uri: {self.share.shareUri}', - process_name='Glue tables share processor', + process_name=self.__class__.__name__, error_logs=[str(error)], ) return True @@ -662,7 +662,7 @@ def handle_revoke_failure( S3ShareAlarmService().trigger_revoke_table_sharing_failure_alarm(table, self.share, self.target_environment) AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred while revoking glue tables share request for share with uri: 
{self.share.shareUri}', - process_name='Glue tables share processor', + process_name=self.__class__.__name__, error_logs=[str(error)], ) return True @@ -685,7 +685,7 @@ def handle_revoke_clean_up_failure( ) AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred while revoking glue tables share request for share with uri: {self.share.shareUri} when cleaning database permissions', - process_name='Glue tables share processor', + process_name=self.__class__.__name__, error_logs=[str(error)], ) return True diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_access_point_share_manager.py b/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_access_point_share_manager.py index cf7023a30..f315869f6 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_access_point_share_manager.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_access_point_share_manager.py @@ -747,7 +747,7 @@ def handle_share_failure(self, error: Exception) -> None: ) AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while processing access point share request', - process_name='s3 access point share processor', + process_name=self.__class__.__name__, error_logs=[str(error)], ) @@ -769,7 +769,7 @@ def handle_revoke_failure(self, error: Exception) -> bool: ) AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while revoking access point share request', - process_name='s3 access point share processor', + process_name=self.__class__.__name__, error_logs=[str(error)], ) return True diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_bucket_share_manager.py b/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_bucket_share_manager.py index 38956099d..0eb46dab7 100644 --- 
a/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_bucket_share_manager.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_bucket_share_manager.py @@ -601,7 +601,7 @@ def handle_share_failure(self, error: Exception) -> bool: ) AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred while processing s3 bucket share request for share with uri: {self.share.shareUri}', - process_name='s3 bucket share processor', + process_name=self.__class__.__name__, error_logs=[str(error)], ) return True @@ -624,7 +624,7 @@ def handle_revoke_failure(self, error: Exception) -> bool: ) AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred while revoking s3 bucket manager for share with uri: {self.share.shareUri}', - process_name='s3 bucket share processor', + process_name=self.__class__.__name__, error_logs=[str(error)], ) return True diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py index 0a7fdb93c..68bf4c426 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py @@ -347,10 +347,6 @@ def process_revoked_shares(self) -> bool: log.info('Deleting target shared database...') manager.delete_shared_database_in_target() except Exception as e: - log.error( - f'Failed to clean-up database permissions or delete shared database {manager.shared_db_name} ' - f'due to: {e}' - ) manager.handle_revoke_clean_up_failure(error=e) success = False return success diff --git a/backend/dataall/modules/shares_base/db/share_object_repositories.py b/backend/dataall/modules/shares_base/db/share_object_repositories.py index c6d1c70b3..b517e8e04 100644 --- 
a/backend/dataall/modules/shares_base/db/share_object_repositories.py +++ b/backend/dataall/modules/shares_base/db/share_object_repositories.py @@ -42,11 +42,11 @@ def find_share(session, dataset: DatasetBase, env, principal_id, principal_role_ ) @staticmethod - def find_dataset_shares(session, dataset_uri: str, share_statues: List[str] = None): + def find_dataset_shares(session, dataset_uri: str, share_statuses: List[str] = None): query = session.query(ShareObject).filter(ShareObject.datasetUri == dataset_uri) - if share_statues: - query = query.filter(ShareObject.status.in_(share_statues)) + if share_statuses: + query = query.filter(ShareObject.status.in_(share_statuses)) return query.all() @@ -202,7 +202,10 @@ def get_share_data_items_by_type(session, share, share_type_model, share_type_ur return query.all() @staticmethod - def get_share_object_with_health_status(session, health_status_list: List[str] = None): + def get_share_objects_with_item_health_status(session, health_status_list: List[str] = None): + if health_status_list is None: + health_status_list = [] + query = ( session.query(ShareObject) .join(ShareObjectItem, ShareObjectItem.shareUri == ShareObject.shareUri) diff --git a/backend/dataall/modules/shares_base/services/sharing_service.py b/backend/dataall/modules/shares_base/services/sharing_service.py index 40c754277..886c20b66 100644 --- a/backend/dataall/modules/shares_base/services/sharing_service.py +++ b/backend/dataall/modules/shares_base/services/sharing_service.py @@ -148,7 +148,7 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: if len(service_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred while processing share with uri: {share_uri}', - process_name='Sharing Service', + process_name=cls.__name__, error_logs=service_exceptions, ) @@ -268,7 +268,7 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: if len(service_exceptions) > 0: 
AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred while revoking share with uri: {share_uri}', - process_name='Sharing Service', + process_name=cls.__name__, error_logs=service_exceptions, ) @@ -294,10 +294,10 @@ def verify_share( Returns True when completed ------- """ - task_exceptions = [] + service_exceptions = [] try: with engine.scoped_session() as session: - health_status_list: List[bool] = [] + shares_health_status: bool = True share_data, share_items = cls._get_share_data_and_items(session, share_uri, status, healthStatus) for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): try: @@ -311,16 +311,15 @@ def verify_share( healthStatus=healthStatus, ) if shareable_items: - health_status = processor.Processor( + shares_health_status &= processor.Processor( session, share_data, shareable_items ).verify_shares_health_status() - health_status_list.append(health_status) else: log.info(f'There are no items to verify of type {type.value}') except Exception as e: log.error(f'Error occurred during share verifying of {type.value}: {e}') - task_exceptions.append(str(e)) - if False in health_status_list: + service_exceptions.append(str(e)) + if not shares_health_status: log.info( f'Sending notifications since share object item(s) for share: {share_data.share.shareUri} are in unhealthy state after verifying shares' ) @@ -329,13 +328,13 @@ def verify_share( ).notify_share_object_items_unhealthy() except Exception as e: log.error(f'Unexpected error occurred while verifying share with uri: {share_uri} due to: {e}') - task_exceptions.append(str(e)) + service_exceptions.append(str(e)) finally: - if len(task_exceptions) > 0: + if len(service_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred during verification of share with uri: {share_data.share.shareUri} ', - error_logs=task_exceptions, - process_name='Sharing Service', + error_logs=service_exceptions, + 
process_name=cls.__name__, ) return True @@ -358,7 +357,7 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: False if any re-apply of share item(s) failed """ reapply_successful = True - code_exception_list = [] + service_exceptions = [] try: with engine.scoped_session() as session: share_data, share_items = cls._get_share_data_and_items( @@ -403,7 +402,7 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: log.info(f'There are no items to reapply of type {type.value}') except Exception as e: log.error(f'Error occurred during share reapplying of {type.value}: {e}') - code_exception_list.append( + service_exceptions.append( f'Error occurred during reapplying of share with uri: {share_data.share.shareUri} for processor type: {type.value} due to an unknown exception: {e}' ) @@ -415,7 +414,7 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: session=session, dataset=share_data.dataset, share=share_data.share ).notify_share_object_items_unhealthy() else: - if len(code_exception_list) == 0: + if len(service_exceptions) == 0: log.info( f'Sending notifications to the share owner to inform that the share with uri: {share_data.share.shareUri} is now in healthy state' ) @@ -431,7 +430,7 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: new_status=ShareItemHealthStatus.Unhealthy.value, message=str(timeout_exception), ) - code_exception_list.append(str(timeout_exception)) + service_exceptions.append(str(timeout_exception)) except Exception as e: log.error(f'Unexpected error occurred while reapplying share with uri: {share_uri} due to: {e}') ShareStatusRepository.update_share_item_health_status_batch( @@ -441,13 +440,13 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: new_status=ShareItemHealthStatus.Unhealthy.value, message='Unexpected error occurred while reapplying share', ) - code_exception_list.append(str(e)) + service_exceptions.append(str(e)) finally: - if len(code_exception_list) > 0: + if 
len(service_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( - process_error=f'Error occurred during reapplying of share with uri: {share_data.share.shareUri}', - error_logs=code_exception_list, - process_name='Sharing Service', + process_error=f'Error occurred during reapplying of share with uri: {share_uri}', + error_logs=service_exceptions, + process_name=cls.__name__, ) return reapply_successful diff --git a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py index 86f63e528..f19b90729 100644 --- a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py @@ -33,14 +33,15 @@ def process_reapply_shares_for_dataset(cls, engine, dataset_uri): ) return processed_share_objects except Exception as e: - log.error(f'Error occurred while reapplying share task due to: {e}') - task_exceptions.append(f'Error occurred while reapplying share task due to: {e}') + error_msg = f'Error occurred while reapplying share task due to: {e}' + log.error(error_msg) + task_exceptions.append(error_msg) finally: if len(task_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while processing share during reapplying task', error_logs=task_exceptions, - process_name='Share Reapplier Task', + process_name=cls.__name__, ) @classmethod @@ -79,14 +80,15 @@ def process_reapply_shares(cls, engine): ) return processed_share_objects except Exception as e: - log.error(f'Error occurred while reapplying share task due to: {e}') - task_exceptions.append(f'Error occurred while reapplying share task due to: {e}') + error_msg = f'Error occurred while reapplying share task due to: {e}' + log.error(error_msg) + task_exceptions.append(error_msg) finally: if len(task_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while 
processing share during reapplying task', error_logs=task_exceptions, - process_name='Share Reapplier Task', + process_name=cls.__name__, ) diff --git a/backend/dataall/modules/shares_base/tasks/share_verifier_task.py b/backend/dataall/modules/shares_base/tasks/share_verifier_task.py index c2fef9786..79c8ed657 100644 --- a/backend/dataall/modules/shares_base/tasks/share_verifier_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_verifier_task.py @@ -41,10 +41,9 @@ def verify_shares(engine): healthStatus=None, ) except Exception as e: - log.error(f'Error occurred while verifying share with uri: {share_object.shareUri} due to: {e}') - task_exceptions.append( - f'Error occurred while verifying share with uri: {share_object.shareUri} due to: {e}' - ) + error_msg = f'Error occurred while verifying share with uri: {share_object.shareUri} due to: {e}' + log.error(error_msg) + task_exceptions.append(error_msg) return processed_share_objects except Exception as e: log.error(f'Error occurred while verifying shares task due to: {e}') From 401b0de3cb9206b1f1280d1fd0875fbb4e8c3954 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Tue, 21 Jan 2025 19:09:37 -0600 Subject: [PATCH 16/26] Minor changes after testing --- .../modules/notifications/services/admin_notifications.py | 4 +--- .../dataall/modules/shares_base/services/sharing_service.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/backend/dataall/modules/notifications/services/admin_notifications.py b/backend/dataall/modules/notifications/services/admin_notifications.py index 2a9a85171..c8acbb65a 100644 --- a/backend/dataall/modules/notifications/services/admin_notifications.py +++ b/backend/dataall/modules/notifications/services/admin_notifications.py @@ -27,9 +27,7 @@ def notify_admins_with_error_log(process_error: str, error_logs: List[str], proc email_message = f""" Following error occurred -

{process_error} """ - for error_log in error_logs: - email_message += '

'.join(error_log) - + email_message += '

'.join(error_logs) email_message += '

Please check the logs in cloudwatch for more details' SESEmailNotificationService.create_and_send_email_notifications( diff --git a/backend/dataall/modules/shares_base/services/sharing_service.py b/backend/dataall/modules/shares_base/services/sharing_service.py index 886c20b66..e2d230212 100644 --- a/backend/dataall/modules/shares_base/services/sharing_service.py +++ b/backend/dataall/modules/shares_base/services/sharing_service.py @@ -332,7 +332,7 @@ def verify_share( finally: if len(service_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( - process_error=f'Error occurred during verification of share with uri: {share_data.share.shareUri} ', + process_error=f'Error occurred during verification of share with uri: {share_uri} ', error_logs=service_exceptions, process_name=cls.__name__, ) From 63dd9f1497431e2f38a11687465bac5e40502a54 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Wed, 22 Jan 2025 14:32:30 -0600 Subject: [PATCH 17/26] Adding new changes and corrections --- .../ses_email_notification_service.py | 8 ++-- .../tasks/weekly_digest_reminder.py | 36 ++++++++++------- .../db/dataset_table_repositories.py | 31 ++++++++++---- .../services/dataset_table_notifications.py | 40 +++++++++---------- .../services/dataset_table_service.py | 13 +++--- .../s3_datasets/tasks/tables_syncer.py | 11 +++-- .../db/share_object_repositories.py | 19 +++++++++ 7 files changed, 102 insertions(+), 56 deletions(-) diff --git a/backend/dataall/modules/notifications/services/ses_email_notification_service.py b/backend/dataall/modules/notifications/services/ses_email_notification_service.py index ec074f079..c3e640873 100644 --- a/backend/dataall/modules/notifications/services/ses_email_notification_service.py +++ b/backend/dataall/modules/notifications/services/ses_email_notification_service.py @@ -40,9 +40,11 @@ def send_email_task(subject, message, recipient_groups_list, recipient_email_lis ) identityProvider = 
ServiceProviderFactory.get_service_provider_instance() try: - email_ids_to_send_emails = email_provider.get_email_ids_from_groupList( - email_provider.recipient_group_list, identityProvider - ) + email_ids_to_send_emails = set() + if len(recipient_groups_list) > 0: + email_ids_to_send_emails = email_provider.get_email_ids_from_groupList( + email_provider.recipient_group_list, identityProvider + ) if len(recipient_email_list) > 0: email_ids_to_send_emails.update(recipient_email_list) diff --git a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py index dc178c479..6350bd7a1 100644 --- a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py +++ b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py @@ -5,6 +5,7 @@ from dataall.base.db import get_engine from dataall.base.loader import load_modules, ImportMode +from dataall.base.services.service_provider_factory import ServiceProviderFactory from dataall.core.environment.db.environment_models import Environment from dataall.core.stacks.api.enums import StackStatus from dataall.core.stacks.db.stack_repositories import StackRepository @@ -50,7 +51,7 @@ class NotificationResourceBundle: """ Mapping between the group / team name and the associated notification events ( in the form of NotificationResourceBundle ) """ -group_name_to_resource_bundle_map: Dict[str, NotificationResourceBundle] = {} +user_email_to_resource_bundle_map: Dict[str, NotificationResourceBundle] = {} def _get_pending_share_notifications(session): @@ -127,18 +128,22 @@ def _get_receivers_for_stack(resource, target_type): """ -def _map_groups_to_resource_bundles(list_of_notifications: List[NotificationResource], resource_bundle_type: str): +def _map_email_ids_to_resource_bundles(list_of_notifications: List[NotificationResource], resource_bundle_type: str): for notification in list_of_notifications: # Get all the receivers groups 
notification_receiver_groups = notification.receivers - for receiver_group_name in notification_receiver_groups: - if receiver_group_name in group_name_to_resource_bundle_map: - resource_bundle = group_name_to_resource_bundle_map.get(receiver_group_name) + service_provider = ServiceProviderFactory.get_service_provider_instance() + email_ids: Set = set() + for group in notification_receiver_groups: + email_ids.update(service_provider.get_user_emailids_from_group(groupName=group)) + for email_id in email_ids: + if email_id in user_email_to_resource_bundle_map: + resource_bundle = user_email_to_resource_bundle_map.get(email_id) resource_bundle.__getattribute__(resource_bundle_type).append(notification) else: resource_bundle = NotificationResourceBundle() resource_bundle.__getattribute__(resource_bundle_type).append(notification) - group_name_to_resource_bundle_map[receiver_group_name] = resource_bundle + user_email_to_resource_bundle_map[email_id] = resource_bundle def send_reminder_email(engine): @@ -149,7 +154,6 @@ def send_reminder_email(engine): # Get all shares in submitted state pending_share_notification_resources = _get_pending_share_notifications(session=session) resources_type_tuple.append((pending_share_notification_resources, 'share_object_notifications')) - # Todo : Check if distinct needed for the share object repository # Get all shares in unhealthy state unhealthy_share_objects_notification_resources = _get_unhealthy_share_notification(session=session) resources_type_tuple.append((unhealthy_share_objects_notification_resources, 'share_object_notifications')) @@ -169,24 +173,26 @@ def send_reminder_email(engine): # For each notification resource ( i.e. 
share notification, dataset notification, etc ), # function _map_groups_to_resource_bundles maps each team name : resource bundle for notification_resources, resource_bundle_type in resources_type_tuple: - _map_groups_to_resource_bundles( + _map_email_ids_to_resource_bundles( list_of_notifications=notification_resources, resource_bundle_type=resource_bundle_type ) - for group, resource_bundle in group_name_to_resource_bundle_map.items(): + for email_id, resource_bundle in user_email_to_resource_bundle_map.items(): email_body = _construct_email_body(resource_bundle) - log.debug(f' Sending email to group: {group} with email content: {email_body}') + log.debug(f' Sending email to user: {email_id} with email content: {email_body}') subject = 'Attention Required | Data.all weekly digest' try: SESEmailNotificationService.create_and_send_email_notifications( - subject=subject, msg=email_body, recipient_groups_list=[group] + subject=subject, msg=email_body, recipient_email_ids=[email_id] ) except Exception as e: - log.error(f'Error occurred in sending email while weekly reminder task due to: {e}') - task_exceptions.append(f'Error occurred in sending email while weekly reminder task due to: {e}') + err_msg = f'Error occurred in sending email while weekly reminder task due to: {e}' + log.error(err_msg) + task_exceptions.append(err_msg) except Exception as e: - log.error(f'Error occurred while running the weekly reminder task: {e}') - task_exceptions.append(f'Error occurred while running the weekly reminder task: {e}') + err_msg = f'Error occurred while running the weekly reminder task: {e}' + log.error(err_msg) + task_exceptions.append(err_msg) finally: if len(task_exceptions) > 0: log.info('Sending email notifications to the admin team') diff --git a/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py b/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py index 52c5ca587..6a3df3a28 100644 --- 
a/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py +++ b/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py @@ -1,6 +1,7 @@ import logging +from dataclasses import dataclass, field from datetime import datetime -from typing import Dict +from typing import Dict, List from sqlalchemy.sql import and_ @@ -12,10 +13,23 @@ DatasetTableDataFilter, ) from dataall.base.utils import json_utils +from dataall.modules.shares_base.db.share_object_models import ShareObject +from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository +from dataall.modules.shares_base.services.shares_enums import ShareItemStatus logger = logging.getLogger(__name__) +""" +Dataclass containing status of the dataset table and the share objects where the dataset table is present +""" +@dataclass +class DatasetTableShareDetails: + tableUri: str + status: str + share_objects: List[ShareObject] = field(default_factory=list) + + class DatasetTableRepository: @staticmethod def save(session, table: DatasetTable): @@ -62,22 +76,25 @@ def get_dataset_table_by_uri(session, table_uri): return table @staticmethod - def update_existing_tables_status(existing_tables, glue_tables): - updated_tables_status_map: Dict[str:str] = {} + def update_existing_tables_status(session, existing_tables, glue_tables): + # Map between tables and the details about the table ( i.e. 
status, share object on that table ) + updated_tables_status_map: Dict[DatasetTable, DatasetTableShareDetails] = dict() for existing_table in existing_tables: if existing_table.GlueTableName not in [t['Name'] for t in glue_tables]: if existing_table.LastGlueTableStatus != 'Deleted': existing_table.LastGlueTableStatus = 'Deleted' - updated_tables_status_map[existing_table.GlueTableName] = 'Deleted' + # Get all the share objects where the table is used + dataset_shares: List[ShareObject] = ShareObjectRepository.list_dataset_shares_for_item_uris(session=session, dataset_uri=existing_table.datasetUri, share_item_shared_states=[ShareItemStatus.Share_Succeeded.value], item_uris=[existing_table.tableUri]) + updated_tables_status_map[existing_table] = DatasetTableShareDetails(status='Deleted', share_objects=dataset_shares, tableUri=existing_table.tableUri) logger.info(f'Existing Table {existing_table.GlueTableName} status set to Deleted from Glue') else: logger.info(f'Existing Table {existing_table.GlueTableName} status already set Deleted') elif ( - existing_table.GlueTableName in [t['Name'] for t in glue_tables] - and existing_table.LastGlueTableStatus == 'Deleted' + existing_table.GlueTableName in [t['Name'] for t in glue_tables] + and existing_table.LastGlueTableStatus == 'Deleted' ): existing_table.LastGlueTableStatus = 'InSync' - updated_tables_status_map[existing_table.GlueTableName] = 'InSync: Updated to InSync from Deleted' + updated_tables_status_map[existing_table] = DatasetTableShareDetails(status='InSync: Updated to InSync from Deleted', share_objects=[], tableUri=existing_table.tableUri) # Keeping share object empty as no user needs to be informed when a table gets in sync logger.info( f'Updating Existing Table {existing_table.GlueTableName} status set to InSync from Deleted after found in Glue' ) diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py 
b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py index 49bf4884f..413785f0e 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py @@ -1,7 +1,8 @@ -from typing import Dict +from typing import Dict, List import logging from dataall.modules.notifications.services.ses_email_notification_service import SESEmailNotificationService -from dataall.modules.s3_datasets.db.dataset_models import S3Dataset +from dataall.modules.s3_datasets.db.dataset_models import S3Dataset, DatasetTable +from dataall.modules.s3_datasets.db.dataset_table_repositories import DatasetTableShareDetails from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository log = logging.getLogger(__name__) @@ -17,27 +18,24 @@ def __init__(self, dataset: S3Dataset): table_status_map - Dictionary of GlueTableName and table status ( InSync, Deleted, etc ) """ - def notify_dataset_table_updates(self, session, table_status_map: Dict[str, str]): - # Construct and send email reminders for datasets - self._send_email_reminders_for_dataset(table_status_map) + def notify_dataset_table_updates(self, dataset_table_status_map: Dict[DatasetTable, DatasetTableShareDetails]): + self._send_email_reminders_for_dataset(dataset_table_status_map) - # Find all the shares made on this dataset - shares = ShareObjectRepository.find_dataset_shares( - session=session, dataset_uri=self.dataset.datasetUri, share_statuses=['Processed'] - ) - if shares: - for share in shares: - self._send_email_notification_for_share(share, table_status_map) + for dataset_table, table_share_details in dataset_table_status_map.items(): + share_on_tables = table_share_details.share_objects + if share_on_tables: + for share in share_on_tables: + self._send_email_notification_for_share(share, dataset_table_status_map) - def _send_email_notification_for_share(self, share, 
table_status_map): + def _send_email_notification_for_share(self, share, dataset_table_status_map): subject = f'Alert: Data.all Update | Glue table updated for dataset: {self.dataset.name}' msg_footer = f""" - You have an active share with uri: {share.shareUri}. If there is any table requested by you on the dataset: {self.dataset.name} for that share it may have been affected in case if the tables are deleted.
- Note: Please check with the dataset owner if there is any missing table from your share - as it is likely deleted from the dataset.
If the table exists in the dataset and is successfully shared but you are unable to access the table, then please reach out to the data.all team

+ You have an active share with uri: {share.shareUri}. If there are any table(s) requested by you on the dataset: {self.dataset.name}, then for that share the table might be affected in case the tables were deleted.
+
Note: Please check with the dataset owner if there is any missing table from your share - as it is likely deleted from the dataset.
If the table exists in the dataset and is successfully shared but you are unable to access the table, then please reach out to the data.all team

Regards,
data.all team """ - table_content = self._construct_html_table_from_glue_status_map(table_status_map) + table_content = self._construct_html_table_from_glue_status_map(dataset_table_status_map) msg_body = f""" Dear Team,

Following tables have been updated for dataset: {self.dataset.name}

@@ -50,7 +48,7 @@ def _send_email_notification_for_share(self, share, table_status_map): ) def _send_email_reminders_for_dataset(self, table_status_map): - subject = f'Data.all Update | Glue tables updated for dataset: {self.dataset.name}' + subject = f'Data.all Update | Glue table(s) updated for dataset: {self.dataset.name}' table_content = self._construct_html_table_from_glue_status_map(table_status_map) msg_body = f""" Dear Team,

@@ -68,7 +66,7 @@ def _send_email_reminders_for_dataset(self, table_status_map): ) @classmethod - def _construct_html_table_from_glue_status_map(cls, table_status_map): + def _construct_html_table_from_glue_status_map(cls, dataset_table_status_map): table_heading = """ Glue Table Name @@ -76,11 +74,11 @@ def _construct_html_table_from_glue_status_map(cls, table_status_map): """ table_body = """""" - for table_name, table_status in table_status_map.items(): + for dataset_table, dataset_table_details in dataset_table_status_map.items(): table_body += f""" - {table_name} - {table_status} + {dataset_table.GlueTableName} + {dataset_table_details.status} """ table_content = f""" diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py index d67fa45c5..7a0c23065 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py @@ -1,4 +1,6 @@ import logging +from typing import Dict + from dataall.base.context import get_context from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService @@ -6,7 +8,7 @@ from dataall.core.environment.services.environment_service import EnvironmentService from dataall.modules.s3_datasets.aws.athena_table_client import AthenaTableClient from dataall.modules.s3_datasets.aws.glue_dataset_client import DatasetCrawler -from dataall.modules.s3_datasets.db.dataset_table_repositories import DatasetTableRepository +from dataall.modules.s3_datasets.db.dataset_table_repositories import DatasetTableRepository, DatasetTableShareDetails from dataall.modules.s3_datasets.db.dataset_table_data_filter_repositories import DatasetTableDataFilterRepository from dataall.modules.s3_datasets.indexers.table_indexer import DatasetTableIndexer from 
dataall.modules.s3_datasets.indexers.dataset_indexer import DatasetIndexer @@ -136,15 +138,14 @@ def sync_tables_for_dataset(cls, uri): @staticmethod def sync_existing_tables(session, uri, glue_tables=None): dataset: S3Dataset = DatasetRepository.get_dataset_by_uri(session, uri) - updated_table_status_map = {} + updated_table_status_map: Dict[DatasetTable, DatasetTableShareDetails] = dict() if dataset: existing_tables = DatasetTableRepository.find_dataset_tables(session, uri) existing_table_names = [e.GlueTableName for e in existing_tables] existing_dataset_tables_map = {t.GlueTableName: t for t in existing_tables} - updated_table_status_map = DatasetTableRepository.update_existing_tables_status( - existing_tables, glue_tables - ) + updated_table_status_map = DatasetTableRepository.update_existing_tables_status(session, existing_tables, + glue_tables) log.info(f'existing_tables={glue_tables}') for table in glue_tables: @@ -152,7 +153,7 @@ def sync_existing_tables(session, uri, glue_tables=None): log.info(f'Storing new table: {table} for dataset db {dataset.GlueDatabaseName}') updated_table = DatasetTableRepository.create_synced_table(session, dataset, table) DatasetTableService._attach_dataset_table_permission(session, dataset, updated_table.tableUri) - updated_table_status_map[updated_table.GlueTableName] = 'Newly Added' + updated_table_status_map[updated_table] = DatasetTableShareDetails(status='Newly Added', share_objects=[], tableUri=updated_table.tableUri) # No share object exist on newly added tables else: log.info(f'Updating table: {table} for dataset db {dataset.GlueDatabaseName}') updated_table: DatasetTable = existing_dataset_tables_map.get(table['Name']) diff --git a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py index f5e77be54..96a156334 100644 --- a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py +++ b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py @@ -2,6 
+2,7 @@ import os import sys from operator import and_ +from typing import Dict, List from dataall.base.aws.sts import SessionHelper from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup @@ -10,6 +11,7 @@ from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.s3_datasets.aws.glue_dataset_client import DatasetCrawler from dataall.modules.s3_datasets.aws.lf_table_client import LakeFormationTableClient +from dataall.modules.s3_datasets.db.dataset_table_repositories import DatasetTableShareDetails from dataall.modules.s3_datasets.services.dataset_table_notifications import DatasetTableNotifications from dataall.modules.s3_datasets.services.dataset_table_service import DatasetTableService from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository @@ -47,7 +49,7 @@ def sync_tables(engine): ) # Send email notification if there are any table additions/ deletions - _send_notification_after_table_updates(dataset, session, table_status_map, task_exceptions) + _send_email_notification_for_table_updates(dataset, table_status_map, task_exceptions) # For all tables in dataset, grant lake formation permission to all principals on the tables tables = _get_tables_for_dataset(dataset, session) @@ -81,13 +83,14 @@ def sync_tables(engine): ) -def _send_notification_after_table_updates(dataset, session, table_status_map, task_exceptions): +def _send_email_notification_for_table_updates(dataset: S3Dataset, + table_status_map: Dict[DatasetTable, DatasetTableShareDetails], + task_exceptions: List[str]): if table_status_map: log.info('Sending email notification after dataset table updates were found') try: DatasetTableNotifications(dataset=dataset).notify_dataset_table_updates( - session=session, table_status_map=table_status_map - ) + dataset_table_status_map=table_status_map) except Exception as e: error_log = f'Error occurred while sending email to notify about changes 
to the glue tables for dataset with uri: {dataset.datasetUri} due to: {e}' task_exceptions.append(error_log) diff --git a/backend/dataall/modules/shares_base/db/share_object_repositories.py b/backend/dataall/modules/shares_base/db/share_object_repositories.py index b517e8e04..7f6b5b4f4 100644 --- a/backend/dataall/modules/shares_base/db/share_object_repositories.py +++ b/backend/dataall/modules/shares_base/db/share_object_repositories.py @@ -62,6 +62,25 @@ def find_share_by_dataset_attributes(session, dataset_uri, dataset_owner, groups ) return share + @staticmethod + def list_dataset_shares_for_item_uris( + session, dataset_uri: str, share_item_shared_states: List[str], item_uris: List[str] + ) -> [ShareObject]: + query = ( + session.query(ShareObject) + .outerjoin(ShareObjectItem, ShareObjectItem.shareUri == ShareObject.shareUri) + .filter( + and_( + ShareObject.datasetUri == dataset_uri, + ShareObject.deleted.is_(None), + ShareObjectItem.status.in_(share_item_shared_states), + ShareObjectItem.itemUri.in_(item_uris) + ) + ) + ) + + return query.all() + @staticmethod def list_dataset_shares_with_existing_shared_items( session, dataset_uri, share_item_shared_states, environment_uri=None, item_type=None From 828edf8187ab3ae6531b020fb917936a119110f0 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Wed, 22 Jan 2025 14:39:14 -0600 Subject: [PATCH 18/26] Minor change and linting --- .../db/dataset_table_repositories.py | 21 ++++++++++++++----- .../services/dataset_table_notifications.py | 2 +- .../services/dataset_table_service.py | 9 +++++--- .../s3_datasets/tasks/tables_syncer.py | 9 ++++---- .../db/share_object_repositories.py | 2 +- 5 files changed, 29 insertions(+), 14 deletions(-) diff --git a/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py b/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py index 6a3df3a28..e674e44de 100644 --- a/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py +++ 
b/backend/dataall/modules/s3_datasets/db/dataset_table_repositories.py @@ -23,6 +23,8 @@ """ Dataclass containing status of the dataset table and the share objects where the dataset table is present """ + + @dataclass class DatasetTableShareDetails: tableUri: str @@ -84,17 +86,26 @@ def update_existing_tables_status(session, existing_tables, glue_tables): if existing_table.LastGlueTableStatus != 'Deleted': existing_table.LastGlueTableStatus = 'Deleted' # Get all the share objects where the table is used - dataset_shares: List[ShareObject] = ShareObjectRepository.list_dataset_shares_for_item_uris(session=session, dataset_uri=existing_table.datasetUri, share_item_shared_states=[ShareItemStatus.Share_Succeeded.value], item_uris=[existing_table.tableUri]) - updated_tables_status_map[existing_table] = DatasetTableShareDetails(status='Deleted', share_objects=dataset_shares, tableUri=existing_table.tableUri) + dataset_shares: List[ShareObject] = ShareObjectRepository.list_dataset_shares_for_item_uris( + session=session, + dataset_uri=existing_table.datasetUri, + share_item_shared_states=[ShareItemStatus.Share_Succeeded.value], + item_uris=[existing_table.tableUri], + ) + updated_tables_status_map[existing_table] = DatasetTableShareDetails( + status='Deleted', share_objects=dataset_shares, tableUri=existing_table.tableUri + ) logger.info(f'Existing Table {existing_table.GlueTableName} status set to Deleted from Glue') else: logger.info(f'Existing Table {existing_table.GlueTableName} status already set Deleted') elif ( - existing_table.GlueTableName in [t['Name'] for t in glue_tables] - and existing_table.LastGlueTableStatus == 'Deleted' + existing_table.GlueTableName in [t['Name'] for t in glue_tables] + and existing_table.LastGlueTableStatus == 'Deleted' ): existing_table.LastGlueTableStatus = 'InSync' - updated_tables_status_map[existing_table] = DatasetTableShareDetails(status='InSync: Updated to InSync from Deleted', share_objects=[], tableUri=existing_table.tableUri) 
# Keeping share object empty as no user needs to be informed when a table gets in sync + updated_tables_status_map[existing_table] = DatasetTableShareDetails( + status='InSync: Updated to InSync from Deleted', share_objects=[], tableUri=existing_table.tableUri + ) # Keeping share object empty as no user needs to be informed when a table gets in sync logger.info( f'Updating Existing Table {existing_table.GlueTableName} status set to InSync from Deleted after found in Glue' ) diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py index 413785f0e..9778c7fc1 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py @@ -28,7 +28,7 @@ def notify_dataset_table_updates(self, dataset_table_status_map: Dict[DatasetTab self._send_email_notification_for_share(share, dataset_table_status_map) def _send_email_notification_for_share(self, share, dataset_table_status_map): - subject = f'Alert: Data.all Update | Glue table updated for dataset: {self.dataset.name}' + subject = f'Alert: Data.all Update | Glue table(s) updated for dataset: {self.dataset.name}' msg_footer = f""" You have an active share with uri: {share.shareUri}. If there are any table(s) requested by you on the dataset: {self.dataset.name}, then for that share the table might be affected in case the tables were deleted.

Note: Please check with the dataset owner if there is any missing table from your share - as it is likely deleted from the dataset.
If the table exists in the dataset and is successfully shared but you are unable to access the table, then please reach out to the data.all team

diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py index 7a0c23065..45b5ddcb1 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py @@ -144,8 +144,9 @@ def sync_existing_tables(session, uri, glue_tables=None): existing_table_names = [e.GlueTableName for e in existing_tables] existing_dataset_tables_map = {t.GlueTableName: t for t in existing_tables} - updated_table_status_map = DatasetTableRepository.update_existing_tables_status(session, existing_tables, - glue_tables) + updated_table_status_map = DatasetTableRepository.update_existing_tables_status( + session, existing_tables, glue_tables + ) log.info(f'existing_tables={glue_tables}') for table in glue_tables: @@ -153,7 +154,9 @@ def sync_existing_tables(session, uri, glue_tables=None): log.info(f'Storing new table: {table} for dataset db {dataset.GlueDatabaseName}') updated_table = DatasetTableRepository.create_synced_table(session, dataset, table) DatasetTableService._attach_dataset_table_permission(session, dataset, updated_table.tableUri) - updated_table_status_map[updated_table] = DatasetTableShareDetails(status='Newly Added', share_objects=[], tableUri=updated_table.tableUri) # No share object exist on newly added tables + updated_table_status_map[updated_table] = DatasetTableShareDetails( + status='Newly Added', share_objects=[], tableUri=updated_table.tableUri + ) # No share object exist on newly added tables else: log.info(f'Updating table: {table} for dataset db {dataset.GlueDatabaseName}') updated_table: DatasetTable = existing_dataset_tables_map.get(table['Name']) diff --git a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py index 96a156334..9fb82fb4c 100644 --- a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py +++ 
b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py @@ -83,14 +83,15 @@ def sync_tables(engine): ) -def _send_email_notification_for_table_updates(dataset: S3Dataset, - table_status_map: Dict[DatasetTable, DatasetTableShareDetails], - task_exceptions: List[str]): +def _send_email_notification_for_table_updates( + dataset: S3Dataset, table_status_map: Dict[DatasetTable, DatasetTableShareDetails], task_exceptions: List[str] +): if table_status_map: log.info('Sending email notification after dataset table updates were found') try: DatasetTableNotifications(dataset=dataset).notify_dataset_table_updates( - dataset_table_status_map=table_status_map) + dataset_table_status_map=table_status_map + ) except Exception as e: error_log = f'Error occurred while sending email to notify about changes to the glue tables for dataset with uri: {dataset.datasetUri} due to: {e}' task_exceptions.append(error_log) diff --git a/backend/dataall/modules/shares_base/db/share_object_repositories.py b/backend/dataall/modules/shares_base/db/share_object_repositories.py index 7f6b5b4f4..758c639bd 100644 --- a/backend/dataall/modules/shares_base/db/share_object_repositories.py +++ b/backend/dataall/modules/shares_base/db/share_object_repositories.py @@ -74,7 +74,7 @@ def list_dataset_shares_for_item_uris( ShareObject.datasetUri == dataset_uri, ShareObject.deleted.is_(None), ShareObjectItem.status.in_(share_item_shared_states), - ShareObjectItem.itemUri.in_(item_uris) + ShareObjectItem.itemUri.in_(item_uris), ) ) ) From 8be42987f7784cbb253da973b63235eaff0599f3 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Wed, 22 Jan 2025 16:58:56 -0600 Subject: [PATCH 19/26] Formatting changes --- .../modules/notifications/tasks/weekly_digest_reminder.py | 4 ++-- .../s3_datasets/services/dataset_table_notifications.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py 
b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py index 6350bd7a1..2a2a28dd0 100644 --- a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py +++ b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py @@ -135,7 +135,7 @@ def _map_email_ids_to_resource_bundles(list_of_notifications: List[NotificationR service_provider = ServiceProviderFactory.get_service_provider_instance() email_ids: Set = set() for group in notification_receiver_groups: - email_ids.update(service_provider.get_user_emailids_from_group(groupName=group)) + email_ids.update(service_provider.get_user_emailids_from_group(role_name=group)) for email_id in email_ids: if email_id in user_email_to_resource_bundle_map: resource_bundle = user_email_to_resource_bundle_map.get(email_id) @@ -275,7 +275,7 @@ def _create_table_for_resource(list_of_resources, uri_attr, link_uri): """ table = f""" - +
{table_heading} {table_body}
diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py index 9778c7fc1..f1fcb4d42 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py @@ -82,7 +82,7 @@ def _construct_html_table_from_glue_status_map(cls, dataset_table_status_map): """ table_content = f""" - +
{table_heading} {table_body}
From a408553ce0f11fa686ec3aca2821ec8094a399d2 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Thu, 23 Jan 2025 14:20:33 -0600 Subject: [PATCH 20/26] Adding improvements after review comments --- .../notifications/tasks/notification_enums.py | 20 ++++++ .../tasks/weekly_digest_reminder.py | 65 ++++++++++++++----- .../omics/tasks/omics_workflows_fetcher.py | 3 +- .../s3_datasets/tasks/tables_syncer.py | 4 +- .../db/share_object_repositories.py | 7 +- .../shares_base/services/sharing_service.py | 9 +-- .../tasks/persistent_email_reminders_task.py | 6 +- .../tasks/share_expiration_task.py | 15 ++--- .../shares_base/tasks/share_manager_task.py | 2 +- .../shares_base/tasks/share_reapplier_task.py | 2 + .../shares_base/tasks/share_verifier_task.py | 23 +++---- 11 files changed, 105 insertions(+), 51 deletions(-) create mode 100644 backend/dataall/modules/notifications/tasks/notification_enums.py diff --git a/backend/dataall/modules/notifications/tasks/notification_enums.py b/backend/dataall/modules/notifications/tasks/notification_enums.py new file mode 100644 index 000000000..819917d5a --- /dev/null +++ b/backend/dataall/modules/notifications/tasks/notification_enums.py @@ -0,0 +1,20 @@ +from enum import Enum + +from dataall.core.stacks.api.enums import StackStatus + + +class ResourceStatus(Enum): + PENDINGAPPROVAL = 'Submitted - Pending Approval' + HEALTHY = 'Healthy' + UNHEALTHY = 'Unhealthy' + CREATE_FAILED = (StackStatus.CREATE_FAILED.value,) + DELETE_FAILED = (StackStatus.DELETE_FAILED.value,) + UPDATE_FAILED = StackStatus.UPDATE_FAILED.value + UPDATE_ROLLBACK_FAILED = StackStatus.UPDATE_ROLLBACK_FAILED.value + ROLLBACK_FAILED = StackStatus.ROLLBACK_FAILED.value + + +class ResourceType(Enum): + SHAREOBJECT = 'Share Object' + DATASET = 'Dataset' + ENVIRONMENT = 'Environment' diff --git a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py index 2a2a28dd0..b1110667a 100644 
--- a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py +++ b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py @@ -13,6 +13,7 @@ from dataall.modules.datasets_base.db.dataset_repositories import DatasetBaseRepository from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.notifications.services.ses_email_notification_service import SESEmailNotificationService +from dataall.modules.notifications.tasks.notification_enums import ResourceStatus, ResourceType from dataall.modules.shares_base.db.share_object_models import ShareObject from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository from dataall.modules.shares_base.services.shares_enums import ShareItemHealthStatus @@ -27,8 +28,8 @@ @dataclass class NotificationResource: resource: any - resource_type: str - resource_status: str + resource_type: ResourceType + resource_status: ResourceStatus receivers: Set[str] = field(default_factory=set) @@ -49,7 +50,8 @@ class NotificationResourceBundle: """ -Mapping between the group / team name and the associated notification events ( in the form of NotificationResourceBundle ) +{user_email_id: Notification bundle} +Notification bundle consists of share, dataset & environment notification events ( NotificationResource ) """ user_email_to_resource_bundle_map: Dict[str, NotificationResourceBundle] = {} @@ -64,8 +66,8 @@ def _get_pending_share_notifications(session): return [ NotificationResource( resource=share, - resource_type='Share Object', - resource_status=f'{share.status} - Pending Approval', + resource_type=ResourceType.SHAREOBJECT, + resource_status=ResourceStatus.PENDINGAPPROVAL, receivers={share_dataset_map[share].SamlAdminGroupName, share_dataset_map[share].stewards}, ) for share in share_dataset_map @@ -73,18 +75,28 @@ def _get_pending_share_notifications(session): def _get_unhealthy_share_notification(session): - 
unhealthy_share_objects: List[ShareObject] = ShareObjectRepository.get_share_objects_with_item_health_status( + unhealthy_share_objects: List[ShareObject] = ShareObjectRepository.list_share_objects_with_item_health_status( session=session, health_status_list=[ShareItemHealthStatus.Unhealthy.value] ) log.info(f'Found {len(unhealthy_share_objects)} unhealthy share objects') return [ NotificationResource( - resource=share, resource_type='Share Object', resource_status='Unhealthy', receivers={share.groupUri} + resource=share, + resource_type=ResourceType.SHAREOBJECT, + resource_status=ResourceStatus.UNHEALTHY, + receivers={share.groupUri}, ) for share in unhealthy_share_objects ] +""" +Function to fetch all the unhealthy stacks for a target type +target_uri : any unique uri representing a resource ( datasetUri, environmentUri, etc ) +target_type: any db model representing a data.all resource ( e.g. DatasetBase, Environment, etc ) +""" + + def _get_unhealthy_stack_by_type(session, target_uri: str, target_type: Any): unhealthy_stack_status: List[StackStatus] = [ StackStatus.CREATE_FAILED.value, @@ -106,8 +118,8 @@ def _get_unhealthy_stack_by_type(session, target_uri: str, target_type: Any): if stack is not None: notification_resource = NotificationResource( resource=resource, - resource_type=target_type.__name__, - resource_status=stack.status, + resource_type=_get_resource_type_for_stack(target_type), + resource_status=_get_resource_status_for_stack(stack.status), receivers=_get_receivers_for_stack(resource=resource, target_type=target_type), ) unhealthy_stack_notification_resources.append(notification_resource) @@ -116,26 +128,46 @@ def _get_unhealthy_stack_by_type(session, target_uri: str, target_type: Any): def _get_receivers_for_stack(resource, target_type): + """Returns team(s) / group(s) as per the target_type model (DatasetBase, Environment, etc )""" if target_type.__name__ == 'Dataset': return {resource.SamlAdminGroupName, resource.stewards} if 
target_type.__name__ == 'Environment': return {resource.SamlGroupName} +def _get_resource_type_for_stack(target_type): + """Returns ResourceType as per the target_type model (DatasetBase, Environment, etc )""" + if target_type.__name__ == 'Dataset': + return ResourceType.DATASET + if target_type.__name__ == 'Environment': + return ResourceType.ENVIRONMENT + + +def _get_resource_status_for_stack(stack_status): + """Returns the enum associated with the stack's status. + Iterates over all enums of ResourceStatus and returns enums with matching values with the stack status + """ + + for resource_status in ResourceStatus: + if resource_status.value == stack_status: + return resource_status + + """ -Function to create a map of {group name : resource bundle}, where each resource bundle contains dataset, share and environment notification lists. -Iterated over all the notification ( NotificationResources ) and then segregate based on the dataset, shares & environment notifications and map the bundle to a team. +Function to create a map of {user_email_id : resource bundle}, where each resource bundle contains dataset, share and environment notification lists. +Iterated over all the notification ( NotificationResources ) and then segregate based on the dataset, shares & environment notifications and map the bundle to a user. 
""" def _map_email_ids_to_resource_bundles(list_of_notifications: List[NotificationResource], resource_bundle_type: str): for notification in list_of_notifications: - # Get all the receivers groups notification_receiver_groups = notification.receivers service_provider = ServiceProviderFactory.get_service_provider_instance() + email_ids: Set = set() for group in notification_receiver_groups: email_ids.update(service_provider.get_user_emailids_from_group(role_name=group)) + for email_id in email_ids: if email_id in user_email_to_resource_bundle_map: resource_bundle = user_email_to_resource_bundle_map.get(email_id) @@ -154,14 +186,17 @@ def send_reminder_email(engine): # Get all shares in submitted state pending_share_notification_resources = _get_pending_share_notifications(session=session) resources_type_tuple.append((pending_share_notification_resources, 'share_object_notifications')) + # Get all shares in unhealthy state unhealthy_share_objects_notification_resources = _get_unhealthy_share_notification(session=session) resources_type_tuple.append((unhealthy_share_objects_notification_resources, 'share_object_notifications')) + # Get all the dataset which are in unhealthy state unhealthy_datasets_notification_resources = _get_unhealthy_stack_by_type( session=session, target_uri='datasetUri', target_type=DatasetBase ) resources_type_tuple.append((unhealthy_datasets_notification_resources, 'dataset_object_notifications')) + # Get all the environments which are in unhealthy state unhealthy_environment_notification_resources = _get_unhealthy_stack_by_type( session=session, target_uri='environmentUri', target_type=Environment @@ -179,7 +214,7 @@ def send_reminder_email(engine): for email_id, resource_bundle in user_email_to_resource_bundle_map.items(): email_body = _construct_email_body(resource_bundle) - log.debug(f' Sending email to user: {email_id} with email content: {email_body}') + log.debug(f'Sending email to user: {email_id} with email content: {email_body}') 
subject = 'Attention Required | Data.all weekly digest' try: SESEmailNotificationService.create_and_send_email_notifications( @@ -264,13 +299,13 @@ def _create_table_for_resource(list_of_resources, uri_attr, link_uri): table_body += f""" - {resource.resource_type} + {resource.resource_type.value} {os.environ.get('frontend_domain_url', '') + link_uri + resource.resource.__getattribute__(uri_attr)} - {resource.resource_status} + {resource.resource_status.value} """ diff --git a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py index 7f8253dac..2673c10e6 100644 --- a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py +++ b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py @@ -65,8 +65,9 @@ def fetch_omics_workflows(engine): AdminNotificationService().notify_admins_with_error_log( process_error='Error occurred while processing omics workflow task', error_logs=[str(e)], - process_name='Omics Workflow', + process_name='Omics Workflow task', ) + raise e if __name__ == '__main__': diff --git a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py index 9fb82fb4c..5ed293399 100644 --- a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py +++ b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py @@ -52,7 +52,7 @@ def sync_tables(engine): _send_email_notification_for_table_updates(dataset, table_status_map, task_exceptions) # For all tables in dataset, grant lake formation permission to all principals on the tables - tables = _get_tables_for_dataset(dataset, session) + tables = _list_tables_for_dataset(dataset, session) log.info('Updating tables permissions on Lake Formation...') for table in tables: _grant_lf_table_permissions_to_all_principals(env, env_group, table) @@ -97,7 +97,7 @@ def _send_email_notification_for_table_updates( task_exceptions.append(error_log) -def 
_get_tables_for_dataset(dataset, session): +def _list_tables_for_dataset(dataset, session): return session.query(DatasetTable).filter(DatasetTable.datasetUri == dataset.datasetUri).all() diff --git a/backend/dataall/modules/shares_base/db/share_object_repositories.py b/backend/dataall/modules/shares_base/db/share_object_repositories.py index 758c639bd..c2153b094 100644 --- a/backend/dataall/modules/shares_base/db/share_object_repositories.py +++ b/backend/dataall/modules/shares_base/db/share_object_repositories.py @@ -42,12 +42,9 @@ def find_share(session, dataset: DatasetBase, env, principal_id, principal_role_ ) @staticmethod - def find_dataset_shares(session, dataset_uri: str, share_statuses: List[str] = None): + def find_dataset_shares(session, dataset_uri: str): query = session.query(ShareObject).filter(ShareObject.datasetUri == dataset_uri) - if share_statuses: - query = query.filter(ShareObject.status.in_(share_statuses)) - return query.all() @staticmethod @@ -221,7 +218,7 @@ def get_share_data_items_by_type(session, share, share_type_model, share_type_ur return query.all() @staticmethod - def get_share_objects_with_item_health_status(session, health_status_list: List[str] = None): + def list_share_objects_with_item_health_status(session, health_status_list: List[str] = None): if health_status_list is None: health_status_list = [] diff --git a/backend/dataall/modules/shares_base/services/sharing_service.py b/backend/dataall/modules/shares_base/services/sharing_service.py index e2d230212..a3d8c2198 100644 --- a/backend/dataall/modules/shares_base/services/sharing_service.py +++ b/backend/dataall/modules/shares_base/services/sharing_service.py @@ -225,7 +225,7 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: else: log.info(f'There are no items to revoke of type {type.value}') except Exception as e: - log.error(f'Error occurred during share revoking of {type.value}: {e}') + log.exception(f'Error occurred during share revoking of {type.value}: 
{e}') ShareStatusRepository.update_share_item_status_batch( session, share_uri, @@ -243,7 +243,7 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: service_exceptions.append(str(e)) revoke_successful = False except Exception as e: - log.error(f'Error occurred during share revoking: {e}') + log.exception(f'Error occurred during share revoking: {e}') new_share_item_state = share_item_sm.run_transition(ShareItemActions.Failure.value) share_item_sm.update_state(session, share_data.share.shareUri, new_share_item_state) revoke_successful = False @@ -317,7 +317,7 @@ def verify_share( else: log.info(f'There are no items to verify of type {type.value}') except Exception as e: - log.error(f'Error occurred during share verifying of {type.value}: {e}') + log.exception(f'Error occurred during share verifying of {type.value}: {e}') service_exceptions.append(str(e)) if not shares_health_status: log.info( @@ -401,7 +401,7 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: else: log.info(f'There are no items to reapply of type {type.value}') except Exception as e: - log.error(f'Error occurred during share reapplying of {type.value}: {e}') + log.exception(f'Error occurred during share reapplying of {type.value}: {e}') service_exceptions.append( f'Error occurred during reapplying of share with uri: {share_data.share.shareUri} for processor type: {type.value} due to an unknown exception: {e}' ) @@ -423,6 +423,7 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: ).notify_share_object_items_healthy() except ResourceLockTimeout as timeout_exception: + log.error(f'Resource lock timed out for share with uri: {share_uri}') ShareStatusRepository.update_share_item_health_status_batch( session, share_uri, diff --git a/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py b/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py index b9e3f9b28..2063a1eee 100644 --- 
a/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py +++ b/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py @@ -35,8 +35,10 @@ def persistent_email_reminders(engine): log.info(f'Email reminder sent for share {share.shareUri}') log.info('Completed Persistent Email Reminders Task') except Exception as e: - log.error(f'Error while running persistent email reminder task: {e}') - task_exceptions.append(f'Error while running persistent email reminder task: {e}') + err_msg = f'Error while running persistent email reminder task: {e}' + log.error(err_msg) + task_exceptions.append(err_msg) + raise e finally: if len(task_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( diff --git a/backend/dataall/modules/shares_base/tasks/share_expiration_task.py b/backend/dataall/modules/shares_base/tasks/share_expiration_task.py index 14349dcbd..fa057dc7e 100644 --- a/backend/dataall/modules/shares_base/tasks/share_expiration_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_expiration_task.py @@ -69,15 +69,14 @@ def share_expiration_checker(engine): session=session, dataset=dataset, share=share ).notify_share_expiration_to_requesters() except Exception as e: - log.error( - f'Error occurred while processing share expiration processing for share with URI: {share.shareUri} due to: {e}' - ) - task_exceptions.append( - f'Error occurred while processing share expiration processing for share with URI: {share.shareUri} due to: {e}' - ) + err_msg = f'Error occurred while processing share expiration processing for share with URI: {share.shareUri} due to: {e}' + log.exception(err_msg) + task_exceptions.append(err_msg) except Exception as e: - log.error(f'Error occurred while processing share expiration due to : {e}') - task_exceptions.append(f'Error occurred while processing share expiration due to: {e}') + err_msg = f'Error occurred while processing share expiration due to : {e}' + log.error(err_msg) + 
task_exceptions.append(err_msg) + raise e finally: if len(task_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( diff --git a/backend/dataall/modules/shares_base/tasks/share_manager_task.py b/backend/dataall/modules/shares_base/tasks/share_manager_task.py index 9c55d9b50..8fa410d81 100644 --- a/backend/dataall/modules/shares_base/tasks/share_manager_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_manager_task.py @@ -25,7 +25,7 @@ log.info('Sharing task finished successfully') except Exception as e: - log.error(f'Sharing task failed due to: {e}') + log.exception(f'Sharing task failed due to: {e}') AdminNotificationService().notify_admins_with_error_log( process_error=f'Error occurred while running sharing task for share with uri: {os.getenv("shareUri", "Share URI not available")}', error_logs=[str(e)], diff --git a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py index f19b90729..43b74a9cb 100644 --- a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py @@ -36,6 +36,7 @@ def process_reapply_shares_for_dataset(cls, engine, dataset_uri): error_msg = f'Error occurred while reapplying share task due to: {e}' log.error(error_msg) task_exceptions.append(error_msg) + raise e finally: if len(task_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( @@ -83,6 +84,7 @@ def process_reapply_shares(cls, engine): error_msg = f'Error occurred while reapplying share task due to: {e}' log.error(error_msg) task_exceptions.append(error_msg) + raise e finally: if len(task_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( diff --git a/backend/dataall/modules/shares_base/tasks/share_verifier_task.py b/backend/dataall/modules/shares_base/tasks/share_verifier_task.py index 79c8ed657..c1c771ecb 100644 --- 
a/backend/dataall/modules/shares_base/tasks/share_verifier_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_verifier_task.py @@ -33,21 +33,18 @@ def verify_shares(engine): f'Verifying Share Items for Share Object with Requestor: {share_object.principalId} on Target Dataset: {share_object.datasetUri}' ) processed_share_objects.append(share_object.shareUri) - try: - SharingService.verify_share( - engine, - share_uri=share_object.shareUri, - status=ShareItemStatus.Share_Succeeded.value, - healthStatus=None, - ) - except Exception as e: - error_msg = f'Error occurred while verifying share with uri: {share_object.shareUri} due to: {e}' - log.error(error_msg) - task_exceptions.append(error_msg) + SharingService.verify_share( + engine, + share_uri=share_object.shareUri, + status=ShareItemStatus.Share_Succeeded.value, + healthStatus=None, + ) return processed_share_objects except Exception as e: - log.error(f'Error occurred while verifying shares task due to: {e}') - task_exceptions.append(f'Error occurred while verifying shares task due to: {e}') + err_msg = f'Error occurred while verifying shares task due to: {e}' + log.error(err_msg) + task_exceptions.append(err_msg) + raise e finally: if len(task_exceptions) > 0: AdminNotificationService().notify_admins_with_error_log( From 8b9b10c6bd384a41bb5b8e4a57fe1e0e51812526 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Fri, 24 Jan 2025 10:12:03 -0600 Subject: [PATCH 21/26] Small slight changes --- .../dataall/modules/notifications/tasks/notification_enums.py | 4 ++-- .../modules/notifications/tasks/weekly_digest_reminder.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/backend/dataall/modules/notifications/tasks/notification_enums.py b/backend/dataall/modules/notifications/tasks/notification_enums.py index 819917d5a..916d670a1 100644 --- a/backend/dataall/modules/notifications/tasks/notification_enums.py +++ b/backend/dataall/modules/notifications/tasks/notification_enums.py @@ -7,8 
+7,8 @@ class ResourceStatus(Enum): PENDINGAPPROVAL = 'Submitted - Pending Approval' HEALTHY = 'Healthy' UNHEALTHY = 'Unhealthy' - CREATE_FAILED = (StackStatus.CREATE_FAILED.value,) - DELETE_FAILED = (StackStatus.DELETE_FAILED.value,) + CREATE_FAILED = StackStatus.CREATE_FAILED.value + DELETE_FAILED = StackStatus.DELETE_FAILED.value UPDATE_FAILED = StackStatus.UPDATE_FAILED.value UPDATE_ROLLBACK_FAILED = StackStatus.UPDATE_ROLLBACK_FAILED.value ROLLBACK_FAILED = StackStatus.ROLLBACK_FAILED.value diff --git a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py index b1110667a..bd14dab04 100644 --- a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py +++ b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py @@ -222,12 +222,13 @@ def send_reminder_email(engine): ) except Exception as e: err_msg = f'Error occurred in sending email while weekly reminder task due to: {e}' - log.error(err_msg) + log.exception(err_msg) task_exceptions.append(err_msg) except Exception as e: err_msg = f'Error occurred while running the weekly reminder task: {e}' log.error(err_msg) task_exceptions.append(err_msg) + raise e finally: if len(task_exceptions) > 0: log.info('Sending email notifications to the admin team') From d83d85c7339317603f5adfcace84ffefb03cb47f Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Fri, 24 Jan 2025 10:16:36 -0600 Subject: [PATCH 22/26] Weekly notification enums --- .../modules/notifications/tasks/weekly_digest_reminder.py | 2 +- .../{notification_enums.py => weekly_notification_enums.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename backend/dataall/modules/notifications/tasks/{notification_enums.py => weekly_notification_enums.py} (100%) diff --git a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py index bd14dab04..23c40793a 100644 
--- a/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py +++ b/backend/dataall/modules/notifications/tasks/weekly_digest_reminder.py @@ -13,7 +13,7 @@ from dataall.modules.datasets_base.db.dataset_repositories import DatasetBaseRepository from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.notifications.services.ses_email_notification_service import SESEmailNotificationService -from dataall.modules.notifications.tasks.notification_enums import ResourceStatus, ResourceType +from dataall.modules.notifications.tasks.weekly_notification_enums import ResourceStatus, ResourceType from dataall.modules.shares_base.db.share_object_models import ShareObject from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository from dataall.modules.shares_base.services.shares_enums import ShareItemHealthStatus diff --git a/backend/dataall/modules/notifications/tasks/notification_enums.py b/backend/dataall/modules/notifications/tasks/weekly_notification_enums.py similarity index 100% rename from backend/dataall/modules/notifications/tasks/notification_enums.py rename to backend/dataall/modules/notifications/tasks/weekly_notification_enums.py From 215ebe4be420a469ad5b306abe45be81e23bb9fa Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Fri, 24 Jan 2025 10:40:04 -0600 Subject: [PATCH 23/26] Few corrections --- .../modules/shares_base/db/share_object_repositories.py | 5 +---- .../shares_base/services/share_notification_service.py | 6 +++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/backend/dataall/modules/shares_base/db/share_object_repositories.py b/backend/dataall/modules/shares_base/db/share_object_repositories.py index c2153b094..91c6401de 100644 --- a/backend/dataall/modules/shares_base/db/share_object_repositories.py +++ b/backend/dataall/modules/shares_base/db/share_object_repositories.py @@ -218,10 +218,7 @@ def 
get_share_data_items_by_type(session, share, share_type_model, share_type_ur return query.all() @staticmethod - def list_share_objects_with_item_health_status(session, health_status_list: List[str] = None): - if health_status_list is None: - health_status_list = [] - + def list_share_objects_with_item_health_status(session, health_status_list: List[str]): query = ( session.query(ShareObject) .join(ShareObjectItem, ShareObjectItem.shareUri == ShareObject.shareUri) diff --git a/backend/dataall/modules/shares_base/services/share_notification_service.py b/backend/dataall/modules/shares_base/services/share_notification_service.py index 901273fad..56cfd6c7e 100644 --- a/backend/dataall/modules/shares_base/services/share_notification_service.py +++ b/backend/dataall/modules/shares_base/services/share_notification_service.py @@ -282,7 +282,7 @@ def notify_share_object_failed(self): email_notification_msg = msg + share_link_text + '

' + msg_footer notifications = self.register_notifications( - notification_type=DataSharingNotificationType.SHARE_OBJECT_FAILED.value, msg=msg + notification_type=DataSharingNotificationType.SHARE_OBJECT_FAILED.value, msg=msg.replace('
', '').replace('', '').replace('', '') ) SESEmailNotificationService.create_and_send_email_notifications( @@ -316,7 +316,7 @@ def notify_share_object_items_unhealthy(self): notifications = self.register_notifications( notification_type=DataSharingNotificationType.SHARE_OBJECT_UNHEALTHY.value, - msg=msg, + msg=msg.replace('
', '').replace('', '').replace('', ''), to_recipients=[self.share.groupUri], ) @@ -349,7 +349,7 @@ def notify_share_object_items_healthy(self): notifications = self.register_notifications( notification_type=DataSharingNotificationType.SHARE_OBJECT_HEALTHY.value, - msg=msg, + msg=msg.replace('
', '').replace('', '').replace('', ''), to_recipients=[self.share.groupUri], ) From 7a92d9a2260d7299d6bc1d5fdd7bac8ea20c95bc Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Tue, 28 Jan 2025 12:04:02 -0600 Subject: [PATCH 24/26] Minor corrections --- .../s3_datasets/services/dataset_table_notifications.py | 2 +- .../services/share_processors/glue_table_share_processor.py | 1 - .../share_processors/s3_access_point_share_processor.py | 1 - .../services/share_processors/s3_bucket_share_processor.py | 3 --- .../dataall/modules/shares_base/tasks/share_reapplier_task.py | 2 +- 5 files changed, 2 insertions(+), 7 deletions(-) diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py index f1fcb4d42..72c6b6d72 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_notifications.py @@ -77,7 +77,7 @@ def _construct_html_table_from_glue_status_map(cls, dataset_table_status_map): for dataset_table, dataset_table_details in dataset_table_status_map.items(): table_body += f""" - {dataset_table.GlueTableName} + {dataset_table.label} {dataset_table_details.status} """ diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py index 68bf4c426..a45a80387 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/glue_table_share_processor.py @@ -3,7 +3,6 @@ from warnings import warn from datetime import datetime from dataall.core.environment.services.environment_service import EnvironmentService -from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from 
dataall.modules.shares_base.services.shares_enums import ( ShareItemHealthStatus, ShareItemStatus, diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py index c9637a279..41b214490 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_access_point_share_processor.py @@ -2,7 +2,6 @@ from datetime import datetime from typing import List -from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.shares_base.services.share_exceptions import PrincipalRoleNotFound from dataall.modules.s3_datasets_shares.services.share_managers import S3AccessPointShareManager from dataall.modules.s3_datasets_shares.services.s3_share_service import S3ShareService diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py index c1d61f0bc..c7ce373a0 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_processors/s3_bucket_share_processor.py @@ -1,9 +1,6 @@ import logging from datetime import datetime -from logging import exception from typing import List - -from dataall.modules.notifications.services.admin_notifications import AdminNotificationService from dataall.modules.shares_base.services.share_exceptions import PrincipalRoleNotFound from dataall.modules.s3_datasets_shares.services.share_managers import S3BucketShareManager from dataall.modules.s3_datasets_shares.services.s3_share_service import S3ShareService diff --git 
a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py index 43b74a9cb..97c497aca 100644 --- a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py @@ -55,7 +55,6 @@ def _reapply_share_objects(cls, engine, session, share_objects: List[ShareObject log.info( f'Re-applying Share Items for Share Object, Share URI: {share_object.shareUri} ) with Requestor: {share_object.principalId} on Target Dataset: {share_object.datasetUri}' ) - processed_share_objects.append(share_object.shareUri) ShareStatusRepository.update_share_item_health_status_batch( session=session, share_uri=share_object.shareUri, @@ -63,6 +62,7 @@ def _reapply_share_objects(cls, engine, session, share_objects: List[ShareObject new_status=ShareItemHealthStatus.PendingReApply.value, ) SharingService.reapply_share(engine, share_uri=share_object.shareUri) + processed_share_objects.append(share_object.shareUri) except Exception as e: error_formatted = f'Error occurred while reapplying share in the reapplie task for share with uri:{share_object.shareUri} due to: {e}' log.error(error_formatted) From f60e4ce76ee73b2629422e108cd9b3fd234bc89c Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Tue, 28 Jan 2025 14:37:10 -0600 Subject: [PATCH 25/26] Resolving tests --- .../modules/shares_base/services/share_notification_service.py | 3 ++- tests/core/permissions/test_tenant.py | 2 +- tests/modules/notifications/test_notification_service.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/backend/dataall/modules/shares_base/services/share_notification_service.py b/backend/dataall/modules/shares_base/services/share_notification_service.py index 56cfd6c7e..5343948fb 100644 --- a/backend/dataall/modules/shares_base/services/share_notification_service.py +++ b/backend/dataall/modules/shares_base/services/share_notification_service.py @@ 
-282,7 +282,8 @@ def notify_share_object_failed(self): email_notification_msg = msg + share_link_text + '

' + msg_footer notifications = self.register_notifications( - notification_type=DataSharingNotificationType.SHARE_OBJECT_FAILED.value, msg=msg.replace('
', '').replace('', '').replace('', '') + notification_type=DataSharingNotificationType.SHARE_OBJECT_FAILED.value, + msg=msg.replace('
', '').replace('', '').replace('', ''), ) SESEmailNotificationService.create_and_send_email_notifications( diff --git a/tests/core/permissions/test_tenant.py b/tests/core/permissions/test_tenant.py index c2e7abbd2..85cb5b35a 100644 --- a/tests/core/permissions/test_tenant.py +++ b/tests/core/permissions/test_tenant.py @@ -8,7 +8,7 @@ def mock_cognito_client(mocker): mock_client = MagicMock() - mocker.patch('dataall.modules.notifications.services.ses_email_notification_service.Cognito', mock_client) + mocker.patch('dataall.base.aws.cognito.Cognito', mock_client) return mock_client diff --git a/tests/modules/notifications/test_notification_service.py b/tests/modules/notifications/test_notification_service.py index 66b634b96..31821db92 100644 --- a/tests/modules/notifications/test_notification_service.py +++ b/tests/modules/notifications/test_notification_service.py @@ -10,7 +10,7 @@ def mock_cognito_client(mocker): mock_client = MagicMock() - mocker.patch('dataall.modules.notifications.services.ses_email_notification_service.Cognito', mock_client) + mocker.patch('dataall.base.aws.cognito.Cognito', mock_client) return mock_client From 83156c7975913ebf286fddccf9a626d6ecbca7ae Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye Date: Wed, 29 Jan 2025 11:29:34 -0600 Subject: [PATCH 26/26] Filed reformatted by ruff --- backend/api_handler.py | 2 +- backend/cdkproxymain.py | 2 +- .../dataall/base/api/gql/graphql_directive.py | 4 ++- backend/dataall/base/api/gql/graphql_input.py | 2 +- .../base/api/gql/graphql_union_type.py | 2 +- backend/dataall/base/api/gql/schema.py | 2 +- backend/dataall/base/aws/parameter_store.py | 2 +- backend/dataall/base/aws/quicksight.py | 4 +-- .../dataall/base/cdkproxy/cdk_cli_wrapper.py | 8 ++--- backend/dataall/base/db/dbconfig.py | 2 +- backend/dataall/base/loader.py | 6 ++-- backend/dataall/base/utils/alarm_service.py | 2 +- .../dataall/base/utils/api_handler_utils.py | 4 +-- .../dataall/base/utils/iam_policy_utils.py | 2 +- 
.../dataall/base/utils/naming_convention.py | 2 +- .../env_role_core_policies/service_policy.py | 4 +-- .../core/environment/cdk/environment_stack.py | 4 +-- .../services/environment_service.py | 8 ++--- .../db/organization_repositories.py | 2 +- backend/dataall/core/stacks/api/resolvers.py | 2 +- .../services/dashboard_quicksight_service.py | 2 +- .../cdk/datapipelines_cdk_pipeline.py | 6 ++-- .../cdk/datapipelines_pipeline.py | 2 +- .../services/datapipelines_service.py | 2 +- .../services/maintenance_service.py | 4 +-- .../mlstudio/db/mlstudio_repositories.py | 2 +- .../omics/tasks/omics_workflows_fetcher.py | 6 ++-- .../s3_datasets/aws/athena_table_client.py | 2 +- .../aws/s3_dataset_bucket_policy_client.py | 2 +- .../gluedatabasecustomresource/index.py | 4 +-- .../assets/glueprofilingjob/glue_script.py | 8 ++--- .../modules/s3_datasets/cdk/dataset_stack.py | 6 ++-- .../s3_datasets/db/dataset_repositories.py | 4 +-- .../s3_datasets_shares/aws/glue_client.py | 31 +++++++------------ .../aws/lakeformation_client.py | 11 ++----- .../s3_datasets_shares/aws/ram_client.py | 14 +++------ .../s3_share_managed_policy_service.py | 4 +-- .../share_managers/lf_share_manager.py | 6 ++-- .../s3_access_point_share_manager.py | 4 +-- .../tasks/dataset_subscription_task.py | 3 +- .../tasks/subscriptions/sqs_poller.py | 6 ++-- .../services/share_logs_service.py | 2 +- .../services/share_notification_service.py | 2 +- .../services/share_object_service.py | 2 +- .../worksheets/db/worksheet_repositories.py | 2 +- .../worksheets/services/worksheet_service.py | 2 +- deploy/configs/frontend_config.py | 16 +++++----- .../sync_congito_params/index.py | 4 +-- deploy/stacks/backend_stack.py | 2 +- deploy/stacks/cognito.py | 2 +- deploy/stacks/pipeline.py | 24 +++++++------- deploy/stacks/vpc.py | 6 ++-- tests/core/conftest.py | 2 +- tests/modules/s3_datasets/test_dataset.py | 4 +-- .../s3_datasets/test_dataset_glossary.py | 2 +- .../modules/s3_datasets/test_dataset_table.py | 2 +- 
.../test_s3_access_point_share_manager.py | 2 +- .../modules/dashboards/aws_clients.py | 4 +-- .../modules/s3_datasets/global_conftest.py | 6 ++-- 59 files changed, 130 insertions(+), 151 deletions(-) diff --git a/backend/api_handler.py b/backend/api_handler.py index 439551f0a..1eded2d7b 100644 --- a/backend/api_handler.py +++ b/backend/api_handler.py @@ -66,7 +66,7 @@ def adapted(obj, info, **kwargs): executable_schema = get_executable_schema() end = perf_counter() -print(f'Lambda Context ' f'Initialization took: {end - start:.3f} sec') +print(f'Lambda Context Initialization took: {end - start:.3f} sec') def handler(event, context): diff --git a/backend/cdkproxymain.py b/backend/cdkproxymain.py index a76682fd8..9ad789c58 100644 --- a/backend/cdkproxymain.py +++ b/backend/cdkproxymain.py @@ -18,7 +18,7 @@ logger = logging.getLogger('cdksass') ENVNAME = os.getenv('envname', 'local') -logger.warning(f"Application started for envname= `{ENVNAME}` DH_DOCKER_VERSION:{os.environ.get('DH_DOCKER_VERSION')}") +logger.warning(f'Application started for envname= `{ENVNAME}` DH_DOCKER_VERSION:{os.environ.get("DH_DOCKER_VERSION")}') def connect(): diff --git a/backend/dataall/base/api/gql/graphql_directive.py b/backend/dataall/base/api/gql/graphql_directive.py index 70c97c671..d81ccd563 100644 --- a/backend/dataall/base/api/gql/graphql_directive.py +++ b/backend/dataall/base/api/gql/graphql_directive.py @@ -18,7 +18,9 @@ def gql(self, with_directives=True): if not len(self.args.keys()): return f'@{self.name}' else: - return f"@{self.name}({','.join([k+':'+DirectiveArgs.to_string(self.args[k]) for k in self.args.keys()])})" + return ( + f'@{self.name}({",".join([k + ":" + DirectiveArgs.to_string(self.args[k]) for k in self.args.keys()])})' + ) if __name__ == '__main__': diff --git a/backend/dataall/base/api/gql/graphql_input.py b/backend/dataall/base/api/gql/graphql_input.py index 8195971ff..32cf59222 100644 --- a/backend/dataall/base/api/gql/graphql_input.py +++ 
b/backend/dataall/base/api/gql/graphql_input.py @@ -15,5 +15,5 @@ def gql(self): description_str = f'"""{self.description}"""{n}' if self.description else '' # args = f"{', '.join([arg.name+':'+ arg.type.gql() for arg in self.arguments])}" - args = f"{', '.join([arg.gql() for arg in self.arguments])}" + args = f'{", ".join([arg.gql() for arg in self.arguments])}' return description_str + n.join(textwrap.wrap(f'input {self.name}{{{n} {args} }}')) diff --git a/backend/dataall/base/api/gql/graphql_union_type.py b/backend/dataall/base/api/gql/graphql_union_type.py index d7b5c0512..813877f40 100644 --- a/backend/dataall/base/api/gql/graphql_union_type.py +++ b/backend/dataall/base/api/gql/graphql_union_type.py @@ -25,7 +25,7 @@ def __init__(self, name, types=[], type_registry=None, resolver=lambda *_, **__: def gql(self, *args, **kwargs): types = self.type_registry.types() if self.type_registry else self.types - return f"union {self.name} = {'|'.join([get_named_type(t).name for t in types])}" + return f'union {self.name} = {"|".join([get_named_type(t).name for t in types])}' if __name__ == '__main__': diff --git a/backend/dataall/base/api/gql/schema.py b/backend/dataall/base/api/gql/schema.py index d6f1978d6..54461581e 100644 --- a/backend/dataall/base/api/gql/schema.py +++ b/backend/dataall/base/api/gql/schema.py @@ -90,7 +90,7 @@ def gql(self, with_directives=True): if len(self.unions): unions = f"""{n.join([u.gql() for u in self.unions])}{n}""" - types = f"""{n} {n.join([n+t.gql(with_directives=with_directives)+n for t in self.types])}""" + types = f"""{n} {n.join([n + t.gql(with_directives=with_directives) + n for t in self.types])}""" return f"""{enums}{input_types}{unions}{types}""" def visit(self, visitors=[]): diff --git a/backend/dataall/base/aws/parameter_store.py b/backend/dataall/base/aws/parameter_store.py index 40b13eeda..ebfbb9c91 100644 --- a/backend/dataall/base/aws/parameter_store.py +++ b/backend/dataall/base/aws/parameter_store.py @@ -18,7 +18,7 @@ 
def __init__(self): @staticmethod def client(AwsAccountId=None, region=None, role=None): if AwsAccountId: - log.info(f"SSM Parameter remote session with role:{role if role else 'PivotRole'}") + log.info(f'SSM Parameter remote session with role:{role if role else "PivotRole"}') session = SessionHelper.remote_session(accountid=AwsAccountId, region=region, role=role) else: log.info('SSM Parameter session in central account') diff --git a/backend/dataall/base/aws/quicksight.py b/backend/dataall/base/aws/quicksight.py index d25540a4c..40a46c21e 100644 --- a/backend/dataall/base/aws/quicksight.py +++ b/backend/dataall/base/aws/quicksight.py @@ -117,14 +117,14 @@ def check_quicksight_enterprise_subscription(AwsAccountId, region=None): else: if response['AccountInfo']['Edition'] not in ['ENTERPRISE', 'ENTERPRISE_AND_Q']: raise Exception( - f"Quicksight Subscription found in Account: {AwsAccountId} of incorrect type: {response['AccountInfo']['Edition']}" + f'Quicksight Subscription found in Account: {AwsAccountId} of incorrect type: {response["AccountInfo"]["Edition"]}' ) else: if response['AccountInfo']['AccountSubscriptionStatus'] == 'ACCOUNT_CREATED': return True else: raise Exception( - f"Quicksight Subscription found in Account: {AwsAccountId} not active. Status = {response['AccountInfo']['AccountSubscriptionStatus']}" + f'Quicksight Subscription found in Account: {AwsAccountId} not active. Status = {response["AccountInfo"]["AccountSubscriptionStatus"]}' ) except client.exceptions.ResourceNotFoundException: diff --git a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py index 43e1ccb17..4ef9aca25 100644 --- a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py +++ b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py @@ -135,7 +135,7 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s CommandSanitizer(input_args) cmd = [ - '' '. ~/.nvm/nvm.sh &&', + '. 
~/.nvm/nvm.sh &&', 'cdk', 'deploy --all', '--require-approval', @@ -161,7 +161,7 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s f'"{sys.executable} {app_path}"', '--verbose', ] - logger.info(f"Running command : \n {' '.join(cmd)}") + logger.info(f'Running command : \n {" ".join(cmd)}') # This command is too complex to be executed as a list of commands. We need to run it with shell=True # However, the input arguments have to be sanitized with the CommandSanitizer @@ -217,7 +217,7 @@ def describe_stack(stack, engine: Engine = None, stackid: str = None): def cdk_installed(): cmd1 = ['.', '~/.nvm/nvm.sh'] - logger.info(f"Running command {' '.join(cmd1)}") + logger.info(f'Running command {" ".join(cmd1)}') subprocess.run( cmd1, text=True, @@ -229,7 +229,7 @@ def cdk_installed(): ) cmd2 = ['cdk', '--version'] - logger.info(f"Running command {' '.join(cmd2)}") + logger.info(f'Running command {" ".join(cmd2)}') subprocess.run( cmd2, text=True, diff --git a/backend/dataall/base/db/dbconfig.py b/backend/dataall/base/db/dbconfig.py index 3494d0d14..01e7da94d 100644 --- a/backend/dataall/base/db/dbconfig.py +++ b/backend/dataall/base/db/dbconfig.py @@ -16,7 +16,7 @@ def __init__(self, user: str, pwd: str, host: str, db: str, schema: str): for param in (user, db, schema): if len(param) > _POSTGRES_MAX_LEN: raise ValueError( - f"PostgreSQL doesn't allow values more than 63 characters" f' parameters {user}, {db}, {schema}' + f"PostgreSQL doesn't allow values more than 63 characters parameters {user}, {db}, {schema}" ) if len(host) > _MAX_HOST_LENGTH: diff --git a/backend/dataall/base/loader.py b/backend/dataall/base/loader.py index cf56003a5..3595cdd53 100644 --- a/backend/dataall/base/loader.py +++ b/backend/dataall/base/loader.py @@ -238,8 +238,7 @@ def _check_loading_correct(in_config: Set[str], modes: Set[ImportMode]): for module in _all_modules(): if module.is_supported(modes) and module not in expected_load: raise ImportError( - 
f'ModuleInterface has not been initialized for module {module.name()}. ' - 'Declare the module in depends_on' + f'ModuleInterface has not been initialized for module {module.name()}. Declare the module in depends_on' ) # 4) Checks all references for modules (when ModuleInterfaces don't exist or not supported) @@ -260,8 +259,7 @@ def _describe_loading(in_config: Set[str], inactive: Set[str]): log.debug(f'The {name} module was loaded') if name in inactive: log.info( - f'There is a module that depends on {module.name()}. ' - "The module has been loaded despite it's inactive." + f"There is a module that depends on {module.name()}. The module has been loaded despite it's inactive." ) elif name not in in_config: log.info( diff --git a/backend/dataall/base/utils/alarm_service.py b/backend/dataall/base/utils/alarm_service.py index f42da8bf6..bdf05a73d 100644 --- a/backend/dataall/base/utils/alarm_service.py +++ b/backend/dataall/base/utils/alarm_service.py @@ -36,7 +36,7 @@ def trigger_stack_deployment_failure_alarm(self, stack: Stack): - State Change: OK -> ALARM - Reason for State Change: Stack Deployment Failure - Timestamp: {datetime.now()} -- CW Log Group: {f"/dataall/{self.envname}/cdkproxy/{stack.EcsTaskArn.split('/')[-1]}"} +- CW Log Group: {f'/dataall/{self.envname}/cdkproxy/{stack.EcsTaskArn.split("/")[-1]}'} """ return self.publish_message_to_alarms_topic(subject, message) diff --git a/backend/dataall/base/utils/api_handler_utils.py b/backend/dataall/base/utils/api_handler_utils.py index a43c100b8..181711112 100644 --- a/backend/dataall/base/utils/api_handler_utils.py +++ b/backend/dataall/base/utils/api_handler_utils.py @@ -39,7 +39,7 @@ def redact_creds(event): def get_cognito_groups(claims): if not claims: raise ValueError( - 'Received empty claims. ' 'Please verify authorizer configuration', + 'Received empty claims. 
Please verify authorizer configuration', claims, ) groups = list() @@ -134,7 +134,7 @@ def check_reauth(query, auth_time, username): log.info(f'ReAuth Required for User {username} on Operation {query.get("operationName", "")}, Error: {e}') return send_unauthorized_response( operation=query.get('operationName', 'operation'), - message=f"ReAuth Required To Perform This Action {query.get('operationName', '')}", + message=f'ReAuth Required To Perform This Action {query.get("operationName", "")}', extension={'code': 'REAUTH'}, ) diff --git a/backend/dataall/base/utils/iam_policy_utils.py b/backend/dataall/base/utils/iam_policy_utils.py index 2d0479f6a..3e985f93e 100644 --- a/backend/dataall/base/utils/iam_policy_utils.py +++ b/backend/dataall/base/utils/iam_policy_utils.py @@ -156,5 +156,5 @@ def _policy_splitter( resulting_statement = statement_builder(split=split, subset=subset) split += 1 resulting_statements.append(resulting_statement) - logger.info(f'Statement divided into {split+1} smaller statements') + logger.info(f'Statement divided into {split + 1} smaller statements') return resulting_statements diff --git a/backend/dataall/base/utils/naming_convention.py b/backend/dataall/base/utils/naming_convention.py index 04cc3f753..ecccfc7d5 100644 --- a/backend/dataall/base/utils/naming_convention.py +++ b/backend/dataall/base/utils/naming_convention.py @@ -55,7 +55,7 @@ def build_compliant_name(self) -> str: separator = NamingConventionPattern[self.service].value['separator'] max_length = NamingConventionPattern[self.service].value['max_length'] suffix = f'-{self.target_uri}' if len(self.target_uri) else '' - return f"{slugify(self.resource_prefix + '-' + self.target_label[:(max_length - len(self.resource_prefix + self.target_uri))] + suffix, regex_pattern=fr'{regex}', separator=separator, lowercase=True)}" + return f'{slugify(self.resource_prefix + "-" + self.target_label[: (max_length - len(self.resource_prefix + self.target_uri))] + suffix, 
regex_pattern=rf"{regex}", separator=separator, lowercase=True)}' def validate_name(self): regex = NamingConventionPattern[self.service].value['regex'] diff --git a/backend/dataall/core/environment/cdk/env_role_core_policies/service_policy.py b/backend/dataall/core/environment/cdk/env_role_core_policies/service_policy.py index 0ad5b67ff..781d50875 100644 --- a/backend/dataall/core/environment/cdk/env_role_core_policies/service_policy.py +++ b/backend/dataall/core/environment/cdk/env_role_core_policies/service_policy.py @@ -127,8 +127,8 @@ def generate_policies(self) -> [aws_iam.ManagedPolicy]: policies.append( aws_iam.ManagedPolicy( self.stack, - f'{self.id}-{index+1}', - managed_policy_name=f'{self.id}-{index+1}', + f'{self.id}-{index + 1}', + managed_policy_name=f'{self.id}-{index + 1}', statements=chunk, ) ) diff --git a/backend/dataall/core/environment/cdk/environment_stack.py b/backend/dataall/core/environment/cdk/environment_stack.py index 85694cda1..c154bbd52 100644 --- a/backend/dataall/core/environment/cdk/environment_stack.py +++ b/backend/dataall/core/environment/cdk/environment_stack.py @@ -124,7 +124,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): pivot_role_as_part_of_environment_stack = ParameterStoreManager.get_parameter_value( region=os.getenv('AWS_REGION', 'eu-west-1'), - parameter_path=f"/dataall/{os.getenv('envname', 'local')}/pivotRole/enablePivotRoleAutoCreate", + parameter_path=f'/dataall/{os.getenv("envname", "local")}/pivotRole/enablePivotRoleAutoCreate', ) self.create_pivot_role = True if pivot_role_as_part_of_environment_stack == 'True' else False self.engine = self.get_engine() @@ -582,7 +582,7 @@ def create_topic(self, construct_id, central_account, environment, kms_key): def create_integration_tests_role(self): toolingAccount = ParameterStoreManager.get_parameter_value( region=os.getenv('AWS_REGION', 'eu-west-1'), - parameter_path=f"/dataall/{os.getenv('envname', 'local')}/toolingAccount", + 
parameter_path=f'/dataall/{os.getenv("envname", "local")}/toolingAccount', ) self.test_role = iam.Role( self, diff --git a/backend/dataall/core/environment/services/environment_service.py b/backend/dataall/core/environment/services/environment_service.py index c9833b27f..b064d759f 100644 --- a/backend/dataall/core/environment/services/environment_service.py +++ b/backend/dataall/core/environment/services/environment_service.py @@ -105,8 +105,8 @@ def validate_account_region(data, session): if environment: raise exceptions.InvalidInput( 'AwsAccount/region', - f"{data.get('AwsAccountId')}/{data.get('region')}", - f"unique. An environment for {data.get('AwsAccountId')}/{data.get('region')} already exists", + f'{data.get("AwsAccountId")}/{data.get("region")}', + f'unique. An environment for {data.get("AwsAccountId")}/{data.get("region")} already exists', ) @staticmethod @@ -163,7 +163,7 @@ def _validate_permissions(session, uri, g_permissions, group): def _get_pivot_role_as_part_of_environment(): ssm_param = ParameterStoreManager.get_parameter_value( region=os.getenv('AWS_REGION', 'eu-west-1'), - parameter_path=f"/dataall/{os.getenv('envname', 'local')}/pivotRole/enablePivotRoleAutoCreate", + parameter_path=f'/dataall/{os.getenv("envname", "local")}/pivotRole/enablePivotRoleAutoCreate', ) return ssm_param == 'True' @@ -243,7 +243,7 @@ def create_environment(uri, data=None): isOrganizationDefaultEnvironment=False, EnvironmentDefaultIAMRoleName=data.get('EnvironmentDefaultIAMRoleArn', 'unknown').split('/')[-1], EnvironmentDefaultIAMRoleArn=data.get('EnvironmentDefaultIAMRoleArn', 'unknown'), - CDKRoleArn=f"arn:aws:iam::{data.get('AwsAccountId')}:role/{cdk_role_name}", + CDKRoleArn=f'arn:aws:iam::{data.get("AwsAccountId")}:role/{cdk_role_name}', resourcePrefix=data.get('resourcePrefix'), ) diff --git a/backend/dataall/core/organizations/db/organization_repositories.py b/backend/dataall/core/organizations/db/organization_repositories.py index 4134e7fc6..d6782bfea 100644 
--- a/backend/dataall/core/organizations/db/organization_repositories.py +++ b/backend/dataall/core/organizations/db/organization_repositories.py @@ -48,7 +48,7 @@ def query_user_organizations(session, username, groups, filter) -> Query: models.Organization.label.ilike('%' + filter.get('term') + '%'), models.Organization.description.ilike('%' + filter.get('term') + '%'), models.Organization.tags.contains( - f"{{{NamingConventionService(pattern=NamingConventionPattern.DEFAULT_SEARCH, target_label=filter.get('term')).sanitize()}}}" + f'{{{NamingConventionService(pattern=NamingConventionPattern.DEFAULT_SEARCH, target_label=filter.get("term")).sanitize()}}}' ), ) ) diff --git a/backend/dataall/core/stacks/api/resolvers.py b/backend/dataall/core/stacks/api/resolvers.py index 22f1b7fa5..db9d2b0f3 100644 --- a/backend/dataall/core/stacks/api/resolvers.py +++ b/backend/dataall/core/stacks/api/resolvers.py @@ -77,7 +77,7 @@ def resolve_task_id(context, source: Stack, **kwargs): def get_stack_logs(context: Context, source, targetUri: str = None, targetType: str = None): query = StackService.get_stack_logs(target_uri=targetUri, target_type=targetType) envname = os.getenv('envname', 'local') - log_group_name = f"/{Parameter().get_parameter(env=envname, path='resourcePrefix')}/{envname}/ecs/cdkproxy" + log_group_name = f'/{Parameter().get_parameter(env=envname, path="resourcePrefix")}/{envname}/ecs/cdkproxy' log_query_period_days = config.get_property('core.log_query_period_days', 1) results = CloudWatch.run_query( diff --git a/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py b/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py index 73a27e9f7..9ac54a484 100644 --- a/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py +++ b/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py @@ -46,7 +46,7 @@ def get_quicksight_reader_url(cls, uri): ) session_type = 
ParameterStoreManager.get_parameter_value( - parameter_path=f"/dataall/{os.getenv('envname', 'local')}/quicksight/sharedDashboardsSessions" + parameter_path=f'/dataall/{os.getenv("envname", "local")}/quicksight/sharedDashboardsSessions' ) if session_type == 'reader': diff --git a/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py b/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py index b0c6ce698..cb040ad68 100644 --- a/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py +++ b/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py @@ -123,7 +123,7 @@ def initialize_repo(self): 'git remote add origin ${REPO_URL}', ] - logger.info(f"Running Commands: {'; '.join(cmd_init)}") + logger.info(f'Running Commands: {"; ".join(cmd_init)}') CommandSanitizer(args=[self.pipeline.repo, self.pipeline.SamlGroupName]) @@ -236,7 +236,7 @@ def git_push_repo(self): 'git push -u origin main', ] - logger.info(f"Running Commands: {'; '.join(git_cmds)}") + logger.info(f'Running Commands: {"; ".join(git_cmds)}') # This command does not include any customer upstream input # no sanitization is needed and shell=true does not impose a risk @@ -258,7 +258,7 @@ def clean_up_repo(pipeline_dir): code_dir_path = os.path.realpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'blueprints')) cmd = ['rm', '-rf', f'./{pipeline_dir}'] - logger.info(f"Running command : \n {' '.join(cmd)}") + logger.info(f'Running command : \n {" ".join(cmd)}') process = subprocess.run( cmd, text=True, shell=False, encoding='utf-8', capture_output=True, cwd=code_dir_path diff --git a/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py b/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py index c9d74dd11..c39c52ba4 100644 --- a/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py +++ b/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py @@ -531,7 +531,7 @@ def 
write_ddk_json_multienvironment( def initialize_repo(pipeline, code_dir_path, env_vars): cmd_init = [f'mkdir {pipeline.repo}', f'cp -R data_pipeline_blueprint/* {pipeline.repo}/'] - logger.info(f"Running Commands: {'; '.join(cmd_init)}") + logger.info(f'Running Commands: {"; ".join(cmd_init)}') CommandSanitizer(args=[pipeline.repo]) diff --git a/backend/dataall/modules/datapipelines/services/datapipelines_service.py b/backend/dataall/modules/datapipelines/services/datapipelines_service.py index ed010ef9e..552119042 100644 --- a/backend/dataall/modules/datapipelines/services/datapipelines_service.py +++ b/backend/dataall/modules/datapipelines/services/datapipelines_service.py @@ -129,7 +129,7 @@ def create_pipeline_environment( environmentLabel=environment.label, pipelineUri=pipeline.DataPipelineUri, pipelineLabel=pipeline.label, - envPipelineUri=f"{pipeline.DataPipelineUri}{environment.environmentUri}{data['stage']}", + envPipelineUri=f'{pipeline.DataPipelineUri}{environment.environmentUri}{data["stage"]}', AwsAccountId=environment.AwsAccountId, region=environment.region, stage=data['stage'], diff --git a/backend/dataall/modules/maintenance/services/maintenance_service.py b/backend/dataall/modules/maintenance/services/maintenance_service.py index 4fb270859..ecf71a2cb 100644 --- a/backend/dataall/modules/maintenance/services/maintenance_service.py +++ b/backend/dataall/modules/maintenance/services/maintenance_service.py @@ -107,7 +107,7 @@ def get_maintenance_window_status(): # Check if ECS tasks are running ecs_cluster_name = ParameterStoreManager.get_parameter_value( region=os.getenv('AWS_REGION', 'eu-west-1'), - parameter_path=f"/dataall/{os.getenv('envname', 'local')}/ecs/cluster/name", + parameter_path=f'/dataall/{os.getenv("envname", "local")}/ecs/cluster/name', ) if Ecs.is_task_running(cluster_name=ecs_cluster_name): logger.info(f'Current maintenance window status - {maintenance_record.status}') @@ -143,7 +143,7 @@ def _get_maintenance_window_mode(engine): 
def _get_ecs_rules(): ecs_scheduled_rules = ParameterStoreManager.get_parameters_by_path( region=os.getenv('AWS_REGION', 'eu-west-1'), - parameter_path=f"/dataall/{os.getenv('envname', 'local')}/ecs/ecs_scheduled_tasks/rule", + parameter_path=f'/dataall/{os.getenv("envname", "local")}/ecs/ecs_scheduled_tasks/rule', ) logger.debug(ecs_scheduled_rules) return [item['Value'] for item in ecs_scheduled_rules] diff --git a/backend/dataall/modules/mlstudio/db/mlstudio_repositories.py b/backend/dataall/modules/mlstudio/db/mlstudio_repositories.py index 29c7d2e0b..10486867b 100644 --- a/backend/dataall/modules/mlstudio/db/mlstudio_repositories.py +++ b/backend/dataall/modules/mlstudio/db/mlstudio_repositories.py @@ -76,7 +76,7 @@ def count_resources(session, environment, group_uri): @staticmethod def create_sagemaker_studio_domain(session, username, environment, data): domain = SagemakerStudioDomain( - label=f"{data.get('label')}-domain", + label=f'{data.get("label")}-domain', owner=username, description=data.get('description', 'No description provided'), tags=data.get('tags', []), diff --git a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py index 2673c10e6..acd748f81 100644 --- a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py +++ b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py @@ -35,11 +35,11 @@ def fetch_omics_workflows(engine): workflows = ready_workflows # + private_workflows log.info(f'Found workflows {str(workflows)} in environment {env.environmentUri}') for workflow in workflows: - log.info(f"Processing workflow name={workflow['name']}, id={workflow['id']}...") + log.info(f'Processing workflow name={workflow["name"]}, id={workflow["id"]}...') existing_workflow = OmicsRepository(session).get_workflow_by_id(workflow['id']) if existing_workflow is not None: log.info( - f"Workflow name={workflow['name']}, id={workflow['id']} has already been registered in 
database. Updating information..." + f'Workflow name={workflow["name"]}, id={workflow["id"]} has already been registered in database. Updating information...' ) existing_workflow.name = workflow['name'] existing_workflow.label = workflow['name'] @@ -47,7 +47,7 @@ def fetch_omics_workflows(engine): else: log.info( - f"Workflow name={workflow['name']} , id={workflow['id']} in environment {env.environmentUri} is new. Registering..." + f'Workflow name={workflow["name"]} , id={workflow["id"]} in environment {env.environmentUri} is new. Registering...' ) omicsWorkflow = OmicsWorkflow( id=workflow['id'], diff --git a/backend/dataall/modules/s3_datasets/aws/athena_table_client.py b/backend/dataall/modules/s3_datasets/aws/athena_table_client.py index fb5b2dde0..d0ba676c7 100644 --- a/backend/dataall/modules/s3_datasets/aws/athena_table_client.py +++ b/backend/dataall/modules/s3_datasets/aws/athena_table_client.py @@ -25,7 +25,7 @@ def get_table(self): try: env_workgroup = self._client.get_work_group(WorkGroup=self._env.EnvironmentDefaultAthenaWorkGroup) except ClientError as e: - log.info(f'Workgroup {self._env.EnvironmentDefaultAthenaWorkGroup} can not be found' f'due to: {e}') + log.info(f'Workgroup {self._env.EnvironmentDefaultAthenaWorkGroup} can not be founddue to: {e}') connection = connect( aws_access_key_id=self._creds.access_key, diff --git a/backend/dataall/modules/s3_datasets/aws/s3_dataset_bucket_policy_client.py b/backend/dataall/modules/s3_datasets/aws/s3_dataset_bucket_policy_client.py index e5a4f926a..2581e31d7 100644 --- a/backend/dataall/modules/s3_datasets/aws/s3_dataset_bucket_policy_client.py +++ b/backend/dataall/modules/s3_datasets/aws/s3_dataset_bucket_policy_client.py @@ -68,7 +68,7 @@ def put_bucket_policy(self, policy): log.info(f'Bucket Policy updated: {response}') update_policy_report.update({'status': 'SUCCEEDED'}) except ClientError as e: - log.error(f'Failed to update bucket policy ' f"on '{dataset.S3BucketName}' policy {policy} " f'due to 
{e} ') + log.error(f"Failed to update bucket policy on '{dataset.S3BucketName}' policy {policy} due to {e} ") update_policy_report.update({'status': 'FAILED'}) return update_policy_report diff --git a/backend/dataall/modules/s3_datasets/cdk/assets/gluedatabasecustomresource/index.py b/backend/dataall/modules/s3_datasets/cdk/assets/gluedatabasecustomresource/index.py index 63cd9fc41..376352c3b 100644 --- a/backend/dataall/modules/s3_datasets/cdk/assets/gluedatabasecustomresource/index.py +++ b/backend/dataall/modules/s3_datasets/cdk/assets/gluedatabasecustomresource/index.py @@ -58,10 +58,10 @@ def on_create(event): ) except ClientError as e: log.exception( - f"Could not create Glue Database {props['DatabaseInput']['Name']} in aws://{AWS_ACCOUNT}/{AWS_REGION}, received {str(e)}" + f'Could not create Glue Database {props["DatabaseInput"]["Name"]} in aws://{AWS_ACCOUNT}/{AWS_REGION}, received {str(e)}' ) raise Exception( - f"Could not create Glue Database {props['DatabaseInput']['Name']} in aws://{AWS_ACCOUNT}/{AWS_REGION}, received {str(e)}" + f'Could not create Glue Database {props["DatabaseInput"]["Name"]} in aws://{AWS_ACCOUNT}/{AWS_REGION}, received {str(e)}' ) Entries = [] diff --git a/backend/dataall/modules/s3_datasets/cdk/assets/glueprofilingjob/glue_script.py b/backend/dataall/modules/s3_datasets/cdk/assets/glueprofilingjob/glue_script.py index 2ffd07b57..2ea970056 100644 --- a/backend/dataall/modules/s3_datasets/cdk/assets/glueprofilingjob/glue_script.py +++ b/backend/dataall/modules/s3_datasets/cdk/assets/glueprofilingjob/glue_script.py @@ -41,7 +41,7 @@ ] try: args = getResolvedOptions(sys.argv, list_args) - logger.info(f"Table arg passed profiling will run only on specified table >>> {args['table']}") + logger.info(f'Table arg passed profiling will run only on specified table >>> {args["table"]}') except Exception as e: logger.info(f'No Table arg passed profiling will run on all dataset tables: {e}') list_args.remove('table') @@ -78,7 +78,7 @@ def 
get_database_tables(client, database): all_database_tables.append(table['Name']) return all_database_tables except ClientError as e: - logger.error(f'Could not retrieve all ' f'database {database} tables ') + logger.error(f'Could not retrieve all database {database} tables ') raise e @@ -94,7 +94,7 @@ def run_table_profiling( ): response = glue.get_table(DatabaseName=database, Name=table) location = response['Table'].get('StorageDescriptor', {}).get('Location') - output_directory = f"s3://{results_bucket}/profiling/results/{dataset_uri}/{table}/{args['JOB_RUN_ID']}" + output_directory = f's3://{results_bucket}/profiling/results/{dataset_uri}/{table}/{args["JOB_RUN_ID"]}' if location: logger.debug('Profiling table for %s %s ', database, table) @@ -158,7 +158,7 @@ def run_table_profiling( response = s3_client.put_object( Bucket=results_bucket, - Key=f"profiling/results/{dataset_uri}/{table}/{args['JOB_RUN_ID']}/results.json", + Key=f'profiling/results/{dataset_uri}/{table}/{args["JOB_RUN_ID"]}/results.json', Body=json.dumps(profiling_results), ) logger.info(f'JSON written to s3: {response}') diff --git a/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py b/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py index 4f3bb8018..4a8a422bf 100644 --- a/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py +++ b/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py @@ -555,9 +555,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): '--datasetBucket': dataset.S3BucketName, '--apiUrl': 'None', '--snsTopicArn': 'None', - '--extra-jars': ( - f's3://{env.EnvironmentDefaultBucketName}' f'/profiling/code/jars/deequ-2.0.7-spark-3.3.jar' - ), + '--extra-jars': (f's3://{env.EnvironmentDefaultBucketName}/profiling/code/jars/deequ-2.0.7-spark-3.3.jar'), '--enable-metrics': 'true', '--enable-continuous-cloudwatch-log': 'true', '--enable-glue-datacatalog': 'true', @@ -576,7 +574,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): 
command=glue.CfnJob.JobCommandProperty( name='glueetl', python_version='3', - script_location=(f's3://{env.EnvironmentDefaultBucketName}' f'/profiling/code/glue_script.py'), + script_location=(f's3://{env.EnvironmentDefaultBucketName}/profiling/code/glue_script.py'), ), default_arguments=job_args, glue_version='4.0', diff --git a/backend/dataall/modules/s3_datasets/db/dataset_repositories.py b/backend/dataall/modules/s3_datasets/db/dataset_repositories.py index bc9fe1efd..9e4b4ff39 100644 --- a/backend/dataall/modules/s3_datasets/db/dataset_repositories.py +++ b/backend/dataall/modules/s3_datasets/db/dataset_repositories.py @@ -122,8 +122,8 @@ def _set_dataset_aws_resources(dataset: S3Dataset, data, environment): ).build_compliant_name() iam_role_arn = f'arn:aws:iam::{dataset.AwsAccountId}:role/{iam_role_name}' if data.get('adminRoleName'): - dataset.IAMDatasetAdminRoleArn = f"arn:aws:iam::{dataset.AwsAccountId}:role/{data['adminRoleName']}" - dataset.IAMDatasetAdminUserArn = f"arn:aws:iam::{dataset.AwsAccountId}:role/{data['adminRoleName']}" + dataset.IAMDatasetAdminRoleArn = f'arn:aws:iam::{dataset.AwsAccountId}:role/{data["adminRoleName"]}' + dataset.IAMDatasetAdminUserArn = f'arn:aws:iam::{dataset.AwsAccountId}:role/{data["adminRoleName"]}' else: dataset.IAMDatasetAdminRoleArn = iam_role_arn dataset.IAMDatasetAdminUserArn = iam_role_arn diff --git a/backend/dataall/modules/s3_datasets_shares/aws/glue_client.py b/backend/dataall/modules/s3_datasets_shares/aws/glue_client.py index e0a317fe9..296e620db 100644 --- a/backend/dataall/modules/s3_datasets_shares/aws/glue_client.py +++ b/backend/dataall/modules/s3_datasets_shares/aws/glue_client.py @@ -17,17 +17,17 @@ def __init__(self, account_id, region, database): def create_database(self, location): try: - log.info(f'Creating database {self._database} ' f'in account {self._account_id}...') + log.info(f'Creating database {self._database} in account {self._account_id}...') existing_database = self.get_glue_database() 
if existing_database: glue_database_created = True else: self._create_glue_database(location) glue_database_created = True - log.info(f'Successfully created database {self._database}' f'in account {self._account_id}') + log.info(f'Successfully created database {self._database}in account {self._account_id}') return glue_database_created except ClientError as e: - log.error(f'Failed to create database {self._database} ' f'in account {self._account_id} ' f'due to {e}') + log.error(f'Failed to create database {self._database} in account {self._account_id} due to {e}') raise e def _create_glue_database(self, location): @@ -47,7 +47,7 @@ def _create_glue_database(self, location): def get_glue_database(self): try: - log.info(f'Getting database {self._database} ' f'in account {self._account_id}...') + log.info(f'Getting database {self._database} in account {self._account_id}...') database = self._client.get_database(CatalogId=self._account_id, Name=self._database) return database except ClientError: @@ -56,7 +56,7 @@ def get_glue_database(self): def database_exists(self, database_name): try: - log.info(f'Check database exists {self._database} ' f'in account {self._account_id}...') + log.info(f'Check database exists {self._database} in account {self._account_id}...') self._client.get_database(CatalogId=self._account_id, Name=database_name) return True except ClientError: @@ -65,9 +65,7 @@ def database_exists(self, database_name): def table_exists(self, table_name): try: - log.info( - f'Check table exists {table_name} ' f'in database {self._database} ' f'in account {self._account_id}...' 
- ) + log.info(f'Check table exists {table_name} in database {self._database} in account {self._account_id}...') table = self._client.get_table(CatalogId=self._account_id, DatabaseName=self._database, Name=table_name) log.info(f'Glue table {table_name} found in account {self._account_id} in database {self._database}') return table @@ -78,9 +76,7 @@ def table_exists(self, table_name): def delete_table(self, table_name): database = self._database try: - log.info( - f'Deleting table {table_name} ' f'in database {self._database} ' f'in catalog {self._account_id}...' - ) + log.info(f'Deleting table {table_name} in database {self._database} in catalog {self._account_id}...') response = self._client.delete_table(CatalogId=self._account_id, DatabaseName=database, Name=table_name) log.info( f'Successfully deleted table {table_name} ' @@ -91,10 +87,7 @@ def delete_table(self, table_name): return response except ClientError as e: log.error( - f'Could not delete table {table_name} ' - f'in database {database} ' - f'in catalog {self._account_id} ' - f'due to: {e}' + f'Could not delete table {table_name} in database {database} in catalog {self._account_id} due to: {e}' ) raise e @@ -111,7 +104,7 @@ def create_resource_link(self, resource_link_name, table, catalog_id, database): } try: - log.info(f'Creating ResourceLink {resource_link_name} ' f'in database {shared_database}...') + log.info(f'Creating ResourceLink {resource_link_name} in database {shared_database}...') resource_link = self.table_exists(resource_link_name) if resource_link: log.info( @@ -143,14 +136,14 @@ def delete_database(self): account_id = self._account_id database = self._database try: - log.info(f'Deleting database {self._database} ' f'in account {self._account_id}...') + log.info(f'Deleting database {self._database} in account {self._account_id}...') existing_database = self.get_glue_database() if existing_database: self._client.delete_database(CatalogId=account_id, Name=database) - log.info(f'Successfully 
deleted database {database} ' f'in account {account_id}') + log.info(f'Successfully deleted database {database} in account {account_id}') return True except ClientError as e: - log.error(f'Could not delete database {database} ' f'in account {account_id} ' f'due to: {e}') + log.error(f'Could not delete database {database} in account {account_id} due to: {e}') raise e def get_source_catalog(self): diff --git a/backend/dataall/modules/s3_datasets_shares/aws/lakeformation_client.py b/backend/dataall/modules/s3_datasets_shares/aws/lakeformation_client.py index 72e44ceb4..13e82c3d6 100644 --- a/backend/dataall/modules/s3_datasets_shares/aws/lakeformation_client.py +++ b/backend/dataall/modules/s3_datasets_shares/aws/lakeformation_client.py @@ -150,7 +150,7 @@ def _grant_permissions_to_resource( permissions_with_grant_options=permissions_with_grant_options, check_resource=check_resource, ): - log.info(f'Granting principal {principal} ' f'permissions {permissions} ' f'to {str(resource)}...') + log.info(f'Granting principal {principal} permissions {permissions} to {str(resource)}...') # We define the grant with "permissions" instead of "missing_permissions" because we want to avoid # duplicates done by data.all, but we want to avoid dependencies with external grants grant_dict = dict( @@ -428,7 +428,7 @@ def _check_permissions_to_resource( check_resource: dict = None, ) -> bool: try: - log.info(f'Checking principal {principal} ' f'permissions {permissions} ' f'to {str(resource)}...') + log.info(f'Checking principal {principal} permissions {permissions} to {str(resource)}...') check_dict = dict( Principal={'DataLakePrincipalIdentifier': principal}, Resource=check_resource if check_resource else resource, @@ -463,10 +463,5 @@ def _check_permissions_to_resource( ) return True except ClientError as e: - log.error( - f'Could not list principal {principal} ' - f'permissions {permissions} ' - f'to {str(resource)} ' - f'due to: {e}' - ) + log.error(f'Could not list principal 
{principal} permissions {permissions} to {str(resource)} due to: {e}') raise e diff --git a/backend/dataall/modules/s3_datasets_shares/aws/ram_client.py b/backend/dataall/modules/s3_datasets_shares/aws/ram_client.py index 01d6a8f71..20d7e8fea 100644 --- a/backend/dataall/modules/s3_datasets_shares/aws/ram_client.py +++ b/backend/dataall/modules/s3_datasets_shares/aws/ram_client.py @@ -31,7 +31,7 @@ def _get_resource_share_invitations(self, resource_share_arns, sender_account, r ] return filtered_invitations except ClientError as e: - log.error(f'Failed retrieving RAM resource ' f'share invitations {resource_share_arns} due to {e}') + log.error(f'Failed retrieving RAM resource share invitations {resource_share_arns} due to {e}') raise e def _accept_resource_share_invitation(self, resource_share_invitation_arn): @@ -43,9 +43,9 @@ def _accept_resource_share_invitation(self, resource_share_invitation_arn): return response.get('resourceShareInvitation') except ClientError as e: if e.response['Error']['Code'] == 'ResourceShareInvitationAlreadyAcceptedException': - log.info(f'Failed to accept RAM invitation ' f'{resource_share_invitation_arn} already accepted') + log.info(f'Failed to accept RAM invitation {resource_share_invitation_arn} already accepted') else: - log.error(f'Failed to accept RAM invitation ' f'{resource_share_invitation_arn} due to {e}') + log.error(f'Failed to accept RAM invitation {resource_share_invitation_arn} due to {e}') raise e @staticmethod @@ -54,9 +54,7 @@ def check_ram_invitation_status( ): source_ram = RamClient(source_account_id, source_region) - resource_arn = ( - f'arn:aws:glue:{source_region}:{source_account_id}:' f'table/{source_database}/{source_table_name}' - ) + resource_arn = f'arn:aws:glue:{source_region}:{source_account_id}:table/{source_database}/{source_table_name}' associations = source_ram._list_resource_share_associations(resource_arn) resource_share_arns = [a['resourceShareArn'] for a in associations if a['status'] == 
'ASSOCIATED'] @@ -99,9 +97,7 @@ def accept_ram_invitation( source_ram = RamClient(source_account_id, source_region) target_ram = RamClient(target_account_id, target_region) - resource_arn = ( - f'arn:aws:glue:{source_region}:{source_account_id}:' f'table/{source_database}/{source_table_name}' - ) + resource_arn = f'arn:aws:glue:{source_region}:{source_account_id}:table/{source_database}/{source_table_name}' associations = source_ram._list_resource_share_associations(resource_arn) resource_share_arns = [a['resourceShareArn'] for a in associations] diff --git a/backend/dataall/modules/s3_datasets_shares/services/s3_share_managed_policy_service.py b/backend/dataall/modules/s3_datasets_shares/services/s3_share_managed_policy_service.py index 11d0e68c3..ec53e74d9 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/s3_share_managed_policy_service.py +++ b/backend/dataall/modules/s3_datasets_shares/services/s3_share_managed_policy_service.py @@ -68,7 +68,7 @@ def add_missing_resources_to_policy_statement( policy_actions = S3_ALLOWED_ACTIONS if resource_type == 's3' else [f'{resource_type}:*'] index = self._get_statement_by_sid(policy_document, statement_sid) if index is None: - log.info(f'{statement_sid} does NOT exists for Managed policy {policy_name} ' f'creating statement...') + log.info(f'{statement_sid} does NOT exists for Managed policy {policy_name} creating statement...') additional_policy = { 'Sid': statement_sid, 'Effect': 'Allow', @@ -97,7 +97,7 @@ def remove_resource_from_statement(self, target_resources: list, statement_sid: index = self._get_statement_by_sid(policy_document, statement_sid) log.info(f'Removing {target_resources} from Statement[{index}] in Managed policy {policy_name} ...') if index is None: - log.info(f'{statement_sid} does NOT exists for Managed policy {policy_name} ' f'skipping...') + log.info(f'{statement_sid} does NOT exists for Managed policy {policy_name} skipping...') else: policy_statement = 
policy_document['Statement'][index] for target_resource in target_resources: diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_managers/lf_share_manager.py b/backend/dataall/modules/s3_datasets_shares/services/share_managers/lf_share_manager.py index a7d0c6034..168443847 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_managers/lf_share_manager.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_managers/lf_share_manager.py @@ -774,9 +774,7 @@ def check_catalog_account_exists_and_verify(self): ) return False except Exception as e: - logger.error( - f'Failed to initialise catalog account details for share - {self.share.shareUri} ' f'due to: {e}' - ) + logger.error(f'Failed to initialise catalog account details for share - {self.share.shareUri} due to: {e}') return None return True @@ -796,7 +794,7 @@ def get_catalog_account_details(self): ).get_source_catalog() return catalog_dict.get('account_id'), catalog_dict.get('region'), catalog_dict.get('database_name') except Exception as e: - logger.error(f'Failed to fetch catalog account details for share - {self.share.shareUri} ' f'due to: {e}') + logger.error(f'Failed to fetch catalog account details for share - {self.share.shareUri} due to: {e}') return None, None, None def initialize_clients(self): diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_access_point_share_manager.py b/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_access_point_share_manager.py index f315869f6..a45af899f 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_access_point_share_manager.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_managers/s3_access_point_share_manager.py @@ -566,12 +566,12 @@ def update_dataset_bucket_key_policy(self): for target_sid in perms_to_sids(self.share.permissions, SidType.KmsAccessPointPolicy): if target_sid in statements.keys(): - logger.info(f'KMS key 
policy contains share statement {target_sid}, ' f'updating the current one') + logger.info(f'KMS key policy contains share statement {target_sid}, updating the current one') statements[target_sid] = add_target_arn_to_statement_principal( statements[target_sid], target_requester_arn ) else: - logger.info(f'KMS key does not contain share statement {target_sid}, ' f'generating a new one') + logger.info(f'KMS key does not contain share statement {target_sid}, generating a new one') statements[target_sid] = self.generate_default_kms_policy_statement( target_requester_arn, target_sid ) diff --git a/backend/dataall/modules/s3_datasets_shares/tasks/dataset_subscription_task.py b/backend/dataall/modules/s3_datasets_shares/tasks/dataset_subscription_task.py index a08fb5cf8..f9e896363 100644 --- a/backend/dataall/modules/s3_datasets_shares/tasks/dataset_subscription_task.py +++ b/backend/dataall/modules/s3_datasets_shares/tasks/dataset_subscription_task.py @@ -121,8 +121,7 @@ def publish_sns_message(self, session, message, dataset, share_items, prefix, ta message = { 'location': prefix, 'owner': dataset.owner, - 'message': f'Dataset owner {dataset.owner} ' - f'has updated the table shared with you {prefix}', + 'message': f'Dataset owner {dataset.owner} has updated the table shared with you {prefix}', } sns_client = SnsDatasetClient(environment, dataset) diff --git a/backend/dataall/modules/s3_datasets_shares/tasks/subscriptions/sqs_poller.py b/backend/dataall/modules/s3_datasets_shares/tasks/subscriptions/sqs_poller.py index 89497e62a..b02d420b5 100644 --- a/backend/dataall/modules/s3_datasets_shares/tasks/subscriptions/sqs_poller.py +++ b/backend/dataall/modules/s3_datasets_shares/tasks/subscriptions/sqs_poller.py @@ -20,7 +20,7 @@ def poll_queues(queues): for queue in queues: sqs = boto3.client( - 'sqs', region_name=queue['region'], endpoint_url=f"https://sqs.{queue['region']}.amazonaws.com" + 'sqs', region_name=queue['region'], 
endpoint_url=f'https://sqs.{queue["region"]}.amazonaws.com' ) try: response = sqs.receive_message( @@ -31,10 +31,10 @@ def poll_queues(queues): ) if not response or not response.get('Messages'): - log.info(f"No new messages available from queue: {queue['url']}") + log.info(f'No new messages available from queue: {queue["url"]}') if response and response.get('Messages'): - log.info(f"Available messages from queue: {response['Messages']}") + log.info(f'Available messages from queue: {response["Messages"]}') for message in response['Messages']: if message.get('Body'): log.info('Consumed message from queue: %s' % message) diff --git a/backend/dataall/modules/shares_base/services/share_logs_service.py b/backend/dataall/modules/shares_base/services/share_logs_service.py index ea73632d5..7bc4ae1f8 100644 --- a/backend/dataall/modules/shares_base/services/share_logs_service.py +++ b/backend/dataall/modules/shares_base/services/share_logs_service.py @@ -69,7 +69,7 @@ def get_share_logs(shareUri): envname = os.getenv('envname', 'local') log_query_period_days = config.get_property('core.log_query_period_days', 1) log.info(f'log_query_period_days: {log_query_period_days}') - log_group_name = f"/{Parameter().get_parameter(env=envname, path='resourcePrefix')}/{envname}/ecs/share-manager" + log_group_name = f'/{Parameter().get_parameter(env=envname, path="resourcePrefix")}/{envname}/ecs/share-manager' query_for_name = ShareLogsService._get_share_logs_name_query(shareUri=shareUri) name_query_result = CloudWatch.run_query( diff --git a/backend/dataall/modules/shares_base/services/share_notification_service.py b/backend/dataall/modules/shares_base/services/share_notification_service.py index 5343948fb..9d231aeae 100644 --- a/backend/dataall/modules/shares_base/services/share_notification_service.py +++ b/backend/dataall/modules/shares_base/services/share_notification_service.py @@ -240,7 +240,7 @@ def notify_share_expiration_to_requesters(self): ) msg_intro = f"""Dear User,
- This is a reminder that your share request for the dataset "{self.dataset.label}" will get expired on {self.share.expiryDate.date().strftime("%B %d, %Y")}. Please request a share extension request before it to have continued access to the dataset. + This is a reminder that your share request for the dataset "{self.dataset.label}" will get expired on {self.share.expiryDate.date().strftime('%B %d, %Y')}. Please request a share extension request before it to have continued access to the dataset.

Note: If you fail request for an extension and if it expires, the share item will be revoked which will result in loss of access to the dataset. """ diff --git a/backend/dataall/modules/shares_base/services/share_object_service.py b/backend/dataall/modules/shares_base/services/share_object_service.py index 6d3b9e83a..40314d73d 100644 --- a/backend/dataall/modules/shares_base/services/share_object_service.py +++ b/backend/dataall/modules/shares_base/services/share_object_service.py @@ -495,7 +495,7 @@ def update_share_expiration_period(cls, uri: str, expiration, nonExpirable) -> b if share_item_invalid_state: raise Exception( - f"Cannot update share object's expiration as it share items are in incorrect state { ', '.join(invalid_states)}" + f"Cannot update share object's expiration as it share items are in incorrect state {', '.join(invalid_states)}" ) if nonExpirable: diff --git a/backend/dataall/modules/worksheets/db/worksheet_repositories.py b/backend/dataall/modules/worksheets/db/worksheet_repositories.py index a2b4b8054..b44023c67 100644 --- a/backend/dataall/modules/worksheets/db/worksheet_repositories.py +++ b/backend/dataall/modules/worksheets/db/worksheet_repositories.py @@ -46,7 +46,7 @@ def query_user_worksheets(session, username, groups, filter) -> Query: Worksheet.label.ilike('%' + filter.get('term') + '%'), Worksheet.description.ilike('%' + filter.get('term') + '%'), Worksheet.tags.contains( - f"{{{NamingConventionService(pattern=NamingConventionPattern.DEFAULT_SEARCH, target_label=filter.get('term')).sanitize()}}}" + f'{{{NamingConventionService(pattern=NamingConventionPattern.DEFAULT_SEARCH, target_label=filter.get("term")).sanitize()}}}' ), ) ) diff --git a/backend/dataall/modules/worksheets/services/worksheet_service.py b/backend/dataall/modules/worksheets/services/worksheet_service.py index ffea373d9..65a657fd1 100644 --- a/backend/dataall/modules/worksheets/services/worksheet_service.py +++ 
b/backend/dataall/modules/worksheets/services/worksheet_service.py @@ -38,7 +38,7 @@ def create_worksheet(data=None) -> Worksheet: context = get_context() if data['SamlAdminGroupName'] not in context.groups: raise exceptions.UnauthorizedOperation( - 'CREATE_WORKSHEET', f"user {context.username} does not belong to group {data['SamlAdminGroupName']}" + 'CREATE_WORKSHEET', f'user {context.username} does not belong to group {data["SamlAdminGroupName"]}' ) with context.db_engine.scoped_session() as session: worksheet = Worksheet( diff --git a/deploy/configs/frontend_config.py b/deploy/configs/frontend_config.py index dbc63ad15..1f2852a1a 100644 --- a/deploy/configs/frontend_config.py +++ b/deploy/configs/frontend_config.py @@ -71,14 +71,14 @@ def create_react_env_file( if custom_auth: file_content = ( file_content - + f"""REACT_APP_CUSTOM_AUTH={custom_auth.get("provider", "none")} -REACT_APP_CUSTOM_AUTH_URL={custom_auth.get("url", "none")} -REACT_APP_CUSTOM_AUTH_REDIRECT_URL={custom_auth.get("redirect_url", "none")} -REACT_APP_CUSTOM_AUTH_CLIENT_ID={custom_auth.get("client_id", "none")} -REACT_APP_CUSTOM_AUTH_RESP_TYPES={custom_auth.get("response_types", "none")} -REACT_APP_CUSTOM_AUTH_SCOPES={custom_auth.get("scopes", "none")} -REACT_APP_CUSTOM_AUTH_EMAIL_CLAIM_MAPPING={custom_auth.get("claims_mapping_email", "none")} -REACT_APP_CUSTOM_AUTH_USERID_CLAIM_MAPPING={custom_auth.get("claims_mapping_user_id", "none")} + + f"""REACT_APP_CUSTOM_AUTH={custom_auth.get('provider', 'none')} +REACT_APP_CUSTOM_AUTH_URL={custom_auth.get('url', 'none')} +REACT_APP_CUSTOM_AUTH_REDIRECT_URL={custom_auth.get('redirect_url', 'none')} +REACT_APP_CUSTOM_AUTH_CLIENT_ID={custom_auth.get('client_id', 'none')} +REACT_APP_CUSTOM_AUTH_RESP_TYPES={custom_auth.get('response_types', 'none')} +REACT_APP_CUSTOM_AUTH_SCOPES={custom_auth.get('scopes', 'none')} +REACT_APP_CUSTOM_AUTH_EMAIL_CLAIM_MAPPING={custom_auth.get('claims_mapping_email', 'none')} 
+REACT_APP_CUSTOM_AUTH_USERID_CLAIM_MAPPING={custom_auth.get('claims_mapping_user_id', 'none')} """ ) else: diff --git a/deploy/custom_resources/sync_congito_params/index.py b/deploy/custom_resources/sync_congito_params/index.py index a67d2b59a..5caa42efc 100644 --- a/deploy/custom_resources/sync_congito_params/index.py +++ b/deploy/custom_resources/sync_congito_params/index.py @@ -21,7 +21,7 @@ def on_event(event, context): def on_create(event): print('**Sync Cognito Parameters') - parameters = get_parameters(ssm, f"/dataall/{event['ResourceProperties']['envname']}/cognito") + parameters = get_parameters(ssm, f'/dataall/{event["ResourceProperties"]["envname"]}/cognito') print('all cognito params', parameters) response_data = sync_parameter_store(parameters) return response_data @@ -42,7 +42,7 @@ def sync_parameter_store(parameters): try: ssm_us_east_1.put_parameter( Name=_parameter_store['Name'], - Description=f"mirror of {_parameter_store['Name']} in eu-west-1 ", + Description=f'mirror of {_parameter_store["Name"]} in eu-west-1 ', Value=_parameter_store['Value'], Type='String', Overwrite=True, diff --git a/deploy/stacks/backend_stack.py b/deploy/stacks/backend_stack.py index 20beb33ea..ace6d42d0 100644 --- a/deploy/stacks/backend_stack.py +++ b/deploy/stacks/backend_stack.py @@ -86,7 +86,7 @@ def __init__( vpce_connection = ec2.Connections(security_groups=[vpc_endpoints_sg]) self.s3_prefix_list = self.get_s3_prefix_list() - self.pivot_role_name = f"dataallPivotRole{'-cdk' if enable_pivot_role_auto_create else ''}" + self.pivot_role_name = f'dataallPivotRole{"-cdk" if enable_pivot_role_auto_create else ""}' ParamStoreStack( self, diff --git a/deploy/stacks/cognito.py b/deploy/stacks/cognito.py index f9e89f17c..6965607bb 100644 --- a/deploy/stacks/cognito.py +++ b/deploy/stacks/cognito.py @@ -97,7 +97,7 @@ def __init__( f'UserPool{envname}', user_pool=self.user_pool, cognito_domain=cognito.CognitoDomainOptions( - domain_prefix=f"{resource_prefix.replace('-', 
'')}{envname}{self.region.replace('-', '')}{self.account}" + domain_prefix=f'{resource_prefix.replace("-", "")}{envname}{self.region.replace("-", "")}{self.account}' ), ) jwt_token_duration = 180 if with_approval_tests else 60 diff --git a/deploy/stacks/pipeline.py b/deploy/stacks/pipeline.py index 82a247bd6..af5d4c31d 100644 --- a/deploy/stacks/pipeline.py +++ b/deploy/stacks/pipeline.py @@ -205,7 +205,7 @@ def __init__( if target_env.get('with_approval'): backend_stage.add_pre( pipelines.ManualApprovalStep( - id=f"Approve{target_env['envname']}Deployment", + id=f'Approve{target_env["envname"]}Deployment', comment=f'Approve deployment for environment {target_env["envname"]}', ) ) @@ -587,7 +587,7 @@ def set_ecr_stage( self, target_env, ): - repository_name = f"{self.resource_prefix}-{target_env['envname']}-ecr-repository" + repository_name = f'{self.resource_prefix}-{target_env["envname"]}-ecr-repository' ecr_stage = self.pipeline.add_stage( ECRStage( self, @@ -610,13 +610,13 @@ def set_ecr_stage( privileged=True, environment_variables={ 'REPOSITORY_URI': codebuild.BuildEnvironmentVariable( - value=f"{target_env['account']}.dkr.ecr.{target_env['region']}.amazonaws.com/{repository_name}" + value=f'{target_env["account"]}.dkr.ecr.{target_env["region"]}.amazonaws.com/{repository_name}' ), 'IMAGE_TAG': codebuild.BuildEnvironmentVariable(value=f'lambdas-{self.image_tag}'), }, ), commands=[ - f"make deploy-image type=lambda image-tag=$IMAGE_TAG account={target_env['account']} region={target_env['region']} repo={repository_name}", + f'make deploy-image type=lambda image-tag=$IMAGE_TAG account={target_env["account"]} region={target_env["region"]} repo={repository_name}', ], role=self.baseline_codebuild_role.without_policy_updates(), vpc=self.vpc, @@ -628,13 +628,13 @@ def set_ecr_stage( privileged=True, environment_variables={ 'REPOSITORY_URI': codebuild.BuildEnvironmentVariable( - 
value=f"{target_env['account']}.dkr.ecr.{target_env['region']}.amazonaws.com/{repository_name}" + value=f'{target_env["account"]}.dkr.ecr.{target_env["region"]}.amazonaws.com/{repository_name}' ), 'IMAGE_TAG': codebuild.BuildEnvironmentVariable(value=f'cdkproxy-{self.image_tag}'), }, ), commands=[ - f"make deploy-image type=ecs image-tag=$IMAGE_TAG account={target_env['account']} region={target_env['region']} repo={repository_name}", + f'make deploy-image type=ecs image-tag=$IMAGE_TAG account={target_env["account"]} region={target_env["region"]} repo={repository_name}', ], role=self.baseline_codebuild_role.without_policy_updates(), vpc=self.vpc, @@ -646,7 +646,7 @@ def set_backend_stage(self, target_env, repository_name): backend_stage = self.pipeline.add_stage( BackendStage( self, - f"{self.resource_prefix}-{target_env['envname']}-backend-stage", + f'{self.resource_prefix}-{target_env["envname"]}-backend-stage', env={ 'account': target_env['account'], 'region': target_env['region'], @@ -749,7 +749,7 @@ def set_stacks_updater_stage( self, target_env, ): - wave = self.pipeline.add_wave(f"{self.resource_prefix}-{target_env['envname']}-stacks-updater-stage") + wave = self.pipeline.add_wave(f'{self.resource_prefix}-{target_env["envname"]}-stacks-updater-stage') wave.add_post( pipelines.CodeBuildStep( id='StacksUpdater', @@ -780,7 +780,7 @@ def set_cloudfront_stage(self, target_env): cloudfront_stage = self.pipeline.add_stage( CloudfrontStage( self, - f"{self.resource_prefix}-{target_env['envname']}-cloudfront-stage", + f'{self.resource_prefix}-{target_env["envname"]}-cloudfront-stage', env={ 'account': target_env['account'], 'region': 'us-east-1', @@ -849,11 +849,11 @@ def set_cloudfront_stage(self, target_env): *front_stage_actions, self.cw_rum_config_action(target_env), ) - self.pipeline.add_wave(f"{self.resource_prefix}-{target_env['envname']}-frontend-stage").add_post( + 
self.pipeline.add_wave(f'{self.resource_prefix}-{target_env["envname"]}-frontend-stage').add_post( *front_stage_actions ) if target_env.get('custom_auth', None) is None: - self.pipeline.add_wave(f"{self.resource_prefix}-{target_env['envname']}-docs-stage").add_post( + self.pipeline.add_wave(f'{self.resource_prefix}-{target_env["envname"]}-docs-stage').add_post( pipelines.CodeBuildStep( id='UpdateDocumentation', build_environment=codebuild.BuildEnvironment( @@ -861,7 +861,7 @@ def set_cloudfront_stage(self, target_env): ), commands=[ f'aws codeartifact login --tool pip --repository {self.codeartifact.codeartifact_pip_repo_name} --domain {self.codeartifact.codeartifact_domain_name} --domain-owner {self.codeartifact.domain.attr_owner}', - f"make assume-role REMOTE_ACCOUNT_ID={target_env['account']} REMOTE_ROLE={self.resource_prefix}-{target_env['envname']}-S3DeploymentRole EXTERNAL_ID={get_tooling_account_external_id(target_env['account'])}", + f'make assume-role REMOTE_ACCOUNT_ID={target_env["account"]} REMOTE_ROLE={self.resource_prefix}-{target_env["envname"]}-S3DeploymentRole EXTERNAL_ID={get_tooling_account_external_id(target_env["account"])}', '. 
./.env.assumed_role', 'aws sts get-caller-identity', 'export AWS_DEFAULT_REGION=us-east-1', diff --git a/deploy/stacks/vpc.py b/deploy/stacks/vpc.py index a29105324..4f51df449 100644 --- a/deploy/stacks/vpc.py +++ b/deploy/stacks/vpc.py @@ -55,10 +55,10 @@ def __init__( self.private_subnets.append(subnet.subnet_id) CfnOutput( self, - f'{resource_prefix}-{envname}-privateSubnet-{index+1}', - export_name=f'{resource_prefix}-{envname}-privateSubnet-{index+1}', + f'{resource_prefix}-{envname}-privateSubnet-{index + 1}', + export_name=f'{resource_prefix}-{envname}-privateSubnet-{index + 1}', value=subnet.subnet_id, - description=f'{resource_prefix}-{envname}-privateSubnet-{index+1}', + description=f'{resource_prefix}-{envname}-privateSubnet-{index + 1}', ) ssm.StringParameter( diff --git a/tests/core/conftest.py b/tests/core/conftest.py index eff62140a..91fe499d1 100644 --- a/tests/core/conftest.py +++ b/tests/core/conftest.py @@ -12,7 +12,7 @@ def factory(org, envname, owner, group, account, region, desc='test', parameters if not parameters: parameters = {'dashboardsEnabled': 'true'} - key = f"{org.organizationUri}{envname}{owner}{''.join(group or '-')}{account}{region}" + key = f'{org.organizationUri}{envname}{owner}{"".join(group or "-")}{account}{region}' if cache.get(key): return cache[key] response = client.query( diff --git a/tests/modules/s3_datasets/test_dataset.py b/tests/modules/s3_datasets/test_dataset.py index c3801edfa..c96589233 100644 --- a/tests/modules/s3_datasets/test_dataset.py +++ b/tests/modules/s3_datasets/test_dataset.py @@ -232,7 +232,7 @@ def test_update_dataset_unauthorized(dataset1, client, group): def test_add_tables(table, dataset1, db): for i in range(0, 10): - table(dataset=dataset1, name=f'table{i+1}', username=dataset1.owner) + table(dataset=dataset1, name=f'table{i + 1}', username=dataset1.owner) with db.scoped_session() as session: nb = session.query(DatasetTable).count() @@ -241,7 +241,7 @@ def test_add_tables(table, dataset1, db): 
def test_add_locations(location, dataset1, db): for i in range(0, 10): - location(dataset=dataset1, name=f'unstructured{i+1}', username=dataset1.owner) + location(dataset=dataset1, name=f'unstructured{i + 1}', username=dataset1.owner) with db.scoped_session() as session: nb = session.query(DatasetStorageLocation).count() diff --git a/tests/modules/s3_datasets/test_dataset_glossary.py b/tests/modules/s3_datasets/test_dataset_glossary.py index 5a25d1b34..b3fe9d9b4 100644 --- a/tests/modules/s3_datasets/test_dataset_glossary.py +++ b/tests/modules/s3_datasets/test_dataset_glossary.py @@ -13,7 +13,7 @@ def _columns(db, dataset_fixture, table_fixture) -> List[DatasetTableColumn]: c = DatasetTableColumn( datasetUri=dataset_fixture.datasetUri, tableUri=table_fixture.tableUri, - label=f'c{i+1}', + label=f'c{i + 1}', AWSAccountId=dataset_fixture.restricted.AwsAccountId, region=dataset_fixture.restricted.region, GlueTableName='table', diff --git a/tests/modules/s3_datasets/test_dataset_table.py b/tests/modules/s3_datasets/test_dataset_table.py index 1736c4958..4361eb15a 100644 --- a/tests/modules/s3_datasets/test_dataset_table.py +++ b/tests/modules/s3_datasets/test_dataset_table.py @@ -20,7 +20,7 @@ def mock_lf_client(mocker, mock_aws_client): def test_add_tables(table, dataset_fixture, db): for i in range(0, 10): - table(dataset=dataset_fixture, name=f'table{i+1}', username=dataset_fixture.owner) + table(dataset=dataset_fixture, name=f'table{i + 1}', username=dataset_fixture.owner) with db.scoped_session() as session: nb = session.query(DatasetTable).count() diff --git a/tests/modules/s3_datasets_shares/tasks/test_s3_access_point_share_manager.py b/tests/modules/s3_datasets_shares/tasks/test_s3_access_point_share_manager.py index 8c0d1bb69..5d2423d8d 100644 --- a/tests/modules/s3_datasets_shares/tasks/test_s3_access_point_share_manager.py +++ b/tests/modules/s3_datasets_shares/tasks/test_s3_access_point_share_manager.py @@ -627,7 +627,7 @@ def 
test_manage_access_point_and_policy_1(mocker, target_environment: Environmen mocker.patch( 'dataall.base.aws.sts.SessionHelper.get_role_ids', - return_value=['dataset_admin_role_id:*', 'source_env_admin_role_id:*' 'source_account_pivot_role_id:*'], + return_value=['dataset_admin_role_id:*', 'source_env_admin_role_id:*source_account_pivot_role_id:*'], ) mocker.patch( diff --git a/tests_new/integration_tests/modules/dashboards/aws_clients.py b/tests_new/integration_tests/modules/dashboards/aws_clients.py index 286c9e82e..e521cb208 100644 --- a/tests_new/integration_tests/modules/dashboards/aws_clients.py +++ b/tests_new/integration_tests/modules/dashboards/aws_clients.py @@ -23,14 +23,14 @@ def check_enterprise_account_exists(self): else: if response['AccountInfo']['Edition'] not in ['ENTERPRISE', 'ENTERPRISE_AND_Q']: log.info( - f"Quicksight Subscription found in Account: {self._account_id} of incorrect type: {response['AccountInfo']['Edition']}" + f'Quicksight Subscription found in Account: {self._account_id} of incorrect type: {response["AccountInfo"]["Edition"]}' ) return False else: if response['AccountInfo']['AccountSubscriptionStatus'] == 'ACCOUNT_CREATED': return True log.info( - f"Quicksight Subscription found in Account: {self._account_id} not active. Status = {response['AccountInfo']['AccountSubscriptionStatus']}" + f'Quicksight Subscription found in Account: {self._account_id} not active. 
Status = {response["AccountInfo"]["AccountSubscriptionStatus"]}' ) return False except self._client.exceptions.ResourceNotFoundException: diff --git a/tests_new/integration_tests/modules/s3_datasets/global_conftest.py b/tests_new/integration_tests/modules/s3_datasets/global_conftest.py index 4f17212c7..b914b23b7 100644 --- a/tests_new/integration_tests/modules/s3_datasets/global_conftest.py +++ b/tests_new/integration_tests/modules/s3_datasets/global_conftest.py @@ -82,7 +82,7 @@ def create_aws_imported_resources( ).create_key_with_alias(kms_alias_name) bucket = S3Client(session=aws_client, region=env['region']).create_bucket( bucket_name=bucket_name, - kms_key_arn=f"arn:aws:kms:{env['region']}:{env['AwsAccountId']}:key/{kms_id}", + kms_key_arn=f'arn:aws:kms:{env["region"]}:{env["AwsAccountId"]}:key/{kms_id}', ) else: bucket = S3Client(session=aws_client, region=env['region']).create_bucket( @@ -496,7 +496,7 @@ def updated_persistent_s3_dataset1(client1, persistent_s3_dataset1): @pytest.fixture(scope='session') def persistent_imported_sse_s3_dataset1(client1, group1, persistent_env1, persistent_env1_aws_client, testdata): - bucket_name = f"dataalltesting{persistent_env1.environmentUri}perssses3{persistent_env1['AwsAccountId']}" + bucket_name = f'dataalltesting{persistent_env1.environmentUri}perssses3{persistent_env1["AwsAccountId"]}' bucket = None try: s3_client = S3Client(session=persistent_env1_aws_client, region=persistent_env1['region']) @@ -525,7 +525,7 @@ def persistent_imported_sse_s3_dataset1(client1, group1, persistent_env1, persis def persistent_imported_kms_s3_dataset1( client1, group1, persistent_env1, persistent_env1_aws_client, persistent_env1_integration_role_arn, testdata ): - resource_name = f"dataalltesting{persistent_env1.environmentUri}perskms{persistent_env1['AwsAccountId']}" + resource_name = f'dataalltesting{persistent_env1.environmentUri}perskms{persistent_env1["AwsAccountId"]}' existing_bucket = S3Client(session=persistent_env1_aws_client, 
region=persistent_env1['region']).bucket_exists( resource_name )