From f5c842c902bced21fa3fa55a56ad2210f4839034 Mon Sep 17 00:00:00 2001 From: Becky Reamy Date: Wed, 12 Nov 2025 14:54:26 -0500 Subject: [PATCH] KPMP-6260: Check for missing slides after inserting to ensure we are all up to date --- data_management/services/dlu_management.py | 28 +++++++++--------- data_management/services/slide_management.py | 30 ++++++++++++-------- data_management/watch_files.py | 1 + 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/data_management/services/dlu_management.py b/data_management/services/dlu_management.py index 04a62f6..e4a2fdb 100644 --- a/data_management/services/dlu_management.py +++ b/data_management/services/dlu_management.py @@ -101,11 +101,6 @@ def update_dlu_package(self, package_id: str, fields_values: dict): values = query_info["values"][0:] + (package_id,) query = "UPDATE data_manager_data_v SET " + query_info["set_clause"] + " WHERE dlu_package_id = %s" self.db.insert_data(query, values) - - def get_missing_slides(self, redcap_id: str): - return self.db.get_data( - "select * from missing_slides_v where spectrack_redcap_record_id = %s", - (redcap_id,)), def update_missing_slides(self, redcap_id: str): return self.db.get_data( @@ -270,27 +265,34 @@ def get_slide_manifest_import_by_kit(self, kit_id, stain): def set_error_message_slide_scan_curation(self, error, image_id): self.db.insert_data("UPDATE slide_scan_curation set error_message = %s where image_id = %s", (error, image_id,)) - - def set_error_message_slide_scan_curation_redcap_id(self, error, redcap_id): - self.db.insert_data_no_alert("UPDATE slide_scan_curation set error_message = %s where redcap_id = %s", - (error, redcap_id,)) def find_slide_scan_info_by_package_id(self, package_id): - self.db.get_data("SELECT * FROM slide_scan_v WHERE dlu_package_id = %s", + return self.db.get_data("SELECT * FROM slide_scan_v WHERE dlu_package_id = %s", (package_id,)) def is_package_missing_slides(self, package_id): - self.db.get_data("SELECT * FROM slide_scan_v WHERE dlu_package_id = %s and missing_slides = 1", + return self.db.get_data("SELECT * FROM slide_scan_v WHERE dlu_package_id = %s and missing_slides = 1", (package_id,)) + def slides_marked_missing_by_redcap_id(self, redcap_id: str): + return self.db.get_data("SELECT * FROM slide_scan_v WHERE redcap_id = %s AND missing_slides = 1", + (redcap_id,)) + + def get_missing_slides_from_view(self, redcap_id: str): + return self.db.get_data("select * from missing_slides_v where spectrack_redcap_record_id = %s", + (redcap_id,)) + def is_slides_in_error(self, package_id): - self.db.get_data("SELECT * FROM slide_scan_curation WHERE dlu_package_id = %s and error_message IS NOT NULL", + return self.db.get_data("SELECT * FROM slide_scan_curation WHERE dlu_package_id = %s and error_message IS NOT NULL", (package_id,)) def find_not_approved_filenames(self, package_id): - self.db.get_data("SELECT * FROM slide_scan_curation WHERE approve_file_name = 'yes' AND dlu_package_id = %s", + return self.db.get_data("SELECT * FROM slide_scan_curation WHERE approve_file_name = 'yes' AND dlu_package_id = %s", (package_id,)) + def update_missing_slide_flag(self, image_id): + return self.db.insert_data("UPDATE slide_scan_curation SET missing_slides = 0 WHERE image_id = %s", + (image_id,)) if __name__ == "__main__": dlu_management = DluManagement() diff --git a/data_management/services/slide_management.py b/data_management/services/slide_management.py index 6d3fd87..72cf473 100644 --- a/data_management/services/slide_management.py +++ b/data_management/services/slide_management.py @@ -74,6 +74,7 @@ def __init__(self, db): def process_slide_manifest_imports(self): new_records = self.db.get_new_slide_manifest_import_rows() + redcap_ids_processed = [] for record in new_records: record_in_error = False error_message = "" @@ -84,7 +85,7 @@ def process_slide_manifest_imports(self): error_message = "No redcap_id found for kit_id " + kit_id + "; " logger.error(error_message) continue - + if record["accession"] is not None: new_file_name = self.determine_new_slide_name(sample_id=record["accession"], kit_id=kit_id, stain_info=record["stain"], block_id=record["block_id"]) @@ -108,22 +109,27 @@ def process_slide_manifest_imports(self): new_file_name=new_file_name, source_file_name=source_file_name, source_folder_name=source_folder_name) self.db.insert_into_slide_scan_curation(slide_scan.get_dmd_tuple()) - check_missing_slides = self.db.get_missing_slides(redcap_id) + check_missing_slides = self.db.get_missing_slides_from_view(redcap_id) + redcap_ids_processed.append(redcap_id) if not all(check_missing_slides): - if error_message != None: - - error_message += "There are missing slide(s) for participant " + redcap_id + "; " - elif error_message is None: - error_message = "There are missing slide(s) for participant " + redcap_id + "; " - logger.info(error_message) self.db.update_missing_slides(redcap_id) - - # Can't use record_in_error here because we can't set an error message for an image_id that doesn't exist - self.db.set_error_message_slide_scan_curation_redcap_id(error=error_message, redcap_id=redcap_id) - + if record_in_error: self.db.set_error_message_slide_scan_curation(image_id=image_id, error=error_message) + for redcap_id in redcap_ids_processed: + self.update_missing_slides(redcap_id) + + def update_missing_slides(self, redcap_id: str): + # This MAY seem redundant, however this will ensure that we unmark any missing slides records that just got + # the missing one added + missing_slides = self.db.get_missing_slides_from_view(redcap_id) + if not missing_slides or len(missing_slides) ==0 : + slides_marked_missing = self.db.slides_marked_missing_by_redcap_id(redcap_id) + if slides_marked_missing and len(slides_marked_missing) > 0: + for slide in slides_marked_missing: + self.db.update_missing_slide_flag(slide['image_id']) + def determine_new_slide_name(self, sample_id: str, kit_id: str, stain_info: str, block_id: str): slides_for_kit = self.db.get_slide_manifest_import_by_kit(kit_id, stain_info) diff --git a/data_management/watch_files.py b/data_management/watch_files.py index 5192976..72469ca 100644 --- a/data_management/watch_files.py +++ b/data_management/watch_files.py @@ -113,6 +113,7 @@ def move_packages_to_DLU(self, packages): self.dlu_state.set_package_state(package_id, PackageState.UPLOAD_SUCCEEDED) self.dlu_state.clear_cache() + def fill_in_null_package_ids(self): self.slide_management.fill_in_package_ids()