diff --git a/tests/test_maintenance_mode.py b/tests/test_maintenance_mode.py index 0cee1d4..4faf007 100644 --- a/tests/test_maintenance_mode.py +++ b/tests/test_maintenance_mode.py @@ -42,10 +42,38 @@ def _build_progress(values): tpp_connection.cursor().execute("DELETE FROM BuildProgress") +@pytest.fixture +def cleanup_coded_event_snomed_table(tpp_connection): + yield + with tpp_connection.cursor() as cursor: + table = "CodedEvent_SNOMED" + cursor.execute( + f""" + IF OBJECT_ID('{table}', 'U') IS NOT NULL + DROP TABLE {table} + """ + ) + + +def create_coded_event_snomed_table(tpp_connection): + with tpp_connection.cursor() as cursor: + table = "CodedEvent_SNOMED" + cursor.execute( + f""" + IF OBJECT_ID('{table}', 'U') IS NOT NULL + DROP TABLE {table} + + CREATE TABLE {table} ( + CodedEvent_ID INT, + ) + """ + ) + + @pytest.mark.parametrize( - "description,events,is_in_maintenance_mode", + "description,events,is_in_maintenance_mode,expected_build_count", [ - ("No events", [], False), + ("No events", [], False, 0), ( "Historical finished maintenance mode", [ @@ -72,6 +100,7 @@ def _build_progress(values): ), ], False, + 0, ), ( "Historical unfinished maintenance mode", @@ -113,6 +142,7 @@ def _build_progress(values): ), ], False, + 0, ), ( "Inconsistent entries, assume maintenance for safety", @@ -154,6 +184,7 @@ def _build_progress(values): ), ], True, + 1, ), ( "Swapping tables", @@ -174,6 +205,7 @@ def _build_progress(values): ), ], True, + 1, ), ( "Building CodedEvent_SNOMED", @@ -201,18 +233,85 @@ def _build_progress(values): ), ], True, + 1, + ), + ( + "Multiple ongoing builds", + [ + ( + "OpenSAFELY", + "2025-06-12T14:25:10", + "2025-06-12T14:25:10", + "9999-12-31T00:00:00", + None, + ), + ( + "Swap Tables", + "2025-06-12T14:25:10", + "2025-06-12T14:25:10", + "9999-12-31T00:00:00", + None, + ), + ( + "OpenSAFELY", + "2026-02-01T14:25:10", + "2026-02-01T14:25:10", + "9999-12-31T00:00:00", + None, + ), + ], + True, + 2, ), ], ) def test_in_maintenance_mode( - tpp_connection, build_progress_factory, description, events, is_in_maintenance_mode + tpp_connection, + build_progress_factory, + description, + events, + is_in_maintenance_mode, + expected_build_count, ): for event in events: build_progress_factory(event) verify_build_progress_count(tpp_connection, events) - mode = in_maintenance_mode(tpp_connection) + mode, build_count = in_maintenance_mode(tpp_connection) assert mode is is_in_maintenance_mode, description + assert build_count == expected_build_count + + +@pytest.mark.parametrize( + "table_available,is_in_maintenance_mode", [(True, False), (False, True)] +) +def test_in_maintenance_mode_checks_coded_event_snomed_availability( + tpp_connection, + build_progress_factory, + cleanup_coded_event_snomed_table, + table_available, + is_in_maintenance_mode, +): + # Main build has started, SwapTables and CodedEvent_SNOMED events have not started yet + # We are not in maintenance mode according to the BuildProgress table, but the final + # check to ensure availability of the CodedEvent_SNOMED table can override this + events = [ + ( + "OpenSAFELY", + "2025-06-12T14:25:10", + "2025-06-12T14:25:10", + "9999-12-31T00:00:00", + None, + ) + ] + for event in events: + build_progress_factory(event) + verify_build_progress_count(tpp_connection, events) + + if table_available: + create_coded_event_snomed_table(tpp_connection) + + assert in_maintenance_mode(tpp_connection)[0] == is_in_maintenance_mode def test_in_maintenance_mode_custom_event(tpp_connection, build_progress_factory): @@ -239,7 +338,7 @@ def test_in_maintenance_mode_custom_event(tpp_connection, build_progress_factory build_progress_factory(event) verify_build_progress_count(tpp_connection, events) - assert in_maintenance_mode(tpp_connection) is True + assert in_maintenance_mode(tpp_connection)[0] is True def verify_build_progress_count(tpp_connection, events): diff --git a/tpp_database_utils/main.py b/tpp_database_utils/main.py index c822001..993aeb9 100644 --- a/tpp_database_utils/main.py +++ b/tpp_database_utils/main.py @@ -10,12 +10,13 @@ def in_maintenance_mode(tpp_connection): - mode = maintenance_mode.in_maintenance_mode(tpp_connection) - + mode, build_count = maintenance_mode.in_maintenance_mode(tpp_connection) # This should be the only output on stdout, # anything else needs to be on stderr if mode: - print("db-maintenance") + print(f"db-maintenance;{build_count}") + else: + print(f"none;{build_count}") def update_custom_medication_dictionary(tpp_connection, temp_database_name): diff --git a/tpp_database_utils/maintenance_mode.py b/tpp_database_utils/maintenance_mode.py index 1d051ef..81ef8da 100644 --- a/tpp_database_utils/maintenance_mode.py +++ b/tpp_database_utils/maintenance_mode.py @@ -1,5 +1,7 @@ import os +from pymssql.exceptions import ProgrammingError + def in_maintenance_mode(tpp_connection): """TPP Specific maintenance mode query. @@ -26,38 +28,63 @@ def in_maintenance_mode(tpp_connection): The trigger to exit maintenance mode is when the final OpenSAFELY event finishes. - """ + Occasionally the build is slow (usually due to intensive concurrent DB queries), + and the previous week's build does not complete before the next build starts. In + this case we end up with two ongoing builds, so in order to determine DB status + we need to check for ongoing SwapTables or CodedEvent_SNOMED events since the start + of the earlier build. The presence of two ongoing builds is a flag for something + that we want to alert on, so we also return the build count. + """ cursor = tpp_connection.cursor() + # Select the TWO most recently started overall OpenSAFELY build events cursor.execute( """ - SELECT TOP 1 EventStart, EventEnd + SELECT TOP 2 EventStart, EventEnd FROM BuildProgress WHERE Event = 'OpenSAFELY' ORDER BY EventStart DESC """ ) - latest_rebuild = cursor.fetchone() - if not latest_rebuild: + latest_rebuilds = cursor.fetchall() + if not latest_rebuilds: # No events at all, we can't be in maintenance mode - return False + return False, 0 - rebuild_start, rebuild_end = latest_rebuild - if rebuild_end.year < 9999: - # Rebuild has completed therefore we aren't in maintenance mode - return False + # Of the two most recently started builds, iddentify those that are ongoing (i.e. + # those with an end date of 9999-12-31) + _, most_recent_end = latest_rebuilds[0] + if most_recent_end.year < 9999: + # The most recent build is complete, so we're not in maintenance mode, and any + # previous build is a historical one which we can ignore + return False, 0 + ongoing_builds = [rebuild for rebuild in latest_rebuilds if rebuild[1].year == 9999] + build_count = len(ongoing_builds) + + # Check for incomplete events starting after the start of the earliest ongoing build + earliest_build_start_date = min(ongoing_builds)[0] cursor.execute( - "SELECT Event FROM BuildProgress WHERE EventStart >= %s", - rebuild_start, + "SELECT Event, EventEnd FROM BuildProgress WHERE EventStart >= %s", + earliest_build_start_date, ) - current_events = {row[0] for row in cursor.fetchall()} + current_events = {row[0] for row in cursor.fetchall() if row[1].year == 9999} # Env var allows quick change of start event logic if needed start_events = os.environ.get( "TPP_MAINTENANCE_START_EVENT", "Swap Tables,CodedEvent_SNOMED" ).split(",") + in_maintenance_mode = bool(current_events.intersection(start_events)) + + if not in_maintenance_mode: + # According to the events, we're not in maintenance mode. As a final check, + # make sure that the CodedEvent_SNOMED table really is available. + try: + cursor.execute("SELECT TOP 1 CodedEvent_ID FROM CodedEvent_SNOMED") + except ProgrammingError: + in_maintenance_mode = True + # We start maintenance mode as soon as we see any of the "trigger" events # and then don't exit until the entire build is finished - return bool(current_events.intersection(start_events)) + return in_maintenance_mode, build_count