diff --git a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py index 4f942417ae0e..6865609f9511 100644 --- a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py +++ b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py @@ -13,7 +13,9 @@ from django.contrib.sites.models import Site from django.core import mail from django.core.cache import cache +from django.db import connection from django.test import TestCase +from django.test.utils import CaptureQueriesContext from django.urls import reverse from enterprise.models import ( EnterpriseCourseEnrollment, @@ -1078,9 +1080,78 @@ def cleanup_and_assert_status(self, data=None, expected_status=status.HTTP_204_N assert response.status_code == expected_status return response - def test_simple_success(self): - self.cleanup_and_assert_status() - assert not UserRetirementStatus.objects.all() + def _assert_redacted_update_delete_queries(self, queries, redacted_username, redacted_email, redacted_name): + """ + Helper method to verify UPDATE and DELETE queries contain correct field-value assignments. + + Args: + queries: List of captured query dicts from CaptureQueriesContext + redacted_username: Expected redacted username value + redacted_email: Expected redacted email value + redacted_name: Expected redacted name value + """ + update_queries = [q for q in queries if 'UPDATE' in q['sql'] and 'user_api_userretirementstatus' in q['sql']] + delete_queries = [q for q in queries if 'DELETE' in q['sql'] and 'user_api_userretirementstatus' in q['sql']] + + # Should have 9 UPDATE and 9 DELETE queries + assert len(update_queries) == 9, f"Expected 9 UPDATE queries, found {len(update_queries)}" + assert len(delete_queries) == 9, f"Expected 9 DELETE queries, found {len(delete_queries)}" + + # Verify UPDATE queries contain the correct field-value assignments + for update_query in update_queries: + sql_lower = update_query['sql'] + # Check that the correct field is set with the correct value + # This ensures that if someone swaps the assignments, the test will fail + assert "original_username" in sql_lower and f"= '{redacted_username}'" in sql_lower, ( + f"UPDATE query missing 'original_username = {redacted_username}': {sql_lower}" + ) + assert "original_email" in sql_lower and f"= '{redacted_email}'" in sql_lower, ( + f"UPDATE query missing 'original_email = {redacted_email}': {sql_lower}" + ) + assert "original_name" in sql_lower and f"= '{redacted_name}'" in sql_lower, ( + f"UPDATE query missing 'original_name = {redacted_name}': {sql_lower}" + ) + + def test_default_redacted_values(self): + """ + Test basic cleanup with default redacted values. + Verify that redaction (UPDATE) happens before deletion (DELETE). + Captures actual SQL queries to ensure UPDATE queries contain correct field-value assignments. + """ + with CaptureQueriesContext(connection) as context: + self.cleanup_and_assert_status() + + # Verify records are deleted after redaction + retirements = UserRetirementStatus.objects.all() + assert retirements.count() == 0 + + # Verify UPDATE and DELETE queries with default 'redacted' value + self._assert_redacted_update_delete_queries(context.captured_queries, 'redacted', 'redacted', 'redacted') + + def test_custom_redacted_values(self): + """Test that custom redacted values are applied before deletion.""" + custom_username = 'username-redacted-12345' + custom_email = 'email-redacted-67890' + custom_name = 'name-redacted-abcde' + + data = { + 'usernames': self.usernames, + 'redacted_username': custom_username, + 'redacted_email': custom_email, + 'redacted_name': custom_name + } + + with CaptureQueriesContext(connection) as context: + self.cleanup_and_assert_status(data=data) + + # Verify records are deleted after redaction + retirements = UserRetirementStatus.objects.all() + assert retirements.count() == 0 + + # Verify UPDATE and DELETE queries with custom redacted values + self._assert_redacted_update_delete_queries( + context.captured_queries, custom_username, custom_email, custom_name + ) def test_leaves_other_users(self): remaining_usernames = [] diff --git a/openedx/core/djangoapps/user_api/accounts/views.py b/openedx/core/djangoapps/user_api/accounts/views.py index c3ff6ce7a2f2..b80e5c6fbb88 100644 --- a/openedx/core/djangoapps/user_api/accounts/views.py +++ b/openedx/core/djangoapps/user_api/accounts/views.py @@ -1024,14 +1024,20 @@ def cleanup(self, request): ``` { - 'usernames': ['user1', 'user2', ...] + 'usernames': ['user1', 'user2', ...], + 'redacted_username': 'Value to store in username field', + 'redacted_email': 'Value to store in email field', + 'redacted_name': 'Value to store in name field' } ``` - Deletes a batch of retirement requests by username. + Redacts a batch of retirement requests by redacting PII fields. """ try: usernames = request.data["usernames"] + redacted_username = request.data.get("redacted_username", "redacted") + redacted_email = request.data.get("redacted_email", "redacted") + redacted_name = request.data.get("redacted_name", "redacted") if not isinstance(usernames, list): raise TypeError("Usernames should be an array.") @@ -1045,7 +1051,16 @@ def cleanup(self, request): if len(usernames) != len(retirements): raise UserRetirementStatus.DoesNotExist("Not all usernames exist in the COMPLETE state.") - retirements.delete() + # Redact PII fields first, then delete. In case an ETL tool is syncing data + # to a downstream data warehouse, and treats the deletes as soft-deletes, + # the data will have first been redacted, protecting the sensitive PII. + for retirement in retirements: + retirement.original_username = redacted_username + retirement.original_email = redacted_email + retirement.original_name = redacted_name + retirement.save() + retirement.delete() + return Response(status=status.HTTP_204_NO_CONTENT) except (RetirementStateError, UserRetirementStatus.DoesNotExist, TypeError) as exc: return Response(str(exc), status=status.HTTP_400_BAD_REQUEST)