diff --git a/apps/organizations/models.py b/apps/organizations/models.py index 4a55736b..bf284b30 100644 --- a/apps/organizations/models.py +++ b/apps/organizations/models.py @@ -641,6 +641,7 @@ class TermsAndConditions(HasAutoTranslatedFields, OrganizationRelated, models.Mo auto_translated_fields: list[str] = ["html:content"] auto_translate_all_languages: bool = True + auto_translate_instantly: bool = True organization = models.OneToOneField( "organizations.Organization", diff --git a/services/translator/mixins.py b/services/translator/mixins.py index 0f13520d..4d5d3a41 100644 --- a/services/translator/mixins.py +++ b/services/translator/mixins.py @@ -67,6 +67,7 @@ class HasAutoTranslatedFields(metaclass=TranslatedModelMeta): auto_translated_fields: list[str] = [] auto_translate_all_languages: bool = False + auto_translate_instantly: bool = False _auto_translated_fields: list[str] = [] _html_auto_translated_fields: list[str] = [] @@ -103,12 +104,14 @@ def update_translated_fields(self, force_update: bool = True): or getattr(self, field) != self._original_auto_translated_fields_values[field] ): - AutoTranslatedField.objects.update_or_create( + auto_translated_field, _ = AutoTranslatedField.objects.update_or_create( content_type=content_type, object_id=str(self.pk), field_name=field, defaults={"up_to_date": False, "field_type": field_type}, ) + if self.auto_translate_instantly: + auto_translated_field.update_translation() def _delete_auto_translated_fields(self): AutoTranslatedField.objects.filter( diff --git a/services/translator/models.py b/services/translator/models.py index 94f443e6..e16c5859 100644 --- a/services/translator/models.py +++ b/services/translator/models.py @@ -1,6 +1,16 @@ +import re + +from bs4 import BeautifulSoup +from django.conf import settings from django.contrib.contenttypes.models import ContentType from django.db import models +from apps.commons.mixins import OrganizationRelated + +from .interface import AzureTranslatorService + +AZURE_MAX_LENGTH = 50000 + class AutoTranslatedField(models.Model): """ @@ -37,3 +47,130 @@ class Meta: def instance(self) -> models.Model: """Return the related instance.""" return self.content_type.get_object_for_this_type(pk=self.object_id) + + @staticmethod + def split_content( + content: str, max_length: int, text_type: str = "plain" + ) -> list[str] | list[BeautifulSoup]: + """ + Split content into chunks of max_length, trying to split at html tags. + + Maximum length for Azure Translator is 50 000 characters per request accross all + languages. + + For example, sending a translate request of 3 000 characters to translate to three + different languages results in a request size of 3 000 x 3 = 9 000 characters + + This function splits the content into chunks either at the end of a html tag or + at the last space before max_length. + """ + if text_type == "html": + soup = BeautifulSoup(content, "html.parser") + return soup.find_all(recursive=False) + + if len(content) <= max_length: + return [content] + chunks = [] + start = 0 + while start < len(content): + end = start + max_length + if end >= len(content): + chunks.append(content[start:]) + break + split_at = content.rfind(" ", start, end) + if split_at == -1 or split_at <= start: + split_at = end + chunks.append(content[start:split_at]) + start = split_at + return chunks + + def update_translation(self): + instance = self.instance + field_name = self.field_name + content = getattr(instance, field_name, "") + if not isinstance(instance, OrganizationRelated): + raise ValueError( + f"{instance._meta.model.__name__} does not support translations. " + "`OrganizationRelated` mixin is required for automatic translations." + ) + if getattr(instance, "auto_translate_all_languages", False): + languages = ( + settings.REQUIRED_LANGUAGES + if any( + o.auto_translate_content + for o in instance.get_related_organizations() + ) + else {} + ) + else: + organizations = [ + o + for o in instance.get_related_organizations() + if o.auto_translate_content + ] + # iter over languages in set (remove duplicate language) + languages: set[str] = { + lang for org in organizations for lang in org.languages + } + if languages: + base_max_length = AZURE_MAX_LENGTH * 0.8 # Safety margin + max_length = int(base_max_length // len(languages)) + if content: + chunks = self.split_content( + content, max_length, text_type=self.field_type + ) + translations = {} + detected_languages = [] + for chunk in chunks: + if ( + self.field_type == "html" + and (not str(chunk).strip() or not chunk.get_text(strip=True)) + ) or (re.findall(r'data:image\/[a-zA-Z]+;base64,[^"\']+', content)): + chunk_translations = [ + {"to": lang, "text": str(chunk)} for lang in languages + ] + else: + chunk = str(chunk) + if len(chunk) <= max_length: + chunk_translations, detected_language = ( + AzureTranslatorService.translate_text_content( + str(chunk), languages, self.field_type + ) + ) + detected_languages.append(detected_language) + elif len(chunk) < base_max_length: + for lang in languages: + lang_chunk_translation, detected_language = ( + AzureTranslatorService.translate_text_content( + str(chunk), [lang], self.field_type + ) + ) + chunk_translations.append( + { + "to": lang, + "text": lang_chunk_translation[0]["text"], + } + ) + detected_languages.append(detected_language) + else: + chunk_translations = [ + {"to": lang, "text": str(chunk)} for lang in languages + ] + translations = { + f"{field_name}_{translation['to']}": ( + translations.get(f"{field_name}_{translation['to']}", "") + + translation["text"] + ) + for translation in chunk_translations + } + # Use the most common detected language among chunks + if detected_languages: + detected_language = max( + set(detected_languages), key=detected_languages.count + ) + translations[f"{field_name}_detected_language"] = detected_language + else: + translations = {f"{field_name}_{lang}": content for lang in languages} + instance._meta.model.objects.filter(pk=instance.pk).update(**translations) + self.up_to_date = True + self.save() diff --git a/services/translator/tasks.py b/services/translator/tasks.py index 23b76b71..7f16e092 100644 --- a/services/translator/tasks.py +++ b/services/translator/tasks.py @@ -6,7 +6,6 @@ from projects.celery import app from .models import AutoTranslatedField -from .utils import update_auto_translated_field logger = logging.getLogger(__name__) @@ -16,7 +15,7 @@ def automatic_translations(): for field in AutoTranslatedField.objects.filter(up_to_date=False): try: - update_auto_translated_field(field) + field.update_translation() except Exception as e: # noqa: PIE786 logger.error(f"Error updating auto-translated field {field.id}: {e}") @@ -48,7 +47,7 @@ def translate_object( for field in queryset: try: - update_auto_translated_field(field) + field.update_translation() except Exception as e: # noqa: PIE786 logger.error( f"Error updating model-translated {model} field {field.id}: {e}" diff --git a/services/translator/testcases.py b/services/translator/testcases.py new file mode 100644 index 00000000..22423ea4 --- /dev/null +++ b/services/translator/testcases.py @@ -0,0 +1,33 @@ +from types import SimpleNamespace + +from apps.commons.test import JwtAPITestCase + + +class MockTranslateTestCase(JwtAPITestCase): + @classmethod + def translator_side_effect( + cls, body: list[str], to_language: list[str], text_type: str = "plain" + ) -> list[dict]: + """ + This side effect is meant to be used with unittest mock. It will mock every call + made to the Azure translator API. + + Arguments + --------- + - content (str): The text content to be translated. + - languages (list of str): The target languages for translation. + + Returns + ------- + - A list of SimpleNamespace objects that simulates the Azure translator API response. + """ + + return [ + SimpleNamespace( + detected_language=SimpleNamespace(language="en", score=1.0), + translations=[ + SimpleNamespace(text=f"{lang} : {body[0]}", to=lang) + for lang in to_language + ], + ) + ] diff --git a/services/translator/tests/tasks/test_update_translations_task.py b/services/translator/tests/tasks/test_update_translations_task.py index 17388af2..640f9e13 100644 --- a/services/translator/tests/tasks/test_update_translations_task.py +++ b/services/translator/tests/tasks/test_update_translations_task.py @@ -1,4 +1,3 @@ -from types import SimpleNamespace from unittest.mock import call, patch from django.conf import settings @@ -9,7 +8,6 @@ from apps.accounts.factories import PeopleGroupFactory, UserFactory from apps.accounts.models import ProjectUser from apps.announcements.factories import AnnouncementFactory -from apps.commons.test import JwtAPITestCase from apps.feedbacks.factories import CommentFactory, ReviewFactory from apps.files.factories import ( AttachmentFileFactory, @@ -45,45 +43,19 @@ ) from services.translator.models import AutoTranslatedField from services.translator.tasks import automatic_translations -from services.translator.utils import update_auto_translated_field +from services.translator.tests.tasks.test_update_translations_task import ( + MockTranslateTestCase, +) faker = Faker() -class MockTranslateTestCase(JwtAPITestCase): - @classmethod - def translator_side_effect( - cls, body: list[str], to_language: list[str], text_type: str = "plain" - ) -> list[dict]: - """ - This side effect is meant to be used with unittest mock. It will mock every call - made to the Azure translator API. - - Arguments - --------- - - content (str): The text content to be translated. - - languages (list of str): The target languages for translation. - - Returns - ------- - - A list of SimpleNamespace objects that simulates the Azure translator API response. - """ - - return [ - SimpleNamespace( - detected_language=SimpleNamespace(language="en", score=1.0), - translations=[ - SimpleNamespace(text=f"{lang} : {body[0]}", to=lang) - for lang in to_language - ], - ) - ] - - class UpdateTranslationsTestCase(MockTranslateTestCase): @classmethod - def setUpTestData(cls) -> None: + @patch("azure.ai.translation.text.TextTranslationClient.translate") + def setUpTestData(cls, mock_translate) -> None: super().setUpTestData() + mock_translate.side_effect = cls.translator_side_effect cls.organization_data = { field: ( f"

{faker.word()}

" @@ -214,9 +186,8 @@ def setUpTestData(cls) -> None: cls.organization_2, cls.organization_3, ]: - TermsAndConditions.objects.update_or_create( - organization=organization, defaults=data - ) + TermsAndConditions.objects.get_or_create(organization=organization) + TermsAndConditions.objects.filter(organization=organization).update(**data) cls.instances.append( { "model": TermsAndConditions, @@ -457,7 +428,7 @@ def test_safe_translation_with_base64_image(self, mock_translate): field_name="description", ) - update_auto_translated_field(field) + field.update_translation() mock_translate.assert_has_calls([]) @patch("azure.ai.translation.text.TextTranslationClient.translate") @@ -482,7 +453,7 @@ def test_split_content_html(self, mock_translate): object_id=project.pk, field_name="description", ) - update_auto_translated_field(field) + field.update_translation() mock_translate.assert_has_calls( [ call( diff --git a/services/translator/tests/views/test_terms_and_conditions_translated_fields.py b/services/translator/tests/views/test_terms_and_conditions_translated_fields.py new file mode 100644 index 00000000..2d3f4a02 --- /dev/null +++ b/services/translator/tests/views/test_terms_and_conditions_translated_fields.py @@ -0,0 +1,70 @@ +from unittest.mock import call, patch + +from django.conf import settings +from django.contrib.contenttypes.models import ContentType +from django.urls import reverse +from faker import Faker +from rest_framework import status + +from apps.accounts.factories import UserFactory +from apps.accounts.utils import get_superadmins_group +from apps.organizations.factories import OrganizationFactory +from apps.organizations.models import TermsAndConditions +from services.translator.models import AutoTranslatedField +from services.translator.testcases import MockTranslateTestCase + +faker = Faker() + + +class TermsAndConditionsTranslatedFieldsTestCase(MockTranslateTestCase): + @classmethod + def setUpTestData(cls) -> None: + super().setUpTestData() + cls.organization = OrganizationFactory(auto_translate_content=True) + cls.superadmin = UserFactory(groups=[get_superadmins_group()]) + cls.content_type = ContentType.objects.get_for_model(TermsAndConditions) + + @patch("azure.ai.translation.text.TextTranslationClient.translate") + def test_update_terms_and_conditions(self, mock_translate): + mock_translate.side_effect = self.translator_side_effect + self.client.force_authenticate(self.superadmin) + terms_and_conditions = self.organization.terms_and_conditions + payload = {"content": f"

{faker.text()}

"} + response = self.client.patch( + reverse( + "TermsAndConditions-detail", + args=(self.organization.code, terms_and_conditions.id), + ), + payload, + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + auto_translated_fields = AutoTranslatedField.objects.filter( + content_type=self.content_type, object_id=terms_and_conditions.id + ) + self.assertEqual( + auto_translated_fields.count(), + len(TermsAndConditions._auto_translated_fields), + ) + self.assertSetEqual( + {field.field_name for field in auto_translated_fields}, + set(TermsAndConditions._auto_translated_fields), + ) + for field in auto_translated_fields: + self.assertTrue(field.up_to_date) + terms_and_conditions.refresh_from_db() + mock_translate.assert_has_calls( + [ + call( + body=[ + getattr( + terms_and_conditions, + field.split(":", 1)[1] if ":" in field else field, + ) + ], + to_language=({str(lang) for lang in settings.REQUIRED_LANGUAGES}), + text_type=(field.split(":", 1)[0] if ":" in field else "plain"), + ) + for field in TermsAndConditions.auto_translated_fields + ], + any_order=True, + ) diff --git a/services/translator/utils.py b/services/translator/utils.py deleted file mode 100644 index 843f439d..00000000 --- a/services/translator/utils.py +++ /dev/null @@ -1,132 +0,0 @@ -import re - -from bs4 import BeautifulSoup -from django.conf import settings - -from apps.commons.mixins import OrganizationRelated - -from .interface import AzureTranslatorService -from .models import AutoTranslatedField - -AZURE_MAX_LENGTH = 50000 - - -def split_content( - content: str, max_length: int, text_type: str = "plain" -) -> list[str] | list[BeautifulSoup]: - """ - Split content into chunks of max_length, trying to split at html tags. - - Maximum length for Azure Translator is 50 000 characters per request accross all - languages. - - For example, sending a translate request of 3 000 characters to translate to three - different languages results in a request size of 3 000 x 3 = 9 000 characters - - This function splits the content into chunks either at the end of a html tag or - at the last space before max_length. - """ - if text_type == "html": - soup = BeautifulSoup(content, "html.parser") - return soup.find_all(recursive=False) - - if len(content) <= max_length: - return [content] - chunks = [] - start = 0 - while start < len(content): - end = start + max_length - if end >= len(content): - chunks.append(content[start:]) - break - split_at = content.rfind(" ", start, end) - if split_at == -1 or split_at <= start: - split_at = end - chunks.append(content[start:split_at]) - start = split_at - return chunks - - -def update_auto_translated_field(field: AutoTranslatedField): - instance = field.instance - field_name = field.field_name - content = getattr(instance, field_name, "") - if not isinstance(instance, OrganizationRelated): - raise ValueError( - f"{instance._meta.model.__name__} does not support translations. " - "`OrganizationRelated` mixin is required for automatic translations." - ) - if getattr(instance, "auto_translate_all_languages", False): - languages = ( - settings.REQUIRED_LANGUAGES - if any( - o.auto_translate_content for o in instance.get_related_organizations() - ) - else {} - ) - else: - organizations = [ - o for o in instance.get_related_organizations() if o.auto_translate_content - ] - # iter over languages in set (remove duplicate language) - languages: set[str] = {lang for org in organizations for lang in org.languages} - if languages: - base_max_length = AZURE_MAX_LENGTH * 0.8 # Safety margin - max_length = int(base_max_length // len(languages)) - if content: - chunks = split_content(content, max_length, text_type=field.field_type) - translations = {} - detected_languages = [] - for chunk in chunks: - if ( - field.field_type == "html" - and (not str(chunk).strip() or not chunk.get_text(strip=True)) - ) or (re.findall(r'data:image\/[a-zA-Z]+;base64,[^"\']+', content)): - chunk_translations = [ - {"to": lang, "text": str(chunk)} for lang in languages - ] - else: - chunk = str(chunk) - if len(chunk) <= max_length: - chunk_translations, detected_language = ( - AzureTranslatorService.translate_text_content( - str(chunk), languages, field.field_type - ) - ) - detected_languages.append(detected_language) - elif len(chunk) < base_max_length: - for lang in languages: - lang_chunk_translation, detected_language = ( - AzureTranslatorService.translate_text_content( - str(chunk), [lang], field.field_type - ) - ) - chunk_translations.append( - { - "to": lang, - "text": lang_chunk_translation[0]["text"], - } - ) - detected_languages.append(detected_language) - else: - chunk_translations = [ - {"to": lang, "text": str(chunk)} for lang in languages - ] - translations = { - f"{field_name}_{translation['to']}": ( - translations.get(f"{field_name}_{translation['to']}", "") - + translation["text"] - ) - for translation in chunk_translations - } - # Use the most common detected language among chunks - if detected_languages: - detected_language = max( - set(detected_languages), key=detected_languages.count - ) - translations[f"{field_name}_detected_language"] = detected_language - else: - translations = {f"{field_name}_{lang}": content for lang in languages} - instance._meta.model.objects.filter(pk=instance.pk).update(**translations) - field.up_to_date = True - field.save()