+
# Multiomix
@@ -15,6 +15,7 @@ This document is focused on the **development** of the system. If you are lookin
- Node JS >= `20.x` (tested version: `20.x`)
- [Modulector][modulector] `2.2.0`
- [BioAPI][bioapi] `1.2.1`
+- R `4.4.2` (required for `differential-expression`)
## Installation
@@ -66,6 +67,7 @@ Every time you want to work with Multiomix, you need to follow the below steps:
1. `python3 -m celery -A multiomics_intermediate worker -l info -Q stats`
1. `python3 -m celery -A multiomics_intermediate worker -l info -Q inference`
1. `python3 -m celery -A multiomics_intermediate worker -l info -Q sync_datasets`
+ 1. `python3 -m celery -A multiomics_intermediate worker -l info -Q differential_expression`
1. If you want to check Task in the GUI you can run [Flower](https://flower.readthedocs.io/en/latest/index.html) `python3 -m celery -A multiomics_intermediate flower`
**NOTE:** maybe in Windows is needed to add `--pool=solo` to the previous commands. Example: `python3 -m celery -A multiomics_intermediate worker -l info -Q correlation_analysis --concurrency 1 --pool=solo`
diff --git a/config/requirements.txt b/config/requirements.txt
index 253e7564..1279b8de 100644
--- a/config/requirements.txt
+++ b/config/requirements.txt
@@ -28,3 +28,5 @@ scipy==1.13.0
statsmodels==0.14.2
xlrd==2.0.1
openpyxl==3.1.5
+rpy2==3.6.1
+urllib3==2.5.0
\ No newline at end of file
diff --git a/config/requirements_celery.txt b/config/requirements_celery.txt
index 859c0823..5c6b30bc 100644
--- a/config/requirements_celery.txt
+++ b/config/requirements_celery.txt
@@ -25,3 +25,5 @@ scipy==1.13.0
statsmodels==0.14.2
xlrd==2.0.1
openpyxl==3.1.5
+rpy2==3.6.1
+urllib3==2.5.0
\ No newline at end of file
diff --git a/docker-compose_dist.yml b/docker-compose_dist.yml
index ca1ac3db..b8aa0abe 100644
--- a/docker-compose_dist.yml
+++ b/docker-compose_dist.yml
@@ -233,6 +233,21 @@ services:
# REDIS_HOST: 'redis'
# REDIS_PORT: 6379
+ # Celery worker for differential expression
+ differential-expression-worker:
+ image: omicsdatascience/multiomix:5.6.0-celery
+ restart: 'always'
+ depends_on:
+ - db
+ - mongo
+ volumes:
+ - media_data:/src/media
+ environment:
+ <<: *common-variables
+ QUEUE_NAME: 'differential_expression' # This MUST NOT be changed
+ CONCURRENCY: 2
+ # PostgreSQL, Mongo and Redis use the same default values as the rest of the services
+
# Django backend service
multiomix:
image: omicsdatascience/multiomix:5.6.0
@@ -290,6 +305,7 @@ services:
- stats-worker
- inference-worker
- sync-datasets-worker
+ - differential-expression-worker
volumes:
mongo_data:
diff --git a/src/api_service/migrations/0062_alter_experiment_clinical_source_and_more.py b/src/api_service/migrations/0062_alter_experiment_clinical_source_and_more.py
new file mode 100644
index 00000000..c86db344
--- /dev/null
+++ b/src/api_service/migrations/0062_alter_experiment_clinical_source_and_more.py
@@ -0,0 +1,142 @@
+# Generated by Django 4.2.19 on 2026-01-14 21:38
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ (
+ "datasets_synchronization",
+ "0036_alter_cgdsstudy_clinical_patient_dataset_and_more",
+ ),
+ ("genes", "0002_auto_20210114_2331"),
+ ("user_files", "0015_alter_userfile_options"),
+ ("api_service", "0061_alter_experiment_shared_users"),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name="experiment",
+ name="clinical_source",
+ field=models.ForeignKey(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ related_name="experiments_as_clinical_source",
+ to="api_service.experimentclinicalsource",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="experiment",
+ name="gem_source",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="experiments_as_gem_source",
+ to="api_service.experimentsource",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="experiment",
+ name="mRNA_source",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="experiments_as_mrna_source",
+ to="api_service.experimentsource",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="experimentclinicalsource",
+ name="extra_cgds_dataset",
+ field=models.ForeignKey(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="experiment_clinical_sources_as_extra_cgds_dataset",
+ to="datasets_synchronization.cgdsdataset",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="experimentsource",
+ name="cgds_dataset",
+ field=models.ForeignKey(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="experiment_sources_as_cgds_dataset",
+ to="datasets_synchronization.cgdsdataset",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="experimentsource",
+ name="user_file",
+ field=models.ForeignKey(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="experiment_sources_as_user_file",
+ to="user_files.userfile",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="genecnacombination",
+ name="experiment",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="%(class)ss",
+ to="api_service.experiment",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="genecnacombination",
+ name="gene",
+ field=models.ForeignKey(
+ db_column="gene",
+ db_constraint=False,
+ on_delete=django.db.models.deletion.DO_NOTHING,
+ related_name="%(class)ss_as_gene",
+ to="genes.gene",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="genemethylationcombination",
+ name="experiment",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="%(class)ss",
+ to="api_service.experiment",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="genemethylationcombination",
+ name="gene",
+ field=models.ForeignKey(
+ db_column="gene",
+ db_constraint=False,
+ on_delete=django.db.models.deletion.DO_NOTHING,
+ related_name="%(class)ss_as_gene",
+ to="genes.gene",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="genemirnacombination",
+ name="experiment",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="%(class)ss",
+ to="api_service.experiment",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="genemirnacombination",
+ name="gene",
+ field=models.ForeignKey(
+ db_column="gene",
+ db_constraint=False,
+ on_delete=django.db.models.deletion.DO_NOTHING,
+ related_name="%(class)ss_as_gene",
+ to="genes.gene",
+ ),
+ ),
+ ]
diff --git a/src/api_service/models.py b/src/api_service/models.py
index e18ec18e..a6227a9b 100644
--- a/src/api_service/models.py
+++ b/src/api_service/models.py
@@ -335,7 +335,9 @@ def number_of_rows(self) -> int:
"""
if self.user_file:
return self.user_file.number_of_rows
- return self.__get_cgds_datasets_joined_df().shape[0]
+ if self.cgds_dataset:
+ return self.__get_cgds_datasets_joined_df().shape[0]
+ return 0
@property
def number_of_samples(self) -> int:
@@ -345,7 +347,9 @@ def number_of_samples(self) -> int:
"""
if self.user_file:
return self.user_file.number_of_samples
- return self.__get_cgds_datasets_joined_df().shape[1]
+ if self.cgds_dataset:
+ return self.__get_cgds_datasets_joined_df().shape[1]
+ return 0
class Experiment(models.Model):
@@ -396,10 +400,8 @@ class Experiment(models.Model):
# TODO: this can be stored in the Methylation type entity. Set the corresponding nullity in the new schema
correlate_with_all_genes: bool = models.BooleanField(blank=False, null=False, default=True)
- shared_institutions = models.ManyToManyField(Institution, blank=True,
- related_name='shared_correlation_analysis')
- shared_users = models.ManyToManyField(User, blank=True,
- related_name='shared_users_correlation_analysis')
+ shared_institutions = models.ManyToManyField(Institution, blank=True, related_name='shared_correlation_analysis')
+ shared_users = models.ManyToManyField(User, blank=True, related_name='shared_users_correlation_analysis')
is_public = models.BooleanField(blank=False, null=False, default=False)
@property
@@ -417,7 +419,7 @@ def get_combination_class(self):
"""
return get_combination_class(self.type)
- def get_clinical_columns(self):
+ def get_clinical_columns(self) -> list[str]:
"""
Gets a list of columns from the clinical data
@return: List of fields in clinical data
diff --git a/src/api_service/utils.py b/src/api_service/utils.py
index 511cc9fa..ff242e33 100644
--- a/src/api_service/utils.py
+++ b/src/api_service/utils.py
@@ -131,5 +131,7 @@ def get_cgds_dataset(cgds_study: CGDSStudy, file_type: FileType) -> Optional[CGD
return cgds_study.cna_dataset
elif file_type == FileType.METHYLATION:
return cgds_study.methylation_dataset
+ elif file_type == FileType.CLINICAL:
+ return cgds_study.clinical_patient_dataset
else:
return None
diff --git a/src/api_service/websocket_functions.py b/src/api_service/websocket_functions.py
index 237e34b9..9fc51255 100644
--- a/src/api_service/websocket_functions.py
+++ b/src/api_service/websocket_functions.py
@@ -40,7 +40,7 @@ def send_update_cgds_studies_command():
def send_update_biomarkers_command(user_id: int):
"""
- Sends a message indicating that an Biomarker's state update has occurred
+ Sends a message indicating that a Biomarker's state update has occurred
"""
user_group_name = f'notifications_{user_id}'
message = {
@@ -51,7 +51,7 @@ def send_update_biomarkers_command(user_id: int):
def send_update_user_file_command(user_id: int):
"""
- Sends a message indicating that an user file's state update has occurred
+ Sends a message indicating that a user file's state update has occurred
"""
user_group_name = f'notifications_{user_id}'
message = {
@@ -84,6 +84,18 @@ def send_update_trained_models_command(user_id: int):
send_message(user_group_name, message)
+def send_update_differential_expression_experiments_command(user_id: int):
+ """
+ Sends a message indicating that a DifferentialExpressionExperiment state update has occurred
+ @param user_id: DifferentialExpressionExperiment's user's id to send the WS message
+ """
+ user_group_name = f'notifications_{user_id}'
+ message = {
+ 'command': 'update_differential_expression_experiments'
+ }
+ send_message(user_group_name, message)
+
+
def send_update_prediction_experiment_command(user_id: int):
"""
Sends a message indicating that a InferenceExperiment state update has occurred
@@ -107,9 +119,10 @@ def send_update_cluster_label_set_command(user_id: int):
}
send_message(user_group_name, message)
+
def send_update_institutions_command(user_id: int):
"""
- Sends a message indicating that a Institution state update has occurred
+ Sends a message indicating that an Institution state update has occurred
@param user_id: Institution's user's id to send the WS message
"""
user_group_name = f'notifications_{user_id}'
@@ -118,13 +131,14 @@ def send_update_institutions_command(user_id: int):
}
send_message(user_group_name, message)
+
def send_update_user_for_institution_command(user_id: int):
"""
- Sends a message indicating that a Institution_user state update has occurred
+ Sends a message indicating that an Institution_user state update has occurred
@param user_id: Institution's user's id to send the WS message
"""
user_group_name = f'notifications_{user_id}'
message = {
'command': 'update_user_for_institution'
}
- send_message(user_group_name, message)
\ No newline at end of file
+ send_message(user_group_name, message)
diff --git a/src/datasets_synchronization/admin.py b/src/datasets_synchronization/admin.py
index 670b8c4c..63341976 100644
--- a/src/datasets_synchronization/admin.py
+++ b/src/datasets_synchronization/admin.py
@@ -42,9 +42,9 @@ def delete_queryset(self, request, queryset):
'mirna_dataset__name', 'mrna_dataset__name')
-
class SurvivalColumnsTupleAdmin(admin.ModelAdmin):
"""Useful for SurvivalColumnsTupleCGDSDataset and SurvivalColumnsTupleUserFile models."""
+
@staticmethod
@admin.display(description='CGDS Dataset')
def dataset(obj: Union[SurvivalColumnsTupleCGDSDataset, SurvivalColumnsTupleUserFile]) -> str:
@@ -53,6 +53,7 @@ def dataset(obj: Union[SurvivalColumnsTupleCGDSDataset, SurvivalColumnsTupleUser
list_display = ('pk', 'dataset', 'time_column', 'event_column')
search_fields = ('time_column', 'event_column')
+
# IMPORTANT: these models should be managed in the CGDS Panel in the frontend!
admin.site.register(CGDSStudy, CGDSStudyAdmin)
admin.site.register(CGDSDataset, CGDSDatasetAdmin)
diff --git a/src/datasets_synchronization/migrations/0036_alter_cgdsstudy_clinical_patient_dataset_and_more.py b/src/datasets_synchronization/migrations/0036_alter_cgdsstudy_clinical_patient_dataset_and_more.py
new file mode 100644
index 00000000..189242c2
--- /dev/null
+++ b/src/datasets_synchronization/migrations/0036_alter_cgdsstudy_clinical_patient_dataset_and_more.py
@@ -0,0 +1,80 @@
+# Generated by Django 4.2.19 on 2026-01-14 21:38
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("datasets_synchronization", "0035_auto_20230922_2356"),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name="cgdsstudy",
+ name="clinical_patient_dataset",
+ field=models.OneToOneField(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ related_name="cgds_studies_as_clinical_patient_dataset",
+ to="datasets_synchronization.cgdsdataset",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="cgdsstudy",
+ name="clinical_sample_dataset",
+ field=models.OneToOneField(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ related_name="cgds_studies_as_clinical_sample_dataset",
+ to="datasets_synchronization.cgdsdataset",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="cgdsstudy",
+ name="cna_dataset",
+ field=models.OneToOneField(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ related_name="cgds_studies_as_cna_dataset",
+ to="datasets_synchronization.cgdsdataset",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="cgdsstudy",
+ name="methylation_dataset",
+ field=models.OneToOneField(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ related_name="cgds_studies_as_methylation_dataset",
+ to="datasets_synchronization.cgdsdataset",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="cgdsstudy",
+ name="mirna_dataset",
+ field=models.OneToOneField(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ related_name="cgds_studies_as_mirna_dataset",
+ to="datasets_synchronization.cgdsdataset",
+ ),
+ ),
+ migrations.AlterField(
+ model_name="cgdsstudy",
+ name="mrna_dataset",
+ field=models.OneToOneField(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ related_name="cgds_studies_as_mrna_dataset",
+ to="datasets_synchronization.cgdsdataset",
+ ),
+ ),
+ ]
diff --git a/src/datasets_synchronization/models.py b/src/datasets_synchronization/models.py
index 41482be8..2ed80a33 100644
--- a/src/datasets_synchronization/models.py
+++ b/src/datasets_synchronization/models.py
@@ -104,6 +104,7 @@ def __get_reverse_study(self) -> Optional['CGDSDataset']:
return cast(Optional['CGDSDataset'], self.clinical_patient_dataset)
elif hasattr(self, 'clinical_sample_dataset'):
return cast(Optional['CGDSDataset'], self.clinical_sample_dataset)
+ return None
@property
def study(self) -> Optional['CGDSDataset']:
@@ -111,7 +112,7 @@ def study(self) -> Optional['CGDSDataset']:
def __str__(self) -> str:
study_name = self.study.name if self.study else '-'
- return f'File: {self.file_path} | Col: {self.mongo_collection_name} | Assigned to study: {study_name}'
+ return f'PK: {self.pk} | File: {self.file_path} | Col: {self.mongo_collection_name} | Assigned to study: {study_name}'
def __compute_number_of_row_and_samples_and_save(self) -> None:
"""
diff --git a/src/datasets_synchronization/serializers.py b/src/datasets_synchronization/serializers.py
index 26bd47fe..337e716f 100644
--- a/src/datasets_synchronization/serializers.py
+++ b/src/datasets_synchronization/serializers.py
@@ -28,7 +28,7 @@ class Meta:
fields = ['id', 'time_column', 'event_column']
def get_fields(self, *args, **kwargs):
- fields = super(SurvivalColumnsTupleCGDSSimpleSerializer, self).get_fields(*args, **kwargs)
+ fields = super(SurvivalColumnsTupleCGDSSimpleSerializer, self).get_fields()
request = self.context.get('request', None)
if request and getattr(request, 'method', None) == "POST":
fields['id'].required = False
@@ -123,9 +123,9 @@ def __check_collection_name(mongo_collection_name: str, editing_cgds_dataset_id:
})
def __update_cgds_dataset(
- self,
- cgds_dataset_instance: CGDSDataset,
- validated_data_pop
+ self,
+ cgds_dataset_instance: CGDSDataset,
+ validated_data_pop
) -> Optional[CGDSDataset]:
"""
Updates a CGDSDataset instance from a request data
@@ -186,10 +186,9 @@ def __update_survival_columns(cgds_dataset_instance: CGDSDataset, validated_data
# If there's an existing id, updates the element
if 'id' in survival_column:
try:
- survival_column_obj: SurvivalColumnsTupleCGDSDataset = SurvivalColumnsTupleCGDSDataset. \
- objects.get(
- pk=survival_column['id']
- )
+ survival_column_obj: SurvivalColumnsTupleCGDSDataset = SurvivalColumnsTupleCGDSDataset.objects.get(
+ pk=survival_column['id']
+ )
survival_column_obj.time_column = survival_column['time_column']
survival_column_obj.event_column = survival_column['event_column']
survival_column_obj.save()
@@ -309,21 +308,17 @@ def update(self, instance: CGDSStudy, validated_data):
class SimpleCGDSDatasetSerializer(serializers.ModelSerializer):
+ """CGDSDataset serializer with few fields for list views."""
+ name = serializers.CharField(source='study.name', read_only=True)
+ description = serializers.CharField(source='study.description', read_only=True)
+ version = serializers.CharField(source='study.version', read_only=True)
+ file_obj = serializers.SerializerMethodField(method_name='get_file_obj')
+
class Meta:
model = CGDSDataset
- fields = []
-
- def to_representation(self, instance):
- # Gets the file content for user_file
- data = super(SimpleCGDSDatasetSerializer, self).to_representation(instance)
-
- # Serialize the study
- study = instance.study
- data['name'] = study.name
- data['description'] = study.description
- data['version'] = study.version
- data['date_last_synchronization'] = instance.date_last_synchronization
- data['file_type'] = instance.file_type
- data['file_obj'] = None
-
- return data
+ fields = ['id', 'name', 'description', 'version', 'date_last_synchronization', 'file_type', 'file_obj']
+
+ @staticmethod
+ def get_file_obj(_instance: CGDSDataset):
+ """Returns None to avoid sending the file in list views."""
+ return None
diff --git a/src/datasets_synchronization/urls.py b/src/datasets_synchronization/urls.py
index 595a6b54..e5f71a5d 100644
--- a/src/datasets_synchronization/urls.py
+++ b/src/datasets_synchronization/urls.py
@@ -7,6 +7,9 @@
# CGDS Studies
path('studies', views.CGDSStudyList.as_view(), name='cgds_studies'),
path('studies/