OpenSourcePolitics · hellpe · May 23, 2025
diff --git a/data_utils/form_summary/forms_summary.py b/data_utils/form_summary/forms_summary.py
@@ -10,6 +10,30 @@
 )
 from ..utils import MTB
 
+def clean_and_prepare_dataframe(df, lang):
+    """Return one ``[title, type, position, has_custom_body]`` row per position.
+
+    ``has_custom_body`` is True when any row at that position has a non-null
+    custom body. The caller's DataFrame is left unmodified.
+
+    Raises:
+        NotImplementedError: if ``lang`` is neither ``'fr'`` nor ``'en'``.
+    """
+    columns_by_lang = {
+        'fr': ['titre_de_la_question', 'type_de_question', 'corps_personnalisé'],
+        'en': ['question_title', 'question_type', 'custom_body'],
+    }
+    if lang not in columns_by_lang:
+        raise NotImplementedError("Provided language is not implemented yet")
+    title_col, type_col, body_col = columns_by_lang[lang]
+    # rename()/assign() return new frames, so the input is never mutated.
+    df = df.rename(columns=lambda name: name.lower().replace(' ', '_'))
+    df = df.assign(has_custom_body=df[body_col].notna())
+    df = df[[title_col, type_col, 'position', 'has_custom_body']].drop_duplicates()
+    # Descending sort puts True first, so keep='first' retains it per position.
+    df = df.sort_values(by=['position', 'has_custom_body'], ascending=False)
+    df = df.drop_duplicates(subset='position', keep='first')
+    return df.sort_values(by='position').values.tolist()
 
 class FormsSummary:
     def __init__(self, credentials):
@@ -32,7 +56,6 @@ def __init__(self, credentials):
             name=credentials['FORM_NAME'],
             collection_id=self.collection_id
         )
-        self.get_questions_parameters()
 
     def get_database_id(self):
         self.database_id = MTB.get_item_info(
@@ -75,30 +98,15 @@ def get_questions_parameters(self):
         import pandas as pd
         res = MTB.get_card_data(card_id=self.form_model_id)
         df = pd.DataFrame(res)
-        df.columns = (
-            df.columns.str.lower()
-            .str.replace(' ', '_')
-        )
-        if self.language == 'fr':
-            df = df[
-                ['titre_de_la_question', 'type_de_question', 'position']
-            ].drop_duplicates()
-        elif self.language == 'en':
-            df = df[
-                ['question_title', 'question_type', 'position']
-            ].drop_duplicates()
-        else:
-            raise NotImplementedError(
-                "Provided language is not implemented yet"
-            )
 
-        self.questions_parameters = df.values.tolist()
+        return clean_and_prepare_dataframe(df, self.language)
 
     def create_question_summary(self):
         chart_list = []
-        for question in self.questions_parameters:
-            question_title, question_type, position = question
-            chart = None
+        questions_parameters = self.get_questions_parameters()
+        for question in questions_parameters:
+            question_title, question_type, position, has_custom_body = question
+            charts = []
             chart_filter = Filter('=', 'position', position)
             question_name = f"{position}. {question_title}"
             if question_type in ["short_answer", "long_answer"]:
@@ -108,7 +116,22 @@ def create_question_summary(self):
                 chart.set_fields(
                     Fields([{'name': 'answer', 'type': 'type/Text'}])
                 )
-            elif question_type in ["single_option"]:
+                charts.append(chart)
+            if question_type in ["single_option", "multiple_option"] and has_custom_body:
+                custom_body_question_name = question_name + " (champ libre)"
+                chart = TableChart(custom_body_question_name, self)
+                chart.set_filters(chart_filter)
+                chart.set_filters(Filter('!=', 'answer', 'Pas de réponse'))
+                # TODO: find how to create a "not empty" filter (https://discourse.metabase.com/t/api-how-to-create-a-not-empty-filter/234624)
+                #chart.set_filters(Filter('not-empty', 'custom_body', ''))
+                chart.set_fields(
+                    Fields([
+                        {'name': 'answer', 'type': 'type/Text'},
+                        {'name': 'custom_body', 'type': 'type/Text'}
+                        ])
+                )
+                charts.append(chart)
+            if question_type in ["single_option"]:
                 chart = PieChart(question_name, self)
                 chart.set_filters(chart_filter)
                 chart.set_aggregation(
@@ -117,7 +140,8 @@ def create_question_summary(self):
                         Fields([{'name': 'answer', 'type': 'type/Text'}])
                     )
                 )
-            elif question_type in ["multiple_option"]:
+                charts.append(chart)
+            if question_type in ["multiple_option"]:
                 chart = BarChart(question_name, self)
                 chart.set_filters(chart_filter)
                 chart.set_aggregation(
@@ -136,7 +160,8 @@ def create_question_summary(self):
                         }
                     }]
                 )
-            elif question_type in ["matrix_single", "matrix_multiple"]:
+                charts.append(chart)
+            if question_type in ["matrix_single", "matrix_multiple"]:
                 chart = BarChart(question_name, self)
                 chart.set_filters(chart_filter)
                 chart.set_aggregation(
@@ -168,7 +193,8 @@ def create_question_summary(self):
                         }
                     }]
                 )
-            elif question_type in ["files"]:
+                charts.append(chart)
+            if question_type in ["files"]:
                 chart = TableChart(question_name, self)
                 chart.set_filters(chart_filter)
                 chart.set_fields(
@@ -179,7 +205,8 @@ def create_question_summary(self):
                         ]
                     )
                 )
-            elif question_type in ["sorting"]:
+                charts.append(chart)
+            if question_type in ["sorting"]:
                 chart = HorizontalBarChart(question_name, self)
                 chart.set_filters(chart_filter)
                 chart.set_aggregation(
@@ -206,6 +233,8 @@ def create_question_summary(self):
                         }
                     }]
                 )
-            created_chart = chart.create_chart()
-            chart_list.append([chart, created_chart])
+                charts.append(chart)
+            for chart in charts:
+                created_chart = chart.create_chart()
+                chart_list.append([chart, created_chart])
         return chart_list
diff --git a/data_utils/form_summary/test_form_summary.py b/data_utils/form_summary/test_form_summary.py
@@ -0,0 +1,47 @@
+from .forms_summary import clean_and_prepare_dataframe
+from ..utils import MTB
+import unittest
+import pandas as pd
+import numpy as np
+
+class TestDataframeProcessing(unittest.TestCase):
+    """Check how clean_and_prepare_dataframe flags custom-body answers."""
+
+    def setUp(self):
+        """Build one single-option and one multiple-option answers frame."""
+        self.language = 'en'
+        single_option_rows = {
+            'session_token': ['420000', '530000', '420000', '530000'],
+            'question_type': ['single_option'] * 4,
+            'question_title': ['Souhaitez-vous venir ?'] * 2 + ['Avez-vous bu ?'] * 2,
+            'answer': ['Je souhaite venir car :', 'Je ne souhaite pas venir car :', 'Oui', 'Non'],
+            'position': ['1', '1', '2', '2'],
+            'custom_body': [np.nan, 'Je ne suis pas disponible', np.nan, np.nan],
+        }
+        multiple_option_rows = {
+            'session_token': ['420000', '420000', '530000', '530000'],
+            'question_type': ['multiple_option'] * 4,
+            'question_title': ['Couleurs préférées ?'] * 4,
+            'answer': ['Vert', 'Bleu'] * 2,
+            'position': ['3'] * 4,
+            'custom_body': [np.nan, 'ça me rappelle la mer', np.nan, np.nan],
+        }
+        self.df_single_option = pd.DataFrame(single_option_rows)
+        self.df_multiple_option = pd.DataFrame(multiple_option_rows)
+
+    def test_has_custom_body(self):
+        """A single-option question is flagged once any answer has a custom body."""
+        result = clean_and_prepare_dataframe(self.df_single_option, self.language)
+        self.assertEqual(result, [
+            ['Souhaitez-vous venir ?', 'single_option', '1', True],
+            ['Avez-vous bu ?', 'single_option', '2', False],
+        ])
+
+    def test_has_custom_body_multiple_option(self):
+        """A multiple-option question collapses to one row flagged as custom body."""
+        result = clean_and_prepare_dataframe(self.df_multiple_option, self.language)
+        expected = [['Couleurs préférées ?', 'multiple_option', '3', True]]
+        self.assertEqual(result, expected)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/pyproject.toml b/pyproject.toml
@@ -18,6 +18,7 @@ psycopg2 = "^2.9.6"
 openpyxl = "^3.1.2"
 citric = "^0.7.0"
 python-dotenv = "^1.0.0"
+pytest = "^8.3.5"
 
 [tool.poetry.scripts]
 # card_changer