diff --git a/.gitignore b/.gitignore
index b7faf40..97df104 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,8 @@
+# Personal
+Data/
+Tests/
+tester_2.ipynb
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[codz]
diff --git a/cleaning_enrollments_data.py b/cleaning_enrollments_data.py
new file mode 100644
index 0000000..870303d
--- /dev/null
+++ b/cleaning_enrollments_data.py
@@ -0,0 +1,123 @@
+import pandas as pd
+import numpy as np
+
+
+class EnrollmentsCleaning:
+    """Clean a raw enrollments DataFrame for the analysis classes.
+
+    Each step returns a new DataFrame and leaves its argument unchanged;
+    ``Get_clean_data`` chains the steps over ``raw_data``.
+    """
+
+    def __init__(self, raw_data):
+        # Raw enrollments DataFrame; the cleaning steps never modify it.
+        self.raw_data = raw_data
+
+    def Drop_columns(self, df):
+        """Return ``df`` without the 'Full Name' column."""
+        COLUMNS_TO_DROP = ['Full Name']
+        return df.drop(columns=COLUMNS_TO_DROP)
+
+    def Fix_nan_values(self, df):
+        """Return a copy of ``df`` with missing dates and outcomes set to 'NA'.
+
+        'ATP Cohort' is deliberately left alone; ``Find_cohort`` fills it.
+        """
+        NAN_VALUE_SUBSTITUTE = 'NA'
+        COLUMNS_TO_FIX = [
+            'Projected Start Date', 'Actual Start Date', 'Projected End Date',
+            'Actual End Date', 'Outcome',
+        ]
+        result = df.copy()
+        for column in COLUMNS_TO_FIX:
+            result[column] = result[column].fillna(NAN_VALUE_SUBSTITUTE)
+        return result
+
+    def Rename_values(self, df):
+        """Return a copy of ``df`` with 'Data Analytics 2' renamed to 'Data Analysis 2'."""
+        result = df.copy()
+        result.loc[result['Service'] == 'Data Analytics 2', 'Service'] = 'Data Analysis 2'
+        return result
+
+    def Delete_values(self, df):
+        """Return a copy of ``df`` without the rows of services that are not needed.
+
+        'Referral to External Service' and 'Supportive Services Referral' are
+        removed because they do not have a 'Projected Start Date'.
+        """
+        values_not_needed = {
+            'Service': [
+                'Software Development 1', 'Software Development 2',
+                'Web Development 1', 'Web Development 2',
+                'Data Analysis 1', 'Data Analysis 2',
+                'Referral to External Service', 'Supportive Services Referral',
+            ],
+        }
+        result = df
+        for column, values in values_not_needed.items():
+            result = result[~result[column].isin(values)]
+        # Copy so later column assignments do not write into a slice of ``df``.
+        return result.copy()
+
+    def Set_data_types(self, df):
+        """Return a copy of ``df`` with every known column cast to its expected dtype."""
+        # TODO: the four start/end date columns are dates, but they stay ``str``
+        # because ``Fix_nan_values`` fills their gaps with 'NA'.
+        column_datatype: dict = {
+            'Auto Id': str, 'KY Region': str, 'Assessment ID': str, 'EnrollmentId': str,
+            'Enrollment Service Name': str, 'Service': str, 'Projected Start Date': str,
+            'Actual Start Date': str, 'Projected End Date': str, 'Actual End Date': str,
+            'Outcome': str, 'ATP Cohort': 'datetime64[ns]',
+        }
+        result = df.copy()
+        for column, dtype in column_datatype.items():
+            result[column] = result[column].astype(dtype)
+        return result
+
+    def Find_cohort(self, id: str, projected_start_date: str, cohort_to_find: str, df_to_clean: pd.DataFrame):
+        """Return the cohort of one enrollment row, inferring it when missing.
+
+        Args:
+            id: 'Auto Id' of the student the row belongs to.
+            projected_start_date: 'Projected Start Date' of the row.
+            cohort_to_find: 'ATP Cohort' of the row; may be missing (NaT/NaN).
+            df_to_clean: DataFrame holding all rows, used to look up the
+                cohorts on the student's other rows.
+
+        Returns:
+            ``cohort_to_find`` as ``numpy.datetime64`` when it is present.
+            Otherwise the student's only known cohort, or - when the student
+            has none or several - the latest known cohort that is not after
+            ``projected_start_date``, falling back to the projected start date
+            itself. NaT when the projected start date cannot be parsed.
+        """
+        # Q: What to do with Service: ['Referral to External Service', 'Supportive Services Referral']
+        if not pd.isna(cohort_to_find):
+            return np.datetime64(cohort_to_find)
+
+        # Distinct, sorted cohorts found on the student's rows.
+        student_cohorts = df_to_clean.loc[df_to_clean['Auto Id'] == id, 'ATP Cohort'].dropna()
+        known_cohorts = student_cohorts.astype('datetime64[ns]').drop_duplicates().sort_values().to_numpy()
+        if len(known_cohorts) == 1:
+            return known_cohorts[0]
+
+        estimated_module_date = pd.to_datetime(projected_start_date, errors='coerce')
+        if pd.isna(estimated_module_date):
+            return np.datetime64('NaT')
+        estimated_module_date = estimated_module_date.to_datetime64()
+        cohorts_not_after = known_cohorts[known_cohorts <= estimated_module_date]
+        if len(cohorts_not_after) == 0:
+            return estimated_module_date
+        return cohorts_not_after[-1]
+
+    def Get_clean_data(self):
+        """Run every cleaning step on ``raw_data`` and return the cleaned copy."""
+        df = self.Drop_columns(self.raw_data)
+        df = self.Fix_nan_values(df)
+        df = self.Rename_values(df)
+        df = self.Delete_values(df)
+        df = self.Set_data_types(df)
+        # Row-wise ``apply`` on an empty frame does not yield a single column to assign.
+        if not df.empty:
+            df['ATP Cohort'] = df.apply(lambda row: self.Find_cohort(row['Auto Id'], row['Projected Start Date'], row['ATP Cohort'], df), axis=1)
+        return df
\ No newline at end of file
diff --git a/completion_rate_data.py b/completion_rate_data.py
new file mode 100644
index 0000000..48597d9
--- /dev/null
+++ b/completion_rate_data.py
@@ -0,0 +1,55 @@
+import pandas as pd
+
+
+class Completion_rate_data:
+    """Compute outcome proportions per pathway module from enrollment data."""
+
+    def __init__(self, data):
+        # Enrollments DataFrame; 'Service', 'Outcome' and 'ATP Cohort' are read.
+        self.data = data
+        pathway_names = [
+            'Web Development',
+            'Data Analysis',
+            'Software Development',
+            'Quality Assurance',
+            'User Experience',
+        ]
+        # Every pathway is made of the modules M1 to M4.
+        self.__pathways = [
+            f'{pathway} M{module}' for pathway in pathway_names for module in range(1, 5)
+        ]
+
+    def Get_completion_percentages(self, cohort: str = 'All cohorts') -> pd.DataFrame:
+        """Return the proportion of each outcome for every module.
+
+        Args:
+            cohort: Cohort date used to filter 'ATP Cohort', or 'All cohorts'
+                (default) to use every row.
+
+        Returns:
+            One row per module: a 'Module' column, one column per outcome
+            holding its proportion (0 when the outcome never occurs for the
+            module) and a 'Pathway' column (the module name without its last word).
+        """
+        if cohort == 'All cohorts':
+            data = self.data
+        else:
+            data = self.data[self.data['ATP Cohort'] == pd.Timestamp(cohort)]
+
+        # ``to_dict`` keeps this independent of the column names that
+        # ``value_counts(...).reset_index()`` produces, which changed in pandas 2.0.
+        completion_dictionary = {
+            path: data.loc[data['Service'] == path, 'Outcome'].value_counts(normalize=True).to_dict()
+            for path in self.__pathways
+        }
+        result_df = pd.DataFrame(completion_dictionary).transpose().fillna(0).rename_axis('Module').reset_index()
+
+        # The pathway name allows sorting/filtering the modules by pathway.
+        result_df['Pathway'] = result_df['Module'].str.rsplit(' ', n=1).str[0]
+        return result_df
+    # TODO: Add test
+
+    def Get_pathways_name(self, df: pd.DataFrame) -> list:
+        """Return the distinct values of ``df['Pathway']`` in order of appearance."""
+        return list(df['Pathway'].unique())
+
diff --git a/most_common_pathways_taken_data.py b/most_common_pathways_taken_data.py
new file mode 100644
index 0000000..845c59f
--- /dev/null
+++ b/most_common_pathways_taken_data.py
@@ -0,0 +1,60 @@
+import pandas as pd
+
+
+class Most_common_pathways_taken_data:
+    """Count how often each starting module (M1) is taken, overall or per cohort."""
+
+    def __init__(self, data):
+        # Enrollments DataFrame; 'Service' and 'ATP Cohort' are read.
+        self.data = data
+        self.__starter_pathways = [
+            'Web Development M1',
+            'Data Analysis M1',
+            'Software Development M1',
+            'Quality Assurance M1',
+            'User Experience M1',
+        ]
+        self.starter_only_df = self.Get_starting_pathways()
+
+    def Get_starting_pathways(self):
+        """
+            Returns the rows of ``self.data`` whose 'Service' is a starting module.
+
+            Return:
+                pandas.DataFrame
+        """
+        mask_starter_pathways = self.data['Service'].isin(self.__starter_pathways)
+        return self.data[mask_starter_pathways]
+
+    def Get_cohorts_list(self):
+        """
+            Returns 'All cohorts' followed by the distinct cohort dates as
+            strings, oldest first. Missing or unparseable cohorts are skipped.
+
+            Return:
+                list of str
+        """
+        # ``errors='coerce'`` maps 'NA' placeholders to NaT so ``dropna`` removes them too.
+        cohort_dates = pd.to_datetime(self.starter_only_df['ATP Cohort'], errors='coerce').dropna()
+        cohorts = list(cohort_dates.sort_values(ascending=True).astype(str).unique())
+        cohorts.insert(0, 'All cohorts')
+        return cohorts
+
+    def Get_data_by_cohort(self, cohort: str = 'All cohorts') -> pd.DataFrame:
+        """
+            Returns the number of starting-module enrollments per service.
+
+            Args:
+                cohort: cohort date to filter on, or 'All cohorts' for every row
+
+            Return:
+                pandas.DataFrame with the columns 'Service' and 'count',
+                most frequent service first
+        """
+        df = self.starter_only_df
+        if cohort != 'All cohorts':
+            # Compare as datetimes so the filter works whether the column holds dates or date strings.
+            cohort_dates = pd.to_datetime(df['ATP Cohort'], errors='coerce')
+            df = df[cohort_dates == pd.Timestamp(cohort)]
+        # Name both columns explicitly; the defaults depend on the pandas version.
+        return df['Service'].value_counts().rename('count').rename_axis('Service').reset_index()
\ No newline at end of file
diff --git a/visualization_examples.ipynb b/visualization_examples.ipynb
new file mode 100644
index 0000000..8fa5ef8
--- /dev/null
+++ b/visualization_examples.ipynb
@@ -0,0 +1,246 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "b0c6df40",
+   "metadata": {},
+   "source": [
+    "# Visualization examples\n",
+    "\n",
+    "Visualization was not turned into a class because the project will use Google Looker for dashboard creation; this notebook only showcases how to use the data manipulation classes."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fc151064",
+   "metadata": {},
+   "source": [
+    "## Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "47cd23cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import plotly.express as px\n",
+    "import plotly.graph_objects as go\n",
+    "from dash import Dash, dcc, html, Input, Output\n",
+    "from most_common_pathways_taken_data import Most_common_pathways_taken_data\n",
+    "from completion_rate_data import Completion_rate_data as Compleation_rate_data  # module and class are spelled 'completion'; the alias keeps the cells below working\n",
+    "from cleaning_enrollments_data import EnrollmentsCleaning"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cc61af47",
+   "metadata": {},
+   "source": [
+    "## Cleaning data\n",
+    "\n",
+    "This step should be done before the use of any of the Data classes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ba57e157",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cleaner = EnrollmentsCleaning(pd.read_excel('Data/Raw/ARC Enrollments.xlsx'))  # forward slashes work on every OS\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4225b677",
+   "metadata": {},
+   "source": [
+    "## Most common pathway taken:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "fa1b6e02",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "        <iframe\n",
+       "            width=\"100%\"\n",
+       "            height=\"650\"\n",
+       "            src=\"http://127.0.0.1:8052/\"\n",
+       "            frameborder=\"0\"\n",
+       "            allowfullscreen\n",
+       "            \n",
+       "        ></iframe>\n",
+       "        "
+      ],
+      "text/plain": [
+       "<IPython.lib.display.IFrame at 0x15cfa21ba70>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "def Dash_most_selected_path_by_cohort() -> Dash:\n",
+    "    \"\"\"Build a Dash app showing, per cohort, a pie chart of the starting modules taken.\"\"\"\n",
+    "    app = Dash(__name__)\n",
+    "    # Const\n",
+    "    data_class = Most_common_pathways_taken_data(cleaner.Get_clean_data())\n",
+    "\n",
+    "    dropdown_options = data_class.Get_cohorts_list()\n",
+    "    pathway_color = {\n",
+    "        'Web Development M1': 'blue',\n",
+    "        'Data Analysis M1': 'red',\n",
+    "        'Software Development M1': 'green',\n",
+    "        'Quality Assurance M1': 'yellow',\n",
+    "        'User Experience M1': 'purple'\n",
+    "    }\n",
+    "\n",
+    "    # Display\n",
+    "    app.layout = html.Div([\n",
+    "        html.H2('Cohorts', style={'text-align': \"center\"}),\n",
+    "        html.P('Select Cohort:'),\n",
+    "        dcc.Dropdown(\n",
+    "            id=\"dropdown\",\n",
+    "            options=dropdown_options,\n",
+    "            value=dropdown_options[0],\n",
+    "            clearable=False,\n",
+    "        ),\n",
+    "        dcc.Graph(id=\"graph\")\n",
+    "        \n",
+    "    ], style={'backgroundColor':'white'})\n",
+    "\n",
+    "    @app.callback(\n",
+    "        Output(\"graph\", \"figure\"),\n",
+    "        Input(\"dropdown\", \"value\"))\n",
+    "\n",
+    "    # Graph: redrawn every time another cohort is selected\n",
+    "    def update_pie(cohort):\n",
+    "        df = data_class.Get_data_by_cohort(cohort)\n",
+    "        fig = px.pie(df, names='Service', values='count', color='Service', color_discrete_map=pathway_color)\n",
+    "        return fig\n",
+    "\n",
+    "    return app\n",
+    "\n",
+    "    # TODO: Add number of students per each cohort \n",
+    "    # TODO: Fix the options on the selection \n",
+    "    # TODO: make colors better\n",
+    "\n",
+    "Dash_most_selected_path_by_cohort().run(debug=True, port=8052)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6b5b514e",
+   "metadata": {},
+   "source": [
+    "## Completion rates:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "c0b7d44e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "        <iframe\n",
+       "            width=\"100%\"\n",
+       "            height=\"650\"\n",
+       "            src=\"http://127.0.0.1:8053/\"\n",
+       "            frameborder=\"0\"\n",
+       "            allowfullscreen\n",
+       "            \n",
+       "        ></iframe>\n",
+       "        "
+      ],
+      "text/plain": [
+       "<IPython.lib.display.IFrame at 0x15cf9e13c50>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "def Dash_completion_rates_by_path() -> Dash: # TODO: fix data structure so visualization doesn't use df\n",
+    "    \"\"\"Build a Dash app showing a bar chart of the module completion rates of the selected pathway.\"\"\"\n",
+    "    app2 = Dash(__name__)\n",
+    "    # Const\n",
+    "    data_class = Compleation_rate_data(cleaner.Get_clean_data())\n",
+    "    completion_df = data_class.Get_completion_percentages()\n",
+    "    options = data_class.Get_pathways_name(completion_df)\n",
+    "\n",
+    "    pathway_color = {\n",
+    "        'Software Development': 'green',\n",
+    "        'Web Development': 'blue',\n",
+    "        'Data Analysis': 'red',\n",
+    "        'Quality Assurance': 'yellow',\n",
+    "        'User Experience': 'purple'\n",
+    "    }\n",
+    "\n",
+    "    # Display\n",
+    "    app2.layout = html.Div([\n",
+    "        html.H2('Pathways Completion', style={'text-align': \"center\"}),\n",
+    "        html.P('Select pathway:'),\n",
+    "        dcc.Dropdown(\n",
+    "            id=\"dropdown\",\n",
+    "            options=options,\n",
+    "            value=options[0],\n",
+    "            clearable=False,\n",
+    "        ),\n",
+    "        dcc.Graph(id=\"graph\")\n",
+    "        \n",
+    "    ], style={'backgroundColor':'white'})\n",
+    "\n",
+    "    @app2.callback(\n",
+    "        Output(\"graph\", \"figure\"),\n",
+    "        Input(\"dropdown\", \"value\"))\n",
+    "\n",
+    "    # Graph: bars are colored per pathway using pathway_color\n",
+    "    # TODO: Need to add an extra selection box with the cohorts\n",
+    "    def Display_pathway_completion(p):\n",
+    "        df = completion_df[completion_df['Pathway'] == p]\n",
+    "        return px.bar(df, x='Module', y='Successfully Completed', color='Pathway', color_discrete_map=pathway_color)\n",
+    "\n",
+    "    return app2\n",
+    "\n",
+    "Dash_completion_rates_by_path().run(debug=True, port=8053)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}