From ddf500790d99dea3a35b9ebf6d4e4025d9a27664 Mon Sep 17 00:00:00 2001 From: Euclides Date: Wed, 9 Jul 2025 14:45:12 -0400 Subject: [PATCH 1/9] First enrollments cleaning --- Cleaning_enrollments_data.ipynb | 49 +++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 Cleaning_enrollments_data.ipynb diff --git a/Cleaning_enrollments_data.ipynb b/Cleaning_enrollments_data.ipynb new file mode 100644 index 0000000..0e71a9f --- /dev/null +++ b/Cleaning_enrollments_data.ipynb @@ -0,0 +1,49 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f1f8e04d", + "metadata": {}, + "source": [ + "# ARC Application - Data cleanning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f782ef33", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import plotly.express as px" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a32e170c", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "enrollments = pd.read_excel('..\\\\Data\\\\Raw\\\\ARC Enrollments.xlsx')\n", + "enrollments\n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From e46d478a6065fa1f53b0917101f86dd843669c28 Mon Sep 17 00:00:00 2001 From: Euclides Date: Thu, 10 Jul 2025 02:14:35 -0400 Subject: [PATCH 2/9] renormalizing file --- Cleaning_enrollments_data.ipynb | 3366 ++++++++++++++++++++++++++++++- 1 file changed, 3352 insertions(+), 14 deletions(-) diff --git a/Cleaning_enrollments_data.ipynb b/Cleaning_enrollments_data.ipynb index 0e71a9f..aac9c8c 100644 --- a/Cleaning_enrollments_data.ipynb +++ b/Cleaning_enrollments_data.ipynb @@ -8,40 +8,3378 @@ "# ARC Application - Data cleanning" ] }, + { + "cell_type": "markdown", + "id": "86a96267", + "metadata": {}, + "source": [ + "All data cleaning will be in the function 'Enrollments cleaning'\n", + "\n", + "Notes:\n", + "- 'KY Region' field all the values are 'SOAR'\n", + "\n", + "Questions:\n", + "- Why are they 'Actual Start Date' fields as NA when there is a Actual End Date" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 91, "id": "f782ef33", - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, + "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import plotly.express as px" ] }, + { + "cell_type": "markdown", + "id": "0b28f740", + "metadata": {}, + "source": [ + "# Result:" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "id": "2efeac68", + "metadata": {}, + "outputs": [], + "source": [ + "def Enrollments_cleaning(df: pd.DataFrame) -> pd.DataFrame:\n", + " # Columns to clean\n", + " COLUMNS_TO_DROP = ['Full Name']\n", + " enrollments = df.drop(columns=COLUMNS_TO_DROP)\n", + "\n", + " # Fix NaN values\n", + " NAN_VALUE_SUBSTITUTE = 'NA'\n", + " columns_to_fix = {\n", + " 'Projected Start Date': NAN_VALUE_SUBSTITUTE, 'Actual Start Date': NAN_VALUE_SUBSTITUTE, 'Projected End Date': NAN_VALUE_SUBSTITUTE,\n", + " 'Actual End Date': NAN_VALUE_SUBSTITUTE, 'Outcome': NAN_VALUE_SUBSTITUTE, 'ATP Cohort': NAN_VALUE_SUBSTITUTE\n", + " }\n", + "\n", + " for column, na_value in columns_to_fix.items():\n", + " enrollments[column] = enrollments[column].fillna(na_value)\n", + "\n", + " return enrollments" + ] + }, + { + "cell_type": "markdown", + "id": "10146c9e", + "metadata": {}, + "source": [ + "# Exploratory Data Analysis" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 93, "id": "a32e170c", - "metadata": { - "vscode": { - "languageId": "plaintext" + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Auto Id", + "rawType": "object", + "type": "string" + }, + { + "name": "KY Region", + "rawType": "object", + "type": "string" + }, + { + "name": "Assessment ID", + "rawType": "object", + "type": "string" + }, + { + "name": "EnrollmentId", + "rawType": "object", + "type": "string" + }, + { + "name": "Enrollment Service Name", + "rawType": "object", + "type": "string" + }, + { + "name": "Service", + "rawType": "object", + "type": "string" + }, + { + "name": "Projected Start Date", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Actual Start Date", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Projected End Date", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Actual End Date", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Outcome", + "rawType": "object", + "type": "string" + }, + { + "name": "ATP Cohort", + "rawType": "object", + "type": "unknown" + } + ], + "ref": "8188b1e0-d8ce-49d1-b22e-b47f52f9b422", + "rows": [ + [ + "0", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0011193", + "Career Readiness Workshop", + "2021-11-11 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "1", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0013492", + "Software Development 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00" + ], + [ + "2", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0014187", + "Career Readiness Workshop", + "2022-03-07 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "3", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0015022", + "Software Development 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-07-29 00:00:00", + "Successfully Completed", + "2022-05-01 00:00:00" + ], + [ + "4", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0015075", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00" + ], + [ + "5", + "202109-5230", + "SOAR", + "OA-003352", + "Enrollment-1389", + "ES-0015087", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-09-28 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "6", + "202109-5233", + "SOAR", + "OA-003355", + "Enrollment-1392", + "ES-0011185", + "Career Readiness Workshop", + "2021-11-11 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "7", + "202109-5233", + "SOAR", + "OA-003355", + "Enrollment-1392", + "ES-0013525", + "Web Development 2", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00" + ], + [ + "8", + "202109-5233", + "SOAR", + "OA-003355", + "Enrollment-1392", + "ES-0015081", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00" + ], + [ + "9", + "202109-5236", + "SOAR", + "OA-003358", + "Enrollment-1395", + "ES-0015092", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-01 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "10", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0011212", + "One-on-one Job Readiness", + "2021-11-23 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "11", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0015086", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-01 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "12", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0017175", + "Data Analysis 1", + "2023-01-04 00:00:00", + "NA", + "NA", + "2023-03-31 00:00:00", + "Successfully Completed", + "2023-01-01 00:00:00" + ], + [ + "13", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0017537", + "Career Readiness Workshop", + "2023-02-16 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "14", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0018722", + "Data Analytics 2", + "2023-05-09 00:00:00", + "NA", + "NA", + "2023-08-07 00:00:00", + "Successfully Completed", + "2023-05-01 00:00:00" + ], + [ + "15", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0023302", + "One-on-one Job Readiness", + "2024-06-12 00:00:00", + "2024-06-12 00:00:00", + "2024-06-12 00:00:00", + "2024-06-12 00:00:00", + "NA", + "NA" + ], + [ + "16", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0023396", + "One-on-one Job Readiness", + "2024-05-23 00:00:00", + "NA", + "2024-05-23 00:00:00", + "NA", + "NA", + "NA" + ], + [ + "17", + "202109-5238", + "SOAR", + "OA-003360", + "Enrollment-1398", + "ES-0015085", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-09 00:00:00", + "2021-12-09 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "18", + "202109-5239", + "SOAR", + "OA-003363", + "Enrollment-1401", + "ES-0017159", + "Web Development 1", + "2021-09-08 00:00:00", + "NA", + "NA", + "2021-09-23 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "19", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0011221", + "One-on-one Job Readiness", + "2021-12-03 00:00:00", + "2021-12-03 00:00:00", + "NA", + "NA", + "NA", + "NA" + ], + [ + "20", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0013503", + "Software Development 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00" + ], + [ + "21", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0014300", + "One-on-one Job Readiness", + "2022-03-29 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "22", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0015052", + "Software Development 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-07-29 00:00:00", + "Successfully Completed", + "2022-05-01 00:00:00" + ], + [ + "23", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0015076", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00" + ], + [ + "24", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0017237", + "Data Analysis 1", + "2022-08-24 00:00:00", + "NA", + "NA", + "2022-11-18 00:00:00", + "Successfully Completed", + "2022-09-01 00:00:00" + ], + [ + "25", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0017238", + "Data Analytics 2", + "2023-01-04 00:00:00", + "NA", + "NA", + "2023-03-31 00:00:00", + "Successfully Completed", + "2023-01-01 00:00:00" + ], + [ + "26", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0018733", + "Web Development 2", + "2023-05-11 00:00:00", + "NA", + "NA", + "2023-08-07 00:00:00", + "Successfully Completed", + "2023-05-01 00:00:00" + ], + [ + "27", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0011174", + "Career Readiness Workshop", + "2021-11-08 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "28", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0013507", + "Software Development 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-05-06 00:00:00", + "2022-05-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00" + ], + [ + "29", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0014268", + "One-on-one Job Readiness", + "2022-03-22 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "30", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0015065", + "Web Development 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-07-29 00:00:00", + "Did Not Complete", + "2022-05-01 00:00:00" + ], + [ + "31", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0015080", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00" + ], + [ + "32", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0023385", + "One-on-one Job Readiness", + "2024-01-29 00:00:00", + "NA", + "2024-01-29 00:00:00", + "NA", + "NA", + "NA" + ], + [ + "33", + "202109-5272", + "SOAR", + "OA-003395", + "Enrollment-1434", + "ES-0011175", + "Career Readiness Workshop", + "2021-11-08 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "34", + "202109-5272", + "SOAR", + "OA-003395", + "Enrollment-1434", + "ES-0013526", + "Web Development 2", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00" + ], + [ + "35", + "202109-5272", + "SOAR", + "OA-003395", + "Enrollment-1434", + "ES-0015078", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00" + ], + [ + "36", + "202109-5272", + "SOAR", + "OA-003395", + "Enrollment-1434", + "ES-0017252", + "Software Development 1", + "2022-08-24 00:00:00", + "NA", + "NA", + "2022-10-03 00:00:00", + "Did Not Complete", + "2022-09-01 00:00:00" + ], + [ + "37", + "202109-5273", + "SOAR", + "OA-003396", + "Enrollment-1435", + "ES-0015089", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-10-27 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "38", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0011204", + "Career Readiness Workshop", + "2021-11-18 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "39", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0013483", + "Data Analysis 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00" + ], + [ + "40", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0014299", + "One-on-one Job Readiness", + "2022-03-29 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "41", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0015047", + "Data Analytics 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-06-08 00:00:00", + "Did Not Complete", + "2022-05-01 00:00:00" + ], + [ + "42", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0015084", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00" + ], + [ + "43", + "202109-5275", + "SOAR", + "OA-003398", + "Enrollment-1609", + "ES-0015088", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-10-11 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "44", + "202109-5276", + "SOAR", + "OA-003399", + "Enrollment-1611", + "ES-0015091", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-10-28 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "45", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0011213", + "One-on-one Job Readiness", + "2021-11-23 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "46", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0013995", + "Software Development 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-07-29 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00" + ], + [ + "47", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0014191", + "Career Readiness Workshop", + "2022-03-07 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "48", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0015050", + "Software Development 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-07-29 00:00:00", + "Successfully Completed", + "2022-05-01 00:00:00" + ], + [ + "49", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0015074", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00" + ] + ], + "shape": { + "columns": 12, + "rows": 2033 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Auto IdKY RegionAssessment IDEnrollmentIdEnrollment Service NameServiceProjected Start DateActual Start DateProjected End DateActual End DateOutcomeATP Cohort
0202109-5224SOAROA-003348Enrollment-1386ES-0011193Career Readiness Workshop2021-11-11 00:00:00NANANANANA
1202109-5224SOAROA-003348Enrollment-1386ES-0013492Software Development 12022-01-05 00:00:002022-01-05 00:00:002022-04-06 00:00:002022-04-06 00:00:00Successfully Completed2022-01-01 00:00:00
2202109-5224SOAROA-003348Enrollment-1386ES-0014187Career Readiness Workshop2022-03-07 00:00:00NANANANANA
3202109-5224SOAROA-003348Enrollment-1386ES-0015022Software Development 22022-05-04 00:00:002022-05-04 00:00:002022-07-29 00:00:002022-07-29 00:00:00Successfully Completed2022-05-01 00:00:00
4202109-5224SOAROA-003348Enrollment-1386ES-0015075Web Development 12021-09-08 00:00:002021-09-08 00:00:002021-12-14 00:00:002021-12-14 00:00:00Successfully Completed2021-09-01 00:00:00
.......................................
2028202504-21723SOAROA-022760Enrollment-14196ES-0035149Intro to Programming Core2025-05-12 00:00:002025-05-12 00:00:002025-06-27 00:00:00NANA2025-05-01 00:00:00
2029202505-22788SOAROA-023710Enrollment-14213ES-0035212Intro to Programming Core2025-05-14 00:00:002025-05-13 00:00:002025-06-17 00:00:00NANA2025-05-01 00:00:00
2030202408-16568SOAROA-017961Enrollment-14833ES-0036429Intro to Programming Core2025-05-12 00:00:00NANANADid Not Complete2025-05-01 00:00:00
2031202408-16568SOAROA-017961Enrollment-14833ES-0036430Supportive Services ReferralNANANANANANA
2032202503-21420SOAROA-022426Enrollment-15195ES-0038953Intro to Programming Core2025-05-12 00:00:00NA2025-06-27 00:00:00NANA2025-05-01 00:00:00
\n", + "

2033 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " Auto Id KY Region Assessment ID EnrollmentId \\\n", + "0 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "1 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "2 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "3 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "4 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "... ... ... ... ... \n", + "2028 202504-21723 SOAR OA-022760 Enrollment-14196 \n", + "2029 202505-22788 SOAR OA-023710 Enrollment-14213 \n", + "2030 202408-16568 SOAR OA-017961 Enrollment-14833 \n", + "2031 202408-16568 SOAR OA-017961 Enrollment-14833 \n", + "2032 202503-21420 SOAR OA-022426 Enrollment-15195 \n", + "\n", + " Enrollment Service Name Service \\\n", + "0 ES-0011193 Career Readiness Workshop \n", + "1 ES-0013492 Software Development 1 \n", + "2 ES-0014187 Career Readiness Workshop \n", + "3 ES-0015022 Software Development 2 \n", + "4 ES-0015075 Web Development 1 \n", + "... ... ... \n", + "2028 ES-0035149 Intro to Programming Core \n", + "2029 ES-0035212 Intro to Programming Core \n", + "2030 ES-0036429 Intro to Programming Core \n", + "2031 ES-0036430 Supportive Services Referral \n", + "2032 ES-0038953 Intro to Programming Core \n", + "\n", + " Projected Start Date Actual Start Date Projected End Date \\\n", + "0 2021-11-11 00:00:00 NA NA \n", + "1 2022-01-05 00:00:00 2022-01-05 00:00:00 2022-04-06 00:00:00 \n", + "2 2022-03-07 00:00:00 NA NA \n", + "3 2022-05-04 00:00:00 2022-05-04 00:00:00 2022-07-29 00:00:00 \n", + "4 2021-09-08 00:00:00 2021-09-08 00:00:00 2021-12-14 00:00:00 \n", + "... ... ... ... \n", + "2028 2025-05-12 00:00:00 2025-05-12 00:00:00 2025-06-27 00:00:00 \n", + "2029 2025-05-14 00:00:00 2025-05-13 00:00:00 2025-06-17 00:00:00 \n", + "2030 2025-05-12 00:00:00 NA NA \n", + "2031 NA NA NA \n", + "2032 2025-05-12 00:00:00 NA 2025-06-27 00:00:00 \n", + "\n", + " Actual End Date Outcome ATP Cohort \n", + "0 NA NA NA \n", + "1 2022-04-06 00:00:00 Successfully Completed 2022-01-01 00:00:00 \n", + "2 NA NA NA \n", + "3 2022-07-29 00:00:00 Successfully Completed 2022-05-01 00:00:00 \n", + "4 2021-12-14 00:00:00 Successfully Completed 2021-09-01 00:00:00 \n", + "... ... ... ... \n", + "2028 NA NA 2025-05-01 00:00:00 \n", + "2029 NA NA 2025-05-01 00:00:00 \n", + "2030 NA Did Not Complete 2025-05-01 00:00:00 \n", + "2031 NA NA NA \n", + "2032 NA NA 2025-05-01 00:00:00 \n", + "\n", + "[2033 rows x 12 columns]" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" } - }, - "outputs": [], + ], "source": [ - "enrollments = pd.read_excel('..\\\\Data\\\\Raw\\\\ARC Enrollments.xlsx')\n", + "enrollments = pd.read_excel('Data\\\\Raw\\\\ARC Enrollments.xlsx')\n", + "enrollments = Enrollments_cleaning(enrollments)\n", "enrollments\n" ] + }, + { + "cell_type": "markdown", + "id": "a6805ca6", + "metadata": {}, + "source": [ + "### Understanding the categories of data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "423d5090", + "metadata": {}, + "outputs": [], + "source": [ + "pathways = [\n", + " 'Software Development 1', \n", + " 'Software Development 2', \n", + " 'Web Development 1', \n", + " 'Web Development 2', \n", + " 'Data Analysis 1',\n", + " 'Data Analytics 2',\n", + " 'Web Development M1',\n", + " 'Web Development M2',\n", + " 'Web Development M3',\n", + " 'Web Development M4',\n", + " 'Data Analysis M1', \n", + " 'Data Analysis M2',\n", + " 'Data Analysis M3',\n", + " 'Data Analysis M4', \n", + " 'Software Development M1',\n", + " 'Software Development M2',\n", + " 'Software Development M3',\n", + " 'Software Development M4',\n", + " 'Quality Assurance M1', \n", + " 'Quality Assurance M2',\n", + " 'Quality Assurance M3', \n", + " 'Quality Assurance M4', \n", + " 'User Experience M1', \n", + " 'User Experience M2',\n", + " 'User Experience M3', \n", + " 'User Experience M4',\n", + "]\n", + "\n", + "workshops = [\n", + " 'JavaScript - React',\n", + " 'AWS',\n", + " 'Salesforce',\n", + " 'UofL Cyber Security Certificate',\n", + " 'Intro to Programming Core',\n", + " 'Artificial Intelligence M1',\n", + " 'Technical Project Management',\n", + "]\n", + "\n", + "support_ways = [\n", + " 'Career Readiness Workshop',\n", + " 'One-on-one Job Readiness',\n", + " 'Interview Guidance and Practice',\n", + " 'Remote Jobs Workshop (EDA Grant)',\n", + " 'Employer Event (Code:You)',\n", + " 'Loaner Laptop',\n", + " 'Demo Day Participant',\n", + " 'Tech Communications Workshop',\n", + " 'Remote Jobs Workshop (non EDA)',\n", + " 'Referral to External Service',\n", + " 'Supportive Services Referral', \n", + " 'Resume Review and Optimization',\n", + " 'Revised Resume Review'\n", + "]\n", + "\n", + "# This category is only used to find the path choose by students:\n", + "Starter_pathways = [\n", + " 'Software Development 1', \n", + " 'Web Development 1', \n", + " 'Data Analysis 1',\n", + " 'Web Development M1',\n", + " 'Data Analysis M1', \n", + " 'Software Development M1',\n", + " 'Quality Assurance M1', \n", + " 'User Experience M1',\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "id": "61a579c5", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Auto Id", + "rawType": "object", + "type": "string" + }, + { + "name": "KY Region", + "rawType": "object", + "type": "string" + }, + { + "name": "Assessment ID", + "rawType": "object", + "type": "string" + }, + { + "name": "EnrollmentId", + "rawType": "object", + "type": "string" + }, + { + "name": "Enrollment Service Name", + "rawType": "object", + "type": "string" + }, + { + "name": "Service", + "rawType": "object", + "type": "string" + }, + { + "name": "Projected Start Date", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Actual Start Date", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Projected End Date", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Actual End Date", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Outcome", + "rawType": "object", + "type": "string" + }, + { + "name": "ATP Cohort", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Category", + "rawType": "object", + "type": "string" + } + ], + "ref": "e5e63e5c-563c-4854-8ac3-209fb212f89f", + "rows": [ + [ + "0", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0011193", + "Career Readiness Workshop", + "2021-11-11 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "1", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0013492", + "Software Development 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00", + "Pathway" + ], + [ + "2", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0014187", + "Career Readiness Workshop", + "2022-03-07 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "3", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0015022", + "Software Development 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-07-29 00:00:00", + "Successfully Completed", + "2022-05-01 00:00:00", + "Pathway" + ], + [ + "4", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0015075", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00", + "Pathway" + ], + [ + "5", + "202109-5230", + "SOAR", + "OA-003352", + "Enrollment-1389", + "ES-0015087", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-09-28 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00", + "Pathway" + ], + [ + "6", + "202109-5233", + "SOAR", + "OA-003355", + "Enrollment-1392", + "ES-0011185", + "Career Readiness Workshop", + "2021-11-11 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "7", + "202109-5233", + "SOAR", + "OA-003355", + "Enrollment-1392", + "ES-0013525", + "Web Development 2", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00", + "Pathway" + ], + [ + "8", + "202109-5233", + "SOAR", + "OA-003355", + "Enrollment-1392", + "ES-0015081", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00", + "Pathway" + ], + [ + "9", + "202109-5236", + "SOAR", + "OA-003358", + "Enrollment-1395", + "ES-0015092", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-01 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00", + "Pathway" + ], + [ + "10", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0011212", + "One-on-one Job Readiness", + "2021-11-23 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "11", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0015086", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-01 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00", + "Pathway" + ], + [ + "12", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0017175", + "Data Analysis 1", + "2023-01-04 00:00:00", + "NA", + "NA", + "2023-03-31 00:00:00", + "Successfully Completed", + "2023-01-01 00:00:00", + "Pathway" + ], + [ + "13", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0017537", + "Career Readiness Workshop", + "2023-02-16 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "14", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0018722", + "Data Analytics 2", + "2023-05-09 00:00:00", + "NA", + "NA", + "2023-08-07 00:00:00", + "Successfully Completed", + "2023-05-01 00:00:00", + "Pathway" + ], + [ + "15", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0023302", + "One-on-one Job Readiness", + "2024-06-12 00:00:00", + "2024-06-12 00:00:00", + "2024-06-12 00:00:00", + "2024-06-12 00:00:00", + "NA", + "NA", + "Student Support" + ], + [ + "16", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0023396", + "One-on-one Job Readiness", + "2024-05-23 00:00:00", + "NA", + "2024-05-23 00:00:00", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "17", + "202109-5238", + "SOAR", + "OA-003360", + "Enrollment-1398", + "ES-0015085", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-09 00:00:00", + "2021-12-09 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00", + "Pathway" + ], + [ + "18", + "202109-5239", + "SOAR", + "OA-003363", + "Enrollment-1401", + "ES-0017159", + "Web Development 1", + "2021-09-08 00:00:00", + "NA", + "NA", + "2021-09-23 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00", + "Pathway" + ], + [ + "19", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0011221", + "One-on-one Job Readiness", + "2021-12-03 00:00:00", + "2021-12-03 00:00:00", + "NA", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "20", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0013503", + "Software Development 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00", + "Pathway" + ], + [ + "21", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0014300", + "One-on-one Job Readiness", + "2022-03-29 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "22", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0015052", + "Software Development 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-07-29 00:00:00", + "Successfully Completed", + "2022-05-01 00:00:00", + "Pathway" + ], + [ + "23", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0015076", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00", + "Pathway" + ], + [ + "24", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0017237", + "Data Analysis 1", + "2022-08-24 00:00:00", + "NA", + "NA", + "2022-11-18 00:00:00", + "Successfully Completed", + "2022-09-01 00:00:00", + "Pathway" + ], + [ + "25", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0017238", + "Data Analytics 2", + "2023-01-04 00:00:00", + "NA", + "NA", + "2023-03-31 00:00:00", + "Successfully Completed", + "2023-01-01 00:00:00", + "Pathway" + ], + [ + "26", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0018733", + "Web Development 2", + "2023-05-11 00:00:00", + "NA", + "NA", + "2023-08-07 00:00:00", + "Successfully Completed", + "2023-05-01 00:00:00", + "Pathway" + ], + [ + "27", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0011174", + "Career Readiness Workshop", + "2021-11-08 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "28", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0013507", + "Software Development 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-05-06 00:00:00", + "2022-05-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00", + "Pathway" + ], + [ + "29", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0014268", + "One-on-one Job Readiness", + "2022-03-22 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "30", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0015065", + "Web Development 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-07-29 00:00:00", + "Did Not Complete", + "2022-05-01 00:00:00", + "Pathway" + ], + [ + "31", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0015080", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00", + "Pathway" + ], + [ + "32", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0023385", + "One-on-one Job Readiness", + "2024-01-29 00:00:00", + "NA", + "2024-01-29 00:00:00", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "33", + "202109-5272", + "SOAR", + "OA-003395", + "Enrollment-1434", + "ES-0011175", + "Career Readiness Workshop", + "2021-11-08 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "34", + "202109-5272", + "SOAR", + "OA-003395", + "Enrollment-1434", + "ES-0013526", + "Web Development 2", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00", + "Pathway" + ], + [ + "35", + "202109-5272", + "SOAR", + "OA-003395", + "Enrollment-1434", + "ES-0015078", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00", + "Pathway" + ], + [ + "36", + "202109-5272", + "SOAR", + "OA-003395", + "Enrollment-1434", + "ES-0017252", + "Software Development 1", + "2022-08-24 00:00:00", + "NA", + "NA", + "2022-10-03 00:00:00", + "Did Not Complete", + "2022-09-01 00:00:00", + "Pathway" + ], + [ + "37", + "202109-5273", + "SOAR", + "OA-003396", + "Enrollment-1435", + "ES-0015089", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-10-27 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00", + "Pathway" + ], + [ + "38", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0011204", + "Career Readiness Workshop", + "2021-11-18 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "39", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0013483", + "Data Analysis 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00", + "Pathway" + ], + [ + "40", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0014299", + "One-on-one Job Readiness", + "2022-03-29 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "41", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0015047", + "Data Analytics 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-06-08 00:00:00", + "Did Not Complete", + "2022-05-01 00:00:00", + "Pathway" + ], + [ + "42", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0015084", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00", + "Pathway" + ], + [ + "43", + "202109-5275", + "SOAR", + "OA-003398", + "Enrollment-1609", + "ES-0015088", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-10-11 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00", + "Pathway" + ], + [ + "44", + "202109-5276", + "SOAR", + "OA-003399", + "Enrollment-1611", + "ES-0015091", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-10-28 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00", + "Pathway" + ], + [ + "45", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0011213", + "One-on-one Job Readiness", + "2021-11-23 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "46", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0013995", + "Software Development 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-07-29 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00", + "Pathway" + ], + [ + "47", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0014191", + "Career Readiness Workshop", + "2022-03-07 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA", + "Student Support" + ], + [ + "48", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0015050", + "Software Development 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-07-29 00:00:00", + "Successfully Completed", + "2022-05-01 00:00:00", + "Pathway" + ], + [ + "49", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0015074", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00", + "Pathway" + ] + ], + "shape": { + "columns": 13, + "rows": 2033 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Auto IdKY RegionAssessment IDEnrollmentIdEnrollment Service NameServiceProjected Start DateActual Start DateProjected End DateActual End DateOutcomeATP CohortCategory
0202109-5224SOAROA-003348Enrollment-1386ES-0011193Career Readiness Workshop2021-11-11 00:00:00NANANANANAStudent Support
1202109-5224SOAROA-003348Enrollment-1386ES-0013492Software Development 12022-01-05 00:00:002022-01-05 00:00:002022-04-06 00:00:002022-04-06 00:00:00Successfully Completed2022-01-01 00:00:00Pathway
2202109-5224SOAROA-003348Enrollment-1386ES-0014187Career Readiness Workshop2022-03-07 00:00:00NANANANANAStudent Support
3202109-5224SOAROA-003348Enrollment-1386ES-0015022Software Development 22022-05-04 00:00:002022-05-04 00:00:002022-07-29 00:00:002022-07-29 00:00:00Successfully Completed2022-05-01 00:00:00Pathway
4202109-5224SOAROA-003348Enrollment-1386ES-0015075Web Development 12021-09-08 00:00:002021-09-08 00:00:002021-12-14 00:00:002021-12-14 00:00:00Successfully Completed2021-09-01 00:00:00Pathway
..........................................
2028202504-21723SOAROA-022760Enrollment-14196ES-0035149Intro to Programming Core2025-05-12 00:00:002025-05-12 00:00:002025-06-27 00:00:00NANA2025-05-01 00:00:00Workshop
2029202505-22788SOAROA-023710Enrollment-14213ES-0035212Intro to Programming Core2025-05-14 00:00:002025-05-13 00:00:002025-06-17 00:00:00NANA2025-05-01 00:00:00Workshop
2030202408-16568SOAROA-017961Enrollment-14833ES-0036429Intro to Programming Core2025-05-12 00:00:00NANANADid Not Complete2025-05-01 00:00:00Workshop
2031202408-16568SOAROA-017961Enrollment-14833ES-0036430Supportive Services ReferralNANANANANANAStudent Support
2032202503-21420SOAROA-022426Enrollment-15195ES-0038953Intro to Programming Core2025-05-12 00:00:00NA2025-06-27 00:00:00NANA2025-05-01 00:00:00Workshop
\n", + "

2033 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " Auto Id KY Region Assessment ID EnrollmentId \\\n", + "0 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "1 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "2 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "3 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "4 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "... ... ... ... ... \n", + "2028 202504-21723 SOAR OA-022760 Enrollment-14196 \n", + "2029 202505-22788 SOAR OA-023710 Enrollment-14213 \n", + "2030 202408-16568 SOAR OA-017961 Enrollment-14833 \n", + "2031 202408-16568 SOAR OA-017961 Enrollment-14833 \n", + "2032 202503-21420 SOAR OA-022426 Enrollment-15195 \n", + "\n", + " Enrollment Service Name Service \\\n", + "0 ES-0011193 Career Readiness Workshop \n", + "1 ES-0013492 Software Development 1 \n", + "2 ES-0014187 Career Readiness Workshop \n", + "3 ES-0015022 Software Development 2 \n", + "4 ES-0015075 Web Development 1 \n", + "... ... ... \n", + "2028 ES-0035149 Intro to Programming Core \n", + "2029 ES-0035212 Intro to Programming Core \n", + "2030 ES-0036429 Intro to Programming Core \n", + "2031 ES-0036430 Supportive Services Referral \n", + "2032 ES-0038953 Intro to Programming Core \n", + "\n", + " Projected Start Date Actual Start Date Projected End Date \\\n", + "0 2021-11-11 00:00:00 NA NA \n", + "1 2022-01-05 00:00:00 2022-01-05 00:00:00 2022-04-06 00:00:00 \n", + "2 2022-03-07 00:00:00 NA NA \n", + "3 2022-05-04 00:00:00 2022-05-04 00:00:00 2022-07-29 00:00:00 \n", + "4 2021-09-08 00:00:00 2021-09-08 00:00:00 2021-12-14 00:00:00 \n", + "... ... ... ... \n", + "2028 2025-05-12 00:00:00 2025-05-12 00:00:00 2025-06-27 00:00:00 \n", + "2029 2025-05-14 00:00:00 2025-05-13 00:00:00 2025-06-17 00:00:00 \n", + "2030 2025-05-12 00:00:00 NA NA \n", + "2031 NA NA NA \n", + "2032 2025-05-12 00:00:00 NA 2025-06-27 00:00:00 \n", + "\n", + " Actual End Date Outcome ATP Cohort \\\n", + "0 NA NA NA \n", + "1 2022-04-06 00:00:00 Successfully Completed 2022-01-01 00:00:00 \n", + "2 NA NA NA \n", + "3 2022-07-29 00:00:00 Successfully Completed 2022-05-01 00:00:00 \n", + "4 2021-12-14 00:00:00 Successfully Completed 2021-09-01 00:00:00 \n", + "... ... ... ... \n", + "2028 NA NA 2025-05-01 00:00:00 \n", + "2029 NA NA 2025-05-01 00:00:00 \n", + "2030 NA Did Not Complete 2025-05-01 00:00:00 \n", + "2031 NA NA NA \n", + "2032 NA NA 2025-05-01 00:00:00 \n", + "\n", + " Category \n", + "0 Student Support \n", + "1 Pathway \n", + "2 Student Support \n", + "3 Pathway \n", + "4 Pathway \n", + "... ... \n", + "2028 Workshop \n", + "2029 Workshop \n", + "2030 Workshop \n", + "2031 Student Support \n", + "2032 Workshop \n", + "\n", + "[2033 rows x 13 columns]" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "enrollments['Category'] = ''\n", + "\n", + "enrollments.loc[enrollments['Service'].isin(pathways), 'Category'] = 'Pathway'\n", + "enrollments.loc[enrollments['Service'].isin(workshops), 'Category'] = 'Workshop'\n", + "enrollments.loc[enrollments['Service'].isin(support_ways), 'Category'] = 'Student Support'\n", + "\n", + "enrollments" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "64432809", + "metadata": {}, + "outputs": [], + "source": [ + "pie_df = enrollments.value_counts('Category').reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "712aeaff", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "domain": { + "x": [ + 0, + 1 + ], + "y": [ + 0, + 1 + ] + }, + "hovertemplate": "Category=%{label}
count=%{value}", + "labels": [ + "Pathway", + "Workshop", + "Student Support" + ], + "legendgroup": "", + "name": "", + "showlegend": true, + "type": "pie", + "values": { + "bdata": "7gMHAvwB", + "dtype": "i2" + } + } + ], + "layout": { + "legend": { + "tracegroupgap": 0 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Data structure by type of Service" + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = px.pie(pie_df, values='count', names='Category', title='Data structure by type of Service')\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "0ecaa27b", + "metadata": {}, + "source": [ + "### Understanding 'ATP Cohort'" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "dac5ffdb", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "Outcome", + "rawType": "object", + "type": "string" + }, + { + "name": "count", + "rawType": "int64", + "type": "integer" + } + ], + "ref": "2a570831-e290-4a11-8c91-12e20641721e", + "rows": [ + [ + "NA", + "445" + ], + [ + "Successfully Completed", + "10" + ], + [ + "Did Not Complete", + "5" + ] + ], + "shape": { + "columns": 1, + "rows": 3 + } + }, + "text/plain": [ + "Outcome\n", + "NA 445\n", + "Successfully Completed 10\n", + "Did Not Complete 5\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "enrollments[enrollments['ATP Cohort'] == 'NA'].value_counts('Outcome')" + ] } ], "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" } }, "nbformat": 4, From fc284f3ca603e2275f3e81b3fffc101dd6d54759 Mon Sep 17 00:00:00 2001 From: Euclides Date: Thu, 10 Jul 2025 12:31:25 -0400 Subject: [PATCH 3/9] Added test for cleaning function --- Cleaning_enrollments_data.ipynb | 1138 +------------------------------ 1 file changed, 29 insertions(+), 1109 deletions(-) diff --git a/Cleaning_enrollments_data.ipynb b/Cleaning_enrollments_data.ipynb index aac9c8c..09020d3 100644 --- a/Cleaning_enrollments_data.ipynb +++ b/Cleaning_enrollments_data.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 37, "id": "f782ef33", "metadata": {}, "outputs": [], @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 38, "id": "2efeac68", "metadata": {}, "outputs": [], @@ -60,12 +60,24 @@ " 'Actual End Date': NAN_VALUE_SUBSTITUTE, 'Outcome': NAN_VALUE_SUBSTITUTE, 'ATP Cohort': NAN_VALUE_SUBSTITUTE\n", " }\n", "\n", - " for column, na_value in columns_to_fix.items():\n", - " enrollments[column] = enrollments[column].fillna(na_value)\n", + " for column, substitute_value in columns_to_fix.items():\n", + " enrollments[column] = enrollments[column].fillna(substitute_value)\n", "\n", " return enrollments" ] }, + { + "cell_type": "code", + "execution_count": 39, + "id": "c34df585", + "metadata": {}, + "outputs": [], + "source": [ + "def Test_enrollments_cleaning(clean_df: pd.DataFrame):\n", + " # Parameter can be change to an in-function call of the data cleanner with the DF\n", + " assert ~clean_df.isna().any().any(), 'The Dataframe has na values.'" + ] + }, { "cell_type": "markdown", "id": "10146c9e", @@ -76,1107 +88,15 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 40, "id": "a32e170c", "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "index", - "rawType": "int64", - "type": "integer" - }, - { - "name": "Auto Id", - "rawType": "object", - "type": "string" - }, - { - "name": "KY Region", - "rawType": "object", - "type": "string" - }, - { - "name": "Assessment ID", - "rawType": "object", - "type": "string" - }, - { - "name": "EnrollmentId", - "rawType": "object", - "type": "string" - }, - { - "name": "Enrollment Service Name", - "rawType": "object", - "type": "string" - }, - { - "name": "Service", - "rawType": "object", - "type": "string" - }, - { - "name": "Projected Start Date", - "rawType": "object", - "type": "unknown" - }, - { - "name": "Actual Start Date", - "rawType": "object", - "type": "unknown" - }, - { - "name": "Projected End Date", - "rawType": "object", - "type": "unknown" - }, - { - "name": "Actual End Date", - "rawType": "object", - "type": "unknown" - }, - { - "name": "Outcome", - "rawType": "object", - "type": "string" - }, - { - "name": "ATP Cohort", - "rawType": "object", - "type": "unknown" - } - ], - "ref": "8188b1e0-d8ce-49d1-b22e-b47f52f9b422", - "rows": [ - [ - "0", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0011193", - "Career Readiness Workshop", - "2021-11-11 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "1", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0013492", - "Software Development 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00" - ], - [ - "2", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0014187", - "Career Readiness Workshop", - "2022-03-07 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "3", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0015022", - "Software Development 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-07-29 00:00:00", - "Successfully Completed", - "2022-05-01 00:00:00" - ], - [ - "4", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0015075", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00" - ], - [ - "5", - "202109-5230", - "SOAR", - "OA-003352", - "Enrollment-1389", - "ES-0015087", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-09-28 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "6", - "202109-5233", - "SOAR", - "OA-003355", - "Enrollment-1392", - "ES-0011185", - "Career Readiness Workshop", - "2021-11-11 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "7", - "202109-5233", - "SOAR", - "OA-003355", - "Enrollment-1392", - "ES-0013525", - "Web Development 2", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00" - ], - [ - "8", - "202109-5233", - "SOAR", - "OA-003355", - "Enrollment-1392", - "ES-0015081", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00" - ], - [ - "9", - "202109-5236", - "SOAR", - "OA-003358", - "Enrollment-1395", - "ES-0015092", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-01 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "10", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0011212", - "One-on-one Job Readiness", - "2021-11-23 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "11", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0015086", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-01 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "12", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0017175", - "Data Analysis 1", - "2023-01-04 00:00:00", - "NA", - "NA", - "2023-03-31 00:00:00", - "Successfully Completed", - "2023-01-01 00:00:00" - ], - [ - "13", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0017537", - "Career Readiness Workshop", - "2023-02-16 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "14", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0018722", - "Data Analytics 2", - "2023-05-09 00:00:00", - "NA", - "NA", - "2023-08-07 00:00:00", - "Successfully Completed", - "2023-05-01 00:00:00" - ], - [ - "15", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0023302", - "One-on-one Job Readiness", - "2024-06-12 00:00:00", - "2024-06-12 00:00:00", - "2024-06-12 00:00:00", - "2024-06-12 00:00:00", - "NA", - "NA" - ], - [ - "16", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0023396", - "One-on-one Job Readiness", - "2024-05-23 00:00:00", - "NA", - "2024-05-23 00:00:00", - "NA", - "NA", - "NA" - ], - [ - "17", - "202109-5238", - "SOAR", - "OA-003360", - "Enrollment-1398", - "ES-0015085", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-09 00:00:00", - "2021-12-09 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "18", - "202109-5239", - "SOAR", - "OA-003363", - "Enrollment-1401", - "ES-0017159", - "Web Development 1", - "2021-09-08 00:00:00", - "NA", - "NA", - "2021-09-23 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "19", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0011221", - "One-on-one Job Readiness", - "2021-12-03 00:00:00", - "2021-12-03 00:00:00", - "NA", - "NA", - "NA", - "NA" - ], - [ - "20", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0013503", - "Software Development 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00" - ], - [ - "21", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0014300", - "One-on-one Job Readiness", - "2022-03-29 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "22", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0015052", - "Software Development 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-07-29 00:00:00", - "Successfully Completed", - "2022-05-01 00:00:00" - ], - [ - "23", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0015076", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00" - ], - [ - "24", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0017237", - "Data Analysis 1", - "2022-08-24 00:00:00", - "NA", - "NA", - "2022-11-18 00:00:00", - "Successfully Completed", - "2022-09-01 00:00:00" - ], - [ - "25", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0017238", - "Data Analytics 2", - "2023-01-04 00:00:00", - "NA", - "NA", - "2023-03-31 00:00:00", - "Successfully Completed", - "2023-01-01 00:00:00" - ], - [ - "26", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0018733", - "Web Development 2", - "2023-05-11 00:00:00", - "NA", - "NA", - "2023-08-07 00:00:00", - "Successfully Completed", - "2023-05-01 00:00:00" - ], - [ - "27", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0011174", - "Career Readiness Workshop", - "2021-11-08 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "28", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0013507", - "Software Development 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-05-06 00:00:00", - "2022-05-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00" - ], - [ - "29", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0014268", - "One-on-one Job Readiness", - "2022-03-22 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "30", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0015065", - "Web Development 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-07-29 00:00:00", - "Did Not Complete", - "2022-05-01 00:00:00" - ], - [ - "31", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0015080", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00" - ], - [ - "32", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0023385", - "One-on-one Job Readiness", - "2024-01-29 00:00:00", - "NA", - "2024-01-29 00:00:00", - "NA", - "NA", - "NA" - ], - [ - "33", - "202109-5272", - "SOAR", - "OA-003395", - "Enrollment-1434", - "ES-0011175", - "Career Readiness Workshop", - "2021-11-08 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "34", - "202109-5272", - "SOAR", - "OA-003395", - "Enrollment-1434", - "ES-0013526", - "Web Development 2", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00" - ], - [ - "35", - "202109-5272", - "SOAR", - "OA-003395", - "Enrollment-1434", - "ES-0015078", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00" - ], - [ - "36", - "202109-5272", - "SOAR", - "OA-003395", - "Enrollment-1434", - "ES-0017252", - "Software Development 1", - "2022-08-24 00:00:00", - "NA", - "NA", - "2022-10-03 00:00:00", - "Did Not Complete", - "2022-09-01 00:00:00" - ], - [ - "37", - "202109-5273", - "SOAR", - "OA-003396", - "Enrollment-1435", - "ES-0015089", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-10-27 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "38", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0011204", - "Career Readiness Workshop", - "2021-11-18 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "39", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0013483", - "Data Analysis 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00" - ], - [ - "40", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0014299", - "One-on-one Job Readiness", - "2022-03-29 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "41", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0015047", - "Data Analytics 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-06-08 00:00:00", - "Did Not Complete", - "2022-05-01 00:00:00" - ], - [ - "42", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0015084", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00" - ], - [ - "43", - "202109-5275", - "SOAR", - "OA-003398", - "Enrollment-1609", - "ES-0015088", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-10-11 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "44", - "202109-5276", - "SOAR", - "OA-003399", - "Enrollment-1611", - "ES-0015091", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-10-28 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "45", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0011213", - "One-on-one Job Readiness", - "2021-11-23 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "46", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0013995", - "Software Development 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-07-29 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00" - ], - [ - "47", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0014191", - "Career Readiness Workshop", - "2022-03-07 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "48", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0015050", - "Software Development 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-07-29 00:00:00", - "Successfully Completed", - "2022-05-01 00:00:00" - ], - [ - "49", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0015074", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00" - ] - ], - "shape": { - "columns": 12, - "rows": 2033 - } - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Auto IdKY RegionAssessment IDEnrollmentIdEnrollment Service NameServiceProjected Start DateActual Start DateProjected End DateActual End DateOutcomeATP Cohort
0202109-5224SOAROA-003348Enrollment-1386ES-0011193Career Readiness Workshop2021-11-11 00:00:00NANANANANA
1202109-5224SOAROA-003348Enrollment-1386ES-0013492Software Development 12022-01-05 00:00:002022-01-05 00:00:002022-04-06 00:00:002022-04-06 00:00:00Successfully Completed2022-01-01 00:00:00
2202109-5224SOAROA-003348Enrollment-1386ES-0014187Career Readiness Workshop2022-03-07 00:00:00NANANANANA
3202109-5224SOAROA-003348Enrollment-1386ES-0015022Software Development 22022-05-04 00:00:002022-05-04 00:00:002022-07-29 00:00:002022-07-29 00:00:00Successfully Completed2022-05-01 00:00:00
4202109-5224SOAROA-003348Enrollment-1386ES-0015075Web Development 12021-09-08 00:00:002021-09-08 00:00:002021-12-14 00:00:002021-12-14 00:00:00Successfully Completed2021-09-01 00:00:00
.......................................
2028202504-21723SOAROA-022760Enrollment-14196ES-0035149Intro to Programming Core2025-05-12 00:00:002025-05-12 00:00:002025-06-27 00:00:00NANA2025-05-01 00:00:00
2029202505-22788SOAROA-023710Enrollment-14213ES-0035212Intro to Programming Core2025-05-14 00:00:002025-05-13 00:00:002025-06-17 00:00:00NANA2025-05-01 00:00:00
2030202408-16568SOAROA-017961Enrollment-14833ES-0036429Intro to Programming Core2025-05-12 00:00:00NANANADid Not Complete2025-05-01 00:00:00
2031202408-16568SOAROA-017961Enrollment-14833ES-0036430Supportive Services ReferralNANANANANANA
2032202503-21420SOAROA-022426Enrollment-15195ES-0038953Intro to Programming Core2025-05-12 00:00:00NA2025-06-27 00:00:00NANA2025-05-01 00:00:00
\n", - "

2033 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " Auto Id KY Region Assessment ID EnrollmentId \\\n", - "0 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "1 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "2 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "3 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "4 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "... ... ... ... ... \n", - "2028 202504-21723 SOAR OA-022760 Enrollment-14196 \n", - "2029 202505-22788 SOAR OA-023710 Enrollment-14213 \n", - "2030 202408-16568 SOAR OA-017961 Enrollment-14833 \n", - "2031 202408-16568 SOAR OA-017961 Enrollment-14833 \n", - "2032 202503-21420 SOAR OA-022426 Enrollment-15195 \n", - "\n", - " Enrollment Service Name Service \\\n", - "0 ES-0011193 Career Readiness Workshop \n", - "1 ES-0013492 Software Development 1 \n", - "2 ES-0014187 Career Readiness Workshop \n", - "3 ES-0015022 Software Development 2 \n", - "4 ES-0015075 Web Development 1 \n", - "... ... ... \n", - "2028 ES-0035149 Intro to Programming Core \n", - "2029 ES-0035212 Intro to Programming Core \n", - "2030 ES-0036429 Intro to Programming Core \n", - "2031 ES-0036430 Supportive Services Referral \n", - "2032 ES-0038953 Intro to Programming Core \n", - "\n", - " Projected Start Date Actual Start Date Projected End Date \\\n", - "0 2021-11-11 00:00:00 NA NA \n", - "1 2022-01-05 00:00:00 2022-01-05 00:00:00 2022-04-06 00:00:00 \n", - "2 2022-03-07 00:00:00 NA NA \n", - "3 2022-05-04 00:00:00 2022-05-04 00:00:00 2022-07-29 00:00:00 \n", - "4 2021-09-08 00:00:00 2021-09-08 00:00:00 2021-12-14 00:00:00 \n", - "... ... ... ... \n", - "2028 2025-05-12 00:00:00 2025-05-12 00:00:00 2025-06-27 00:00:00 \n", - "2029 2025-05-14 00:00:00 2025-05-13 00:00:00 2025-06-17 00:00:00 \n", - "2030 2025-05-12 00:00:00 NA NA \n", - "2031 NA NA NA \n", - "2032 2025-05-12 00:00:00 NA 2025-06-27 00:00:00 \n", - "\n", - " Actual End Date Outcome ATP Cohort \n", - "0 NA NA NA \n", - "1 2022-04-06 00:00:00 Successfully Completed 2022-01-01 00:00:00 \n", - "2 NA NA NA \n", - "3 2022-07-29 00:00:00 Successfully Completed 2022-05-01 00:00:00 \n", - "4 2021-12-14 00:00:00 Successfully Completed 2021-09-01 00:00:00 \n", - "... ... ... ... \n", - "2028 NA NA 2025-05-01 00:00:00 \n", - "2029 NA NA 2025-05-01 00:00:00 \n", - "2030 NA Did Not Complete 2025-05-01 00:00:00 \n", - "2031 NA NA NA \n", - "2032 NA NA 2025-05-01 00:00:00 \n", - "\n", - "[2033 rows x 12 columns]" - ] - }, - "execution_count": 93, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "enrollments = pd.read_excel('Data\\\\Raw\\\\ARC Enrollments.xlsx')\n", "enrollments = Enrollments_cleaning(enrollments)\n", - "enrollments\n" + "enrollments\n", + "Test_enrollments_cleaning(enrollments)\n" ] }, { @@ -1189,7 +109,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 41, "id": "423d5090", "metadata": {}, "outputs": [], @@ -1264,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 42, "id": "61a579c5", "metadata": {}, "outputs": [ @@ -1343,7 +263,7 @@ "type": "string" } ], - "ref": "e5e63e5c-563c-4854-8ac3-209fb212f89f", + "ref": "60433c33-2973-4ecf-8a66-39b937632565", "rows": [ [ "0", @@ -2436,7 +1356,7 @@ "[2033 rows x 13 columns]" ] }, - "execution_count": 95, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -2453,7 +1373,7 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 43, "id": "64432809", "metadata": {}, "outputs": [], @@ -2463,7 +1383,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 44, "id": "712aeaff", "metadata": {}, "outputs": [ @@ -3306,7 +2226,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 45, "id": "dac5ffdb", "metadata": {}, "outputs": [ @@ -3325,7 +2245,7 @@ "type": "integer" } ], - "ref": "2a570831-e290-4a11-8c91-12e20641721e", + "ref": "847f75cd-6b6c-4d4c-a1dc-5c109a6499d9", "rows": [ [ "NA", @@ -3353,7 +2273,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 100, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } From e42ab48b24c67d9794866625f71469f4d2b41eff Mon Sep 17 00:00:00 2001 From: Euclides Date: Fri, 11 Jul 2025 00:32:12 -0400 Subject: [PATCH 4/9] Most common pathways taken by cohort --- historical_student_data_dashboards.ipynb | 1475 ++++++++++++++++++++++ 1 file changed, 1475 insertions(+) create mode 100644 historical_student_data_dashboards.ipynb diff --git a/historical_student_data_dashboards.ipynb b/historical_student_data_dashboards.ipynb new file mode 100644 index 0000000..e158be7 --- /dev/null +++ b/historical_student_data_dashboards.ipynb @@ -0,0 +1,1475 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c73760d6", + "metadata": {}, + "source": [ + "# Dashboard Historical student data\n", + "\n", + "## 1. Most common pathways taken:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "ec855ef5", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "from dash import Dash, dcc, html, Input, Output" + ] + }, + { + "cell_type": "markdown", + "id": "49621639", + "metadata": {}, + "source": [ + "### Data cleaning: (This can be extracted to an object or just create a funciton that is call once and outputs a clean file, while the process is not define I will just add the cleaning functions to this file)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "bc04972b", + "metadata": {}, + "outputs": [], + "source": [ + "# Cleaning and testing function\n", + "def Enrollments_cleaning(df: pd.DataFrame) -> pd.DataFrame:\n", + " # Columns to clean\n", + " COLUMNS_TO_DROP = ['Full Name']\n", + " result = df.drop(columns=COLUMNS_TO_DROP)\n", + "\n", + " # Fix NaN values\n", + " NAN_VALUE_SUBSTITUTE = 'NA'\n", + " columns_to_fix = {\n", + " 'Projected Start Date': NAN_VALUE_SUBSTITUTE, 'Actual Start Date': NAN_VALUE_SUBSTITUTE, 'Projected End Date': NAN_VALUE_SUBSTITUTE,\n", + " 'Actual End Date': NAN_VALUE_SUBSTITUTE, 'Outcome': NAN_VALUE_SUBSTITUTE, 'ATP Cohort': NAN_VALUE_SUBSTITUTE\n", + " }\n", + "\n", + " for column, substitute_value in columns_to_fix.items():\n", + " result[column] = result[column].fillna(substitute_value)\n", + "\n", + " # Added the tests inside the cleaning function because it cannot be on a separeted folder structure until testing methods are define\n", + " Test_enrollments_cleaning(result)\n", + "\n", + " return result\n", + "\n", + "def Test_enrollments_cleaning(clean_df: pd.DataFrame):\n", + " # Parameter can be change to an in-function call of the data cleanner with the DF\n", + " assert ~clean_df.isna().any().any(), 'The Dataframe has na values.'" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "15b7c4ec", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Auto Id", + "rawType": "object", + "type": "string" + }, + { + "name": "KY Region", + "rawType": "object", + "type": "string" + }, + { + "name": "Assessment ID", + "rawType": "object", + "type": "string" + }, + { + "name": "EnrollmentId", + "rawType": "object", + "type": "string" + }, + { + "name": "Enrollment Service Name", + "rawType": "object", + "type": "string" + }, + { + "name": "Service", + "rawType": "object", + "type": "string" + }, + { + "name": "Projected Start Date", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Actual Start Date", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Projected End Date", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Actual End Date", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Outcome", + "rawType": "object", + "type": "string" + }, + { + "name": "ATP Cohort", + "rawType": "object", + "type": "unknown" + } + ], + "ref": "fe7f52b6-8b54-40db-a406-410d2e28f45d", + "rows": [ + [ + "0", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0011193", + "Career Readiness Workshop", + "2021-11-11 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "1", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0013492", + "Software Development 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00" + ], + [ + "2", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0014187", + "Career Readiness Workshop", + "2022-03-07 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "3", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0015022", + "Software Development 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-07-29 00:00:00", + "Successfully Completed", + "2022-05-01 00:00:00" + ], + [ + "4", + "202109-5224", + "SOAR", + "OA-003348", + "Enrollment-1386", + "ES-0015075", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00" + ], + [ + "5", + "202109-5230", + "SOAR", + "OA-003352", + "Enrollment-1389", + "ES-0015087", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-09-28 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "6", + "202109-5233", + "SOAR", + "OA-003355", + "Enrollment-1392", + "ES-0011185", + "Career Readiness Workshop", + "2021-11-11 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "7", + "202109-5233", + "SOAR", + "OA-003355", + "Enrollment-1392", + "ES-0013525", + "Web Development 2", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00" + ], + [ + "8", + "202109-5233", + "SOAR", + "OA-003355", + "Enrollment-1392", + "ES-0015081", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00" + ], + [ + "9", + "202109-5236", + "SOAR", + "OA-003358", + "Enrollment-1395", + "ES-0015092", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-01 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "10", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0011212", + "One-on-one Job Readiness", + "2021-11-23 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "11", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0015086", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-01 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "12", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0017175", + "Data Analysis 1", + "2023-01-04 00:00:00", + "NA", + "NA", + "2023-03-31 00:00:00", + "Successfully Completed", + "2023-01-01 00:00:00" + ], + [ + "13", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0017537", + "Career Readiness Workshop", + "2023-02-16 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "14", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0018722", + "Data Analytics 2", + "2023-05-09 00:00:00", + "NA", + "NA", + "2023-08-07 00:00:00", + "Successfully Completed", + "2023-05-01 00:00:00" + ], + [ + "15", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0023302", + "One-on-one Job Readiness", + "2024-06-12 00:00:00", + "2024-06-12 00:00:00", + "2024-06-12 00:00:00", + "2024-06-12 00:00:00", + "NA", + "NA" + ], + [ + "16", + "202109-5237", + "SOAR", + "OA-003359", + "Enrollment-1396", + "ES-0023396", + "One-on-one Job Readiness", + "2024-05-23 00:00:00", + "NA", + "2024-05-23 00:00:00", + "NA", + "NA", + "NA" + ], + [ + "17", + "202109-5238", + "SOAR", + "OA-003360", + "Enrollment-1398", + "ES-0015085", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-09 00:00:00", + "2021-12-09 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "18", + "202109-5239", + "SOAR", + "OA-003363", + "Enrollment-1401", + "ES-0017159", + "Web Development 1", + "2021-09-08 00:00:00", + "NA", + "NA", + "2021-09-23 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "19", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0011221", + "One-on-one Job Readiness", + "2021-12-03 00:00:00", + "2021-12-03 00:00:00", + "NA", + "NA", + "NA", + "NA" + ], + [ + "20", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0013503", + "Software Development 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00" + ], + [ + "21", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0014300", + "One-on-one Job Readiness", + "2022-03-29 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "22", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0015052", + "Software Development 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-07-29 00:00:00", + "Successfully Completed", + "2022-05-01 00:00:00" + ], + [ + "23", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0015076", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00" + ], + [ + "24", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0017237", + "Data Analysis 1", + "2022-08-24 00:00:00", + "NA", + "NA", + "2022-11-18 00:00:00", + "Successfully Completed", + "2022-09-01 00:00:00" + ], + [ + "25", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0017238", + "Data Analytics 2", + "2023-01-04 00:00:00", + "NA", + "NA", + "2023-03-31 00:00:00", + "Successfully Completed", + "2023-01-01 00:00:00" + ], + [ + "26", + "202109-5243", + "SOAR", + "OA-003366", + "Enrollment-1404", + "ES-0018733", + "Web Development 2", + "2023-05-11 00:00:00", + "NA", + "NA", + "2023-08-07 00:00:00", + "Successfully Completed", + "2023-05-01 00:00:00" + ], + [ + "27", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0011174", + "Career Readiness Workshop", + "2021-11-08 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "28", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0013507", + "Software Development 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-05-06 00:00:00", + "2022-05-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00" + ], + [ + "29", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0014268", + "One-on-one Job Readiness", + "2022-03-22 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "30", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0015065", + "Web Development 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-07-29 00:00:00", + "Did Not Complete", + "2022-05-01 00:00:00" + ], + [ + "31", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0015080", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00" + ], + [ + "32", + "202109-5245", + "SOAR", + "OA-003367", + "Enrollment-1405", + "ES-0023385", + "One-on-one Job Readiness", + "2024-01-29 00:00:00", + "NA", + "2024-01-29 00:00:00", + "NA", + "NA", + "NA" + ], + [ + "33", + "202109-5272", + "SOAR", + "OA-003395", + "Enrollment-1434", + "ES-0011175", + "Career Readiness Workshop", + "2021-11-08 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "34", + "202109-5272", + "SOAR", + "OA-003395", + "Enrollment-1434", + "ES-0013526", + "Web Development 2", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00" + ], + [ + "35", + "202109-5272", + "SOAR", + "OA-003395", + "Enrollment-1434", + "ES-0015078", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00" + ], + [ + "36", + "202109-5272", + "SOAR", + "OA-003395", + "Enrollment-1434", + "ES-0017252", + "Software Development 1", + "2022-08-24 00:00:00", + "NA", + "NA", + "2022-10-03 00:00:00", + "Did Not Complete", + "2022-09-01 00:00:00" + ], + [ + "37", + "202109-5273", + "SOAR", + "OA-003396", + "Enrollment-1435", + "ES-0015089", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-10-27 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "38", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0011204", + "Career Readiness Workshop", + "2021-11-18 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "39", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0013483", + "Data Analysis 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-04-06 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00" + ], + [ + "40", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0014299", + "One-on-one Job Readiness", + "2022-03-29 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "41", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0015047", + "Data Analytics 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-06-08 00:00:00", + "Did Not Complete", + "2022-05-01 00:00:00" + ], + [ + "42", + "202109-5274", + "SOAR", + "OA-003397", + "Enrollment-1608", + "ES-0015084", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00" + ], + [ + "43", + "202109-5275", + "SOAR", + "OA-003398", + "Enrollment-1609", + "ES-0015088", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-10-11 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "44", + "202109-5276", + "SOAR", + "OA-003399", + "Enrollment-1611", + "ES-0015091", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-10-28 00:00:00", + "Did Not Complete", + "2021-09-01 00:00:00" + ], + [ + "45", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0011213", + "One-on-one Job Readiness", + "2021-11-23 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "46", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0013995", + "Software Development 1", + "2022-01-05 00:00:00", + "2022-01-05 00:00:00", + "2022-04-06 00:00:00", + "2022-07-29 00:00:00", + "Successfully Completed", + "2022-01-01 00:00:00" + ], + [ + "47", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0014191", + "Career Readiness Workshop", + "2022-03-07 00:00:00", + "NA", + "NA", + "NA", + "NA", + "NA" + ], + [ + "48", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0015050", + "Software Development 2", + "2022-05-04 00:00:00", + "2022-05-04 00:00:00", + "2022-07-29 00:00:00", + "2022-07-29 00:00:00", + "Successfully Completed", + "2022-05-01 00:00:00" + ], + [ + "49", + "202109-5277", + "SOAR", + "OA-003400", + "Enrollment-4425", + "ES-0015074", + "Web Development 1", + "2021-09-08 00:00:00", + "2021-09-08 00:00:00", + "2021-12-14 00:00:00", + "2021-12-14 00:00:00", + "Successfully Completed", + "2021-09-01 00:00:00" + ] + ], + "shape": { + "columns": 12, + "rows": 2033 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Auto IdKY RegionAssessment IDEnrollmentIdEnrollment Service NameServiceProjected Start DateActual Start DateProjected End DateActual End DateOutcomeATP Cohort
0202109-5224SOAROA-003348Enrollment-1386ES-0011193Career Readiness Workshop2021-11-11 00:00:00NANANANANA
1202109-5224SOAROA-003348Enrollment-1386ES-0013492Software Development 12022-01-05 00:00:002022-01-05 00:00:002022-04-06 00:00:002022-04-06 00:00:00Successfully Completed2022-01-01 00:00:00
2202109-5224SOAROA-003348Enrollment-1386ES-0014187Career Readiness Workshop2022-03-07 00:00:00NANANANANA
3202109-5224SOAROA-003348Enrollment-1386ES-0015022Software Development 22022-05-04 00:00:002022-05-04 00:00:002022-07-29 00:00:002022-07-29 00:00:00Successfully Completed2022-05-01 00:00:00
4202109-5224SOAROA-003348Enrollment-1386ES-0015075Web Development 12021-09-08 00:00:002021-09-08 00:00:002021-12-14 00:00:002021-12-14 00:00:00Successfully Completed2021-09-01 00:00:00
.......................................
2028202504-21723SOAROA-022760Enrollment-14196ES-0035149Intro to Programming Core2025-05-12 00:00:002025-05-12 00:00:002025-06-27 00:00:00NANA2025-05-01 00:00:00
2029202505-22788SOAROA-023710Enrollment-14213ES-0035212Intro to Programming Core2025-05-14 00:00:002025-05-13 00:00:002025-06-17 00:00:00NANA2025-05-01 00:00:00
2030202408-16568SOAROA-017961Enrollment-14833ES-0036429Intro to Programming Core2025-05-12 00:00:00NANANADid Not Complete2025-05-01 00:00:00
2031202408-16568SOAROA-017961Enrollment-14833ES-0036430Supportive Services ReferralNANANANANANA
2032202503-21420SOAROA-022426Enrollment-15195ES-0038953Intro to Programming Core2025-05-12 00:00:00NA2025-06-27 00:00:00NANA2025-05-01 00:00:00
\n", + "

2033 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " Auto Id KY Region Assessment ID EnrollmentId \\\n", + "0 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "1 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "2 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "3 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "4 202109-5224 SOAR OA-003348 Enrollment-1386 \n", + "... ... ... ... ... \n", + "2028 202504-21723 SOAR OA-022760 Enrollment-14196 \n", + "2029 202505-22788 SOAR OA-023710 Enrollment-14213 \n", + "2030 202408-16568 SOAR OA-017961 Enrollment-14833 \n", + "2031 202408-16568 SOAR OA-017961 Enrollment-14833 \n", + "2032 202503-21420 SOAR OA-022426 Enrollment-15195 \n", + "\n", + " Enrollment Service Name Service \\\n", + "0 ES-0011193 Career Readiness Workshop \n", + "1 ES-0013492 Software Development 1 \n", + "2 ES-0014187 Career Readiness Workshop \n", + "3 ES-0015022 Software Development 2 \n", + "4 ES-0015075 Web Development 1 \n", + "... ... ... \n", + "2028 ES-0035149 Intro to Programming Core \n", + "2029 ES-0035212 Intro to Programming Core \n", + "2030 ES-0036429 Intro to Programming Core \n", + "2031 ES-0036430 Supportive Services Referral \n", + "2032 ES-0038953 Intro to Programming Core \n", + "\n", + " Projected Start Date Actual Start Date Projected End Date \\\n", + "0 2021-11-11 00:00:00 NA NA \n", + "1 2022-01-05 00:00:00 2022-01-05 00:00:00 2022-04-06 00:00:00 \n", + "2 2022-03-07 00:00:00 NA NA \n", + "3 2022-05-04 00:00:00 2022-05-04 00:00:00 2022-07-29 00:00:00 \n", + "4 2021-09-08 00:00:00 2021-09-08 00:00:00 2021-12-14 00:00:00 \n", + "... ... ... ... \n", + "2028 2025-05-12 00:00:00 2025-05-12 00:00:00 2025-06-27 00:00:00 \n", + "2029 2025-05-14 00:00:00 2025-05-13 00:00:00 2025-06-17 00:00:00 \n", + "2030 2025-05-12 00:00:00 NA NA \n", + "2031 NA NA NA \n", + "2032 2025-05-12 00:00:00 NA 2025-06-27 00:00:00 \n", + "\n", + " Actual End Date Outcome ATP Cohort \n", + "0 NA NA NA \n", + "1 2022-04-06 00:00:00 Successfully Completed 2022-01-01 00:00:00 \n", + "2 NA NA NA \n", + "3 2022-07-29 00:00:00 Successfully Completed 2022-05-01 00:00:00 \n", + "4 2021-12-14 00:00:00 Successfully Completed 2021-09-01 00:00:00 \n", + "... ... ... ... \n", + "2028 NA NA 2025-05-01 00:00:00 \n", + "2029 NA NA 2025-05-01 00:00:00 \n", + "2030 NA Did Not Complete 2025-05-01 00:00:00 \n", + "2031 NA NA NA \n", + "2032 NA NA 2025-05-01 00:00:00 \n", + "\n", + "[2033 rows x 12 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "enrollments = pd.read_excel('Data\\\\Raw\\\\ARC Enrollments.xlsx')\n", + "enrollments = Enrollments_cleaning(enrollments)\n", + "enrollments" + ] + }, + { + "cell_type": "markdown", + "id": "8c9d2634", + "metadata": {}, + "source": [ + "### 1.1 Most common path by period\n", + "- Periods are going to be define by ATP Cohort" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0773f9b7", + "metadata": {}, + "outputs": [], + "source": [ + "# Starter pathways are the only path that have to be taken into consideration for each period student pathway choosing\n", + "Starter_pathways = [\n", + " 'Software Development 1', \n", + " 'Web Development 1', \n", + " 'Data Analysis 1',\n", + " 'Web Development M1',\n", + " 'Data Analysis M1', \n", + " 'Software Development M1',\n", + " 'Quality Assurance M1', \n", + " 'User Experience M1',\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "c4902e7d", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "Service", + "rawType": "object", + "type": "string" + }, + { + "name": "count", + "rawType": "int64", + "type": "integer" + } + ], + "ref": "9f4d7ec5-8d5e-4982-82fa-982ad126f08b", + "rows": [ + [ + "Career Readiness Workshop", + "224" + ], + [ + "One-on-one Job Readiness", + "87" + ], + [ + "Remote Jobs Workshop (EDA Grant)", + "30" + ], + [ + "Referral to External Service", + "20" + ], + [ + "Supportive Services Referral", + "20" + ], + [ + "Tech Communications Workshop", + "20" + ], + [ + "Employer Event (Code:You)", + "14" + ], + [ + "Demo Day Participant", + "12" + ], + [ + "Remote Jobs Workshop (non EDA)", + "8" + ], + [ + "Technical Project Management", + "6" + ], + [ + "Resume Review and Optimization", + "6" + ], + [ + "Interview Guidance and Practice", + "5" + ], + [ + "Data Analysis M4", + "2" + ], + [ + "Web Development M4", + "2" + ], + [ + "Revised Resume Review", + "2" + ], + [ + "Software Development M4", + "1" + ], + [ + "Web Development 1", + "1" + ] + ], + "shape": { + "columns": 1, + "rows": 17 + } + }, + "text/plain": [ + "Service\n", + "Career Readiness Workshop 224\n", + "One-on-one Job Readiness 87\n", + "Remote Jobs Workshop (EDA Grant) 30\n", + "Referral to External Service 20\n", + "Supportive Services Referral 20\n", + "Tech Communications Workshop 20\n", + "Employer Event (Code:You) 14\n", + "Demo Day Participant 12\n", + "Remote Jobs Workshop (non EDA) 8\n", + "Technical Project Management 6\n", + "Resume Review and Optimization 6\n", + "Interview Guidance and Practice 5\n", + "Data Analysis M4 2\n", + "Web Development M4 2\n", + "Revised Resume Review 2\n", + "Software Development M4 1\n", + "Web Development 1 1\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# What are the NA values in 'ATP Cohort'?\n", + "enrollments[enrollments['ATP Cohort'] == 'NA'].value_counts('Service')\n", + "\n", + "# looks like pathways are not represented in ATP Cohort NA Values (Probably has more to do with support entries)\n", + "# only 6 of the 460 ATP cohort 'NA' values are pathways" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "ba47a304", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Timestamp('2022-01-01 00:00:00'),\n", + " Timestamp('2022-05-01 00:00:00'),\n", + " Timestamp('2021-09-01 00:00:00'),\n", + " Timestamp('2023-01-01 00:00:00'),\n", + " Timestamp('2023-05-01 00:00:00'),\n", + " Timestamp('2022-09-01 00:00:00'),\n", + " Timestamp('2024-05-01 00:00:00'),\n", + " Timestamp('2024-08-01 00:00:00'),\n", + " Timestamp('2023-08-01 00:00:00'),\n", + " Timestamp('2024-01-01 00:00:00'),\n", + " Timestamp('2025-01-01 00:00:00'),\n", + " Timestamp('2025-05-01 00:00:00')]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cohorts = list(enrollments['ATP Cohort'].unique())\n", + "cohorts.remove('NA')\n", + "cohorts\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "beb37448", + "metadata": {}, + "outputs": [], + "source": [ + "mask_starter_pathways = enrollments['Service'].isin(Starter_pathways)\n", + "starter_only_enrollments = enrollments[mask_starter_pathways]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5436f5e6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "app = Dash(__name__)\n", + "# Const\n", + "options = cohorts\n", + "pathway_color = {\n", + " 'Software Development 1': 'green', \n", + " 'Web Development 1': 'blue', \n", + " 'Data Analysis 1': 'red',\n", + " 'Web Development M1': 'blue',\n", + " 'Data Analysis M1': 'red', \n", + " 'Software Development M1': 'green',\n", + " 'Quality Assurance M1': 'yellow', \n", + " 'User Experience M1': 'purple'\n", + "}\n", + "\n", + "# Display\n", + "app.layout = html.Div([\n", + " html.H2('Cohorts', style={'text-align': \"center\"}),\n", + " html.P('Select Cohort:'),\n", + " dcc.Dropdown(\n", + " id=\"dropdown\",\n", + " options=options,\n", + " value=options[0],\n", + " clearable=False,\n", + " ),\n", + " dcc.Graph(id=\"graph\")\n", + " \n", + "], style={'backgroundColor':'white'})\n", + "\n", + "@app.callback(\n", + " Output(\"graph\", \"figure\"),\n", + " Input(\"dropdown\", \"value\"))\n", + "\n", + "# Graph\n", + "def tt(time):\n", + " df = starter_only_enrollments[starter_only_enrollments['ATP Cohort'] == pd.Timestamp(time)].value_counts('Service').reset_index()\n", + " df['color'] = df['Service'].map(pathway_color)\n", + " fig = px.pie(df, names='Service', values='count', color='Service', color_discrete_map=pathway_color)\n", + " return fig\n", + "\n", + "app.run(debug=True)\n", + "\n", + "# TODO: Add number of students per each cohort \n", + "# TODO: Fix the options on the selection \n", + "# TODO: make colors better" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 6e6433b112094befcc1573c6b3276a7a60eed0ef Mon Sep 17 00:00:00 2001 From: Euclides Date: Fri, 11 Jul 2025 10:40:53 -0400 Subject: [PATCH 5/9] Added all cohorts category to pie chart --- historical_student_data_dashboards.ipynb | 40 ++++++++++++++---------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/historical_student_data_dashboards.ipynb b/historical_student_data_dashboards.ipynb index e158be7..eb8d69f 100644 --- a/historical_student_data_dashboards.ipynb +++ b/historical_student_data_dashboards.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 12, "id": "ec855ef5", "metadata": {}, "outputs": [], @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 13, "id": "bc04972b", "metadata": {}, "outputs": [], @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 14, "id": "15b7c4ec", "metadata": {}, "outputs": [ @@ -140,7 +140,7 @@ "type": "unknown" } ], - "ref": "fe7f52b6-8b54-40db-a406-410d2e28f45d", + "ref": "3bf69254-35b4-4a26-bf81-50d0f0877e5e", "rows": [ [ "0", @@ -1158,7 +1158,7 @@ "[2033 rows x 12 columns]" ] }, - "execution_count": 32, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1180,7 +1180,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "0773f9b7", "metadata": {}, "outputs": [], @@ -1200,7 +1200,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 16, "id": "c4902e7d", "metadata": {}, "outputs": [ @@ -1219,7 +1219,7 @@ "type": "integer" } ], - "ref": "9f4d7ec5-8d5e-4982-82fa-982ad126f08b", + "ref": "d00bd9eb-31ee-44bd-b8dc-5c724e292ede", "rows": [ [ "Career Readiness Workshop", @@ -1317,7 +1317,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 34, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1332,14 +1332,15 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 22, "id": "ba47a304", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[Timestamp('2022-01-01 00:00:00'),\n", + "['All cohorts',\n", + " Timestamp('2022-01-01 00:00:00'),\n", " Timestamp('2022-05-01 00:00:00'),\n", " Timestamp('2021-09-01 00:00:00'),\n", " Timestamp('2023-01-01 00:00:00'),\n", @@ -1353,7 +1354,7 @@ " Timestamp('2025-05-01 00:00:00')]" ] }, - "execution_count": 35, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1361,12 +1362,13 @@ "source": [ "cohorts = list(enrollments['ATP Cohort'].unique())\n", "cohorts.remove('NA')\n", - "cohorts\n" + "cohorts.insert(0, 'All cohorts')\n", + "cohorts" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 18, "id": "beb37448", "metadata": {}, "outputs": [], @@ -1377,7 +1379,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "5436f5e6", "metadata": {}, "outputs": [ @@ -1396,7 +1398,7 @@ " " ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1438,7 +1440,11 @@ "\n", "# Graph\n", "def tt(time):\n", - " df = starter_only_enrollments[starter_only_enrollments['ATP Cohort'] == pd.Timestamp(time)].value_counts('Service').reset_index()\n", + " if time == 'All cohorts':\n", + " df = starter_only_enrollments.value_counts('Service').reset_index()\n", + " else:\n", + " df = starter_only_enrollments[starter_only_enrollments['ATP Cohort'] == pd.Timestamp(time)].value_counts('Service').reset_index()\n", + "\n", " df['color'] = df['Service'].map(pathway_color)\n", " fig = px.pie(df, names='Service', values='count', color='Service', color_discrete_map=pathway_color)\n", " return fig\n", From e6a2b6312afa582ac782476e3600ada5ff433c61 Mon Sep 17 00:00:00 2001 From: Euclides Date: Fri, 11 Jul 2025 14:50:59 -0400 Subject: [PATCH 6/9] Started developing for completion percentages --- historical_student_data_dashboards.ipynb | 1676 +++++++++++++++++++++- 1 file changed, 1660 insertions(+), 16 deletions(-) diff --git a/historical_student_data_dashboards.ipynb b/historical_student_data_dashboards.ipynb index eb8d69f..e1633d0 100644 --- a/historical_student_data_dashboards.ipynb +++ b/historical_student_data_dashboards.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 86, "id": "ec855ef5", "metadata": {}, "outputs": [], @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 87, "id": "bc04972b", "metadata": {}, "outputs": [], @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 88, "id": "15b7c4ec", "metadata": {}, "outputs": [ @@ -140,7 +140,7 @@ "type": "unknown" } ], - "ref": "3bf69254-35b4-4a26-bf81-50d0f0877e5e", + "ref": "fe8d8e5e-67bd-4ba2-8468-2d78bc22e701", "rows": [ [ "0", @@ -1158,7 +1158,7 @@ "[2033 rows x 12 columns]" ] }, - "execution_count": 14, + "execution_count": 88, "metadata": {}, "output_type": "execute_result" } @@ -1180,12 +1180,13 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 89, "id": "0773f9b7", "metadata": {}, "outputs": [], "source": [ "# Starter pathways are the only path that have to be taken into consideration for each period student pathway choosing\n", + "# TODO: maybe make them generate automatically \n", "Starter_pathways = [\n", " 'Software Development 1', \n", " 'Web Development 1', \n", @@ -1200,7 +1201,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 90, "id": "c4902e7d", "metadata": {}, "outputs": [ @@ -1219,7 +1220,7 @@ "type": "integer" } ], - "ref": "d00bd9eb-31ee-44bd-b8dc-5c724e292ede", + "ref": "4b783bd9-3580-4316-b4ef-e79e0f73a9b2", "rows": [ [ "Career Readiness Workshop", @@ -1317,7 +1318,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 16, + "execution_count": 90, "metadata": {}, "output_type": "execute_result" } @@ -1332,7 +1333,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 91, "id": "ba47a304", "metadata": {}, "outputs": [ @@ -1354,7 +1355,7 @@ " Timestamp('2025-05-01 00:00:00')]" ] }, - "execution_count": 22, + "execution_count": 91, "metadata": {}, "output_type": "execute_result" } @@ -1368,7 +1369,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 92, "id": "beb37448", "metadata": {}, "outputs": [], @@ -1379,7 +1380,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 93, "id": "5436f5e6", "metadata": {}, "outputs": [ @@ -1390,7 +1391,7 @@ " " + "" ] }, "metadata": {}, @@ -1440,6 +1441,7 @@ "\n", "# Graph\n", "def tt(time):\n", + " # TODO: Separate the \n", " if time == 'All cohorts':\n", " df = starter_only_enrollments.value_counts('Service').reset_index()\n", " else:\n", @@ -1449,12 +1451,1654 @@ " fig = px.pie(df, names='Service', values='count', color='Service', color_discrete_map=pathway_color)\n", " return fig\n", "\n", - "app.run(debug=True)\n", + "app.run(debug=True, port=8052)\n", "\n", "# TODO: Add number of students per each cohort \n", "# TODO: Fix the options on the selection \n", "# TODO: make colors better" ] + }, + { + "cell_type": "markdown", + "id": "16f9f9a6", + "metadata": {}, + "source": [ + "### 2. Completion rates" + ] + }, + { + "cell_type": "markdown", + "id": "af3ada51", + "metadata": {}, + "source": [ + "### 2.1 completion rate by path" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "1504a061", + "metadata": {}, + "outputs": [], + "source": [ + "pathways = [\n", + " 'Software Development 1', \n", + " 'Software Development 2', \n", + " 'Web Development 1', \n", + " 'Web Development 2', \n", + " 'Data Analysis 1',\n", + " 'Data Analytics 2',\n", + " 'Web Development M1',\n", + " 'Web Development M2',\n", + " 'Web Development M3',\n", + " 'Web Development M4',\n", + " 'Data Analysis M1', \n", + " 'Data Analysis M2',\n", + " 'Data Analysis M3',\n", + " 'Data Analysis M4', \n", + " 'Software Development M1',\n", + " 'Software Development M2',\n", + " 'Software Development M3',\n", + " 'Software Development M4',\n", + " 'Quality Assurance M1', \n", + " 'Quality Assurance M2',\n", + " 'Quality Assurance M3', \n", + " 'Quality Assurance M4', \n", + " 'User Experience M1', \n", + " 'User Experience M2',\n", + " 'User Experience M3', \n", + " 'User Experience M4',\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "id": "930b4918", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Module", + "rawType": "object", + "type": "string" + }, + { + "name": "Did Not Complete", + "rawType": "float64", + "type": "float" + }, + { + "name": "Successfully Completed", + "rawType": "float64", + "type": "float" + }, + { + "name": "Partially Completed", + "rawType": "float64", + "type": "float" + }, + { + "name": "NA", + "rawType": "float64", + "type": "float" + }, + { + "name": "Pathway", + "rawType": "object", + "type": "string" + } + ], + "ref": "57c0c2ac-16b7-4b73-9fac-5eda16cda1fd", + "rows": [ + [ + "0", + "Software Development 1", + "0.5694444444444444", + "0.4305555555555556", + "0.0", + "0.0", + "Software Development" + ], + [ + "1", + "Software Development 2", + "0.47058823529411764", + "0.5294117647058824", + "0.0", + "0.0", + "Software Development" + ], + [ + "2", + "Web Development 1", + "0.5277777777777778", + "0.4722222222222222", + "0.0", + "0.0", + "Web Development" + ], + [ + "3", + "Web Development 2", + "0.4666666666666667", + "0.5333333333333333", + "0.0", + "0.0", + "Web Development" + ], + [ + "4", + "Data Analysis 1", + "0.6666666666666666", + "0.3333333333333333", + "0.0", + "0.0", + "Data Analysis" + ], + [ + "5", + "Data Analytics 2", + "0.35294117647058826", + "0.6470588235294118", + "0.0", + "0.0", + "Data Analytics" + ], + [ + "6", + "Web Development M1", + "0.15625", + "0.7916666666666666", + "0.052083333333333336", + "0.0", + "Web Development" + ], + [ + "7", + "Web Development M2", + "0.15384615384615385", + "0.8076923076923077", + "0.038461538461538464", + "0.0", + "Web Development" + ], + [ + "8", + "Web Development M3", + "0.09375", + "0.59375", + "0.0", + "0.3125", + "Web Development" + ], + [ + "9", + "Web Development M4", + "0.325", + "0.675", + "0.0", + "0.0", + "Web Development" + ], + [ + "10", + "Data Analysis M1", + "0.1574074074074074", + "0.7777777777777778", + "0.06481481481481481", + "0.0", + "Data Analysis" + ], + [ + "11", + "Data Analysis M2", + "0.27906976744186046", + "0.6046511627906976", + "0.09302325581395349", + "0.023255813953488372", + "Data Analysis" + ], + [ + "12", + "Data Analysis M3", + "0.07692307692307693", + "0.6346153846153846", + "0.038461538461538464", + "0.25", + "Data Analysis" + ], + [ + "13", + "Data Analysis M4", + "0.3939393939393939", + "0.5757575757575758", + "0.030303030303030304", + "0.0", + "Data Analysis" + ], + [ + "14", + "Software Development M1", + "0.18181818181818182", + "0.8", + "0.01818181818181818", + "0.0", + "Software Development" + ], + [ + "15", + "Software Development M2", + "0.20930232558139536", + "0.7441860465116279", + "0.046511627906976744", + "0.0", + "Software Development" + ], + [ + "16", + "Software Development M3", + "0.15151515151515152", + "0.7878787878787878", + "0.06060606060606061", + "0.0", + "Software Development" + ], + [ + "17", + "Software Development M4", + "0.4230769230769231", + "0.5769230769230769", + "0.0", + "0.0", + "Software Development" + ], + [ + "18", + "Quality Assurance M1", + "0.0", + "1.0", + "0.0", + "0.0", + "Quality Assurance" + ], + [ + "19", + "Quality Assurance M2", + "0.0", + "1.0", + "0.0", + "0.0", + "Quality Assurance" + ], + [ + "20", + "Quality Assurance M3", + "0.0", + "1.0", + "0.0", + "0.0", + "Quality Assurance" + ], + [ + "21", + "Quality Assurance M4", + "0.0", + "1.0", + "0.0", + "0.0", + "Quality Assurance" + ], + [ + "22", + "User Experience M1", + "0.5", + "0.5", + "0.0", + "0.0", + "User Experience" + ], + [ + "23", + "User Experience M2", + "0.0", + "1.0", + "0.0", + "0.0", + "User Experience" + ], + [ + "24", + "User Experience M3", + "0.0", + "1.0", + "0.0", + "0.0", + "User Experience" + ], + [ + "25", + "User Experience M4", + "0.0", + "1.0", + "0.0", + "0.0", + "User Experience" + ] + ], + "shape": { + "columns": 6, + "rows": 26 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModuleDid Not CompleteSuccessfully CompletedPartially CompletedNAPathway
0Software Development 10.5694440.4305560.0000000.000000Software Development
1Software Development 20.4705880.5294120.0000000.000000Software Development
2Web Development 10.5277780.4722220.0000000.000000Web Development
3Web Development 20.4666670.5333330.0000000.000000Web Development
4Data Analysis 10.6666670.3333330.0000000.000000Data Analysis
5Data Analytics 20.3529410.6470590.0000000.000000Data Analytics
6Web Development M10.1562500.7916670.0520830.000000Web Development
7Web Development M20.1538460.8076920.0384620.000000Web Development
8Web Development M30.0937500.5937500.0000000.312500Web Development
9Web Development M40.3250000.6750000.0000000.000000Web Development
10Data Analysis M10.1574070.7777780.0648150.000000Data Analysis
11Data Analysis M20.2790700.6046510.0930230.023256Data Analysis
12Data Analysis M30.0769230.6346150.0384620.250000Data Analysis
13Data Analysis M40.3939390.5757580.0303030.000000Data Analysis
14Software Development M10.1818180.8000000.0181820.000000Software Development
15Software Development M20.2093020.7441860.0465120.000000Software Development
16Software Development M30.1515150.7878790.0606060.000000Software Development
17Software Development M40.4230770.5769230.0000000.000000Software Development
18Quality Assurance M10.0000001.0000000.0000000.000000Quality Assurance
19Quality Assurance M20.0000001.0000000.0000000.000000Quality Assurance
20Quality Assurance M30.0000001.0000000.0000000.000000Quality Assurance
21Quality Assurance M40.0000001.0000000.0000000.000000Quality Assurance
22User Experience M10.5000000.5000000.0000000.000000User Experience
23User Experience M20.0000001.0000000.0000000.000000User Experience
24User Experience M30.0000001.0000000.0000000.000000User Experience
25User Experience M40.0000001.0000000.0000000.000000User Experience
\n", + "
" + ], + "text/plain": [ + " Module Did Not Complete Successfully Completed \\\n", + "0 Software Development 1 0.569444 0.430556 \n", + "1 Software Development 2 0.470588 0.529412 \n", + "2 Web Development 1 0.527778 0.472222 \n", + "3 Web Development 2 0.466667 0.533333 \n", + "4 Data Analysis 1 0.666667 0.333333 \n", + "5 Data Analytics 2 0.352941 0.647059 \n", + "6 Web Development M1 0.156250 0.791667 \n", + "7 Web Development M2 0.153846 0.807692 \n", + "8 Web Development M3 0.093750 0.593750 \n", + "9 Web Development M4 0.325000 0.675000 \n", + "10 Data Analysis M1 0.157407 0.777778 \n", + "11 Data Analysis M2 0.279070 0.604651 \n", + "12 Data Analysis M3 0.076923 0.634615 \n", + "13 Data Analysis M4 0.393939 0.575758 \n", + "14 Software Development M1 0.181818 0.800000 \n", + "15 Software Development M2 0.209302 0.744186 \n", + "16 Software Development M3 0.151515 0.787879 \n", + "17 Software Development M4 0.423077 0.576923 \n", + "18 Quality Assurance M1 0.000000 1.000000 \n", + "19 Quality Assurance M2 0.000000 1.000000 \n", + "20 Quality Assurance M3 0.000000 1.000000 \n", + "21 Quality Assurance M4 0.000000 1.000000 \n", + "22 User Experience M1 0.500000 0.500000 \n", + "23 User Experience M2 0.000000 1.000000 \n", + "24 User Experience M3 0.000000 1.000000 \n", + "25 User Experience M4 0.000000 1.000000 \n", + "\n", + " Partially Completed NA Pathway \n", + "0 0.000000 0.000000 Software Development \n", + "1 0.000000 0.000000 Software Development \n", + "2 0.000000 0.000000 Web Development \n", + "3 0.000000 0.000000 Web Development \n", + "4 0.000000 0.000000 Data Analysis \n", + "5 0.000000 0.000000 Data Analytics \n", + "6 0.052083 0.000000 Web Development \n", + "7 0.038462 0.000000 Web Development \n", + "8 0.000000 0.312500 Web Development \n", + "9 0.000000 0.000000 Web Development \n", + "10 0.064815 0.000000 Data Analysis \n", + "11 0.093023 0.023256 Data Analysis \n", + "12 0.038462 0.250000 Data Analysis \n", + "13 0.030303 0.000000 Data Analysis \n", + "14 0.018182 0.000000 Software Development \n", + "15 0.046512 0.000000 Software Development \n", + "16 0.060606 0.000000 Software Development \n", + "17 0.000000 0.000000 Software Development \n", + "18 0.000000 0.000000 Quality Assurance \n", + "19 0.000000 0.000000 Quality Assurance \n", + "20 0.000000 0.000000 Quality Assurance \n", + "21 0.000000 0.000000 Quality Assurance \n", + "22 0.000000 0.000000 User Experience \n", + "23 0.000000 0.000000 User Experience \n", + "24 0.000000 0.000000 User Experience \n", + "25 0.000000 0.000000 User Experience " + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Not the best Pandas way to do it:\n", + "def Get_completion_percentages(df: pd.DataFrame, cohort: str = 'All') -> pd.DataFrame:\n", + " if cohort == 'All':\n", + " data = df\n", + " else:\n", + " data = df[df['ATP Cohort'] == pd.Timestamp(cohort)]\n", + "\n", + " completion_dictionary = {}\n", + "\n", + " for path in pathways:\n", + " outcome = df[df['Service'] == path]['Outcome'].value_counts(normalize=True).reset_index()\n", + " completion_dictionary[path] = {row.Outcome: row.proportion for row in outcome.itertuples(index=True)}\n", + " \n", + " result_df = pd.DataFrame(completion_dictionary).transpose().fillna(0).rename_axis('Module').reset_index()\n", + "\n", + " result_df['Pathway'] = result_df['Module'].apply(lambda x: x[:x.rfind(' ')]) # intended to be able to sort by pathway\n", + " return result_df\n", + "# TODO: Add test\n", + "\n", + "Get_completion_percentages(enrollments)" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "b85b02f0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "app2 = Dash(__name__)\n", + "# Const\n", + "completion_df = Get_completion_percentages(enrollments)\n", + "options = list(completion_df['Pathway'].unique())\n", + "\n", + "pathway_color = {\n", + " 'Software Development': 'green', \n", + " 'Web Development': 'blue', \n", + " 'Data Analysis': 'red',\n", + " 'Quality Assurance': 'yellow', \n", + " 'User Experience': 'purple'\n", + "}\n", + "\n", + "# Display\n", + "app2.layout = html.Div([\n", + " html.H2('Pathways Completion', style={'text-align': \"center\"}),\n", + " html.P('Select pathway:'),\n", + " dcc.Dropdown(\n", + " id=\"dropdown\",\n", + " options=options,\n", + " value=options[0],\n", + " clearable=False,\n", + " ),\n", + " dcc.Graph(id=\"graph\")\n", + " \n", + "], style={'backgroundColor':'white'})\n", + "\n", + "@app2.callback(\n", + " Output(\"graph\", \"figure\"),\n", + " Input(\"dropdown\", \"value\"))\n", + "\n", + "# Graph\n", + "def Display_pathway_completion(p):\n", + " df = completion_df[completion_df['Pathway'] == p]\n", + " fig = px.bar(df, x='Module', y='Successfully Completed')\n", + " return fig\n", + "\n", + "app2.run(debug=True, port=8053)" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "0db624db", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hovertemplate": "Module=%{x}
Successfully Completed=%{y}", + "legendgroup": "", + "marker": { + "color": "#636efa", + "pattern": { + "shape": "" + } + }, + "name": "", + "orientation": "v", + "showlegend": false, + "textposition": "auto", + "type": "bar", + "x": [ + "Software Development 1", + "Software Development 2", + "Web Development 1", + "Web Development 2", + "Data Analysis 1", + "Data Analytics 2", + "Web Development M1", + "Web Development M2", + "Web Development M3", + "Web Development M4", + "Data Analysis M1", + "Data Analysis M2", + "Data Analysis M3", + "Data Analysis M4", + "Software Development M1", + "Software Development M2", + "Software Development M3", + "Software Development M4", + "Quality Assurance M1", + "Quality Assurance M2", + "Quality Assurance M3", + "Quality Assurance M4", + "User Experience M1", + "User Experience M2", + "User Experience M3", + "User Experience M4" + ], + "xaxis": "x", + "y": { + "bdata": "5DiO4ziO2z/x8PDw8PDgP47jOI7jON4/ERERERER4T9VVVVVVVXVP7W0tLS0tOQ/VVVVVVVV6T+KndiJndjpPwAAAAAAAOM/mpmZmZmZ5T85juM4juPoP9aUNWVNWeM/7MRO7MRO5D9tsskmm2ziP5qZmZmZmek/9AV9QV/Q5z822WSTTTbpP2IndmInduI/AAAAAAAA8D8AAAAAAADwPwAAAAAAAPA/AAAAAAAA8D8AAAAAAADgPwAAAAAAAPA/AAAAAAAA8D8AAAAAAADwPw==", + "dtype": "f8" + }, + "yaxis": "y" + } + ], + "layout": { + "barmode": "relative", + "legend": { + "tracegroupgap": 0 + }, + "margin": { + "t": 60 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "title": { + "text": "Module" + } + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "title": { + "text": "Successfully Completed" + } + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Just testing\n", + "completion = Get_completion_percentages(enrollments)\n", + "px.bar(completion, x='Module', y='Successfully Completed').show()" + ] } ], "metadata": { From d050238d1299da8512fe66a205f67803cbd6b438 Mon Sep 17 00:00:00 2001 From: Euclides Date: Tue, 22 Jul 2025 14:29:47 -0400 Subject: [PATCH 7/9] Separate the logic and added corrections to cleaning function --- historical_student_data_dashboards.ipynb | 2194 +++++++--------------- 1 file changed, 672 insertions(+), 1522 deletions(-) diff --git a/historical_student_data_dashboards.ipynb b/historical_student_data_dashboards.ipynb index e1633d0..d16955f 100644 --- a/historical_student_data_dashboards.ipynb +++ b/historical_student_data_dashboards.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 134, "id": "ec855ef5", "metadata": {}, "outputs": [], @@ -31,9 +31,18 @@ "### Data cleaning: (This can be extracted to an object or just create a funciton that is call once and outputs a clean file, while the process is not define I will just add the cleaning functions to this file)" ] }, + { + "cell_type": "markdown", + "id": "d5704b71", + "metadata": {}, + "source": [ + "## Questions for Danny:\n", + "* What to do when I have an NA in a DateTime series?" + ] + }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 135, "id": "bc04972b", "metadata": {}, "outputs": [], @@ -43,1122 +52,67 @@ " # Columns to clean\n", " COLUMNS_TO_DROP = ['Full Name']\n", " result = df.drop(columns=COLUMNS_TO_DROP)\n", - "\n", + " \n", + " # TODO: fix 'ATP Cohorts' \n", + " # Q: how do I handle 'NA' in a datetime series\n", + " \n", " # Fix NaN values\n", " NAN_VALUE_SUBSTITUTE = 'NA'\n", " columns_to_fix = {\n", " 'Projected Start Date': NAN_VALUE_SUBSTITUTE, 'Actual Start Date': NAN_VALUE_SUBSTITUTE, 'Projected End Date': NAN_VALUE_SUBSTITUTE,\n", - " 'Actual End Date': NAN_VALUE_SUBSTITUTE, 'Outcome': NAN_VALUE_SUBSTITUTE, 'ATP Cohort': NAN_VALUE_SUBSTITUTE\n", - " }\n", - "\n", - " for column, substitute_value in columns_to_fix.items():\n", - " result[column] = result[column].fillna(substitute_value)\n", - "\n", - " # Added the tests inside the cleaning function because it cannot be on a separeted folder structure until testing methods are define\n", - " Test_enrollments_cleaning(result)\n", - "\n", - " return result\n", - "\n", - "def Test_enrollments_cleaning(clean_df: pd.DataFrame):\n", - " # Parameter can be change to an in-function call of the data cleanner with the DF\n", - " assert ~clean_df.isna().any().any(), 'The Dataframe has na values.'" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "id": "15b7c4ec", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "index", - "rawType": "int64", - "type": "integer" - }, - { - "name": "Auto Id", - "rawType": "object", - "type": "string" - }, - { - "name": "KY Region", - "rawType": "object", - "type": "string" - }, - { - "name": "Assessment ID", - "rawType": "object", - "type": "string" - }, - { - "name": "EnrollmentId", - "rawType": "object", - "type": "string" - }, - { - "name": "Enrollment Service Name", - "rawType": "object", - "type": "string" - }, - { - "name": "Service", - "rawType": "object", - "type": "string" - }, - { - "name": "Projected Start Date", - "rawType": "object", - "type": "unknown" - }, - { - "name": "Actual Start Date", - "rawType": "object", - "type": "unknown" - }, - { - "name": "Projected End Date", - "rawType": "object", - "type": "unknown" - }, - { - "name": "Actual End Date", - "rawType": "object", - "type": "unknown" - }, - { - "name": "Outcome", - "rawType": "object", - "type": "string" - }, - { - "name": "ATP Cohort", - "rawType": "object", - "type": "unknown" - } - ], - "ref": "fe8d8e5e-67bd-4ba2-8468-2d78bc22e701", - "rows": [ - [ - "0", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0011193", - "Career Readiness Workshop", - "2021-11-11 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "1", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0013492", - "Software Development 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00" - ], - [ - "2", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0014187", - "Career Readiness Workshop", - "2022-03-07 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "3", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0015022", - "Software Development 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-07-29 00:00:00", - "Successfully Completed", - "2022-05-01 00:00:00" - ], - [ - "4", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0015075", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00" - ], - [ - "5", - "202109-5230", - "SOAR", - "OA-003352", - "Enrollment-1389", - "ES-0015087", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-09-28 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "6", - "202109-5233", - "SOAR", - "OA-003355", - "Enrollment-1392", - "ES-0011185", - "Career Readiness Workshop", - "2021-11-11 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "7", - "202109-5233", - "SOAR", - "OA-003355", - "Enrollment-1392", - "ES-0013525", - "Web Development 2", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00" - ], - [ - "8", - "202109-5233", - "SOAR", - "OA-003355", - "Enrollment-1392", - "ES-0015081", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00" - ], - [ - "9", - "202109-5236", - "SOAR", - "OA-003358", - "Enrollment-1395", - "ES-0015092", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-01 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "10", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0011212", - "One-on-one Job Readiness", - "2021-11-23 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "11", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0015086", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-01 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "12", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0017175", - "Data Analysis 1", - "2023-01-04 00:00:00", - "NA", - "NA", - "2023-03-31 00:00:00", - "Successfully Completed", - "2023-01-01 00:00:00" - ], - [ - "13", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0017537", - "Career Readiness Workshop", - "2023-02-16 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "14", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0018722", - "Data Analytics 2", - "2023-05-09 00:00:00", - "NA", - "NA", - "2023-08-07 00:00:00", - "Successfully Completed", - "2023-05-01 00:00:00" - ], - [ - "15", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0023302", - "One-on-one Job Readiness", - "2024-06-12 00:00:00", - "2024-06-12 00:00:00", - "2024-06-12 00:00:00", - "2024-06-12 00:00:00", - "NA", - "NA" - ], - [ - "16", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0023396", - "One-on-one Job Readiness", - "2024-05-23 00:00:00", - "NA", - "2024-05-23 00:00:00", - "NA", - "NA", - "NA" - ], - [ - "17", - "202109-5238", - "SOAR", - "OA-003360", - "Enrollment-1398", - "ES-0015085", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-09 00:00:00", - "2021-12-09 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "18", - "202109-5239", - "SOAR", - "OA-003363", - "Enrollment-1401", - "ES-0017159", - "Web Development 1", - "2021-09-08 00:00:00", - "NA", - "NA", - "2021-09-23 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "19", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0011221", - "One-on-one Job Readiness", - "2021-12-03 00:00:00", - "2021-12-03 00:00:00", - "NA", - "NA", - "NA", - "NA" - ], - [ - "20", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0013503", - "Software Development 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00" - ], - [ - "21", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0014300", - "One-on-one Job Readiness", - "2022-03-29 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "22", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0015052", - "Software Development 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-07-29 00:00:00", - "Successfully Completed", - "2022-05-01 00:00:00" - ], - [ - "23", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0015076", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00" - ], - [ - "24", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0017237", - "Data Analysis 1", - "2022-08-24 00:00:00", - "NA", - "NA", - "2022-11-18 00:00:00", - "Successfully Completed", - "2022-09-01 00:00:00" - ], - [ - "25", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0017238", - "Data Analytics 2", - "2023-01-04 00:00:00", - "NA", - "NA", - "2023-03-31 00:00:00", - "Successfully Completed", - "2023-01-01 00:00:00" - ], - [ - "26", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0018733", - "Web Development 2", - "2023-05-11 00:00:00", - "NA", - "NA", - "2023-08-07 00:00:00", - "Successfully Completed", - "2023-05-01 00:00:00" - ], - [ - "27", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0011174", - "Career Readiness Workshop", - "2021-11-08 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "28", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0013507", - "Software Development 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-05-06 00:00:00", - "2022-05-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00" - ], - [ - "29", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0014268", - "One-on-one Job Readiness", - "2022-03-22 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "30", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0015065", - "Web Development 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-07-29 00:00:00", - "Did Not Complete", - "2022-05-01 00:00:00" - ], - [ - "31", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0015080", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00" - ], - [ - "32", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0023385", - "One-on-one Job Readiness", - "2024-01-29 00:00:00", - "NA", - "2024-01-29 00:00:00", - "NA", - "NA", - "NA" - ], - [ - "33", - "202109-5272", - "SOAR", - "OA-003395", - "Enrollment-1434", - "ES-0011175", - "Career Readiness Workshop", - "2021-11-08 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "34", - "202109-5272", - "SOAR", - "OA-003395", - "Enrollment-1434", - "ES-0013526", - "Web Development 2", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00" - ], - [ - "35", - "202109-5272", - "SOAR", - "OA-003395", - "Enrollment-1434", - "ES-0015078", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00" - ], - [ - "36", - "202109-5272", - "SOAR", - "OA-003395", - "Enrollment-1434", - "ES-0017252", - "Software Development 1", - "2022-08-24 00:00:00", - "NA", - "NA", - "2022-10-03 00:00:00", - "Did Not Complete", - "2022-09-01 00:00:00" - ], - [ - "37", - "202109-5273", - "SOAR", - "OA-003396", - "Enrollment-1435", - "ES-0015089", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-10-27 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "38", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0011204", - "Career Readiness Workshop", - "2021-11-18 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "39", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0013483", - "Data Analysis 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00" - ], - [ - "40", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0014299", - "One-on-one Job Readiness", - "2022-03-29 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "41", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0015047", - "Data Analytics 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-06-08 00:00:00", - "Did Not Complete", - "2022-05-01 00:00:00" - ], - [ - "42", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0015084", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00" - ], - [ - "43", - "202109-5275", - "SOAR", - "OA-003398", - "Enrollment-1609", - "ES-0015088", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-10-11 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "44", - "202109-5276", - "SOAR", - "OA-003399", - "Enrollment-1611", - "ES-0015091", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-10-28 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00" - ], - [ - "45", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0011213", - "One-on-one Job Readiness", - "2021-11-23 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "46", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0013995", - "Software Development 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-07-29 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00" - ], - [ - "47", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0014191", - "Career Readiness Workshop", - "2022-03-07 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA" - ], - [ - "48", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0015050", - "Software Development 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-07-29 00:00:00", - "Successfully Completed", - "2022-05-01 00:00:00" - ], - [ - "49", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0015074", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00" - ] - ], - "shape": { - "columns": 12, - "rows": 2033 - } - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Auto IdKY RegionAssessment IDEnrollmentIdEnrollment Service NameServiceProjected Start DateActual Start DateProjected End DateActual End DateOutcomeATP Cohort
0202109-5224SOAROA-003348Enrollment-1386ES-0011193Career Readiness Workshop2021-11-11 00:00:00NANANANANA
1202109-5224SOAROA-003348Enrollment-1386ES-0013492Software Development 12022-01-05 00:00:002022-01-05 00:00:002022-04-06 00:00:002022-04-06 00:00:00Successfully Completed2022-01-01 00:00:00
2202109-5224SOAROA-003348Enrollment-1386ES-0014187Career Readiness Workshop2022-03-07 00:00:00NANANANANA
3202109-5224SOAROA-003348Enrollment-1386ES-0015022Software Development 22022-05-04 00:00:002022-05-04 00:00:002022-07-29 00:00:002022-07-29 00:00:00Successfully Completed2022-05-01 00:00:00
4202109-5224SOAROA-003348Enrollment-1386ES-0015075Web Development 12021-09-08 00:00:002021-09-08 00:00:002021-12-14 00:00:002021-12-14 00:00:00Successfully Completed2021-09-01 00:00:00
.......................................
2028202504-21723SOAROA-022760Enrollment-14196ES-0035149Intro to Programming Core2025-05-12 00:00:002025-05-12 00:00:002025-06-27 00:00:00NANA2025-05-01 00:00:00
2029202505-22788SOAROA-023710Enrollment-14213ES-0035212Intro to Programming Core2025-05-14 00:00:002025-05-13 00:00:002025-06-17 00:00:00NANA2025-05-01 00:00:00
2030202408-16568SOAROA-017961Enrollment-14833ES-0036429Intro to Programming Core2025-05-12 00:00:00NANANADid Not Complete2025-05-01 00:00:00
2031202408-16568SOAROA-017961Enrollment-14833ES-0036430Supportive Services ReferralNANANANANANA
2032202503-21420SOAROA-022426Enrollment-15195ES-0038953Intro to Programming Core2025-05-12 00:00:00NA2025-06-27 00:00:00NANA2025-05-01 00:00:00
\n", - "

2033 rows × 12 columns

\n", - "
" - ], + " 'Actual End Date': NAN_VALUE_SUBSTITUTE, 'Outcome': NAN_VALUE_SUBSTITUTE, 'ATP Cohort': NAN_VALUE_SUBSTITUTE\n", + " }\n", + "\n", + " for column, substitute_value in columns_to_fix.items():\n", + " result[column] = result[column].fillna(substitute_value)\n", + "\n", + " # Fix change name Data Analitics 2 to Data Analysis 2 for consistency\n", + " result.loc[result['Service'] == 'Data Analytics 2', 'Service'] = 'Data Analysis 2'\n", + "\n", + " # Delete values not needed \n", + " values_not_needed = {\n", + " 'Service': ['Software Development 1', 'Software Development 2', 'Web Development 1', 'Web Development 2', 'Data Analysis 1','Data Analysis 2']\n", + " }\n", + " for column, value in values_not_needed.items():\n", + " result = result[~result[column].isin(value)]\n", + "\n", + " # DataTypes\n", + " column_datatype: dict = {'Auto Id': str, 'KY Region': str, 'Assessment ID': str, 'EnrollmentId': str,\n", + " 'Enrollment Service Name': str, 'Service': str, 'Projected Start Date': str,\n", + " 'Actual Start Date': str, 'Projected End Date': str, 'Actual End Date': str, 'Outcome': str,\n", + " 'ATP Cohort': str} \n", + " # TODO: 'Projected Start Date', 'Actual Start Date', 'Projected End Date', 'Actual End Date' are all datetime types but have a value fix of NA\n", + " \n", + " for column, type in column_datatype.items():\n", + " result[column] = result[column].astype(type)\n", + "\n", + " # Added the tests inside the cleaning function because it cannot be on a separeted folder structure until testing methods are define\n", + " Test_enrollments_cleaning(result)\n", + "\n", + " return result\n", + "\n", + "def Test_enrollments_cleaning(clean_df: pd.DataFrame):\n", + " # Parameter can be change to an in-function call of the data cleanner with the DF\n", + " assert ~clean_df.isna().any().any(), 'The Dataframe has na values.'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15b7c4ec", + "metadata": {}, + "outputs": [ + { + "data": { "text/plain": [ - " Auto Id KY Region Assessment ID EnrollmentId \\\n", - "0 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "1 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "2 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "3 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "4 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "... ... ... ... ... \n", - "2028 202504-21723 SOAR OA-022760 Enrollment-14196 \n", - "2029 202505-22788 SOAR OA-023710 Enrollment-14213 \n", - "2030 202408-16568 SOAR OA-017961 Enrollment-14833 \n", - "2031 202408-16568 SOAR OA-017961 Enrollment-14833 \n", - "2032 202503-21420 SOAR OA-022426 Enrollment-15195 \n", - "\n", - " Enrollment Service Name Service \\\n", - "0 ES-0011193 Career Readiness Workshop \n", - "1 ES-0013492 Software Development 1 \n", - "2 ES-0014187 Career Readiness Workshop \n", - "3 ES-0015022 Software Development 2 \n", - "4 ES-0015075 Web Development 1 \n", - "... ... ... \n", - "2028 ES-0035149 Intro to Programming Core \n", - "2029 ES-0035212 Intro to Programming Core \n", - "2030 ES-0036429 Intro to Programming Core \n", - "2031 ES-0036430 Supportive Services Referral \n", - "2032 ES-0038953 Intro to Programming Core \n", - "\n", - " Projected Start Date Actual Start Date Projected End Date \\\n", - "0 2021-11-11 00:00:00 NA NA \n", - "1 2022-01-05 00:00:00 2022-01-05 00:00:00 2022-04-06 00:00:00 \n", - "2 2022-03-07 00:00:00 NA NA \n", - "3 2022-05-04 00:00:00 2022-05-04 00:00:00 2022-07-29 00:00:00 \n", - "4 2021-09-08 00:00:00 2021-09-08 00:00:00 2021-12-14 00:00:00 \n", - "... ... ... ... \n", - "2028 2025-05-12 00:00:00 2025-05-12 00:00:00 2025-06-27 00:00:00 \n", - "2029 2025-05-14 00:00:00 2025-05-13 00:00:00 2025-06-17 00:00:00 \n", - "2030 2025-05-12 00:00:00 NA NA \n", - "2031 NA NA NA \n", - "2032 2025-05-12 00:00:00 NA 2025-06-27 00:00:00 \n", - "\n", - " Actual End Date Outcome ATP Cohort \n", - "0 NA NA NA \n", - "1 2022-04-06 00:00:00 Successfully Completed 2022-01-01 00:00:00 \n", - "2 NA NA NA \n", - "3 2022-07-29 00:00:00 Successfully Completed 2022-05-01 00:00:00 \n", - "4 2021-12-14 00:00:00 Successfully Completed 2021-09-01 00:00:00 \n", - "... ... ... ... \n", - "2028 NA NA 2025-05-01 00:00:00 \n", - "2029 NA NA 2025-05-01 00:00:00 \n", - "2030 NA Did Not Complete 2025-05-01 00:00:00 \n", - "2031 NA NA NA \n", - "2032 NA NA 2025-05-01 00:00:00 \n", - "\n", - "[2033 rows x 12 columns]" + "Index(['Auto Id', 'KY Region', 'Assessment ID', 'EnrollmentId',\n", + " 'Enrollment Service Name', 'Service', 'Projected Start Date',\n", + " 'Actual Start Date', 'Projected End Date', 'Actual End Date', 'Outcome',\n", + " 'ATP Cohort'],\n", + " dtype='object')" ] }, - "execution_count": 88, + "execution_count": 136, "metadata": {}, "output_type": "execute_result" } @@ -1180,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 137, "id": "0773f9b7", "metadata": {}, "outputs": [], @@ -1188,20 +142,17 @@ "# Starter pathways are the only path that have to be taken into consideration for each period student pathway choosing\n", "# TODO: maybe make them generate automatically \n", "Starter_pathways = [\n", - " 'Software Development 1', \n", - " 'Web Development 1', \n", - " 'Data Analysis 1',\n", " 'Web Development M1',\n", " 'Data Analysis M1', \n", " 'Software Development M1',\n", " 'Quality Assurance M1', \n", - " 'User Experience M1',\n", + " 'User Experience M1'\n", "]" ] }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 138, "id": "c4902e7d", "metadata": {}, "outputs": [ @@ -1220,7 +171,7 @@ "type": "integer" } ], - "ref": "4b783bd9-3580-4316-b4ef-e79e0f73a9b2", + "ref": "b4504a2c-f3c1-451f-8f67-12507b3d07ef", "rows": [ [ "Career Readiness Workshop", @@ -1259,11 +210,11 @@ "8" ], [ - "Technical Project Management", + "Resume Review and Optimization", "6" ], [ - "Resume Review and Optimization", + "Technical Project Management", "6" ], [ @@ -1275,25 +226,21 @@ "2" ], [ - "Web Development M4", + "Revised Resume Review", "2" ], [ - "Revised Resume Review", + "Web Development M4", "2" ], [ "Software Development M4", "1" - ], - [ - "Web Development 1", - "1" ] ], "shape": { "columns": 1, - "rows": 17 + "rows": 16 } }, "text/plain": [ @@ -1307,18 +254,17 @@ "Employer Event (Code:You) 14\n", "Demo Day Participant 12\n", "Remote Jobs Workshop (non EDA) 8\n", - "Technical Project Management 6\n", "Resume Review and Optimization 6\n", + "Technical Project Management 6\n", "Interview Guidance and Practice 5\n", "Data Analysis M4 2\n", - "Web Development M4 2\n", "Revised Resume Review 2\n", + "Web Development M4 2\n", "Software Development M4 1\n", - "Web Development 1 1\n", "Name: count, dtype: int64" ] }, - "execution_count": 90, + "execution_count": 138, "metadata": {}, "output_type": "execute_result" } @@ -1333,55 +279,379 @@ }, { "cell_type": "code", - "execution_count": 91, - "id": "ba47a304", + "execution_count": 139, + "id": "10a28ede", + "metadata": {}, + "outputs": [], + "source": [ + "def Get_starting_pathways(df: pd.DataFrame): \n", + " \"\"\"\n", + " Returns a pandas.DataFrame were all the services are the biginning paths\n", + "\n", + " Args: \n", + " df: pandas.DataFrame\n", + "\n", + " Return:\n", + " pandas.DataFrame\n", + " \"\"\"\n", + " Starter_pathways = [\n", + " 'Web Development M1',\n", + " 'Data Analysis M1', \n", + " 'Software Development M1',\n", + " 'Quality Assurance M1', \n", + " 'User Experience M1',\n", + " ]\n", + " mask_starter_pathways = df['Service'].isin(Starter_pathways)\n", + " return df[mask_starter_pathways]\n", + "\n", + "def Get_cohorts_list(df: pd.DataFrame):\n", + " cohorts = list(pd.to_datetime(df['ATP Cohort'][df['ATP Cohort'] != 'NA']).sort_values(ascending=True).unique())\n", + " cohorts.insert(0, 'All cohorts')\n", + " return cohorts\n", + "\n", + "def Get_data_by_cohort(df: pd.DataFrame, cohort: str = 'All cohorts') -> pd.DataFrame:\n", + " if cohort == 'All cohorts':\n", + " result = df.value_counts('Service').reset_index()\n", + " else:\n", + " result = df[df['ATP Cohort'] == pd.Timestamp(cohort)].value_counts('Service').reset_index()\n", + " \n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "id": "8020e97f", "metadata": {}, "outputs": [ { "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Auto Id", + "rawType": "object", + "type": "string" + }, + { + "name": "KY Region", + "rawType": "object", + "type": "string" + }, + { + "name": "Assessment ID", + "rawType": "object", + "type": "string" + }, + { + "name": "EnrollmentId", + "rawType": "object", + "type": "string" + }, + { + "name": "Enrollment Service Name", + "rawType": "object", + "type": "string" + }, + { + "name": "Service", + "rawType": "object", + "type": "string" + }, + { + "name": "Projected Start Date", + "rawType": "object", + "type": "string" + }, + { + "name": "Actual Start Date", + "rawType": "object", + "type": "string" + }, + { + "name": "Projected End Date", + "rawType": "object", + "type": "string" + }, + { + "name": "Actual End Date", + "rawType": "object", + "type": "string" + }, + { + "name": "Outcome", + "rawType": "object", + "type": "string" + }, + { + "name": "ATP Cohort", + "rawType": "object", + "type": "string" + } + ], + "ref": "8df3145d-7c39-4a2f-9491-bd86871a1713", + "rows": [ + [ + "0", + "202303-11274", + "SOAR", + "OA-010946", + "Enrollment-7415", + "ES-0021827", + "Web Development M1", + "2024-03-11 00:00:00", + "2024-03-11 00:00:00", + "2024-04-12 00:00:00", + "2024-04-12 00:00:00", + "Successfully Completed", + "2024-01-01 00:00:00" + ], + [ + "1", + "202206-8668", + "SOAR", + "OA-016863", + "Enrollment-9631", + "ES-0024437", + "Intro to Programming Core", + "2024-08-15 00:00:00", + "NA", + "NA", + "2024-09-29 00:00:00", + "Successfully Completed", + "2024-08-01 00:00:00" + ], + [ + "2", + "202110-5639", + "SOAR", + "OA-004316", + "Enrollment-5320", + "ES-0022632", + "Intro to Programming Core", + "2024-05-06 00:00:00", + "2024-05-06 00:00:00", + "NA", + "2024-06-28 00:00:00", + "Successfully Completed", + "2024-05-01 00:00:00" + ], + [ + "3", + "202410-17704", + "SOAR", + "OA-019195", + "Enrollment-11703", + "ES-0029379", + "Intro to Programming Core", + "2025-01-07 00:00:00", + "2025-01-13 00:00:00", + "2025-02-18 00:00:00", + "2025-02-04 00:00:00", + "Did Not Complete", + "2025-01-01 00:00:00" + ], + [ + "4", + "202208-9220", + "SOAR", + "OA-008218", + "Enrollment-6405", + "ES-0016248", + "Salesforce", + "2022-09-07 00:00:00", + "NA", + "NA", + "2022-11-18 00:00:00", + "Successfully Completed", + "2022-09-01 00:00:00" + ] + ], + "shape": { + "columns": 12, + "rows": 5 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Auto IdKY RegionAssessment IDEnrollmentIdEnrollment Service NameServiceProjected Start DateActual Start DateProjected End DateActual End DateOutcomeATP Cohort
0202303-11274SOAROA-010946Enrollment-7415ES-0021827Web Development M12024-03-11 00:00:002024-03-11 00:00:002024-04-12 00:00:002024-04-12 00:00:00Successfully Completed2024-01-01 00:00:00
1202206-8668SOAROA-016863Enrollment-9631ES-0024437Intro to Programming Core2024-08-15 00:00:00NANA2024-09-29 00:00:00Successfully Completed2024-08-01 00:00:00
2202110-5639SOAROA-004316Enrollment-5320ES-0022632Intro to Programming Core2024-05-06 00:00:002024-05-06 00:00:00NA2024-06-28 00:00:00Successfully Completed2024-05-01 00:00:00
3202410-17704SOAROA-019195Enrollment-11703ES-0029379Intro to Programming Core2025-01-07 00:00:002025-01-13 00:00:002025-02-18 00:00:002025-02-04 00:00:00Did Not Complete2025-01-01 00:00:00
4202208-9220SOAROA-008218Enrollment-6405ES-0016248Salesforce2022-09-07 00:00:00NANA2022-11-18 00:00:00Successfully Completed2022-09-01 00:00:00
\n", + "
" + ], "text/plain": [ - "['All cohorts',\n", - " Timestamp('2022-01-01 00:00:00'),\n", - " Timestamp('2022-05-01 00:00:00'),\n", - " Timestamp('2021-09-01 00:00:00'),\n", - " Timestamp('2023-01-01 00:00:00'),\n", - " Timestamp('2023-05-01 00:00:00'),\n", - " Timestamp('2022-09-01 00:00:00'),\n", - " Timestamp('2024-05-01 00:00:00'),\n", - " Timestamp('2024-08-01 00:00:00'),\n", - " Timestamp('2023-08-01 00:00:00'),\n", - " Timestamp('2024-01-01 00:00:00'),\n", - " Timestamp('2025-01-01 00:00:00'),\n", - " Timestamp('2025-05-01 00:00:00')]" + " Auto Id KY Region Assessment ID EnrollmentId \\\n", + "0 202303-11274 SOAR OA-010946 Enrollment-7415 \n", + "1 202206-8668 SOAR OA-016863 Enrollment-9631 \n", + "2 202110-5639 SOAR OA-004316 Enrollment-5320 \n", + "3 202410-17704 SOAR OA-019195 Enrollment-11703 \n", + "4 202208-9220 SOAR OA-008218 Enrollment-6405 \n", + "\n", + " Enrollment Service Name Service Projected Start Date \\\n", + "0 ES-0021827 Web Development M1 2024-03-11 00:00:00 \n", + "1 ES-0024437 Intro to Programming Core 2024-08-15 00:00:00 \n", + "2 ES-0022632 Intro to Programming Core 2024-05-06 00:00:00 \n", + "3 ES-0029379 Intro to Programming Core 2025-01-07 00:00:00 \n", + "4 ES-0016248 Salesforce 2022-09-07 00:00:00 \n", + "\n", + " Actual Start Date Projected End Date Actual End Date \\\n", + "0 2024-03-11 00:00:00 2024-04-12 00:00:00 2024-04-12 00:00:00 \n", + "1 NA NA 2024-09-29 00:00:00 \n", + "2 2024-05-06 00:00:00 NA 2024-06-28 00:00:00 \n", + "3 2025-01-13 00:00:00 2025-02-18 00:00:00 2025-02-04 00:00:00 \n", + "4 NA NA 2022-11-18 00:00:00 \n", + "\n", + " Outcome ATP Cohort \n", + "0 Successfully Completed 2024-01-01 00:00:00 \n", + "1 Successfully Completed 2024-08-01 00:00:00 \n", + "2 Successfully Completed 2024-05-01 00:00:00 \n", + "3 Did Not Complete 2025-01-01 00:00:00 \n", + "4 Successfully Completed 2022-09-01 00:00:00 " ] }, - "execution_count": 91, + "execution_count": 140, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "cohorts = list(enrollments['ATP Cohort'].unique())\n", - "cohorts.remove('NA')\n", - "cohorts.insert(0, 'All cohorts')\n", - "cohorts" - ] - }, - { - "cell_type": "code", - "execution_count": 92, - "id": "beb37448", - "metadata": {}, - "outputs": [], - "source": [ - "mask_starter_pathways = enrollments['Service'].isin(Starter_pathways)\n", - "starter_only_enrollments = enrollments[mask_starter_pathways]" + "# TODO: Finish tests\n", + "def Test_Get_starting_pathways():\n", + " mock_data = pd.DataFrame(\n", + " {\n", + " 'Auto Id': ['202303-11274', '202206-8668', '202110-5639', '202410-17704', '202208-9220'], \n", + " 'KY Region': ['SOAR', 'SOAR', 'SOAR', 'SOAR', 'SOAR'], \n", + " 'Assessment ID': ['OA-010946', 'OA-016863', 'OA-004316', 'OA-019195', 'OA-008218'], \n", + " 'EnrollmentId': ['Enrollment-7415', 'Enrollment-9631', 'Enrollment-5320', 'Enrollment-11703', 'Enrollment-6405'], \n", + " 'Enrollment Service Name': ['ES-0021827', 'ES-0024437', 'ES-0022632', 'ES-0029379', 'ES-0016248'], \n", + " 'Service': ['Web Development M1', 'Intro to Programming Core', 'Intro to Programming Core', 'Intro to Programming Core', 'Salesforce'], \n", + " 'Projected Start Date': ['2024-03-11 00:00:00', '2024-08-15 00:00:00', '2024-05-06 00:00:00', '2025-01-07 00:00:00', '2022-09-07 00:00:00'], \n", + " 'Actual Start Date': ['2024-03-11 00:00:00', 'NA', '2024-05-06 00:00:00', '2025-01-13 00:00:00', 'NA'], \n", + " 'Projected End Date': ['2024-04-12 00:00:00', 'NA', 'NA', '2025-02-18 00:00:00', 'NA'], \n", + " 'Actual End Date': ['2024-04-12 00:00:00', '2024-09-29 00:00:00', '2024-06-28 00:00:00', '2025-02-04 00:00:00', '2022-11-18 00:00:00'], \n", + " 'Outcome': ['Successfully Completed', 'Successfully Completed', 'Successfully Completed', 'Did Not Complete', 'Successfully Completed'], \n", + " 'ATP Cohort': ['2024-01-01 00:00:00', '2024-08-01 00:00:00', '2024-05-01 00:00:00', '2025-01-01 00:00:00', '2022-09-01 00:00:00']\n", + " }\n", + " )\n", + " return mock_data\n", + "Test_Get_starting_pathways()" ] }, { "cell_type": "code", - "execution_count": 93, - "id": "5436f5e6", + "execution_count": 141, + "id": "630dca1e", "metadata": {}, "outputs": [ { @@ -1399,7 +669,7 @@ " " ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1407,55 +677,51 @@ } ], "source": [ - "app = Dash(__name__)\n", - "# Const\n", - "options = cohorts\n", - "pathway_color = {\n", - " 'Software Development 1': 'green', \n", - " 'Web Development 1': 'blue', \n", - " 'Data Analysis 1': 'red',\n", - " 'Web Development M1': 'blue',\n", - " 'Data Analysis M1': 'red', \n", - " 'Software Development M1': 'green',\n", - " 'Quality Assurance M1': 'yellow', \n", - " 'User Experience M1': 'purple'\n", - "}\n", + "def Dash_most_selected_path_by_cohort(data: pd.DataFrame) -> Dash: # Need to pass the dataframe argument because of how the Data is structure\n", + " app = Dash(__name__)\n", + " # Const\n", + " starter_only_enrollments = Get_starting_pathways(data) # This function should be able to comunicate with the data without argument\n", "\n", - "# Display\n", - "app.layout = html.Div([\n", - " html.H2('Cohorts', style={'text-align': \"center\"}),\n", - " html.P('Select Cohort:'),\n", - " dcc.Dropdown(\n", - " id=\"dropdown\",\n", - " options=options,\n", - " value=options[0],\n", - " clearable=False,\n", - " ),\n", - " dcc.Graph(id=\"graph\")\n", - " \n", - "], style={'backgroundColor':'white'})\n", + " dropdown_options = Get_cohorts_list(starter_only_enrollments)\n", + " pathway_color = {\n", + " 'Web Development M1': 'blue',\n", + " 'Data Analysis M1': 'red', \n", + " 'Software Development M1': 'green',\n", + " 'Quality Assurance M1': 'yellow', \n", + " 'User Experience M1': 'purple'\n", + " }\n", "\n", - "@app.callback(\n", - " Output(\"graph\", \"figure\"),\n", - " Input(\"dropdown\", \"value\"))\n", + " # Display\n", + " app.layout = html.Div([\n", + " html.H2('Cohorts', style={'text-align': \"center\"}),\n", + " html.P('Select Cohort:'),\n", + " dcc.Dropdown(\n", + " id=\"dropdown\",\n", + " options=dropdown_options,\n", + " value=dropdown_options[0],\n", + " clearable=False,\n", + " ),\n", + " dcc.Graph(id=\"graph\")\n", + " \n", + " ], style={'backgroundColor':'white'})\n", "\n", - "# Graph\n", - "def tt(time):\n", - " # TODO: Separate the \n", - " if time == 'All cohorts':\n", - " df = starter_only_enrollments.value_counts('Service').reset_index()\n", - " else:\n", - " df = starter_only_enrollments[starter_only_enrollments['ATP Cohort'] == pd.Timestamp(time)].value_counts('Service').reset_index()\n", + " @app.callback(\n", + " Output(\"graph\", \"figure\"),\n", + " Input(\"dropdown\", \"value\"))\n", + "\n", + " # Graph\n", + " def tt(time):\n", + " df = Get_data_by_cohort(starter_only_enrollments, time)\n", + " fig = px.pie(df, names='Service', values='count', color='Service', color_discrete_map=pathway_color)\n", + " return fig\n", "\n", - " df['color'] = df['Service'].map(pathway_color)\n", - " fig = px.pie(df, names='Service', values='count', color='Service', color_discrete_map=pathway_color)\n", - " return fig\n", + " return app\n", "\n", - "app.run(debug=True, port=8052)\n", + " # TODO: Add number of students per each cohort \n", + " # TODO: Fix the options on the selection \n", + " # TODO: make colors better\n", "\n", - "# TODO: Add number of students per each cohort \n", - "# TODO: Fix the options on the selection \n", - "# TODO: make colors better" + "Dash_most_selected_path_by_cohort(enrollments).run(debug=True, port=8052)" ] }, { @@ -1476,18 +742,12 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 142, "id": "1504a061", "metadata": {}, "outputs": [], "source": [ "pathways = [\n", - " 'Software Development 1', \n", - " 'Software Development 2', \n", - " 'Web Development 1', \n", - " 'Web Development 2', \n", - " 'Data Analysis 1',\n", - " 'Data Analytics 2',\n", " 'Web Development M1',\n", " 'Web Development M2',\n", " 'Web Development M3',\n", @@ -1513,8 +773,39 @@ }, { "cell_type": "code", - "execution_count": 95, - "id": "930b4918", + "execution_count": 143, + "id": "930b4918", + "metadata": {}, + "outputs": [], + "source": [ + "# Not the best Pandas way to do it:\n", + "def Get_completion_percentages(df: pd.DataFrame, cohort: str = 'All cohorts') -> pd.DataFrame:\n", + " if cohort == 'All cohorts':\n", + " data = df\n", + " else:\n", + " data = df[df['ATP Cohort'] == pd.Timestamp(cohort)]\n", + "\n", + " completion_dictionary = {}\n", + "\n", + " for path in pathways:\n", + " outcome = data[data['Service'] == path]['Outcome'].value_counts(normalize=True).reset_index()\n", + " completion_dictionary[path] = {row.Outcome: row.proportion for row in outcome.itertuples(index=True)}\n", + " \n", + " result_df = pd.DataFrame(completion_dictionary).transpose().fillna(0).rename_axis('Module').reset_index()\n", + "\n", + " result_df['Pathway'] = result_df['Module'].apply(lambda x: x[:x.rfind(' ')]) # intended to be able to sort by pathway\n", + " return result_df\n", + "# TODO: Add test\n", + "\n", + "def Get_pathways_name(df: pd.DataFrame) -> list:\n", + " return list(df['Pathway'].unique())\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "id": "a7ea4ffb", "metadata": {}, "outputs": [ { @@ -1532,12 +823,12 @@ "type": "string" }, { - "name": "Did Not Complete", + "name": "Successfully Completed", "rawType": "float64", "type": "float" }, { - "name": "Successfully Completed", + "name": "Did Not Complete", "rawType": "float64", "type": "float" }, @@ -1557,208 +848,154 @@ "type": "string" } ], - "ref": "57c0c2ac-16b7-4b73-9fac-5eda16cda1fd", + "ref": "742847f3-ad3f-4e82-a206-eb8d98493700", "rows": [ [ "0", - "Software Development 1", - "0.5694444444444444", - "0.4305555555555556", - "0.0", - "0.0", - "Software Development" - ], - [ - "1", - "Software Development 2", - "0.47058823529411764", - "0.5294117647058824", - "0.0", - "0.0", - "Software Development" - ], - [ - "2", - "Web Development 1", - "0.5277777777777778", - "0.4722222222222222", - "0.0", - "0.0", - "Web Development" - ], - [ - "3", - "Web Development 2", - "0.4666666666666667", - "0.5333333333333333", - "0.0", - "0.0", - "Web Development" - ], - [ - "4", - "Data Analysis 1", - "0.6666666666666666", - "0.3333333333333333", - "0.0", - "0.0", - "Data Analysis" - ], - [ - "5", - "Data Analytics 2", - "0.35294117647058826", - "0.6470588235294118", - "0.0", - "0.0", - "Data Analytics" - ], - [ - "6", "Web Development M1", - "0.15625", "0.7916666666666666", + "0.15625", "0.052083333333333336", "0.0", "Web Development" ], [ - "7", + "1", "Web Development M2", - "0.15384615384615385", "0.8076923076923077", + "0.15384615384615385", "0.038461538461538464", "0.0", "Web Development" ], [ - "8", + "2", "Web Development M3", - "0.09375", "0.59375", + "0.09375", "0.0", "0.3125", "Web Development" ], [ - "9", + "3", "Web Development M4", - "0.325", "0.675", + "0.325", "0.0", "0.0", "Web Development" ], [ - "10", + "4", "Data Analysis M1", - "0.1574074074074074", "0.7777777777777778", + "0.1574074074074074", "0.06481481481481481", "0.0", "Data Analysis" ], [ - "11", + "5", "Data Analysis M2", - "0.27906976744186046", "0.6046511627906976", + "0.27906976744186046", "0.09302325581395349", "0.023255813953488372", "Data Analysis" ], [ - "12", + "6", "Data Analysis M3", - "0.07692307692307693", "0.6346153846153846", + "0.07692307692307693", "0.038461538461538464", "0.25", "Data Analysis" ], [ - "13", + "7", "Data Analysis M4", - "0.3939393939393939", "0.5757575757575758", + "0.3939393939393939", "0.030303030303030304", "0.0", "Data Analysis" ], [ - "14", + "8", "Software Development M1", - "0.18181818181818182", "0.8", + "0.18181818181818182", "0.01818181818181818", "0.0", "Software Development" ], [ - "15", + "9", "Software Development M2", - "0.20930232558139536", "0.7441860465116279", + "0.20930232558139536", "0.046511627906976744", "0.0", "Software Development" ], [ - "16", + "10", "Software Development M3", - "0.15151515151515152", "0.7878787878787878", + "0.15151515151515152", "0.06060606060606061", "0.0", "Software Development" ], [ - "17", + "11", "Software Development M4", - "0.4230769230769231", "0.5769230769230769", + "0.4230769230769231", "0.0", "0.0", "Software Development" ], [ - "18", + "12", "Quality Assurance M1", - "0.0", "1.0", "0.0", "0.0", + "0.0", "Quality Assurance" ], [ - "19", + "13", "Quality Assurance M2", - "0.0", "1.0", "0.0", "0.0", + "0.0", "Quality Assurance" ], [ - "20", + "14", "Quality Assurance M3", - "0.0", "1.0", "0.0", "0.0", + "0.0", "Quality Assurance" ], [ - "21", + "15", "Quality Assurance M4", - "0.0", "1.0", "0.0", "0.0", + "0.0", "Quality Assurance" ], [ - "22", + "16", "User Experience M1", "0.5", "0.5", @@ -1767,36 +1004,36 @@ "User Experience" ], [ - "23", + "17", "User Experience M2", - "0.0", "1.0", "0.0", "0.0", + "0.0", "User Experience" ], [ - "24", + "18", "User Experience M3", - "0.0", "1.0", "0.0", "0.0", + "0.0", "User Experience" ], [ - "25", + "19", "User Experience M4", - "0.0", "1.0", "0.0", "0.0", + "0.0", "User Experience" ] ], "shape": { "columns": 6, - "rows": 26 + "rows": 20 } }, "text/html": [ @@ -1819,8 +1056,8 @@ " \n", " \n", " Module\n", - " Did Not Complete\n", " Successfully Completed\n", + " Did Not Complete\n", " Partially Completed\n", " NA\n", " Pathway\n", @@ -1829,204 +1066,150 @@ " \n", " \n", " 0\n", - " Software Development 1\n", - " 0.569444\n", - " 0.430556\n", - " 0.000000\n", - " 0.000000\n", - " Software Development\n", - " \n", - " \n", - " 1\n", - " Software Development 2\n", - " 0.470588\n", - " 0.529412\n", - " 0.000000\n", - " 0.000000\n", - " Software Development\n", - " \n", - " \n", - " 2\n", - " Web Development 1\n", - " 0.527778\n", - " 0.472222\n", - " 0.000000\n", - " 0.000000\n", - " Web Development\n", - " \n", - " \n", - " 3\n", - " Web Development 2\n", - " 0.466667\n", - " 0.533333\n", - " 0.000000\n", - " 0.000000\n", - " Web Development\n", - " \n", - " \n", - " 4\n", - " Data Analysis 1\n", - " 0.666667\n", - " 0.333333\n", - " 0.000000\n", - " 0.000000\n", - " Data Analysis\n", - " \n", - " \n", - " 5\n", - " Data Analytics 2\n", - " 0.352941\n", - " 0.647059\n", - " 0.000000\n", - " 0.000000\n", - " Data Analytics\n", - " \n", - " \n", - " 6\n", " Web Development M1\n", - " 0.156250\n", " 0.791667\n", + " 0.156250\n", " 0.052083\n", " 0.000000\n", " Web Development\n", " \n", " \n", - " 7\n", + " 1\n", " Web Development M2\n", - " 0.153846\n", " 0.807692\n", + " 0.153846\n", " 0.038462\n", " 0.000000\n", " Web Development\n", " \n", " \n", - " 8\n", + " 2\n", " Web Development M3\n", - " 0.093750\n", " 0.593750\n", + " 0.093750\n", " 0.000000\n", " 0.312500\n", " Web Development\n", " \n", " \n", - " 9\n", + " 3\n", " Web Development M4\n", - " 0.325000\n", " 0.675000\n", + " 0.325000\n", " 0.000000\n", " 0.000000\n", " Web Development\n", " \n", " \n", - " 10\n", + " 4\n", " Data Analysis M1\n", - " 0.157407\n", " 0.777778\n", + " 0.157407\n", " 0.064815\n", " 0.000000\n", " Data Analysis\n", " \n", " \n", - " 11\n", + " 5\n", " Data Analysis M2\n", - " 0.279070\n", " 0.604651\n", + " 0.279070\n", " 0.093023\n", " 0.023256\n", " Data Analysis\n", " \n", " \n", - " 12\n", + " 6\n", " Data Analysis M3\n", - " 0.076923\n", " 0.634615\n", + " 0.076923\n", " 0.038462\n", " 0.250000\n", " Data Analysis\n", " \n", " \n", - " 13\n", + " 7\n", " Data Analysis M4\n", - " 0.393939\n", " 0.575758\n", + " 0.393939\n", " 0.030303\n", " 0.000000\n", " Data Analysis\n", " \n", " \n", - " 14\n", + " 8\n", " Software Development M1\n", - " 0.181818\n", " 0.800000\n", + " 0.181818\n", " 0.018182\n", " 0.000000\n", " Software Development\n", " \n", " \n", - " 15\n", + " 9\n", " Software Development M2\n", - " 0.209302\n", " 0.744186\n", + " 0.209302\n", " 0.046512\n", " 0.000000\n", " Software Development\n", " \n", " \n", - " 16\n", + " 10\n", " Software Development M3\n", - " 0.151515\n", " 0.787879\n", + " 0.151515\n", " 0.060606\n", " 0.000000\n", " Software Development\n", " \n", " \n", - " 17\n", + " 11\n", " Software Development M4\n", - " 0.423077\n", " 0.576923\n", + " 0.423077\n", " 0.000000\n", " 0.000000\n", " Software Development\n", " \n", " \n", - " 18\n", + " 12\n", " Quality Assurance M1\n", - " 0.000000\n", " 1.000000\n", " 0.000000\n", " 0.000000\n", + " 0.000000\n", " Quality Assurance\n", " \n", " \n", - " 19\n", + " 13\n", " Quality Assurance M2\n", - " 0.000000\n", " 1.000000\n", " 0.000000\n", " 0.000000\n", + " 0.000000\n", " Quality Assurance\n", " \n", " \n", - " 20\n", + " 14\n", " Quality Assurance M3\n", - " 0.000000\n", " 1.000000\n", " 0.000000\n", " 0.000000\n", + " 0.000000\n", " Quality Assurance\n", " \n", " \n", - " 21\n", + " 15\n", " Quality Assurance M4\n", - " 0.000000\n", " 1.000000\n", " 0.000000\n", " 0.000000\n", + " 0.000000\n", " Quality Assurance\n", " \n", " \n", - " 22\n", + " 16\n", " User Experience M1\n", " 0.500000\n", " 0.500000\n", @@ -2035,30 +1218,30 @@ " User Experience\n", " \n", " \n", - " 23\n", + " 17\n", " User Experience M2\n", - " 0.000000\n", " 1.000000\n", " 0.000000\n", " 0.000000\n", + " 0.000000\n", " User Experience\n", " \n", " \n", - " 24\n", + " 18\n", " User Experience M3\n", - " 0.000000\n", " 1.000000\n", " 0.000000\n", " 0.000000\n", + " 0.000000\n", " User Experience\n", " \n", " \n", - " 25\n", + " 19\n", " User Experience M4\n", - " 0.000000\n", " 1.000000\n", " 0.000000\n", " 0.000000\n", + " 0.000000\n", " User Experience\n", " \n", " \n", @@ -2066,95 +1249,64 @@ "" ], "text/plain": [ - " Module Did Not Complete Successfully Completed \\\n", - "0 Software Development 1 0.569444 0.430556 \n", - "1 Software Development 2 0.470588 0.529412 \n", - "2 Web Development 1 0.527778 0.472222 \n", - "3 Web Development 2 0.466667 0.533333 \n", - "4 Data Analysis 1 0.666667 0.333333 \n", - "5 Data Analytics 2 0.352941 0.647059 \n", - "6 Web Development M1 0.156250 0.791667 \n", - "7 Web Development M2 0.153846 0.807692 \n", - "8 Web Development M3 0.093750 0.593750 \n", - "9 Web Development M4 0.325000 0.675000 \n", - "10 Data Analysis M1 0.157407 0.777778 \n", - "11 Data Analysis M2 0.279070 0.604651 \n", - "12 Data Analysis M3 0.076923 0.634615 \n", - "13 Data Analysis M4 0.393939 0.575758 \n", - "14 Software Development M1 0.181818 0.800000 \n", - "15 Software Development M2 0.209302 0.744186 \n", - "16 Software Development M3 0.151515 0.787879 \n", - "17 Software Development M4 0.423077 0.576923 \n", - "18 Quality Assurance M1 0.000000 1.000000 \n", - "19 Quality Assurance M2 0.000000 1.000000 \n", - "20 Quality Assurance M3 0.000000 1.000000 \n", - "21 Quality Assurance M4 0.000000 1.000000 \n", - "22 User Experience M1 0.500000 0.500000 \n", - "23 User Experience M2 0.000000 1.000000 \n", - "24 User Experience M3 0.000000 1.000000 \n", - "25 User Experience M4 0.000000 1.000000 \n", + " Module Successfully Completed Did Not Complete \\\n", + "0 Web Development M1 0.791667 0.156250 \n", + "1 Web Development M2 0.807692 0.153846 \n", + "2 Web Development M3 0.593750 0.093750 \n", + "3 Web Development M4 0.675000 0.325000 \n", + "4 Data Analysis M1 0.777778 0.157407 \n", + "5 Data Analysis M2 0.604651 0.279070 \n", + "6 Data Analysis M3 0.634615 0.076923 \n", + "7 Data Analysis M4 0.575758 0.393939 \n", + "8 Software Development M1 0.800000 0.181818 \n", + "9 Software Development M2 0.744186 0.209302 \n", + "10 Software Development M3 0.787879 0.151515 \n", + "11 Software Development M4 0.576923 0.423077 \n", + "12 Quality Assurance M1 1.000000 0.000000 \n", + "13 Quality Assurance M2 1.000000 0.000000 \n", + "14 Quality Assurance M3 1.000000 0.000000 \n", + "15 Quality Assurance M4 1.000000 0.000000 \n", + "16 User Experience M1 0.500000 0.500000 \n", + "17 User Experience M2 1.000000 0.000000 \n", + "18 User Experience M3 1.000000 0.000000 \n", + "19 User Experience M4 1.000000 0.000000 \n", "\n", " Partially Completed NA Pathway \n", - "0 0.000000 0.000000 Software Development \n", - "1 0.000000 0.000000 Software Development \n", - "2 0.000000 0.000000 Web Development \n", + "0 0.052083 0.000000 Web Development \n", + "1 0.038462 0.000000 Web Development \n", + "2 0.000000 0.312500 Web Development \n", "3 0.000000 0.000000 Web Development \n", - "4 0.000000 0.000000 Data Analysis \n", - "5 0.000000 0.000000 Data Analytics \n", - "6 0.052083 0.000000 Web Development \n", - "7 0.038462 0.000000 Web Development \n", - "8 0.000000 0.312500 Web Development \n", - "9 0.000000 0.000000 Web Development \n", - "10 0.064815 0.000000 Data Analysis \n", - "11 0.093023 0.023256 Data Analysis \n", - "12 0.038462 0.250000 Data Analysis \n", - "13 0.030303 0.000000 Data Analysis \n", - "14 0.018182 0.000000 Software Development \n", - "15 0.046512 0.000000 Software Development \n", - "16 0.060606 0.000000 Software Development \n", - "17 0.000000 0.000000 Software Development \n", - "18 0.000000 0.000000 Quality Assurance \n", - "19 0.000000 0.000000 Quality Assurance \n", - "20 0.000000 0.000000 Quality Assurance \n", - "21 0.000000 0.000000 Quality Assurance \n", - "22 0.000000 0.000000 User Experience \n", - "23 0.000000 0.000000 User Experience \n", - "24 0.000000 0.000000 User Experience \n", - "25 0.000000 0.000000 User Experience " + "4 0.064815 0.000000 Data Analysis \n", + "5 0.093023 0.023256 Data Analysis \n", + "6 0.038462 0.250000 Data Analysis \n", + "7 0.030303 0.000000 Data Analysis \n", + "8 0.018182 0.000000 Software Development \n", + "9 0.046512 0.000000 Software Development \n", + "10 0.060606 0.000000 Software Development \n", + "11 0.000000 0.000000 Software Development \n", + "12 0.000000 0.000000 Quality Assurance \n", + "13 0.000000 0.000000 Quality Assurance \n", + "14 0.000000 0.000000 Quality Assurance \n", + "15 0.000000 0.000000 Quality Assurance \n", + "16 0.000000 0.000000 User Experience \n", + "17 0.000000 0.000000 User Experience \n", + "18 0.000000 0.000000 User Experience \n", + "19 0.000000 0.000000 User Experience " ] }, - "execution_count": 95, + "execution_count": 144, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Not the best Pandas way to do it:\n", - "def Get_completion_percentages(df: pd.DataFrame, cohort: str = 'All') -> pd.DataFrame:\n", - " if cohort == 'All':\n", - " data = df\n", - " else:\n", - " data = df[df['ATP Cohort'] == pd.Timestamp(cohort)]\n", - "\n", - " completion_dictionary = {}\n", - "\n", - " for path in pathways:\n", - " outcome = df[df['Service'] == path]['Outcome'].value_counts(normalize=True).reset_index()\n", - " completion_dictionary[path] = {row.Outcome: row.proportion for row in outcome.itertuples(index=True)}\n", - " \n", - " result_df = pd.DataFrame(completion_dictionary).transpose().fillna(0).rename_axis('Module').reset_index()\n", - "\n", - " result_df['Pathway'] = result_df['Module'].apply(lambda x: x[:x.rfind(' ')]) # intended to be able to sort by pathway\n", - " return result_df\n", - "# TODO: Add test\n", - "\n", "Get_completion_percentages(enrollments)" ] }, { "cell_type": "code", - "execution_count": 96, - "id": "b85b02f0", + "execution_count": 145, + "id": "98a912ca", "metadata": {}, "outputs": [ { @@ -2172,7 +1324,7 @@ " " ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -2180,49 +1332,53 @@ } ], "source": [ - "app2 = Dash(__name__)\n", - "# Const\n", - "completion_df = Get_completion_percentages(enrollments)\n", - "options = list(completion_df['Pathway'].unique())\n", + "def Dash_completion_rates_by_path(df: pd.DataFrame) -> Dash: # TODO: fix data structure so visualization doesn't use df\n", + " app2 = Dash(__name__)\n", + " # Const\n", + " completion_df = Get_completion_percentages(df)\n", + " options = Get_pathways_name(completion_df)\n", + "\n", + " pathway_color = {\n", + " 'Software Development': 'green', \n", + " 'Web Development': 'blue', \n", + " 'Data Analysis': 'red',\n", + " 'Quality Assurance': 'yellow', \n", + " 'User Experience': 'purple'\n", + " }\n", "\n", - "pathway_color = {\n", - " 'Software Development': 'green', \n", - " 'Web Development': 'blue', \n", - " 'Data Analysis': 'red',\n", - " 'Quality Assurance': 'yellow', \n", - " 'User Experience': 'purple'\n", - "}\n", + " # Display\n", + " app2.layout = html.Div([\n", + " html.H2('Pathways Completion', style={'text-align': \"center\"}),\n", + " html.P('Select pathway:'),\n", + " dcc.Dropdown(\n", + " id=\"dropdown\",\n", + " options=options,\n", + " value=options[0],\n", + " clearable=False,\n", + " ),\n", + " dcc.Graph(id=\"graph\")\n", + " \n", + " ], style={'backgroundColor':'white'})\n", "\n", - "# Display\n", - "app2.layout = html.Div([\n", - " html.H2('Pathways Completion', style={'text-align': \"center\"}),\n", - " html.P('Select pathway:'),\n", - " dcc.Dropdown(\n", - " id=\"dropdown\",\n", - " options=options,\n", - " value=options[0],\n", - " clearable=False,\n", - " ),\n", - " dcc.Graph(id=\"graph\")\n", - " \n", - "], style={'backgroundColor':'white'})\n", + " @app2.callback(\n", + " Output(\"graph\", \"figure\"),\n", + " Input(\"dropdown\", \"value\"))\n", "\n", - "@app2.callback(\n", - " Output(\"graph\", \"figure\"),\n", - " Input(\"dropdown\", \"value\"))\n", + " # Graph\n", + " # TODO: Need to add an extra selection box with the cohorts\n", + " def Display_pathway_completion(p):\n", + " df = completion_df[completion_df['Pathway'] == p]\n", + " fig = px.bar(df, x='Module', y='Successfully Completed')\n", + " return fig\n", "\n", - "# Graph\n", - "def Display_pathway_completion(p):\n", - " df = completion_df[completion_df['Pathway'] == p]\n", - " fig = px.bar(df, x='Module', y='Successfully Completed')\n", - " return fig\n", + " return app2\n", "\n", - "app2.run(debug=True, port=8053)" + "Dash_completion_rates_by_path(enrollments).run(debug=True, port=8053)" ] }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 146, "id": "0db624db", "metadata": {}, "outputs": [ @@ -2248,12 +1404,6 @@ "textposition": "auto", "type": "bar", "x": [ - "Software Development 1", - "Software Development 2", - "Web Development 1", - "Web Development 2", - "Data Analysis 1", - "Data Analytics 2", "Web Development M1", "Web Development M2", "Web Development M3", @@ -2277,7 +1427,7 @@ ], "xaxis": "x", "y": { - "bdata": "5DiO4ziO2z/x8PDw8PDgP47jOI7jON4/ERERERER4T9VVVVVVVXVP7W0tLS0tOQ/VVVVVVVV6T+KndiJndjpPwAAAAAAAOM/mpmZmZmZ5T85juM4juPoP9aUNWVNWeM/7MRO7MRO5D9tsskmm2ziP5qZmZmZmek/9AV9QV/Q5z822WSTTTbpP2IndmInduI/AAAAAAAA8D8AAAAAAADwPwAAAAAAAPA/AAAAAAAA8D8AAAAAAADgPwAAAAAAAPA/AAAAAAAA8D8AAAAAAADwPw==", + "bdata": "VVVVVVVV6T+KndiJndjpPwAAAAAAAOM/mpmZmZmZ5T85juM4juPoP9aUNWVNWeM/7MRO7MRO5D9tsskmm2ziP5qZmZmZmek/9AV9QV/Q5z822WSTTTbpP2IndmInduI/AAAAAAAA8D8AAAAAAADwPwAAAAAAAPA/AAAAAAAA8D8AAAAAAADgPwAAAAAAAPA/AAAAAAAA8D8AAAAAAADwPw==", "dtype": "f8" }, "yaxis": "y" From 2849b97287eff562bcb38db4915b6d9c1f74b66e Mon Sep 17 00:00:00 2001 From: Euclides Date: Sun, 10 Aug 2025 17:41:36 -0400 Subject: [PATCH 8/9] Structure files, deleted old nb and added demostration nb --- .gitignore | 5 + Cleaning_enrollments_data.ipynb | 2307 ---------------------- cleaning_enrollments_data.py | 85 + compleation_rate_data.py | 52 + historical_student_data_dashboards.ipynb | 2275 --------------------- most_common_pathways_taken_data.py | 41 + visualization_examples.ipynb | 246 +++ 7 files changed, 429 insertions(+), 4582 deletions(-) delete mode 100644 Cleaning_enrollments_data.ipynb create mode 100644 cleaning_enrollments_data.py create mode 100644 compleation_rate_data.py delete mode 100644 historical_student_data_dashboards.ipynb create mode 100644 most_common_pathways_taken_data.py create mode 100644 visualization_examples.ipynb diff --git a/.gitignore b/.gitignore index b7faf40..97df104 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +# Personal +Data/ +Tests/ +tester_2.ipynb + # Byte-compiled / optimized / DLL files __pycache__/ *.py[codz] diff --git a/Cleaning_enrollments_data.ipynb b/Cleaning_enrollments_data.ipynb deleted file mode 100644 index 09020d3..0000000 --- a/Cleaning_enrollments_data.ipynb +++ /dev/null @@ -1,2307 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "f1f8e04d", - "metadata": {}, - "source": [ - "# ARC Application - Data cleanning" - ] - }, - { - "cell_type": "markdown", - "id": "86a96267", - "metadata": {}, - "source": [ - "All data cleaning will be in the function 'Enrollments cleaning'\n", - "\n", - "Notes:\n", - "- 'KY Region' field all the values are 'SOAR'\n", - "\n", - "Questions:\n", - "- Why are they 'Actual Start Date' fields as NA when there is a Actual End Date" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "f782ef33", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import plotly.express as px" - ] - }, - { - "cell_type": "markdown", - "id": "0b28f740", - "metadata": {}, - "source": [ - "# Result:" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "2efeac68", - "metadata": {}, - "outputs": [], - "source": [ - "def Enrollments_cleaning(df: pd.DataFrame) -> pd.DataFrame:\n", - " # Columns to clean\n", - " COLUMNS_TO_DROP = ['Full Name']\n", - " enrollments = df.drop(columns=COLUMNS_TO_DROP)\n", - "\n", - " # Fix NaN values\n", - " NAN_VALUE_SUBSTITUTE = 'NA'\n", - " columns_to_fix = {\n", - " 'Projected Start Date': NAN_VALUE_SUBSTITUTE, 'Actual Start Date': NAN_VALUE_SUBSTITUTE, 'Projected End Date': NAN_VALUE_SUBSTITUTE,\n", - " 'Actual End Date': NAN_VALUE_SUBSTITUTE, 'Outcome': NAN_VALUE_SUBSTITUTE, 'ATP Cohort': NAN_VALUE_SUBSTITUTE\n", - " }\n", - "\n", - " for column, substitute_value in columns_to_fix.items():\n", - " enrollments[column] = enrollments[column].fillna(substitute_value)\n", - "\n", - " return enrollments" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "c34df585", - "metadata": {}, - "outputs": [], - "source": [ - "def Test_enrollments_cleaning(clean_df: pd.DataFrame):\n", - " # Parameter can be change to an in-function call of the data cleanner with the DF\n", - " assert ~clean_df.isna().any().any(), 'The Dataframe has na values.'" - ] - }, - { - "cell_type": "markdown", - "id": "10146c9e", - "metadata": {}, - "source": [ - "# Exploratory Data Analysis" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "a32e170c", - "metadata": {}, - "outputs": [], - "source": [ - "enrollments = pd.read_excel('Data\\\\Raw\\\\ARC Enrollments.xlsx')\n", - "enrollments = Enrollments_cleaning(enrollments)\n", - "enrollments\n", - "Test_enrollments_cleaning(enrollments)\n" - ] - }, - { - "cell_type": "markdown", - "id": "a6805ca6", - "metadata": {}, - "source": [ - "### Understanding the categories of data\n" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "423d5090", - "metadata": {}, - "outputs": [], - "source": [ - "pathways = [\n", - " 'Software Development 1', \n", - " 'Software Development 2', \n", - " 'Web Development 1', \n", - " 'Web Development 2', \n", - " 'Data Analysis 1',\n", - " 'Data Analytics 2',\n", - " 'Web Development M1',\n", - " 'Web Development M2',\n", - " 'Web Development M3',\n", - " 'Web Development M4',\n", - " 'Data Analysis M1', \n", - " 'Data Analysis M2',\n", - " 'Data Analysis M3',\n", - " 'Data Analysis M4', \n", - " 'Software Development M1',\n", - " 'Software Development M2',\n", - " 'Software Development M3',\n", - " 'Software Development M4',\n", - " 'Quality Assurance M1', \n", - " 'Quality Assurance M2',\n", - " 'Quality Assurance M3', \n", - " 'Quality Assurance M4', \n", - " 'User Experience M1', \n", - " 'User Experience M2',\n", - " 'User Experience M3', \n", - " 'User Experience M4',\n", - "]\n", - "\n", - "workshops = [\n", - " 'JavaScript - React',\n", - " 'AWS',\n", - " 'Salesforce',\n", - " 'UofL Cyber Security Certificate',\n", - " 'Intro to Programming Core',\n", - " 'Artificial Intelligence M1',\n", - " 'Technical Project Management',\n", - "]\n", - "\n", - "support_ways = [\n", - " 'Career Readiness Workshop',\n", - " 'One-on-one Job Readiness',\n", - " 'Interview Guidance and Practice',\n", - " 'Remote Jobs Workshop (EDA Grant)',\n", - " 'Employer Event (Code:You)',\n", - " 'Loaner Laptop',\n", - " 'Demo Day Participant',\n", - " 'Tech Communications Workshop',\n", - " 'Remote Jobs Workshop (non EDA)',\n", - " 'Referral to External Service',\n", - " 'Supportive Services Referral', \n", - " 'Resume Review and Optimization',\n", - " 'Revised Resume Review'\n", - "]\n", - "\n", - "# This category is only used to find the path choose by students:\n", - "Starter_pathways = [\n", - " 'Software Development 1', \n", - " 'Web Development 1', \n", - " 'Data Analysis 1',\n", - " 'Web Development M1',\n", - " 'Data Analysis M1', \n", - " 'Software Development M1',\n", - " 'Quality Assurance M1', \n", - " 'User Experience M1',\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "61a579c5", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "index", - "rawType": "int64", - "type": "integer" - }, - { - "name": "Auto Id", - "rawType": "object", - "type": "string" - }, - { - "name": "KY Region", - "rawType": "object", - "type": "string" - }, - { - "name": "Assessment ID", - "rawType": "object", - "type": "string" - }, - { - "name": "EnrollmentId", - "rawType": "object", - "type": "string" - }, - { - "name": "Enrollment Service Name", - "rawType": "object", - "type": "string" - }, - { - "name": "Service", - "rawType": "object", - "type": "string" - }, - { - "name": "Projected Start Date", - "rawType": "object", - "type": "unknown" - }, - { - "name": "Actual Start Date", - "rawType": "object", - "type": "unknown" - }, - { - "name": "Projected End Date", - "rawType": "object", - "type": "unknown" - }, - { - "name": "Actual End Date", - "rawType": "object", - "type": "unknown" - }, - { - "name": "Outcome", - "rawType": "object", - "type": "string" - }, - { - "name": "ATP Cohort", - "rawType": "object", - "type": "unknown" - }, - { - "name": "Category", - "rawType": "object", - "type": "string" - } - ], - "ref": "60433c33-2973-4ecf-8a66-39b937632565", - "rows": [ - [ - "0", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0011193", - "Career Readiness Workshop", - "2021-11-11 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "1", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0013492", - "Software Development 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00", - "Pathway" - ], - [ - "2", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0014187", - "Career Readiness Workshop", - "2022-03-07 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "3", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0015022", - "Software Development 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-07-29 00:00:00", - "Successfully Completed", - "2022-05-01 00:00:00", - "Pathway" - ], - [ - "4", - "202109-5224", - "SOAR", - "OA-003348", - "Enrollment-1386", - "ES-0015075", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00", - "Pathway" - ], - [ - "5", - "202109-5230", - "SOAR", - "OA-003352", - "Enrollment-1389", - "ES-0015087", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-09-28 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00", - "Pathway" - ], - [ - "6", - "202109-5233", - "SOAR", - "OA-003355", - "Enrollment-1392", - "ES-0011185", - "Career Readiness Workshop", - "2021-11-11 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "7", - "202109-5233", - "SOAR", - "OA-003355", - "Enrollment-1392", - "ES-0013525", - "Web Development 2", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00", - "Pathway" - ], - [ - "8", - "202109-5233", - "SOAR", - "OA-003355", - "Enrollment-1392", - "ES-0015081", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00", - "Pathway" - ], - [ - "9", - "202109-5236", - "SOAR", - "OA-003358", - "Enrollment-1395", - "ES-0015092", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-01 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00", - "Pathway" - ], - [ - "10", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0011212", - "One-on-one Job Readiness", - "2021-11-23 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "11", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0015086", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-01 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00", - "Pathway" - ], - [ - "12", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0017175", - "Data Analysis 1", - "2023-01-04 00:00:00", - "NA", - "NA", - "2023-03-31 00:00:00", - "Successfully Completed", - "2023-01-01 00:00:00", - "Pathway" - ], - [ - "13", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0017537", - "Career Readiness Workshop", - "2023-02-16 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "14", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0018722", - "Data Analytics 2", - "2023-05-09 00:00:00", - "NA", - "NA", - "2023-08-07 00:00:00", - "Successfully Completed", - "2023-05-01 00:00:00", - "Pathway" - ], - [ - "15", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0023302", - "One-on-one Job Readiness", - "2024-06-12 00:00:00", - "2024-06-12 00:00:00", - "2024-06-12 00:00:00", - "2024-06-12 00:00:00", - "NA", - "NA", - "Student Support" - ], - [ - "16", - "202109-5237", - "SOAR", - "OA-003359", - "Enrollment-1396", - "ES-0023396", - "One-on-one Job Readiness", - "2024-05-23 00:00:00", - "NA", - "2024-05-23 00:00:00", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "17", - "202109-5238", - "SOAR", - "OA-003360", - "Enrollment-1398", - "ES-0015085", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-09 00:00:00", - "2021-12-09 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00", - "Pathway" - ], - [ - "18", - "202109-5239", - "SOAR", - "OA-003363", - "Enrollment-1401", - "ES-0017159", - "Web Development 1", - "2021-09-08 00:00:00", - "NA", - "NA", - "2021-09-23 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00", - "Pathway" - ], - [ - "19", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0011221", - "One-on-one Job Readiness", - "2021-12-03 00:00:00", - "2021-12-03 00:00:00", - "NA", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "20", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0013503", - "Software Development 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00", - "Pathway" - ], - [ - "21", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0014300", - "One-on-one Job Readiness", - "2022-03-29 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "22", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0015052", - "Software Development 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-07-29 00:00:00", - "Successfully Completed", - "2022-05-01 00:00:00", - "Pathway" - ], - [ - "23", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0015076", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00", - "Pathway" - ], - [ - "24", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0017237", - "Data Analysis 1", - "2022-08-24 00:00:00", - "NA", - "NA", - "2022-11-18 00:00:00", - "Successfully Completed", - "2022-09-01 00:00:00", - "Pathway" - ], - [ - "25", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0017238", - "Data Analytics 2", - "2023-01-04 00:00:00", - "NA", - "NA", - "2023-03-31 00:00:00", - "Successfully Completed", - "2023-01-01 00:00:00", - "Pathway" - ], - [ - "26", - "202109-5243", - "SOAR", - "OA-003366", - "Enrollment-1404", - "ES-0018733", - "Web Development 2", - "2023-05-11 00:00:00", - "NA", - "NA", - "2023-08-07 00:00:00", - "Successfully Completed", - "2023-05-01 00:00:00", - "Pathway" - ], - [ - "27", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0011174", - "Career Readiness Workshop", - "2021-11-08 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "28", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0013507", - "Software Development 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-05-06 00:00:00", - "2022-05-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00", - "Pathway" - ], - [ - "29", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0014268", - "One-on-one Job Readiness", - "2022-03-22 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "30", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0015065", - "Web Development 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-07-29 00:00:00", - "Did Not Complete", - "2022-05-01 00:00:00", - "Pathway" - ], - [ - "31", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0015080", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00", - "Pathway" - ], - [ - "32", - "202109-5245", - "SOAR", - "OA-003367", - "Enrollment-1405", - "ES-0023385", - "One-on-one Job Readiness", - "2024-01-29 00:00:00", - "NA", - "2024-01-29 00:00:00", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "33", - "202109-5272", - "SOAR", - "OA-003395", - "Enrollment-1434", - "ES-0011175", - "Career Readiness Workshop", - "2021-11-08 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "34", - "202109-5272", - "SOAR", - "OA-003395", - "Enrollment-1434", - "ES-0013526", - "Web Development 2", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00", - "Pathway" - ], - [ - "35", - "202109-5272", - "SOAR", - "OA-003395", - "Enrollment-1434", - "ES-0015078", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00", - "Pathway" - ], - [ - "36", - "202109-5272", - "SOAR", - "OA-003395", - "Enrollment-1434", - "ES-0017252", - "Software Development 1", - "2022-08-24 00:00:00", - "NA", - "NA", - "2022-10-03 00:00:00", - "Did Not Complete", - "2022-09-01 00:00:00", - "Pathway" - ], - [ - "37", - "202109-5273", - "SOAR", - "OA-003396", - "Enrollment-1435", - "ES-0015089", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-10-27 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00", - "Pathway" - ], - [ - "38", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0011204", - "Career Readiness Workshop", - "2021-11-18 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "39", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0013483", - "Data Analysis 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-04-06 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00", - "Pathway" - ], - [ - "40", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0014299", - "One-on-one Job Readiness", - "2022-03-29 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "41", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0015047", - "Data Analytics 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-06-08 00:00:00", - "Did Not Complete", - "2022-05-01 00:00:00", - "Pathway" - ], - [ - "42", - "202109-5274", - "SOAR", - "OA-003397", - "Enrollment-1608", - "ES-0015084", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00", - "Pathway" - ], - [ - "43", - "202109-5275", - "SOAR", - "OA-003398", - "Enrollment-1609", - "ES-0015088", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-10-11 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00", - "Pathway" - ], - [ - "44", - "202109-5276", - "SOAR", - "OA-003399", - "Enrollment-1611", - "ES-0015091", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-10-28 00:00:00", - "Did Not Complete", - "2021-09-01 00:00:00", - "Pathway" - ], - [ - "45", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0011213", - "One-on-one Job Readiness", - "2021-11-23 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "46", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0013995", - "Software Development 1", - "2022-01-05 00:00:00", - "2022-01-05 00:00:00", - "2022-04-06 00:00:00", - "2022-07-29 00:00:00", - "Successfully Completed", - "2022-01-01 00:00:00", - "Pathway" - ], - [ - "47", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0014191", - "Career Readiness Workshop", - "2022-03-07 00:00:00", - "NA", - "NA", - "NA", - "NA", - "NA", - "Student Support" - ], - [ - "48", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0015050", - "Software Development 2", - "2022-05-04 00:00:00", - "2022-05-04 00:00:00", - "2022-07-29 00:00:00", - "2022-07-29 00:00:00", - "Successfully Completed", - "2022-05-01 00:00:00", - "Pathway" - ], - [ - "49", - "202109-5277", - "SOAR", - "OA-003400", - "Enrollment-4425", - "ES-0015074", - "Web Development 1", - "2021-09-08 00:00:00", - "2021-09-08 00:00:00", - "2021-12-14 00:00:00", - "2021-12-14 00:00:00", - "Successfully Completed", - "2021-09-01 00:00:00", - "Pathway" - ] - ], - "shape": { - "columns": 13, - "rows": 2033 - } - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Auto IdKY RegionAssessment IDEnrollmentIdEnrollment Service NameServiceProjected Start DateActual Start DateProjected End DateActual End DateOutcomeATP CohortCategory
0202109-5224SOAROA-003348Enrollment-1386ES-0011193Career Readiness Workshop2021-11-11 00:00:00NANANANANAStudent Support
1202109-5224SOAROA-003348Enrollment-1386ES-0013492Software Development 12022-01-05 00:00:002022-01-05 00:00:002022-04-06 00:00:002022-04-06 00:00:00Successfully Completed2022-01-01 00:00:00Pathway
2202109-5224SOAROA-003348Enrollment-1386ES-0014187Career Readiness Workshop2022-03-07 00:00:00NANANANANAStudent Support
3202109-5224SOAROA-003348Enrollment-1386ES-0015022Software Development 22022-05-04 00:00:002022-05-04 00:00:002022-07-29 00:00:002022-07-29 00:00:00Successfully Completed2022-05-01 00:00:00Pathway
4202109-5224SOAROA-003348Enrollment-1386ES-0015075Web Development 12021-09-08 00:00:002021-09-08 00:00:002021-12-14 00:00:002021-12-14 00:00:00Successfully Completed2021-09-01 00:00:00Pathway
..........................................
2028202504-21723SOAROA-022760Enrollment-14196ES-0035149Intro to Programming Core2025-05-12 00:00:002025-05-12 00:00:002025-06-27 00:00:00NANA2025-05-01 00:00:00Workshop
2029202505-22788SOAROA-023710Enrollment-14213ES-0035212Intro to Programming Core2025-05-14 00:00:002025-05-13 00:00:002025-06-17 00:00:00NANA2025-05-01 00:00:00Workshop
2030202408-16568SOAROA-017961Enrollment-14833ES-0036429Intro to Programming Core2025-05-12 00:00:00NANANADid Not Complete2025-05-01 00:00:00Workshop
2031202408-16568SOAROA-017961Enrollment-14833ES-0036430Supportive Services ReferralNANANANANANAStudent Support
2032202503-21420SOAROA-022426Enrollment-15195ES-0038953Intro to Programming Core2025-05-12 00:00:00NA2025-06-27 00:00:00NANA2025-05-01 00:00:00Workshop
\n", - "

2033 rows × 13 columns

\n", - "
" - ], - "text/plain": [ - " Auto Id KY Region Assessment ID EnrollmentId \\\n", - "0 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "1 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "2 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "3 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "4 202109-5224 SOAR OA-003348 Enrollment-1386 \n", - "... ... ... ... ... \n", - "2028 202504-21723 SOAR OA-022760 Enrollment-14196 \n", - "2029 202505-22788 SOAR OA-023710 Enrollment-14213 \n", - "2030 202408-16568 SOAR OA-017961 Enrollment-14833 \n", - "2031 202408-16568 SOAR OA-017961 Enrollment-14833 \n", - "2032 202503-21420 SOAR OA-022426 Enrollment-15195 \n", - "\n", - " Enrollment Service Name Service \\\n", - "0 ES-0011193 Career Readiness Workshop \n", - "1 ES-0013492 Software Development 1 \n", - "2 ES-0014187 Career Readiness Workshop \n", - "3 ES-0015022 Software Development 2 \n", - "4 ES-0015075 Web Development 1 \n", - "... ... ... \n", - "2028 ES-0035149 Intro to Programming Core \n", - "2029 ES-0035212 Intro to Programming Core \n", - "2030 ES-0036429 Intro to Programming Core \n", - "2031 ES-0036430 Supportive Services Referral \n", - "2032 ES-0038953 Intro to Programming Core \n", - "\n", - " Projected Start Date Actual Start Date Projected End Date \\\n", - "0 2021-11-11 00:00:00 NA NA \n", - "1 2022-01-05 00:00:00 2022-01-05 00:00:00 2022-04-06 00:00:00 \n", - "2 2022-03-07 00:00:00 NA NA \n", - "3 2022-05-04 00:00:00 2022-05-04 00:00:00 2022-07-29 00:00:00 \n", - "4 2021-09-08 00:00:00 2021-09-08 00:00:00 2021-12-14 00:00:00 \n", - "... ... ... ... \n", - "2028 2025-05-12 00:00:00 2025-05-12 00:00:00 2025-06-27 00:00:00 \n", - "2029 2025-05-14 00:00:00 2025-05-13 00:00:00 2025-06-17 00:00:00 \n", - "2030 2025-05-12 00:00:00 NA NA \n", - "2031 NA NA NA \n", - "2032 2025-05-12 00:00:00 NA 2025-06-27 00:00:00 \n", - "\n", - " Actual End Date Outcome ATP Cohort \\\n", - "0 NA NA NA \n", - "1 2022-04-06 00:00:00 Successfully Completed 2022-01-01 00:00:00 \n", - "2 NA NA NA \n", - "3 2022-07-29 00:00:00 Successfully Completed 2022-05-01 00:00:00 \n", - "4 2021-12-14 00:00:00 Successfully Completed 2021-09-01 00:00:00 \n", - "... ... ... ... \n", - "2028 NA NA 2025-05-01 00:00:00 \n", - "2029 NA NA 2025-05-01 00:00:00 \n", - "2030 NA Did Not Complete 2025-05-01 00:00:00 \n", - "2031 NA NA NA \n", - "2032 NA NA 2025-05-01 00:00:00 \n", - "\n", - " Category \n", - "0 Student Support \n", - "1 Pathway \n", - "2 Student Support \n", - "3 Pathway \n", - "4 Pathway \n", - "... ... \n", - "2028 Workshop \n", - "2029 Workshop \n", - "2030 Workshop \n", - "2031 Student Support \n", - "2032 Workshop \n", - "\n", - "[2033 rows x 13 columns]" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "enrollments['Category'] = ''\n", - "\n", - "enrollments.loc[enrollments['Service'].isin(pathways), 'Category'] = 'Pathway'\n", - "enrollments.loc[enrollments['Service'].isin(workshops), 'Category'] = 'Workshop'\n", - "enrollments.loc[enrollments['Service'].isin(support_ways), 'Category'] = 'Student Support'\n", - "\n", - "enrollments" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "64432809", - "metadata": {}, - "outputs": [], - "source": [ - "pie_df = enrollments.value_counts('Category').reset_index()" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "712aeaff", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "domain": { - "x": [ - 0, - 1 - ], - "y": [ - 0, - 1 - ] - }, - "hovertemplate": "Category=%{label}
count=%{value}", - "labels": [ - "Pathway", - "Workshop", - "Student Support" - ], - "legendgroup": "", - "name": "", - "showlegend": true, - "type": "pie", - "values": { - "bdata": "7gMHAvwB", - "dtype": "i2" - } - } - ], - "layout": { - "legend": { - "tracegroupgap": 0 - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermap": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermap" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Data structure by type of Service" - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fig = px.pie(pie_df, values='count', names='Category', title='Data structure by type of Service')\n", - "fig.show()" - ] - }, - { - "cell_type": "markdown", - "id": "0ecaa27b", - "metadata": {}, - "source": [ - "### Understanding 'ATP Cohort'" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "dac5ffdb", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "Outcome", - "rawType": "object", - "type": "string" - }, - { - "name": "count", - "rawType": "int64", - "type": "integer" - } - ], - "ref": "847f75cd-6b6c-4d4c-a1dc-5c109a6499d9", - "rows": [ - [ - "NA", - "445" - ], - [ - "Successfully Completed", - "10" - ], - [ - "Did Not Complete", - "5" - ] - ], - "shape": { - "columns": 1, - "rows": 3 - } - }, - "text/plain": [ - "Outcome\n", - "NA 445\n", - "Successfully Completed 10\n", - "Did Not Complete 5\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "enrollments[enrollments['ATP Cohort'] == 'NA'].value_counts('Outcome')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/cleaning_enrollments_data.py b/cleaning_enrollments_data.py new file mode 100644 index 0000000..870303d --- /dev/null +++ b/cleaning_enrollments_data.py @@ -0,0 +1,85 @@ +import pandas as pd +import numpy as np + +class EnrollmentsCleaning: + def __init__(self, raw_data): + self.raw_data = raw_data + + def Drop_columns(self, df): + COLUMNS_TO_DROP = ['Full Name'] + result = df.drop(columns=COLUMNS_TO_DROP) + return result + + def Fix_nan_values(self, df): + # Fix NaN values + NAN_VALUE_SUBSTITUTE = 'NA' + columns_to_fix = { + 'Projected Start Date': NAN_VALUE_SUBSTITUTE, 'Actual Start Date': NAN_VALUE_SUBSTITUTE, 'Projected End Date': NAN_VALUE_SUBSTITUTE, + 'Actual End Date': NAN_VALUE_SUBSTITUTE, 'Outcome': NAN_VALUE_SUBSTITUTE + } + # 'ATP Cohort' NA will handle in a separed function + for column, substitute_value in columns_to_fix.items(): + df[column] = df[column].fillna(substitute_value) + + return df + + def Rename_values(self, df): + # Fix change name Data Analitics 2 to Data Analysis 2 for consistency + df.loc[df['Service'] == 'Data Analytics 2', 'Service'] = 'Data Analysis 2' + return df + + def Delete_values(self, df): + # Delete values not needed + # 'Referral to External Service', 'Supportive Services Referral', are deleted because dont have a "Projected Start Date" + values_not_needed = { + 'Service': ['Software Development 1', 'Software Development 2', 'Web Development 1', 'Web Development 2', 'Data Analysis 1','Data Analysis 2', 'Referral to External Service', 'Supportive Services Referral'] + } + for column, value in values_not_needed.items(): + df = df[~df[column].isin(value)] + return df + + def Set_data_types(self, df): + # DataTypes + column_datatype: dict = {'Auto Id': str, 'KY Region': str, 'Assessment ID': str, 'EnrollmentId': str, + 'Enrollment Service Name': str, 'Service': str, 'Projected Start Date': str, + 'Actual Start Date': str, 'Projected End Date': str, 'Actual End Date': str, 'Outcome': str, + 'ATP Cohort': 'datetime64[ns]'} + # TODO: 'Projected Start Date', 'Actual Start Date', 'Projected End Date', 'Actual End Date' are all datetime types but have a value fix of NA + + for column, type in column_datatype.items(): + df[column] = df[column].astype(type) + return df + + def Find_cohort(self, id: str, projected_start_date: str, cohort_to_find: str, df_to_clean: pd.DataFrame): + ## Q: What to do with Service: ['Referral to External Service', 'Supportive Services Referral'] + ## TODO: Clean the NaTType before this function runs + if pd.isna(cohort_to_find): + student_df = df_to_clean[df_to_clean['Auto Id'] == id] + # remove ATP Cohort NA values, it can be more than one + student_df: pd.DataFrame = student_df[~student_df['ATP Cohort'].isna()] + cohorts_participaded = student_df['ATP Cohort'].astype('datetime64[ns]').unique() + + # print(cohorts_participaded) + if len(cohorts_participaded) == 1: + return cohorts_participaded[0] + else: + # cohorts_participaded.append(pd.to_datetime(projected_start_date)) + stimated_module_date = np.datetime64(projected_start_date) + cohorts_participaded = np.append(cohorts_participaded, stimated_module_date) + cohorts_participaded.sort() + previus_date = cohorts_participaded[0] + for cohort in cohorts_participaded: + if stimated_module_date == cohort: + return previus_date + else: + return np.datetime64(cohort_to_find) + + def Get_clean_data(self): + df = self.raw_data + df = self.Drop_columns(df) + df = self.Fix_nan_values(df) + df = self.Rename_values(df) + df = self.Delete_values(df) + df = self.Set_data_types(df) + df['ATP Cohort'] = df.apply(lambda row: self.Find_cohort(row['Auto Id'], row['Projected Start Date'], row['ATP Cohort'], df), axis=1) + return df \ No newline at end of file diff --git a/compleation_rate_data.py b/compleation_rate_data.py new file mode 100644 index 0000000..62bc7cd --- /dev/null +++ b/compleation_rate_data.py @@ -0,0 +1,52 @@ +import pandas as pd + +class Compleation_rate_data: + def __init__(self, data): + self.data = data + self.__pathways = [ + 'Web Development M1', + 'Web Development M2', + 'Web Development M3', + 'Web Development M4', + 'Data Analysis M1', + 'Data Analysis M2', + 'Data Analysis M3', + 'Data Analysis M4', + 'Software Development M1', + 'Software Development M2', + 'Software Development M3', + 'Software Development M4', + 'Quality Assurance M1', + 'Quality Assurance M2', + 'Quality Assurance M3', + 'Quality Assurance M4', + 'User Experience M1', + 'User Experience M2', + 'User Experience M3', + 'User Experience M4', + ] + + # Not the best Pandas way to do it: + def Get_completion_percentages(self, cohort: str = 'All cohorts') -> pd.DataFrame: + + + if cohort == 'All cohorts': + data = self.data + else: + data = self.data[self.data['ATP Cohort'] == pd.Timestamp(cohort)] + + completion_dictionary = {} + + for path in self.__pathways: + outcome = data[data['Service'] == path]['Outcome'].value_counts(normalize=True).reset_index() + completion_dictionary[path] = {row.Outcome: row.proportion for row in outcome.itertuples(index=True)} + + result_df = pd.DataFrame(completion_dictionary).transpose().fillna(0).rename_axis('Module').reset_index() + + result_df['Pathway'] = result_df['Module'].apply(lambda x: x[:x.rfind(' ')]) # intended to be able to sort by pathway + return result_df + # TODO: Add test + + def Get_pathways_name(self, df: pd.DataFrame) -> list: + return list(df['Pathway'].unique()) + diff --git a/historical_student_data_dashboards.ipynb b/historical_student_data_dashboards.ipynb deleted file mode 100644 index d16955f..0000000 --- a/historical_student_data_dashboards.ipynb +++ /dev/null @@ -1,2275 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "c73760d6", - "metadata": {}, - "source": [ - "# Dashboard Historical student data\n", - "\n", - "## 1. Most common pathways taken:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 134, - "id": "ec855ef5", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import plotly.express as px\n", - "import plotly.graph_objects as go\n", - "from dash import Dash, dcc, html, Input, Output" - ] - }, - { - "cell_type": "markdown", - "id": "49621639", - "metadata": {}, - "source": [ - "### Data cleaning: (This can be extracted to an object or just create a funciton that is call once and outputs a clean file, while the process is not define I will just add the cleaning functions to this file)" - ] - }, - { - "cell_type": "markdown", - "id": "d5704b71", - "metadata": {}, - "source": [ - "## Questions for Danny:\n", - "* What to do when I have an NA in a DateTime series?" - ] - }, - { - "cell_type": "code", - "execution_count": 135, - "id": "bc04972b", - "metadata": {}, - "outputs": [], - "source": [ - "# Cleaning and testing function\n", - "def Enrollments_cleaning(df: pd.DataFrame) -> pd.DataFrame:\n", - " # Columns to clean\n", - " COLUMNS_TO_DROP = ['Full Name']\n", - " result = df.drop(columns=COLUMNS_TO_DROP)\n", - " \n", - " # TODO: fix 'ATP Cohorts' \n", - " # Q: how do I handle 'NA' in a datetime series\n", - " \n", - " # Fix NaN values\n", - " NAN_VALUE_SUBSTITUTE = 'NA'\n", - " columns_to_fix = {\n", - " 'Projected Start Date': NAN_VALUE_SUBSTITUTE, 'Actual Start Date': NAN_VALUE_SUBSTITUTE, 'Projected End Date': NAN_VALUE_SUBSTITUTE,\n", - " 'Actual End Date': NAN_VALUE_SUBSTITUTE, 'Outcome': NAN_VALUE_SUBSTITUTE, 'ATP Cohort': NAN_VALUE_SUBSTITUTE\n", - " }\n", - "\n", - " for column, substitute_value in columns_to_fix.items():\n", - " result[column] = result[column].fillna(substitute_value)\n", - "\n", - " # Fix change name Data Analitics 2 to Data Analysis 2 for consistency\n", - " result.loc[result['Service'] == 'Data Analytics 2', 'Service'] = 'Data Analysis 2'\n", - "\n", - " # Delete values not needed \n", - " values_not_needed = {\n", - " 'Service': ['Software Development 1', 'Software Development 2', 'Web Development 1', 'Web Development 2', 'Data Analysis 1','Data Analysis 2']\n", - " }\n", - " for column, value in values_not_needed.items():\n", - " result = result[~result[column].isin(value)]\n", - "\n", - " # DataTypes\n", - " column_datatype: dict = {'Auto Id': str, 'KY Region': str, 'Assessment ID': str, 'EnrollmentId': str,\n", - " 'Enrollment Service Name': str, 'Service': str, 'Projected Start Date': str,\n", - " 'Actual Start Date': str, 'Projected End Date': str, 'Actual End Date': str, 'Outcome': str,\n", - " 'ATP Cohort': str} \n", - " # TODO: 'Projected Start Date', 'Actual Start Date', 'Projected End Date', 'Actual End Date' are all datetime types but have a value fix of NA\n", - " \n", - " for column, type in column_datatype.items():\n", - " result[column] = result[column].astype(type)\n", - "\n", - " # Added the tests inside the cleaning function because it cannot be on a separeted folder structure until testing methods are define\n", - " Test_enrollments_cleaning(result)\n", - "\n", - " return result\n", - "\n", - "def Test_enrollments_cleaning(clean_df: pd.DataFrame):\n", - " # Parameter can be change to an in-function call of the data cleanner with the DF\n", - " assert ~clean_df.isna().any().any(), 'The Dataframe has na values.'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "15b7c4ec", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['Auto Id', 'KY Region', 'Assessment ID', 'EnrollmentId',\n", - " 'Enrollment Service Name', 'Service', 'Projected Start Date',\n", - " 'Actual Start Date', 'Projected End Date', 'Actual End Date', 'Outcome',\n", - " 'ATP Cohort'],\n", - " dtype='object')" - ] - }, - "execution_count": 136, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "enrollments = pd.read_excel('Data\\\\Raw\\\\ARC Enrollments.xlsx')\n", - "enrollments = Enrollments_cleaning(enrollments)\n", - "enrollments" - ] - }, - { - "cell_type": "markdown", - "id": "8c9d2634", - "metadata": {}, - "source": [ - "### 1.1 Most common path by period\n", - "- Periods are going to be define by ATP Cohort" - ] - }, - { - "cell_type": "code", - "execution_count": 137, - "id": "0773f9b7", - "metadata": {}, - "outputs": [], - "source": [ - "# Starter pathways are the only path that have to be taken into consideration for each period student pathway choosing\n", - "# TODO: maybe make them generate automatically \n", - "Starter_pathways = [\n", - " 'Web Development M1',\n", - " 'Data Analysis M1', \n", - " 'Software Development M1',\n", - " 'Quality Assurance M1', \n", - " 'User Experience M1'\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 138, - "id": "c4902e7d", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "Service", - "rawType": "object", - "type": "string" - }, - { - "name": "count", - "rawType": "int64", - "type": "integer" - } - ], - "ref": "b4504a2c-f3c1-451f-8f67-12507b3d07ef", - "rows": [ - [ - "Career Readiness Workshop", - "224" - ], - [ - "One-on-one Job Readiness", - "87" - ], - [ - "Remote Jobs Workshop (EDA Grant)", - "30" - ], - [ - "Referral to External Service", - "20" - ], - [ - "Supportive Services Referral", - "20" - ], - [ - "Tech Communications Workshop", - "20" - ], - [ - "Employer Event (Code:You)", - "14" - ], - [ - "Demo Day Participant", - "12" - ], - [ - "Remote Jobs Workshop (non EDA)", - "8" - ], - [ - "Resume Review and Optimization", - "6" - ], - [ - "Technical Project Management", - "6" - ], - [ - "Interview Guidance and Practice", - "5" - ], - [ - "Data Analysis M4", - "2" - ], - [ - "Revised Resume Review", - "2" - ], - [ - "Web Development M4", - "2" - ], - [ - "Software Development M4", - "1" - ] - ], - "shape": { - "columns": 1, - "rows": 16 - } - }, - "text/plain": [ - "Service\n", - "Career Readiness Workshop 224\n", - "One-on-one Job Readiness 87\n", - "Remote Jobs Workshop (EDA Grant) 30\n", - "Referral to External Service 20\n", - "Supportive Services Referral 20\n", - "Tech Communications Workshop 20\n", - "Employer Event (Code:You) 14\n", - "Demo Day Participant 12\n", - "Remote Jobs Workshop (non EDA) 8\n", - "Resume Review and Optimization 6\n", - "Technical Project Management 6\n", - "Interview Guidance and Practice 5\n", - "Data Analysis M4 2\n", - "Revised Resume Review 2\n", - "Web Development M4 2\n", - "Software Development M4 1\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 138, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# What are the NA values in 'ATP Cohort'?\n", - "enrollments[enrollments['ATP Cohort'] == 'NA'].value_counts('Service')\n", - "\n", - "# looks like pathways are not represented in ATP Cohort NA Values (Probably has more to do with support entries)\n", - "# only 6 of the 460 ATP cohort 'NA' values are pathways" - ] - }, - { - "cell_type": "code", - "execution_count": 139, - "id": "10a28ede", - "metadata": {}, - "outputs": [], - "source": [ - "def Get_starting_pathways(df: pd.DataFrame): \n", - " \"\"\"\n", - " Returns a pandas.DataFrame were all the services are the biginning paths\n", - "\n", - " Args: \n", - " df: pandas.DataFrame\n", - "\n", - " Return:\n", - " pandas.DataFrame\n", - " \"\"\"\n", - " Starter_pathways = [\n", - " 'Web Development M1',\n", - " 'Data Analysis M1', \n", - " 'Software Development M1',\n", - " 'Quality Assurance M1', \n", - " 'User Experience M1',\n", - " ]\n", - " mask_starter_pathways = df['Service'].isin(Starter_pathways)\n", - " return df[mask_starter_pathways]\n", - "\n", - "def Get_cohorts_list(df: pd.DataFrame):\n", - " cohorts = list(pd.to_datetime(df['ATP Cohort'][df['ATP Cohort'] != 'NA']).sort_values(ascending=True).unique())\n", - " cohorts.insert(0, 'All cohorts')\n", - " return cohorts\n", - "\n", - "def Get_data_by_cohort(df: pd.DataFrame, cohort: str = 'All cohorts') -> pd.DataFrame:\n", - " if cohort == 'All cohorts':\n", - " result = df.value_counts('Service').reset_index()\n", - " else:\n", - " result = df[df['ATP Cohort'] == pd.Timestamp(cohort)].value_counts('Service').reset_index()\n", - " \n", - " return result" - ] - }, - { - "cell_type": "code", - "execution_count": 140, - "id": "8020e97f", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "index", - "rawType": "int64", - "type": "integer" - }, - { - "name": "Auto Id", - "rawType": "object", - "type": "string" - }, - { - "name": "KY Region", - "rawType": "object", - "type": "string" - }, - { - "name": "Assessment ID", - "rawType": "object", - "type": "string" - }, - { - "name": "EnrollmentId", - "rawType": "object", - "type": "string" - }, - { - "name": "Enrollment Service Name", - "rawType": "object", - "type": "string" - }, - { - "name": "Service", - "rawType": "object", - "type": "string" - }, - { - "name": "Projected Start Date", - "rawType": "object", - "type": "string" - }, - { - "name": "Actual Start Date", - "rawType": "object", - "type": "string" - }, - { - "name": "Projected End Date", - "rawType": "object", - "type": "string" - }, - { - "name": "Actual End Date", - "rawType": "object", - "type": "string" - }, - { - "name": "Outcome", - "rawType": "object", - "type": "string" - }, - { - "name": "ATP Cohort", - "rawType": "object", - "type": "string" - } - ], - "ref": "8df3145d-7c39-4a2f-9491-bd86871a1713", - "rows": [ - [ - "0", - "202303-11274", - "SOAR", - "OA-010946", - "Enrollment-7415", - "ES-0021827", - "Web Development M1", - "2024-03-11 00:00:00", - "2024-03-11 00:00:00", - "2024-04-12 00:00:00", - "2024-04-12 00:00:00", - "Successfully Completed", - "2024-01-01 00:00:00" - ], - [ - "1", - "202206-8668", - "SOAR", - "OA-016863", - "Enrollment-9631", - "ES-0024437", - "Intro to Programming Core", - "2024-08-15 00:00:00", - "NA", - "NA", - "2024-09-29 00:00:00", - "Successfully Completed", - "2024-08-01 00:00:00" - ], - [ - "2", - "202110-5639", - "SOAR", - "OA-004316", - "Enrollment-5320", - "ES-0022632", - "Intro to Programming Core", - "2024-05-06 00:00:00", - "2024-05-06 00:00:00", - "NA", - "2024-06-28 00:00:00", - "Successfully Completed", - "2024-05-01 00:00:00" - ], - [ - "3", - "202410-17704", - "SOAR", - "OA-019195", - "Enrollment-11703", - "ES-0029379", - "Intro to Programming Core", - "2025-01-07 00:00:00", - "2025-01-13 00:00:00", - "2025-02-18 00:00:00", - "2025-02-04 00:00:00", - "Did Not Complete", - "2025-01-01 00:00:00" - ], - [ - "4", - "202208-9220", - "SOAR", - "OA-008218", - "Enrollment-6405", - "ES-0016248", - "Salesforce", - "2022-09-07 00:00:00", - "NA", - "NA", - "2022-11-18 00:00:00", - "Successfully Completed", - "2022-09-01 00:00:00" - ] - ], - "shape": { - "columns": 12, - "rows": 5 - } - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Auto IdKY RegionAssessment IDEnrollmentIdEnrollment Service NameServiceProjected Start DateActual Start DateProjected End DateActual End DateOutcomeATP Cohort
0202303-11274SOAROA-010946Enrollment-7415ES-0021827Web Development M12024-03-11 00:00:002024-03-11 00:00:002024-04-12 00:00:002024-04-12 00:00:00Successfully Completed2024-01-01 00:00:00
1202206-8668SOAROA-016863Enrollment-9631ES-0024437Intro to Programming Core2024-08-15 00:00:00NANA2024-09-29 00:00:00Successfully Completed2024-08-01 00:00:00
2202110-5639SOAROA-004316Enrollment-5320ES-0022632Intro to Programming Core2024-05-06 00:00:002024-05-06 00:00:00NA2024-06-28 00:00:00Successfully Completed2024-05-01 00:00:00
3202410-17704SOAROA-019195Enrollment-11703ES-0029379Intro to Programming Core2025-01-07 00:00:002025-01-13 00:00:002025-02-18 00:00:002025-02-04 00:00:00Did Not Complete2025-01-01 00:00:00
4202208-9220SOAROA-008218Enrollment-6405ES-0016248Salesforce2022-09-07 00:00:00NANA2022-11-18 00:00:00Successfully Completed2022-09-01 00:00:00
\n", - "
" - ], - "text/plain": [ - " Auto Id KY Region Assessment ID EnrollmentId \\\n", - "0 202303-11274 SOAR OA-010946 Enrollment-7415 \n", - "1 202206-8668 SOAR OA-016863 Enrollment-9631 \n", - "2 202110-5639 SOAR OA-004316 Enrollment-5320 \n", - "3 202410-17704 SOAR OA-019195 Enrollment-11703 \n", - "4 202208-9220 SOAR OA-008218 Enrollment-6405 \n", - "\n", - " Enrollment Service Name Service Projected Start Date \\\n", - "0 ES-0021827 Web Development M1 2024-03-11 00:00:00 \n", - "1 ES-0024437 Intro to Programming Core 2024-08-15 00:00:00 \n", - "2 ES-0022632 Intro to Programming Core 2024-05-06 00:00:00 \n", - "3 ES-0029379 Intro to Programming Core 2025-01-07 00:00:00 \n", - "4 ES-0016248 Salesforce 2022-09-07 00:00:00 \n", - "\n", - " Actual Start Date Projected End Date Actual End Date \\\n", - "0 2024-03-11 00:00:00 2024-04-12 00:00:00 2024-04-12 00:00:00 \n", - "1 NA NA 2024-09-29 00:00:00 \n", - "2 2024-05-06 00:00:00 NA 2024-06-28 00:00:00 \n", - "3 2025-01-13 00:00:00 2025-02-18 00:00:00 2025-02-04 00:00:00 \n", - "4 NA NA 2022-11-18 00:00:00 \n", - "\n", - " Outcome ATP Cohort \n", - "0 Successfully Completed 2024-01-01 00:00:00 \n", - "1 Successfully Completed 2024-08-01 00:00:00 \n", - "2 Successfully Completed 2024-05-01 00:00:00 \n", - "3 Did Not Complete 2025-01-01 00:00:00 \n", - "4 Successfully Completed 2022-09-01 00:00:00 " - ] - }, - "execution_count": 140, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# TODO: Finish tests\n", - "def Test_Get_starting_pathways():\n", - " mock_data = pd.DataFrame(\n", - " {\n", - " 'Auto Id': ['202303-11274', '202206-8668', '202110-5639', '202410-17704', '202208-9220'], \n", - " 'KY Region': ['SOAR', 'SOAR', 'SOAR', 'SOAR', 'SOAR'], \n", - " 'Assessment ID': ['OA-010946', 'OA-016863', 'OA-004316', 'OA-019195', 'OA-008218'], \n", - " 'EnrollmentId': ['Enrollment-7415', 'Enrollment-9631', 'Enrollment-5320', 'Enrollment-11703', 'Enrollment-6405'], \n", - " 'Enrollment Service Name': ['ES-0021827', 'ES-0024437', 'ES-0022632', 'ES-0029379', 'ES-0016248'], \n", - " 'Service': ['Web Development M1', 'Intro to Programming Core', 'Intro to Programming Core', 'Intro to Programming Core', 'Salesforce'], \n", - " 'Projected Start Date': ['2024-03-11 00:00:00', '2024-08-15 00:00:00', '2024-05-06 00:00:00', '2025-01-07 00:00:00', '2022-09-07 00:00:00'], \n", - " 'Actual Start Date': ['2024-03-11 00:00:00', 'NA', '2024-05-06 00:00:00', '2025-01-13 00:00:00', 'NA'], \n", - " 'Projected End Date': ['2024-04-12 00:00:00', 'NA', 'NA', '2025-02-18 00:00:00', 'NA'], \n", - " 'Actual End Date': ['2024-04-12 00:00:00', '2024-09-29 00:00:00', '2024-06-28 00:00:00', '2025-02-04 00:00:00', '2022-11-18 00:00:00'], \n", - " 'Outcome': ['Successfully Completed', 'Successfully Completed', 'Successfully Completed', 'Did Not Complete', 'Successfully Completed'], \n", - " 'ATP Cohort': ['2024-01-01 00:00:00', '2024-08-01 00:00:00', '2024-05-01 00:00:00', '2025-01-01 00:00:00', '2022-09-01 00:00:00']\n", - " }\n", - " )\n", - " return mock_data\n", - "Test_Get_starting_pathways()" - ] - }, - { - "cell_type": "code", - "execution_count": 141, - "id": "630dca1e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "def Dash_most_selected_path_by_cohort(data: pd.DataFrame) -> Dash: # Need to pass the dataframe argument because of how the Data is structure\n", - " app = Dash(__name__)\n", - " # Const\n", - " starter_only_enrollments = Get_starting_pathways(data) # This function should be able to comunicate with the data without argument\n", - "\n", - " dropdown_options = Get_cohorts_list(starter_only_enrollments)\n", - " pathway_color = {\n", - " 'Web Development M1': 'blue',\n", - " 'Data Analysis M1': 'red', \n", - " 'Software Development M1': 'green',\n", - " 'Quality Assurance M1': 'yellow', \n", - " 'User Experience M1': 'purple'\n", - " }\n", - "\n", - " # Display\n", - " app.layout = html.Div([\n", - " html.H2('Cohorts', style={'text-align': \"center\"}),\n", - " html.P('Select Cohort:'),\n", - " dcc.Dropdown(\n", - " id=\"dropdown\",\n", - " options=dropdown_options,\n", - " value=dropdown_options[0],\n", - " clearable=False,\n", - " ),\n", - " dcc.Graph(id=\"graph\")\n", - " \n", - " ], style={'backgroundColor':'white'})\n", - "\n", - " @app.callback(\n", - " Output(\"graph\", \"figure\"),\n", - " Input(\"dropdown\", \"value\"))\n", - "\n", - " # Graph\n", - " def tt(time):\n", - " df = Get_data_by_cohort(starter_only_enrollments, time)\n", - " fig = px.pie(df, names='Service', values='count', color='Service', color_discrete_map=pathway_color)\n", - " return fig\n", - "\n", - " return app\n", - "\n", - " # TODO: Add number of students per each cohort \n", - " # TODO: Fix the options on the selection \n", - " # TODO: make colors better\n", - "\n", - "Dash_most_selected_path_by_cohort(enrollments).run(debug=True, port=8052)" - ] - }, - { - "cell_type": "markdown", - "id": "16f9f9a6", - "metadata": {}, - "source": [ - "### 2. Completion rates" - ] - }, - { - "cell_type": "markdown", - "id": "af3ada51", - "metadata": {}, - "source": [ - "### 2.1 completion rate by path" - ] - }, - { - "cell_type": "code", - "execution_count": 142, - "id": "1504a061", - "metadata": {}, - "outputs": [], - "source": [ - "pathways = [\n", - " 'Web Development M1',\n", - " 'Web Development M2',\n", - " 'Web Development M3',\n", - " 'Web Development M4',\n", - " 'Data Analysis M1', \n", - " 'Data Analysis M2',\n", - " 'Data Analysis M3',\n", - " 'Data Analysis M4', \n", - " 'Software Development M1',\n", - " 'Software Development M2',\n", - " 'Software Development M3',\n", - " 'Software Development M4',\n", - " 'Quality Assurance M1', \n", - " 'Quality Assurance M2',\n", - " 'Quality Assurance M3', \n", - " 'Quality Assurance M4', \n", - " 'User Experience M1', \n", - " 'User Experience M2',\n", - " 'User Experience M3', \n", - " 'User Experience M4',\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 143, - "id": "930b4918", - "metadata": {}, - "outputs": [], - "source": [ - "# Not the best Pandas way to do it:\n", - "def Get_completion_percentages(df: pd.DataFrame, cohort: str = 'All cohorts') -> pd.DataFrame:\n", - " if cohort == 'All cohorts':\n", - " data = df\n", - " else:\n", - " data = df[df['ATP Cohort'] == pd.Timestamp(cohort)]\n", - "\n", - " completion_dictionary = {}\n", - "\n", - " for path in pathways:\n", - " outcome = data[data['Service'] == path]['Outcome'].value_counts(normalize=True).reset_index()\n", - " completion_dictionary[path] = {row.Outcome: row.proportion for row in outcome.itertuples(index=True)}\n", - " \n", - " result_df = pd.DataFrame(completion_dictionary).transpose().fillna(0).rename_axis('Module').reset_index()\n", - "\n", - " result_df['Pathway'] = result_df['Module'].apply(lambda x: x[:x.rfind(' ')]) # intended to be able to sort by pathway\n", - " return result_df\n", - "# TODO: Add test\n", - "\n", - "def Get_pathways_name(df: pd.DataFrame) -> list:\n", - " return list(df['Pathway'].unique())\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 144, - "id": "a7ea4ffb", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "index", - "rawType": "int64", - "type": "integer" - }, - { - "name": "Module", - "rawType": "object", - "type": "string" - }, - { - "name": "Successfully Completed", - "rawType": "float64", - "type": "float" - }, - { - "name": "Did Not Complete", - "rawType": "float64", - "type": "float" - }, - { - "name": "Partially Completed", - "rawType": "float64", - "type": "float" - }, - { - "name": "NA", - "rawType": "float64", - "type": "float" - }, - { - "name": "Pathway", - "rawType": "object", - "type": "string" - } - ], - "ref": "742847f3-ad3f-4e82-a206-eb8d98493700", - "rows": [ - [ - "0", - "Web Development M1", - "0.7916666666666666", - "0.15625", - "0.052083333333333336", - "0.0", - "Web Development" - ], - [ - "1", - "Web Development M2", - "0.8076923076923077", - "0.15384615384615385", - "0.038461538461538464", - "0.0", - "Web Development" - ], - [ - "2", - "Web Development M3", - "0.59375", - "0.09375", - "0.0", - "0.3125", - "Web Development" - ], - [ - "3", - "Web Development M4", - "0.675", - "0.325", - "0.0", - "0.0", - "Web Development" - ], - [ - "4", - "Data Analysis M1", - "0.7777777777777778", - "0.1574074074074074", - "0.06481481481481481", - "0.0", - "Data Analysis" - ], - [ - "5", - "Data Analysis M2", - "0.6046511627906976", - "0.27906976744186046", - "0.09302325581395349", - "0.023255813953488372", - "Data Analysis" - ], - [ - "6", - "Data Analysis M3", - "0.6346153846153846", - "0.07692307692307693", - "0.038461538461538464", - "0.25", - "Data Analysis" - ], - [ - "7", - "Data Analysis M4", - "0.5757575757575758", - "0.3939393939393939", - "0.030303030303030304", - "0.0", - "Data Analysis" - ], - [ - "8", - "Software Development M1", - "0.8", - "0.18181818181818182", - "0.01818181818181818", - "0.0", - "Software Development" - ], - [ - "9", - "Software Development M2", - "0.7441860465116279", - "0.20930232558139536", - "0.046511627906976744", - "0.0", - "Software Development" - ], - [ - "10", - "Software Development M3", - "0.7878787878787878", - "0.15151515151515152", - "0.06060606060606061", - "0.0", - "Software Development" - ], - [ - "11", - "Software Development M4", - "0.5769230769230769", - "0.4230769230769231", - "0.0", - "0.0", - "Software Development" - ], - [ - "12", - "Quality Assurance M1", - "1.0", - "0.0", - "0.0", - "0.0", - "Quality Assurance" - ], - [ - "13", - "Quality Assurance M2", - "1.0", - "0.0", - "0.0", - "0.0", - "Quality Assurance" - ], - [ - "14", - "Quality Assurance M3", - "1.0", - "0.0", - "0.0", - "0.0", - "Quality Assurance" - ], - [ - "15", - "Quality Assurance M4", - "1.0", - "0.0", - "0.0", - "0.0", - "Quality Assurance" - ], - [ - "16", - "User Experience M1", - "0.5", - "0.5", - "0.0", - "0.0", - "User Experience" - ], - [ - "17", - "User Experience M2", - "1.0", - "0.0", - "0.0", - "0.0", - "User Experience" - ], - [ - "18", - "User Experience M3", - "1.0", - "0.0", - "0.0", - "0.0", - "User Experience" - ], - [ - "19", - "User Experience M4", - "1.0", - "0.0", - "0.0", - "0.0", - "User Experience" - ] - ], - "shape": { - "columns": 6, - "rows": 20 - } - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ModuleSuccessfully CompletedDid Not CompletePartially CompletedNAPathway
0Web Development M10.7916670.1562500.0520830.000000Web Development
1Web Development M20.8076920.1538460.0384620.000000Web Development
2Web Development M30.5937500.0937500.0000000.312500Web Development
3Web Development M40.6750000.3250000.0000000.000000Web Development
4Data Analysis M10.7777780.1574070.0648150.000000Data Analysis
5Data Analysis M20.6046510.2790700.0930230.023256Data Analysis
6Data Analysis M30.6346150.0769230.0384620.250000Data Analysis
7Data Analysis M40.5757580.3939390.0303030.000000Data Analysis
8Software Development M10.8000000.1818180.0181820.000000Software Development
9Software Development M20.7441860.2093020.0465120.000000Software Development
10Software Development M30.7878790.1515150.0606060.000000Software Development
11Software Development M40.5769230.4230770.0000000.000000Software Development
12Quality Assurance M11.0000000.0000000.0000000.000000Quality Assurance
13Quality Assurance M21.0000000.0000000.0000000.000000Quality Assurance
14Quality Assurance M31.0000000.0000000.0000000.000000Quality Assurance
15Quality Assurance M41.0000000.0000000.0000000.000000Quality Assurance
16User Experience M10.5000000.5000000.0000000.000000User Experience
17User Experience M21.0000000.0000000.0000000.000000User Experience
18User Experience M31.0000000.0000000.0000000.000000User Experience
19User Experience M41.0000000.0000000.0000000.000000User Experience
\n", - "
" - ], - "text/plain": [ - " Module Successfully Completed Did Not Complete \\\n", - "0 Web Development M1 0.791667 0.156250 \n", - "1 Web Development M2 0.807692 0.153846 \n", - "2 Web Development M3 0.593750 0.093750 \n", - "3 Web Development M4 0.675000 0.325000 \n", - "4 Data Analysis M1 0.777778 0.157407 \n", - "5 Data Analysis M2 0.604651 0.279070 \n", - "6 Data Analysis M3 0.634615 0.076923 \n", - "7 Data Analysis M4 0.575758 0.393939 \n", - "8 Software Development M1 0.800000 0.181818 \n", - "9 Software Development M2 0.744186 0.209302 \n", - "10 Software Development M3 0.787879 0.151515 \n", - "11 Software Development M4 0.576923 0.423077 \n", - "12 Quality Assurance M1 1.000000 0.000000 \n", - "13 Quality Assurance M2 1.000000 0.000000 \n", - "14 Quality Assurance M3 1.000000 0.000000 \n", - "15 Quality Assurance M4 1.000000 0.000000 \n", - "16 User Experience M1 0.500000 0.500000 \n", - "17 User Experience M2 1.000000 0.000000 \n", - "18 User Experience M3 1.000000 0.000000 \n", - "19 User Experience M4 1.000000 0.000000 \n", - "\n", - " Partially Completed NA Pathway \n", - "0 0.052083 0.000000 Web Development \n", - "1 0.038462 0.000000 Web Development \n", - "2 0.000000 0.312500 Web Development \n", - "3 0.000000 0.000000 Web Development \n", - "4 0.064815 0.000000 Data Analysis \n", - "5 0.093023 0.023256 Data Analysis \n", - "6 0.038462 0.250000 Data Analysis \n", - "7 0.030303 0.000000 Data Analysis \n", - "8 0.018182 0.000000 Software Development \n", - "9 0.046512 0.000000 Software Development \n", - "10 0.060606 0.000000 Software Development \n", - "11 0.000000 0.000000 Software Development \n", - "12 0.000000 0.000000 Quality Assurance \n", - "13 0.000000 0.000000 Quality Assurance \n", - "14 0.000000 0.000000 Quality Assurance \n", - "15 0.000000 0.000000 Quality Assurance \n", - "16 0.000000 0.000000 User Experience \n", - "17 0.000000 0.000000 User Experience \n", - "18 0.000000 0.000000 User Experience \n", - "19 0.000000 0.000000 User Experience " - ] - }, - "execution_count": 144, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Get_completion_percentages(enrollments)" - ] - }, - { - "cell_type": "code", - "execution_count": 145, - "id": "98a912ca", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "def Dash_completion_rates_by_path(df: pd.DataFrame) -> Dash: # TODO: fix data structure so visualization doesn't use df\n", - " app2 = Dash(__name__)\n", - " # Const\n", - " completion_df = Get_completion_percentages(df)\n", - " options = Get_pathways_name(completion_df)\n", - "\n", - " pathway_color = {\n", - " 'Software Development': 'green', \n", - " 'Web Development': 'blue', \n", - " 'Data Analysis': 'red',\n", - " 'Quality Assurance': 'yellow', \n", - " 'User Experience': 'purple'\n", - " }\n", - "\n", - " # Display\n", - " app2.layout = html.Div([\n", - " html.H2('Pathways Completion', style={'text-align': \"center\"}),\n", - " html.P('Select pathway:'),\n", - " dcc.Dropdown(\n", - " id=\"dropdown\",\n", - " options=options,\n", - " value=options[0],\n", - " clearable=False,\n", - " ),\n", - " dcc.Graph(id=\"graph\")\n", - " \n", - " ], style={'backgroundColor':'white'})\n", - "\n", - " @app2.callback(\n", - " Output(\"graph\", \"figure\"),\n", - " Input(\"dropdown\", \"value\"))\n", - "\n", - " # Graph\n", - " # TODO: Need to add an extra selection box with the cohorts\n", - " def Display_pathway_completion(p):\n", - " df = completion_df[completion_df['Pathway'] == p]\n", - " fig = px.bar(df, x='Module', y='Successfully Completed')\n", - " return fig\n", - "\n", - " return app2\n", - "\n", - "Dash_completion_rates_by_path(enrollments).run(debug=True, port=8053)" - ] - }, - { - "cell_type": "code", - "execution_count": 146, - "id": "0db624db", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "hovertemplate": "Module=%{x}
Successfully Completed=%{y}", - "legendgroup": "", - "marker": { - "color": "#636efa", - "pattern": { - "shape": "" - } - }, - "name": "", - "orientation": "v", - "showlegend": false, - "textposition": "auto", - "type": "bar", - "x": [ - "Web Development M1", - "Web Development M2", - "Web Development M3", - "Web Development M4", - "Data Analysis M1", - "Data Analysis M2", - "Data Analysis M3", - "Data Analysis M4", - "Software Development M1", - "Software Development M2", - "Software Development M3", - "Software Development M4", - "Quality Assurance M1", - "Quality Assurance M2", - "Quality Assurance M3", - "Quality Assurance M4", - "User Experience M1", - "User Experience M2", - "User Experience M3", - "User Experience M4" - ], - "xaxis": "x", - "y": { - "bdata": "VVVVVVVV6T+KndiJndjpPwAAAAAAAOM/mpmZmZmZ5T85juM4juPoP9aUNWVNWeM/7MRO7MRO5D9tsskmm2ziP5qZmZmZmek/9AV9QV/Q5z822WSTTTbpP2IndmInduI/AAAAAAAA8D8AAAAAAADwPwAAAAAAAPA/AAAAAAAA8D8AAAAAAADgPwAAAAAAAPA/AAAAAAAA8D8AAAAAAADwPw==", - "dtype": "f8" - }, - "yaxis": "y" - } - ], - "layout": { - "barmode": "relative", - "legend": { - "tracegroupgap": 0 - }, - "margin": { - "t": 60 - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermap": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermap" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "xaxis": { - "anchor": "y", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Module" - } - }, - "yaxis": { - "anchor": "x", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Successfully Completed" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Just testing\n", - "completion = Get_completion_percentages(enrollments)\n", - "px.bar(completion, x='Module', y='Successfully Completed').show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/most_common_pathways_taken_data.py b/most_common_pathways_taken_data.py new file mode 100644 index 0000000..845c59f --- /dev/null +++ b/most_common_pathways_taken_data.py @@ -0,0 +1,41 @@ +import pandas as pd + +class Most_common_pathways_taken_data: + def __init__(self, data): + self.data = data + self.__starter_pathways = [ + 'Web Development M1', + 'Data Analysis M1', + 'Software Development M1', + 'Quality Assurance M1', + 'User Experience M1', + ] + self.starter_only_df = self.Get_starting_pathways() + + def Get_starting_pathways(self): + """ + Returns a pandas.DataFrame were all the services are the biginning paths + + Args: + df: pandas.DataFrame + + Return: + pandas.DataFrame + """ + mask_starter_pathways = self.data['Service'].isin(self.__starter_pathways) + return self.data[mask_starter_pathways] + + def Get_cohorts_list(self): + df = self.starter_only_df + cohorts = list(pd.to_datetime(df['ATP Cohort'][df['ATP Cohort'] != 'NA']).sort_values(ascending=True).astype(str).unique()) + cohorts.insert(0, 'All cohorts') + return cohorts + + def Get_data_by_cohort(self, cohort: str = 'All cohorts') -> pd.DataFrame: + df = self.starter_only_df + if cohort == 'All cohorts': + result = df.value_counts('Service').reset_index() + else: + result = df[df['ATP Cohort'] == str(pd.to_datetime(cohort))].value_counts('Service').reset_index() + + return result \ No newline at end of file diff --git a/visualization_examples.ipynb b/visualization_examples.ipynb new file mode 100644 index 0000000..8fa5ef8 --- /dev/null +++ b/visualization_examples.ipynb @@ -0,0 +1,246 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b0c6df40", + "metadata": {}, + "source": [ + "# Visualization examples\n", + "\n", + "Visualizion was not turn into a class because the project will use Google Locker for dashboard creation, this notebook only works to showcase how to use the Data Manipulation classes." + ] + }, + { + "cell_type": "markdown", + "id": "fc151064", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47cd23cd", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "from dash import Dash, dcc, html, Input, Output\n", + "from most_common_pathways_taken_data import Most_common_pathways_taken_data\n", + "from compleation_rate_data import Compleation_rate_data\n", + "from cleaning_enrollments_data import EnrollmentsCleaning" + ] + }, + { + "cell_type": "markdown", + "id": "cc61af47", + "metadata": {}, + "source": [ + "## Cleaning data\n", + "\n", + "This step should be done before the use of any of the Data classes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba57e157", + "metadata": {}, + "outputs": [], + "source": [ + "cleaner = EnrollmentsCleaning(pd.read_excel('Data\\\\Raw\\\\ARC Enrollments.xlsx'))\n" + ] + }, + { + "cell_type": "markdown", + "id": "4225b677", + "metadata": {}, + "source": [ + "## Most common pathway taken:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fa1b6e02", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def Dash_most_selected_path_by_cohort() -> Dash: # Need to pass the dataframe argument because of how the Data is structure\n", + " app = Dash(__name__)\n", + " # Const\n", + " data_class = Most_common_pathways_taken_data(cleaner.Get_clean_data())\n", + " starter_only_enrollments = data_class.Get_starting_pathways() # This function should be able to comunicate with the data without argument\n", + "\n", + " dropdown_options = data_class.Get_cohorts_list()\n", + " pathway_color = {\n", + " 'Web Development M1': 'blue',\n", + " 'Data Analysis M1': 'red', \n", + " 'Software Development M1': 'green',\n", + " 'Quality Assurance M1': 'yellow', \n", + " 'User Experience M1': 'purple'\n", + " }\n", + "\n", + " # Display\n", + " app.layout = html.Div([\n", + " html.H2('Cohorts', style={'text-align': \"center\"}),\n", + " html.P('Select Cohort:'),\n", + " dcc.Dropdown(\n", + " id=\"dropdown\",\n", + " options=dropdown_options,\n", + " value=dropdown_options[0],\n", + " clearable=False,\n", + " ),\n", + " dcc.Graph(id=\"graph\")\n", + " \n", + " ], style={'backgroundColor':'white'})\n", + "\n", + " @app.callback(\n", + " Output(\"graph\", \"figure\"),\n", + " Input(\"dropdown\", \"value\"))\n", + "\n", + " # Graph\n", + " def tt(time):\n", + " df = data_class.Get_data_by_cohort(time)\n", + " fig = px.pie(df, names='Service', values='count', color='Service', color_discrete_map=pathway_color)\n", + " return fig\n", + "\n", + " return app\n", + "\n", + " # TODO: Add number of students per each cohort \n", + " # TODO: Fix the options on the selection \n", + " # TODO: make colors better\n", + "\n", + "Dash_most_selected_path_by_cohort().run(debug=True, port=8052)" + ] + }, + { + "cell_type": "markdown", + "id": "6b5b514e", + "metadata": {}, + "source": [ + "## Compleation rates:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c0b7d44e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def Dash_completion_rates_by_path() -> Dash: # TODO: fix data structure so visualization doesn't use df\n", + " app2 = Dash(__name__)\n", + " # Const\n", + " data_class = Compleation_rate_data(cleaner.Get_clean_data())\n", + " completion_df = data_class.Get_completion_percentages()\n", + " options = data_class.Get_pathways_name(completion_df)\n", + "\n", + " pathway_color = {\n", + " 'Software Development': 'green', \n", + " 'Web Development': 'blue', \n", + " 'Data Analysis': 'red',\n", + " 'Quality Assurance': 'yellow', \n", + " 'User Experience': 'purple'\n", + " }\n", + "\n", + " # Display\n", + " app2.layout = html.Div([\n", + " html.H2('Pathways Completion', style={'text-align': \"center\"}),\n", + " html.P('Select pathway:'),\n", + " dcc.Dropdown(\n", + " id=\"dropdown\",\n", + " options=options,\n", + " value=options[0],\n", + " clearable=False,\n", + " ),\n", + " dcc.Graph(id=\"graph\")\n", + " \n", + " ], style={'backgroundColor':'white'})\n", + "\n", + " @app2.callback(\n", + " Output(\"graph\", \"figure\"),\n", + " Input(\"dropdown\", \"value\"))\n", + "\n", + " # Graph\n", + " # TODO: Need to add an extra selection box with the cohorts\n", + " def Display_pathway_completion(p):\n", + " df = completion_df[completion_df['Pathway'] == p]\n", + " fig = px.bar(df, x='Module', y='Successfully Completed')\n", + " return fig\n", + "\n", + " return app2\n", + "\n", + "Dash_completion_rates_by_path().run(debug=True, port=8053)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 8f9f01c77df6bbacae89df688cfec05bc8ae38da Mon Sep 17 00:00:00 2001 From: Euclides Date: Sun, 10 Aug 2025 17:47:02 -0400 Subject: [PATCH 9/9] Fixed typo on name of file and class --- compleation_rate_data.py => completion_rate_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename compleation_rate_data.py => completion_rate_data.py (98%) diff --git a/compleation_rate_data.py b/completion_rate_data.py similarity index 98% rename from compleation_rate_data.py rename to completion_rate_data.py index 62bc7cd..48597d9 100644 --- a/compleation_rate_data.py +++ b/completion_rate_data.py @@ -1,6 +1,6 @@ import pandas as pd -class Compleation_rate_data: +class Completion_rate_data: def __init__(self, data): self.data = data self.__pathways = [