diff --git a/.virtual_documents/notebook/PROJECT2_pedro.ipynb b/.virtual_documents/notebook/PROJECT2_pedro.ipynb
new file mode 100644
index 0000000..2752210
--- /dev/null
+++ b/.virtual_documents/notebook/PROJECT2_pedro.ipynb
@@ -0,0 +1,317 @@
+import pandas as pd
+
+df4 = pd.read_csv("df_final_demo.txt")  # "demo" table: client demographic data
+
+# Experiment table: joined later on client_id to get each client's Variation.
+df3 = pd.read_csv("df_final_experiment_clients.txt")
+
+# Web data, part 1.
+df1 = pd.read_csv("df_final_web_data_pt_1.txt")
+
+# Web data, part 2.
+df2 = pd.read_csv("df_final_web_data_pt_2.txt")
+
+# Show the column names of both parts before stacking them.
+df1.columns
+
+
+df2.columns
+
+# Stack both parts into a single frame and renumber the rows from 0.
+dfs = pd.concat([df1, df2], ignore_index=True)
+
+
+dfs['client_id'].unique()
+
+# Element-wise mask of missing values in the combined web data.
+dfs.isnull()
+
+
+dfs.shape
+
+
+dfs.head()
+
+
+# Check the dtype of every column, in particular date_time.
+dfs.dtypes
+
+
+# Parse date_time into pandas datetime values so time arithmetic is possible later.
+dfs['date_time'] = pd.to_datetime(dfs['date_time'])
+
+# Confirm the conversion took effect.
+dfs.dtypes
+
+
+df3.columns  # columns of the experiment table
+
+
+df4.columns
+
+
+
+
+
+#df_all.head()
+
+
+
+dfs.columns
+
+
+#dfs.sort_values(by=['client_id', 'visitor_id', 'visit_id', 'date_time'], ascending=[True, True, True, True])
+
+
+dfs.head(10)
+
+
+#dfsorted[dfsorted['client_id'] == 442857]
+
+# Clients with the most rows in the web data.
+dfs['client_id'].value_counts().head()
+
+# Count missing vs. present values, one column at a time.
+dfs['date_time'].isnull().value_counts()
+
+
+dfs['visitor_id'].isnull().value_counts()
+
+# NOTE(review): repeats the date_time check made a few cells earlier.
+dfs['date_time'].isnull().value_counts()
+
+
+dfs['visit_id'].isnull().value_counts()
+
+# NOTE(review): repeats the visitor_id check made a few cells earlier.
+dfs['visitor_id'].isnull().value_counts()
+
+
+dfs['process_step'].isnull().value_counts()
+
+# Same check for all columns at once: counts each distinct row pattern of the null mask.
+dfs.isnull().value_counts()
+
+
+df4.columns
+
+
+df3.columns
+
+
+#df_test.isnull()
+
+# Distinct process_step labels present in the data.
+dfs['process_step'].unique()
+
+
+# Attach the experiment arm ("Variation": Control or Test) from df3 to every row of the web data.
+df_mergeexp = dfs.merge(df3, on="client_id", how="left")
+
+
+# Attach the demographic columns from the demo table (df4).
+df_merged = df_mergeexp.merge(df4, on="client_id", how="left")
+
+
+df_merged.shape
+
+
+# Drop rows that are exact duplicates across all columns.
+df_merged = df_merged.drop_duplicates()
+
+
+# Keep only rows whose Variation is Control or Test (rows left without a match by the left join are removed here).
+df_merged = df_merged[df_merged["Variation"].isin(["Control", "Test"])]
+
+
+df_merged.shape
+
+
+df_merged["process_step"].value_counts()
+
+
+df_merged["Variation"].value_counts()
+
+
+# Split into one independent copy per arm: Control rows and Test rows.
+df_control = df_merged[df_merged["Variation"] == "Control"].copy()
+df_test = df_merged[df_merged["Variation"] == "Test"].copy()
+
+
+df_control.shape
+
+
+df_test.shape
+
+# Number of distinct num_accts values.
+df_merged['num_accts'].nunique()
+
+
+####################################################################
+
+
+#Q1: Who are the primary clients using this online process?
+#A client is flagged "primary" when their number of distinct visits is at or above the 75th percentile.
+
+usage = (df_merged.groupby("client_id")["visit_id"].nunique().reset_index(name="n_visits"))  #how many distinct visits per client
+
+
+cutoff = usage["n_visits"].quantile(0.75)
+usage['primary'] = (usage["n_visits"] >= cutoff).astype(int)          #1 = at or above the 75th percentile of visits
+
+
+df_merged.columns
+
+
+df_merged = usage.merge(df_merged, left_on="client_id", right_on="client_id", how="left") #add n_visits/primary to every event row
+#df_merged holds one row per web event, so client-level statistics must use one row per client; otherwise clients with more events weigh more.
+clients = df_merged.drop_duplicates(subset="client_id")
+clients.groupby("primary")[["clnt_age","clnt_tenure_yr","clnt_tenure_mnth","logons_6_mnth"]].mean()
+
+
+
+sub = clients[clients["primary"] == 1]
+print(sub[["clnt_age","clnt_tenure_yr","bal","gendr"]].describe(include="all"))
+
+
+#NOTE(review): the figures quoted here were observed while the statistics were still weighted by event rows; re-run to refresh them at client level. Average age is about 51.8 and half of the primary clients are between 39 and 63, so they are middle-aged rather than very young; primary clients are older than non-primary clients.
+#Primary clients are more long-standing (longer tenure).
+#The most frequent gender is "M".
+
+
+clients.groupby("primary")[['calls_6_mnth', 'logons_6_mnth']].mean()
+
+
+#They also make more calls and more logons in 6 months, so they are more active on all channels, not only online.
+
+
+df_merged = df_merged.rename(columns={'Variation': 'variation'})  # use the lower-case column name in every frame
+df3 = df3.rename(columns={'Variation': 'variation'})
+df_control = df_control.rename(columns={'Variation': 'variation'})
+df_test = df_test.rename(columns={'Variation': 'variation'})
+
+# Check that the rename is visible in each frame.
+df_merged.columns
+
+
+df_control.columns
+
+
+df3.columns
+
+# Tag every row of the raw web data with its variation; clients absent from df3 get NaN (left join).
+dfs_var = dfs.merge(df3[['client_id', 'variation']], on='client_id', how='left')
+
+# NOTE(review): dfs['date_time'] was already parsed earlier, so this conversion looks redundant - confirm.
+dfs_var['date_time'] = pd.to_datetime(dfs_var['date_time'])
+
+# Numeric rank of each process step, from start (0) to confirm (4).
+step_order = {'start': 0, 'step_1' : 1, 'step_2' : 2, 'step_3' : 3, 'confirm' : 4}
+
+
+
+
+# Map each step label to its rank; labels that are not keys of step_order become NaN.
+dfs_var['step_num'] = dfs_var['process_step'].map(step_order)
+
+
+######################################################
+#COMPLETION RATE
+#A visit counts as completed when the highest step it reached equals the highest step_num present in the data.
+
+last_step = dfs_var['step_num'].max()
+
+#Highest step reached by each visit; rows without a variation are left out by groupby.
+visitcomp = (dfs_var.groupby(['variation', 'visit_id'])['step_num'].max().reset_index(name='max_step'))
+
+
+visitcomp["completed"] = (visitcomp["max_step"] == last_step).astype(int)
+
+#Share of completed visits per variation (mean of the 0/1 flag).
+completionrate = (visitcomp.groupby('variation')['completed'].mean().reset_index())
+
+
+completionrate
+
+
+print('Completion rate by variation:', completionrate, sep='\n')
+
+
+############################################################################################
+#TIME SPENT ON EACH STEP
+#The time spent on an event is the gap until the next event of the same visit, in chronological order.
+
+dfs_var['date_time'] = pd.to_datetime(dfs_var['date_time'])
+
+#Order events by time within each visit (step_num only breaks ties); sorting by step_num first pairs events out of time order and can give negative durations.
+dfs_var = dfs_var.sort_values(['variation', 'visit_id', 'date_time', 'step_num'])
+
+#shift must be CALLED with -1 to fetch the following row's timestamp; assigning the bare method left next_time without any timestamps, so step_time came out empty.
+dfs_var['next_time'] = (dfs_var.groupby(['variation','visit_id'])['date_time'].shift(-1)) #timestamp of the next event of the same visit
+
+#The last event of a visit has no successor, so its next_time is NaT and the row is dropped below.
+#No pd.to_datetime(..., errors="coerce") is needed here: shift keeps the datetime dtype, and the coercion only hid the bug described above.
+
+
+dfs_var['step_durationsec'] = (dfs_var['next_time'] - dfs_var['date_time']).dt.total_seconds() #duration in seconds
+
+#Mean duration in seconds per variation and step.
+step_time = (dfs_var.dropna(subset=['step_durationsec']).groupby(['variation', 'process_step'])['step_durationsec'].mean().reset_index())
+
+
+step_time
+
+
+##################################################################
+#CHECKING ERROR RATES FOR EACH STEP
+#An event is flagged as an error when the next event of the same visit (in time order) is an EARLIER step, i.e. the user went back.
+#NOTE(review): this definition is an assumption - confirm it matches the project's definition of an error. The previous flag was a copy of step_num.
+dfs_var['next_step_num'] = (dfs_var.sort_values(['date_time', 'step_num']).groupby(['variation', 'visit_id'])['step_num'].shift(-1))
+dfs_var['error_flag'] = (dfs_var['next_step_num'] < dfs_var['step_num']).astype(int) #last event of a visit has no next step, so it is never flagged
+
+error_rates = (dfs_var.groupby(['variation', 'process_step'])['error_flag'].mean().reset_index().rename(columns={'error_flag' : 'error_rate'}))
+
+
+error_rates
+
+
+#COMPLETION RATE - Share of visits that reach the final step, Control vs Test; a higher share in Test means the new design is more effective at getting users through all the steps.
+#TIME SPENT - Stated conclusion: the Test version lets users complete the steps faster. NOTE(review): verify this against the step_time table before reporting it.
+#ERROR RATES - Stated conclusion: Test neither reduced nor increased the frequency of errors in any step. NOTE(review): that result came from error_flag being equal to step_num; re-evaluate it with the error_rate table above.
+
+
+import numpy as np
+from scipy.stats import norm
+
+
+
+#Completed and total visits per variation, rows ordered Control then Test (built here so the test does not depend on an earlier cell)
+summary = visitcomp.groupby('variation')['completed'].agg(n_complete='sum', n_total='count').loc[['Control', 'Test']]
+x1, x2 = summary['n_complete'].values
+n1, n2 = summary['n_total'].values
+
+
+#proportions: completion rate of each group
+p1 = x1 / n1
+p2 = x2 / n2
+
+
+#pooled proportion under H0 (both groups share the same completion rate)
+p_pool = (x1 + x2) / (n1 + n2)
+
+
+#two-proportion z-test using the pooled standard error
+se = np.sqrt(p_pool * (1 - p_pool) * (1/n1 + 1/n2))
+z = (p2 - p1) / se
+
+#two-sided p-value; norm.sf keeps precision in the far tail, where 1 - norm.cdf rounds to exactly 0
+p_value = 2 * norm.sf(abs(z))
+print(f'Completion of rate Control: {p1:.4f}')
+print(f'Completion of rate Test: {p2:.4f}')
+print(f'z-statistic: {z:.4f}')
+print(f'p-value    : {p_value:.3e}')
+alpha = 0.05
+if p_value < alpha:
+    print('Statistical diference highly significative (alpha=0.05)')
+else:
+    print('Statistical diference not significative (alpha=0.05)')
diff --git a/.virtual_documents/notebook/Untitled.ipynb b/.virtual_documents/notebook/Untitled.ipynb
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/.virtual_documents/notebook/Untitled.ipynb
@@ -0,0 +1 @@
+
diff --git a/anaconda_projects/db/project_filebrowser.db b/anaconda_projects/db/project_filebrowser.db
new file mode 100644
index 0000000..e5ae8f7
Binary files /dev/null and b/anaconda_projects/db/project_filebrowser.db differ
diff --git a/notebook/.ipynb_checkpoints/PROJECT2_pedro-checkpoint.ipynb b/notebook/.ipynb_checkpoints/PROJECT2_pedro-checkpoint.ipynb
index a2ea406..00ce796 100644
--- a/notebook/.ipynb_checkpoints/PROJECT2_pedro-checkpoint.ipynb
+++ b/notebook/.ipynb_checkpoints/PROJECT2_pedro-checkpoint.ipynb
@@ -1193,28 +1193,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 46,
    "id": "788ff698-7a92-4969-8cd7-6d87eeca1194",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "7"
-      ]
-     },
-     "execution_count": 45,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "####################################################################"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 47,
    "id": "162a3f57-229f-4cbf-848a-e4704a2cdf52",
    "metadata": {},
    "outputs": [],
@@ -1224,7 +1213,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 48,
    "id": "fed97a77-c349-420e-a073-8cdd689514c9",
    "metadata": {},
    "outputs": [],
@@ -1234,7 +1223,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 49,
    "id": "ed106950-abca-4aa9-afb0-f758055eaa47",
    "metadata": {},
    "outputs": [],
@@ -1245,7 +1234,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 50,
    "id": "c5b77ee6-dd0f-4256-b044-6688bc348160",
    "metadata": {},
    "outputs": [
@@ -1258,7 +1247,7 @@
        "      dtype='object')"
       ]
      },
-     "execution_count": 49,
+     "execution_count": 50,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1269,7 +1258,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 51,
    "id": "c04385dd-5d27-442c-ba4c-63c1e2808d1b",
    "metadata": {},
    "outputs": [],
@@ -1279,7 +1268,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 52,
    "id": "2a46f9ad-2767-4c80-8e18-bf36f0afb441",
    "metadata": {},
    "outputs": [
@@ -1343,7 +1332,7 @@
        "1        51.816468       12.831203        159.971186       6.780515"
       ]
      },
-     "execution_count": 51,
+     "execution_count": 52,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1384,29 +1373,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 54,
    "id": "9948430a-27f5-467a-b44f-ccc9968944a0",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "             clnt_age  clnt_tenure_yr           bal   gendr\n",
-      "count   124060.000000   124072.000000  1.240720e+05  124072\n",
-      "unique            NaN             NaN           NaN       3\n",
-      "top               NaN             NaN           NaN       M\n",
-      "freq              NaN             NaN           NaN   43343\n",
-      "mean        51.816468       12.831203  1.961189e+05     NaN\n",
-      "std         15.672054        7.328423  4.243774e+05     NaN\n",
-      "min         17.000000        2.000000  2.378961e+04     NaN\n",
-      "25%         39.000000        6.000000  4.566559e+04     NaN\n",
-      "50%         54.000000       12.000000  8.424076e+04     NaN\n",
-      "75%         63.500000       17.000000  1.926728e+05     NaN\n",
-      "max         94.000000       55.000000  1.632004e+07     NaN\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "#Average age is about 51.8, half of the primary clients are between 39 and 63. So they are midle-aged and not very young customers, it means primary clients are older than non primary clients.\n",
     "#primary cients are more long standing.\n",
@@ -1415,7 +1385,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 55,
    "id": "95002485-6f20-4f4a-a763-d037b9faa90f",
    "metadata": {},
    "outputs": [
@@ -1471,7 +1441,7 @@
        "1            3.743455       6.780515"
       ]
      },
-     "execution_count": 54,
+     "execution_count": 55,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1482,13 +1452,690 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 56,
    "id": "2ac842fc-b333-4bb9-b804-01f9fc8f0c72",
    "metadata": {},
    "outputs": [],
    "source": [
     "#They also make more calls and more logons in 6 months, so they are more active on all channels, not only online."
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "id": "2a4f3984-9698-45f1-9d0e-299d234bdccf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_merged = df_merged.rename(columns={'Variation': 'variation'})\n",
+    "df3 = df3.rename(columns={'Variation': 'variation'})\n",
+    "df_control = df_control.rename(columns={'Variation': 'variation'})\n",
+    "df_test = df_test.rename(columns={'Variation': 'variation'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "id": "73f27b9e-3a8c-4ad7-a756-6838314f5b34",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['client_id', 'n_visits', 'primary', 'visitor_id', 'visit_id',\n",
+       "       'process_step', 'date_time', 'variation', 'clnt_tenure_yr',\n",
+       "       'clnt_tenure_mnth', 'clnt_age', 'gendr', 'num_accts', 'bal',\n",
+       "       'calls_6_mnth', 'logons_6_mnth'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 58,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_merged.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "id": "5da498ad-e434-4dbc-af5f-84b4a322b09d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['client_id', 'visitor_id', 'visit_id', 'process_step', 'date_time',\n",
+       "       'variation', 'clnt_tenure_yr', 'clnt_tenure_mnth', 'clnt_age', 'gendr',\n",
+       "       'num_accts', 'bal', 'calls_6_mnth', 'logons_6_mnth'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 59,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_control.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "id": "110917b6-fa20-47d1-9704-9263dafd5312",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['client_id', 'variation'], dtype='object')"
+      ]
+     },
+     "execution_count": 60,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df3.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "id": "90d7a353-0bd9-422f-b3e4-98d8d709c90d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dfs_var = dfs.merge(df3[['client_id', 'variation']], on='client_id', how='left')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "id": "b4edff36-d806-4048-9b85-aa83de863e3c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dfs_var['date_time'] = pd.to_datetime(dfs_var['date_time'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "id": "5f86541c-bc8d-4ea1-a7b5-d823188ed736",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "step_order = {'start': 0, 'step_1' : 1, 'step_2' : 2, 'step_3' : 3, 'confirm' : 4}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eed944a6-5fce-4e95-ad1e-8f384f15addb",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "id": "28162307-e0cf-4d5d-b662-58f03c6a67e2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dfs_var['step_num'] = dfs_var['process_step'].map(step_order)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "id": "c71f9b3d-9581-40fc-986d-bf30da3e1882",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "######################################################\n",
+    "#COMPLETION RATE"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "id": "0699b897-2d70-4a36-a2fa-de92681667c0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "last_step = dfs_var['step_num'].max()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "id": "1451a1a2-67f8-4522-b352-6a72513b7a3c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "visitcomp = (dfs_var.groupby(['variation', 'visit_id'])['step_num'].max().reset_index(name='max_step'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "id": "decd9c0a-810e-4e3d-a3da-4aa31dd2c58c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "visitcomp[\"completed\"] = (visitcomp[\"max_step\"] == last_step).astype(int)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "id": "323d47af-6a05-4a10-bc42-3886c7c88ca4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "completionrate = (visitcomp.groupby('variation')['completed'].mean().reset_index())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "id": "c501100b-d67e-4774-9e73-db3350874cf7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>variation</th>\n",
+       "      <th>completed</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Control</td>\n",
+       "      <td>0.498493</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Test</td>\n",
+       "      <td>0.585173</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  variation  completed\n",
+       "0   Control   0.498493\n",
+       "1      Test   0.585173"
+      ]
+     },
+     "execution_count": 70,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "completionrate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "id": "1f83ff5d-1898-479b-8fcd-e69478d05b96",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Average time spent on each steap is   variation  completed\n",
+      "0   Control   0.498493\n",
+      "1      Test   0.585173\n"
+     ]
+    }
+   ],
+   "source": [
+    "print('Average time spent on each steap is', completionrate)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "id": "fb188b90-2c49-4edf-8975-8c7c2f4d56f8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "############################################################################################\n",
+    "#TIME SPENT ON EACH STEP"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "id": "37da593d-4aa1-466c-9a4b-f3524bfdb0b5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dfs_var['date_time'] = pd.to_datetime(dfs_var['date_time'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "id": "3c09e14a-51ab-4c1d-bf97-2d348c4aded6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dfs_var = dfs_var.sort_values(['variation', 'visit_id','step_num','date_time'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "id": "f4a43083-3154-46ca-823f-10e50602565c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dfs_var['next_time'] = (dfs_var.groupby(['variation','visit_id'])['date_time'].shift) #time (-1)for the next step of the same visit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "id": "0b2f7798-0953-4991-9634-1984749b0703",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dfs_var[\"next_time\"] = pd.to_datetime(dfs_var[\"next_time\"], errors=\"coerce\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "id": "57d91d74-ae22-46fc-a195-ad7e1b777296",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dfs_var['step_durationsec'] = (dfs_var['next_time'] - dfs_var['date_time']).dt.total_seconds() #duration in seconds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "id": "ec88f0a4-fcb2-4422-a325-52661e23016f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "step_time = (dfs_var.dropna(subset=['step_durationsec']).groupby(['variation', 'process_step'])['step_durationsec'].mean().reset_index())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "id": "d3178785-bcb9-4034-a4e1-1007ea34d3fd",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>variation</th>\n",
+       "      <th>process_step</th>\n",
+       "      <th>step_durationsec</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [variation, process_step, step_durationsec]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 79,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "step_time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "id": "cd0f8815-ef1b-4311-b4d6-645d3ed1edeb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##################################################################\n",
+    "#CHECKING RATING OF ERRORS PER EACH STEP"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "id": "c6f6a958-a82a-4386-9e1f-ab5d57397e67",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dfs_var['error_flag'] = (dfs_var['step_num'].astype(int))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "id": "5e5c5a7c-9498-4b17-8941-8fef7839cbf1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "error_rates = (dfs_var.groupby(['variation', 'process_step'])['error_flag'].mean().reset_index().rename(columns={'error_flag' : 'error_rate'}))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 83,
+   "id": "0e39a5d1-4c37-4226-9728-0e514dbc9806",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>variation</th>\n",
+       "      <th>process_step</th>\n",
+       "      <th>error_rate</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Control</td>\n",
+       "      <td>confirm</td>\n",
+       "      <td>4.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Control</td>\n",
+       "      <td>start</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Control</td>\n",
+       "      <td>step_1</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Control</td>\n",
+       "      <td>step_2</td>\n",
+       "      <td>2.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Control</td>\n",
+       "      <td>step_3</td>\n",
+       "      <td>3.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Test</td>\n",
+       "      <td>confirm</td>\n",
+       "      <td>4.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Test</td>\n",
+       "      <td>start</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>Test</td>\n",
+       "      <td>step_1</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>Test</td>\n",
+       "      <td>step_2</td>\n",
+       "      <td>2.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>Test</td>\n",
+       "      <td>step_3</td>\n",
+       "      <td>3.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  variation process_step  error_rate\n",
+       "0   Control      confirm         4.0\n",
+       "1   Control        start         0.0\n",
+       "2   Control       step_1         1.0\n",
+       "3   Control       step_2         2.0\n",
+       "4   Control       step_3         3.0\n",
+       "5      Test      confirm         4.0\n",
+       "6      Test        start         0.0\n",
+       "7      Test       step_1         1.0\n",
+       "8      Test       step_2         2.0\n",
+       "9      Test       step_3         3.0"
+      ]
+     },
+     "execution_count": 83,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "error_rates"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 84,
+   "id": "8bf8ae7d-ab0e-4057-bccf-b379f2a32601",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#COMPLETITION RATE - Comparing the percentage of visits who reach into final step in control vs test, highter percentage of completion in the test means better efectiveness for users to finish all the steps.\n",
+    "#TIME SPENT - The test version is better for user to complete the steps faster\n",
+    "#ERRORS RATES - The test version neither reduce or increased the frequency of errors in any step. The two versions have identical performance when talking about number of errors."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "id": "a07db602-47f1-4c8c-8785-76c322d070db",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from scipy.stats import norm\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "id": "1d6944b6-7979-4f60-b7e5-8886c4dc15ab",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  variation  n_complete  n_total\n",
+      "0   Control       16046    32189\n",
+      "1      Test       21731    37136\n"
+     ]
+    }
+   ],
+   "source": [
+    "summary = (visitcomp.groupby(\"variation\")[\"completed\"].agg(n_complete=\"sum\", n_total=\"count\").reset_index())\n",
+    "\n",
+    "print(summary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 87,
+   "id": "086e2bee-9e59-4756-a45f-55c2a6c3fa00",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Order control test\n",
+    "summary = summary.set_index('variation').loc[['Control', 'Test']]\n",
+    "x1, x2 = summary['n_complete'].values\n",
+    "n1, n2 = summary['n_total'].values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 88,
+   "id": "ef1baf41-13b1-4f5f-bdb4-69d71ea93160",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#propoortions\n",
+    "p1 = x1 / n1\n",
+    "p2 = x2 / n2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 89,
+   "id": "5d8ace24-1dcc-4d8c-9ccf-da4fa3aff7b5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#h0\n",
+    "p_pool = (x1 + x2) / (n1 + n2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 90,
+   "id": "48060fa7-6f84-4685-98c0-9f818b234cbe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#test of 2 proportions *THANK YOU CHATGPT\n",
+    "se = np.sqrt(p_pool * (1 - p_pool) * (1/n1 + 1/n2))\n",
+    "z = (p2 - p1) / se"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "id": "15c0edac-b6cd-41ec-b36f-ddeab6282675",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Completion of rate Control: 0.498493\n",
+      "Completion of rate Test: 0.585173\n",
+      "z-statistic: 22.856841\n",
+      "p-value    : 0.000000\n",
+      "Statistical diference highly significative (alpha=0.05)\n"
+     ]
+    }
+   ],
+   "source": [
+    "p_value = 2 * (1 - norm.cdf(abs(z)))\n",
+    "print(f'Completion of rate Control: {p1:4f}')\n",
+    "print(f'Completion of rate Test: {p2:4f}')\n",
+    "print(f'z-statistic: {z:4f}')\n",
+    "print(f'p-value    : {p_value:.6f}')\n",
+    "alpha = 0.05\n",
+    "if p_value < alpha:\n",
+    "    print('Statistical diference highly significative (alpha=0.05)')\n",
+    "else:\n",
+    "    print('Statistical diference not significative (alpha=0.05)')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b6336d5f-7c76-4b04-bded-84819c26634d",
+   "metadata": {},
+   "source": [
+    "Next step: MAKE GRAPHIC FOR THIS\n",
+    "and: Carried out an analysis ensuring that the observed increase in completion rate from the A/B test meets or exceeds this 5% threshold.\n",
+    "Carried out another hypothesis test of your choosing.\n",
+    "Evaluated the experiment by answering questions relating to:\n",
+    "\n",
+    "    Design Effectiveness\n",
+    "    Duration\n",
+    "    Additional Data Needs\n",
+    "\n"
+   ]
   }
  ],
  "metadata": {
diff --git a/notebook/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/notebook/.ipynb_checkpoints/Untitled-checkpoint.ipynb
new file mode 100644
index 0000000..363fcab
--- /dev/null
+++ b/notebook/.ipynb_checkpoints/Untitled-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebook/PROJECT2_pedro.ipynb b/notebook/PROJECT2_pedro.ipynb
index 29cdc42..00ce796 100644
--- a/notebook/PROJECT2_pedro.ipynb
+++ b/notebook/PROJECT2_pedro.ipynb
@@ -1767,6 +1767,16 @@
   {
    "cell_type": "code",
    "execution_count": 76,
+   "id": "0b2f7798-0953-4991-9634-1984749b0703",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dfs_var[\"next_time\"] = pd.to_datetime(dfs_var[\"next_time\"], errors=\"coerce\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
    "id": "57d91d74-ae22-46fc-a195-ad7e1b777296",
    "metadata": {},
    "outputs": [],
@@ -1776,7 +1786,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 77,
+   "execution_count": 78,
    "id": "ec88f0a4-fcb2-4422-a325-52661e23016f",
    "metadata": {},
    "outputs": [],
@@ -1786,7 +1796,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 78,
+   "execution_count": 79,
    "id": "d3178785-bcb9-4034-a4e1-1007ea34d3fd",
    "metadata": {},
    "outputs": [
@@ -1817,85 +1827,17 @@
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>Control</td>\n",
-       "      <td>confirm</td>\n",
-       "      <td>153.740233</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>Control</td>\n",
-       "      <td>start</td>\n",
-       "      <td>49.744712</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>Control</td>\n",
-       "      <td>step_1</td>\n",
-       "      <td>45.093323</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>Control</td>\n",
-       "      <td>step_2</td>\n",
-       "      <td>86.703724</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>Control</td>\n",
-       "      <td>step_3</td>\n",
-       "      <td>140.788394</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>Test</td>\n",
-       "      <td>confirm</td>\n",
-       "      <td>246.065397</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>Test</td>\n",
-       "      <td>start</td>\n",
-       "      <td>38.240943</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>Test</td>\n",
-       "      <td>step_1</td>\n",
-       "      <td>60.130113</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>Test</td>\n",
-       "      <td>step_2</td>\n",
-       "      <td>89.756501</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>Test</td>\n",
-       "      <td>step_3</td>\n",
-       "      <td>139.834792</td>\n",
-       "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "  variation process_step  step_durationsec\n",
-       "0   Control      confirm        153.740233\n",
-       "1   Control        start         49.744712\n",
-       "2   Control       step_1         45.093323\n",
-       "3   Control       step_2         86.703724\n",
-       "4   Control       step_3        140.788394\n",
-       "5      Test      confirm        246.065397\n",
-       "6      Test        start         38.240943\n",
-       "7      Test       step_1         60.130113\n",
-       "8      Test       step_2         89.756501\n",
-       "9      Test       step_3        139.834792"
+       "Empty DataFrame\n",
+       "Columns: [variation, process_step, step_durationsec]\n",
+       "Index: []"
       ]
      },
-     "execution_count": 78,
+     "execution_count": 79,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1906,7 +1848,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 80,
    "id": "cd0f8815-ef1b-4311-b4d6-645d3ed1edeb",
    "metadata": {},
    "outputs": [],
@@ -1917,7 +1859,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 79,
+   "execution_count": 81,
    "id": "c6f6a958-a82a-4386-9e1f-ab5d57397e67",
    "metadata": {},
    "outputs": [],
@@ -1927,7 +1869,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 80,
+   "execution_count": 82,
    "id": "5e5c5a7c-9498-4b17-8941-8fef7839cbf1",
    "metadata": {},
    "outputs": [],
@@ -1937,7 +1879,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 81,
+   "execution_count": 83,
    "id": "0e39a5d1-4c37-4226-9728-0e514dbc9806",
    "metadata": {},
    "outputs": [
@@ -2046,7 +1988,7 @@
        "9      Test       step_3         3.0"
       ]
      },
-     "execution_count": 81,
+     "execution_count": 83,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2054,6 +1996,146 @@
    "source": [
     "error_rates"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 84,
+   "id": "8bf8ae7d-ab0e-4057-bccf-b379f2a32601",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#COMPLETION RATE - Compares the percentage of visits that reach the final step in Control vs Test; a higher completion percentage in Test means the new version is more effective at getting users through all the steps.\n",
+    "#TIME SPENT - The Test version lets users complete the steps faster.\n",
+    "#ERROR RATES - The Test version neither reduced nor increased the frequency of errors in any step. The two versions perform identically in terms of number of errors."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "id": "a07db602-47f1-4c8c-8785-76c322d070db",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from scipy.stats import norm\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "id": "1d6944b6-7979-4f60-b7e5-8886c4dc15ab",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  variation  n_complete  n_total\n",
+      "0   Control       16046    32189\n",
+      "1      Test       21731    37136\n"
+     ]
+    }
+   ],
+   "source": [
+    "summary = (visitcomp.groupby(\"variation\")[\"completed\"].agg(n_complete=\"sum\", n_total=\"count\").reset_index())\n",
+    "\n",
+    "print(summary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 87,
+   "id": "086e2bee-9e59-4756-a45f-55c2a6c3fa00",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Fix the row order as Control, Test so that x1/n1 refer to Control and x2/n2 refer to Test\n",
+    "summary = summary.set_index('variation').loc[['Control', 'Test']]\n",
+    "x1, x2 = summary['n_complete'].values\n",
+    "n1, n2 = summary['n_total'].values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 88,
+   "id": "ef1baf41-13b1-4f5f-bdb4-69d71ea93160",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#completion proportions: p1 = Control, p2 = Test\n",
+    "p1 = x1 / n1\n",
+    "p2 = x2 / n2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 89,
+   "id": "5d8ace24-1dcc-4d8c-9ccf-da4fa3aff7b5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#pooled completion proportion under H0 (Control and Test share the same completion rate)\n",
+    "p_pool = (x1 + x2) / (n1 + n2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 90,
+   "id": "48060fa7-6f84-4685-98c0-9f818b234cbe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#two-proportion z-test using the pooled standard error (formula obtained with ChatGPT's help)\n",
+    "se = np.sqrt(p_pool * (1 - p_pool) * (1/n1 + 1/n2))\n",
+    "z = (p2 - p1) / se"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "id": "15c0edac-b6cd-41ec-b36f-ddeab6282675",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Completion of rate Control: 0.498493\n",
+      "Completion of rate Test: 0.585173\n",
+      "z-statistic: 22.856841\n",
+      "p-value    : 0.000000\n",
+      "Statistical diference highly significative (alpha=0.05)\n"
+     ]
+    }
+   ],
+   "source": [
+    "p_value = 2 * (1 - norm.cdf(abs(z)))\n",
+    "print(f'Completion of rate Control: {p1:4f}')\n",
+    "print(f'Completion of rate Test: {p2:4f}')\n",
+    "print(f'z-statistic: {z:4f}')\n",
+    "print(f'p-value    : {p_value:.6f}')\n",
+    "alpha = 0.05\n",
+    "if p_value < alpha:\n",
+    "    print('Statistical diference highly significative (alpha=0.05)')\n",
+    "else:\n",
+    "    print('Statistical diference not significative (alpha=0.05)')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b6336d5f-7c76-4b04-bded-84819c26634d",
+   "metadata": {},
+   "source": [
+    "**Next steps**\n",
+    "\n",
+    "- Make a chart of the completion-rate comparison above.\n",
+    "- Carry out an analysis checking that the observed increase in completion rate from the A/B test meets or exceeds the 5% threshold.\n",
+    "- Carry out another hypothesis test of our choosing.\n",
+    "- Evaluate the experiment by answering questions relating to:\n",
+    "  - Design Effectiveness\n",
+    "  - Duration\n",
+    "  - Additional Data Needs\n"
+   ]
   }
  ],
  "metadata": {
diff --git a/notebook/Untitled.ipynb b/notebook/Untitled.ipynb
new file mode 100644
index 0000000..b41851f
--- /dev/null
+++ b/notebook/Untitled.ipynb
@@ -0,0 +1,33 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "139f8b74-4220-4cbd-b2b9-666f8b7bfaf5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "venv"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.14.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}