From 0d1f138423a422f75725e915c37ba51e88329d67 Mon Sep 17 00:00:00 2001 From: luckycarms Date: Tue, 5 Aug 2025 14:04:19 -0400 Subject: [PATCH 1/2] updates to notebook --- src/notebooks/worc_employment_clean.xlsx | Bin 8062 -> 8063 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/src/notebooks/worc_employment_clean.xlsx b/src/notebooks/worc_employment_clean.xlsx index cfd8a085d45f2d510b48302ba4badd936f3be7af..541655af11b2962462e2fdbbdc91a885b68e7011 100644 GIT binary patch delta 4019 zcmY*ccQhMp-;NL?HZe+3krq`uRW(vUW5g4ys%p=oDyUt=C=aSATCbGaTdmZS)(C3V zR$J{-qxK3>lg`ReQ@7)YHc#<4*Gxvm7}gOBCC%x= z`8$rzPbTi?v8WT-Gwt|glG;A3SS}15Bv=k4_-dU-VJznYNAHqy+*GDnyN(a8ck1c z{Z%Fv@Tu!aX`$QFFy{OHvQcdAn%23=%0v4@vQYbrtT)?hvp@C!=|h@Dx(FLBfUrxY zK+EvkkS`D5?U-a)`l~qWr{5L1+Zto9vzil3&~}&3b44@0J;$i4C+Dd<)jG{W44U}u ztNzl_UmB{=bFS=Ft=`!;=EUbpRN_dcwH=7l@}x0uDaZCR3r){sit{_iZhcYFRR<{^ zsOo+6rM2?_!%lUF<9-Lhug2!K>N>!5G%4X3r2qk69#zS0UdPa!}cVtzN_Ji7o zH#B!cghdWc_t@_o@3h-1zJ0WI3)6>5wY%0K_v=Y9=(s<5LvhGMcmaM-7QNWLocr`_ zb%s2ebf6W&`s47!HvY%#l3DTCNy6LJWf6(Yl(X=B}L^0ZQkG0mk)`#Id$hb-& zsw!tJN{@aWo3+BJ#O=+xre8hAX$VLwD_c9pPV4FG%c;ITd7s>g^0(!-dY=r^tDcQk{IPCEq|pw2Vn|rC8jE{4+2iI?3v7{(>It(h%LB zgDgGY{?zQ{s5)v@kRDja_O!(9Zwa)^kFpZ{)PpDWPBT3McC!ZcG|MUqhT%~+m%j~B zKZ|bFT{4pj)P2TWh!F=wjX%_{GTKp;`0&B}HEcza_9=qkml zn047fFxG$X+3M5BSMH|tib92D?`lkU-e`ob$s1oClHpFvVIkKvAl!r^7F`xMdOV*< zD5?fXsKLpf}=S7yrBFmH~xtA%jn9Bavm;~`P%Yk z;%B9DM$Z20Qyt6_#G%b*NU+}NwJo8y;2v+!$Ai&}PCQr41FHbj)( zEdff~8Q1zRO((m=YHcP~U@R|8;D9bT*jvk$>rpM6UBG?xqDqCu%p$G8sZ;g6m@nk= zJ3{oh>?*9OXAy_*UhCi?Jo!aR%qldl1&(IJSON>($BO5k0~J~sIx9FS@2YLf-ifEz zw&|E5U+)O>)o@|jZ0b2&Xhc>^Qfa~BR}8cbs8a}_u4XkK$%>XIn-3v|+K*seFgec1 z6i`mkY_?NiOkQIfJ{>mln%!7>lJM#~?Mqx@Cd5ON(E z^b~UscaP+URDS-0Xs-;prG3VWSz_cezPbP2IH!GQ8hq1P56@>=BL(@$6f7B4E#Zk0 zW6?#f>?QKTiiF#zITrB72K_&c=zah@3BP>Z%nIs)6GYXfVA8-M3OLFG z6T^>JPH$1lc&f31uSqb*-N&h+EmhYaeF0!9%#4t`VMk~fjL*Fou_tMTq<%SmJsT1+ zG2_`^z8w}WjF@Xy3%Y`27Mgj(TA`4us0wDwlLy*yq){Qej8L5k2Z$@$_{^}(GXn`Q zIPcPI;nW0Q9I5ABbu;Y?+;Lf6AVa+s`87ZWA}DJ5`O%>byO>^WH?v@?j)gI z67gfQO~}7VhK)5Z)W6BPBTlQbO{B?R@y+yne1GSS3oVrkEkSLwCK39;d%|8ZdDdsb z+}+k4ZZJDpbD_tv97QaTY)OX79G#^-D@qbsM;;|F{2uQgQuA*)XcoDM|8gmlE!#m>%o*0?&c-4wP_{HwbYaZWyx?M(lMm=>1FDW zQoXieEN)UlQiCjU18!5gXvoz5tiw05g&iFAM);ypUM$h!8d^cbzid`gY30IMwjLr( zW~|k+*_m^@dT3MLR7azwMzekWCxj~B`g`e-RbIKm3bSnzErP_{caj%_IP4#xz6d%E$?48!DD+ zxa_nVGBZw0vf62KahKM$gA$|r);4R&@TW1VdfF;YZ)V@s+|;{UlgG#Nn%6UEC-=yp zUuony0Oj7r4Oi5QO2QcZh$1cA<3V_Q@zp8e?OQS_VcA;MU_cc*nr3c&JFfq$V)mfO zarMIJuP(C_e-3gerNg_`HWETkEP2myrip-N?q`f%MFpffVrG{?e>?YGNzE zmn*a7;V+*fNXt$`9*wEM}(^H~8i z;Adlrziv6&J)o1`hSE0!#jO-7QC&1ni5YEsxH*{a7hrp*%+_g{W)3Bx))%)t*qwgg z3Xoe8l}76x+C*NJ1Q|D~`t?>?D~RTilwy2xubqnOu<@;kv)k5_CLM`mO(6knI->z# zo+Q3?ys;HkMeVJdG8htrw)S})CXvJ4HuVIUZb}jSY->)`Knvpz1Jrk}=+p7jW#z;d zX845iH&9Qr_yr;?>ucm#c};Gr+^k~V4;&WvVW?r9FSRg*-@q;{ZZ4AIR@<7nF6ULu z9(gEWH|b?eJW?`4ZC0>+;HXNFUphwr|u9@PR15X#s_sK{J^Dsk`2m@++q>Z`v7KFHcH-spn!!+pTo+In2P64an zu=zs+4*<83fyt+tsZ*fm=R)z{SW=Q2vjSNGaiAe44i(t$U7kU|>-PGwFF_+FAOHNx zXSJR&)}-LioE}uyOHefXjLfE3^^vWp#U%MWRWQ528eDLG>Aw`wpYMn}RIYC~D7^I& zI2s}SXp$L1EnO~Zd~ns1FX>Qi_S@Btu#L^D{EDu1%C{vsHDtK!Q}=8psqPJX5(on0 zw3E#p3N;;_8$dDm)1et(Kz_c;Lek{0f$|$!=WR6|ekU=`aO-J0!MViVU6$bm<;UyQ z=Qih~e2O?HWxLpU|3m+TOT5hY9~p!fyR0HB{RC+y7m;5|SW8%GIqwpA$q!A|CFCMvK?|JCRu9Pc{im#Moeq8oUf2~Y6*_3t ziK?xa&Q`EyS}B#|kA^V};gE%5m*-0HkuGh7mQkWqvj#^ArK+mJ*W^;H|fg<07jduqh$D6A>Kz@~tfa^b_Cn$_=5sfwt)^mY~fM*xaigDSJ z4jmkPsVsH6FPb+}y0V;Dd%gzNEqBD-NcIV9@5B148gcM+8O|M&OKPIxfzgbEIFl`C zj_Z(l-MT2fMPXN${sCvo=ZR)dgPMgV51EF`CyG=T!^Ck}yps7{c*6d7%AyneVwSjU ziC-8PxZ@(v55ZdMS6BYDLEZg@xgyM z?C!9AO}HahV{vC-+srZTVVU)k7V;W}ZvVVdtik%mmmCAAZo3}HeF<6?h9$gi~8RcO7(#k0I>FQbbBf-^?Nr! zQvo5={}-dlK2#+#Q~-eZ1(Ef?tcw~-m66;lDEPP5)`iw_GFV7l=(pC}DD~(37nCt- U0D$u!l-Xn>p=-1f0>8n3189_LssI20 delta 3998 zcmY*ccRbbK|G!+W)g>XLTwC0TjI3O4kOVH* za`+i;j0@1w>J+~ie)bXXt8n7Vaz}=>ix9jJyd;-J`!A&1Y%@ZdDh1?l;=7@RUn7yfHih|HC-!wvBYt; z@>Z&}c6*=LJvRf62l1zT@2gt5QChF$U!m3n9+(073rm?$k9Tp8-qT56xQiH-Ou7YM zJlI?Gwm?*2Y-V|A#@OQ7)IGRHNxZ<-8Avlr+Q?=(d5Uo=_t94ezW!bX*A)G&wCdQxxrojCz!OoCn}#Z=^OD?T`DKuV;sr@^lHd!PZAy zC>_+>{oy<#C;3SKjm!KkU(ndh719Syi;?1UBvYq??h3h*Xg;0`UjCDGf~@8 z;QJeVSPyB&#uI;oNjTqWh*!Q+^pk>lq&Pxe0duwq@W?3`mx$0lI*O}!R&5r!a&qV1 z#;Jq_;Wyba&75L7ewZ?&c zU!j(QunCbfsR7NVHJ7LPnX#pQ1`>FJ2-n@OXLB}|_w*6*86VGX<-oSytbJVi#?@h2 z)&f!lTI_Kjn_o1CjX0kk3CUU;9`Ty!9~L$jbNylvW zLN~QyM0Aaqi8=AO?@dxAzZ@%>_Y_k16U_aAl8hJMV%nIo+c*pTM4(Ereb0uq8%?duSCl35)`e zoXNx|=-F3Xb(j}={4WpW+O3uIHAp+=^lN)6qC&Db_Xyj6Jg%nkGHDDXY5B`u`;O{N z+tJEIgN5HXBN%C-ggG{J?T%Q?GgIIx;gvni*oyM4gNMJ^`1SZ)>|Z&d+lakrMongZ zrL8FxFg&($@OsMgpM3g0C>|KwtpEaeSFck(bTDZAW+W)Bl4B7T1*_>ZiI^oa=ap~j zxl0sqUxzGP$PH7ga?tD_dqtd{&fMk~^ZB7J-mC;jT^yOcG$PHo>=oR4C}bViryi|g zBM1?G*}2P|6^R_y2L>n9ymC$4^67XI?W#qLeqJLUKtg;8xW z9I4ifK*(*7u->rsy=Cwv3t-$Q@X}wC$HtgyK`pwwwd812b9*1k$S}*$cj*k4xrUIN z)DbRhtN1i zy47~ptuc>QHH;$bl_mJZ1##?bxgvSo5t@lf>eHDKxEpzf`R$Q_4#LEk`G)$L4&qzb zW|~EwSa0y}X@LdT(vR#Cfg?_dMsa@Jr@#ku3hN|`r>&B%}=ZWvvafrLCz86 z1rxu}2^Qvt?atg>W^~@87KgpE^{QL;ka&4si{ZQ$mBHm*>R_00SRb{FbB#9jq<^IcUBk1(BN!a>SNy?)$c%v#ZoXDEl$z`vm+zCL^I(#Vp)Tc z;rh0AU`;LaP65_$D22vNhDn{eBcTRE1iKCQf@zP<@|=TC=zW%U1M|Bk0)W zA#<2D4pEM0pPGKJsc^q}qGXp`?P+CRwaob2$#zR=WlF%hlh$ao&0v={D}1eHj<#X6 z3pgUD{alf3BwaE3fkGY`OF*}wem#@>@*bgn-zuqj;FfP|c2@sOr+#7e=hzlj$YWZX z^epOS5&x0GIAPnIUpT`}KX6kYwN%i14S1fbXZ2)owGd)R}Pp9v4g?;jo?y`36>&Ys3r$1tH3^jHDZW4ki{ z4ld1?rBk=+8-u3Z0^SVUu@!p9AIIaIYfS|SGELLtEVB|rs+jbsFB$6ZRI5p|j*Cix zxU_eaPH5YufxM*j4t6(t>+S^v<_Q(Y8b?BRc=~}NC z58y;}x%!C-x0$hJhA900Sz6d;Ke45ZE@k5{ZzJOv6ir8QD)Ws@ePcUBP`A(CE5dew%#QW;<0S>LJ zI1W=#s{YX;bN?YL8KixoRWE(GWX!j)5Ntp3d%b(RrTqJ=osmyDCR;#GF(e%V0YT}< zY#KtR=R5e(#P27_7pD8}ew2BS4)Ff0Q8tzVW;T;LrR79YGx;jH>xl(RYKsFn@VFeF zf&Yd_TS$sTG*#HjcRZ)Iq$@08DtK&0dsr^y z!&J{q69{ISrQc@AWarw!n z$#_vD!(~6}DT|Y2pp+a%TCN7 zZ*K>O{aUJKEJXO%Fd@m76w&vSBN%vy+HruZJELGLmY0&3?<8E`%z}J)DAm5 z770w}Zt6W9(Ap+%KVwjPLZ#VjiE5$(bq6~@XkeF)-p~;h5-P8Cbq8<2zud(_#SZ(O ztgbXdbem__%+o~{vj+Xd?0w70;xEU@!mfD-0n^RH#@guui={* zTO~X2dC-qqmf~fhy}ns4TlnQx&mVZ`)#};T>4{N1Y;<#oWx=0+r#w;sE1g2jE79*BbeU@dtyexyw-6&ljs@Sd|@$Z1^y9 zNvJe22+24Fs)7$CwqzMH)9)I8ZhyZt_3#^-b<&gXsh5J32Ft4KZb;Mpx7o`egD?_n zNxYxXt65R|ykaVAYBlG@Wns#h@}(>^dG@o4sdKVW(yWD9efF1aJ znerfZ;QrY_i&O4BR(9K9%**(LJ`J{GZ? zGb!MhX?Vk!Cl{-?Xbi<8)3Q!Hx>D=Ub0UMfkIG0AbH{2Oj5vCqB?(-xUM Date: Thu, 7 Aug 2025 09:36:47 -0400 Subject: [PATCH 2/2] updates to cleaning notebook. initially dropped Auto-IDs but restored for team --- src/notebooks/worc_cleaning.ipynb | 2 +- src/notebooks/worc_employment_clean.xlsx | Bin 8063 -> 8063 bytes src/notebooks/worc_employment_plots.ipynb | 73 ++++++++++++++++++---- 3 files changed, 63 insertions(+), 12 deletions(-) diff --git a/src/notebooks/worc_cleaning.ipynb b/src/notebooks/worc_cleaning.ipynb index 267af60..1ed7f9b 100644 --- a/src/notebooks/worc_cleaning.ipynb +++ b/src/notebooks/worc_cleaning.ipynb @@ -418,7 +418,7 @@ "outputs": [], "source": [ "# Dropping multiple columns based including those with no unique values as well as those that seem unnecessary\n", - "cols_to_drop = ['Auto Id','Employment History Name']\n", + "cols_to_drop = ['Employment History Name']\n", "\n", "worc_cols_dropped = worc.drop(columns=cols_to_drop, axis=1)" ] diff --git a/src/notebooks/worc_employment_clean.xlsx b/src/notebooks/worc_employment_clean.xlsx index 541655af11b2962462e2fdbbdc91a885b68e7011..151ff8b50765b3cbf9302dba8115e1f2cc01045e 100644 GIT binary patch delta 423 zcmexw_uq~;z?+#xgn@y9gCW<4eIoAx4j>iHH#zLf#H;f4zW#>|c-sD-x>EhRE`!H4 zQg6ZBww5C*0^3h~+?M4lrRw(fpKb0Ge=o~jbZaAMl&{MV1R7a=4j*vGuDgp zGlLkD4@x`(3#^iiVgw7YOFse&ypi^X2w2Hl!vk}2t*kj%)pl7sF#SW;8BE*BS%B$E UIWsW5NzMvPzn60b)8_IX0NCNFj{pDw delta 423 zcmexw_uq~;z?+#xgn@y9gQ3=oWg_nZ4j>gRUmP4d@v40NLBGQWJZ+Xd$*NgGT$y5&Xesw#mP2*{*e`cV|6n>*cDbt*Mwd?fe@5C41b$ycf6j ztyt0fwIa~sYi~=wYvsJZp$?lR)HtkI?`-h?6m<9Q`)zCVDtDgxVfFZQY+&c3ur=56 z7r%D@ANnFH`TIBdW8Dj;eRzHUl|M=_M9++wqb$J2u=x|C85=V&KsIZ0H1dKO>qYsQ zL5#@fjPNW)*P&AyR03U{vqoOrtRb`z;vaY T8JONAX9cF;%ejJSb9oN{Al$0V diff --git a/src/notebooks/worc_employment_plots.ipynb b/src/notebooks/worc_employment_plots.ipynb index 643170e..d927097 100644 --- a/src/notebooks/worc_employment_plots.ipynb +++ b/src/notebooks/worc_employment_plots.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -211,7 +211,7 @@ "24 First ATP Placement - Already in Tech 23.83 Female White SOAR " ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -240,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -253,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -339,7 +339,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -364,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -398,14 +398,14 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/var/folders/sw/mf1x4fnn1jg2jq5n72k6mkm80000gn/T/ipykernel_25780/1675383775.py:2: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.\n", + "/var/folders/sw/mf1x4fnn1jg2jq5n72k6mkm80000gn/T/ipykernel_4812/1675383775.py:2: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.\n", " worc_clean.set_index('Start Date').resample('M').size().plot(kind='line', marker='o', figsize=(10, 4))\n" ] }, @@ -430,7 +430,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -455,7 +455,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -479,7 +479,58 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Gender\n", + "Male 13\n", + "Female 12\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Count of Gender\n", + "worc_clean['Gender'].value_counts()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ATP Placement Type Gender\n", + "First ATP Placement - Already in Tech Female 2\n", + " Male 1\n", + "First ATP Placement - New to Tech Female 9\n", + " Male 9\n", + "First ATP Placement - Promotion Female 1\n", + " Male 3\n", + "dtype: int64\n" + ] + } + ], + "source": [ + " # Count of gender by ATP Placement Type\n", + "grouped = worc_clean.groupby(['ATP Placement Type', 'Gender']).size()\n", + "\n", + "print(grouped)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, "metadata": {}, "outputs": [], "source": [