Skip to content

Commit 90a2a67

Browse files
committed
Forced commit without merge
2 parents 707cc39 + 302eee6 commit 90a2a67

21 files changed

+2425
-64
lines changed

.github/workflows/lint.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
name: Lint
2+
on: [push, pull_request]
3+
jobs:
4+
lint:
5+
runs-on: ubuntu-latest
6+
steps:
7+
- uses: actions/checkout@v4
8+
- uses: actions/setup-python@v5
9+
with:
10+
python-version: '3.10'
11+
- name: Install dependencies
12+
run: pip install ruff
13+
- name: Run ruff
14+
run: ruff check .
15+
16+

.github/workflows/tests.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
name: Test
2+
3+
on: [push, pull_request]
4+
5+
jobs:
6+
test:
7+
runs-on: ubuntu-latest
8+
steps:
9+
- uses: actions/checkout@v4
10+
- uses: actions/setup-python@v5
11+
with:
12+
python-version: '3.10'
13+
- name: Install dependencies
14+
run: pip install -r requirements.txt
15+
- name: Run tests with pytest
16+
run: pytest
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
name: Validate Notebooks
2+
3+
on:
4+
push:
5+
paths:
6+
- '**.ipynb'
7+
8+
jobs:
9+
validate:
10+
runs-on: ubuntu-latest
11+
steps:
12+
- name: Checkout Code
13+
uses: actions/checkout@v4
14+
with:
15+
# Required to fetch the history for 'tj-actions/changed-files'
16+
fetch-depth: 0
17+
18+
- name: Get changed notebooks
19+
id: changed-notebooks
20+
uses: tj-actions/changed-files@v44
21+
with:
22+
files: |
23+
**.ipynb
24+
25+
- name: Setup Python and Dependencies
26+
if: steps.changed-notebooks.outputs.any_changed == 'true'
27+
uses: actions/setup-python@v5
28+
with:
29+
python-version: '3.10'
30+
31+
- name: Install dependencies
32+
if: steps.changed-notebooks.outputs.any_changed == 'true'
33+
run: pip install -r requirements.txt
34+
35+
- name: Run changed notebooks
  if: steps.changed-notebooks.outputs.any_changed == 'true'
  env:
    # Pass the file list through the environment instead of expanding
    # `${{ ... }}` inside the script: an expression in `run:` is pasted into
    # the script text before the shell starts, so a crafted file name would
    # execute as shell code.
    CHANGED_NOTEBOOKS: ${{ steps.changed-notebooks.outputs.all_changed_files }}
  run: |
    # Unquoted on purpose: the list is split on whitespace, one path per
    # iteration, as before.
    for notebook in ${CHANGED_NOTEBOOKS}; do
      echo "--- Validating ${notebook} ---"
      jupyter nbconvert --to script --execute "${notebook}"
    done

.gitignore

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,3 @@
1-
# Personal
2-
Data/
3-
Tests/
4-
tester_2.ipynb
5-
61
# Byte-compiled / optimized / DLL files
72
__pycache__/
83
*.py[codz]
@@ -210,3 +205,11 @@ cython_debug/
210205
marimo/_static/
211206
marimo/_lsp/
212207
__marimo__/
208+
209+
.DS_Store
210+
211+
data/WORC Employment.xlsx
212+
data/ARC Enrollments.xlsx
213+
data/ARC Application.xlsx
214+
data/All demographics and programs.xlsx
215+
data/WORC_Employment.xlsx

data/ARC_Application.xlsx

276 KB
Binary file not shown.

data/ARC_Enrollments.xlsx

118 KB
Binary file not shown.
903 KB
Binary file not shown.

requirements.txt

2.13 KB
Binary file not shown.

src/Carmen_WORCEmployment.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import pandas as pd
2+
3+
4+
def load_and_clean(file_path="data/WORC_Employment.xlsx"):
    """
    Load the WORC Employment workbook and return a cleaned DataFrame.

    Cleaning steps, in order:
      1. Drop the 'Employment History Name' column.
      2. Parse 'Start Date' with ``pd.to_datetime``.
      3. Convert 'Salary' with ``pd.to_numeric``; entries that cannot be
         parsed as numbers become NaN (``errors='coerce'``).
      4. Replace any 'Salary' equal to 60000 with 28.84.

    Parameters:
        file_path (str): Relative path to the Excel file.

    Returns:
        pd.DataFrame: Cleaned DataFrame.
    """
    worc = pd.read_excel(file_path)

    # `columns=` already names the axis, so no separate `axis` argument is
    # passed. `drop` returns a new frame; the loaded one is left untouched.
    worc_clean = worc.drop(columns=['Employment History Name'])

    # Clean up data types
    worc_clean['Start Date'] = pd.to_datetime(worc_clean['Start Date'])
    worc_clean['Salary'] = pd.to_numeric(worc_clean['Salary'],
                                         errors='coerce')

    # One salary is recorded as 60,000 instead of on the same scale as the
    # rest (presumably an annual figure among hourly rates - confirm with
    # the data owner). 60,000 / 2,080 hours = 28.846..., recorded as 28.84.
    worc_clean['Salary'] = worc_clean['Salary'].replace(60000, 28.84)

    return worc_clean

src/Carmen_WORCEmployment_Plots.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import pandas as pd
2+
from Carmen_WORCEmployment import load_and_clean
3+
import matplotlib.pyplot as plt
4+
import seaborn as sns
5+
6+
7+
def plot_salary_by_gender(data):
    """
    Show a box plot of 'Salary' for each 'Gender' value.

    Parameters:
        data (pd.DataFrame): Frame with 'Gender' and 'Salary' columns.
    """
    plt.figure(figsize=(8, 5))
    # seaborn draws on the current axes and hands them back.
    axes = sns.boxplot(x='Gender', y='Salary', data=data)
    axes.set_title("Salary Distribution by Gender")
    plt.show()
14+
15+
16+
def plot_avg_salary_by_city(data):
    """
    Show a horizontal bar chart of mean 'Salary' per 'Mailing City'.

    Bars are ordered by ascending mean salary.

    Parameters:
        data (pd.DataFrame): Frame with 'Mailing City' and 'Salary' columns.
    """
    mean_by_city = data.groupby('Mailing City')['Salary'].mean()
    ordered = mean_by_city.sort_values()

    # NOTE(review): the title says "KY Region" while the grouping key is
    # 'Mailing City' - confirm which label is intended.
    axes = ordered.plot.barh(figsize=(8, 5),
                             title="Average Salary by KY Region")
    axes.set_xlabel("Average Salary")
    plt.show()
23+
24+
25+
def plot_placements_over_time(data):
    """
    Show a line chart of the number of rows per calendar month.

    Rows are bucketed into month-end bins on 'Start Date' and counted.

    Parameters:
        data (pd.DataFrame): Frame with a 'Start Date' column; it is used
            as the resampling index, so it is expected to be datetime-like.
    """
    # An explicit MonthEnd offset gives the same month-end bins as the 'M'
    # alias without depending on the alias string, which pandas 2.2
    # deprecated in favour of 'ME'.
    monthly_counts = (data.set_index('Start Date')
                          .resample(pd.offsets.MonthEnd())
                          .size())

    monthly_counts.plot(kind='line', marker='o', figsize=(10, 4))
    plt.title("Number of Placements Over Time")
    plt.ylabel("Placements")
    plt.show()
32+
33+
34+
def plot_placement_type_by_program(data):
    """
    Show counts of each 'ATP Placement Type', split by program.

    One bar group per placement type, coloured by 'Program: Program Name'.

    Parameters:
        data (pd.DataFrame): Frame with 'ATP Placement Type' and
            'Program: Program Name' columns.
    """
    plt.figure(figsize=(10, 6))
    axes = sns.countplot(x='ATP Placement Type',
                         hue='Program: Program Name',
                         data=data)
    # Tilt the category labels 45 degrees.
    plt.xticks(rotation=45)
    axes.set_title("Placement Type by Program")
    plt.show()
42+
43+
44+
def plot_top_cities(data):
    """
    Show a bar chart of the ten most frequent 'Mailing City' values.

    Parameters:
        data (pd.DataFrame): Frame with a 'Mailing City' column.
    """
    # value_counts() is already sorted by descending frequency, so the
    # first ten entries are the ten most common cities.
    top_ten = data['Mailing City'].value_counts().head(10)
    axes = top_ten.plot.bar(title='Top Cities by Participant Count',
                            figsize=(8, 4))
    axes.set_ylabel("Count")
    plt.show()
51+
52+
53+
def main():
    """Load the cleaned WORC data and show each plot in turn."""
    worc_clean = load_and_clean()

    plotters = (
        plot_salary_by_gender,
        plot_avg_salary_by_city,
        plot_placements_over_time,
        plot_placement_type_by_program,
        plot_top_cities,
    )
    for draw in plotters:
        draw(worc_clean)
61+
62+
63+
if __name__ == "__main__":
    # main() loads and cleans the workbook itself, so nothing is read here:
    # the earlier extra read_excel()/load_and_clean() calls produced values
    # that were never used and only re-parsed the same file.
    # NOTE(review): with those reads gone, check whether this module still
    # needs its `pandas` import.
    main()

0 commit comments

Comments
 (0)