Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
750c098
Updated to use object labels in column headers and fall back to objec…
pendingintent Jan 21, 2026
b51a2d3
Updated SQL query to populate values for sending to template edit.html
pendingintent Jan 21, 2026
558dc40
Renamed column header 'UID' to 'id'
pendingintent Jan 21, 2026
91b7c73
Added inline comments to clarify sections
pendingintent Jan 22, 2026
af067ca
Added ignore for new protocol
pendingintent Jan 22, 2026
f2e922f
Concepts cells are now collapsible with button showing count of assoc…
pendingintent Jan 22, 2026
9129560
Added href column to activity_concept table
pendingintent Jan 26, 2026
28a0c92
Reorganized column header rows
pendingintent Jan 26, 2026
95c6b0f
Removed backup file of edit.html
pendingintent Jan 26, 2026
32eba89
Aligned export XLSX SoA matrix with that shown in the UI
pendingintent Jan 26, 2026
c2ef252
XLSX export matrix now includes a worksheet per timeline
pendingintent Jan 26, 2026
821c15f
All timelines now collapse/expand biomedical concepts
pendingintent Feb 3, 2026
2c72b4e
Added reorder instances functionality and return to edit page navigation
pendingintent Feb 5, 2026
198c4e5
Added reorder instances functionality
pendingintent Feb 5, 2026
116ebff
Added return to edit page navigation
pendingintent Feb 5, 2026
39e6946
Added edit-column-collapse.html test file to gitignore
pendingintent Feb 5, 2026
3d7fb7b
Update src/soa_builder/web/templates/instances.html
pendingintent Feb 5, 2026
607ad0c
Update src/soa_builder/web/templates/instances.html
pendingintent Feb 5, 2026
67f245c
Update src/soa_builder/web/templates/concepts_cell.html
pendingintent Feb 5, 2026
de0b06d
Update src/soa_builder/web/migrate_database.py
pendingintent Feb 5, 2026
ac25f17
Update src/soa_builder/web/templates/instances.html
pendingintent Feb 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -97,5 +97,8 @@ docs/~*
files/~*
output/*
SOA Workbench Wishlist.docx
NCT01750580_limited.json
CLAUDE.md
edit-column-collapse.html

# End of file
313 changes: 228 additions & 85 deletions src/soa_builder/web/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
_migrate_timing_add_member_of_timeline,
_migrate_instances_add_member_of_timeline,
_migrate_matrix_cells_add_instance_id,
_migrate_activity_concept_add_href,
)
from .routers import activities as activities_router
from .routers import arms as arms_router
Expand Down Expand Up @@ -150,6 +151,7 @@ def _configure_logging():


# Database migration steps
_migrate_activity_concept_add_href()
_migrate_matrix_cells_add_instance_id()
_migrate_instances_add_member_of_timeline()
_migrate_timing_add_member_of_timeline()
Expand Down Expand Up @@ -1961,6 +1963,118 @@ def _matrix_arrays(soa_id: int):
return instance_headers, rows


def _fetch_enriched_instances(soa_id: int):
"""Return enriched instance data with all header information for XLSX export."""
conn = _connect()
cur = conn.cursor()
cur.execute(
"""
SELECT i.id,i.name,i.instance_uid,i.label,i.member_of_timeline,
v.name AS encounter_name,v.label AS encounter_label,
e.name AS epoch_name,e.epoch_label as epoch_label,
tm.window_label,tm.label AS timing_label,tm.name AS timing_name,tm.value AS study_day
FROM instances i
LEFT JOIN visit v ON v.encounter_uid = i.encounter_uid AND v.soa_id = i.soa_id
LEFT JOIN epoch e ON e.epoch_uid = i.epoch_uid AND e.soa_id = i.soa_id
LEFT JOIN timing tm ON tm.id = v.scheduledAtId AND tm.soa_id = v.soa_id
WHERE i.soa_id=?
ORDER BY COALESCE(i.member_of_timeline, 'zzz'), LENGTH(i.instance_uid), i.instance_uid
Copy link

Copilot AI Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The magic string 'zzz' is used as a sentinel value to push unassigned timelines to the end of the sort. This is fragile if timeline UIDs ever start with characters lexicographically after 'z'. Use a more robust approach like a CASE expression or separate the unassigned instances explicitly.

Suggested change
ORDER BY COALESCE(i.member_of_timeline, 'zzz'), LENGTH(i.instance_uid), i.instance_uid
ORDER BY
CASE WHEN i.member_of_timeline IS NULL THEN 1 ELSE 0 END,
i.member_of_timeline,
LENGTH(i.instance_uid),
i.instance_uid

Copilot uses AI. Check for mistakes.
""",
(soa_id,),
)
instances = [
{
"id": r[0],
"name": r[1],
"instance_uid": r[2],
"label": r[3],
"member_of_timeline": r[4],
"encounter_name": r[5],
"encounter_label": r[6],
"epoch_name": r[7],
"epoch_label": r[8],
"window_label": r[9],
"timing_label": r[10],
"timing_name": r[11],
"study_day": r[12],
}
for r in cur.fetchall()
]
conn.close()
return instances


def _add_header_rows_to_worksheet(worksheet, enriched_instances):
"""Add header rows to a worksheet with instance metadata."""
# Insert 6 rows at the top for header rows
worksheet.insert_rows(1, 6)
Comment on lines +2007 to +2010
Copy link

Copilot AI Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The magic number 6 represents the number of header rows (Epoch, Encounter, Instance, Study Day, Timing, Visit Window) but is not documented or named. Define a constant like HEADER_ROW_COUNT = 6 to make the code self-documenting and easier to maintain if header rows change.

Suggested change
def _add_header_rows_to_worksheet(worksheet, enriched_instances):
"""Add header rows to a worksheet with instance metadata."""
# Insert 6 rows at the top for header rows
worksheet.insert_rows(1, 6)
HEADER_ROW_COUNT = 6
def _add_header_rows_to_worksheet(worksheet, enriched_instances):
"""Add header rows to a worksheet with instance metadata."""
# Insert 6 rows at the top for header rows
worksheet.insert_rows(1, HEADER_ROW_COUNT)

Copilot uses AI. Check for mistakes.
Copy link

Copilot AI Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment says '6 rows' but does not say what they are. The header construction writes 6 distinct semantic rows (Epoch, Encounter, Instance, Study Day, Timing, Visit Window); consider adding a comment that names each of these rows for future maintainability.

Copilot uses AI. Check for mistakes.

# Build header rows
# Row 1: Epoch (with merged cells for consecutive same values)
worksheet.cell(1, 1, "")
worksheet.cell(1, 2, "Epoch:")
col_idx = 3
epoch_groups = [] # Track (value, start_col, end_col) for merging
prev_epoch = None
start_col = 3
Copy link

Copilot AI Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

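The start_col = 3 initialization on line 2021 is redundant: col_idx is already set to 3 on line 2018, and start_col is reassigned to col_idx on the first loop iteration (while prev_epoch is None) before it is ever read. When enriched_instances is empty, start_col is never read at all, so the initialization on line 2021 can be removed.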

Suggested change
start_col = 3

Copilot uses AI. Check for mistakes.
for i, inst in enumerate(enriched_instances):
epoch_val = inst.get("epoch_label") or inst.get("epoch_name") or ""
if prev_epoch is None:
prev_epoch = epoch_val
start_col = col_idx
elif prev_epoch != epoch_val:
epoch_groups.append((prev_epoch, start_col, col_idx - 1))
prev_epoch = epoch_val
start_col = col_idx
col_idx += 1
# Add last group
if prev_epoch is not None:
epoch_groups.append((prev_epoch, start_col, col_idx - 1))

# Write and merge epoch cells
for epoch_val, start, end in epoch_groups:
worksheet.cell(1, start, epoch_val)
if start != end:
worksheet.merge_cells(
start_row=1, start_column=start, end_row=1, end_column=end
)

# Row 2: Encounter
worksheet.cell(2, 1, "")
worksheet.cell(2, 2, "Encounter:")
for i, inst in enumerate(enriched_instances):
encounter_val = inst.get("encounter_label") or inst.get("encounter_name") or ""
worksheet.cell(2, i + 3, encounter_val)

# Row 3: Instance (ScheduledActivityInstance)
worksheet.cell(3, 1, "")
worksheet.cell(3, 2, "Instance:")
for i, inst in enumerate(enriched_instances):
instance_val = inst.get("label") or inst.get("name") or ""
worksheet.cell(3, i + 3, instance_val)

# Row 4: Study Day
worksheet.cell(4, 1, "")
worksheet.cell(4, 2, "Study Day:")
for i, inst in enumerate(enriched_instances):
study_day_val = inst.get("study_day") or ""
worksheet.cell(4, i + 3, study_day_val)

# Row 5: Timing
worksheet.cell(5, 1, "")
worksheet.cell(5, 2, "Timing:")
for i, inst in enumerate(enriched_instances):
timing_val = inst.get("timing_label") or inst.get("timing_name") or ""
worksheet.cell(5, i + 3, timing_val)

# Row 6: Visit Window
worksheet.cell(6, 1, "")
worksheet.cell(6, 2, "Visit Window:")
for i, inst in enumerate(enriched_instances):
window_val = inst.get("window_label") or ""
worksheet.cell(6, i + 3, window_val)


# API endpoint for creating new Study/SOA
@app.post("/soa")
def create_soa(payload: SOACreate):
Expand Down Expand Up @@ -2722,13 +2836,108 @@ def export_xlsx(soa_id: int, left: Optional[int] = None, right: Optional[int] =
except Exception as e:
# Provide an error sheet to highlight issue rather than failing entire export
concept_diff_df = pd.DataFrame([[str(e)]], columns=["ConceptDiffError"])
# Fetch enriched instances for header rows
enriched_instances = _fetch_enriched_instances(soa_id)

# Fetch timelines
conn_tl = _connect()
cur_tl = conn_tl.cursor()
cur_tl.execute(
"""
SELECT schedule_timeline_uid,name,main_timeline
FROM schedule_timelines
WHERE soa_id=?
ORDER BY main_timeline DESC, name
""",
(soa_id,),
)
timelines = [
{
"schedule_timeline_uid": r[0],
"name": r[1],
"main_timeline": bool(r[2]),
}
for r in cur_tl.fetchall()
]
conn_tl.close()

# Group enriched instances by timeline
instances_by_timeline = {}
for inst in enriched_instances:
timeline_key = inst.get("member_of_timeline") or "unassigned"
if timeline_key not in instances_by_timeline:
instances_by_timeline[timeline_key] = []
instances_by_timeline[timeline_key].append(inst)

with pd.ExcelWriter(bio, engine="openpyxl") as writer:
study_df.to_excel(writer, index=False, sheet_name="Study")
df.to_excel(writer, index=False, sheet_name="SoA")
mapping_df.to_excel(writer, index=False, sheet_name="ConceptMappings")
audit_df.to_excel(writer, index=False, sheet_name="RollbackAudit")
if concept_diff_df is not None:
concept_diff_df.to_excel(writer, index=False, sheet_name="ConceptDiff")

# Create a worksheet for each timeline
if timelines:
for timeline in timelines:
timeline_uid = timeline["schedule_timeline_uid"]
timeline_name = timeline["name"]
timeline_instances = instances_by_timeline.get(timeline_uid, [])

if not timeline_instances:
continue

# Build matrix data for this timeline
cell_lookup = {
(c["instance_id"], c["activity_id"]): c.get("status", "")
for c in cells
if c.get("instance_id") is not None
and c.get("activity_id") is not None
}

# Build instance headers for this timeline
instance_headers_tl = [inst["name"] for inst in timeline_instances]

# Build rows for this timeline
rows_tl = []
for a in activities:
row = [a["name"]]
for inst in timeline_instances:
row.append(cell_lookup.get((inst["id"], a["id"]), ""))
rows_tl.append(row)

# Create DataFrame for this timeline
df_tl = pd.DataFrame(
rows_tl, columns=["Activity"] + instance_headers_tl
)

# Add concepts columns
if len(concepts_strings) == len(df_tl):
df_tl.insert(1, "Concepts", concepts_strings)
df_tl["Concept UIDs"] = concept_titles_strings

# Sanitize sheet name (max 31 chars, no special chars)
sheet_name = f"SoA - {timeline_name}"[:31]
Copy link

Copilot AI Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The magic number 31 represents Excel's maximum sheet name length but is not documented. Define a constant like EXCEL_MAX_SHEET_NAME_LENGTH = 31 to make this constraint explicit and easier to maintain.

Copilot uses AI. Check for mistakes.
Copy link

Copilot AI Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

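The sheet name is truncated to 31 characters (Excel's limit) after adding the 'SoA - ' prefix. If timeline_name is long, multiple timelines can end up with identical truncated sheet names, causing overwrites. Consider truncating timeline_name first so the length budget is explicit (the 6-character prefix leaves 25 characters): sheet_name = f"SoA - {timeline_name[:25]}". Note that truncation alone does not guarantee uniqueness; names that still collide need to be de-duplicated, for example by appending a short numeric suffix.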

Copilot uses AI. Check for mistakes.
sheet_name = (
sheet_name.replace("/", "-")
.replace("\\", "-")
.replace("*", "-")
.replace("?", "-")
.replace(":", "-")
.replace("[", "-")
.replace("]", "-")
)
Comment on lines +2919 to +2928
Copy link

Copilot AI Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sanitizing the sheet name happens in two separate steps: truncation first, then character replacement. If the original timeline_name is exactly 31 characters and contains invalid characters that need replacement, the final sheet_name could still be 31 characters but with different content than intended. Consider sanitizing before truncation to ensure consistent behavior.

Copilot uses AI. Check for mistakes.

# Write to Excel
df_tl.to_excel(writer, index=False, sheet_name=sheet_name)

# Add header rows
worksheet_tl = writer.sheets[sheet_name]
_add_header_rows_to_worksheet(worksheet_tl, timeline_instances)
else:
# No timelines, create single SoA sheet as before
df.to_excel(writer, index=False, sheet_name="SoA")
worksheet = writer.sheets["SoA"]
_add_header_rows_to_worksheet(worksheet, enriched_instances)
bio.seek(0)
# Dynamic filename pattern: studyid_version.xlsx
# Determine study_id and version context
Expand Down Expand Up @@ -3116,9 +3325,6 @@ def delete_activity(soa_id: int, activity_id: int):
return {"deleted_activity_id": activity_id}


# API endpoint for deleting an Epoch <- moved to routers/epochs.py


@app.get("/", response_class=HTMLResponse)
def ui_index(request: Request):
"""Render home page for the SoA Workbench."""
Expand Down Expand Up @@ -3280,38 +3486,6 @@ def ui_update_meta(
)


# Helper to fetch element audit rows with legacy-safe columns -> Deprecated (Moved to audits.py, audits.html)
"""
def _fetch_element_audits(soa_id: int):
conn_ea = _connect()
cur_ea = conn_ea.cursor()
cur_ea.execute("PRAGMA table_info(element_audit)")
cols = [row[1] for row in cur_ea.fetchall()]
want = [
"id",
"element_id",
"action",
"before_json",
"after_json",
"performed_at",
]
available = [c for c in want if c in cols]
element_audits = []
if available:
select_sql = f"SELECT {', '.join(available)} FROM element_audit WHERE soa_id=? ORDER BY id DESC"
cur_ea.execute(select_sql, (soa_id,))
for r in cur_ea.fetchall():
item = {}
for i, c in enumerate(available):
item[c] = r[i]
for k in want:
item.setdefault(k, None)
element_audits.append(item)
conn_ea.close()
return element_audits
"""


# UI endpoint for rendering SOA edit page
@app.get("/ui/soa/{soa_id}/edit", response_class=HTMLResponse)
def ui_edit(request: Request, soa_id: int):
Expand Down Expand Up @@ -3582,51 +3756,16 @@ def ui_edit(request: Request, soa_id: int):
cur_inst = conn_inst.cursor()
cur_inst.execute(
"""
SELECT i.id,
i.name,
i.instance_uid,
i.label,
i.member_of_timeline,
(SELECT t.name
FROM schedule_timelines t
WHERE t.schedule_timeline_uid = i.member_of_timeline
AND t.soa_id = i.soa_id) AS timeline_name,
(SELECT v.name
FROM visit v
WHERE v.encounter_uid = i.encounter_uid
AND v.soa_id = i.soa_id) AS encounter_name,
(SELECT e.name
FROM epoch e
WHERE e.epoch_uid = i.epoch_uid
AND e.soa_id = i.soa_id) AS epoch_name,
(SELECT tm.window_label
FROM visit v
JOIN timing tm
ON tm.id = v.scheduledAtId
AND tm.soa_id = v.soa_id
WHERE v.encounter_uid = i.encounter_uid
AND v.soa_id = i.soa_id
LIMIT 1) AS window_label,
(SELECT tm.label
FROM visit v
JOIN timing tm
ON tm.id = v.scheduledAtId
AND tm.soa_id = v.soa_id
WHERE v.encounter_uid = i.encounter_uid
AND v.soa_id = i.soa_id
LIMIT 1) AS timing_label,
(SELECT tm.value
FROM visit v
JOIN timing tm
ON tm.id = v.scheduledAtId
AND tm.soa_id = v.soa_id
WHERE v.encounter_uid = i.encounter_uid
AND v.soa_id = i.soa_id
LIMIT 1) AS study_day
SELECT i.id,i.name,i.instance_uid,i.label,i.member_of_timeline,st.name AS timeline_name,st.label AS timeline_label,
v.name AS encounter_name,v.label AS encounter_label,e.name AS epoch_name,e.epoch_label as epoch_label,tm.window_label,tm.label AS timing_label,tm.name AS timing_name,tm.value AS study_day
FROM instances i
WHERE soa_id=?
ORDER BY member_of_timeline, length(instance_uid), instance_uid
""",
LEFT JOIN schedule_timelines st ON st.schedule_timeline_uid = i.member_of_timeline AND st.soa_id = i.soa_id
LEFT JOIN visit v ON v.encounter_uid = i.encounter_uid AND v.soa_id = i.soa_id
LEFT JOIN epoch e ON e.epoch_uid = i.epoch_uid AND e.soa_id = i.soa_id
LEFT JOIN timing tm ON tm.id = v.scheduledAtId AND tm.soa_id = v.soa_id
WHERE i.soa_id=?
ORDER BY COALESCE(i.member_of_timeline, 'zzz'), LENGTH(i.instance_uid), i.instance_uid
Copy link

Copilot AI Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicate of the same magic string pattern found in _fetch_enriched_instances (line 1982). Both queries use 'zzz' as a sentinel, which should be addressed consistently across both functions.

Copilot uses AI. Check for mistakes.
""",
(soa_id,),
)
instances = [
Expand All @@ -3637,11 +3776,15 @@ def ui_edit(request: Request, soa_id: int):
"label": r[3],
"member_of_timeline": r[4],
"timeline_name": r[5],
"encounter_name": r[6],
"epoch_name": r[7],
"window_label": r[8],
"timing_label": r[9],
"study_day": iso_duration_to_days(r[10]),
"timeline_label": r[6],
"encounter_name": r[7],
"encounter_label": r[8],
"epoch_name": r[9],
"epoch_label": r[10],
"window_label": r[11],
"timing_label": r[12],
"timing_name": r[13],
"study_day": iso_duration_to_days(r[14]),
}
for r in cur_inst.fetchall()
]
Expand Down
3 changes: 2 additions & 1 deletion src/soa_builder/web/initialize_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def _init_db():
concept_title TEXT,
concept_uid TEXT, -- immutable BiomedicalConcept_N identifier unique within an SOA
activity_uid TEXT, -- joins to the activity table using this uid unique within an SOA
soa_id INT
soa_id INT,
href TEXT -- stores the API address where the BC exists; codeSystem & codeSystemVersion
)"""
)

Expand Down
Loading