# === scripts/enrich_biomedical_concept.py ===
"""One-time script: populate label and description in biomedical_concept from CDISC API.

Usage:
    CDISC_API_KEY=<key> python scripts/enrich_biomedical_concept.py

Processes only rows where label IS NULL or description IS NULL, so it is safe
to re-run after partial failures.
"""

import os
import sqlite3
import sys
import time
from pathlib import Path

DB = Path("soa_builder_web.db")
URL_PREFIX = "https://api.library.cdisc.org/api/cosmos/v2/mdr/bc/biomedicalconcepts/"


def build_headers(api_key=None, subscription_key=None):
    """Return CDISC Library API request headers.

    Either key may stand in for the other (mirrors the web app's fallback
    rules), so a deployment that sets only one env var still sends both the
    subscription-key and bearer-token header schemes.
    """
    api_key = api_key or subscription_key
    subscription_key = subscription_key or api_key
    headers = {"Accept": "application/json"}
    if subscription_key:
        headers["Ocp-Apim-Subscription-Key"] = subscription_key
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
        headers["api-key"] = api_key
    return headers


def enrich_main() -> None:
    """Fetch shortName/definition for every biomedical_concept row missing them."""
    # Local import keeps the module importable (and testable) without requests.
    import requests

    headers = build_headers(
        os.environ.get("CDISC_API_KEY"), os.environ.get("CDISC_SUBSCRIPTION_KEY")
    )
    conn = sqlite3.connect(DB)
    cur = conn.cursor()

    cur.execute(
        "SELECT id, code FROM biomedical_concept WHERE label IS NULL OR description IS NULL"
    )
    rows = cur.fetchall()
    print(f"Rows to enrich: {len(rows)}")

    updated = skipped = errors = 0
    for row_id, code in rows:
        if not code:
            skipped += 1
            continue
        try:
            resp = requests.get(URL_PREFIX + code, headers=headers, timeout=15)
            if resp.status_code != 200:
                print(f"  SKIP {code}: HTTP {resp.status_code}")
                skipped += 1
                continue
            data = resp.json()
            label = data.get("shortName")
            description = data.get("definition")
            cur.execute(
                "UPDATE biomedical_concept SET label=?, description=? WHERE id=?",
                (label, description, row_id),
            )
            updated += 1
            print(f"  OK {code}: {label}")
        except Exception as exc:
            # Best-effort: one bad row must not abort the run; rerun is safe.
            print(f"  ERR {code}: {exc}")
            errors += 1
        time.sleep(0.1)  # avoid hammering the API

    conn.commit()
    conn.close()
    print(f"\nDone. updated={updated} skipped={skipped} errors={errors}")


# === scripts/populate_biomedical_concept.py ===
# (In the repository this section is its own file with the docstring below.)
#
# One-time script: populate biomedical_concept from activity_concept.
#
# Usage:
#     python scripts/populate_biomedical_concept.py
#
# Deduplicates by (soa_id, concept_uid). Skips rows where concept_uid is NULL.
# Aborts without writing if biomedical_concept is already populated.


def populate_main() -> None:
    """Seed biomedical_concept from distinct (soa_id, concept_uid) pairs."""
    conn = sqlite3.connect(DB)
    cur = conn.cursor()

    cur.execute("SELECT COUNT(*) FROM biomedical_concept")
    existing = cur.fetchone()[0]
    if existing:
        print(f"biomedical_concept already has {existing} rows — aborting.")
        conn.close()
        sys.exit(0)

    # MIN() makes the chosen title/code deterministic per group; the original
    # bare columns were an arbitrary-row pick under SQLite's GROUP BY rules,
    # and ORDER BY referenced a non-aggregated column.
    cur.execute(
        "SELECT soa_id, concept_uid, MIN(concept_title), MIN(concept_code) "
        "FROM activity_concept "
        "WHERE concept_uid IS NOT NULL "
        "GROUP BY soa_id, concept_uid "
        "ORDER BY soa_id, MIN(concept_title)"
    )
    rows = cur.fetchall()

    inserted = 0
    for soa_id, concept_uid, concept_title, concept_code in rows:
        cur.execute(
            "INSERT INTO biomedical_concept (soa_id, biomedical_concept_uid, name, code) "
            "VALUES (?, ?, ?, ?)",
            (soa_id, concept_uid, concept_title, concept_code),
        )
        inserted += 1

    conn.commit()
    conn.close()
    print(f"Inserted {inserted} rows into biomedical_concept.")


if __name__ == "__main__":
    # Guarding the entry point means importing this module has no side effects
    # (the originals ran DB + network work at import time). The two sections
    # above live in separate script files in the repo; run in dependency order.
    populate_main()
    enrich_main()
"""One-time script: populate biomedical_concept_property from DSS variables.

Usage:
    CDISC_SUBSCRIPTION_KEY=<key> python scripts/populate_biomedical_concept_property.py

For each biomedical_concept row with no biomedical_concept_property rows, fetches
DSS variables from the CDISC Library API and inserts property rows. Idempotent —
re-running skips concepts that already have property rows and never recreates UIDs.
"""

import os
import sqlite3
import sys
from pathlib import Path

DB = Path("soa_builder_web.db")
API_BASE = "https://api.library.cdisc.org/api/cosmos/v2"


def build_headers(api_key=None, subscription_key=None):
    """Return CDISC Library API headers (either key can stand in for the other)."""
    api_key = api_key or subscription_key
    subscription_key = subscription_key or api_key
    headers = {"Accept": "application/json"}
    if subscription_key:
        headers["Ocp-Apim-Subscription-Key"] = subscription_key
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
        headers["api-key"] = api_key
    return headers


def next_uid_number(uids):
    """Return 1 + the highest trailing _N among *uids*, ignoring malformed ones.

    The original `max(int(u.split("_")[1]) ...)` aborted the whole run with a
    ValueError on a single malformed uid (e.g. 'Code_x'); skipping such uids
    matches the defensive numbering used by the web app and never re-issues a
    number that is already taken.
    """
    highest = 0
    for uid in uids:
        try:
            highest = max(highest, int(uid.split("_")[1]))
        except (IndexError, ValueError):
            continue
    return highest + 1


def main() -> None:
    """Discover concepts lacking properties, fetch DSS variables, insert rows."""
    # Local import keeps the module importable (and testable) without requests.
    import requests

    headers = build_headers(
        os.environ.get("CDISC_API_KEY"), os.environ.get("CDISC_SUBSCRIPTION_KEY")
    )

    # Discover biomedical_concept rows with no property rows. MIN() makes the
    # picked concept_code/activity_id deterministic per group (the original
    # bare columns were an arbitrary-row pick under SQLite GROUP BY).
    conn = sqlite3.connect(DB)
    cur = conn.cursor()
    cur.execute(
        """
        SELECT bc.soa_id, bc.biomedical_concept_uid,
               MIN(ac.concept_code), MIN(ac.activity_id)
        FROM biomedical_concept bc
        LEFT JOIN biomedical_concept_property bcp
               ON bcp.biomedical_concept_uid = bc.biomedical_concept_uid
              AND bcp.soa_id = bc.soa_id
        LEFT JOIN activity_concept ac
               ON ac.concept_uid = bc.biomedical_concept_uid
              AND ac.soa_id = bc.soa_id
        WHERE bcp.id IS NULL
          AND ac.concept_code IS NOT NULL
        GROUP BY bc.soa_id, bc.biomedical_concept_uid
        """
    )
    rows = cur.fetchall()
    conn.close()

    if not rows:
        print("No biomedical_concept rows need property population — done.")
        sys.exit(0)

    print(f"Found {len(rows)} concepts to process.")
    inserted_total = 0

    for soa_id, bc_uid, concept_code, activity_id in rows:
        # Step 1: discover the DSS href for this concept.
        try:
            r1 = requests.get(
                API_BASE
                + "/mdr/specializations/datasetspecializations?biomedicalconcept="
                + concept_code,
                headers=headers,
                timeout=15,
            )
        except Exception as e:
            print(f"  {concept_code}: network error step 1: {e}")
            continue
        if r1.status_code != 200:
            print(f"  {concept_code}: step 1 HTTP {r1.status_code} — skipping")
            continue
        try:
            sdtm_links = r1.json()["_links"]["datasetSpecializations"]["sdtm"]
        except (KeyError, TypeError):
            print(f"  {concept_code}: no sdtm links — skipping")
            continue
        if not sdtm_links:
            print(f"  {concept_code}: empty sdtm links — skipping")
            continue
        dss_href = sdtm_links[0]["href"]
        if dss_href.startswith("/"):
            dss_href = API_BASE + dss_href

        # Step 2: fetch the DSS detail payload.
        try:
            r2 = requests.get(dss_href, headers=headers, timeout=15)
        except Exception as e:
            print(f"  {concept_code}: network error step 2: {e}")
            continue
        if r2.status_code != 200:
            print(f"  {concept_code}: step 2 HTTP {r2.status_code} — skipping")
            continue
        raw = r2.json()
        variables = raw.get("variables") or []
        if not variables:
            print(f"  {concept_code}: no variables — skipping")
            continue

        pkg_href = (raw.get("_links") or {}).get("parentPackage") or {}
        pkg_href = pkg_href.get("href", "") if isinstance(pkg_href, dict) else ""
        try:
            # Version segment of the parentPackage href; empty if the href is
            # missing or shorter than expected.
            code_system_version = pkg_href.split("/")[5]
        except Exception:
            code_system_version = ""

        # Step 3: persist — one connection/transaction per concept, so a
        # failure mid-run leaves already-processed concepts committed.
        conn = sqlite3.connect(DB)
        cur = conn.cursor()
        inserted = 0

        for var in variables:
            var_concept_id = var.get("dataElementConceptId")
            if not var_concept_id:
                continue
            var_name = var.get("name")
            var_required = var.get("mandatoryVariable")
            var_datatype = var.get("dataType")

            # Skip if this named property already exists for this BC — UIDs
            # are immutable and must never be recreated.
            cur.execute(
                "SELECT id FROM biomedical_concept_property"
                " WHERE soa_id=? AND biomedical_concept_uid=? AND name=?",
                (soa_id, bc_uid, var_name),
            )
            if cur.fetchone():
                continue

            # Always create a new code row for this property (never reuse).
            # Scan code AND code_association so numbering cannot collide with
            # uids the web app has already issued (the original scanned only
            # the code table).
            cur.execute(
                "SELECT code_uid FROM code WHERE soa_id=? AND code_uid LIKE 'Code_%'"
                " UNION"
                " SELECT code_uid FROM code_association WHERE soa_id=? AND code_uid LIKE 'Code_%'",
                (soa_id, soa_id),
            )
            code_n = next_uid_number(x[0] for x in cur.fetchall() if x[0])
            code_uid = f"Code_{code_n}"
            cur.execute(
                "INSERT INTO code"
                " (soa_id, code_uid, code, code_system, code_system_version, decode)"
                " VALUES (?,?,?,?,?,?)",
                (soa_id, code_uid, var_concept_id, pkg_href, code_system_version, var_name),
            )

            # Always create a new alias_code row for this property (never reuse).
            cur.execute(
                "SELECT alias_code_uid FROM alias_code"
                " WHERE soa_id=? AND alias_code_uid LIKE 'AliasCode_%'",
                (soa_id,),
            )
            alias_n = next_uid_number(x[0] for x in cur.fetchall() if x[0])
            alias_uid = f"AliasCode_{alias_n}"
            cur.execute(
                "INSERT INTO alias_code (soa_id, alias_code_uid, standard_code) VALUES (?,?,?)",
                (soa_id, alias_uid, code_uid),
            )

            # Generate a monotonic BiomedicalConceptProperty_N uid.
            cur.execute(
                "SELECT biomedical_concept_property_uid FROM biomedical_concept_property"
                " WHERE soa_id=?"
                " AND biomedical_concept_property_uid LIKE 'BiomedicalConceptProperty_%'",
                (soa_id,),
            )
            n = next_uid_number(r[0] for r in cur.fetchall() if r[0])
            bcp_uid = f"BiomedicalConceptProperty_{n}"

            cur.execute(
                "INSERT INTO biomedical_concept_property"
                " (soa_id, biomedical_concept_uid, biomedical_concept_property_uid,"
                " name, label, isRequired, datatype, code)"
                " VALUES (?,?,?,?,?,?,?,?)",
                (
                    soa_id,
                    bc_uid,
                    bcp_uid,
                    var_name,
                    var_name,
                    var_required,
                    var_datatype,
                    alias_uid,
                ),
            )
            inserted += 1

        conn.commit()
        conn.close()
        print(f"  {concept_code}: inserted {inserted} property rows")
        inserted_total += inserted

    print(f"\nDone. Total property rows inserted: {inserted_total}")


if __name__ == "__main__":
    # Entry-point guard: importing this module must have no side effects
    # (the original ran DB + network work at import time).
    main()
+_migrate_backfill_biomedical_concept_codes() # Include routers @@ -757,6 +759,10 @@ def _rollback_freeze(soa_id: int, freeze_id: int) -> dict: "DELETE FROM activity_concept WHERE activity_id IN (SELECT id FROM activity WHERE soa_id=? )", (soa_id,), ) + cur.execute("DELETE FROM biomedical_concept WHERE soa_id=?", (soa_id,)) + cur.execute("DELETE FROM alias_code WHERE soa_id=?", (soa_id,)) + cur.execute("DELETE FROM code WHERE soa_id=?", (soa_id,)) + cur.execute("DELETE FROM code_association WHERE soa_id=?", (soa_id,)) cur.execute("DELETE FROM activity WHERE soa_id=?", (soa_id,)) cur.execute("DELETE FROM visit WHERE soa_id=?", (soa_id,)) cur.execute("DELETE FROM element WHERE soa_id=?", (soa_id,)) @@ -884,6 +890,7 @@ def _rollback_freeze(soa_id: int, freeze_id: int) -> dict: "INSERT INTO activity_concept (activity_id, concept_code, concept_title) VALUES (?,?,?)", (new_aid, code, title), ) + _upsert_biomedical_concept(cur, soa_id, concept_uid, title, code) inserted_concepts += 1 conn.commit() conn.close() @@ -1817,6 +1824,32 @@ async def lifespan(app: FastAPI): logger.info("Lifespan preload SDTM specializations count=%d", len(sdtm_specs)) except Exception as e: logger.error("Lifespan SDTM specializations preload failed: %s", e) + try: + import threading + from concurrent.futures import ThreadPoolExecutor + + _conn = _connect() + _cur = _conn.cursor() + _cur.execute( + "SELECT code, soa_id FROM code" + " WHERE code_system IS NULL AND code IS NOT NULL" + ) + _unenriched = _cur.fetchall() + _conn.close() + if _unenriched: + logger.info( + "Lifespan scheduling enrichment for %d unenriched code rows", + len(_unenriched), + ) + + def _run_enrichment_pool(_rows=_unenriched): + with ThreadPoolExecutor(max_workers=4) as _pool: + for _concept_code, _soa_id in _rows: + _pool.submit(_enrich_code_bg, _concept_code, _soa_id) + + threading.Thread(target=_run_enrichment_pool, daemon=True).start() + except Exception as e: + logger.error("Lifespan code enrichment startup failed: %s", 
e) yield # No shutdown actions required presently. @@ -2252,6 +2285,27 @@ def set_activity_concepts(soa_id: int, activity_id: int, payload: ConceptsUpdate # Clear existing mappings; include soa_id if column exists ac_has_soa = _table_has_columns(cur, "activity_concept", ("soa_id",)) ac_has_actuid = _table_has_columns(cur, "activity_concept", ("activity_uid",)) + ac_has_conceptuid = _table_has_columns(cur, "activity_concept", ("concept_uid",)) + # Capture existing pairs before delete for cascade cleanup + if ac_has_soa: + if ac_has_conceptuid: + cur.execute( + "SELECT concept_code, concept_uid FROM activity_concept" + " WHERE activity_id=? AND soa_id=?", + (activity_id, soa_id), + ) + else: + cur.execute( + "SELECT concept_code, NULL FROM activity_concept" + " WHERE activity_id=? AND soa_id=?", + (activity_id, soa_id), + ) + else: + cur.execute( + "SELECT concept_code, NULL FROM activity_concept WHERE activity_id=?", + (activity_id,), + ) + old_pairs = cur.fetchall() if ac_has_soa: cur.execute( "DELETE FROM activity_concept WHERE activity_id=? 
def _cdisc_api_headers() -> dict:
    """Build request headers for the CDISC Library API from the environment.

    Either CDISC_API_KEY or CDISC_SUBSCRIPTION_KEY may stand in for the other,
    so a deployment that sets only one key still sends both the
    subscription-key and bearer-token header schemes. Shared by all the
    background enrichment tasks below (the logic was previously triplicated).
    """
    import os

    api_key = os.environ.get("CDISC_API_KEY") or os.environ.get(
        "CDISC_SUBSCRIPTION_KEY"
    )
    subscription_key = os.environ.get("CDISC_SUBSCRIPTION_KEY") or api_key
    headers: dict = {"Accept": "application/json"}
    if subscription_key:
        headers["Ocp-Apim-Subscription-Key"] = subscription_key
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
        headers["api-key"] = api_key
    return headers


def _next_seq_uid(prefix: str, uids) -> str:
    """Return '<prefix>_N' where N is 1 + the highest existing trailing number.

    Malformed uids (no '_' suffix, non-numeric tail) are skipped rather than
    crashing with ValueError or falling back to len()+1, which could re-issue
    a number that is already taken.
    """
    highest = 0
    for uid in uids:
        try:
            highest = max(highest, int(uid.split("_")[1]))
        except (IndexError, ValueError):
            continue
    return f"{prefix}_{highest + 1}"


def _populate_bc_properties_bg(
    soa_id: int, activity_id: int, concept_code: str
) -> None:
    """Background task: populate biomedical_concept_property from DSS variables.

    Prefers activity_concept.dss_href (set by manual or auto DSS assignment) over
    fresh step-1 discovery, so the exact selected DSS drives property population.
    UIDs are immutable — existing rows are skipped. All writes in one transaction.
    """
    import requests as _requests

    headers = _cdisc_api_headers()

    try:
        # Read existing dss_href and bc_uid from the DB before any API call.
        _conn = _connect()
        _cur = _conn.cursor()
        _cur.execute(
            "SELECT concept_uid, dss_href FROM activity_concept"
            " WHERE activity_id=? AND concept_code=? AND soa_id=?",
            (activity_id, concept_code, soa_id),
        )
        _ac = _cur.fetchone()
        _conn.close()
        bc_uid = _ac[0] if _ac else None
        dss_href = (_ac[1] if _ac else None) or None
        if not bc_uid:
            return  # legacy row without a concept_uid — nothing to attach to

        if dss_href:
            # Use the already-known href — skip step-1 discovery.
            if dss_href.startswith("/"):
                dss_href = "https://api.library.cdisc.org/api/cosmos/v2" + dss_href
        else:
            # Step 1: discover the DSS href for this concept.
            list_url = (
                "https://api.library.cdisc.org/api/cosmos/v2/mdr/specializations"
                "/datasetspecializations?biomedicalconcept=" + concept_code
            )
            r1 = _requests.get(list_url, headers=headers, timeout=15)
            if r1.status_code != 200:
                return
            data1 = r1.json()
            sdtm_links = data1["_links"]["datasetSpecializations"]["sdtm"]
            if not sdtm_links:
                return
            dss_href = sdtm_links[0]["href"]
            if dss_href.startswith("/"):
                dss_href = "https://api.library.cdisc.org/api/cosmos/v2" + dss_href

        # Step 2: fetch the DSS detail payload.
        r2 = _requests.get(dss_href, headers=headers, timeout=15)
        if r2.status_code != 200:
            return
        raw = r2.json()

        variables = raw.get("variables") or []
        if not variables:
            return

        pkg_href = (raw.get("_links") or {}).get("parentPackage") or {}
        pkg_href = pkg_href.get("href", "") if isinstance(pkg_href, dict) else ""
        try:
            # NOTE(review): the version segment is taken at index 5 here but at
            # index 4 in _enrich_code_bg — the two endpoints' parentPackage
            # hrefs appear to have different depths; confirm against live
            # API payloads.
            code_system_version = pkg_href.split("/")[5]
        except Exception:
            code_system_version = ""

        # Step 3: persist — single write-locked transaction so concurrent
        # tasks cannot interleave their UID-numbering reads and inserts.
        conn = _connect()
        conn.isolation_level = None  # manual transaction management
        cur = conn.cursor()
        try:
            cur.execute("BEGIN IMMEDIATE")  # acquire write lock before UID reads

            for var in variables:
                var_concept_id = var.get("dataElementConceptId")
                if not var_concept_id:
                    continue
                var_name = var.get("name")
                var_required = var.get("mandatoryVariable")
                var_datatype = var.get("dataType")

                # Skip if this named property already exists for this BC —
                # UIDs are immutable and must never be recreated.
                cur.execute(
                    "SELECT id FROM biomedical_concept_property"
                    " WHERE soa_id=? AND biomedical_concept_uid=? AND name=?",
                    (soa_id, bc_uid, var_name),
                )
                if cur.fetchone():
                    continue

                # Always create a new code row for this property (never reuse);
                # number against code AND code_association so uids can't collide.
                cur.execute(
                    "SELECT code_uid FROM code WHERE soa_id=? AND code_uid LIKE 'Code_%'"
                    " UNION"
                    " SELECT code_uid FROM code_association WHERE soa_id=? AND code_uid LIKE 'Code_%'",
                    (soa_id, soa_id),
                )
                code_uid = _next_seq_uid(
                    "Code", (x[0] for x in cur.fetchall() if x[0])
                )
                cur.execute(
                    "INSERT INTO code"
                    " (soa_id, code_uid, code, code_system, code_system_version, decode)"
                    " VALUES (?,?,?,?,?,?)",
                    (
                        soa_id,
                        code_uid,
                        var_concept_id,
                        pkg_href,
                        code_system_version,
                        var_name,
                    ),
                )

                # Always create a new alias_code row for this property (never reuse).
                cur.execute(
                    "SELECT alias_code_uid FROM alias_code"
                    " WHERE soa_id=? AND alias_code_uid LIKE 'AliasCode_%'",
                    (soa_id,),
                )
                alias_uid = _next_seq_uid(
                    "AliasCode", (x[0] for x in cur.fetchall() if x[0])
                )
                cur.execute(
                    "INSERT INTO alias_code (soa_id, alias_code_uid, standard_code)"
                    " VALUES (?,?,?)",
                    (soa_id, alias_uid, code_uid),
                )

                # Generate a monotonic BiomedicalConceptProperty_N uid.
                cur.execute(
                    "SELECT biomedical_concept_property_uid FROM biomedical_concept_property"
                    " WHERE soa_id=? AND biomedical_concept_property_uid"
                    " LIKE 'BiomedicalConceptProperty_%'",
                    (soa_id,),
                )
                bcp_uid = _next_seq_uid(
                    "BiomedicalConceptProperty",
                    (r[0] for r in cur.fetchall() if r[0]),
                )

                cur.execute(
                    "INSERT INTO biomedical_concept_property"
                    " (soa_id, biomedical_concept_uid, biomedical_concept_property_uid,"
                    " name, label, isRequired, datatype, code)"
                    " VALUES (?,?,?,?,?,?,?,?)",
                    (
                        soa_id,
                        bc_uid,
                        bcp_uid,
                        var_name,
                        var_name,
                        var_required,
                        var_datatype,
                        alias_uid,
                    ),
                )

            cur.execute("COMMIT")
        except Exception as _exc:
            try:
                cur.execute("ROLLBACK")
            except Exception:
                pass
            logger.warning(
                "_populate_bc_properties_bg: soa_id=%s concept=%s failed: %s",
                soa_id,
                concept_code,
                _exc,
            )
        finally:
            conn.close()
    except Exception:
        pass  # best-effort background task: step-1/2 failures are non-fatal


def _upsert_code(cur, soa_id: int, concept_code: str):
    """Get-or-create a code row for this conceptId within this SoA.

    The synchronous insert records (soa_id, code_uid, code) immediately.
    A background task (_enrich_code_bg) fills in code_system, code_system_version,
    and decode from the CDISC API.

    Always returns the code_uid (pre-existing or newly inserted), or None if no code.
    """
    if not concept_code:
        return None
    cur.execute(
        "SELECT code_uid FROM code WHERE soa_id=? AND code=?", (soa_id, concept_code)
    )
    row = cur.fetchone()
    if row:
        return row[0]  # pre-existing — return uid, do not re-insert
    # Number against both code and code_association so uids never collide.
    cur.execute(
        "SELECT code_uid FROM code WHERE soa_id=? AND code_uid LIKE 'Code_%'"
        " UNION"
        " SELECT code_uid FROM code_association WHERE soa_id=? AND code_uid LIKE 'Code_%'",
        (soa_id, soa_id),
    )
    uid = _next_seq_uid("Code", (x[0] for x in cur.fetchall() if x[0]))
    cur.execute(
        "INSERT INTO code (soa_id, code_uid, code) VALUES (?,?,?)",
        (soa_id, uid, concept_code),
    )
    return uid


def _upsert_alias_code(cur, soa_id: int, code_uid):
    """Get-or-create an alias_code row pointing to the given code_uid.

    Returns None if code_uid is None.
    Returns the existing alias_code_uid if already present for (soa_id, standard_code).
    Otherwise inserts and returns a new AliasCode_N uid.
    """
    if not code_uid:
        return None
    cur.execute(
        "SELECT alias_code_uid FROM alias_code WHERE soa_id=? AND standard_code=?",
        (soa_id, code_uid),
    )
    row = cur.fetchone()
    if row:
        return row[0]
    cur.execute(
        "SELECT alias_code_uid FROM alias_code"
        " WHERE soa_id=? AND alias_code_uid LIKE 'AliasCode_%'",
        (soa_id,),
    )
    alias_uid = _next_seq_uid("AliasCode", (x[0] for x in cur.fetchall() if x[0]))
    cur.execute(
        "INSERT INTO alias_code (soa_id, alias_code_uid, standard_code) VALUES (?,?,?)",
        (soa_id, alias_uid, code_uid),
    )
    return alias_uid


def _enrich_code_bg(concept_code: str, soa_id: int) -> None:
    """Background task: populate code_system, code_system_version, decode from CDISC API.

    Best-effort: any network/parse error leaves the code row unenriched; the
    lifespan sweep or a later call can fill it in.
    """
    import requests as _requests

    headers = _cdisc_api_headers()
    try:
        url = (
            "https://api.library.cdisc.org/api/cosmos/v2/mdr/bc/biomedicalconcepts/"
            + concept_code
        )
        resp = _requests.get(url, headers=headers, timeout=15)
        if resp.status_code != 200:
            return
        data = resp.json()
        href = (data.get("_links") or {}).get("parentPackage") or {}
        href = href.get("href", "") if isinstance(href, dict) else ""
        try:
            # NOTE(review): version segment taken at index 4 here vs index 5
            # for DSS parentPackage hrefs in _populate_bc_properties_bg — the
            # BC endpoint's href appears one segment shorter; confirm against
            # live payloads.
            code_system_version = href.split("/")[4]
        except Exception:
            code_system_version = ""
        decode = data.get("shortName")
        conn = _connect()
        cur = conn.cursor()
        cur.execute(
            "UPDATE code SET code_system=?, code_system_version=?, decode=?"
            " WHERE code=? AND soa_id=?",
            (href, code_system_version, decode, concept_code, soa_id),
        )
        conn.commit()
        conn.close()
    except Exception:
        pass  # best-effort enrichment; row stays unenriched until re-run


def _resolve_code_chain(cur, soa_id: int, alias_uid):
    """Return (code_uid, code_code, decode) for the given alias_code_uid in this SoA.

    Follows alias_code.standard_code -> code; any missing link yields Nones.
    """
    if not alias_uid:
        return None, None, None
    cur.execute(
        "SELECT standard_code FROM alias_code WHERE alias_code_uid=? AND soa_id=?",
        (alias_uid, soa_id),
    )
    ac_row = cur.fetchone()
    if not ac_row:
        return None, None, None
    code_uid_val = ac_row[0]
    cur.execute(
        "SELECT code, decode FROM code WHERE code_uid=? AND soa_id=?",
        (code_uid_val, soa_id),
    )
    c_row = cur.fetchone()
    return code_uid_val, (c_row[0] if c_row else None), (c_row[1] if c_row else None)


def _upsert_biomedical_concept(
    cur, soa_id: int, concept_uid, name: str, concept_code: str
):
    """Upsert a biomedical_concept row within an existing transaction.

    No-op when concept_uid is None (legacy schema) or already present.
    Always creates new code + alias_code rows (never reuses existing ones).
    Records a create audit entry when a new row is inserted.
    """
    if not concept_uid:
        return
    # No-op if already present.
    cur.execute(
        "SELECT id FROM biomedical_concept WHERE soa_id=? AND biomedical_concept_uid=?",
        (soa_id, concept_uid),
    )
    if cur.fetchone():
        return

    # Always create a new code row for this BC (never reuse).
    alias_uid = None
    if concept_code:
        cur.execute(
            "SELECT code_uid FROM code WHERE soa_id=? AND code_uid LIKE 'Code_%'"
            " UNION"
            " SELECT code_uid FROM code_association WHERE soa_id=? AND code_uid LIKE 'Code_%'",
            (soa_id, soa_id),
        )
        code_uid = _next_seq_uid("Code", (x[0] for x in cur.fetchall() if x[0]))
        cur.execute(
            "INSERT INTO code (soa_id, code_uid, code) VALUES (?,?,?)",
            (soa_id, code_uid, concept_code),
        )
        # Always create a new alias_code row for this BC (never reuse).
        cur.execute(
            "SELECT alias_code_uid FROM alias_code"
            " WHERE soa_id=? AND alias_code_uid LIKE 'AliasCode_%'",
            (soa_id,),
        )
        alias_uid = _next_seq_uid(
            "AliasCode", (x[0] for x in cur.fetchall() if x[0])
        )
        cur.execute(
            "INSERT INTO alias_code (soa_id, alias_code_uid, standard_code) VALUES (?,?,?)",
            (soa_id, alias_uid, code_uid),
        )

    cur.execute(
        "INSERT INTO biomedical_concept"
        " (soa_id, biomedical_concept_uid, name, code) VALUES (?,?,?,?)",
        (soa_id, concept_uid, name, alias_uid),
    )
    from .audit import _record_biomedical_concept_audit

    bc_id = cur.lastrowid
    code_uid_val, code_val, decode_val = _resolve_code_chain(cur, soa_id, alias_uid)
    _record_biomedical_concept_audit(
        soa_id,
        "create",
        bc_id,
        before=None,
        after={
            "biomedical_concept_uid": concept_uid,
            "code": code_val,
            "alias_code_uid": alias_uid,
            "code_uid": code_uid_val,
            "decode": decode_val,
        },
        cur=cur,
    )


def _enrich_biomedical_concept_bg(concept_code: str, soa_id: int) -> None:
    """Background task: fetch label/description from CDISC API and persist."""
    import requests as _requests

    headers = _cdisc_api_headers()
    try:
        url = (
            "https://api.library.cdisc.org/api/cosmos/v2/mdr/bc/biomedicalconcepts/"
            + concept_code
        )
        resp = _requests.get(url, headers=headers, timeout=15)
        if resp.status_code != 200:
            return
        data = resp.json()
        label = data.get("shortName")
        description = data.get("definition")
        conn = _connect()
        cur = conn.cursor()
        # One concept code may be represented by several code/alias rows in a
        # SoA (each BC gets its own rows — see _upsert_biomedical_concept), so
        # update EVERY matching BC. The previous version fetched only the
        # first alias row and left the other BCs unenriched; it also omitted
        # the ac.soa_id = c.soa_id join condition.
        cur.execute(
            "UPDATE biomedical_concept SET label=?, description=?"
            " WHERE soa_id=? AND code IN ("
            " SELECT ac.alias_code_uid FROM alias_code ac"
            " JOIN code c ON ac.standard_code = c.code_uid AND ac.soa_id = c.soa_id"
            " WHERE c.soa_id=? AND c.code=?)",
            (label, description, soa_id, soa_id, concept_code),
        )
        conn.commit()
        conn.close()
    except Exception:
        pass  # best-effort enrichment


def _cleanup_orphaned_concept_rows(cur, soa_id: int, removed_pairs) -> None:
    """Delete biomedical_concept/code/alias_code rows no longer referenced in this SoA.

    removed_pairs: iterable of (concept_code, concept_uid); concept_uid may be None.
    Call AFTER the activity_concept rows have been deleted (and any new ones inserted),
    so the orphan check reflects the final state of activity_concept.
    """
    from .audit import _record_biomedical_concept_audit

    for concept_code, concept_uid in removed_pairs:
        if concept_uid:
            cur.execute(
                "SELECT id, code FROM biomedical_concept"
                " WHERE biomedical_concept_uid=? AND soa_id=?",
                (concept_uid, soa_id),
            )
            bc_row = cur.fetchone()
            if bc_row:
                bc_id, alias_uid = bc_row
                code_uid_val, code_val, decode_val = _resolve_code_chain(
                    cur, soa_id, alias_uid
                )
                _record_biomedical_concept_audit(
                    soa_id,
                    "delete",
                    bc_id,
                    before={
                        "biomedical_concept_uid": concept_uid,
                        "code": code_val,
                        "alias_code_uid": alias_uid,
                        "code_uid": code_uid_val,
                        "decode": decode_val,
                    },
                    after=None,
                    cur=cur,
                )
            # Collect property alias_uids BEFORE deleting the property rows,
            # so the cascade below can check what is still referenced.
            cur.execute(
                "SELECT code FROM biomedical_concept_property"
                " WHERE biomedical_concept_uid=? AND soa_id=?",
                (concept_uid, soa_id),
            )
            prop_alias_uids = [r[0] for r in cur.fetchall() if r[0]]

            # Delete the property rows.
            cur.execute(
                "DELETE FROM biomedical_concept_property"
                " WHERE biomedical_concept_uid=? AND soa_id=?",
                (concept_uid, soa_id),
            )

            # Cascade-delete alias_code + code rows created for properties,
            # but only those no longer referenced by any property or BC.
            for prop_alias in prop_alias_uids:
                cur.execute(
                    "SELECT 1 FROM biomedical_concept_property WHERE soa_id=? AND code=? LIMIT 1",
                    (soa_id, prop_alias),
                )
                if cur.fetchone():
                    continue
                cur.execute(
                    "SELECT 1 FROM biomedical_concept WHERE soa_id=? AND code=? LIMIT 1",
                    (soa_id, prop_alias),
                )
                if cur.fetchone():
                    continue
                cur.execute(
                    "SELECT standard_code FROM alias_code WHERE alias_code_uid=? AND soa_id=?",
                    (prop_alias, soa_id),
                )
                prop_ac_row = cur.fetchone()
                cur.execute(
                    "DELETE FROM alias_code WHERE alias_code_uid=? AND soa_id=?",
                    (prop_alias, soa_id),
                )
                if prop_ac_row:
                    cur.execute(
                        "DELETE FROM code WHERE code_uid=? AND soa_id=?",
                        (prop_ac_row[0], soa_id),
                    )

            cur.execute(
                "DELETE FROM biomedical_concept"
                " WHERE biomedical_concept_uid=? AND soa_id=?",
                (concept_uid, soa_id),
            )
        if not concept_code:
            continue
        cur.execute(
            "SELECT 1 FROM activity_concept WHERE soa_id=? AND concept_code=? LIMIT 1",
            (soa_id, concept_code),
        )
        if cur.fetchone():
            continue  # still referenced by another activity in this SoA
        cur.execute(
            "SELECT code_uid FROM code WHERE soa_id=? AND code=?",
            (soa_id, concept_code),
        )
        code_row = cur.fetchone()
        if not code_row:
            continue
        code_uid_val = code_row[0]
        cur.execute(
            "SELECT alias_code_uid FROM alias_code WHERE soa_id=? AND standard_code=?",
            (soa_id, code_uid_val),
        )
        alias_row = cur.fetchone()
        if alias_row:
            # Audit any remaining biomedical_concept rows referencing this alias (edge case).
            cur.execute(
                "SELECT id, biomedical_concept_uid FROM biomedical_concept"
                " WHERE code=? AND soa_id=?",
                (alias_row[0], soa_id),
            )
            for edge_bc_id, edge_bc_uid in cur.fetchall():
                edge_code_uid, edge_code_val, edge_decode = _resolve_code_chain(
                    cur, soa_id, alias_row[0]
                )
                _record_biomedical_concept_audit(
                    soa_id,
                    "delete",
                    edge_bc_id,
                    before={
                        "biomedical_concept_uid": edge_bc_uid,
                        "code": edge_code_val,
                        "alias_code_uid": alias_row[0],
                        "code_uid": edge_code_uid,
                        "decode": edge_decode,
                    },
                    after=None,
                    cur=cur,
                )
            cur.execute(
                "DELETE FROM biomedical_concept WHERE code=? AND soa_id=?",
                (alias_row[0], soa_id),
            )
            cur.execute(
                "DELETE FROM alias_code WHERE alias_code_uid=? AND soa_id=?",
                (alias_row[0], soa_id),
            )
        cur.execute(
            "DELETE FROM code WHERE code_uid=? AND soa_id=?",
            (code_uid_val, soa_id),
        )
AND concept_code=?", (activity_id, code), ) + if "soa_id" in ac_cols: + _cleanup_orphaned_concept_rows(cur, soa_id, old_pairs) conn.commit() conn.close() concepts = fetch_biomedical_concepts() @@ -3745,7 +4420,7 @@ def ui_edit(request: Request, soa_id: int): cur_map = conn_map.cursor() cur_map.execute( "SELECT c.code_uid, pt.cdisc_submission_value " - "FROM code c JOIN protocol_terminology pt ON pt.code = c.code " + "FROM code_association c JOIN protocol_terminology pt ON pt.code = c.code " "WHERE c.soa_id=? AND c.codelist_code='C174222'", (soa_id,), ) @@ -3770,7 +4445,7 @@ def ui_edit(request: Request, soa_id: int): cur_ddf_map = conn_ddf_map.cursor() cur_ddf_map.execute( "SELECT c.code_uid, dt.cdisc_submission_value " - "FROM code c JOIN ddf_terminology dt ON dt.code = c.code " + "FROM code_association c JOIN ddf_terminology dt ON dt.code = c.code " "WHERE c.soa_id=? AND c.codelist_code='C188727'", (soa_id,), ) @@ -3807,7 +4482,7 @@ def ui_edit(request: Request, soa_id: int): conn_em = _connect() cur_em = conn_em.cursor() cur_em.execute( - "SELECT e.id, c.code FROM epoch e LEFT JOIN code c ON c.code_uid = e.type AND c.soa_id = e.soa_id WHERE e.soa_id=?", + "SELECT e.id, c.code FROM epoch e LEFT JOIN code_association c ON c.code_uid = e.type AND c.soa_id = e.soa_id WHERE e.soa_id=?", (soa_id,), ) for eid, code in cur_em.fetchall(): @@ -4807,6 +5482,12 @@ def ui_set_activity_concepts( for (code,) in cur.fetchall(): background_tasks.add_task(_lookup_and_save_dss, soa_id, activity_id, code) conn.close() + for code in payload.concept_codes: + if code.strip(): + background_tasks.add_task( + _enrich_biomedical_concept_bg, code.strip(), soa_id + ) + background_tasks.add_task(_enrich_code_bg, code.strip(), soa_id) # HTMX inline update support if request.headers.get("HX-Request") == "true": concepts = fetch_biomedical_concepts() @@ -6812,7 +7493,7 @@ def ui_add_epoch( except Exception: parent_href = None cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, 
codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, code_uid, @@ -6958,7 +7639,7 @@ def ui_update_epoch( except Exception: parent_href = None cur_t.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, code_uid, @@ -7148,7 +7829,7 @@ async def ui_add_arm( # Create Code_N new_type_uid = _get_next_code_uid(cur, soa_id) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, new_type_uid, @@ -7187,7 +7868,7 @@ async def ui_add_arm( # Create Code_N (continue numbering) new_data_origin_uid = _get_next_code_uid(cur, soa_id) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, new_data_origin_uid, @@ -7273,14 +7954,14 @@ async def ui_update_arm( prior_data_origin_code_value: Optional[str] = None if current_code_uid: cur.execute( - "SELECT code FROM code WHERE soa_id=? AND code_uid=?", + "SELECT code FROM code_association WHERE soa_id=? AND code_uid=?", (soa_id, current_code_uid), ) rcv = cur.fetchone() prior_arm_type_code_value = rcv[0] if rcv else None if current_data_origin_uid: cur.execute( - "SELECT code FROM code WHERE soa_id=? AND code_uid=?", + "SELECT code FROM code_association WHERE soa_id=? 
AND code_uid=?", (soa_id, current_data_origin_uid), ) rdv = cur.fetchone() @@ -7322,7 +8003,7 @@ async def ui_update_arm( if current_code_uid: # Update existing junction row for this code_uid cur.execute( - "UPDATE code SET code=?, codelist_code='C174222', codelist_table='protocol_terminology' WHERE soa_id=? AND code_uid=?", + "UPDATE code_association SET code=?, codelist_code='C174222', codelist_table='protocol_terminology' WHERE soa_id=? AND code_uid=?", (resolved_code, soa_id, current_code_uid), ) logger.info( @@ -7336,7 +8017,7 @@ async def ui_update_arm( # Create new Code_N within this SoA new_code_uid = _get_next_code_uid(cur, soa_id) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, new_code_uid, @@ -7378,7 +8059,7 @@ async def ui_update_arm( # Maintain/Upsert immutable Code_N for DDF mapping if current_data_origin_uid: cur.execute( - "UPDATE code SET code=?, codelist_code='C188727', codelist_table='ddf_terminology' WHERE soa_id=? AND code_uid=?", + "UPDATE code_association SET code=?, codelist_code='C188727', codelist_table='ddf_terminology' WHERE soa_id=? 
AND code_uid=?", (resolved_ddf_code, soa_id, current_data_origin_uid), ) new_data_origin_uid = current_data_origin_uid @@ -7393,7 +8074,7 @@ async def ui_update_arm( # Create new Code_N, ensuring unique across this SoA new_data_origin_uid = _get_next_code_uid(cur, soa_id) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, new_data_origin_uid, @@ -7435,19 +8116,19 @@ async def ui_update_arm( new_data_origin_uid, ) conn.commit() - # Capture post-update code values + # Capture post-update code_association values post_arm_type_code_value: Optional[str] = None post_data_origin_code_value: Optional[str] = None if new_code_uid: cur.execute( - "SELECT code FROM code WHERE soa_id=? AND code_uid=?", + "SELECT code FROM code_association WHERE soa_id=? AND code_uid=?", (soa_id, new_code_uid), ) rav = cur.fetchone() post_arm_type_code_value = rav[0] if rav else None if new_data_origin_uid: cur.execute( - "SELECT code FROM code WHERE soa_id=? AND code_uid=?", + "SELECT code FROM code_association WHERE soa_id=? 
AND code_uid=?", (soa_id, new_data_origin_uid), ) rdv2 = cur.fetchone() diff --git a/src/soa_builder/web/audit.py b/src/soa_builder/web/audit.py index 4dfc260..e0835f6 100644 --- a/src/soa_builder/web/audit.py +++ b/src/soa_builder/web/audit.py @@ -388,3 +388,47 @@ def _record_transition_rule_audit( conn.close() except Exception as e: logger.warning("Failed recording transition rule audit: %s", e) + + +def _record_biomedical_concept_audit( + soa_id: int, + action: str, + biomedical_concept_id: Optional[int], + before: Optional[Dict[str, Any]] = None, + after: Optional[Dict[str, Any]] = None, + cur=None, +): + try: + own_conn = cur is None + if own_conn: + conn = _connect() + cur = conn.cursor() + cur.execute( + """CREATE TABLE IF NOT EXISTS biomedical_concept_audit ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + soa_id INTEGER NOT NULL, + biomedical_concept_id INTEGER, + action TEXT NOT NULL, + before_json TEXT, + after_json TEXT, + performed_at TEXT NOT NULL + )""" + ) + cur.execute( + "INSERT INTO biomedical_concept_audit" + " (soa_id, biomedical_concept_id, action, before_json, after_json, performed_at)" + " VALUES (?,?,?,?,?,?)", + ( + soa_id, + biomedical_concept_id, + action, + json.dumps(before) if before else None, + json.dumps(after) if after else None, + datetime.now(timezone.utc).isoformat(), + ), + ) + if own_conn: + conn.commit() + conn.close() + except Exception as e: + logger.warning("Failed recording biomedical_concept audit: %s", e) diff --git a/src/soa_builder/web/db.py b/src/soa_builder/web/db.py index 628259b..17fc22c 100644 --- a/src/soa_builder/web/db.py +++ b/src/soa_builder/web/db.py @@ -7,21 +7,41 @@ # Load environment variables from .env early load_dotenv() -# Prefer explicit env var; otherwise, auto-select test DB under pytest -_env_db = os.environ.get("SOA_BUILDER_DB") -_running_pytest = "PYTEST_CURRENT_TEST" in os.environ or "pytest" in sys.modules -if _env_db: - DB_PATH = _env_db -else: - DB_PATH = "soa_builder_web_tests.db" if 
_running_pytest else "soa_builder_web.db" +_PRODUCTION_DB = "soa_builder_web.db" + + +def _resolve_db_path() -> str: + """Resolve the database path at call time (not import time). + + Priority: SOA_BUILDER_DB env var > pytest detection > production default. + Evaluated fresh on every call so import order does not affect test isolation. + """ + env_db = os.environ.get("SOA_BUILDER_DB") + if env_db: + return env_db + is_pytest = "PYTEST_CURRENT_TEST" in os.environ or "pytest" in sys.modules + return "soa_builder_web_tests.db" if is_pytest else _PRODUCTION_DB + + +# Module-level snapshot kept for backward-compat (used only for logging in app.py). +# All actual connections go through _connect() which re-evaluates dynamically. +DB_PATH = _resolve_db_path() def _connect(): - conn = sqlite3.connect(DB_PATH, timeout=5.0, check_same_thread=False) + db_path = _resolve_db_path() + # Hard guard: tests must never connect to the production database + if db_path == _PRODUCTION_DB and ( + "PYTEST_CURRENT_TEST" in os.environ or "pytest" in sys.modules + ): + raise RuntimeError( + f"Tests must not connect to the production database '{_PRODUCTION_DB}'. " + "Set SOA_BUILDER_DB to a test-specific path." 
+ ) + conn = sqlite3.connect(db_path, timeout=5.0, check_same_thread=False) try: - # Improve concurrency and reduce lock errors; favor simpler mode under pytest - _is_pytest = "PYTEST_CURRENT_TEST" in os.environ or "pytest" in sys.modules - if _is_pytest: + is_pytest = "PYTEST_CURRENT_TEST" in os.environ or "pytest" in sys.modules + if is_pytest: conn.execute("PRAGMA journal_mode=DELETE") conn.execute("PRAGMA synchronous=OFF") else: diff --git a/src/soa_builder/web/initialize_database.py b/src/soa_builder/web/initialize_database.py index 49160c8..de5ff08 100644 --- a/src/soa_builder/web/initialize_database.py +++ b/src/soa_builder/web/initialize_database.py @@ -64,20 +64,6 @@ def _init_db(): )""" ) - # code - # create the code table to store unique Code_uid values associated with study objects - cur.execute( - """CREATE TABLE IF NOT EXISTS code ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - soa_id INTEGER NOT NULL, - code_uid TEXT, -- immutable Code_N identifier unique within an SOA - codelist_table TEXT, - codelist_code TEXT NOT NULL, - code TEXT NOT NULL, - UNIQUE(soa_id, code_uid) - )""" - ) - # ddf_terminology: this table is created dynamically when uploading a new DDF Terminology # spreadsheet (app.py:5179-5545) @@ -283,6 +269,89 @@ def _init_db(): )""" ) + # Biomedical Concepts and properties + cur.execute( + """CREATE TABLE IF NOT EXISTS biomedical_concept ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + soa_id INT NOT NULL, + biomedical_concept_uid TEXT NOT NULL, + name TEXT NOT NULL, + label TEXT, + description TEXT, + code TEXT, -- reference to alias_code.alias_code_uid + UNIQUE(biomedical_concept_uid, soa_id) + )""" + ) + + cur.execute( + """CREATE TABLE IF NOT EXISTS biomedical_concept_property ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + soa_id INT NOT NULL, + biomedical_concept_uid TEXT, -- reference to parent biomedical_concept + biomedical_concept_property_uid TEXT NOT NULL, + name TEXT NOT NULL, + label TEXT, + description TEXT, + isRequired INT, -- sqlite 
does not have bool data type + isEnabled INT, -- sqlite does not have bool data type + datatype TEXT, + code, -- reference to alias_code.alias_code_uid + UNIQUE(biomedical_concept_property_uid, soa_id) + )""" + ) + + # Biomedical Concept audit table + cur.execute( + """CREATE TABLE IF NOT EXISTS biomedical_concept_audit ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + soa_id INTEGER NOT NULL, + biomedical_concept_id INTEGER, + action TEXT NOT NULL, -- create|delete + before_json TEXT, + after_json TEXT, + performed_at TEXT NOT NULL + )""" + ) + + # Alias Code table + cur.execute( + """CREATE TABLE IF NOT EXISTS alias_code ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + soa_id INT NOT NULL, + alias_code_uid TEXT NOT NULL, + standard_code TEXT, -- reference to code_uid value + UNIQUE(alias_code_uid, soa_id) + )""" + ) + + # code_assignment table for storing code assignment values stored in code table + # ISSUE #127: all refactor complete + cur.execute( + """CREATE TABLE IF NOT EXISTS code_association ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + soa_id INTEGER NOT NULL, + code_uid TEXT NOT NULL, -- immutable Code_N identifier unique within an SOA + codelist_table TEXT, + codelist_code TEXT , + code, + UNIQUE(code_uid, soa_id) + )""" + ) + + # Code table + cur.execute( + """CREATE TABLE IF NOT EXISTS code ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + soa_id INT NOT NULL, + code_uid TEXT NOT NULL, + code TEXT, + code_system TEXT, + code_system_version TEXT, + decode TEXT, + UNIQUE(code_uid, soa_id) + )""" + ) + # AUDIT TABLES FOR TRACKING ALL CHANGES TO ENTITIES # Element audit table capturing create/update/delete operations diff --git a/src/soa_builder/web/migrate_database.py b/src/soa_builder/web/migrate_database.py index 61ff5e8..71c678e 100644 --- a/src/soa_builder/web/migrate_database.py +++ b/src/soa_builder/web/migrate_database.py @@ -389,56 +389,6 @@ def _migrate_add_epoch_uid(): logger.warning("epoch_uid migration failed: %s", e) -# Migration: create code_junction table 
-def _migrate_create_code_junction(): - """Create code_junction linking table if absent. - - Columns: - id INTEGER PRIMARY KEY AUTOINCREMENT - code_uid TEXT -- opaque unique identifier for the code instance - codelist_table TEXT -- source table name that provided the code - codelist_code TEXT -- code value from source codelist - type_code TEXT -- type/category for the code (e.g., TERM, SYNONYM) - data_origin_type_code TEXT -- origin classification (e.g., DDF, PROTOCOL, IMPORT) - soa_id INTEGER -- optional foreign key to study (not enforced) - linked_table TEXT -- target table name being linked - linked_column TEXT -- column name in target table referencing the code - linked_id TEXT -- id/key in target table row (stored as TEXT for flexibility) - - Indexes can be added later once query patterns emerge. Using TEXT for linked_id avoids - premature typing constraints (could be INT or UUID).""" - try: - conn = _connect() - cur = conn.cursor() - # Detect existing table - cur.execute("PRAGMA table_info(code_junction)") - existing_cols = [r[1] for r in cur.fetchall()] - if existing_cols: # table already exists - conn.close() - return - cur.execute( - """ - CREATE TABLE code_junction ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - code_uid TEXT, - codelist_table TEXT, - codelist_code TEXT, - type_code TEXT, - data_origin_type_code TEXT, - soa_id INTEGER, - linked_table TEXT, - linked_column TEXT, - linked_id TEXT - ) - """ - ) - conn.commit() - conn.close() - logger.info("Created code_junction table") - except Exception as e: # pragma: no cover - logger.warning("code_junction migration failed: %s", e) - - # Migrations: add study metadata columns def _migrate_add_study_fields(): """Ensure study metadata columns (study_id, study_label, study_description) exist on soa table. 
@@ -1035,3 +985,140 @@ def _migrate_study_cell_add_order_index(): conn.close() except Exception as e: logger.warning("order_index migration failed: %s", e) + + +def _migrate_biomedical_concept_audit(): + """Create biomedical_concept_audit table for tracking create/delete operations.""" + try: + conn = _connect() + cur = conn.cursor() + cur.execute( + """CREATE TABLE IF NOT EXISTS biomedical_concept_audit ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + soa_id INTEGER NOT NULL, + biomedical_concept_id INTEGER, + action TEXT NOT NULL, + before_json TEXT, + after_json TEXT, + performed_at TEXT NOT NULL + )""" + ) + conn.commit() + conn.close() + except Exception as e: + logger.warning("_migrate_biomedical_concept_audit: %s", e) + + +def _migrate_backfill_biomedical_concept_codes(): + """One-time backfill: for biomedical_concept rows that have no matching alias_code entry, + create code + alias_code rows and update biomedical_concept.code to the alias_code_uid. + """ + try: + conn = _connect() + cur = conn.cursor() + + cur.execute( + """ + SELECT bc.id, bc.soa_id, bc.biomedical_concept_uid, ac2.concept_code + FROM biomedical_concept bc + LEFT JOIN alias_code ac + ON ac.alias_code_uid = bc.code AND ac.soa_id = bc.soa_id + LEFT JOIN activity_concept ac2 + ON ac2.concept_uid = bc.biomedical_concept_uid + AND ac2.soa_id = bc.soa_id + WHERE ac.id IS NULL + """ + ) + rows = cur.fetchall() + + for bc_id, soa_id, bc_uid, concept_code in rows: + if not concept_code: + continue # no raw code available — nothing to create + + # get-or-create code row + cur.execute( + "SELECT code_uid FROM code WHERE soa_id=? AND code=?", + (soa_id, concept_code), + ) + row = cur.fetchone() + if row: + code_uid = row[0] + else: + cur.execute( + "SELECT code_uid FROM code" + " WHERE soa_id=? 
AND code_uid LIKE 'Code_%'", + (soa_id,), + ) + existing = [x[0] for x in cur.fetchall() if x[0]] + n = 1 + if existing: + try: + n = max(int(x.split("_")[1]) for x in existing) + 1 + except Exception: + n = len(existing) + 1 + code_uid = f"Code_{n}" + cur.execute( + "INSERT INTO code (soa_id, code_uid, code) VALUES (?,?,?)", + (soa_id, code_uid, concept_code), + ) + + # get-or-create alias_code row + cur.execute( + "SELECT alias_code_uid FROM alias_code" + " WHERE soa_id=? AND standard_code=?", + (soa_id, code_uid), + ) + row = cur.fetchone() + if row: + alias_uid = row[0] + else: + cur.execute( + "SELECT alias_code_uid FROM alias_code" + " WHERE soa_id=? AND alias_code_uid LIKE 'AliasCode_%'", + (soa_id,), + ) + existing = [x[0] for x in cur.fetchall() if x[0]] + n = 1 + if existing: + try: + n = max(int(x.split("_")[1]) for x in existing) + 1 + except Exception: + n = len(existing) + 1 + alias_uid = f"AliasCode_{n}" + cur.execute( + "INSERT INTO alias_code" + " (soa_id, alias_code_uid, standard_code) VALUES (?,?,?)", + (soa_id, alias_uid, code_uid), + ) + + # patch biomedical_concept.code + cur.execute( + "UPDATE biomedical_concept SET code=? 
WHERE id=?", + (alias_uid, bc_id), + ) + + conn.commit() + conn.close() + except Exception as e: + logger.warning("_migrate_backfill_biomedical_concept_codes: %s", e) + + +def _migrate_biomedical_concept_property_add_uid(): + """Add biomedical_concept_uid column to biomedical_concept_property table.""" + try: + conn = _connect() + cur = conn.cursor() + cur.execute("PRAGMA table_info(biomedical_concept_property)") + cols = {r[1] for r in cur.fetchall()} + if "biomedical_concept_uid" not in cols: + cur.execute( + "ALTER TABLE biomedical_concept_property" + " ADD COLUMN biomedical_concept_uid TEXT" + ) + conn.commit() + logger.info( + "Added biomedical_concept_uid column to biomedical_concept_property" + ) + conn.close() + except Exception as e: + logger.warning("_migrate_biomedical_concept_property_add_uid: %s", e) diff --git a/src/soa_builder/web/routers/activities.py b/src/soa_builder/web/routers/activities.py index 88e2573..48d675c 100644 --- a/src/soa_builder/web/routers/activities.py +++ b/src/soa_builder/web/routers/activities.py @@ -425,7 +425,12 @@ def add_activities_bulk(soa_id: int, payload: BulkActivities): @router.post("/activities/{activity_id}/concepts", response_class=JSONResponse) -def set_activity_concepts(soa_id: int, activity_id: int, concept_codes: List[str]): +def set_activity_concepts( + soa_id: int, + activity_id: int, + concept_codes: List[str], + background_tasks: BackgroundTasks, +): if not soa_exists(soa_id): raise HTTPException(404, "SOA not found") conn = _connect() @@ -437,6 +442,27 @@ def set_activity_concepts(soa_id: int, activity_id: int, concept_codes: List[str # Clear existing mappings; include soa_id if column exists ac_has_soa = _table_has_columns(cur, "activity_concept", ("soa_id",)) ac_has_actuid = _table_has_columns(cur, "activity_concept", ("activity_uid",)) + ac_has_conceptuid = _table_has_columns(cur, "activity_concept", ("concept_uid",)) + # Capture existing pairs before delete for cascade cleanup + if ac_has_soa: + if 
ac_has_conceptuid: + cur.execute( + "SELECT concept_code, concept_uid FROM activity_concept" + " WHERE activity_id=? AND soa_id=?", + (activity_id, soa_id), + ) + else: + cur.execute( + "SELECT concept_code, NULL FROM activity_concept" + " WHERE activity_id=? AND soa_id=?", + (activity_id, soa_id), + ) + else: + cur.execute( + "SELECT concept_code, NULL FROM activity_concept WHERE activity_id=?", + (activity_id,), + ) + old_pairs = cur.fetchall() if ac_has_soa: cur.execute( "DELETE FROM activity_concept WHERE activity_id=? AND soa_id=?", @@ -450,7 +476,14 @@ def set_activity_concepts(soa_id: int, activity_id: int, concept_codes: List[str cur.execute("SELECT activity_uid FROM activity WHERE id=?", (activity_id,)) row = cur.fetchone() activity_uid = row[0] if row else None - ac_has_conceptuid = _table_has_columns(cur, "activity_concept", ("concept_uid",)) + from ..app import ( + _upsert_biomedical_concept, + _enrich_biomedical_concept_bg, + _enrich_code_bg, + _cleanup_orphaned_concept_rows, + _populate_bc_properties_bg, + ) + inserted = 0 for code in concept_codes: ccode = code.strip() @@ -502,7 +535,14 @@ def set_activity_concepts(soa_id: int, activity_id: int, concept_codes: List[str "INSERT INTO activity_concept (activity_id, concept_code, concept_title) VALUES (?,?,?)", (activity_id, ccode, title), ) + _upsert_biomedical_concept(cur, soa_id, concept_uid, title, ccode) + background_tasks.add_task(_enrich_biomedical_concept_bg, ccode, soa_id) + background_tasks.add_task(_enrich_code_bg, ccode, soa_id) + background_tasks.add_task( + _populate_bc_properties_bg, soa_id, activity_id, ccode + ) inserted += 1 + _cleanup_orphaned_concept_rows(cur, soa_id, old_pairs) conn.commit() conn.close() return {"activity_id": activity_id, "concepts_set": inserted} @@ -658,6 +698,7 @@ def ui_dss_auto_assign( ): """Queue background DSS auto-assignment for all concepts in the SOA.""" from ..app import _lookup_and_save_dss as _auto_dss + from ..app import _populate_bc_properties_bg if 
not soa_exists(soa_id): raise HTTPException(404, "SOA not found") @@ -679,6 +720,9 @@ def ui_dss_auto_assign( conn.close() for activity_id, concept_code in rows: background_tasks.add_task(_auto_dss, soa_id, activity_id, concept_code) + background_tasks.add_task( + _populate_bc_properties_bg, soa_id, activity_id, concept_code + ) redirect_url = f"/ui/soa/{int(soa_id)}/activities" if request.headers.get("HX-Request") == "true": return HTMLResponse("", headers={"HX-Redirect": redirect_url}) @@ -715,6 +759,21 @@ def ui_delete_activity_page(request: Request, soa_id: int, activity_id: int): conn.close() raise HTTPException(404, "Activity not found") before = {"id": row[0], "name": row[1], "order_index": row[2]} + # Capture concept pairs before deleting for cascade cleanup + ac_has_conceptuid = _table_has_columns(cur, "activity_concept", ("concept_uid",)) + if ac_has_conceptuid: + cur.execute( + "SELECT concept_code, concept_uid FROM activity_concept" + " WHERE activity_id=? AND soa_id=?", + (activity_id, soa_id), + ) + else: + cur.execute( + "SELECT concept_code, NULL FROM activity_concept" + " WHERE activity_id=? AND soa_id=?", + (activity_id, soa_id), + ) + old_pairs = cur.fetchall() cur.execute( "DELETE FROM matrix_cells WHERE soa_id=? AND activity_id=?", (soa_id, activity_id), @@ -723,6 +782,9 @@ def ui_delete_activity_page(request: Request, soa_id: int, activity_id: int): "DELETE FROM activity_concept WHERE activity_id=? 
AND soa_id=?", (activity_id, soa_id), ) + from ..app import _cleanup_orphaned_concept_rows + + _cleanup_orphaned_concept_rows(cur, soa_id, old_pairs) cur.execute("DELETE FROM activity WHERE id=?", (activity_id,)) conn.commit() conn.close() @@ -801,6 +863,7 @@ def ui_save_dss_assignment( soa_id: int, activity_id: int, concept_code: str, + background_tasks: BackgroundTasks, dss_selection: str = Form(""), ): """Save a DSS assignment for a specific concept on an activity.""" @@ -849,9 +912,74 @@ def ui_save_dss_assignment( "UPDATE activity_concept SET dss_title=?, dss_href=? WHERE activity_id=? AND concept_code=?", (new_title, new_href, activity_id, concept_code), ) + # When DSS is cleared, cascade-delete property/alias_code/code rows for this BC + if not new_href: + if _table_has_columns(cur, "activity_concept", ("soa_id",)): + cur.execute( + "SELECT concept_uid FROM activity_concept" + " WHERE activity_id=? AND concept_code=? AND soa_id=?", + (activity_id, concept_code, soa_id), + ) + else: + cur.execute( + "SELECT concept_uid FROM activity_concept" + " WHERE activity_id=? AND concept_code=?", + (activity_id, concept_code), + ) + uid_row = cur.fetchone() + bc_uid = uid_row[0] if uid_row else None + if bc_uid: + cur.execute( + "SELECT code FROM biomedical_concept_property" + " WHERE biomedical_concept_uid=? AND soa_id=?", + (bc_uid, soa_id), + ) + prop_alias_uids = [r[0] for r in cur.fetchall() if r[0]] + cur.execute( + "DELETE FROM biomedical_concept_property" + " WHERE biomedical_concept_uid=? AND soa_id=?", + (bc_uid, soa_id), + ) + for prop_alias in prop_alias_uids: + cur.execute( + "SELECT 1 FROM biomedical_concept_property" + " WHERE soa_id=? AND code=? LIMIT 1", + (soa_id, prop_alias), + ) + if cur.fetchone(): + continue + cur.execute( + "SELECT 1 FROM biomedical_concept WHERE soa_id=? AND code=? LIMIT 1", + (soa_id, prop_alias), + ) + if cur.fetchone(): + continue + cur.execute( + "SELECT standard_code FROM alias_code" + " WHERE alias_code_uid=? 
AND soa_id=?", + (prop_alias, soa_id), + ) + prop_ac_row = cur.fetchone() + cur.execute( + "DELETE FROM alias_code WHERE alias_code_uid=? AND soa_id=?", + (prop_alias, soa_id), + ) + if prop_ac_row: + cur.execute( + "DELETE FROM code WHERE code_uid=? AND soa_id=?", + (prop_ac_row[0], soa_id), + ) + conn.commit() conn.close() + if new_href: + from ..app import _populate_bc_properties_bg + + background_tasks.add_task( + _populate_bc_properties_bg, soa_id, activity_id, concept_code + ) + # Audit _record_activity_audit( soa_id, diff --git a/src/soa_builder/web/routers/arms.py b/src/soa_builder/web/routers/arms.py index 93eefd4..f4484b0 100644 --- a/src/soa_builder/web/routers/arms.py +++ b/src/soa_builder/web/routers/arms.py @@ -69,13 +69,13 @@ def ui_list_arms(request: Request, soa_id: int): cur = conn.cursor() # Map arm.type (code_uid) -> conceptId for Arm type (C174222) cur.execute( - "SELECT code_uid,code FROM code WHERE soa_id=? AND codelist_code='C174222'", + "SELECT code_uid,code FROM code_association WHERE soa_id=? AND codelist_code='C174222'", (soa_id,), ) type_rows = cur.fetchall() # Map arm.data_origin_type (code_uid) -> conceptId for Arm Data Origin Type (C188727) cur.execute( - "SELECT code_uid,code FROM code WHERE soa_id=? AND codelist_code='C188727'", + "SELECT code_uid,code FROM code_association WHERE soa_id=? 
AND codelist_code='C188727'", (soa_id,), ) data_origin_rows = cur.fetchall() @@ -181,7 +181,7 @@ def create_arm(soa_id: int, payload: ArmCreate): logger.info("arm type: %s", arm_type) arm_type_codelist_table = "db://protocol_terminology" cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, arm_type, @@ -198,7 +198,7 @@ def create_arm(soa_id: int, payload: ArmCreate): logger.info("arm dataOriginType: %s", arm_data_origin_type) arm_data_origin_type_codelist_table = "db://ddf_terminology" cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, arm_data_origin_type, @@ -319,7 +319,7 @@ def update_arm(soa_id: int, arm_id: int, payload: ArmUpdate): # Create new Code_{N} and attach to arm.type type_uid = _get_next_code_uid(cur, soa_id) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, type_uid, @@ -334,14 +334,14 @@ def update_arm(soa_id: int, arm_id: int, payload: ArmUpdate): ) else: cur.execute( - "UPDATE code SET code=? WHERE soa_id=? AND code_uid=?", + "UPDATE code_association SET code=? WHERE soa_id=? 
AND code_uid=?", (new_type, soa_id, type_uid), ) if cur.rowcount == 0: # Fallback if code row is missing type_uid = _get_next_code_uid(cur, soa_id) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, type_uid, @@ -363,7 +363,7 @@ def update_arm(soa_id: int, arm_id: int, payload: ArmUpdate): if not data_origin_type_uid: data_origin_type_uid = _get_next_code_uid(cur, soa_id) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, data_origin_type_uid, @@ -378,13 +378,13 @@ def update_arm(soa_id: int, arm_id: int, payload: ArmUpdate): ) else: cur.execute( - "UPDATE code SET code=? WHERE soa_id=? AND code_uid=?", + "UPDATE code_association SET code=? WHERE soa_id=? AND code_uid=?", (new_data_origin_type, soa_id, data_origin_type_uid), ) if cur.rowcount == 0: data_origin_type_uid = _get_next_code_uid(cur, soa_id) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, data_origin_type_uid, diff --git a/src/soa_builder/web/routers/audits.py b/src/soa_builder/web/routers/audits.py index f939d75..ef87c48 100644 --- a/src/soa_builder/web/routers/audits.py +++ b/src/soa_builder/web/routers/audits.py @@ -194,6 +194,25 @@ def ui_list_audits(request: Request, soa_id: int): ] instance_cur.close() + # Get Biomedical Concept Audits + bc_cur = conn.cursor() + bc_cur.execute( + """SELECT id,biomedical_concept_id,action,before_json,after_json,performed_at FROM biomedical_concept_audit WHERE soa_id=? 
ORDER BY id DESC LIMIT 20""", + (soa_id,), + ) + bc_audits = [ + { + "id": r[0], + "biomedical_concept_id": r[1], + "action": r[2], + "before_json": r[3], + "after_json": r[4], + "performed_at": r[5], + } + for r in bc_cur.fetchall() + ] + bc_cur.close() + return templates.TemplateResponse( request, "audits.html", @@ -207,6 +226,7 @@ def ui_list_audits(request: Request, soa_id: int): "visit_audits": visit_audits, "timing_audits": timing_audits, "instance_audits": instance_audits, + "bc_audits": bc_audits, "soa_id": soa_id, }, ) diff --git a/src/soa_builder/web/routers/epochs.py b/src/soa_builder/web/routers/epochs.py index 7d24c82..2590fc0 100644 --- a/src/soa_builder/web/routers/epochs.py +++ b/src/soa_builder/web/routers/epochs.py @@ -102,11 +102,11 @@ def ui_list_epochs(request: Request, soa_id: int): epochs = list_epochs(soa_id) - # resolve epoch.type (code_uid) -> conceptId from code table + # resolve epoch.type (code_uid) -> conceptId from code_association table conn = _connect() cur = conn.cursor() cur.execute( - "SELECT code_uid, code FROM code WHERE soa_id=? AND codelist_code='C99079'", + "SELECT code_uid, code FROM code_association WHERE soa_id=? 
AND codelist_code='C99079'", (soa_id,), ) type_rows = cur.fetchall() @@ -215,7 +215,7 @@ def add_epoch(soa_id: int, payload: EpochCreate): else "/mdr/ct/packages" ) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, epoch_type, @@ -346,7 +346,7 @@ def update_epoch(soa_id: int, epoch_id: int, payload: EpochUpdate): if not type_uid: type_uid = _get_next_code_uid(cur, soa_id) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, type_uid, @@ -361,13 +361,13 @@ def update_epoch(soa_id: int, epoch_id: int, payload: EpochUpdate): ) else: cur.execute( - "UPDATE code SET code=? WHERE soa_id=? AND code_uid=?", + "UPDATE code_association SET code=? WHERE soa_id=? 
AND code_uid=?", (new_type, soa_id, type_uid), ) if cur.rowcount == 0: type_uid = _get_next_code_uid(cur, soa_id) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, type_uid, diff --git a/src/soa_builder/web/routers/schedule_timelines.py b/src/soa_builder/web/routers/schedule_timelines.py index 21b1911..d1f78c0 100644 --- a/src/soa_builder/web/routers/schedule_timelines.py +++ b/src/soa_builder/web/routers/schedule_timelines.py @@ -165,7 +165,7 @@ def ui_study_timing(request: Request, soa_id: int): code_to_sv_rtf = {v: k for k, v in (sv_to_code_rtf or {}).items()} conn = _connect() cur = conn.cursor() - cur.execute("SELECT code_uid, code FROM code WHERE soa_id=?", (soa_id,)) + cur.execute("SELECT code_uid, code FROM code_association WHERE soa_id=?", (soa_id,)) code_uid_to_code = {r[0]: r[1] for r in cur.fetchall() if r[0]} cur.execute( "SELECT study_id, study_label, study_description, name, created_at FROM soa WHERE id=?", diff --git a/src/soa_builder/web/routers/timings.py b/src/soa_builder/web/routers/timings.py index 0aa6040..f393363 100644 --- a/src/soa_builder/web/routers/timings.py +++ b/src/soa_builder/web/routers/timings.py @@ -94,7 +94,7 @@ def ui_list_timings(request: Request, soa_id: int): # Map timing.type (code_uid) -> code via code table, then to submissionValue conn = _connect() cur = conn.cursor() - cur.execute("SELECT code_uid, code FROM code WHERE soa_id=?", (soa_id,)) + cur.execute("SELECT code_uid, code FROM code_association WHERE soa_id=?", (soa_id,)) code_uid_to_code = {r[0]: r[1] for r in cur.fetchall() if r[0]} conn.close() for t in timings: @@ -275,11 +275,11 @@ def ui_create_timing( cur_c = conn_c.cursor() code_uid = _get_next_code_uid(cur_c, soa_id) cur_c.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + 
"INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, code_uid, - "ddf_terminology", + "http://www.cdisc.org", "C201264", str(code_val), ), @@ -300,11 +300,11 @@ def ui_create_timing( cur_c2 = conn_c2.cursor() rtf_code_uid = _get_next_code_uid(cur_c2, soa_id) cur_c2.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, rtf_code_uid, - "ddf_terminology", + "http://www.cdisc.org", "C201265", str(rtf_code_val), ), @@ -583,7 +583,7 @@ def ui_update_timing( existing_rtf_uid = row_chk[1] if row_chk else None # Map existing code_uids -> code values cur_chk.execute( - "SELECT code_uid, code FROM code WHERE soa_id=?", + "SELECT code_uid, code FROM code_association WHERE soa_id=?", (soa_id,), ) code_rows = cur_chk.fetchall() or [] @@ -609,11 +609,11 @@ def ui_update_timing( # Always create a fresh code_uid; do not reuse across timings mapped_type = _get_next_code_uid(cur_chk, soa_id) cur_chk.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, mapped_type, - "ddf_terminology", + "http://www.cdisc.org", "C201264", str(new_code_val), ), @@ -636,11 +636,11 @@ def ui_update_timing( else: mapped_rtf = _get_next_code_uid(cur_chk, soa_id) cur_chk.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, mapped_rtf, - "ddf_terminology", + "http://www.cdisc.org", "C201265", str(new_rtf_code_val), ), diff --git a/src/soa_builder/web/routers/visits.py b/src/soa_builder/web/routers/visits.py index 1ba1ba8..f40dcd2 100644 --- 
a/src/soa_builder/web/routers/visits.py +++ b/src/soa_builder/web/routers/visits.py @@ -36,7 +36,7 @@ def _load_code_value_map(soa_id: int) -> dict[str, str]: conn = _connect() cur = conn.cursor() cur.execute( - "SELECT code_uid, code FROM code WHERE soa_id=?", + "SELECT code_uid, code FROM code_association WHERE soa_id=?", (soa_id,), ) rows = cur.fetchall() @@ -232,11 +232,11 @@ def add_visit(soa_id: int, payload: VisitCreate): if type: cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, type, - "ddf_terminology", + "http://www.cdisc.org", "C188728", "C25716", ), @@ -255,7 +255,7 @@ def add_visit(soa_id: int, payload: VisitCreate): else "/mdr/ct/packages" ) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, environmentalSettings, @@ -278,7 +278,7 @@ def add_visit(soa_id: int, payload: VisitCreate): else "/mdr/ct/packages" ) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, contactModes, @@ -463,7 +463,7 @@ def update_visit(soa_id: int, visit_id: int, payload: VisitUpdate): if not env_code_uid: env_code_uid = _get_next_code_uid(cur, soa_id) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, env_code_uid, @@ -478,13 +478,13 @@ def update_visit(soa_id: int, visit_id: int, payload: VisitUpdate): ) else: cur.execute( - "UPDATE code SET code=? WHERE soa_id=? 
AND code_uid=?", + "UPDATE code_association SET code=? WHERE soa_id=? AND code_uid=?", (new_environmental_value, soa_id, env_code_uid), ) if cur.rowcount == 0: env_code_uid = _get_next_code_uid(cur, soa_id) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, env_code_uid, @@ -504,7 +504,7 @@ def update_visit(soa_id: int, visit_id: int, payload: VisitUpdate): if not contact_mode_code_uid: contact_mode_code_uid = _get_next_code_uid(cur, soa_id) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, contact_mode_code_uid, @@ -519,13 +519,13 @@ def update_visit(soa_id: int, visit_id: int, payload: VisitUpdate): ) else: cur.execute( - "UPDATE code SET code=? WHERE soa_id=? AND code_uid=?", + "UPDATE code_association SET code=? WHERE soa_id=? AND code_uid=?", (new_contact_mode, soa_id, contact_mode_code_uid), ) if cur.rowcount == 0: contact_mode_code_uid = _get_next_code_uid(cur, soa_id) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", ( soa_id, contact_mode_code_uid, diff --git a/src/soa_builder/web/templates/audits.html b/src/soa_builder/web/templates/audits.html index c2776a4..fad03e7 100644 --- a/src/soa_builder/web/templates/audits.html +++ b/src/soa_builder/web/templates/audits.html @@ -5,6 +5,33 @@

Entity Audits for {{ soa_id }}

+
+ Biomedical Concept Audit (latest {{ bc_audits|length }}) + {% if bc_audits %} + + + + + + + + + + {% for bca in bc_audits %} + + + + + + + + + {% endfor %} +
IDConceptActionPerformedBeforeAfter
{{ bca.id }}{{ bca.biomedical_concept_id }}{{ bca.action }}{{ bca.performed_at }}{{ bca.before_json or '' }}{{ bca.after_json or '' }}
+ {% else %} +
No biomedical concept audit entries yet.
+ {% endif %} +
Visit Audit (latest {{ visit_audits|length }}) {% if visit_audits %} diff --git a/src/soa_builder/web/utils.py b/src/soa_builder/web/utils.py index ca6caf4..b715f69 100644 --- a/src/soa_builder/web/utils.py +++ b/src/soa_builder/web/utils.py @@ -365,14 +365,36 @@ def get_epoch_parent_package_href_cached() -> str | None: return str(val) if val else None +# Helper function to generate new alias_code_uid value +def get_next_alias_code_uid(cur: Any, soa_id: int) -> str: + """Compute next unique AliasCode_ for the given SOA. + Assumes `cur` is a sqlite cursor within an open transaction. + """ + cur.execute( + "SELECT alias_code_uid from alias_code WHERE soa_id=? AND alias_code_uid LIKE 'AliasCode_%'", + (soa_id,), + ) + existing = [x[0] for x in cur.fetchall() if x[0]] + n = 1 + if existing: + try: + n = max(int(x.split("_")[1]) for x in existing) + 1 + except Exception: + n = len(existing) + 1 + return f"AliasCode_{n}" + + +# Helper function to generate new code_uid value def get_next_code_uid(cur: Any, soa_id: int) -> str: """Compute next unique Code_N for the given SOA. Assumes `cur` is a sqlite cursor within an open transaction. """ cur.execute( - "SELECT code_uid FROM code WHERE soa_id=? AND code_uid LIKE 'Code_%'", - (soa_id,), + "SELECT code_uid FROM code_association WHERE soa_id=? AND code_uid LIKE 'Code_%'" + " UNION" + " SELECT code_uid FROM code WHERE soa_id=? AND code_uid LIKE 'Code_%'", + (soa_id, soa_id), ) existing = [x[0] for x in cur.fetchall() if x[0]] n = 1 @@ -695,7 +717,7 @@ def get_encounter_type_sv(soa_id: int, code_uid: str): cur.execute( """ SELECT ddf.cdisc_submission_value FROM visit v - INNER JOIN code c ON v.type=c.code_uid AND v.soa_id=c.soa_id + INNER JOIN code_association c ON v.type=c.code_uid AND v.soa_id=c.soa_id INNER JOIN ddf_terminology ddf ON c.codelist_code=ddf.codelist_code AND c.code=ddf.code WHERE v.soa_id =? AND v.type=? 
""", @@ -779,7 +801,7 @@ def get_encounter_environment_sv(soa_id: int, code_uid: str): conn = _connect() cur = conn.cursor() cur.execute( - "SELECT code FROM code WHERE soa_id=? AND code_uid=?", + "SELECT code FROM code_association WHERE soa_id=? AND code_uid=?", (soa_id, code_uid), ) row = cur.fetchone() @@ -886,7 +908,7 @@ def get_submission_value_for_code(soa_id: int, codelist_code: str, code_uid: str conn = _connect() cur = conn.cursor() cur.execute( - "SELECT code FROM code WHERE soa_id=? AND code_uid=?", + "SELECT code FROM code_association WHERE soa_id=? AND code_uid=?", (soa_id, code_uid), ) row = cur.fetchone() diff --git a/src/usdm/generate_arms.py b/src/usdm/generate_arms.py index fdb83c5..6bcdf48 100644 --- a/src/usdm/generate_arms.py +++ b/src/usdm/generate_arms.py @@ -25,7 +25,7 @@ def _get_type_code_tuple(soa_id: int, code_uid: str) -> Tuple[str, str, str, str cur = conn.cursor() cur.execute( "SELECT DISTINCT c.codelist_table, p.code,p.cdisc_submission_value,p.dataset_date " - "FROM code c INNER JOIN protocol_terminology p ON c.codelist_code = p.codelist_code " + "FROM code_association c INNER JOIN protocol_terminology p ON c.codelist_code = p.codelist_code " "AND c.code = p.code WHERE c.soa_id=? AND c.code_uid=?", ( soa_id, @@ -49,7 +49,7 @@ def _get_data_origin_type_tuple( cur = conn.cursor() cur.execute( "SELECT DISTINCT c.codelist_table,d.code,d.cdisc_submission_value,d.dataset_date " - "FROM code c INNER JOIN ddf_terminology d ON c.codelist_code = d.codelist_code " + "FROM code_association c INNER JOIN ddf_terminology d ON c.codelist_code = d.codelist_code " "AND c.code = d.code WHERE c.soa_id=? 
AND c.code_uid=?", ( soa_id, diff --git a/src/usdm/generate_encounters.py b/src/usdm/generate_encounters.py index eb5a2c2..47d4c95 100644 --- a/src/usdm/generate_encounters.py +++ b/src/usdm/generate_encounters.py @@ -98,7 +98,7 @@ def _get_type_code_tuple(soa_id: int, code_uid: str) -> Tuple[str, str, str, str cur = conn.cursor() cur.execute( "SELECT DISTINCT c.codelist_table, p.code,p.cdisc_submission_value,p.dataset_date " - "FROM code c INNER JOIN ddf_terminology p ON c.codelist_code = p.codelist_code " + "FROM code_association c INNER JOIN ddf_terminology p ON c.codelist_code = p.codelist_code " "AND c.code = p.code WHERE c.soa_id=? AND c.code_uid=?", ( soa_id, @@ -120,7 +120,7 @@ def _get_code_tuple(soa_id: int, code_uid: str) -> Tuple[str, str]: cur = conn.cursor() cur.execute( "SELECT DISTINCT c.codelist_table,c.code " - "FROM code c WHERE c.soa_id=? AND c.code_uid=?", + "FROM code_association c WHERE c.soa_id=? AND c.code_uid=?", ( soa_id, code_uid, diff --git a/src/usdm/generate_study_epochs.py b/src/usdm/generate_study_epochs.py index ee61700..5b3a9cb 100644 --- a/src/usdm/generate_study_epochs.py +++ b/src/usdm/generate_study_epochs.py @@ -93,14 +93,14 @@ def build_usdm_epochs(soa_id: int) -> List[Dict[str, Any]]: if "order_index" in cols: cur.execute( "SELECT e.id, e.epoch_uid, e.name, e.epoch_label, e.epoch_description, e.type, c.code " - "FROM epoch e INNER JOIN code c ON e.soa_id = c.soa_id AND e.type = c.code_uid " + "FROM epoch e INNER JOIN code_association c ON e.soa_id = c.soa_id AND e.type = c.code_uid " "WHERE e.soa_id=? ORDER BY e.order_index, e.id", (soa_id,), ) else: cur.execute( "SELECT e.id, e.epoch_uid, e.name, e.epoch_label, e.epoch_description, e.type, c.code " - "FROM epoch e INNER JOIN code c ON e.soa_id = c.soa_id AND e.type = c.code_uid " + "FROM epoch e INNER JOIN code_association c ON e.soa_id = c.soa_id AND e.type = c.code_uid " "WHERE e.soa_id=? 
ORDER BY e.id", (soa_id,), ) diff --git a/src/usdm/generate_study_timings.py b/src/usdm/generate_study_timings.py index ff4d8e0..1f10e64 100644 --- a/src/usdm/generate_study_timings.py +++ b/src/usdm/generate_study_timings.py @@ -25,7 +25,7 @@ def _get_timing_code_values(soa_id: int, code_uid: str) -> Tuple[str, str, str, cur = conn.cursor() cur.execute( "SELECT DISTINCT c.codelist_table,d.code,d.cdisc_submission_value,d.dataset_date " - "FROM code c INNER JOIN ddf_terminology d ON c.codelist_code = d.codelist_code " + "FROM code_association c INNER JOIN ddf_terminology d ON c.codelist_code = d.codelist_code " "AND c.code = d.code WHERE c.soa_id=? AND c.code_uid=?", ( soa_id, @@ -86,9 +86,9 @@ def build_usdm_timings( if member_of_timeline and member_of_timeline.strip(): cur.execute( """ - SELECT id,timing_uid,name,label,description,type,value,value_label,relative_to_from, + SELECT timing_uid,name,label,description,type,value,value_label,relative_to_from, relative_from_schedule_instance,relative_to_schedule_instance,window_label,window_upper, - window_lower,order_index FROM timing WHERE soa_id=? AND member_of_timeline=? order by length(timing_uid), + window_lower FROM timing WHERE soa_id=? AND member_of_timeline=? order by length(timing_uid), timing_uid """, (soa_id, member_of_timeline.strip()), @@ -96,9 +96,9 @@ def build_usdm_timings( else: cur.execute( """ - SELECT id,timing_uid,name,label,description,type,value,value_label,relative_to_from, + SELECT timing_uid,name,label,description,type,value,value_label,relative_to_from, relative_from_schedule_instance,relative_to_schedule_instance,window_label,window_upper, - window_lower,order_index FROM timing WHERE soa_id=? order by length(timing_uid), timing_uid + window_lower FROM timing WHERE soa_id=? 
order by length(timing_uid), timing_uid """, (soa_id,), ) @@ -108,7 +108,6 @@ def build_usdm_timings( for i, r in enumerate(rows): ( - row_id, timing_uid, name, label, @@ -122,7 +121,6 @@ def build_usdm_timings( window_label, window_upper, window_lower, - order_index, ) = ( r[0], r[1], @@ -137,8 +135,6 @@ def build_usdm_timings( r[10], r[11], r[12], - r[13], - r[14], ) t_code, t_decode, t_codeSystem, t_codeSystemVersion = _get_timing_code_values( soa_id, type @@ -157,7 +153,7 @@ def build_usdm_timings( "id": type, "extensionAttributes": [], "code": t_code[0], - "codeSystem": "db://" + t_codeSystem[0], + "codeSystem": "http://www.cdisc.org", "codeSystemVersion": t_codeSystemVersion[0], "decode": t_decode[0], "instanceType": "Code", diff --git a/tests/test_code_uid_generation.py b/tests/test_code_uid_generation.py index bfe7437..b4d5b33 100644 --- a/tests/test_code_uid_generation.py +++ b/tests/test_code_uid_generation.py @@ -35,16 +35,16 @@ def test_get_next_code_uid_mixed_existing(): conn.commit() # Insert mixed existing code_uids cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", (soa_id, "Code_1", "protocol_terminology", "C174222", "X"), ) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", - (soa_id, "Code_3", "ddf_terminology", "C188727", "Y"), + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + (soa_id, "Code_3", "http://www.cdisc.org", "C188727", "Y"), ) # Malformed tail should be ignored in max() and trigger fallback only if parsing fails for all cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", (soa_id, 
"Code_X", "protocol_terminology", "C174222", "Z"), ) conn.commit() @@ -66,12 +66,12 @@ def test_get_next_code_uid_all_invalid_tails(): conn.commit() # Insert only invalid tails that cannot be parsed as integers cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", (soa_id, "Code_A", "protocol_terminology", "C174222", "X"), ) cur.execute( - "INSERT INTO code (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", - (soa_id, "Code_B", "ddf_terminology", "C188727", "Y"), + "INSERT INTO code_association (soa_id, code_uid, codelist_table, codelist_code, code) VALUES (?,?,?,?,?)", + (soa_id, "Code_B", "http://www.cdisc.org", "C188727", "Y"), ) conn.commit() # Fallback should use len(existing)+1 -> 3 diff --git a/tests/test_timings_code_junction.py b/tests/test_timings_code_junction.py deleted file mode 100644 index 361d743..0000000 --- a/tests/test_timings_code_junction.py +++ /dev/null @@ -1,195 +0,0 @@ -from fastapi.testclient import TestClient - -from soa_builder.web.app import app -from soa_builder.web.db import _connect - -client = TestClient(app) - - -def _ensure_soa_clean(soa_id: int) -> int: - conn = _connect() - cur = conn.cursor() - # Ensure SOA exists - cur.execute( - "INSERT OR IGNORE INTO soa (id, name) VALUES (?, ?)", - (soa_id, f"Test SOA {soa_id}"), - ) - # Clean related tables for isolation - cur.execute("DELETE FROM timing WHERE soa_id=?", (soa_id,)) - cur.execute("DELETE FROM code WHERE soa_id=?", (soa_id,)) - conn.commit() - # Seed minimal ddf_terminology if missing - cur.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='ddf_terminology'" - ) - if cur.fetchone() is None: - cur.execute( - """ - CREATE TABLE ddf_terminology ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - codelist_code TEXT, - cdisc_submission_value TEXT, - code TEXT - ) - """ - ) - 
conn.commit() - # Upsert test codelist entries for C201264 (type) and C201265 (relativeToFrom) - # Clear any existing test entries to avoid duplicates - cur.execute( - "DELETE FROM ddf_terminology WHERE codelist_code IN ('C201264','C201265')" - ) - cur.executemany( - "INSERT INTO ddf_terminology (codelist_code, cdisc_submission_value, code) VALUES (?,?,?)", - [ - ("C201264", "TYPE_A", "C201264_A_CODE"), - ("C201264", "TYPE_B", "C201264_B_CODE"), - ("C201265", "from", "C201265_FROM_CODE"), - ("C201265", "to", "C201265_TO_CODE"), - ], - ) - conn.commit() - conn.close() - return soa_id - - -def _list_timings(soa_id: int): - r = client.get(f"/soa/{soa_id}/timings") - assert r.status_code == 200, r.text - return r.json() - - -def _code_rows(soa_id: int, codelist_code: str): - conn = _connect() - cur = conn.cursor() - cur.execute( - "SELECT code_uid, code FROM code WHERE soa_id=? AND codelist_code=? ORDER BY id", - (soa_id, codelist_code), - ) - rows = cur.fetchall() or [] - conn.close() - return [(r[0], r[1]) for r in rows] - - -def test_relative_to_from_update_creates_new_code_and_unchanged_does_not(): - soa_id = _ensure_soa_clean(17001) - - # Create T1 with relative_to_from = 'from'; T2 with 'to'. 
- r1 = client.post( - f"/ui/soa/{soa_id}/timings/create", - data={ - "name": "T1", - "type_submission_value": "TYPE_A", - "relative_to_from_submission_value": "from", - }, - follow_redirects=True, - ) - assert r1.status_code in (200, 303) - r2 = client.post( - f"/ui/soa/{soa_id}/timings/create", - data={ - "name": "T2", - "type_submission_value": "TYPE_B", - "relative_to_from_submission_value": "to", - }, - follow_redirects=True, - ) - assert r2.status_code in (200, 303) - - timings = _list_timings(soa_id) - assert len(timings) == 2 - t1_id = timings[0]["id"] - t2_id = timings[1]["id"] - - code_rtf_before = _code_rows(soa_id, "C201265") - assert len(code_rtf_before) == 2 - - # Update T1 relative_to_from to 'to' (same code exists from T2) -> must create NEW Code_N - u1 = client.post( - f"/ui/soa/{soa_id}/timings/{t1_id}/update", - data={"name": timings[0]["name"], "relative_to_from_submission_value": "to"}, - follow_redirects=True, - ) - assert u1.status_code in (200, 303) - - code_rtf_after_change = _code_rows(soa_id, "C201265") - assert len(code_rtf_after_change) == 3 # new code row created - - timings_after = _list_timings(soa_id) - t1_after = [t for t in timings_after if t["id"] == t1_id][0] - t2_after = [t for t in timings_after if t["id"] == t2_id][0] - assert ( - t1_after["relative_to_from"] != t2_after["relative_to_from"] - ) # distinct Code_Ns - - # Update T1 relative_to_from to 'to' again (unchanged) -> must NOT create a new code - u2 = client.post( - f"/ui/soa/{soa_id}/timings/{t1_id}/update", - data={"name": t1_after["name"], "relative_to_from_submission_value": "to"}, - follow_redirects=True, - ) - assert u2.status_code in (200, 303) - - code_rtf_after_unchanged = _code_rows(soa_id, "C201265") - assert len(code_rtf_after_unchanged) == 3 # unchanged selection does not add - - -def test_type_update_creates_new_code_and_unchanged_does_not(): - soa_id = _ensure_soa_clean(17002) - - # Create T1 with type = 'TYPE_A'; T2 with 'TYPE_B'. 
- r1 = client.post( - f"/ui/soa/{soa_id}/timings/create", - data={ - "name": "T1", - "type_submission_value": "TYPE_A", - "relative_to_from_submission_value": "from", - }, - follow_redirects=True, - ) - assert r1.status_code in (200, 303) - r2 = client.post( - f"/ui/soa/{soa_id}/timings/create", - data={ - "name": "T2", - "type_submission_value": "TYPE_B", - "relative_to_from_submission_value": "to", - }, - follow_redirects=True, - ) - assert r2.status_code in (200, 303) - - timings = _list_timings(soa_id) - assert len(timings) == 2 - t1_id = timings[0]["id"] - t2_id = timings[1]["id"] - - code_type_before = _code_rows(soa_id, "C201264") - assert len(code_type_before) == 2 - - # Update T1 type to 'TYPE_B' (same code exists from T2) -> must create NEW Code_N - u1 = client.post( - f"/ui/soa/{soa_id}/timings/{t1_id}/update", - data={"name": timings[0]["name"], "type_submission_value": "TYPE_B"}, - follow_redirects=True, - ) - assert u1.status_code in (200, 303) - - code_type_after_change = _code_rows(soa_id, "C201264") - assert len(code_type_after_change) == 3 # new code row created - - timings_after = _list_timings(soa_id) - t1_after = [t for t in timings_after if t["id"] == t1_id][0] - t2_after = [t for t in timings_after if t["id"] == t2_id][0] - assert t1_after["type"] != t2_after["type"] # distinct Code_Ns - - # Update T1 type to 'TYPE_B' again (unchanged) -> must NOT create a new code - u2 = client.post( - f"/ui/soa/{soa_id}/timings/{t1_id}/update", - data={"name": t1_after["name"], "type_submission_value": "TYPE_B"}, - follow_redirects=True, - ) - assert u2.status_code in (200, 303) - - code_type_after_unchanged = _code_rows(soa_id, "C201264") - assert len(code_type_after_unchanged) == 3 # unchanged selection does not add