From 6ab1fca1e9926897cf5e8f039903ffb04ec281c8 Mon Sep 17 00:00:00 2001 From: Darren <3921919+pendingintent@users.noreply.github.com> Date: Fri, 21 Nov 2025 15:00:18 -0500 Subject: [PATCH 1/3] Created helper, endpoint, templates, tests for biomedical concept categories --- src/soa_builder/web/app.py | 270 ++++++++++++++++++ src/soa_builder/web/templates/base.html | 1 + .../web/templates/concept_categories.html | 66 +++++ .../templates/concept_category_detail.html | 32 +++ .../web/templates/concepts_list.html | 4 +- tests/test_concept_categories.py | 161 +++++++++++ 6 files changed, 531 insertions(+), 3 deletions(-) create mode 100644 src/soa_builder/web/templates/concept_categories.html create mode 100644 src/soa_builder/web/templates/concept_category_detail.html create mode 100644 tests/test_concept_categories.py diff --git a/src/soa_builder/web/app.py b/src/soa_builder/web/app.py index 72773fe..6d842d2 100644 --- a/src/soa_builder/web/app.py +++ b/src/soa_builder/web/app.py @@ -999,6 +999,220 @@ def _fetch_matrix(soa_id: int): return visits, activities, cells +def fetch_biomedical_concept_categories() -> list[dict]: + """Return list of Biomedical Concept Categories from CDISC Library. + + Normalized shape: + [{'name': , 'title': , 'href': <absolute_href>}] + """ + url = "https://api.library.cdisc.org/api/cosmos/v2/mdr/bc/categories" + base_prefix = "https://api.library.cdisc.org/api/cosmos/v2" + headers = {"Accept": "application/json"} + api_key = _get_cdisc_api_key() + subscription_key = os.environ.get("CDISC_SUBSCRIPTION_KEY") or api_key + # Some CDISC gateways require subscription key header, others accept bearer/api-key; send all when available. + if subscription_key: + headers["Ocp-Apim-Subscription-Key"] = subscription_key + if api_key: + headers["Authorization"] = f"Bearer {api_key}" # bearer token style + headers["api-key"] = api_key # fallback header name + + def _normalize_href(h: Optional[str]) -> Optional[str]: + if not h: + return None + if h.startswith("http://") or h.startswith("https://"): + return h + if h.startswith("/"): + return base_prefix + h + return base_prefix + "/" + h + + try: + resp = requests.get(url, headers=headers, timeout=15) + if resp.status_code != 200: + logger.warning( + "BC categories fetch HTTP %s (snippet=%s)", + resp.status_code, + resp.text[:200], + ) + return [] + try: + data = resp.json() + except ValueError: + logger.error("BC categories fetch 200 but non-JSON response") + return [] + + categories: list[dict] = [] + if ( + isinstance(data, dict) + and "_links" in data + and isinstance(data["_links"], dict) + ): + cat_list = data["_links"].get("categories") or [] + if isinstance(cat_list, list): + for cat in cat_list: + if not isinstance(cat, dict): + continue + name = cat.get("name") + self_link = (cat.get("_links", {}) or {}).get("self") or {} + if not isinstance(self_link, dict): + self_link = {} + href = _normalize_href(self_link.get("href")) + title = self_link.get("title") or cat.get("label") or name or href + if name and href: + categories.append( + { + "name": str(name), + "title": str(title or name), + "href": href, + } + ) + categories.sort(key=lambda c: (c["title"] or "").lower()) + logger.info("Fetched %d BC categories from remote API", len(categories)) + return categories + except Exception as e: # pragma: no cover + logger.error("BC categories fetch error: %s", e) + return [] + + +def fetch_biomedical_concepts_by_category(name: str) -> list[dict]: + """Return biomedical concepts for a given category name. + + Uses category-specific endpoint: /mdr/bc/biomedicalconcepts?category=<name> + Normalized list of dicts: {'code': <code>, 'title': <title>, 'href': <absolute_href>} + Errors yield empty list; logs diagnostic info. + """ + if not name or not name.strip(): + return [] + category = name.strip() + base_prefix = "https://api.library.cdisc.org/api/cosmos/v2" + # Endpoint pattern observed in category self links + # Encode only if raw value does not already contain percent escapes + if "%" in category: + encoded = category # assume already percent-encoded + else: + encoded = requests.utils.quote(category, safe="") + url = f"{base_prefix}/mdr/bc/biomedicalconcepts?category={encoded}" + headers = {"Accept": "application/json"} + api_key = _get_cdisc_api_key() + subscription_key = os.environ.get("CDISC_SUBSCRIPTION_KEY") or api_key + if subscription_key: + headers["Ocp-Apim-Subscription-Key"] = subscription_key + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + headers["api-key"] = api_key + + def _normalize_href(h: Optional[str]) -> Optional[str]: + if not h: + return None + if h.startswith("http://") or h.startswith("https://"): + return h + if h.startswith("/"): + return base_prefix + h + return base_prefix + "/" + h + + concepts: list[dict] = [] + try: + resp = requests.get(url, headers=headers, timeout=20) + if resp.status_code != 200: + logger.warning( + "BC concepts by category fetch HTTP %s category=%s snippet=%s", + resp.status_code, + category, + resp.text[:180], + ) + return [] + try: + data = resp.json() + except ValueError: + logger.warning( + "BC concepts by category non-JSON response category=%s", category + ) + return [] + + # Strategy: + # 1. If 'items' list present, treat as direct concept objects. + # 2. Else if HAL '_links' present, scan all list-valued link groups for concept links. + # Recognize concept links by href containing '/mdr/bc/biomedicalconcepts/' or query '?concept=' style; + # derive code from link.get('code') or last path segment. + # 3. Else if root is a single dict that looks like a concept, process it. + root_items: list[dict] = [] + if isinstance(data, dict): + # Direct items array + if isinstance(data.get("items"), list): + root_items = [it for it in data["items"] if isinstance(it, dict)] + else: + # HAL links exploration + links = data.get("_links") + if isinstance(links, dict): + # Collect potential lists under known or unknown keys + for key, val in links.items(): + if key == "self": + continue + if isinstance(val, list): + for link in val: + if not isinstance(link, dict): + continue + raw_href = link.get("href") + if not isinstance(raw_href, str): + continue + href_norm = _normalize_href(raw_href) + # Identify concept link by path pattern + if "/mdr/bc/biomedicalconcepts" in raw_href: + # Extract code (last path component before query) if not provided + code = ( + link.get("code") + or link.get("name") + or link.get("identifier") + ) + if not code: + # Parse from path + path_part = raw_href.split("?")[0].rstrip("/") + code = path_part.split("/")[-1] + # If code equals 'biomedicalconcepts' it is the list endpoint; skip + if code == "biomedicalconcepts": + code = None + title = link.get("title") or code or href_norm + if code and href_norm: + concepts.append( + { + "code": str(code), + "title": str(title), + "href": href_norm, + } + ) + # Fallback single object + if not concepts: + root_items = [data] + elif isinstance(data, list): + root_items = [it for it in data if isinstance(it, dict)] + + # Process root_items (non-HAL direct objects) if any + for it in root_items: + code = ( + it.get("code") + or it.get("conceptCode") + or it.get("identifier") + or it.get("id") + ) + href = _normalize_href(it.get("href") or it.get("link")) + if not href and code: + href = f"{base_prefix}/mdr/bc/biomedicalconcepts/{code}" + title = it.get("title") or it.get("name") or it.get("label") or code + if code and href: + concepts.append({"code": str(code), "title": str(title), "href": href}) + + if not concepts: + logger.info("No biomedical concepts parsed for category '%s'", category) + concepts.sort(key=lambda c: c["title"].lower()) + logger.info( + "Fetched %d biomedical concepts for category '%s'", len(concepts), category + ) + return concepts + except Exception as e: # pragma: no cover + logger.error("BC concepts by category fetch error for '%s': %s", category, e) + return [] + + def fetch_biomedical_concepts(force: bool = False): """Return list of biomedical concepts as [{'code':..., 'title':...}]. Precedence: CDISC_CONCEPTS_JSON env override (for tests/offline) > cached remote fetch > empty list. @@ -2828,6 +3042,62 @@ def ui_concepts_list(request: Request): ) +@app.get("/ui/concept_categories", response_class=HTMLResponse) +def ui_categories_list(request: Request): + """Render table listing biomedical concept categories (name + title + href).""" + categories = fetch_biomedical_concept_categories() or [] + rows = [ + { + "name": c.get("name"), + "title": c.get("title") or c.get("name"), + "href": c.get("href"), + } + for c in categories + ] + subscription_key = os.environ.get("CDISC_SUBSCRIPTION_KEY") or _get_cdisc_api_key() + return templates.TemplateResponse( + request, + "concept_categories.html", + { + "rows": rows, + "count": len(rows), + "missing_key": subscription_key is None, + }, + ) + + +@app.get("/ui/concept_categories/view", response_class=HTMLResponse) +def ui_category_detail(request: Request, name: str = ""): + """Render list of biomedical concepts within a given category name. + + Query params: + name: category name as returned by /ui/concept_categories. + """ + category_name = name.strip() + if not category_name: + return HTMLResponse( + "<p><em>Category name required.</em></p><p><a href='/ui/concept_categories'>Back</a></p>" + ) + concepts = fetch_biomedical_concepts_by_category(category_name) or [] + rows = [ + { + "code": c.get("code"), + "title": c.get("title"), + "href": c.get("href"), + } + for c in concepts + ] + return templates.TemplateResponse( + request, + "concept_category_detail.html", + { + "category": category_name, + "rows": rows, + "count": len(rows), + }, + ) + + @app.get("/ui/sdtm/specializations", response_class=HTMLResponse) def ui_sdtm_specializations_list(request: Request, code: Optional[str] = None): """Render table listing SDTM dataset specializations (title + API link). diff --git a/src/soa_builder/web/templates/base.html b/src/soa_builder/web/templates/base.html index 3ef343e..797beb8 100644 --- a/src/soa_builder/web/templates/base.html +++ b/src/soa_builder/web/templates/base.html @@ -11,6 +11,7 @@ <h1>SoA Workbench</h1> <nav> <a href="/">Home</a> | + <a href="/ui/concept_categories">Biomedical Concept Categories</a> | <a href="/ui/concepts">Biomedical Concepts</a> | <a href="/ui/sdtm/specializations">SDTM Dataset Specializations</a> | <a href="/ui/ddf/terminology">DDF Terminology</a> | diff --git a/src/soa_builder/web/templates/concept_categories.html b/src/soa_builder/web/templates/concept_categories.html new file mode 100644 index 0000000..b59342f --- /dev/null +++ b/src/soa_builder/web/templates/concept_categories.html @@ -0,0 +1,66 @@ +{% extends 'base.html' %} +{% block content %} +<h2>Biomedical Concept Categories (<span id="conceptTotal">{{ count }}</span>)</h2> +<p> + This list is derived from the CDISC Library API. Each link points to the + category's API resource (which lists biomedical concepts in that category). +</p> +<div style="margin:0.5em 0 1em;"> + <label for="categorySearch"><strong>Search:</strong></label> + <input id="categorySearch" type="text" placeholder="Filter categories..." style="width:280px;" oninput="filterCategories()" /> + <span id="searchCount" style="margin-left:1em;color:#555;"></span> +</div> + +{% if rows %} +<table border="1" cellspacing="0" cellpadding="4" id="categoriesTable"> + <thead> + <tr> + <th style="text-align:left;">Name</th> + <th style="text-align:left;">Title</th> + </tr> + </thead> + <tbody> + {% for r in rows %} + <tr> + <td> + {% if r.name %} + <!-- Pass raw name; backend will encode once to avoid double-encoding --> + <a href="/ui/concept_categories/view?name={{ r.name }}">{{ r.name }}</a> + {% else %} + <em>n/a</em> + {% endif %} + </td> + <td>{{ r.title }}</td> + </tr> + {% endfor %} + </tbody> +</table> + +<script> +function filterCategories(){ + const q = document.getElementById('categorySearch').value.toLowerCase(); + const rows = document.querySelectorAll('#categoriesTable tbody tr'); + let visible = 0; + rows.forEach(tr => { + const text = tr.innerText.toLowerCase(); + if(!q || text.indexOf(q) !== -1){ + tr.style.display = ''; + visible++; + } else { + tr.style.display = 'none'; + } + }); + const sc = document.getElementById('searchCount'); + if(q){ + sc.textContent = visible + ' match' + (visible === 1 ? '' : 'es'); + } else { + sc.textContent = ''; + } +} +</script> +{% else %} + <p><em>No biomedical concept categories available (empty list).</em></p> +{% endif %} + +<p><a href="/">Return Home</a></p> +{% endblock %} \ No newline at end of file diff --git a/src/soa_builder/web/templates/concept_category_detail.html b/src/soa_builder/web/templates/concept_category_detail.html new file mode 100644 index 0000000..16f63b5 --- /dev/null +++ b/src/soa_builder/web/templates/concept_category_detail.html @@ -0,0 +1,32 @@ +{% extends 'base.html' %} +{% block content %} +<h2>Biomedical Concepts in Category: {{ category }}</h2> +<p>Total concepts: <strong>{{ count }}</strong></p> + +{% if rows %} +<table border="1" cellspacing="0" cellpadding="4" id="conceptsTable"> + <thead> + <tr> + <th style="text-align:left;">Title</th> + <th style="text-align:center;">Biomedical Concept Code</th> + </tr> + </thead> + <tbody> + {% for r in rows %} + <tr> + <td>{{ r.title }}</td> + <td style="text-align:center;"> + {% if r.code %} + <a href="/ui/concepts/{{ r.code }}">{{ r.code }}</a> + {% else %}<em>n/a</em>{% endif %} + </td> + </tr> + {% endfor %} + </tbody> +</table> +{% else %} + <p><em>No concepts found for this category.</em></p> +{% endif %} + +<p><a href="/ui/concept_categories">← Back to Categories</a></p> +{% endblock %} \ No newline at end of file diff --git a/src/soa_builder/web/templates/concepts_list.html b/src/soa_builder/web/templates/concepts_list.html index 84201ba..f48db14 100644 --- a/src/soa_builder/web/templates/concepts_list.html +++ b/src/soa_builder/web/templates/concepts_list.html @@ -13,7 +13,6 @@ <h2>Biomedical Concepts (<span id="conceptTotal">{{ count }}</span>)</h2> <tr> <th style="text-align:left;">Title</th> <th style="text-align:left;">Code</th> - <th style="text-align:left;">Href</th> <th style="text-align:left;">Related DSS</th> </tr> </thead> @@ -21,8 +20,7 @@ <h2>Biomedical Concepts (<span id="conceptTotal">{{ count }}</span>)</h2> {% for r in rows %} <tr> <td>{{ r.title }}</td> - <td>{{ r.code }}</td> - <td>{% if r.code %}<a href="/ui/concepts/{{ r.code }}">Detail</a>{% else %}<em>n/a</em>{% endif %}</td> + <td>{% if r.code %}<a href="/ui/concepts/{{ r.code }}">{{ r.code }}</a>{% else %}<em>n/a</em>{% endif %}</td> <td> {% if r.code %} <a href="/ui/sdtm/specializations?code={{ r.code }}">Related DSS</a> diff --git a/tests/test_concept_categories.py b/tests/test_concept_categories.py new file mode 100644 index 0000000..b2c5daf --- /dev/null +++ b/tests/test_concept_categories.py @@ -0,0 +1,161 @@ +import json +from typing import List +from fastapi.testclient import TestClient +import pytest + +from soa_builder.web.app import app, fetch_biomedical_concepts_by_category + +client = TestClient(app) + + +class DummyResp: + def __init__(self, status_code: int, json_data=None, text: str = ""): + self.status_code = status_code + self._json = json_data + self.text = text or json.dumps(json_data or {}) + + def json(self): + if self._json is None: + raise ValueError("No JSON") + return self._json + + +@pytest.mark.parametrize( + "raw_category, expected_url_fragment", + [ + ( + "Liver Findings", + "biomedicalconcepts?category=Liver%20Findings", + ), # encoding applied + ( + "Already%20Encoded", + "biomedicalconcepts?category=Already%20Encoded", + ), # no double encoding + ], +) +def test_helper_category_url_encoding(monkeypatch, raw_category, expected_url_fragment): + """Ensure category name is encoded only once and request performed.""" + captured_urls: List[str] = [] + + def fake_get(url, headers=None, timeout=0): # noqa: D401 + captured_urls.append(url) + return DummyResp(200, {"items": []}) + + monkeypatch.setattr("requests.get", fake_get) + fetch_biomedical_concepts_by_category(raw_category) + assert len(captured_urls) == 1 + assert expected_url_fragment in captured_urls[0] + + +def test_helper_parses_items_list(monkeypatch): + """Direct items list should return normalized concept dicts.""" + payload = { + "items": [ + { + "code": "ALT", + "title": "Alanine Aminotransferase", + "href": "/mdr/bc/biomedicalconcepts/ALT", + }, + { + "code": "AST", + "title": "Aspartate Aminotransferase", + "href": "/mdr/bc/biomedicalconcepts/AST", + }, + ] + } + + def fake_get(url, headers=None, timeout=0): + return DummyResp(200, payload) + + monkeypatch.setattr("requests.get", fake_get) + concepts = fetch_biomedical_concepts_by_category("Liver Findings") + assert {c["code"] for c in concepts} == {"ALT", "AST"} + assert all( + c["href"].startswith( + "https://api.library.cdisc.org/api/cosmos/v2/mdr/bc/biomedicalconcepts/" + ) + for c in concepts + ) + + +def test_helper_parses_hal_links(monkeypatch): + """HAL _links with concept hrefs should be parsed correctly.""" + payload = { + "_links": { + "self": {"href": "/mdr/bc/biomedicalconcepts?category=Liver%20Findings"}, + "concepts": [ + { + "href": "/mdr/bc/biomedicalconcepts/ALT", + "title": "Alanine Aminotransferase", + }, + { + "href": "/mdr/bc/biomedicalconcepts/AST", + "title": "Aspartate Aminotransferase", + }, + ], + } + } + + def fake_get(url, headers=None, timeout=0): + return DummyResp(200, payload) + + monkeypatch.setattr("requests.get", fake_get) + concepts = fetch_biomedical_concepts_by_category("Liver Findings") + codes = {c["code"] for c in concepts} + assert codes == {"ALT", "AST"} + # Titles preserved + assert any(c["title"] == "Alanine Aminotransferase" for c in concepts) + + +def test_helper_handles_non_200(monkeypatch): + """HTTP != 200 should yield empty list.""" + + def fake_get(url, headers=None, timeout=0): + return DummyResp(404, {}, text="Not Found") + + monkeypatch.setattr("requests.get", fake_get) + concepts = fetch_biomedical_concepts_by_category("Liver Findings") + assert concepts == [] + + +def test_category_ui_endpoint_renders_links(monkeypatch): + """UI endpoint should render internal concept detail links.""" + payload = { + "items": [ + { + "code": "ALT", + "title": "Alanine Aminotransferase", + "href": "/mdr/bc/biomedicalconcepts/ALT", + }, + { + "code": "AST", + "title": "Aspartate Aminotransferase", + "href": "/mdr/bc/biomedicalconcepts/AST", + }, + ] + } + + def fake_get(url, headers=None, timeout=0): + return DummyResp(200, payload) + + monkeypatch.setattr("requests.get", fake_get) + resp = client.get("/ui/concept_categories/view", params={"name": "Liver Findings"}) + assert resp.status_code == 200 + text = resp.text + # Internal links to concept detail page + assert "/ui/concepts/ALT" in text + assert "/ui/concepts/AST" in text + # Category name present + assert "Biomedical Concepts in Category: Liver Findings" in text + + +def test_category_ui_endpoint_empty(monkeypatch): + """Empty concepts should show fallback message.""" + + def fake_get(url, headers=None, timeout=0): + return DummyResp(200, {"items": []}) + + monkeypatch.setattr("requests.get", fake_get) + resp = client.get("/ui/concept_categories/view", params={"name": "Liver Findings"}) + assert resp.status_code == 200 + assert "No concepts found for this category." in resp.text From 3e922a9d8056a12318db85f28cbce9edeca128df Mon Sep 17 00:00:00 2001 From: Darren <3921919+pendingintent@users.noreply.github.com> Date: Fri, 21 Nov 2025 15:07:50 -0500 Subject: [PATCH 2/3] Update tests/test_concept_categories.py Remove noqa comment Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_concept_categories.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_concept_categories.py b/tests/test_concept_categories.py index b2c5daf..4c07769 100644 --- a/tests/test_concept_categories.py +++ b/tests/test_concept_categories.py @@ -37,7 +37,7 @@ def test_helper_category_url_encoding(monkeypatch, raw_category, expected_url_fr """Ensure category name is encoded only once and request performed.""" captured_urls: List[str] = [] - def fake_get(url, headers=None, timeout=0): # noqa: D401 + def fake_get(url, headers=None, timeout=0): captured_urls.append(url) return DummyResp(200, {"items": []}) From 444693bec57fc4b240abc5f97ef081efcd6a6dd5 Mon Sep 17 00:00:00 2001 From: Darren <3921919+pendingintent@users.noreply.github.com> Date: Fri, 21 Nov 2025 15:14:57 -0500 Subject: [PATCH 3/3] perform deterministic single encoding --- src/soa_builder/web/app.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/soa_builder/web/app.py b/src/soa_builder/web/app.py index 6d842d2..705bbc6 100644 --- a/src/soa_builder/web/app.py +++ b/src/soa_builder/web/app.py @@ -21,6 +21,7 @@ import os import re import re as _re +import urllib.parse import tempfile import time from contextlib import asynccontextmanager @@ -1085,12 +1086,9 @@ def fetch_biomedical_concepts_by_category(name: str) -> list[dict]: return [] category = name.strip() base_prefix = "https://api.library.cdisc.org/api/cosmos/v2" - # Endpoint pattern observed in category self links - # Encode only if raw value does not already contain percent escapes - if "%" in category: - encoded = category # assume already percent-encoded - else: - encoded = requests.utils.quote(category, safe="") + # Deterministic single encoding: unquote once then re-encode + decoded_once = urllib.parse.unquote(category) + encoded = requests.utils.quote(decoded_once, safe="") url = f"{base_prefix}/mdr/bc/biomedicalconcepts?category={encoded}" headers = {"Accept": "application/json"} api_key = _get_cdisc_api_key()