Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
268 changes: 268 additions & 0 deletions src/soa_builder/web/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import os
import re
import re as _re
import urllib.parse
import tempfile
import time
from contextlib import asynccontextmanager
Expand Down Expand Up @@ -999,6 +1000,217 @@ def _fetch_matrix(soa_id: int):
return visits, activities, cells


def fetch_biomedical_concept_categories() -> list[dict]:
    """Return list of Biomedical Concept Categories from CDISC Library.

    Normalized shape:
        [{'name': <category_name>, 'title': <title>, 'href': <absolute_href>}]

    Categories are read from the ``_links.categories`` list of the response
    and returned sorted case-insensitively by title. An entry is kept only
    when it has both a name and a usable ``_links.self.href``; malformed
    entries are skipped one by one instead of discarding the whole list.

    A non-200 status, a non-JSON body, or any unexpected error is logged and
    yields an empty list.
    """
    url = "https://api.library.cdisc.org/api/cosmos/v2/mdr/bc/categories"
    base_prefix = "https://api.library.cdisc.org/api/cosmos/v2"
    headers = {"Accept": "application/json"}
    api_key = _get_cdisc_api_key()
    subscription_key = os.environ.get("CDISC_SUBSCRIPTION_KEY") or api_key
    # Some CDISC gateways require subscription key header, others accept
    # bearer/api-key; send all when available.
    if subscription_key:
        headers["Ocp-Apim-Subscription-Key"] = subscription_key
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"  # bearer token style
        headers["api-key"] = api_key  # fallback header name

    def _normalize_href(h: object) -> Optional[str]:
        """Return *h* as an absolute URL, or None when it is empty or not a string."""
        if not isinstance(h, str) or not h:
            return None
        if h.startswith(("http://", "https://")):
            return h
        if h.startswith("/"):
            return base_prefix + h
        return base_prefix + "/" + h

    try:
        resp = requests.get(url, headers=headers, timeout=15)
        if resp.status_code != 200:
            logger.warning(
                "BC categories fetch HTTP %s (snippet=%s)",
                resp.status_code,
                resp.text[:200],
            )
            return []
        try:
            data = resp.json()
        except ValueError:
            logger.error("BC categories fetch 200 but non-JSON response")
            return []

        # Walk down to _links.categories, tolerating any unexpected shape.
        links = data.get("_links") if isinstance(data, dict) else None
        cat_list = links.get("categories") if isinstance(links, dict) else None
        if not isinstance(cat_list, list):
            cat_list = []

        categories: list[dict] = []
        for cat in cat_list:
            if not isinstance(cat, dict):
                continue
            name = cat.get("name")
            cat_links = cat.get("_links")
            self_link = cat_links.get("self") if isinstance(cat_links, dict) else None
            if not isinstance(self_link, dict):
                self_link = {}
            href = _normalize_href(self_link.get("href"))
            if not (name and href):
                continue
            title = self_link.get("title") or cat.get("label") or name
            categories.append(
                {
                    "name": str(name),
                    "title": str(title),
                    "href": href,
                }
            )

        categories.sort(key=lambda c: c["title"].lower())
        logger.info("Fetched %d BC categories from remote API", len(categories))
        return categories
    except Exception as e:  # pragma: no cover
        # Deliberate best-effort boundary: the UI renders an empty table
        # rather than failing when the remote API misbehaves.
        logger.error("BC categories fetch error: %s", e)
        return []


def _bc_absolute_href(href: object, base_prefix: str) -> Optional[str]:
    """Return *href* as an absolute URL under *base_prefix*, or None if unusable."""
    if not isinstance(href, str) or not href:
        return None
    if href.startswith(("http://", "https://")):
        return href
    if href.startswith("/"):
        return base_prefix + href
    return base_prefix + "/" + href


def _bc_concepts_from_hal_links(links: dict, base_prefix: str) -> list[dict]:
    """Collect concept entries from the list-valued groups of a HAL ``_links`` dict."""
    found: list[dict] = []
    for key, group in links.items():
        if key == "self" or not isinstance(group, list):
            continue
        for link in group:
            if not isinstance(link, dict):
                continue
            raw_href = link.get("href")
            # Only links that point into the concepts collection are concepts.
            if (
                not isinstance(raw_href, str)
                or "/mdr/bc/biomedicalconcepts" not in raw_href
            ):
                continue
            href = _bc_absolute_href(raw_href, base_prefix)
            code = link.get("code") or link.get("name") or link.get("identifier")
            if not code:
                # Derive the code from the last path segment (query stripped).
                code = raw_href.split("?")[0].rstrip("/").split("/")[-1]
                # A trailing 'biomedicalconcepts' is the list endpoint itself.
                if code == "biomedicalconcepts":
                    code = None
            if not (code and href):
                continue
            title = link.get("title") or code
            found.append({"code": str(code), "title": str(title), "href": href})
    return found


def _bc_concepts_from_items(items: list[dict], base_prefix: str) -> list[dict]:
    """Normalize plain (non-HAL) concept objects into code/title/href dicts."""
    found: list[dict] = []
    for it in items:
        code = (
            it.get("code")
            or it.get("conceptCode")
            or it.get("identifier")
            or it.get("id")
        )
        if not code:
            continue
        href = _bc_absolute_href(it.get("href") or it.get("link"), base_prefix)
        if not href:
            # No usable link supplied: build the concept URL from its code.
            quoted = urllib.parse.quote(str(code), safe="")
            href = f"{base_prefix}/mdr/bc/biomedicalconcepts/{quoted}"
        title = it.get("title") or it.get("name") or it.get("label") or code
        found.append({"code": str(code), "title": str(title), "href": href})
    return found


def fetch_biomedical_concepts_by_category(name: str) -> list[dict]:
    """Return biomedical concepts for a given category name.

    Uses category-specific endpoint: /mdr/bc/biomedicalconcepts?category=<name>
    Normalized list of dicts: {'code': <code>, 'title': <title>, 'href': <absolute_href>}
    sorted case-insensitively by title.

    Response shapes handled, in order:
      1. A dict with an ``items`` list: each dict item is a concept object.
      2. A dict with HAL ``_links``: every list-valued link group (except
         ``self``) is scanned for hrefs containing '/mdr/bc/biomedicalconcepts'.
      3. Otherwise the root dict itself is tried as a single concept object.
      4. A root list: each dict element is a concept object.

    Errors yield empty list; logs diagnostic info. A blank *name* returns an
    empty list without making a request.
    """
    if not name or not name.strip():
        return []
    category = name.strip()
    base_prefix = "https://api.library.cdisc.org/api/cosmos/v2"
    # Deterministic single encoding: unquote once then re-encode.
    # NOTE(review): if callers always pass an already-decoded name, this extra
    # unquote would corrupt a name containing a literal '%XX' sequence; confirm
    # against callers before removing it.
    decoded_once = urllib.parse.unquote(category)
    encoded = urllib.parse.quote(decoded_once, safe="")
    url = f"{base_prefix}/mdr/bc/biomedicalconcepts?category={encoded}"
    headers = {"Accept": "application/json"}
    api_key = _get_cdisc_api_key()
    subscription_key = os.environ.get("CDISC_SUBSCRIPTION_KEY") or api_key
    if subscription_key:
        headers["Ocp-Apim-Subscription-Key"] = subscription_key
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
        headers["api-key"] = api_key

    try:
        resp = requests.get(url, headers=headers, timeout=20)
        if resp.status_code != 200:
            logger.warning(
                "BC concepts by category fetch HTTP %s category=%s snippet=%s",
                resp.status_code,
                category,
                resp.text[:180],
            )
            return []
        try:
            data = resp.json()
        except ValueError:
            logger.warning(
                "BC concepts by category non-JSON response category=%s", category
            )
            return []

        concepts: list[dict] = []
        root_items: list[dict] = []
        if isinstance(data, dict):
            items = data.get("items")
            if isinstance(items, list):
                root_items = [it for it in items if isinstance(it, dict)]
            else:
                links = data.get("_links")
                if isinstance(links, dict):
                    concepts = _bc_concepts_from_hal_links(links, base_prefix)
                # Fallback: treat the root as a single concept object.
                if not concepts:
                    root_items = [data]
        elif isinstance(data, list):
            root_items = [it for it in data if isinstance(it, dict)]

        concepts.extend(_bc_concepts_from_items(root_items, base_prefix))
        concepts.sort(key=lambda c: c["title"].lower())

        if concepts:
            logger.info(
                "Fetched %d biomedical concepts for category '%s'",
                len(concepts),
                category,
            )
        else:
            logger.info("No biomedical concepts parsed for category '%s'", category)
        return concepts
    except Exception as e:  # pragma: no cover
        # Deliberate best-effort boundary: callers render an empty list.
        logger.error("BC concepts by category fetch error for '%s': %s", category, e)
        return []


def fetch_biomedical_concepts(force: bool = False):
"""Return list of biomedical concepts as [{'code':..., 'title':...}].
Precedence: CDISC_CONCEPTS_JSON env override (for tests/offline) > cached remote fetch > empty list.
Expand Down Expand Up @@ -2828,6 +3040,62 @@ def ui_concepts_list(request: Request):
)


@app.get("/ui/concept_categories", response_class=HTMLResponse)
def ui_categories_list(request: Request):
    """Render table listing biomedical concept categories (name + title + href).

    Template context:
        rows: one dict per category with 'name', 'title' (falls back to the
            name) and 'href'.
        count: number of rows.
        missing_key: True when neither CDISC_SUBSCRIPTION_KEY nor the
            configured API key provides a non-empty value.
    """
    categories = fetch_biomedical_concept_categories() or []
    rows = [
        {
            "name": c.get("name"),
            "title": c.get("title") or c.get("name"),
            "href": c.get("href"),
        }
        for c in categories
    ]
    subscription_key = os.environ.get("CDISC_SUBSCRIPTION_KEY") or _get_cdisc_api_key()
    return templates.TemplateResponse(
        request,
        "concept_categories.html",
        {
            "rows": rows,
            "count": len(rows),
            # Truthiness, not `is None`: an empty-string key is never sent by
            # the fetcher, so it must be reported as missing too.
            "missing_key": not subscription_key,
        },
    )


@app.get("/ui/concept_categories/view", response_class=HTMLResponse)
def ui_category_detail(request: Request, name: str = ""):
    """Show the biomedical concepts that belong to one category.

    Query params:
        name: category name as returned by /ui/concept_categories.

    A blank name produces a short HTML notice with a link back to the
    category list instead of the concept table.
    """
    category_name = name.strip()
    if category_name:
        fetched = fetch_biomedical_concepts_by_category(category_name) or []
        # Keep only the fields the template needs, in a fixed order.
        rows = [
            {field: concept.get(field) for field in ("code", "title", "href")}
            for concept in fetched
        ]
        context = {
            "category": category_name,
            "rows": rows,
            "count": len(rows),
        }
        return templates.TemplateResponse(
            request, "concept_category_detail.html", context
        )
    return HTMLResponse(
        "<p><em>Category name required.</em></p><p><a href='/ui/concept_categories'>Back</a></p>"
    )


@app.get("/ui/sdtm/specializations", response_class=HTMLResponse)
def ui_sdtm_specializations_list(request: Request, code: Optional[str] = None):
"""Render table listing SDTM dataset specializations (title + API link).
Expand Down
1 change: 1 addition & 0 deletions src/soa_builder/web/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
<h1>SoA Workbench</h1>
<nav>
<a href="/">Home</a> |
<a href="/ui/concept_categories">Biomedical Concept Categories</a> |
<a href="/ui/concepts">Biomedical Concepts</a> |
<a href="/ui/sdtm/specializations">SDTM Dataset Specializations</a> |
<a href="/ui/ddf/terminology">DDF Terminology</a> |
Expand Down
66 changes: 66 additions & 0 deletions src/soa_builder/web/templates/concept_categories.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
{% extends 'base.html' %}
{% block content %}
<h2>Biomedical Concept Categories (<span id="conceptTotal">{{ count }}</span>)</h2>
<p>
This list is derived from the CDISC Library API. Each link points to the
category's API resource (which lists biomedical concepts in that category).
</p>
<div style="margin:0.5em 0 1em;">
<label for="categorySearch"><strong>Search:</strong></label>
<input id="categorySearch" type="text" placeholder="Filter categories..." style="width:280px;" oninput="filterCategories()" />
<span id="searchCount" style="margin-left:1em;color:#555;"></span>
</div>

{% if rows %}
<table border="1" cellspacing="0" cellpadding="4" id="categoriesTable">
<thead>
<tr>
<th style="text-align:left;">Name</th>
<th style="text-align:left;">Title</th>
</tr>
</thead>
<tbody>
{% for r in rows %}
<tr>
<td>
{% if r.name %}
<!-- Pass raw name; backend will encode once to avoid double-encoding -->
<a href="/ui/concept_categories/view?name={{ r.name }}">{{ r.name }}</a>
Copy link

Copilot AI Nov 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The category name is inserted directly into the URL query string without URL (percent) encoding. Use Jinja's `urlencode` filter to properly encode the name parameter: href="/ui/concept_categories/view?name={{ r.name | urlencode }}"

Suggested change
<a href="/ui/concept_categories/view?name={{ r.name }}">{{ r.name }}</a>
<a href="/ui/concept_categories/view?name={{ r.name | urlencode }}">{{ r.name }}</a>

Copilot uses AI. Check for mistakes.
{% else %}
<em>n/a</em>
{% endif %}
</td>
<td>{{ r.title }}</td>
</tr>
{% endfor %}
</tbody>
</table>

<script>
function filterCategories(){
  // Hide every table row whose text does not contain the search box value
  // (case-insensitive) and show a match count while a query is active.
  const needle = document.getElementById('categorySearch').value.toLowerCase();
  let shown = 0;
  for (const row of document.querySelectorAll('#categoriesTable tbody tr')) {
    const matches = !needle || row.innerText.toLowerCase().indexOf(needle) !== -1;
    row.style.display = matches ? '' : 'none';
    if (matches) {
      shown++;
    }
  }
  const counter = document.getElementById('searchCount');
  // An empty query clears the counter instead of reporting every row.
  counter.textContent = needle
    ? shown + ' match' + (shown === 1 ? '' : 'es')
    : '';
}
</script>
{% else %}
<p><em>No biomedical concept categories available (empty list).</em></p>
{% endif %}

<p><a href="/">Return Home</a></p>
{% endblock %}
32 changes: 32 additions & 0 deletions src/soa_builder/web/templates/concept_category_detail.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{% extends 'base.html' %}
{% block content %}
{# Lists the concepts of one category. Context: `category` (name shown in the
   heading), `count`, and `rows` (dicts read here for `title` and `code`). #}
<h2>Biomedical Concepts in Category: {{ category }}</h2>
<p>Total concepts: <strong>{{ count }}</strong></p>

{% if rows %}
<table border="1" cellspacing="0" cellpadding="4" id="conceptsTable">
  <thead>
    <tr>
      <th style="text-align:left;">Title</th>
      <th style="text-align:center;">Biomedical Concept Code</th>
    </tr>
  </thead>
  <tbody>
    {% for r in rows %}
    <tr>
      <td>{{ r.title }}</td>
      <td style="text-align:center;">
        {% if r.code %}
        {# Percent-encode the code so characters such as spaces, '?' or '#'
           cannot break the link. #}
        <a href="/ui/concepts/{{ r.code | urlencode }}">{{ r.code }}</a>
        {% else %}<em>n/a</em>{% endif %}
      </td>
    </tr>
    {% endfor %}
  </tbody>
</table>
{% else %}
<p><em>No concepts found for this category.</em></p>
{% endif %}

<p><a href="/ui/concept_categories">&larr; Back to Categories</a></p>
{% endblock %}
Loading