diff --git a/apps/gateway/Gateway.API/Services/PostgresPARequestStore.cs b/apps/gateway/Gateway.API/Services/PostgresPARequestStore.cs index 54269a8..1c4fbd8 100644 --- a/apps/gateway/Gateway.API/Services/PostgresPARequestStore.cs +++ b/apps/gateway/Gateway.API/Services/PostgresPARequestStore.cs @@ -301,17 +301,21 @@ private async Task<string> GenerateIdAsync(CancellationToken ct) await IdGenerationLock.WaitAsync(ct).ConfigureAwait(false); try { - var maxId = await _context.PriorAuthRequests + // Filter to only sequential PA-NNN IDs (exclude PA-DEMO-* etc.) + var sequentialIds = await _context.PriorAuthRequests .AsNoTracking() .Select(e => e.Id) - .OrderByDescending(id => id) - .FirstOrDefaultAsync(ct) + .Where(id => id.StartsWith("PA-") && id.Length <= 7) + .ToListAsync(ct) .ConfigureAwait(false); var counter = 1; - if (maxId is not null && maxId.StartsWith("PA-") && int.TryParse(maxId[3..], out var existing)) + foreach (var id in sequentialIds) { - counter = existing + 1; + if (int.TryParse(id[3..], out var existing) && existing >= counter) + { + counter = existing + 1; + } } return $"PA-{counter:D3}"; diff --git a/apps/intelligence/src/api/analyze.py b/apps/intelligence/src/api/analyze.py index 143ddc7..bd01a91 100644 --- a/apps/intelligence/src/api/analyze.py +++ b/apps/intelligence/src/api/analyze.py @@ -12,15 +12,12 @@ from src.models.clinical_bundle import ClinicalBundle from src.models.pa_form import PAFormResponse from src.parsers.pdf_parser import parse_pdf -from src.policies.example_policy import EXAMPLE_POLICY +from src.policies.registry import registry from src.reasoning.evidence_extractor import extract_evidence from src.reasoning.form_generator import generate_form_data router = APIRouter() -# Supported procedure codes (MRI Lumbar Spine) -SUPPORTED_PROCEDURE_CODES = {"72148", "72149", "72158"} - class AnalyzeRequest(BaseModel): """Request payload for analysis endpoint.""" @@ -36,14 +33,8 @@ async def analyze(request: AnalyzeRequest) -> PAFormResponse: Analyze clinical data and generate PA form response. Uses LLM to extract evidence from clinical data and generate PA form. + Resolves policy from registry; unknown CPT codes fall back to generic policy. 
""" - # Check if procedure is supported - if request.procedure_code not in SUPPORTED_PROCEDURE_CODES: - raise HTTPException( - status_code=400, - detail=f"Procedure code {request.procedure_code} not supported", - ) - # Parse clinical data into structured format bundle = ClinicalBundle.from_dict(request.patient_id, request.clinical_data) @@ -55,8 +46,8 @@ async def analyze(request: AnalyzeRequest) -> PAFormResponse: detail="patient.birth_date is required", ) - # Load policy with requested procedure code - policy = {**EXAMPLE_POLICY, "procedure_codes": [request.procedure_code]} + # Resolve policy from registry (no more 400 rejection for unsupported CPTs) + policy = registry.resolve(request.procedure_code) # Extract evidence using LLM evidence = await extract_evidence(bundle, policy) @@ -87,21 +78,11 @@ async def analyze_with_documents( except json.JSONDecodeError as e: raise HTTPException(status_code=400, detail=f"Invalid clinical data JSON: {e}") - # Check if procedure is supported - if procedure_code not in SUPPORTED_PROCEDURE_CODES: - raise HTTPException( - status_code=400, - detail=f"Procedure code {procedure_code} not supported", - ) - - # Parse clinical data into structured format bundle = ClinicalBundle.from_dict(patient_id, clinical_data_dict) # Read all document bytes, then parse PDFs in parallel pdf_bytes_list = [await doc.read() for doc in documents] - document_texts = list(await asyncio.gather(*[parse_pdf(b) for b in pdf_bytes_list])) - bundle.document_texts = document_texts # Validate required patient data @@ -112,8 +93,8 @@ async def analyze_with_documents( detail="patient.birth_date is required", ) - # Load policy with requested procedure code - policy = {**EXAMPLE_POLICY, "procedure_codes": [procedure_code]} + # Resolve policy from registry + policy = registry.resolve(procedure_code) # Extract evidence using LLM evidence = await extract_evidence(bundle, policy) @@ -122,30 +103,3 @@ async def analyze_with_documents( form_response = await generate_form_data(bundle, evidence, policy) return form_response - - -def _build_field_mappings(bundle: ClinicalBundle, procedure_code: str) -> dict[str, str]: - """Build PDF field mappings from clinical bundle.""" - patient_name = bundle.patient.name if bundle.patient else "Unknown" - patient_dob = ( - bundle.patient.birth_date.isoformat() - if bundle.patient and bundle.patient.birth_date - else "Unknown" - ) - member_id = ( - bundle.patient.member_id - if bundle.patient and bundle.patient.member_id - else "Unknown" - ) - diagnosis_codes = ", ".join(c.code for c in bundle.conditions) if bundle.conditions else "" - - return { - "PatientName": patient_name, - "PatientDOB": patient_dob, - "MemberID": member_id, - "DiagnosisCodes": diagnosis_codes, - "ProcedureCode": procedure_code, - "ClinicalSummary": "Awaiting production configuration", - "ProviderSignature": "", - "Date": "", - } diff --git a/apps/intelligence/src/models/pa_form.py b/apps/intelligence/src/models/pa_form.py index 0cfa78d..d605400 100644 --- a/apps/intelligence/src/models/pa_form.py +++ b/apps/intelligence/src/models/pa_form.py @@ -32,3 +32,7 @@ class PAFormResponse(BaseModel): field_mappings: dict[str, str] = Field( description="PDF field name to value mappings" ) + policy_id: str | None = Field(default=None, description="Policy identifier") + lcd_reference: str | None = Field( + default=None, description="LCD article reference" + ) diff --git a/apps/intelligence/src/models/policy.py b/apps/intelligence/src/models/policy.py new file mode 100644 index 0000000..f40ce59 --- 
/dev/null +++ b/apps/intelligence/src/models/policy.py @@ -0,0 +1,28 @@ +"""Policy data models for LCD-backed prior authorization criteria.""" + +from pydantic import BaseModel + + +class PolicyCriterion(BaseModel): + """A single criterion from a coverage policy.""" + + id: str + description: str + weight: float # 0.0-1.0, clinical importance + required: bool = False # Hard gate — if NOT_MET, caps score + lcd_section: str | None = None # e.g. "L34220 §4.2" + bypasses: list[str] = [] # criterion IDs this one bypasses when MET + + +class PolicyDefinition(BaseModel): + """Complete policy definition with LCD metadata.""" + + policy_id: str + policy_name: str + lcd_reference: str | None = None # e.g. "L34220" + lcd_title: str | None = None + lcd_contractor: str | None = None + payer: str + procedure_codes: list[str] + diagnosis_codes: list[str] = [] + criteria: list[PolicyCriterion] diff --git a/apps/intelligence/src/policies/generic_policy.py b/apps/intelligence/src/policies/generic_policy.py new file mode 100644 index 0000000..fbb8780 --- /dev/null +++ b/apps/intelligence/src/policies/generic_policy.py @@ -0,0 +1,35 @@ +"""Generic fallback policy for unsupported procedure codes.""" + +from src.models.policy import PolicyCriterion, PolicyDefinition + + +def build_generic_policy(procedure_code: str) -> PolicyDefinition: + """Build a generic medical necessity policy for any procedure code.""" + return PolicyDefinition( + policy_id=f"generic-{procedure_code}", + policy_name="General Medical Necessity", + lcd_reference=None, + payer="General", + procedure_codes=[procedure_code], + diagnosis_codes=[], + criteria=[ + PolicyCriterion( + id="medical_necessity", + description="Medical necessity is documented with clinical rationale", + weight=0.40, + required=True, + ), + PolicyCriterion( + id="diagnosis_present", + description="Valid diagnosis code is present and supports the procedure", + weight=0.30, + required=True, + ), + PolicyCriterion( + id="conservative_therapy", + description="Conservative therapy attempted or documented as not applicable", + weight=0.30, + required=False, + ), + ], + ) diff --git a/apps/intelligence/src/policies/registry.py b/apps/intelligence/src/policies/registry.py new file mode 100644 index 0000000..0a06646 --- /dev/null +++ b/apps/intelligence/src/policies/registry.py @@ -0,0 +1,30 @@ +"""Policy registry for resolving procedure codes to policy definitions.""" + +from src.models.policy import PolicyDefinition +from src.policies.generic_policy import build_generic_policy + + +class PolicyRegistry: + """Resolves procedure codes to LCD-backed policy definitions.""" + + def __init__(self) -> None: + self._by_cpt: dict[str, PolicyDefinition] = {} + + def register(self, policy: PolicyDefinition) -> None: + for cpt in policy.procedure_codes: + self._by_cpt[cpt] = policy + + def resolve(self, procedure_code: str) -> PolicyDefinition: + """Return LCD-backed policy if available, else generic fallback.""" + if procedure_code in self._by_cpt: + return self._by_cpt[procedure_code] + return build_generic_policy(procedure_code) + + +# Module-level singleton +registry = PolicyRegistry() + +# Import seed policies to register them +from src.policies.seed import register_all_seeds # noqa: E402 + +register_all_seeds(registry) diff --git a/apps/intelligence/src/policies/seed/__init__.py b/apps/intelligence/src/policies/seed/__init__.py new file mode 100644 index 0000000..e64335b --- /dev/null +++ b/apps/intelligence/src/policies/seed/__init__.py @@ -0,0 +1,13 @@ +"""Seed policy loader.""" 
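# --- Usage sketch (annotation only, not part of this patch) ----------------
# Shows how the PolicyRegistry above resolves CPT codes once the seed
# policies below are registered. Every name comes from modules added in this
# diff; nothing beyond them is assumed.
from src.policies.registry import registry

lumbar = registry.resolve("72148")    # seeded CPT -> LCD-backed policy
assert lumbar.lcd_reference == "L34220"

fallback = registry.resolve("99999")  # unknown CPT -> generic fallback
assert fallback.lcd_reference is None
assert fallback.policy_id == "generic-99999"
# ----------------------------------------------------------------------------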
+from src.policies.seed.mri_lumbar import POLICY as MRI_LUMBAR +from src.policies.seed.mri_brain import POLICY as MRI_BRAIN +from src.policies.seed.tka import POLICY as TKA +from src.policies.seed.physical_therapy import POLICY as PHYSICAL_THERAPY +from src.policies.seed.epidural_steroid import POLICY as EPIDURAL_STEROID + +ALL_SEED_POLICIES = [MRI_LUMBAR, MRI_BRAIN, TKA, PHYSICAL_THERAPY, EPIDURAL_STEROID] + + +def register_all_seeds(registry) -> None: + for policy in ALL_SEED_POLICIES: + registry.register(policy) diff --git a/apps/intelligence/src/policies/seed/epidural_steroid.py b/apps/intelligence/src/policies/seed/epidural_steroid.py new file mode 100644 index 0000000..64599c9 --- /dev/null +++ b/apps/intelligence/src/policies/seed/epidural_steroid.py @@ -0,0 +1,51 @@ +"""Epidural Steroid Injection seed policy — LCD L39240.""" + +from src.models.policy import PolicyCriterion, PolicyDefinition + +POLICY = PolicyDefinition( + policy_id="lcd-esi-L39240", + policy_name="Epidural Steroid Injection", + lcd_reference="L39240", + lcd_title="Epidural Steroid Injections", + lcd_contractor="Noridian Healthcare Solutions", + payer="CMS Medicare", + procedure_codes=["62322", "62323"], + diagnosis_codes=["M54.10", "M54.16", "M54.17", "M48.06"], + criteria=[ + PolicyCriterion( + id="diagnosis_confirmed", + description="Radiculopathy/stenosis confirmed by history, exam, and imaging", + weight=0.25, + required=True, + lcd_section="L39240 — Requirement 1", + ), + PolicyCriterion( + id="severity_documented", + description="Pain severe enough to impact QoL/function, documented with standardized scale", + weight=0.20, + required=True, + lcd_section="L39240 — Requirement 2", + ), + PolicyCriterion( + id="conservative_care_4wk", + description="4 weeks conservative care failed/intolerable (except acute herpes zoster)", + weight=0.25, + required=True, + lcd_section="L39240 — Requirement 3", + ), + PolicyCriterion( + id="frequency_within_limits", + description="<=4 sessions per region per rolling 12 months", + weight=0.15, + required=True, + lcd_section="L39240 — Frequency Limits", + ), + PolicyCriterion( + id="image_guidance_planned", + description="Fluoroscopy or CT guidance with contrast planned", + weight=0.15, + required=True, + lcd_section="L39240 — Procedural Requirements", + ), + ], +) diff --git a/apps/intelligence/src/policies/seed/mri_brain.py b/apps/intelligence/src/policies/seed/mri_brain.py new file mode 100644 index 0000000..088de90 --- /dev/null +++ b/apps/intelligence/src/policies/seed/mri_brain.py @@ -0,0 +1,44 @@ +"""MRI Brain seed policy — LCD L37373.""" + +from src.models.policy import PolicyCriterion, PolicyDefinition + +POLICY = PolicyDefinition( + policy_id="lcd-mri-brain-L37373", + policy_name="MRI Brain", + lcd_reference="L37373", + lcd_title="Magnetic Resonance Imaging of the Brain", + lcd_contractor="Noridian Healthcare Solutions", + payer="CMS Medicare", + procedure_codes=["70551", "70552", "70553"], + diagnosis_codes=["G40.909", "R51.9", "G43.909", "G35"], + criteria=[ + PolicyCriterion( + id="diagnosis_present", + description="Valid ICD-10 for neurological condition", + weight=0.15, + required=True, + lcd_section="L37373 / A57204 — Covered Diagnoses", + ), + PolicyCriterion( + id="neurological_indication", + description="Tumor, stroke, MS, seizures, unexplained neuro deficit", + weight=0.35, + required=True, + lcd_section="L37373 — Indications for MRI", + ), + PolicyCriterion( + id="ct_insufficient", + description="CT already performed and insufficient, or MRI specifically 
indicated", + weight=0.25, + required=False, + lcd_section="L37373 — MRI vs CT Selection", + ), + PolicyCriterion( + id="clinical_documentation", + description="Supporting clinical findings documented", + weight=0.25, + required=True, + lcd_section="L37373 — Coverage Requirements", + ), + ], +) diff --git a/apps/intelligence/src/policies/seed/mri_lumbar.py b/apps/intelligence/src/policies/seed/mri_lumbar.py new file mode 100644 index 0000000..d57836b --- /dev/null +++ b/apps/intelligence/src/policies/seed/mri_lumbar.py @@ -0,0 +1,52 @@ +"""MRI Lumbar Spine seed policy — LCD L34220.""" + +from src.models.policy import PolicyCriterion, PolicyDefinition + +POLICY = PolicyDefinition( + policy_id="lcd-mri-lumbar-L34220", + policy_name="MRI Lumbar Spine", + lcd_reference="L34220", + lcd_title="Magnetic Resonance Imaging of the Lumbar Spine", + lcd_contractor="Noridian Healthcare Solutions", + payer="CMS Medicare", + procedure_codes=["72148", "72149", "72158"], + diagnosis_codes=["M54.5", "M54.50", "M54.51", "M51.16", "M51.17"], + criteria=[ + PolicyCriterion( + id="diagnosis_present", + description="Valid ICD-10 for lumbar pathology", + weight=0.15, + required=True, + lcd_section="L34220 / A57206 — Covered Diagnoses", + ), + PolicyCriterion( + id="red_flag_screening", + description="Cauda equina, tumor, infection, major neuro deficit", + weight=0.25, + required=False, + lcd_section="L34220 — Immediate MRI Indications", + bypasses=["conservative_therapy_4wk"], + ), + PolicyCriterion( + id="conservative_therapy_4wk", + description="4+ weeks conservative management (NSAIDs, PT, activity mod) documented", + weight=0.30, + required=True, + lcd_section="L34220 — Non-Red-Flag Requirements", + ), + PolicyCriterion( + id="clinical_rationale", + description="Imaging abnormalities alone insufficient; supporting clinical rationale documented", + weight=0.20, + required=True, + lcd_section="L34220 — Coverage Principle", + ), + PolicyCriterion( + id="no_duplicate_imaging", + description="No recent duplicative CT/MRI without new justification", + weight=0.10, + required=False, + lcd_section="L34220 — Non-Covered Indications", + ), + ], +) diff --git a/apps/intelligence/src/policies/seed/physical_therapy.py b/apps/intelligence/src/policies/seed/physical_therapy.py new file mode 100644 index 0000000..44b4926 --- /dev/null +++ b/apps/intelligence/src/policies/seed/physical_therapy.py @@ -0,0 +1,44 @@ +"""Physical Therapy seed policy — LCD L34049.""" + +from src.models.policy import PolicyCriterion, PolicyDefinition + +POLICY = PolicyDefinition( + policy_id="lcd-pt-L34049", + policy_name="Physical Therapy", + lcd_reference="L34049", + lcd_title="Outpatient Physical and Occupational Therapy Services", + lcd_contractor="Noridian Healthcare Solutions", + payer="CMS Medicare", + procedure_codes=["97161", "97162", "97163"], + diagnosis_codes=["M54.5", "M25.561", "M79.3", "S83.511A"], + criteria=[ + PolicyCriterion( + id="improvement_potential", + description="Patient condition has improvement potential or actively improving", + weight=0.30, + required=True, + lcd_section="L34049 — Rehabilitative Therapy", + ), + PolicyCriterion( + id="skilled_service_required", + description="Service requires professional judgment, cannot be self-administered", + weight=0.25, + required=True, + lcd_section="L34049 — Skilled Service Requirements", + ), + PolicyCriterion( + id="individualized_plan", + description="Plan of care with goals, frequency, duration documented", + weight=0.25, + required=True, + lcd_section="L34049 — 
Documentation Requirements", + ), + PolicyCriterion( + id="objective_progress", + description="Successive objective measurements demonstrate progress", + weight=0.20, + required=False, + lcd_section="L34049 — Progress Documentation", + ), + ], +) diff --git a/apps/intelligence/src/policies/seed/tka.py b/apps/intelligence/src/policies/seed/tka.py new file mode 100644 index 0000000..a254125 --- /dev/null +++ b/apps/intelligence/src/policies/seed/tka.py @@ -0,0 +1,51 @@ +"""Total Knee Arthroplasty seed policy — LCD L36575.""" + +from src.models.policy import PolicyCriterion, PolicyDefinition + +POLICY = PolicyDefinition( + policy_id="lcd-tka-L36575", + policy_name="Total Knee Arthroplasty", + lcd_reference="L36575", + lcd_title="Total Knee Arthroplasty", + lcd_contractor="Noridian Healthcare Solutions", + payer="CMS Medicare", + procedure_codes=["27447"], + diagnosis_codes=["M17.0", "M17.11", "M17.12", "M87.052"], + criteria=[ + PolicyCriterion( + id="diagnosis_present", + description="Valid ICD-10 for knee joint disease", + weight=0.10, + required=True, + lcd_section="L36575 / A57685 — Covered Diagnoses", + ), + PolicyCriterion( + id="advanced_joint_disease", + description="Imaging showing joint space narrowing, osteophytes, sclerosis, AVN", + weight=0.25, + required=True, + lcd_section="L36575 — Advanced Joint Disease", + ), + PolicyCriterion( + id="functional_impairment", + description="Pain/disability interfering with ADLs, increased with weight bearing", + weight=0.25, + required=True, + lcd_section="L36575 — Functional Impairment", + ), + PolicyCriterion( + id="failed_conservative_mgmt", + description="Documented trials of NSAIDs, PT, assistive devices, injections", + weight=0.30, + required=True, + lcd_section="L36575 — Failed Conservative Management", + ), + PolicyCriterion( + id="no_contraindication", + description="No active joint infection, systemic bacteremia, skin infection at site", + weight=0.10, + required=True, + lcd_section="L36575 — Contraindications", + ), + ], +) diff --git a/apps/intelligence/src/reasoning/confidence_scorer.py b/apps/intelligence/src/reasoning/confidence_scorer.py new file mode 100644 index 0000000..f38f3da --- /dev/null +++ b/apps/intelligence/src/reasoning/confidence_scorer.py @@ -0,0 +1,90 @@ +"""Weighted LCD compliance confidence scoring algorithm.""" + +from dataclasses import dataclass +from typing import Literal + +from src.models.pa_form import EvidenceItem +from src.models.policy import PolicyDefinition + + +STATUS_SCORES = {"MET": 1.0, "UNCLEAR": 0.5, "NOT_MET": 0.0} + +SCORE_FLOOR = 0.05 +GATE_BASE = 0.65 +GATE_PENALTY_PER = 0.15 + + +@dataclass +class ScoreResult: + score: float + recommendation: Literal["APPROVE", "MANUAL_REVIEW", "NEED_INFO"] + + +def calculate_confidence( + evidence: list[EvidenceItem], + policy: PolicyDefinition, +) -> ScoreResult: + """Calculate weighted confidence score from evidence and policy.""" + criteria_by_id = {c.id: c for c in policy.criteria} + + # Build bypass set: IDs that are bypassed by a MET criterion + bypassed_ids: set[str] = set() + for e in evidence: + criterion = criteria_by_id.get(e.criterion_id) + if criterion and e.status == "MET" and criterion.bypasses: + bypassed_ids.update(criterion.bypasses) + + # Calculate weighted score + numerator = 0.0 + denominator = 0.0 + + for e in evidence: + criterion = criteria_by_id.get(e.criterion_id) + if criterion is None: + continue + + weight = criterion.weight + llm_conf = e.confidence + + # If bypassed, treat as MET + if e.criterion_id in bypassed_ids: + 
status_score = 1.0 + else: + status_score = STATUS_SCORES.get(e.status, 0.5) + + numerator += weight * status_score * llm_conf + denominator += weight * llm_conf + + if denominator == 0: + raw_score = SCORE_FLOOR + else: + raw_score = numerator / denominator + + # Hard gates: required criteria that are NOT_MET (and not bypassed) + required_not_met = [] + for e in evidence: + criterion = criteria_by_id.get(e.criterion_id) + if ( + criterion + and criterion.required + and e.status == "NOT_MET" + and e.criterion_id not in bypassed_ids + ): + required_not_met.append(e) + + if required_not_met: + gate_cap = GATE_BASE - GATE_PENALTY_PER * len(required_not_met) + raw_score = min(raw_score, gate_cap) + + # Floor and ceiling + final_score = max(SCORE_FLOOR, min(1.0, raw_score)) + + # Recommendation from score + if final_score >= 0.80: + recommendation = "APPROVE" + elif final_score >= 0.50: + recommendation = "MANUAL_REVIEW" + else: + recommendation = "NEED_INFO" + + return ScoreResult(score=round(final_score, 4), recommendation=recommendation) diff --git a/apps/intelligence/src/reasoning/evidence_extractor.py b/apps/intelligence/src/reasoning/evidence_extractor.py index 7c904eb..6dc1c06 100644 --- a/apps/intelligence/src/reasoning/evidence_extractor.py +++ b/apps/intelligence/src/reasoning/evidence_extractor.py @@ -12,39 +12,48 @@ from src.llm_client import chat_completion from src.models.clinical_bundle import ClinicalBundle from src.models.pa_form import EvidenceItem +from src.models.policy import PolicyCriterion, PolicyDefinition logger = logging.getLogger(__name__) async def evaluate_criterion( - criterion: dict[str, Any], + criterion: PolicyCriterion | dict[str, Any], clinical_summary: str, ) -> EvidenceItem: """ Evaluate a single policy criterion against clinical data using LLM. Args: - criterion: Policy criterion dict with 'id' and 'description' + criterion: PolicyCriterion or dict with 'id' and 'description' clinical_summary: Pre-built clinical data summary string Returns: EvidenceItem with evaluation result """ - criterion_id = criterion.get("id", "unknown") - criterion_desc = criterion.get("description", "") + if isinstance(criterion, PolicyCriterion): + criterion_id = criterion.id + criterion_desc = criterion.description + lcd_section = criterion.lcd_section + else: + criterion_id = criterion.get("id", "unknown") + criterion_desc = criterion.get("description", "") + lcd_section = None system_prompt = ( "You are a medical prior authorization analyst. Evaluate whether " "clinical evidence meets the specified criterion." ) + policy_ref = f"\nPolicy Reference: {lcd_section}" if lcd_section else "" user_prompt = f""" -Criterion: {criterion_desc} +Criterion: {criterion_desc}{policy_ref} Clinical Data: {clinical_summary} -Evaluate if this criterion is MET, NOT_MET, or UNCLEAR. Provide a brief -explanation of the evidence found. +Evaluate if this criterion is MET, NOT_MET, or UNCLEAR. +Indicate your confidence level: HIGH CONFIDENCE, MEDIUM CONFIDENCE, or LOW CONFIDENCE. +Provide a brief explanation of the evidence found. 
""" llm_response = await chat_completion( @@ -64,13 +73,18 @@ async def evaluate_criterion( # Use regex to handle "NOT MET", "NOT_MET", "NOTMET" variants if re.search(r"\bNOT[\s_]?MET\b", response_upper): status = "NOT_MET" - confidence = 0.8 elif re.search(r"\bMET\b", response_upper): status = "MET" - confidence = 0.8 elif re.search(r"\bUNCLEAR\b", response_upper): status = "UNCLEAR" + + # Parse confidence signal from LLM response + if "HIGH CONFIDENCE" in response_upper: + confidence = 0.9 + elif "LOW CONFIDENCE" in response_upper: confidence = 0.5 + else: + confidence = 0.7 # default MEDIUM return EvidenceItem( criterion_id=criterion_id, @@ -125,7 +139,7 @@ def _get_llm_semaphore() -> asyncio.Semaphore: async def _bounded_evaluate( - criterion: dict[str, Any], + criterion: PolicyCriterion | dict[str, Any], clinical_summary: str, semaphore: asyncio.Semaphore, ) -> EvidenceItem: @@ -136,7 +150,7 @@ async def _bounded_evaluate( async def extract_evidence( clinical_bundle: ClinicalBundle, - policy: dict[str, Any], + policy: PolicyDefinition | dict[str, Any], ) -> list[EvidenceItem]: """ Extract evidence from clinical bundle using LLM to evaluate policy criteria. @@ -145,12 +159,15 @@ async def extract_evidence( Args: clinical_bundle: FHIR clinical data bundle - policy: Policy definition with criteria + policy: PolicyDefinition or dict with criteria Returns: List of evidence items, one per policy criterion """ - criteria = policy.get("criteria", []) + if isinstance(policy, PolicyDefinition): + criteria: list[PolicyCriterion | dict[str, Any]] = list(policy.criteria) + else: + criteria = policy.get("criteria", []) if not criteria: return [] @@ -165,7 +182,10 @@ async def extract_evidence( evidence_items: list[EvidenceItem] = [] for i, result in enumerate(results): if isinstance(result, BaseException): - criterion_id = criteria[i].get("id", "unknown") + crit = criteria[i] + criterion_id = ( + crit.id if isinstance(crit, PolicyCriterion) else crit.get("id", "unknown") + ) logger.error("Criterion %s evaluation failed: %s", criterion_id, result) evidence_items.append( EvidenceItem( diff --git a/apps/intelligence/src/reasoning/form_generator.py b/apps/intelligence/src/reasoning/form_generator.py index 6fc0550..d5a2520 100644 --- a/apps/intelligence/src/reasoning/form_generator.py +++ b/apps/intelligence/src/reasoning/form_generator.py @@ -3,27 +3,27 @@ Calculates recommendations and generates clinical summaries. """ -from typing import Any, Literal - from src.llm_client import chat_completion from src.models.clinical_bundle import ClinicalBundle from src.models.pa_form import EvidenceItem, PAFormResponse +from src.models.policy import PolicyDefinition +from src.reasoning.confidence_scorer import calculate_confidence async def generate_form_data( clinical_bundle: ClinicalBundle, evidence: list[EvidenceItem], - policy: dict[str, Any], + policy: PolicyDefinition, ) -> PAFormResponse: """ Generate PA form data from extracted evidence using LLM. - Calculates recommendation based on evidence and generates clinical summary. + Delegates scoring to confidence_scorer and generates clinical summary. 
Args: clinical_bundle: FHIR clinical data bundle evidence: Extracted evidence items - policy: Policy definition with field mappings + policy: PolicyDefinition with criteria and metadata Returns: Complete PA form response ready for PDF stamping @@ -43,41 +43,14 @@ async def generate_form_data( if not diagnosis_codes: diagnosis_codes = ["Unknown"] - procedure_codes = policy.get("procedure_codes") or ["72148"] - procedure_code = procedure_codes[0] - - # Calculate recommendation based on evidence - required_criteria = [ - c for c in policy.get("criteria", []) if c.get("required", False) - ] - required_criterion_ids = {c.get("id") for c in required_criteria} - - met_required = all( - e.status == "MET" - for e in evidence - if e.criterion_id in required_criterion_ids - ) - - has_not_met = any(e.status == "NOT_MET" for e in evidence) - has_unclear = any(e.status == "UNCLEAR" for e in evidence) + procedure_code = policy.procedure_codes[0] if policy.procedure_codes else "72148" - recommendation: Literal["APPROVE", "NEED_INFO", "MANUAL_REVIEW"] - if met_required and not has_not_met and not has_unclear: - recommendation = "APPROVE" - confidence_score = 0.9 - elif has_not_met: - recommendation = "NEED_INFO" - confidence_score = 0.6 - else: - recommendation = "MANUAL_REVIEW" - confidence_score = 0.7 + # Delegate scoring to confidence_scorer + score_result = calculate_confidence(evidence, policy) # Generate clinical summary using LLM evidence_summary = "\n".join( - [ - f"- {e.criterion_id}: {e.status} - {e.evidence[:100]}" - for e in evidence - ] + [f"- {e.criterion_id}: {e.status} - {e.evidence[:100]}" for e in evidence] ) system_prompt = ( @@ -105,7 +78,6 @@ async def generate_form_data( max_tokens=1000, ) or "Clinical summary generation pending." - # Build field mappings field_mappings = { "PatientName": patient_name, "PatientDOB": patient_dob, @@ -115,12 +87,6 @@ async def generate_form_data( "ClinicalJustification": clinical_summary, } - # Add policy-defined field mappings - policy_mappings = policy.get("form_field_mappings", {}) - for key, value in policy_mappings.items(): - if key in field_mappings: - field_mappings[value] = field_mappings[key] - return PAFormResponse( patient_name=patient_name, patient_dob=patient_dob, @@ -129,7 +95,9 @@ async def generate_form_data( procedure_code=procedure_code, clinical_summary=clinical_summary, supporting_evidence=evidence, - recommendation=recommendation, - confidence_score=confidence_score, + recommendation=score_result.recommendation, + confidence_score=score_result.score, field_mappings=field_mappings, + policy_id=policy.policy_id, + lcd_reference=policy.lcd_reference, ) diff --git a/apps/intelligence/src/tests/test_analyze.py b/apps/intelligence/src/tests/test_analyze.py index bb2491d..8b74919 100644 --- a/apps/intelligence/src/tests/test_analyze.py +++ b/apps/intelligence/src/tests/test_analyze.py @@ -1,4 +1,4 @@ -"""Tests for analyze API endpoint stub implementation.""" +"""Tests for analyze API endpoint implementation.""" from unittest.mock import AsyncMock, patch @@ -29,8 +29,8 @@ def valid_request() -> AnalyzeRequest: @pytest.mark.asyncio async def test_analyze_returns_approve(valid_request: AnalyzeRequest) -> None: - """Stub should return APPROVE recommendation.""" - mock_llm = AsyncMock(return_value="The criterion is MET based on the evidence.") + """Should return APPROVE recommendation with high confidence.""" + mock_llm = AsyncMock(return_value="The criterion is MET based on the evidence. 
HIGH CONFIDENCE.") with ( patch("src.reasoning.evidence_extractor.chat_completion", mock_llm), patch("src.reasoning.form_generator.chat_completion", mock_llm), @@ -38,38 +38,27 @@ async def test_analyze_returns_approve(valid_request: AnalyzeRequest) -> None: result = await analyze(valid_request) assert result.recommendation == "APPROVE" - assert result.confidence_score == 0.9 + assert result.confidence_score >= 0.80 # Weighted score, not fixed 0.9 @pytest.mark.asyncio async def test_analyze_extracts_patient_info(valid_request: AnalyzeRequest) -> None: - """Stub should extract patient information.""" - result = await analyze(valid_request) + """Should extract patient information.""" + mock_llm = AsyncMock(return_value="MET. Evidence found.") + with ( + patch("src.reasoning.evidence_extractor.chat_completion", mock_llm), + patch("src.reasoning.form_generator.chat_completion", mock_llm), + ): + result = await analyze(valid_request) assert result.patient_name == "John Doe" assert result.patient_dob == "1980-05-15" assert result.member_id == "MEM-001" -@pytest.mark.asyncio -async def test_analyze_rejects_unsupported_procedure() -> None: - """Stub should reject unsupported procedure codes.""" - request = AnalyzeRequest( - patient_id="test", - procedure_code="99999", - clinical_data={"patient": {"name": "Test", "birth_date": "1980-01-01"}}, - ) - - with pytest.raises(HTTPException) as exc_info: - await analyze(request) - - assert exc_info.value.status_code == 400 - assert "not supported" in exc_info.value.detail - - @pytest.mark.asyncio async def test_analyze_requires_patient_dob() -> None: - """Stub should require patient birth_date.""" + """Should require patient birth_date.""" request = AnalyzeRequest( patient_id="test", procedure_code="72148", @@ -85,10 +74,54 @@ async def test_analyze_requires_patient_dob() -> None: @pytest.mark.asyncio async def test_analyze_builds_field_mappings(valid_request: AnalyzeRequest) -> None: - """Stub should include PDF field mappings.""" - result = await analyze(valid_request) + """Should include PDF field mappings.""" + mock_llm = AsyncMock(return_value="MET. Evidence found.") + with ( + patch("src.reasoning.evidence_extractor.chat_completion", mock_llm), + patch("src.reasoning.form_generator.chat_completion", mock_llm), + ): + result = await analyze(valid_request) assert "PatientName" in result.field_mappings assert "PatientDOB" in result.field_mappings assert "ProcedureCode" in result.field_mappings assert result.field_mappings["PatientName"] == "John Doe" + + +@pytest.mark.asyncio +async def test_analyze_unknown_cpt_returns_200_with_generic() -> None: + """CPT 99999 -> 200 OK with generic policy (no lcd_reference).""" + request = AnalyzeRequest( + patient_id="test", + procedure_code="99999", + clinical_data={"patient": {"name": "Test", "birth_date": "1980-01-01", "member_id": "M001"}}, + ) + mock_llm = AsyncMock(return_value="MET. 
Evidence found.") + with ( + patch("src.reasoning.evidence_extractor.chat_completion", mock_llm), + patch("src.reasoning.form_generator.chat_completion", mock_llm), + ): + result = await analyze(request) + assert result.lcd_reference is None # Generic fallback + assert result.recommendation in ("APPROVE", "MANUAL_REVIEW", "NEED_INFO") + + +@pytest.mark.asyncio +async def test_analyze_mri_lumbar_uses_lcd_policy() -> None: + """CPT 72148 -> response includes lcd_reference='L34220'.""" + request = AnalyzeRequest( + patient_id="test", + procedure_code="72148", + clinical_data={ + "patient": {"name": "Test", "birth_date": "1980-01-01", "member_id": "M001"}, + "conditions": [{"code": "M54.5", "display": "Low back pain"}], + }, + ) + mock_llm = AsyncMock(return_value="MET. HIGH CONFIDENCE. Evidence found.") + with ( + patch("src.reasoning.evidence_extractor.chat_completion", mock_llm), + patch("src.reasoning.form_generator.chat_completion", mock_llm), + ): + result = await analyze(request) + assert result.lcd_reference == "L34220" + assert result.policy_id == "lcd-mri-lumbar-L34220" diff --git a/apps/intelligence/src/tests/test_confidence_scorer.py b/apps/intelligence/src/tests/test_confidence_scorer.py new file mode 100644 index 0000000..fcc7a8d --- /dev/null +++ b/apps/intelligence/src/tests/test_confidence_scorer.py @@ -0,0 +1,162 @@ +"""Tests for weighted LCD compliance confidence scorer.""" +import pytest +from src.models.pa_form import EvidenceItem +from src.models.policy import PolicyCriterion, PolicyDefinition +from src.reasoning.confidence_scorer import ScoreResult, calculate_confidence + + +def _make_criterion(id: str, weight: float, required: bool = False, bypasses: list[str] | None = None) -> PolicyCriterion: + return PolicyCriterion(id=id, description=f"Test {id}", weight=weight, required=required, bypasses=bypasses or []) + +def _make_evidence(criterion_id: str, status: str, confidence: float = 0.9) -> EvidenceItem: + return EvidenceItem(criterion_id=criterion_id, status=status, evidence="test", source="test", confidence=confidence) + +def _make_policy(criteria: list[PolicyCriterion]) -> PolicyDefinition: + return PolicyDefinition(policy_id="test", policy_name="Test", payer="Test", procedure_codes=["72148"], criteria=criteria) + + +def test_all_met_high_confidence(): + """All criteria MET with high confidence -> score >= 0.85, APPROVE.""" + criteria = [_make_criterion("c1", 0.3), _make_criterion("c2", 0.3), _make_criterion("c3", 0.4)] + policy = _make_policy(criteria) + evidence = [_make_evidence("c1", "MET", 0.9), _make_evidence("c2", "MET", 0.9), _make_evidence("c3", "MET", 0.9)] + result = calculate_confidence(evidence, policy) + assert result.score >= 0.85 + assert result.recommendation == "APPROVE" + + +def test_all_not_met_hits_floor(): + """All NOT_MET -> score = 0.05 (floor).""" + criteria = [_make_criterion("c1", 0.5, required=True), _make_criterion("c2", 0.5, required=True)] + policy = _make_policy(criteria) + evidence = [_make_evidence("c1", "NOT_MET", 0.9), _make_evidence("c2", "NOT_MET", 0.9)] + result = calculate_confidence(evidence, policy) + assert result.score == pytest.approx(0.05, abs=0.01) + + +def test_mixed_met_and_optional_not_met(): + """3 MET + 1 optional NOT_MET -> high score (only 10% weight NOT_MET).""" + criteria = [ + _make_criterion("c1", 0.3, required=True), + _make_criterion("c2", 0.3, required=True), + _make_criterion("c3", 0.3, required=True), + _make_criterion("c4", 0.1, required=False), + ] + policy = _make_policy(criteria) + evidence = [ + 
_make_evidence("c1", "MET"), _make_evidence("c2", "MET"), + _make_evidence("c3", "MET"), _make_evidence("c4", "NOT_MET"), + ] + result = calculate_confidence(evidence, policy) + assert 0.85 <= result.score <= 0.95 + + +def test_required_not_met_caps_score(): + """4 MET + 1 required NOT_MET -> score capped at 0.50.""" + criteria = [ + _make_criterion("c1", 0.2), _make_criterion("c2", 0.2), + _make_criterion("c3", 0.2), _make_criterion("c4", 0.2), + _make_criterion("c5", 0.2, required=True), + ] + policy = _make_policy(criteria) + evidence = [ + _make_evidence("c1", "MET"), _make_evidence("c2", "MET"), + _make_evidence("c3", "MET"), _make_evidence("c4", "MET"), + _make_evidence("c5", "NOT_MET"), + ] + result = calculate_confidence(evidence, policy) + assert result.score <= 0.50 + + +def test_multiple_required_not_met_stacks_penalty(): + """2 required NOT_MET -> lower cap than 1.""" + criteria = [ + _make_criterion("c1", 0.3, required=True), + _make_criterion("c2", 0.3, required=True), + _make_criterion("c3", 0.4), + ] + policy = _make_policy(criteria) + evidence_one = [_make_evidence("c1", "MET"), _make_evidence("c2", "NOT_MET"), _make_evidence("c3", "MET")] + evidence_two = [_make_evidence("c1", "NOT_MET"), _make_evidence("c2", "NOT_MET"), _make_evidence("c3", "MET")] + result_one = calculate_confidence(evidence_one, policy) + result_two = calculate_confidence(evidence_two, policy) + assert result_two.score < result_one.score + + +def test_unclear_contributes_half(): + """All UNCLEAR -> score around 0.50.""" + criteria = [_make_criterion("c1", 0.5), _make_criterion("c2", 0.5)] + policy = _make_policy(criteria) + evidence = [_make_evidence("c1", "UNCLEAR", 0.7), _make_evidence("c2", "UNCLEAR", 0.7)] + result = calculate_confidence(evidence, policy) + assert 0.40 <= result.score <= 0.60 + + +def test_bypass_treats_bypassed_as_met(): + """Criterion with bypasses=['c2'] MET -> c2 treated as MET.""" + criteria = [ + _make_criterion("c1", 0.5, bypasses=["c2"]), + _make_criterion("c2", 0.5, required=True), + ] + policy = _make_policy(criteria) + evidence = [_make_evidence("c1", "MET"), _make_evidence("c2", "NOT_MET")] + result = calculate_confidence(evidence, policy) + # c2 should be treated as MET because c1 (which bypasses c2) is MET + assert result.score >= 0.80 + + +def test_bypass_ignored_when_bypasser_not_met(): + """Bypass criterion NOT_MET -> bypassed criterion evaluated normally.""" + criteria = [ + _make_criterion("c1", 0.5, bypasses=["c2"]), + _make_criterion("c2", 0.5, required=True), + ] + policy = _make_policy(criteria) + evidence = [_make_evidence("c1", "NOT_MET"), _make_evidence("c2", "NOT_MET")] + result = calculate_confidence(evidence, policy) + assert result.score <= 0.50 + + +def test_recommendation_approve_threshold(): + """Score >= 0.80 -> APPROVE.""" + criteria = [_make_criterion("c1", 1.0)] + policy = _make_policy(criteria) + evidence = [_make_evidence("c1", "MET", 0.9)] + result = calculate_confidence(evidence, policy) + assert result.recommendation == "APPROVE" + + +def test_recommendation_manual_review_threshold(): + """Score in [0.50, 0.80) -> MANUAL_REVIEW.""" + criteria = [_make_criterion("c1", 0.5), _make_criterion("c2", 0.5)] + policy = _make_policy(criteria) + evidence = [_make_evidence("c1", "MET", 0.9), _make_evidence("c2", "UNCLEAR", 0.7)] + result = calculate_confidence(evidence, policy) + assert result.recommendation == "MANUAL_REVIEW" + + +def test_recommendation_need_info_threshold(): + """Score < 0.50 -> NEED_INFO.""" + criteria = [_make_criterion("c1", 
0.5, required=True), _make_criterion("c2", 0.5)] + policy = _make_policy(criteria) + evidence = [_make_evidence("c1", "NOT_MET", 0.9), _make_evidence("c2", "NOT_MET", 0.9)] + result = calculate_confidence(evidence, policy) + assert result.recommendation == "NEED_INFO" + + +def test_score_floor_never_below_five_percent(): + """Extreme inputs -> min 0.05.""" + criteria = [_make_criterion("c1", 1.0, required=True)] + policy = _make_policy(criteria) + evidence = [_make_evidence("c1", "NOT_MET", 0.99)] + result = calculate_confidence(evidence, policy) + assert result.score >= 0.05 + + +def test_score_ceiling_never_above_one(): + """Perfect inputs -> max 1.0.""" + criteria = [_make_criterion("c1", 1.0)] + policy = _make_policy(criteria) + evidence = [_make_evidence("c1", "MET", 1.0)] + result = calculate_confidence(evidence, policy) + assert result.score <= 1.0 diff --git a/apps/intelligence/src/tests/test_evidence_extractor.py b/apps/intelligence/src/tests/test_evidence_extractor.py index 24e1003..5afb1f7 100644 --- a/apps/intelligence/src/tests/test_evidence_extractor.py +++ b/apps/intelligence/src/tests/test_evidence_extractor.py @@ -69,7 +69,7 @@ async def test_extract_evidence_confidence_score( with patch("src.reasoning.evidence_extractor.chat_completion", mock_llm): evidence = await extract_evidence(sample_bundle, sample_policy) - assert all(e.confidence == 0.80 for e in evidence) + assert all(e.confidence == 0.7 for e in evidence) # --- A1: evaluate_criterion tests --- @@ -91,7 +91,7 @@ async def test_evaluate_criterion_returns_met_evidence_item(): assert isinstance(result, EvidenceItem) assert result.criterion_id == "crit-1" assert result.status == "MET" - assert result.confidence == 0.8 + assert result.confidence == 0.7 @pytest.mark.asyncio @@ -108,7 +108,7 @@ async def test_evaluate_criterion_parses_not_met(): result = await evaluate_criterion(criterion, clinical_summary) assert result.status == "NOT_MET" - assert result.confidence == 0.8 + assert result.confidence == 0.7 @pytest.mark.asyncio @@ -230,3 +230,92 @@ def test_get_llm_semaphore_returns_singleton(): # Cleanup mod._llm_semaphore = None + + +# --- T006: Evidence extractor enhancement tests --- + +from src.models.policy import PolicyCriterion, PolicyDefinition + + +def _make_policy_def() -> PolicyDefinition: + return PolicyDefinition( + policy_id="test-lcd", + policy_name="Test LCD Policy", + payer="CMS Medicare", + procedure_codes=["72148"], + criteria=[ + PolicyCriterion( + id="crit-1", description="Test criterion 1", weight=0.5, + lcd_section="L34220 — Test Section", + ), + PolicyCriterion( + id="crit-2", description="Test criterion 2", weight=0.5, + lcd_section="L34220 — Another Section", + ), + ], + ) + + +@pytest.mark.asyncio +async def test_extract_evidence_accepts_policy_definition(): + """Pass PolicyDefinition instead of dict -> works.""" + policy = _make_policy_def() + bundle = ClinicalBundle( + patient_id="test", + patient=PatientInfo(name="Test"), + conditions=[Condition(code="M54.5", display="Low back pain")], + ) + mock_llm = AsyncMock(return_value="The criterion is MET based on clinical data.") + with patch("src.reasoning.evidence_extractor.chat_completion", mock_llm): + evidence = await extract_evidence(bundle, policy) + assert len(evidence) == 2 + assert evidence[0].criterion_id == "crit-1" + + +@pytest.mark.asyncio +async def test_evaluate_criterion_includes_lcd_section_in_prompt(): + """Mock LLM captures prompt, verify LCD section text present.""" + criterion = PolicyCriterion( + id="test", description="Test 
criterion", weight=0.5, + lcd_section="L34220 — Coverage Principle", + ) + captured_prompts = [] + + async def capture_llm(*args, **kwargs): + captured_prompts.append(kwargs.get("user_prompt", args[1] if len(args) > 1 else "")) + return "MET. HIGH CONFIDENCE. Evidence found." + + mock_llm = AsyncMock(side_effect=capture_llm) + with patch("src.reasoning.evidence_extractor.chat_completion", mock_llm): + await evaluate_criterion(criterion, "Clinical data here") + assert any("L34220" in p for p in captured_prompts) + + +@pytest.mark.asyncio +async def test_evaluate_criterion_confidence_parsing_high(): + """LLM response with 'HIGH CONFIDENCE' -> conf=0.9.""" + criterion = {"id": "test", "description": "Test"} + mock_llm = AsyncMock(return_value="MET. HIGH CONFIDENCE. Strong evidence.") + with patch("src.reasoning.evidence_extractor.chat_completion", mock_llm): + result = await evaluate_criterion(criterion, "data") + assert result.confidence == 0.9 + + +@pytest.mark.asyncio +async def test_evaluate_criterion_confidence_parsing_low(): + """LLM response with 'LOW CONFIDENCE' -> conf=0.5.""" + criterion = {"id": "test", "description": "Test"} + mock_llm = AsyncMock(return_value="UNCLEAR. LOW CONFIDENCE. Limited data.") + with patch("src.reasoning.evidence_extractor.chat_completion", mock_llm): + result = await evaluate_criterion(criterion, "data") + assert result.confidence == 0.5 + + +@pytest.mark.asyncio +async def test_evaluate_criterion_confidence_parsing_default(): + """No confidence signal -> conf=0.7.""" + criterion = {"id": "test", "description": "Test"} + mock_llm = AsyncMock(return_value="MET. Evidence found in records.") + with patch("src.reasoning.evidence_extractor.chat_completion", mock_llm): + result = await evaluate_criterion(criterion, "data") + assert result.confidence == 0.7 diff --git a/apps/intelligence/src/tests/test_form_generator.py b/apps/intelligence/src/tests/test_form_generator.py index 800ca7d..6aa4c6b 100644 --- a/apps/intelligence/src/tests/test_form_generator.py +++ b/apps/intelligence/src/tests/test_form_generator.py @@ -1,4 +1,4 @@ -"""Tests for form generator stub implementation.""" +"""Tests for form generator implementation.""" from datetime import date from unittest.mock import AsyncMock, patch @@ -7,6 +7,8 @@ from src.models.clinical_bundle import ClinicalBundle, Condition, PatientInfo from src.models.pa_form import EvidenceItem +from src.models.policy import PolicyCriterion, PolicyDefinition +from src.reasoning.confidence_scorer import ScoreResult from src.reasoning.form_generator import generate_form_data @@ -39,23 +41,32 @@ def sample_evidence() -> list[EvidenceItem]: @pytest.fixture -def sample_policy() -> dict: +def sample_policy() -> PolicyDefinition: """Create a sample policy.""" - return { - "id": "test-policy", - "procedure_codes": ["72148"], - } + return PolicyDefinition( + policy_id="test-policy", + policy_name="Test Policy", + payer="Test Payer", + procedure_codes=["72148"], + criteria=[ + PolicyCriterion(id="crit-1", description="Test criterion", weight=1.0), + ], + ) @pytest.mark.asyncio async def test_generate_form_data_returns_approve( sample_bundle: ClinicalBundle, sample_evidence: list[EvidenceItem], - sample_policy: dict, + sample_policy: PolicyDefinition, ) -> None: - """Stub should return APPROVE recommendation.""" + """Should return APPROVE recommendation via scorer.""" + mock_scorer = ScoreResult(score=0.9, recommendation="APPROVE") mock_llm = AsyncMock(return_value="Patient requires this procedure.") - with 
patch("src.reasoning.form_generator.chat_completion", mock_llm): + with ( + patch("src.reasoning.form_generator.calculate_confidence", return_value=mock_scorer), + patch("src.reasoning.form_generator.chat_completion", mock_llm), + ): result = await generate_form_data(sample_bundle, sample_evidence, sample_policy) assert result.recommendation == "APPROVE" @@ -66,10 +77,16 @@ async def test_generate_form_data_returns_approve( async def test_generate_form_data_extracts_patient_info( sample_bundle: ClinicalBundle, sample_evidence: list[EvidenceItem], - sample_policy: dict, + sample_policy: PolicyDefinition, ) -> None: - """Stub should extract patient information from bundle.""" - result = await generate_form_data(sample_bundle, sample_evidence, sample_policy) + """Should extract patient information from bundle.""" + mock_scorer = ScoreResult(score=0.85, recommendation="APPROVE") + mock_llm = AsyncMock(return_value="Summary.") + with ( + patch("src.reasoning.form_generator.calculate_confidence", return_value=mock_scorer), + patch("src.reasoning.form_generator.chat_completion", mock_llm), + ): + result = await generate_form_data(sample_bundle, sample_evidence, sample_policy) assert result.patient_name == "John Doe" assert result.patient_dob == "1980-05-15" @@ -80,10 +97,16 @@ async def test_generate_form_data_extracts_patient_info( async def test_generate_form_data_extracts_diagnosis( sample_bundle: ClinicalBundle, sample_evidence: list[EvidenceItem], - sample_policy: dict, + sample_policy: PolicyDefinition, ) -> None: - """Stub should extract diagnosis codes from bundle.""" - result = await generate_form_data(sample_bundle, sample_evidence, sample_policy) + """Should extract diagnosis codes from bundle.""" + mock_scorer = ScoreResult(score=0.85, recommendation="APPROVE") + mock_llm = AsyncMock(return_value="Summary.") + with ( + patch("src.reasoning.form_generator.calculate_confidence", return_value=mock_scorer), + patch("src.reasoning.form_generator.chat_completion", mock_llm), + ): + result = await generate_form_data(sample_bundle, sample_evidence, sample_policy) assert result.diagnosis_codes == ["M54.5"] @@ -92,22 +115,40 @@ async def test_generate_form_data_extracts_diagnosis( async def test_generate_form_data_uses_policy_procedure_code( sample_bundle: ClinicalBundle, sample_evidence: list[EvidenceItem], - sample_policy: dict, + sample_policy: PolicyDefinition, ) -> None: - """Stub should use procedure code from policy.""" - result = await generate_form_data(sample_bundle, sample_evidence, sample_policy) + """Should use procedure code from policy.""" + mock_scorer = ScoreResult(score=0.85, recommendation="APPROVE") + mock_llm = AsyncMock(return_value="Summary.") + with ( + patch("src.reasoning.form_generator.calculate_confidence", return_value=mock_scorer), + patch("src.reasoning.form_generator.chat_completion", mock_llm), + ): + result = await generate_form_data(sample_bundle, sample_evidence, sample_policy) assert result.procedure_code == "72148" @pytest.mark.asyncio async def test_generate_form_data_handles_missing_patient() -> None: - """Stub should handle missing patient data gracefully.""" + """Should handle missing patient data gracefully.""" bundle = ClinicalBundle(patient_id="test") evidence: list[EvidenceItem] = [] - policy: dict = {"procedure_codes": ["72148"]} + policy = PolicyDefinition( + policy_id="test-empty", + policy_name="Test Empty", + payer="Test", + procedure_codes=["72148"], + criteria=[], + ) - result = await generate_form_data(bundle, evidence, policy) + mock_scorer 
= ScoreResult(score=0.5, recommendation="MANUAL_REVIEW") + mock_llm = AsyncMock(return_value="Summary.") + with ( + patch("src.reasoning.form_generator.calculate_confidence", return_value=mock_scorer), + patch("src.reasoning.form_generator.chat_completion", mock_llm), + ): + result = await generate_form_data(bundle, evidence, policy) assert result.patient_name == "Unknown" assert result.patient_dob == "Unknown" @@ -116,11 +157,99 @@ async def test_generate_form_data_handles_missing_patient() -> None: @pytest.mark.asyncio async def test_generate_form_data_handles_empty_procedure_codes() -> None: - """Stub should use default procedure code when list is empty.""" + """Should use default procedure code when list is empty.""" bundle = ClinicalBundle(patient_id="test") evidence: list[EvidenceItem] = [] - policy: dict = {"procedure_codes": []} + policy = PolicyDefinition( + policy_id="test-no-codes", + policy_name="Test No Codes", + payer="Test", + procedure_codes=[], + criteria=[], + ) - result = await generate_form_data(bundle, evidence, policy) + mock_scorer = ScoreResult(score=0.5, recommendation="MANUAL_REVIEW") + mock_llm = AsyncMock(return_value="Summary.") + with ( + patch("src.reasoning.form_generator.calculate_confidence", return_value=mock_scorer), + patch("src.reasoning.form_generator.chat_completion", mock_llm), + ): + result = await generate_form_data(bundle, evidence, policy) assert result.procedure_code == "72148" + + +@pytest.mark.asyncio +async def test_generate_form_data_delegates_to_scorer( + sample_bundle: ClinicalBundle, + sample_evidence: list[EvidenceItem], + sample_policy: PolicyDefinition, +) -> None: + """Mock confidence_scorer, verify it's called.""" + mock_scorer = ScoreResult(score=0.72, recommendation="MANUAL_REVIEW") + mock_llm = AsyncMock(return_value="Summary.") + with ( + patch("src.reasoning.form_generator.calculate_confidence", return_value=mock_scorer) as mock_calc, + patch("src.reasoning.form_generator.chat_completion", mock_llm), + ): + result = await generate_form_data(sample_bundle, sample_evidence, sample_policy) + mock_calc.assert_called_once() + + +@pytest.mark.asyncio +async def test_generate_form_data_uses_scorer_recommendation( + sample_bundle: ClinicalBundle, + sample_evidence: list[EvidenceItem], + sample_policy: PolicyDefinition, +) -> None: + """Scorer returns MANUAL_REVIEW -> response has MANUAL_REVIEW.""" + mock_scorer = ScoreResult(score=0.72, recommendation="MANUAL_REVIEW") + mock_llm = AsyncMock(return_value="Summary.") + with ( + patch("src.reasoning.form_generator.calculate_confidence", return_value=mock_scorer), + patch("src.reasoning.form_generator.chat_completion", mock_llm), + ): + result = await generate_form_data(sample_bundle, sample_evidence, sample_policy) + assert result.recommendation == "MANUAL_REVIEW" + + +@pytest.mark.asyncio +async def test_generate_form_data_uses_scorer_confidence( + sample_bundle: ClinicalBundle, + sample_evidence: list[EvidenceItem], + sample_policy: PolicyDefinition, +) -> None: + """Scorer returns 0.72 -> response.confidence_score == 0.72.""" + mock_scorer = ScoreResult(score=0.72, recommendation="MANUAL_REVIEW") + mock_llm = AsyncMock(return_value="Summary.") + with ( + patch("src.reasoning.form_generator.calculate_confidence", return_value=mock_scorer), + patch("src.reasoning.form_generator.chat_completion", mock_llm), + ): + result = await generate_form_data(sample_bundle, sample_evidence, sample_policy) + assert result.confidence_score == 0.72 + + +@pytest.mark.asyncio +async def 
test_generate_form_data_includes_policy_metadata( + sample_bundle: ClinicalBundle, + sample_evidence: list[EvidenceItem], +) -> None: + """Response has policy_id + lcd_reference from policy.""" + policy = PolicyDefinition( + policy_id="lcd-test-L12345", + policy_name="Test LCD", + lcd_reference="L12345", + payer="CMS", + procedure_codes=["72148"], + criteria=[PolicyCriterion(id="c1", description="Test", weight=1.0)], + ) + mock_scorer = ScoreResult(score=0.85, recommendation="APPROVE") + mock_llm = AsyncMock(return_value="Summary.") + with ( + patch("src.reasoning.form_generator.calculate_confidence", return_value=mock_scorer), + patch("src.reasoning.form_generator.chat_completion", mock_llm), + ): + result = await generate_form_data(sample_bundle, sample_evidence, policy) + assert result.policy_id == "lcd-test-L12345" + assert result.lcd_reference == "L12345" diff --git a/apps/intelligence/src/tests/test_generic_policy.py b/apps/intelligence/src/tests/test_generic_policy.py new file mode 100644 index 0000000..edaffda --- /dev/null +++ b/apps/intelligence/src/tests/test_generic_policy.py @@ -0,0 +1,43 @@ +"""Tests for generic fallback policy builder.""" +import pytest +from src.models.policy import PolicyDefinition +from src.policies.generic_policy import build_generic_policy + + +def test_build_generic_policy_returns_policy_definition(): + """Returns a PolicyDefinition instance.""" + result = build_generic_policy("99999") + assert isinstance(result, PolicyDefinition) + + +def test_generic_policy_has_three_criteria(): + """Generic policy has 3 universal criteria.""" + result = build_generic_policy("99999") + assert len(result.criteria) == 3 + ids = {c.id for c in result.criteria} + assert ids == {"medical_necessity", "diagnosis_present", "conservative_therapy"} + + +def test_generic_policy_weights_sum_to_one(): + """Weights sum approximately to 1.0.""" + result = build_generic_policy("99999") + total = sum(c.weight for c in result.criteria) + assert total == pytest.approx(1.0, abs=0.01) + + +def test_generic_policy_no_lcd_reference(): + """Generic policy has no LCD reference.""" + result = build_generic_policy("99999") + assert result.lcd_reference is None + + +def test_generic_policy_includes_procedure_code(): + """Passed CPT code appears in procedure_codes.""" + result = build_generic_policy("12345") + assert "12345" in result.procedure_codes + + +def test_generic_policy_payer_is_general(): + """Payer field is set to a generic value.""" + result = build_generic_policy("99999") + assert "general" in result.payer.lower() or "generic" in result.payer.lower() diff --git a/apps/intelligence/src/tests/test_pa_form_model.py b/apps/intelligence/src/tests/test_pa_form_model.py new file mode 100644 index 0000000..9658b43 --- /dev/null +++ b/apps/intelligence/src/tests/test_pa_form_model.py @@ -0,0 +1,60 @@ +"""Tests for PAFormResponse model update.""" +import pytest +from src.models.pa_form import PAFormResponse + + +def test_pa_form_response_backward_compat(): + """Construct without new fields -> defaults to None.""" + resp = PAFormResponse( + patient_name="Test", patient_dob="2000-01-01", member_id="M001", + diagnosis_codes=["M54.5"], procedure_code="72148", + clinical_summary="Summary", supporting_evidence=[], + recommendation="APPROVE", confidence_score=0.9, + field_mappings={"PatientName": "Test"}, + ) + assert resp.policy_id is None + assert resp.lcd_reference is None + + +def test_pa_form_response_with_policy_metadata(): + """Construct with policy_id + lcd_reference -> present.""" + resp = 
diff --git a/apps/intelligence/src/tests/test_pa_form_model.py b/apps/intelligence/src/tests/test_pa_form_model.py
new file mode 100644
index 0000000..9658b43
--- /dev/null
+++ b/apps/intelligence/src/tests/test_pa_form_model.py
@@ -0,0 +1,60 @@
+"""Tests for PAFormResponse model update."""
+import pytest
+from src.models.pa_form import PAFormResponse
+
+
+def test_pa_form_response_backward_compat():
+    """Construct without new fields -> defaults to None."""
+    resp = PAFormResponse(
+        patient_name="Test", patient_dob="2000-01-01", member_id="M001",
+        diagnosis_codes=["M54.5"], procedure_code="72148",
+        clinical_summary="Summary", supporting_evidence=[],
+        recommendation="APPROVE", confidence_score=0.9,
+        field_mappings={"PatientName": "Test"},
+    )
+    assert resp.policy_id is None
+    assert resp.lcd_reference is None
+
+
+def test_pa_form_response_with_policy_metadata():
+    """Construct with policy_id + lcd_reference -> present."""
+    resp = PAFormResponse(
+        patient_name="Test", patient_dob="2000-01-01", member_id="M001",
+        diagnosis_codes=["M54.5"], procedure_code="72148",
+        clinical_summary="Summary", supporting_evidence=[],
+        recommendation="APPROVE", confidence_score=0.9,
+        field_mappings={"PatientName": "Test"},
+        policy_id="lcd-mri-lumbar-L34220",
+        lcd_reference="L34220",
+    )
+    assert resp.policy_id == "lcd-mri-lumbar-L34220"
+    assert resp.lcd_reference == "L34220"
+
+
+def test_pa_form_response_serialization_includes_new_fields():
+    """model_dump() includes policy_id and lcd_reference."""
+    resp = PAFormResponse(
+        patient_name="Test", patient_dob="2000-01-01", member_id="M001",
+        diagnosis_codes=["M54.5"], procedure_code="72148",
+        clinical_summary="Summary", supporting_evidence=[],
+        recommendation="APPROVE", confidence_score=0.9,
+        field_mappings={},
+        policy_id="test", lcd_reference="L12345",
+    )
+    data = resp.model_dump()
+    assert data["policy_id"] == "test"
+    assert data["lcd_reference"] == "L12345"
+
+
+def test_pa_form_response_serialization_omits_none():
+    """With exclude_none, absent fields omitted."""
+    resp = PAFormResponse(
+        patient_name="Test", patient_dob="2000-01-01", member_id="M001",
+        diagnosis_codes=["M54.5"], procedure_code="72148",
+        clinical_summary="Summary", supporting_evidence=[],
+        recommendation="APPROVE", confidence_score=0.9,
+        field_mappings={},
+    )
+    data = resp.model_dump(exclude_none=True)
+    assert "policy_id" not in data
+    assert "lcd_reference" not in data
diff --git a/apps/intelligence/src/tests/test_policy_model.py b/apps/intelligence/src/tests/test_policy_model.py
new file mode 100644
index 0000000..c2fdb93
--- /dev/null
+++ b/apps/intelligence/src/tests/test_policy_model.py
@@ -0,0 +1,75 @@
+"""Tests for policy data models."""
+import pytest
+from src.models.policy import PolicyCriterion, PolicyDefinition
+
+
+def test_policy_criterion_valid():
+    """Construct PolicyCriterion with all fields."""
+    c = PolicyCriterion(
+        id="conservative_therapy",
+        description="6+ weeks conservative therapy",
+        weight=0.30,
+        required=True,
+        lcd_section="L34220 §4.2",
+        bypasses=[],
+    )
+    assert c.id == "conservative_therapy"
+    assert c.weight == 0.30
+    assert c.required is True
+    assert c.lcd_section == "L34220 §4.2"
+
+
+def test_policy_criterion_defaults():
+    """Required=False, lcd_section=None, bypasses=[] by default."""
+    c = PolicyCriterion(id="test", description="Test", weight=0.5)
+    assert c.required is False
+    assert c.lcd_section is None
+    assert c.bypasses == []
+
+
+def test_policy_definition_valid():
+    """Construct PolicyDefinition with criteria list."""
+    criteria = [
+        PolicyCriterion(id="c1", description="Criterion 1", weight=0.6),
+        PolicyCriterion(id="c2", description="Criterion 2", weight=0.4),
+    ]
+    p = PolicyDefinition(
+        policy_id="lcd-test",
+        policy_name="Test Policy",
+        payer="CMS Medicare",
+        procedure_codes=["72148"],
+        diagnosis_codes=["M54.5"],
+        criteria=criteria,
+    )
+    assert p.policy_id == "lcd-test"
+    assert len(p.criteria) == 2
+    assert p.lcd_reference is None
+
+
+def test_policy_criterion_bypasses_field():
+    """Verify bypasses list works."""
+    c = PolicyCriterion(
+        id="red_flag",
+        description="Red flag symptoms",
+        weight=0.25,
+        bypasses=["conservative_therapy_4wk"],
+    )
+    assert c.bypasses == ["conservative_therapy_4wk"]
+
+
+def test_policy_definition_with_lcd_metadata():
+    """PolicyDefinition with LCD metadata fields populated."""
+    p = PolicyDefinition(
+        policy_id="lcd-mri-lumbar-L34220",
+        policy_name="MRI Lumbar Spine",
+        lcd_reference="L34220",
+        lcd_title="Lumbar MRI",
+        lcd_contractor="Noridian Healthcare Solutions",
+        payer="CMS Medicare",
+        procedure_codes=["72148", "72149"],
+        diagnosis_codes=["M54.5"],
+        criteria=[],
+    )
+    assert p.lcd_reference == "L34220"
+    assert p.lcd_title == "Lumbar MRI"
+    assert p.lcd_contractor == "Noridian Healthcare Solutions"
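Reviewer note: the model tests only pin `bypasses` structurally. The consuming logic lives in the scorer, which is not part of this diff, but as a hedged illustration of the intended semantics (not the repo's actual `calculate_confidence`), a satisfied criterion that lists another criterion in `bypasses` would waive it from scoring:

# Illustrative semantics only; the real scorer in src/reasoning may differ.
from src.models.policy import PolicyDefinition


def effective_criteria(policy: PolicyDefinition, met: dict[str, bool]) -> list[str]:
    """Criterion ids still in play after satisfied criteria waive their bypasses."""
    waived = {
        bypassed_id
        for criterion in policy.criteria
        if met.get(criterion.id)               # a satisfied criterion...
        for bypassed_id in criterion.bypasses  # ...waives each id it bypasses
    }
    return [c.id for c in policy.criteria if c.id not in waived]

Under that reading, a documented red flag would drop `conservative_therapy_4wk` from the score, which matches the MRI Lumbar bypass test further down.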
diff --git a/apps/intelligence/src/tests/test_policy_registry.py b/apps/intelligence/src/tests/test_policy_registry.py
new file mode 100644
index 0000000..36037dc
--- /dev/null
+++ b/apps/intelligence/src/tests/test_policy_registry.py
@@ -0,0 +1,74 @@
+"""Tests for policy registry."""
+import pytest
+from src.models.policy import PolicyDefinition
+from src.policies.registry import PolicyRegistry, registry
+
+
+def test_register_and_resolve_known_cpt():
+    """Register policy with CPT, resolve returns same policy."""
+    r = PolicyRegistry()
+    policy = PolicyDefinition(
+        policy_id="test", policy_name="Test", payer="Test",
+        procedure_codes=["72148"], criteria=[]
+    )
+    r.register(policy)
+    assert r.resolve("72148") is policy
+
+
+def test_resolve_unknown_cpt_returns_generic():
+    """Unregistered CPT returns generic fallback."""
+    r = PolicyRegistry()
+    result = r.resolve("99999")
+    assert result.lcd_reference is None
+    assert "99999" in result.procedure_codes
+
+
+def test_register_multi_cpt_policy():
+    """Policy with 3 CPTs, all 3 resolve to it."""
+    r = PolicyRegistry()
+    policy = PolicyDefinition(
+        policy_id="multi", policy_name="Multi", payer="Test",
+        procedure_codes=["72148", "72149", "72158"], criteria=[]
+    )
+    r.register(policy)
+    assert r.resolve("72148") is policy
+    assert r.resolve("72149") is policy
+    assert r.resolve("72158") is policy
+
+
+def test_seed_policies_registered_on_import():
+    """Module-level registry has pre-registered seed policies."""
+    # 72148 is MRI Lumbar CPT
+    result = registry.resolve("72148")
+    assert result.lcd_reference is not None
+
+
+def test_all_seed_cpts_resolve_to_lcd_policy():
+    """All 12 seed CPT codes resolve to LCD-backed policies."""
+    seed_cpts = [
+        "72148", "72149", "72158",  # MRI Lumbar
+        "70551", "70552", "70553",  # MRI Brain
+        "27447",  # TKA
+        "97161", "97162", "97163",  # Physical Therapy
+        "62322", "62323",  # Epidural Steroid
+    ]
+    for cpt in seed_cpts:
+        result = registry.resolve(cpt)
+        assert result.lcd_reference is not None, f"CPT {cpt} should have LCD reference"
+
+
+def test_seed_policy_lcd_references_populated():
+    """All seed policies have non-null lcd_reference."""
+    seed_cpts = ["72148", "70551", "27447", "97161", "62322"]
+    for cpt in seed_cpts:
+        result = registry.resolve(cpt)
+        assert result.lcd_reference is not None
+
+
+def test_seed_policy_weights_sum_approximately_one():
+    """All seed policies have weights summing to ~1.0."""
+    seed_cpts = ["72148", "70551", "27447", "97161", "62322"]
+    for cpt in seed_cpts:
+        policy = registry.resolve(cpt)
+        total = sum(c.weight for c in policy.criteria)
+        assert total == pytest.approx(1.0, abs=0.01), f"Policy {policy.policy_id}: weights sum to {total}"
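Reviewer note: the tests above fully pin the registry's resolution contract (per-CPT indexing with object identity, generic fallback, module-level singleton). A compatible sketch, with the import-time registration of the five seed policies omitted:

# Sketch consistent with the tests above; not necessarily the shipped registry.py.
from src.models.policy import PolicyDefinition
from src.policies.generic_policy import build_generic_policy


class PolicyRegistry:
    def __init__(self) -> None:
        self._by_cpt: dict[str, PolicyDefinition] = {}

    def register(self, policy: PolicyDefinition) -> None:
        # Index the same object under every CPT it covers, so multi-CPT
        # policies satisfy identity checks (resolve(cpt) is policy).
        for cpt in policy.procedure_codes:
            self._by_cpt[cpt] = policy

    def resolve(self, procedure_code: str) -> PolicyDefinition:
        # Unknown CPTs fall back to the generic policy instead of a 400.
        known = self._by_cpt.get(procedure_code)
        return known if known is not None else build_generic_policy(procedure_code)


registry = PolicyRegistry()  # module-level singleton; seed registration omitted here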
diff --git a/apps/intelligence/src/tests/test_seed_policies.py b/apps/intelligence/src/tests/test_seed_policies.py
new file mode 100644
index 0000000..2915cdf
--- /dev/null
+++ b/apps/intelligence/src/tests/test_seed_policies.py
@@ -0,0 +1,54 @@
+"""Tests for LCD-backed seed policies."""
+import pytest
+from src.policies.seed.mri_lumbar import POLICY as MRI_LUMBAR
+from src.policies.seed.mri_brain import POLICY as MRI_BRAIN
+from src.policies.seed.tka import POLICY as TKA
+from src.policies.seed.physical_therapy import POLICY as PHYSICAL_THERAPY
+from src.policies.seed.epidural_steroid import POLICY as EPIDURAL_STEROID
+
+
+ALL_POLICIES = [MRI_LUMBAR, MRI_BRAIN, TKA, PHYSICAL_THERAPY, EPIDURAL_STEROID]
+
+
+def test_mri_lumbar_lcd_reference():
+    assert MRI_LUMBAR.lcd_reference == "L34220"
+
+def test_mri_lumbar_has_five_criteria():
+    assert len(MRI_LUMBAR.criteria) == 5
+
+def test_mri_lumbar_conservative_therapy_bypass():
+    """red_flag_screening bypasses conservative_therapy_4wk."""
+    red_flag = next(c for c in MRI_LUMBAR.criteria if c.id == "red_flag_screening")
+    assert "conservative_therapy_4wk" in red_flag.bypasses
+
+def test_mri_brain_lcd_reference():
+    assert MRI_BRAIN.lcd_reference == "L37373"
+
+def test_mri_brain_has_four_criteria():
+    assert len(MRI_BRAIN.criteria) == 4
+
+def test_tka_lcd_reference():
+    assert TKA.lcd_reference == "L36575"
+
+def test_tka_has_five_criteria():
+    assert len(TKA.criteria) == 5
+
+def test_physical_therapy_lcd_reference():
+    assert PHYSICAL_THERAPY.lcd_reference == "L34049"
+
+def test_epidural_lcd_reference():
+    assert EPIDURAL_STEROID.lcd_reference == "L39240"
+
+@pytest.mark.parametrize("policy", ALL_POLICIES, ids=lambda p: p.policy_id)
+def test_all_seed_weights_valid(policy):
+    """All weights in [0,1] and sum to ~1.0."""
+    total = sum(c.weight for c in policy.criteria)
+    assert total == pytest.approx(1.0, abs=0.01)
+    for c in policy.criteria:
+        assert 0.0 <= c.weight <= 1.0
+
+@pytest.mark.parametrize("policy", ALL_POLICIES, ids=lambda p: p.policy_id)
+def test_all_seed_criteria_have_lcd_sections(policy):
+    """All criteria have non-null lcd_section."""
+    for c in policy.criteria:
+        assert c.lcd_section is not None, f"{policy.policy_id}.{c.id} missing lcd_section"
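Reviewer note: the seed modules themselves are not shown in this excerpt, so here is a hedged sketch of what `src/policies/seed/mri_lumbar.py` might look like given the tests. Only the LCD number (L34220), the criterion count (5), the `red_flag_screening` -> `conservative_therapy_4wk` bypass, and the weight/lcd_section invariants are pinned; the other criterion ids, descriptions, weights, and section numbers below are illustrative assumptions.

# Hypothetical sketch of a seed policy module; not the shipped file.
from src.models.policy import PolicyCriterion, PolicyDefinition

POLICY = PolicyDefinition(
    policy_id="lcd-mri-lumbar-L34220",
    policy_name="MRI Lumbar Spine",
    lcd_reference="L34220",
    payer="CMS Medicare",
    procedure_codes=["72148", "72149", "72158"],
    criteria=[
        # A documented red flag waives the conservative-therapy requirement.
        PolicyCriterion(id="red_flag_screening", description="Red flag symptoms documented",
                        weight=0.25, lcd_section="L34220 §1",
                        bypasses=["conservative_therapy_4wk"]),
        PolicyCriterion(id="conservative_therapy_4wk", description="4+ weeks conservative therapy",
                        weight=0.25, lcd_section="L34220 §2"),
        PolicyCriterion(id="diagnosis_supported", description="Supporting ICD-10 diagnosis on record",
                        weight=0.20, lcd_section="L34220 §3"),
        PolicyCriterion(id="neuro_deficit", description="Neurological deficit on exam",
                        weight=0.15, lcd_section="L34220 §4"),
        PolicyCriterion(id="imaging_appropriateness", description="No recent comparable imaging",
                        weight=0.15, lcd_section="L34220 §5"),
    ],  # weights: 0.25 + 0.25 + 0.20 + 0.15 + 0.15 = 1.00
)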