From 862b373f963d112abe51fb636302a7dc1aead2e2 Mon Sep 17 00:00:00 2001 From: Spyros Date: Wed, 11 Feb 2026 10:07:33 +0000 Subject: [PATCH 1/2] improve backoff-retry for embeddings calc --- .../prompt_client/vertex_prompt_client.py | 69 +++++++++++++------ 1 file changed, 49 insertions(+), 20 deletions(-) diff --git a/application/prompt_client/vertex_prompt_client.py b/application/prompt_client/vertex_prompt_client.py index fdd082b0..08a698cc 100644 --- a/application/prompt_client/vertex_prompt_client.py +++ b/application/prompt_client/vertex_prompt_client.py @@ -60,32 +60,61 @@ def get_model_name(self) -> str: """Return the model name being used.""" return self.model_name - def get_text_embeddings(self, text: str) -> List[float]: - """Text embedding with a Large Language Model.""" + def get_text_embeddings(self, text: str, max_retries: int = 3) -> List[float]: + """Text embedding with a Large Language Model. + Args: + text: Text to generate embeddings for + max_retries: Maximum number of retry attempts for transient errors + + Returns: + List of embedding values, or None if embedding generation failed + """ if len(text) > 8000: logger.info( f"embedding content is more than the vertex hard limit of 8k tokens, reducing to 8000" ) text = text[:8000] - values = [] - try: - result = self.client.models.embed_content( - model="models/gemini-embedding-001", - contents=text, - config=types.EmbedContentConfig(task_type="SEMANTIC_SIMILARITY"), - ) - if not result: - return None - values = result.embeddings[0].values - except genai.errors.ClientError as e: - logger.info(f"hit limit, sleeping for a minute, error was: {repr(e)}") - time.sleep( - 60 - ) # Vertex's quota is per minute, so sleep for a full minute, then try again - values = self.get_text_embeddings(text) - - return values + + for attempt in range(max_retries): + try: + result = self.client.models.embed_content( + model="models/gemini-embedding-001", + contents=text, + config=types.EmbedContentConfig(task_type="SEMANTIC_SIMILARITY"), + ) + if not result: + logger.warning("Embedding API returned empty result") + return None + return result.embeddings[0].values + + except genai.errors.ClientError as e: + error_str = str(e) + # Check if this is a quota/rate limit error (429) + is_quota_error = "429" in error_str or "RESOURCE_EXHAUSTED" in error_str or "quota" in error_str.lower() + + if not is_quota_error: + # Non-quota errors should not be retried + logger.error(f"Non-retryable error from embedding API: {repr(e)}") + return None + + if attempt < max_retries - 1: + # Exponential backoff: 60s, 120s, 180s + backoff_seconds = 60 * (attempt + 1) + logger.warning( + f"Quota/rate limit hit (attempt {attempt + 1}/{max_retries}), " + f"sleeping {backoff_seconds}s before retry. Error: {repr(e)}" + ) + time.sleep(backoff_seconds) + else: + # Final attempt failed + logger.error( + f"Embedding API quota exhausted after {max_retries} attempts. " + f"Last error: {repr(e)}. Please check your API quota/billing in AI Studio." + ) + return None + + return None def create_chat_completion(self, prompt, closest_object_str) -> str: msg = ( From 3f092715525327597ddee10545dfb974fee5d774 Mon Sep 17 00:00:00 2001 From: Spyros Date: Wed, 11 Feb 2026 10:36:21 +0000 Subject: [PATCH 2/2] add code to detect heroku and only return pre-calculated ga --- .../frontend/src/pages/Explorer/explorer.tsx | 362 +++---- .../Explorer/visuals/circles/circles.tsx | 906 +++++++++--------- .../frontend/src/pages/chatbot/chatbot.tsx | 5 +- .../prompt_client/vertex_prompt_client.py | 6 +- application/utils/gap_analysis.py | 63 +- application/web/web_main.py | 38 + 6 files changed, 739 insertions(+), 641 deletions(-) diff --git a/application/frontend/src/pages/Explorer/explorer.tsx b/application/frontend/src/pages/Explorer/explorer.tsx index c85fc3b3..c3e78e3a 100644 --- a/application/frontend/src/pages/Explorer/explorer.tsx +++ b/application/frontend/src/pages/Explorer/explorer.tsx @@ -1,181 +1,181 @@ -import './explorer.scss'; - -import React, { useEffect, useState } from 'react'; -import { Link } from 'react-router-dom'; -import { List } from 'semantic-ui-react'; - -import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndicator'; -import { TYPE_CONTAINS, TYPE_LINKED_TO } from '../../const'; -import { useDataStore } from '../../providers/DataProvider'; -import { LinkedTreeDocument, TreeDocument } from '../../types'; -import { getDocumentDisplayName } from '../../utils'; -import { getInternalUrl } from '../../utils/document'; -import { LinkedStandards } from './LinkedStandards'; - -export const Explorer = () => { - const { dataLoading, dataTree } = useDataStore(); - const [loading, setLoading] = useState(false); - const [filter, setFilter] = useState(''); - const [filteredTree, setFilteredTree] = useState(); - const applyHighlight = (text, term) => { - if (!term) return text; - let index = text.toLowerCase().indexOf(term); - if (index >= 0) { - return ( - <> - {text.substring(0, index)} - {text.substring(index, index + term.length)} - {text.substring(index + term.length)} - - ); - } - return text; - }; - - const filterFunc = (doc: TreeDocument, term: string) => - doc?.displayName?.toLowerCase().includes(term) || doc?.name?.toLowerCase().includes(term); - - const recursiveFilter = (doc: TreeDocument, term: string) => { - if (doc.links) { - const filteredLinks: LinkedTreeDocument[] = []; - doc.links.forEach((x) => { - const filteredDoc = recursiveFilter(x.document, term); - if (filterFunc(x.document, term) || filteredDoc) { - filteredLinks.push({ ltype: x.ltype, document: filteredDoc || x.document }); - } - }); - doc.links = filteredLinks; - } - - if (filterFunc(doc, term) || doc.links?.length) { - return doc; // Return the document if it or any of its children (links or standards) matches the term - } - return null; // Return null if the document and its descendants do not match the term - }; - - //accordion - const [collapsedItems, setCollapsedItems] = useState([]); - const isCollapsed = (id: string) => collapsedItems.includes(id); - const toggleItem = (id: string) => { - if (collapsedItems.includes(id)) { - setCollapsedItems(collapsedItems.filter((itemId) => itemId !== id)); - } else { - setCollapsedItems([...collapsedItems, id]); - } - }; - - useEffect(() => { - if (dataTree.length) { - const treeCopy = structuredClone(dataTree); - const filTree: TreeDocument[] = []; - treeCopy - .map((x) => recursiveFilter(x, filter)) - .forEach((x) => { - if (x) { - filTree.push(x); - } - }); - setFilteredTree(filTree); - } - }, [filter, dataTree, setFilteredTree]); - - useEffect(() => { - setLoading(dataLoading); - }, [dataLoading]); - - function processNode(item) { - if (!item) { - return <>; - } - item.displayName = item.displayName ?? getDocumentDisplayName(item); - item.url = item.url ?? getInternalUrl(item); - item.links = item.links ?? []; - - const contains = item.links.filter((x) => x.ltype === TYPE_CONTAINS); - const linkedTo = item.links.filter((x) => x.ltype === TYPE_LINKED_TO); - - const creCode = item.id; - const creName = item.displayName.split(' : ').pop(); - return ( - - - - {contains.length > 0 && ( -
toggleItem(item.id)} - > - -
- )} - - {applyHighlight(creCode, filter)}: - {applyHighlight(creName, filter)} - -
- - {contains.length > 0 && !isCollapsed(item.id) && ( - {contains.map((child) => processNode(child.document))} - )} -
-
- ); - } - - function update(event) { - setFilter(event.target.value.toLowerCase()); - } - - return ( - <> -
-

Open CRE Explorer

-

- A visual explorer of Open Common Requirement Enumerations (CREs). Originally created by:{' '} - - Zeljko Obrenovic - - . -

- - - - - {filteredTree?.map((item) => { - return processNode(item); - })} - -
- - ); -}; +import './explorer.scss'; + +import React, { useEffect, useState } from 'react'; +import { Link } from 'react-router-dom'; +import { List } from 'semantic-ui-react'; + +import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndicator'; +import { TYPE_CONTAINS, TYPE_LINKED_TO } from '../../const'; +import { useDataStore } from '../../providers/DataProvider'; +import { LinkedTreeDocument, TreeDocument } from '../../types'; +import { getDocumentDisplayName } from '../../utils'; +import { getInternalUrl } from '../../utils/document'; +import { LinkedStandards } from './LinkedStandards'; + +export const Explorer = () => { + const { dataLoading, dataTree } = useDataStore(); + const [loading, setLoading] = useState(false); + const [filter, setFilter] = useState(''); + const [filteredTree, setFilteredTree] = useState(); + const applyHighlight = (text, term) => { + if (!term) return text; + let index = text.toLowerCase().indexOf(term); + if (index >= 0) { + return ( + <> + {text.substring(0, index)} + {text.substring(index, index + term.length)} + {text.substring(index + term.length)} + + ); + } + return text; + }; + + const filterFunc = (doc: TreeDocument, term: string) => + doc?.displayName?.toLowerCase().includes(term) || doc?.name?.toLowerCase().includes(term); + + const recursiveFilter = (doc: TreeDocument, term: string) => { + if (doc.links) { + const filteredLinks: LinkedTreeDocument[] = []; + doc.links.forEach((x) => { + const filteredDoc = recursiveFilter(x.document, term); + if (filterFunc(x.document, term) || filteredDoc) { + filteredLinks.push({ ltype: x.ltype, document: filteredDoc || x.document }); + } + }); + doc.links = filteredLinks; + } + + if (filterFunc(doc, term) || doc.links?.length) { + return doc; // Return the document if it or any of its children (links or standards) matches the term + } + return null; // Return null if the document and its descendants do not match the term + }; + + //accordion + const [collapsedItems, setCollapsedItems] = useState([]); + const isCollapsed = (id: string) => collapsedItems.includes(id); + const toggleItem = (id: string) => { + if (collapsedItems.includes(id)) { + setCollapsedItems(collapsedItems.filter((itemId) => itemId !== id)); + } else { + setCollapsedItems([...collapsedItems, id]); + } + }; + + useEffect(() => { + if (dataTree.length) { + const treeCopy = structuredClone(dataTree); + const filTree: TreeDocument[] = []; + treeCopy + .map((x) => recursiveFilter(x, filter)) + .forEach((x) => { + if (x) { + filTree.push(x); + } + }); + setFilteredTree(filTree); + } + }, [filter, dataTree, setFilteredTree]); + + useEffect(() => { + setLoading(dataLoading); + }, [dataLoading]); + + function processNode(item) { + if (!item) { + return <>; + } + item.displayName = item.displayName ?? getDocumentDisplayName(item); + item.url = item.url ?? getInternalUrl(item); + item.links = item.links ?? []; + + const contains = item.links.filter((x) => x.ltype === TYPE_CONTAINS); + const linkedTo = item.links.filter((x) => x.ltype === TYPE_LINKED_TO); + + const creCode = item.id; + const creName = item.displayName.split(' : ').pop(); + return ( + + + + {contains.length > 0 && ( +
toggleItem(item.id)} + > + +
+ )} + + {applyHighlight(creCode, filter)}: + {applyHighlight(creName, filter)} + +
+ + {contains.length > 0 && !isCollapsed(item.id) && ( + {contains.map((child) => processNode(child.document))} + )} +
+
+ ); + } + + function update(event) { + setFilter(event.target.value.toLowerCase()); + } + + return ( + <> +
+

Open CRE Explorer

+

+ A visual explorer of Open Common Requirement Enumerations (CREs). Originally created by:{' '} + + Zeljko Obrenovic + + . +

+ + + + + {filteredTree?.map((item) => { + return processNode(item); + })} + +
+ + ); +}; diff --git a/application/frontend/src/pages/Explorer/visuals/circles/circles.tsx b/application/frontend/src/pages/Explorer/visuals/circles/circles.tsx index 8083b9da..da3618d9 100644 --- a/application/frontend/src/pages/Explorer/visuals/circles/circles.tsx +++ b/application/frontend/src/pages/Explorer/visuals/circles/circles.tsx @@ -1,453 +1,453 @@ -import './circles.scss'; - -import { LoadingAndErrorIndicator } from 'application/frontend/src/components/LoadingAndErrorIndicator'; -import useWindowDimensions from 'application/frontend/src/hooks/useWindowDimensions'; -import { useDataStore } from 'application/frontend/src/providers/DataProvider'; -import * as d3 from 'd3'; -import React, { useEffect, useState } from 'react'; -import { Button, Icon } from 'semantic-ui-react'; - -export const ExplorerCircles = () => { - const { height, width } = useWindowDimensions(); - const [useFullScreen, setUseFullScreen] = useState(false); - const { dataLoading, dataTree } = useDataStore(); - const [breadcrumb, setBreadcrumb] = useState([]); - const svgRef = React.useRef(null); - - const rootRef = React.useRef(null); - const zoomRef = React.useRef(null); - const updateBreadcrumbRef = React.useRef(null); - const viewRef = React.useRef(null); - const zoomToRef = React.useRef(null); - const margin = 20; - - const defaultSize = width > height ? height - 100 : width; - const size = useFullScreen ? width : defaultSize; - - useEffect(() => { - if (!svgRef.current) { - // guard to ensure the element exists - return; - } - var svg = d3.select(svgRef.current); - svg.selectAll('*').remove(); - - var diameter = size, - g = svg.append('g').attr('transform', 'translate(' + diameter / 2 + ',' + diameter / 2 + ')'); - - var color = d3 - .scaleLinear([-1, 5], ['hsl(152,80%,80%)', 'hsl(228,30%,40%)']) - .interpolate(d3.interpolateHcl); - - var pack = d3 - .pack() - .size([diameter - margin, diameter - margin]) - .padding(2); - - const populateChildren = (node) => { - node.children = []; - if (node.links) { - node.children = node.links.filter((x) => x.document && x.ltype !== 'Related').map((x) => x.document); - } - node.children.forEach((x) => populateChildren(x)); - node.children.forEach((x) => { - if (x.children.length === 0) x.size = 1; - }); - }; - - const dataTreeClone = structuredClone(dataTree); - dataTreeClone.forEach((node) => populateChildren(node)); - - let root: any = { - displayName: 'OpenCRE', - children: dataTreeClone, - }; - - root = d3 - .hierarchy(root) - .sum(function (d: any) { - return d.size; - }) - .sort(function (a: any, b: any) { - return b.value - a.value; - }); - - var focus: any = root, - nodes = pack(root).descendants(), - view; - - // Create tooltip div for hover labels - const tooltip = d3 - .select('body') - .append('div') - .attr('class', 'circle-tooltip') - .style('position', 'absolute') - .style('visibility', 'hidden') - .style('background-color', 'white') - .style('padding', '5px') - .style('border-radius', '3px') - .style('border', '1px solid #ccc') - .style('pointer-events', 'none') - .style('z-index', '10'); - - // Update breadcrumb when focus changes - const updateBreadcrumb = (d: any) => { - if (d === root) { - setBreadcrumb(['OpenCRE']); - return; - } - - let path: string[] = []; - let current = d; - - while (current && current !== root) { - if (current.data.displayName && current.data.displayName !== 'OpenCRE') { - // Remove "CRE: " prefix if it exists - const displayName = current.data.displayName.replace(/^CRE: /, ''); - path.unshift(displayName); - } - current = current.parent; - } - path.unshift('OpenCRE'); - setBreadcrumb(path); - }; - - var circle = g - .selectAll('circle') - .data(nodes) - .enter() - .append('circle') - .attr('class', function (d) { - return d.parent ? (d.children ? 'node' : 'node node--leaf') : 'node node--root'; - }) - .style('fill', function (d: any) { - return d.children ? color(d.depth) : d.data.color ? d.data.color : null; - }) - .style('cursor', function (d) { - // Show the pointer cursor only if it's a leaf node AND has a hyperlink property. - if (!d.children && (d.data as { hyperlink?: string }).hyperlink) { - return 'pointer'; - } - return 'default'; - }) - - .on('mouseover', function (event, d: any) { - // Prefer displayName, fallback to id - const label = d.data.displayName - ? d.data.displayName.replace(/^CRE: /, '') - : d.data.id - ? d.data.id - : ''; - - if (label) { - tooltip - .html(label) - .style('visibility', 'visible') - .style('top', event.pageY - 10 + 'px') - .style('left', event.pageX + 10 + 'px'); - } - }) - .on('mousemove', function (event) { - tooltip.style('top', event.pageY - 10 + 'px').style('left', event.pageX + 10 + 'px'); - }) - .on('mouseout', function () { - tooltip.style('visibility', 'hidden'); - }) - - .on('click', function (event, d: any) { - if (!d.children) { - event.stopPropagation(); - - // Directly access the hyperlink property from the node's data. - const url = d.data.hyperlink; - - // If the url exists, open it in a new tab. - if (url) { - console.log('URL found:', url); - window.open(url, '_blank'); - } else { - console.log('This leaf node does not have a hyperlink.'); - } - } else if (focus !== d) { - updateBreadcrumb(d); - zoom(event, d); - event.stopPropagation(); - } - }); - let showLabels = true; - - // Filter the nodes to only include those that have children (i.e., are not leaves) - const parentNodes = nodes.filter(function (d) { - return d.children; - }); - - // Create a group for the label components using ONLY the parent nodes - var labelGroup = g - .selectAll('.label-group') - .data(parentNodes) // Use the filtered data - .enter() - .append('g') - .attr('class', 'label-group') - .style('opacity', function (d: any) { - return d.parent === focus ? 1 : 0; - }) - .style('display', function (d: any) { - return d.parent === focus ? 'inline' : 'none'; - }); - - // Add the underlined text to the group - labelGroup - .append('text') - .attr('class', 'label') - .style('text-anchor', 'middle') - .style('text-decoration', 'underline') - .text(function (d: any) { - if (!d.data.displayName) return ''; - let name = d.data.displayName; - name = name.replace(/^CRE\s*:\s*\d+-\d+\s*:\s*/, ''); - return name; - }); - - // Add the downward-pointing tick line to the group - labelGroup - .append('line') - .attr('class', 'label-tick') - .style('stroke', 'black') - .style('stroke-width', 1) - .attr('x1', 0) - .attr('y1', 2) - .attr('x2', 0) - .attr('y2', 8); - - svg.style('background', color(-1)).on('click', function (event) { - updateBreadcrumb(root); - zoom(event, root); - }); - - zoomTo([root.x, root.y, root.r * 2 + margin]); - setBreadcrumb(['OpenCRE']); - - function zoom(event: any, d: any) { - var focus0 = focus; - focus = d; - - var transition = d3 - .transition() - .duration(event.altKey ? 7500 : 750) - .tween('zoom', function () { - var i = d3.interpolateZoom(view, [focus.x, focus.y, focus.r * 2 + margin]); - return function (t) { - zoomTo(i(t)); - }; - }); - - if (showLabels) { - transition - .selectAll('.label-group') - .filter(function (d: any) { - const el = this as HTMLElement; - return (d && d.parent === focus) || el.style.display === 'inline'; - }) - .style('opacity', function (d: any) { - return d && d.parent === focus ? 1 : 0; - }) - .on('start', function (d: any) { - const el = this as HTMLElement; - if (d && d.parent === focus) el.style.display = 'inline'; - }) - .on('end', function (d: any) { - const el = this as HTMLElement; - if (d && d.parent !== focus) el.style.display = 'none'; - }); - } - } - - function zoomTo(v) { - var k = diameter / v[2]; - view = v; - viewRef.current = v; - - circle.attr('transform', function (d: any) { - return 'translate(' + (d.x - v[0]) * k + ',' + (d.y - v[1]) * k + ')'; - }); - - labelGroup.attr('transform', function (d: any) { - const xPos = (d.x - v[0]) * k; - const yPos = (d.y - v[1]) * k - (d.r * k + 5); - return 'translate(' + xPos + ',' + yPos + ')'; - }); - - circle.attr('r', function (d: any) { - return d.r * k; - }); - } - - rootRef.current = root; - zoomRef.current = zoom; - updateBreadcrumbRef.current = updateBreadcrumb; - zoomToRef.current = zoomTo; - - return () => { - d3.select('.circle-tooltip').remove(); - }; - }, [size, dataTree]); - - return ( -
- {breadcrumb.length > 0 && ( -
- {breadcrumb.map((item, index) => ( - - {index > 0 && } - { - if (index < breadcrumb.length - 1) { - let node = rootRef.current; - for (let i = 1; i <= index; i++) { - if (!node.children) break; - node = node.children.find( - (child) => - child.data.displayName && - child.data.displayName.replace(/^CRE: /, '') === breadcrumb[i] - ); - if (!node) break; - } - if (node) { - updateBreadcrumbRef.current(node); - zoomRef.current({ altKey: false }, node); - } - } - }} - > - {item} - - - ))} -
- )} - -
-
- -
-
- - -
- - - - -
- -
- ); -}; +import './circles.scss'; + +import { LoadingAndErrorIndicator } from 'application/frontend/src/components/LoadingAndErrorIndicator'; +import useWindowDimensions from 'application/frontend/src/hooks/useWindowDimensions'; +import { useDataStore } from 'application/frontend/src/providers/DataProvider'; +import * as d3 from 'd3'; +import React, { useEffect, useState } from 'react'; +import { Button, Icon } from 'semantic-ui-react'; + +export const ExplorerCircles = () => { + const { height, width } = useWindowDimensions(); + const [useFullScreen, setUseFullScreen] = useState(false); + const { dataLoading, dataTree } = useDataStore(); + const [breadcrumb, setBreadcrumb] = useState([]); + const svgRef = React.useRef(null); + + const rootRef = React.useRef(null); + const zoomRef = React.useRef(null); + const updateBreadcrumbRef = React.useRef(null); + const viewRef = React.useRef(null); + const zoomToRef = React.useRef(null); + const margin = 20; + + const defaultSize = width > height ? height - 100 : width; + const size = useFullScreen ? width : defaultSize; + + useEffect(() => { + if (!svgRef.current) { + // guard to ensure the element exists + return; + } + var svg = d3.select(svgRef.current); + svg.selectAll('*').remove(); + + var diameter = size, + g = svg.append('g').attr('transform', 'translate(' + diameter / 2 + ',' + diameter / 2 + ')'); + + var color = d3 + .scaleLinear([-1, 5], ['hsl(152,80%,80%)', 'hsl(228,30%,40%)']) + .interpolate(d3.interpolateHcl); + + var pack = d3 + .pack() + .size([diameter - margin, diameter - margin]) + .padding(2); + + const populateChildren = (node) => { + node.children = []; + if (node.links) { + node.children = node.links.filter((x) => x.document && x.ltype !== 'Related').map((x) => x.document); + } + node.children.forEach((x) => populateChildren(x)); + node.children.forEach((x) => { + if (x.children.length === 0) x.size = 1; + }); + }; + + const dataTreeClone = structuredClone(dataTree); + dataTreeClone.forEach((node) => populateChildren(node)); + + let root: any = { + displayName: 'OpenCRE', + children: dataTreeClone, + }; + + root = d3 + .hierarchy(root) + .sum(function (d: any) { + return d.size; + }) + .sort(function (a: any, b: any) { + return b.value - a.value; + }); + + var focus: any = root, + nodes = pack(root).descendants(), + view; + + // Create tooltip div for hover labels + const tooltip = d3 + .select('body') + .append('div') + .attr('class', 'circle-tooltip') + .style('position', 'absolute') + .style('visibility', 'hidden') + .style('background-color', 'white') + .style('padding', '5px') + .style('border-radius', '3px') + .style('border', '1px solid #ccc') + .style('pointer-events', 'none') + .style('z-index', '10'); + + // Update breadcrumb when focus changes + const updateBreadcrumb = (d: any) => { + if (d === root) { + setBreadcrumb(['OpenCRE']); + return; + } + + let path: string[] = []; + let current = d; + + while (current && current !== root) { + if (current.data.displayName && current.data.displayName !== 'OpenCRE') { + // Remove "CRE: " prefix if it exists + const displayName = current.data.displayName.replace(/^CRE: /, ''); + path.unshift(displayName); + } + current = current.parent; + } + path.unshift('OpenCRE'); + setBreadcrumb(path); + }; + + var circle = g + .selectAll('circle') + .data(nodes) + .enter() + .append('circle') + .attr('class', function (d) { + return d.parent ? (d.children ? 'node' : 'node node--leaf') : 'node node--root'; + }) + .style('fill', function (d: any) { + return d.children ? color(d.depth) : d.data.color ? d.data.color : null; + }) + .style('cursor', function (d) { + // Show the pointer cursor only if it's a leaf node AND has a hyperlink property. + if (!d.children && (d.data as { hyperlink?: string }).hyperlink) { + return 'pointer'; + } + return 'default'; + }) + + .on('mouseover', function (event, d: any) { + // Prefer displayName, fallback to id + const label = d.data.displayName + ? d.data.displayName.replace(/^CRE: /, '') + : d.data.id + ? d.data.id + : ''; + + if (label) { + tooltip + .html(label) + .style('visibility', 'visible') + .style('top', event.pageY - 10 + 'px') + .style('left', event.pageX + 10 + 'px'); + } + }) + .on('mousemove', function (event) { + tooltip.style('top', event.pageY - 10 + 'px').style('left', event.pageX + 10 + 'px'); + }) + .on('mouseout', function () { + tooltip.style('visibility', 'hidden'); + }) + + .on('click', function (event, d: any) { + if (!d.children) { + event.stopPropagation(); + + // Directly access the hyperlink property from the node's data. + const url = d.data.hyperlink; + + // If the url exists, open it in a new tab. + if (url) { + console.log('URL found:', url); + window.open(url, '_blank'); + } else { + console.log('This leaf node does not have a hyperlink.'); + } + } else if (focus !== d) { + updateBreadcrumb(d); + zoom(event, d); + event.stopPropagation(); + } + }); + let showLabels = true; + + // Filter the nodes to only include those that have children (i.e., are not leaves) + const parentNodes = nodes.filter(function (d) { + return d.children; + }); + + // Create a group for the label components using ONLY the parent nodes + var labelGroup = g + .selectAll('.label-group') + .data(parentNodes) // Use the filtered data + .enter() + .append('g') + .attr('class', 'label-group') + .style('opacity', function (d: any) { + return d.parent === focus ? 1 : 0; + }) + .style('display', function (d: any) { + return d.parent === focus ? 'inline' : 'none'; + }); + + // Add the underlined text to the group + labelGroup + .append('text') + .attr('class', 'label') + .style('text-anchor', 'middle') + .style('text-decoration', 'underline') + .text(function (d: any) { + if (!d.data.displayName) return ''; + let name = d.data.displayName; + name = name.replace(/^CRE\s*:\s*\d+-\d+\s*:\s*/, ''); + return name; + }); + + // Add the downward-pointing tick line to the group + labelGroup + .append('line') + .attr('class', 'label-tick') + .style('stroke', 'black') + .style('stroke-width', 1) + .attr('x1', 0) + .attr('y1', 2) + .attr('x2', 0) + .attr('y2', 8); + + svg.style('background', color(-1)).on('click', function (event) { + updateBreadcrumb(root); + zoom(event, root); + }); + + zoomTo([root.x, root.y, root.r * 2 + margin]); + setBreadcrumb(['OpenCRE']); + + function zoom(event: any, d: any) { + var focus0 = focus; + focus = d; + + var transition = d3 + .transition() + .duration(event.altKey ? 7500 : 750) + .tween('zoom', function () { + var i = d3.interpolateZoom(view, [focus.x, focus.y, focus.r * 2 + margin]); + return function (t) { + zoomTo(i(t)); + }; + }); + + if (showLabels) { + transition + .selectAll('.label-group') + .filter(function (d: any) { + const el = this as HTMLElement; + return (d && d.parent === focus) || el.style.display === 'inline'; + }) + .style('opacity', function (d: any) { + return d && d.parent === focus ? 1 : 0; + }) + .on('start', function (d: any) { + const el = this as HTMLElement; + if (d && d.parent === focus) el.style.display = 'inline'; + }) + .on('end', function (d: any) { + const el = this as HTMLElement; + if (d && d.parent !== focus) el.style.display = 'none'; + }); + } + } + + function zoomTo(v) { + var k = diameter / v[2]; + view = v; + viewRef.current = v; + + circle.attr('transform', function (d: any) { + return 'translate(' + (d.x - v[0]) * k + ',' + (d.y - v[1]) * k + ')'; + }); + + labelGroup.attr('transform', function (d: any) { + const xPos = (d.x - v[0]) * k; + const yPos = (d.y - v[1]) * k - (d.r * k + 5); + return 'translate(' + xPos + ',' + yPos + ')'; + }); + + circle.attr('r', function (d: any) { + return d.r * k; + }); + } + + rootRef.current = root; + zoomRef.current = zoom; + updateBreadcrumbRef.current = updateBreadcrumb; + zoomToRef.current = zoomTo; + + return () => { + d3.select('.circle-tooltip').remove(); + }; + }, [size, dataTree]); + + return ( +
+ {breadcrumb.length > 0 && ( +
+ {breadcrumb.map((item, index) => ( + + {index > 0 && } + { + if (index < breadcrumb.length - 1) { + let node = rootRef.current; + for (let i = 1; i <= index; i++) { + if (!node.children) break; + node = node.children.find( + (child) => + child.data.displayName && + child.data.displayName.replace(/^CRE: /, '') === breadcrumb[i] + ); + if (!node) break; + } + if (node) { + updateBreadcrumbRef.current(node); + zoomRef.current({ altKey: false }, node); + } + } + }} + > + {item} + + + ))} +
+ )} + +
+
+ +
+
+ + +
+ + + + +
+ +
+ ); +}; diff --git a/application/frontend/src/pages/chatbot/chatbot.tsx b/application/frontend/src/pages/chatbot/chatbot.tsx index 7ffabdd5..cd6e70cd 100644 --- a/application/frontend/src/pages/chatbot/chatbot.tsx +++ b/application/frontend/src/pages/chatbot/chatbot.tsx @@ -162,7 +162,6 @@ export const Chatbot = () => { <> {user !== '' ? null : login()} -
OWASP OpenCRE Chat
@@ -229,8 +228,8 @@ export const Chatbot = () => {
- Answers are generated by {getModelDisplayName(modelName)} Large Language Model, which uses the internet as - training data, plus collected key cybersecurity standards from{' '} + Answers are generated by {getModelDisplayName(modelName)} Large Language Model, which uses the + internet as training data, plus collected key cybersecurity standards from{' '} OpenCRE as the preferred source. This leads to more reliable answers and adds references, but note: it is still generative AI which is never guaranteed correct. diff --git a/application/prompt_client/vertex_prompt_client.py b/application/prompt_client/vertex_prompt_client.py index 08a698cc..9ed8d696 100644 --- a/application/prompt_client/vertex_prompt_client.py +++ b/application/prompt_client/vertex_prompt_client.py @@ -91,7 +91,11 @@ def get_text_embeddings(self, text: str, max_retries: int = 3) -> List[float]: except genai.errors.ClientError as e: error_str = str(e) # Check if this is a quota/rate limit error (429) - is_quota_error = "429" in error_str or "RESOURCE_EXHAUSTED" in error_str or "quota" in error_str.lower() + is_quota_error = ( + "429" in error_str + or "RESOURCE_EXHAUSTED" in error_str + or "quota" in error_str.lower() + ) if not is_quota_error: # Non-quota errors should not be retried diff --git a/application/utils/gap_analysis.py b/application/utils/gap_analysis.py index 39ec6256..da2bc246 100644 --- a/application/utils/gap_analysis.py +++ b/application/utils/gap_analysis.py @@ -1,6 +1,7 @@ import requests import time import logging +import os from rq import Queue, job, exceptions from typing import List, Dict from application.utils import redis @@ -59,14 +60,70 @@ def get_next_id(step, previous_id): return step["start"].id +def _all_requested_standards_exist(standards: List[str], database) -> bool: + """ + Best-effort check that all requested standards exist in the database. + + - If the check fails unexpectedly or returns a non-sequence (e.g. in tests + with heavy mocking), we assume they exist to avoid changing behaviour. + - If the standards list is empty, we treat it as valid and let the rest of + the logic handle it. + """ + if not standards: + return True + + try: + existing = database.standards() + except Exception as exc: # pragma: no cover - defensive guardrail + logger.error( + f"Unable to verify standards existence when scheduling gap analysis, " + f"proceeding anyway: {exc}" + ) + return True + + if not isinstance(existing, (list, tuple, set)): + # In test environments this may be a MagicMock; do not enforce the + # existence check in that case to keep behaviour unchanged. + logger.debug( + f"database.standards() returned non-iterable type " + f"{type(existing)}, skipping existence check" + ) + return True + + existing_lower = {str(s).lower() for s in existing} + missing = [s for s in standards if str(s).lower() not in existing_lower] + + if missing: + standards_hash = make_resources_key(standards) + logger.info( + f"Gap analysis request {standards_hash} references standards " + f"that do not exist in the database: {', '.join(missing)}" + ) + return False + + return True + + # database is of type Node_collection, cannot annotate due to circular import def schedule(standards: List[str], database): + """ + Schedule or retrieve gap analysis for the given standards. + + This function handles Redis queue operations and job scheduling. + For web requests, the caller (map_analysis route) should check: + - Cached results in database first + - Heroku environment and standards existence (if on Heroku) + - CRE_NO_CALCULATE_GAP_ANALYSIS env var + + This function still checks for cached results as a safety net for + non-web callers (e.g., cre_main.py during imports). + """ from application.database import db standards_hash = make_resources_key(standards) - if database.gap_analysis_exists( - standards_hash - ): # easiest, it's been calculated and cached, get it from the db + + # Check for cached results (safety net for non-web callers) + if database.gap_analysis_exists(standards_hash): return flask_json.loads(database.get_gap_analysis_result(standards_hash)) logger.info(f"Gap analysis result for {standards_hash} does not exist") diff --git a/application/web/web_main.py b/application/web/web_main.py index fa7a82ef..7c3ebe28 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -276,6 +276,44 @@ def map_analysis() -> Any: database = db.Node_collection() standards = request.args.getlist("standard") + standards_hash = gap_analysis.make_resources_key(standards) + + # First, check if we have cached results in the database + if database.gap_analysis_exists(standards_hash): + gap_analysis_result = database.get_gap_analysis_result(standards_hash) + if gap_analysis_result: + return jsonify(flask_json.loads(gap_analysis_result)) + + # On Heroku (read-only), check if standards exist before attempting Redis/queue operations + is_heroku = os.environ.get("DYNO") is not None + if is_heroku: + # Check if all requested standards exist + try: + existing_standards = database.standards() + if isinstance(existing_standards, (list, tuple, set)): + existing_lower = {str(s).lower() for s in existing_standards} + missing = [s for s in standards if str(s).lower() not in existing_lower] + if missing: + logger.info( + f"On Heroku: gap analysis request {standards_hash} references " + f"standards that do not exist: {', '.join(missing)}, returning 404" + ) + abort( + 404, f"One or more standards do not exist: {', '.join(missing)}" + ) + except Exception as exc: + # If we can't verify standards, log but don't fail (defensive) + logger.warning(f"Could not verify standards existence on Heroku: {exc}") + + # If calculations are disabled, return 404 + if os.environ.get("CRE_NO_CALCULATE_GAP_ANALYSIS"): + logger.info( + f"Gap analysis calculations are disabled by CRE_NO_CALCULATE_GAP_ANALYSIS; " + f"refusing to schedule new job for {standards_hash}" + ) + abort(404, "Gap analysis calculations are disabled") + + # Now call schedule() which will handle Redis/queue operations gap_analysis_dict = gap_analysis.schedule(standards, database) if gap_analysis_dict.get("result"): return jsonify(gap_analysis_dict)