From ee9d5ef40dd140d7ee123babde9316c06ae236c9 Mon Sep 17 00:00:00 2001 From: Grant Parry Date: Tue, 17 Feb 2026 09:46:27 -0700 Subject: [PATCH] Fix SPARQL parser backtrack, executor memory leak, and add catch_unwind Three additional hardening fixes for the SPARQL subsystem, building on PR #172: 1. Parser: replace hardcoded saturating_sub(6) with saved_pos variable. The old backtrack assumed all update keywords are 6 chars, but LOAD, DROP, and CLEAR are 4-5 chars, causing incorrect parse positions. 2. Executor: change default_graph from Option<&'a str> to Option and remove Box::leak calls in the GraphPattern::Graph handler. Each GRAPH clause previously leaked a String allocation that was never freed. 3. Operators: wrap ruvector_sparql parse/execute/format in catch_unwind so that panics from non-empty but malformed queries are converted to PostgreSQL ERROR messages instead of crashing the backend. Closes #167 Co-Authored-By: Claude Opus 4.6 --- .../ruvector-postgres/src/graph/operators.rs | 44 +++++++++++++------ .../src/graph/sparql/executor.rs | 11 +++-- .../src/graph/sparql/parser.rs | 3 +- 3 files changed, 38 insertions(+), 20 deletions(-) diff --git a/crates/ruvector-postgres/src/graph/operators.rs b/crates/ruvector-postgres/src/graph/operators.rs index e09c5d42d..9103c834f 100644 --- a/crates/ruvector-postgres/src/graph/operators.rs +++ b/crates/ruvector-postgres/src/graph/operators.rs @@ -332,19 +332,37 @@ fn ruvector_sparql(store_name: &str, query: &str, format: &str) -> Result ResultFormat::Json, - "xml" => ResultFormat::Xml, - "csv" => ResultFormat::Csv, - "tsv" => ResultFormat::Tsv, - _ => ResultFormat::Json, - }; - - Ok(format_results(&result, result_format)) + let format_lower = format.to_lowercase(); + + // Wrap parse/execute/format in catch_unwind to convert any remaining + // panics into PostgreSQL errors instead of crashing the backend + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let parsed = parse_sparql(query).map_err(|e| format!("Parse error: {}", e))?; + let result = + execute_sparql(&store, &parsed).map_err(|e| format!("Execution error: {}", e))?; + let result_format = match format_lower.as_str() { + "json" => ResultFormat::Json, + "xml" => ResultFormat::Xml, + "csv" => ResultFormat::Csv, + "tsv" => ResultFormat::Tsv, + _ => ResultFormat::Json, + }; + Ok(format_results(&result, result_format)) + })); + + match result { + Ok(inner) => inner, + Err(panic_info) => { + let msg = if let Some(s) = panic_info.downcast_ref::() { + s.clone() + } else if let Some(s) = panic_info.downcast_ref::<&str>() { + s.to_string() + } else { + "Unknown internal error".to_string() + }; + Err(format!("Internal error: {}", msg)) + } + } } /// Execute a SPARQL query and return results as JSONB diff --git a/crates/ruvector-postgres/src/graph/sparql/executor.rs b/crates/ruvector-postgres/src/graph/sparql/executor.rs index 75d9989f7..42c6074ab 100644 --- a/crates/ruvector-postgres/src/graph/sparql/executor.rs +++ b/crates/ruvector-postgres/src/graph/sparql/executor.rs @@ -21,7 +21,7 @@ static EMPTY_PREFIXES: Lazy> = Lazy::new(HashMap::new); /// Execution context for SPARQL queries pub struct SparqlContext<'a> { pub store: &'a TripleStore, - pub default_graph: Option<&'a str>, + pub default_graph: Option, pub named_graphs: Vec<&'a str>, pub base: Option<&'a Iri>, pub prefixes: &'a HashMap, @@ -260,8 +260,8 @@ fn evaluate_graph_pattern( if let Some(graph) = graph_iri { // Temporarily set the graph context - let old_default = ctx.default_graph; - ctx.default_graph = Some(Box::leak(graph.into_boxed_str())); + let old_default = ctx.default_graph.clone(); + ctx.default_graph = Some(graph); let result = evaluate_graph_pattern(ctx, inner); ctx.default_graph = old_default; result @@ -269,14 +269,13 @@ fn evaluate_graph_pattern( // Union over all named graphs let mut all_solutions = Vec::new(); for graph in ctx.store.list_graphs() { - let graph_str: &'static str = Box::leak(graph.into_boxed_str()); - ctx.default_graph = Some(graph_str); + ctx.default_graph = Some(graph.clone()); let solutions = evaluate_graph_pattern(ctx, inner)?; // Add graph variable binding if let VarOrIri::Variable(var) = graph_name { for mut sol in solutions { - sol.insert(var.clone(), RdfTerm::Iri(Iri::new(graph_str))); + sol.insert(var.clone(), RdfTerm::Iri(Iri::new(&graph))); all_solutions.push(sol); } } else { diff --git a/crates/ruvector-postgres/src/graph/sparql/parser.rs b/crates/ruvector-postgres/src/graph/sparql/parser.rs index 78b627800..5f1331304 100644 --- a/crates/ruvector-postgres/src/graph/sparql/parser.rs +++ b/crates/ruvector-postgres/src/graph/sparql/parser.rs @@ -71,6 +71,7 @@ impl<'a> SparqlParser<'a> { fn parse_query_body(&mut self) -> Result { self.skip_whitespace(); + let saved_pos = self.pos; if self.match_keyword("SELECT") { Ok(QueryBody::Select(self.parse_select_query()?)) @@ -87,7 +88,7 @@ impl<'a> SparqlParser<'a> { || self.match_keyword("CREATE") || self.match_keyword("DROP") { - self.pos = self.pos.saturating_sub(6); // Backtrack + self.pos = saved_pos; // Backtrack to before the matched keyword Ok(QueryBody::Update(self.parse_update()?)) } else { Err(SparqlError::ParseError(format!(