From 9b03b5e52cb1ab7690cc041ed93dc290e91f1dff Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Tue, 17 Feb 2026 21:11:53 +0100 Subject: [PATCH 01/14] Migrate to Elastic.Ingest.Elasticsearch 0.19.0 with source-generated mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace manual channel orchestration with IncrementalSyncOrchestrator and source-generated ElasticsearchTypeContext from Elastic.Mapping 0.4.0. Add field type attributes ([Keyword], [Text], [Object], etc.) directly on DocumentationDocument to drive the mapping source generator, replacing verbose manual JSON mappings. - Update Elastic.Ingest.Elasticsearch 0.17.1 → 0.19.0, add Elastic.Mapping 0.4.0 - Add mapping attributes to DocumentationDocument and IndexedProduct - Create DocumentationMappingConfig.cs with two Entity variants (lexical/semantic) - Rewrite ElasticsearchMarkdownExporter to use orchestrator for dual-index mode - Delete ElasticsearchIngestChannel.cs and ElasticsearchIngestChannel.Mapping.cs - Remove unused ReindexAsync from ElasticsearchOperations - Update SearchBootstrapFixture to use IngestChannel with semantic type context --- Directory.Packages.props | 3 +- .../Elastic.Documentation.csproj | 1 + .../Search/DocumentationDocument.cs | 21 + .../Search/IndexedProduct.cs | 3 + .../DocumentationMappingConfig.cs | 199 ++++++++ .../ElasticsearchIngestChannel.Mapping.cs | 260 ---------- .../ElasticsearchIngestChannel.cs | 161 ------ .../ElasticsearchMarkdownExporter.Export.cs | 26 +- .../ElasticsearchMarkdownExporter.cs | 478 +++++++----------- .../Elasticsearch/ElasticsearchOperations.cs | 16 - .../Search/SearchBootstrapFixture.cs | 26 +- 11 files changed, 435 insertions(+), 759 deletions(-) create mode 100644 src/Elastic.Markdown/Exporters/Elasticsearch/DocumentationMappingConfig.cs delete mode 100644 src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.Mapping.cs delete mode 100644 
src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.cs diff --git a/Directory.Packages.props b/Directory.Packages.props index 2adc4ebc0..04966e308 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -48,7 +48,8 @@ - + + diff --git a/src/Elastic.Documentation/Elastic.Documentation.csproj b/src/Elastic.Documentation/Elastic.Documentation.csproj index 99b59c073..fbc2f8c72 100644 --- a/src/Elastic.Documentation/Elastic.Documentation.csproj +++ b/src/Elastic.Documentation/Elastic.Documentation.csproj @@ -9,6 +9,7 @@ + diff --git a/src/Elastic.Documentation/Search/DocumentationDocument.cs b/src/Elastic.Documentation/Search/DocumentationDocument.cs index e30a4b350..e25ded0ab 100644 --- a/src/Elastic.Documentation/Search/DocumentationDocument.cs +++ b/src/Elastic.Documentation/Search/DocumentationDocument.cs @@ -4,6 +4,7 @@ using System.Text.Json.Serialization; using Elastic.Documentation.AppliesTo; +using Elastic.Mapping; namespace Elastic.Documentation.Search; @@ -12,6 +13,7 @@ public record ParentDocument [JsonPropertyName("title")] public required string Title { get; set; } + [Keyword] [JsonPropertyName("url")] public required string Url { get; set; } } @@ -28,6 +30,7 @@ public record DocumentationDocument [JsonPropertyName("search_title")] public required string SearchTitle { get; set; } + [Keyword(Normalizer = "keyword_normalizer")] [JsonPropertyName("type")] public required string Type { get; set; } = "doc"; @@ -35,6 +38,7 @@ public record DocumentationDocument /// The canonical/primary product for this document (nested object with id and repository). /// Name and version are looked up dynamically by product id. /// + [Object] [JsonPropertyName("product")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public IndexedProduct? Product { get; set; } @@ -42,13 +46,18 @@ public record DocumentationDocument /// /// All related products found during inference (from legacy mappings, applicability, etc.) 
/// + [Object] [JsonPropertyName("related_products")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public IndexedProduct[]? RelatedProducts { get; set; } + [Id] + [Keyword] [JsonPropertyName("url")] public required string Url { get; set; } = string.Empty; + [ContentHash] + [Keyword] [JsonPropertyName("hash")] public string Hash { get; set; } = string.Empty; @@ -58,6 +67,7 @@ public record DocumentationDocument [JsonPropertyName("navigation_table_of_contents")] public int NavigationTableOfContents { get; set; } = 50; //default to a high number so that omission gets penalized. + [Keyword(Normalizer = "keyword_normalizer")] [JsonPropertyName("navigation_section")] public string? NavigationSection { get; set; } @@ -67,18 +77,21 @@ public record DocumentationDocument public DateTimeOffset BatchIndexDate { get; set; } /// The date this document was last updated, + [Timestamp] [JsonPropertyName("last_updated")] public DateTimeOffset LastUpdated { get; set; } [JsonPropertyName("description")] public string? Description { get; set; } + [Text] [JsonPropertyName("headings")] public string[] Headings { get; set; } = []; [JsonPropertyName("links")] public string[] Links { get; set; } = []; + [Nested] [JsonPropertyName("applies_to")] public ApplicableTo? Applies { get; set; } @@ -92,6 +105,7 @@ public record DocumentationDocument [JsonPropertyName("abstract")] public string? Abstract { get; set; } + [Object] [JsonPropertyName("parents")] public ParentDocument[] Parents { get; set; } = []; @@ -105,6 +119,7 @@ public record DocumentationDocument /// Key for enrichment cache lookups. Derived from normalized content + prompt hash. /// Used by enrich processor to join AI-generated fields at index time. /// + [Keyword] [JsonPropertyName("enrichment_key")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? 
EnrichmentKey { get; set; } @@ -112,6 +127,7 @@ public record DocumentationDocument /// /// 3-5 sentences dense with technical entities, API names, and core functionality for vector matching. /// + [Text] [JsonPropertyName("ai_rag_optimized_summary")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? AiRagOptimizedSummary { get; set; } @@ -119,6 +135,7 @@ public record DocumentationDocument /// /// Exactly 5-10 words for a UI tooltip. /// + [Text] [JsonPropertyName("ai_short_summary")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? AiShortSummary { get; set; } @@ -126,6 +143,7 @@ public record DocumentationDocument /// /// A 3-8 word keyword string representing a high-intent user search for this doc. /// + [Keyword] [JsonPropertyName("ai_search_query")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? AiSearchQuery { get; set; } @@ -133,6 +151,7 @@ public record DocumentationDocument /// /// Array of 3-5 specific questions answered by this document. /// + [Text] [JsonPropertyName("ai_questions")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string[]? AiQuestions { get; set; } @@ -140,6 +159,7 @@ public record DocumentationDocument /// /// Array of 2-4 specific use cases this doc helps with. /// + [Text] [JsonPropertyName("ai_use_cases")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string[]? AiUseCases { get; set; } @@ -148,6 +168,7 @@ public record DocumentationDocument /// Hash of the LLM prompt templates used to generate AI fields. /// Used to detect stale enrichments when prompts change. /// + [Keyword] [JsonPropertyName("enrichment_prompt_hash")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? 
EnrichmentPromptHash { get; set; } diff --git a/src/Elastic.Documentation/Search/IndexedProduct.cs b/src/Elastic.Documentation/Search/IndexedProduct.cs index ee766fac1..cdb8925e8 100644 --- a/src/Elastic.Documentation/Search/IndexedProduct.cs +++ b/src/Elastic.Documentation/Search/IndexedProduct.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information using System.Text.Json.Serialization; +using Elastic.Mapping; namespace Elastic.Documentation.Search; @@ -15,12 +16,14 @@ public record IndexedProduct /// /// The product ID from products.yml (e.g., "elasticsearch", "kibana", "apm-agent-java") /// + [Keyword(Normalizer = "keyword_normalizer")] [JsonPropertyName("id")] public string? Id { get; init; } /// /// The repository name (e.g., "elasticsearch", "docs-content", "elastic-otel-java") /// + [Keyword(Normalizer = "keyword_normalizer")] [JsonPropertyName("repository")] public string? Repository { get; init; } } diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/DocumentationMappingConfig.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/DocumentationMappingConfig.cs new file mode 100644 index 000000000..9ae1c7072 --- /dev/null +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/DocumentationMappingConfig.cs @@ -0,0 +1,199 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+// See the LICENSE file in the project root for more information + +using Elastic.Documentation.Search; +using Elastic.Ingest.Elasticsearch.Indices; +using Elastic.Mapping; +using Elastic.Mapping.Analysis; + +namespace Elastic.Markdown.Exporters.Elasticsearch; + +[ElasticsearchMappingContext] +[Entity( + Target = EntityTarget.Index, + Name = "docs-lexical", + WriteAlias = "docs-lexical", + ReadAlias = "docs-lexical", + SearchPattern = "docs-lexical-*", + DatePattern = "yyyy.MM.dd.HHmmss", + Configuration = typeof(LexicalConfig) +)] +[Entity( + Target = EntityTarget.Index, + Name = "docs-semantic", + Variant = "Semantic", + WriteAlias = "docs-semantic", + ReadAlias = "docs-semantic", + SearchPattern = "docs-semantic-*", + DatePattern = "yyyy.MM.dd.HHmmss", + Configuration = typeof(SemanticConfig) +)] +public static partial class DocumentationMappingContext; + +public static class LexicalConfig +{ + public static AnalysisBuilder ConfigureAnalysis(AnalysisBuilder analysis) => analysis; + + public static DocumentationDocumentMappingsBuilder ConfigureMappings(DocumentationDocumentMappingsBuilder m) => + ConfigureCommonMappings(m); + + internal static DocumentationDocumentMappingsBuilder ConfigureCommonMappings(DocumentationDocumentMappingsBuilder m) => m + // Text fields with custom analyzers and multi-fields + .SearchTitle(f => f + .Analyzer("synonyms_fixed_analyzer") + .SearchAnalyzer("synonyms_analyzer") + .MultiField("completion", mf => mf.SearchAsYouType() + .Analyzer("synonyms_fixed_analyzer") + .SearchAnalyzer("synonyms_analyzer"))) + .Title(f => f + .SearchAnalyzer("synonyms_analyzer") + .MultiField("keyword", mf => mf.Keyword().Normalizer("keyword_normalizer")) + .MultiField("starts_with", mf => mf.Text() + .Analyzer("starts_with_analyzer") + .SearchAnalyzer("starts_with_analyzer_search")) + .MultiField("completion", mf => mf.SearchAsYouType().SearchAnalyzer("synonyms_analyzer"))) + .StrippedBody(f => f + .Analyzer("synonyms_fixed_analyzer") + 
.SearchAnalyzer("synonyms_analyzer")) + .Abstract(f => f + .Analyzer("synonyms_fixed_analyzer") + .SearchAnalyzer("synonyms_analyzer")) + .Headings(f => f + .Analyzer("synonyms_fixed_analyzer") + .SearchAnalyzer("synonyms_analyzer")) + // JsonIgnore fields — [Text]/[Keyword] attributes handle the type, + // AddField only needed when custom analyzers are required + .AddField("ai_rag_optimized_summary", f => f.Text() + .Analyzer("synonyms_fixed_analyzer") + .SearchAnalyzer("synonyms_analyzer")) + // Keyword fields with multi-fields + .Url(f => f + .MultiField("match", mf => mf.Text()) + .MultiField("prefix", mf => mf.Text().Analyzer("hierarchy_analyzer"))) + // Rank features — no attribute available, must use AddField + .AddField("navigation_depth", f => f.RankFeature().PositiveScoreImpact(false)) + .AddField("navigation_table_of_contents", f => f.RankFeature().PositiveScoreImpact(false)) + // Nested applies_to — sub-fields don't match C# structure (custom JsonConverter) + .AddField("applies_to.type", f => f.Keyword().Normalizer("keyword_normalizer")) + .AddField("applies_to.sub-type", f => f.Keyword().Normalizer("keyword_normalizer")) + .AddField("applies_to.lifecycle", f => f.Keyword().Normalizer("keyword_normalizer")) + .AddField("applies_to.version", f => f.Version()) + // Parent document multi-fields + .AddField("parents.url", f => f.Keyword() + .MultiField("match", mf => mf.Text()) + .MultiField("prefix", mf => mf.Text().Analyzer("hierarchy_analyzer"))) + .AddField("parents.title", f => f.Text() + .SearchAnalyzer("synonyms_analyzer") + .MultiField("keyword", mf => mf.Keyword())); +} + +public static class SemanticConfig +{ + private const string InferenceId = ".elser-2-elastic"; + + public static AnalysisBuilder ConfigureAnalysis(AnalysisBuilder analysis) => analysis; + + public static DocumentationDocumentMappingsBuilder ConfigureMappings(DocumentationDocumentMappingsBuilder m) => + LexicalConfig.ConfigureCommonMappings(m) + .AddField("title.semantic_text", f 
=> f.SemanticText().InferenceId(InferenceId)) + .AddField("abstract.semantic_text", f => f.SemanticText().InferenceId(InferenceId)) + .AddField("ai_rag_optimized_summary.semantic_text", f => f.SemanticText().InferenceId(InferenceId)) + .AddField("ai_questions.semantic_text", f => f.SemanticText().InferenceId(InferenceId)) + .AddField("ai_use_cases.semantic_text", f => f.SemanticText().InferenceId(InferenceId)); +} + +/// +/// Builds analysis settings at runtime (includes synonyms that are loaded from configuration). +/// +public static class DocumentationAnalysisFactory +{ + public static AnalysisBuilder BuildAnalysis(AnalysisBuilder analysis, string synonymSetName, string[] indexTimeSynonyms) => analysis + .Normalizer("keyword_normalizer", n => n.Custom() + .CharFilter("strip_non_word_chars") + .Filters("lowercase", "asciifolding", "trim")) + .Analyzer("starts_with_analyzer", a => a.Custom() + .Tokenizer("starts_with_tokenizer") + .Filter("lowercase")) + .Analyzer("starts_with_analyzer_search", a => a.Custom() + .Tokenizer("keyword") + .Filter("lowercase")) + .Analyzer("synonyms_fixed_analyzer", a => a.Custom() + .Tokenizer("group_tokenizer") + .Filters("lowercase", "synonyms_fixed_filter", "kstem")) + .Analyzer("synonyms_analyzer", a => a.Custom() + .Tokenizer("group_tokenizer") + .Filters("lowercase", "synonyms_filter", "kstem")) + .Analyzer("highlight_analyzer", a => a.Custom() + .Tokenizer("group_tokenizer") + .Filters("lowercase", "english_stop")) + .Analyzer("hierarchy_analyzer", a => a.Custom() + .Tokenizer("path_tokenizer")) + .CharFilter("strip_non_word_chars", cf => cf.PatternReplace() + .Pattern(@"\W") + .Replacement(" ")) + .TokenFilter("synonyms_fixed_filter", tf => tf.SynonymGraph() + .Synonyms(indexTimeSynonyms)) + .TokenFilter("synonyms_filter", tf => tf.SynonymGraph() + .SynonymsSet(synonymSetName) + .Updateable(true)) + .TokenFilter("english_stop", tf => tf.Stop() + .Stopwords("_english_")) + .Tokenizer("starts_with_tokenizer", t => t.EdgeNGram() 
+ .MinGram(1) + .MaxGram(10) + .TokenChars("letter", "digit", "symbol", "whitespace")) + .Tokenizer("group_tokenizer", t => t.CharGroup() + .TokenizeOnChars("whitespace", ",", ";", "?", "!", "(", ")", "&", "'", "\"", "/", "[", "]", "{", "}")) + .Tokenizer("path_tokenizer", t => t.PathHierarchy() + .Delimiter('/')); + + /// + /// Creates the index settings JSON with analysis configuration and optional default pipeline. + /// + public static string BuildSettingsJson(string synonymSetName, string[] indexTimeSynonyms, string? defaultPipeline = null) + { + var analysis = BuildAnalysis(new AnalysisBuilder(), synonymSetName, indexTimeSynonyms); + var analysisJson = analysis.Build().ToJsonString(); + + if (defaultPipeline is not null) + { + // Merge default_pipeline into the settings JSON + return $$""" + { + "default_pipeline": "{{defaultPipeline}}", + "analysis": {{analysisJson}} + } + """; + } + + return $$""" + { + "analysis": {{analysisJson}} + } + """; + } + + /// + /// Creates an ElasticsearchTypeContext with runtime analysis settings and dynamic index name. + /// + public static ElasticsearchTypeContext CreateContext( + ElasticsearchTypeContext baseContext, + string indexName, + string synonymSetName, + string[] indexTimeSynonyms, + string? 
defaultPipeline = null) + { + var settingsJson = BuildSettingsJson(synonymSetName, indexTimeSynonyms, defaultPipeline); + var settingsHash = HashedBulkUpdate.CreateHash(settingsJson); + var hash = HashedBulkUpdate.CreateHash(settingsHash, baseContext.MappingsHash); + + return baseContext.WithIndexName(indexName) with + { + GetSettingsJson = () => settingsJson, + SettingsHash = settingsHash, + Hash = hash, + ConfigureAnalysis = a => BuildAnalysis(a, synonymSetName, indexTimeSynonyms) + }; + } +} diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.Mapping.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.Mapping.cs deleted file mode 100644 index 4e36f7a56..000000000 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.Mapping.cs +++ /dev/null @@ -1,260 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - -using Elastic.Documentation.Search; -using Elastic.Ingest.Elasticsearch.Catalog; - -namespace Elastic.Markdown.Exporters.Elasticsearch; - -public abstract partial class ElasticsearchIngestChannel - where TChannelOptions : CatalogIndexChannelOptionsBase - where TChannel : CatalogIndexChannel -{ - protected static string CreateMappingSetting(string synonymSetName, string[] synonyms, string? defaultPipeline = null) - { - var indexTimeSynonyms = $"[{string.Join(",", synonyms.Select(r => $"\"{r}\""))}]"; - var pipelineSetting = defaultPipeline is not null ? 
$"\"default_pipeline\": \"{defaultPipeline}\"," : ""; - // language=json - return - $$$""" - { - {{{pipelineSetting}}} - "analysis": { - "normalizer": { - "keyword_normalizer": { - "type": "custom", - "char_filter": ["strip_non_word_chars"], - "filter": ["lowercase", "asciifolding", "trim"] - } - }, - "analyzer": { - "starts_with_analyzer": { - "tokenizer": "starts_with_tokenizer", - "filter": [ "lowercase" ] - }, - "starts_with_analyzer_search": { - "tokenizer": "keyword", - "filter": [ "lowercase" ] - }, - "synonyms_fixed_analyzer": { - "tokenizer": "group_tokenizer", - "filter": [ - "lowercase", - "synonyms_fixed_filter", - "kstem" - ] - }, - "synonyms_analyzer": { - "tokenizer": "group_tokenizer", - "filter": [ - "lowercase", - "synonyms_filter", - "kstem" - ] - }, - "highlight_analyzer": { - "tokenizer": "group_tokenizer", - "filter": [ - "lowercase", - "english_stop" - ] - }, - "hierarchy_analyzer": { "tokenizer": "path_tokenizer" } - }, - "char_filter": { - "strip_non_word_chars": { - "type": "pattern_replace", - "pattern": "\\W", - "replacement": " " - } - }, - "filter": { - "synonyms_fixed_filter": { - "type": "synonym_graph", - "synonyms": {{{indexTimeSynonyms}}} - }, - "synonyms_filter": { - "type": "synonym_graph", - "synonyms_set": "{{{synonymSetName}}}", - "updateable": true - }, - "english_stop": { - "type": "stop", - "stopwords": "_english_" - } - }, - "tokenizer": { - "starts_with_tokenizer": { - "type": "edge_ngram", - "min_gram": 1, - "max_gram": 10, - "token_chars": [ - "letter", - "digit", - "symbol", - "whitespace" - ] - }, - "group_tokenizer": { - "type": "char_group", - "tokenize_on_chars": [ "whitespace", ",", ";", "?", "!", "(", ")", "&", "'", "\"", "/", "[", "]", "{", "}" ] - }, - "path_tokenizer": { - "type": "path_hierarchy", - "delimiter": "/" - } - } - } - } - """; - } - - // language=json - protected static string CreateMapping(string? 
inferenceId) => - $$""" - { - "properties": { - "type": { "type" : "keyword", "normalizer": "keyword_normalizer" }, - "product": { - "type": "object", - "properties": { - "id": { "type": "keyword", "normalizer": "keyword_normalizer" }, - "repository": { "type": "keyword", "normalizer": "keyword_normalizer" } - } - }, - "related_products": { - "type": "object", - "properties": { - "id": { "type": "keyword", "normalizer": "keyword_normalizer" }, - "repository": { "type": "keyword", "normalizer": "keyword_normalizer" } - } - }, - "url": { - "type": "keyword", - "fields": { - "match": { "type": "text" }, - "prefix": { "type": "text", "analyzer" : "hierarchy_analyzer" } - } - }, - "navigation_depth" : { "type" : "rank_feature", "positive_score_impact": false }, - "navigation_table_of_contents" : { "type" : "rank_feature", "positive_score_impact": false }, - "navigation_section" : { "type" : "keyword", "normalizer": "keyword_normalizer" }, - "hidden" : { - "type" : "boolean" - }, - "applies_to" : { - "type" : "nested", - "properties" : { - "type" : { "type" : "keyword", "normalizer": "keyword_normalizer" }, - "sub-type" : { "type" : "keyword", "normalizer": "keyword_normalizer" }, - "lifecycle" : { "type" : "keyword", "normalizer": "keyword_normalizer" }, - "version" : { "type" : "version" } - } - }, - "parents" : { - "type" : "object", - "properties" : { - "url" : { - "type": "keyword", - "fields": { - "match": { "type": "text" }, - "prefix": { "type": "text", "analyzer" : "hierarchy_analyzer" } - } - }, - "title": { - "type": "text", - "search_analyzer": "synonyms_analyzer", - "fields": { - "keyword": { "type": "keyword" } - } - } - } - }, - "hash" : { "type" : "keyword" }, - "enrichment_key" : { "type" : "keyword" }, - "search_title": { - "type": "text", - "analyzer": "synonyms_fixed_analyzer", - "search_analyzer": "synonyms_analyzer", - "fields": { - "completion": { - "type": "search_as_you_type", - "analyzer": "synonyms_fixed_analyzer", - "search_analyzer": 
"synonyms_analyzer", - "term_vector": "with_positions_offsets", - "index_options": "offsets" - } - } - }, - "title": { - "type": "text", - "search_analyzer": "synonyms_analyzer", - "fields": { - "keyword": { "type": "keyword", "normalizer": "keyword_normalizer" }, - "starts_with": { "type": "text", "analyzer": "starts_with_analyzer", "search_analyzer": "starts_with_analyzer_search" }, - "completion": { "type": "search_as_you_type", "search_analyzer": "synonyms_analyzer" } - {{(!string.IsNullOrWhiteSpace(inferenceId) ? $$""", "semantic_text": {{{InferenceMapping(inferenceId)}}}""" : "")}} - } - }, - "body": { - "type": "text" - }, - "stripped_body": { - "type": "text", - "analyzer": "synonyms_fixed_analyzer", - "search_analyzer": "synonyms_analyzer", - "term_vector": "with_positions_offsets" - }, - "headings": { - "type": "text", - "analyzer": "synonyms_fixed_analyzer", - "search_analyzer": "synonyms_analyzer" - }, - "abstract": { - "type" : "text", - "analyzer": "synonyms_fixed_analyzer", - "search_analyzer": "synonyms_analyzer", - "fields" : { - {{(!string.IsNullOrWhiteSpace(inferenceId) ? $"\"semantic_text\": {{{InferenceMapping(inferenceId)}}}" : "")}} - } - }, - "ai_rag_optimized_summary": { - "type": "text", - "analyzer": "synonyms_fixed_analyzer", - "search_analyzer": "synonyms_analyzer", - "fields": { - {{(!string.IsNullOrWhiteSpace(inferenceId) ? $"\"semantic_text\": {{{InferenceMapping(inferenceId)}}}" : "")}} - } - }, - "ai_short_summary": { - "type": "text" - }, - "ai_search_query": { - "type": "keyword" - }, - "ai_questions": { - "type": "text", - "fields": { - {{(!string.IsNullOrWhiteSpace(inferenceId) ? $"\"semantic_text\": {{{InferenceMapping(inferenceId)}}}" : "")}} - } - }, - "ai_use_cases": { - "type": "text", - "fields": { - {{(!string.IsNullOrWhiteSpace(inferenceId) ? 
$"\"semantic_text\": {{{InferenceMapping(inferenceId)}}}" : "")}} - } - }, - "enrichment_prompt_hash": { - "type": "keyword" - } - } - } - """; - - private static string InferenceMapping(string inferenceId) => - $""" - "type": "semantic_text", - "inference_id": "{inferenceId}" - """; -} diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.cs deleted file mode 100644 index 6ff857956..000000000 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.cs +++ /dev/null @@ -1,161 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - -using Elastic.Channels; -using Elastic.Documentation.Configuration; -using Elastic.Documentation.Diagnostics; -using Elastic.Documentation.Search; -using Elastic.Documentation.Serialization; -using Elastic.Ingest.Elasticsearch.Catalog; -using Elastic.Ingest.Elasticsearch.Indices; -using Elastic.Ingest.Elasticsearch.Semantic; -using Elastic.Transport; -using Microsoft.Extensions.Logging; - -namespace Elastic.Markdown.Exporters.Elasticsearch; - -public class ElasticsearchLexicalIngestChannel( - ILoggerFactory logFactory, - IDiagnosticsCollector collector, - ElasticsearchEndpoint endpoint, - string indexNamespace, - DistributedTransport transport, - string[] indexTimeSynonyms, - string? 
defaultPipeline = null -) - : ElasticsearchIngestChannel, CatalogIndexChannel> - (logFactory, collector, endpoint, transport, o => new(o), t => new(t) - { - BulkOperationIdLookup = d => d.Url, - // hash, last_updated and batch_index_date are all set before the docs are written to the channel - ScriptedHashBulkUpsertLookup = (d, _) => new HashedBulkUpdate("hash", d.Hash, "ctx._source.batch_index_date = params.batch_index_date", - new Dictionary - { - { "batch_index_date", d.BatchIndexDate.ToString("o") } - }), - GetMapping = () => CreateMapping(null), - GetMappingSettings = () => CreateMappingSetting($"docs-{indexNamespace}", indexTimeSynonyms, defaultPipeline), - IndexFormat = - $"{endpoint.IndexNamePrefix.Replace("semantic", "lexical").ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}-{{0:yyyy.MM.dd.HHmmss}}", - ActiveSearchAlias = $"{endpoint.IndexNamePrefix.Replace("semantic", "lexical").ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}" - }); - -public class ElasticsearchSemanticIngestChannel( - ILoggerFactory logFactory, - IDiagnosticsCollector collector, - ElasticsearchEndpoint endpoint, - string indexNamespace, - DistributedTransport transport, - string[] indexTimeSynonyms, - string? defaultPipeline = null -) - : ElasticsearchIngestChannel, SemanticIndexChannel> - (logFactory, collector, endpoint, transport, o => new(o), t => new(t) - { - BulkOperationIdLookup = d => d.Url, - GetMapping = (inferenceId, _) => CreateMapping(inferenceId), - GetMappingSettings = (_, _) => CreateMappingSetting($"docs-{indexNamespace}", indexTimeSynonyms, defaultPipeline), - IndexFormat = $"{endpoint.IndexNamePrefix.ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}-{{0:yyyy.MM.dd.HHmmss}}", - ActiveSearchAlias = $"{endpoint.IndexNamePrefix}-{indexNamespace.ToLowerInvariant()}", - IndexNumThreads = endpoint.IndexNumThreads, - SearchNumThreads = endpoint.SearchNumThreads, - InferenceCreateTimeout = TimeSpan.FromMinutes(endpoint.BootstrapTimeout ?? 
4), - UsePreexistingInferenceIds = !endpoint.NoElasticInferenceService, - InferenceId = endpoint.NoElasticInferenceService ? null : ".elser-2-elastic", - SearchInferenceId = endpoint.NoElasticInferenceService ? null : ".elser-2-elastic" - }); - -public abstract partial class ElasticsearchIngestChannel : IDisposable - where TChannelOptions : CatalogIndexChannelOptionsBase - where TChannel : CatalogIndexChannel -{ - private readonly IDiagnosticsCollector _collector; - public TChannel Channel { get; } - private readonly ILogger _logger; - - protected ElasticsearchIngestChannel( - ILoggerFactory logFactory, - IDiagnosticsCollector collector, - ElasticsearchEndpoint endpoint, - DistributedTransport transport, - Func createChannel, - Func createOptions - ) - { - _collector = collector; - _logger = logFactory.CreateLogger>(); - //The max num threads per allocated node, from testing its best to limit our max concurrency - //producing to this number as well - var options = createOptions(transport); - var i = 0; - options.BufferOptions = new BufferOptions - { - OutboundBufferMaxSize = endpoint.BufferSize, - ExportMaxConcurrency = endpoint.IndexNumThreads, - ExportMaxRetries = endpoint.MaxRetries - }; - options.SerializerContext = SourceGenerationContext.Default; - options.ExportBufferCallback = () => - { - var count = Interlocked.Increment(ref i); - _logger.LogInformation("Exported {Count} documents to Elasticsearch index {IndexName}", - count * endpoint.BufferSize, Channel?.IndexName ?? 
string.Format(options.IndexFormat, "latest")); - }; - options.ExportExceptionCallback = e => - { - _logger.LogError(e, "Failed to export document"); - _collector.EmitGlobalError("Elasticsearch export: failed to export document", e); - }; - options.ServerRejectionCallback = items => - { - foreach (var (doc, responseItem) in items) - { - _collector.EmitGlobalError( - $"Server rejection: {responseItem.Status} {responseItem.Error?.Type} {responseItem.Error?.Reason} for document {doc.Url}"); - } - }; - Channel = createChannel(options); - _logger.LogInformation("Created {Channel} Elasticsearch target for indexing", typeof(TChannel).Name); - } - - public async ValueTask StopAsync(Cancel ctx = default) - { - _logger.LogInformation("Waiting to drain all inflight exports to Elasticsearch"); - var drained = await Channel.WaitForDrainAsync(null, ctx); - if (!drained) - _collector.EmitGlobalError("Elasticsearch export: failed to complete indexing in a timely fashion while shutting down"); - - _logger.LogInformation("Refreshing target index {Index}", Channel.IndexName); - var refreshed = await Channel.RefreshAsync(ctx); - if (!refreshed) - _collector.EmitGlobalError($"Refreshing target index {Channel.IndexName} did not complete successfully"); - - _logger.LogInformation("Applying aliases to {Index}", Channel.IndexName); - var swapped = await Channel.ApplyAliasesAsync(ctx); - if (!swapped) - _collector.EmitGlobalError($"${nameof(ElasticsearchMarkdownExporter)} failed to apply aliases to index {Channel.IndexName}"); - - return drained && refreshed && swapped; - } - - public async ValueTask RefreshAsync(Cancel ctx = default) => await Channel.RefreshAsync(ctx); - - public async ValueTask TryWrite(DocumentationDocument document, Cancel ctx = default) - { - if (Channel.TryWrite(document)) - return true; - - if (await Channel.WaitToWriteAsync(ctx)) - return Channel.TryWrite(document); - return false; - } - - public void Dispose() - { - Channel.Complete(); - Channel.Dispose(); - - 
GC.SuppressFinalize(this); - } -} diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs index 00f4d65a0..a4c2172d8 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs @@ -27,8 +27,8 @@ public partial class ElasticsearchMarkdownExporter /// private void AssignDocumentMetadata(DocumentationDocument doc) { - var semanticHash = _semanticChannel.Channel.ChannelHash; - var lexicalHash = _lexicalChannel.Channel.ChannelHash; + var semanticHash = _semanticTypeContext?.Hash ?? string.Empty; + var lexicalHash = _lexicalTypeContext.Hash; var hash = HashedBulkUpdate.CreateHash(semanticHash, lexicalHash, doc.Url, doc.Type, doc.StrippedBody ?? string.Empty, string.Join(",", doc.Headings.OrderBy(h => h)), doc.SearchTitle ?? string.Empty, @@ -165,9 +165,7 @@ public async ValueTask ExportAsync(MarkdownExportFileContext fileContext, AssignDocumentMetadata(doc); - if (_indexStrategy == IngestStrategy.Multiplex) - return await _lexicalChannel.TryWrite(doc, ctx) && await _semanticChannel.TryWrite(doc, ctx); - return await _lexicalChannel.TryWrite(doc, ctx); + return await WriteDocumentAsync(doc, ctx); } /// @@ -209,22 +207,10 @@ public async ValueTask FinishExportAsync(IDirectoryInfo outputFolder, Canc AssignDocumentMetadata(doc); - // Write to channels following the multiplex or reindex strategy - if (_indexStrategy == IngestStrategy.Multiplex) + if (!await WriteDocumentAsync(doc, ctx)) { - if (!await _lexicalChannel.TryWrite(doc, ctx) || !await _semanticChannel.TryWrite(doc, ctx)) - { - _logger.LogError("Failed to write OpenAPI document {Url}", doc.Url); - return false; - } - } - else - { - if (!await _lexicalChannel.TryWrite(doc, ctx)) - { - _logger.LogError("Failed to write OpenAPI document {Url}", doc.Url); - return 
false; - } + _logger.LogError("Failed to write OpenAPI document {Url}", doc.Url); + return false; } } diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs index 5220bfe39..84c0e5c75 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs @@ -4,38 +4,42 @@ using System.Text.Json; using System.Text.Json.Serialization; +using Elastic.Channels; using Elastic.Documentation.Configuration; using Elastic.Documentation.Configuration.Search; using Elastic.Documentation.Configuration.Versions; using Elastic.Documentation.Diagnostics; +using Elastic.Documentation.Search; +using Elastic.Documentation.Serialization; using Elastic.Ingest.Elasticsearch; using Elastic.Ingest.Elasticsearch.Indices; +using Elastic.Mapping; using Elastic.Markdown.Exporters.Elasticsearch.Enrichment; using Elastic.Transport; using Microsoft.Extensions.Logging; -using NetEscapades.EnumGenerators; namespace Elastic.Markdown.Exporters.Elasticsearch; -[EnumExtensions] -public enum IngestStrategy { Reindex, Multiplex } - public partial class ElasticsearchMarkdownExporter : IMarkdownExporter, IDisposable { private readonly IDiagnosticsCollector _collector; private readonly IDocumentationConfigurationContext _context; private readonly ILogger _logger; - private readonly ElasticsearchLexicalIngestChannel _lexicalChannel; - private readonly ElasticsearchSemanticIngestChannel _semanticChannel; - private readonly ElasticsearchEndpoint _endpoint; - - private readonly DateTimeOffset _batchIndexDate = DateTimeOffset.UtcNow; private readonly DistributedTransport _transport; - private IngestStrategy _indexStrategy; private readonly string _indexNamespace; - private string _currentLexicalHash = string.Empty; - private string _currentSemanticHash = string.Empty; + private readonly 
DateTimeOffset _batchIndexDate; + + // Ingest: orchestrator for dual-index mode, plain channel for --no-semantic + private readonly IncrementalSyncOrchestrator? _orchestrator; + private readonly IngestChannel? _lexicalOnlyChannel; + + // Type context hashes for document content hash computation + private readonly ElasticsearchTypeContext _lexicalTypeContext; + private readonly ElasticsearchTypeContext? _semanticTypeContext; + + // Alias names for queries/statistics + private readonly string _lexicalAlias; private readonly IReadOnlyDictionary _synonyms; private readonly IReadOnlyCollection _rules; @@ -65,7 +69,6 @@ IDocumentationConfigurationContext context _context = context; _logger = logFactory.CreateLogger(); _endpoint = endpoints.Elasticsearch; - _indexStrategy = IngestStrategy.Reindex; _indexNamespace = indexNamespace; _versionsConfiguration = context.VersionsConfiguration; _synonyms = context.SearchConfiguration.Synonyms; @@ -73,6 +76,7 @@ IDocumentationConfigurationContext context var es = endpoints.Elasticsearch; _transport = ElasticsearchTransportFactory.Create(es); + _operations = new ElasticsearchOperations(_transport, _logger, collector); string[] fixedSynonyms = ["esql", "data-stream", "data-streams", "machine-learning"]; var indexTimeSynonyms = _synonyms.Aggregate(new List(), (acc, synonym) => @@ -83,15 +87,16 @@ IDocumentationConfigurationContext context }).Where(r => fixedSynonyms.Contains(r.Id)).Select(r => r.Synonyms).ToArray(); _fixedSynonymsHash = HashedBulkUpdate.CreateHash(string.Join(",", indexTimeSynonyms)); - // Use AI enrichment pipeline if enabled - hybrid approach: - // - Cache hits: enrich processor applies fields at index time - // - Cache misses: apply fields inline before indexing var aiPipeline = es.EnableAiEnrichment ? 
EnrichPolicyManager.PipelineName : null; - _lexicalChannel = new ElasticsearchLexicalIngestChannel(logFactory, collector, es, indexNamespace, _transport, indexTimeSynonyms, aiPipeline); - _semanticChannel = new ElasticsearchSemanticIngestChannel(logFactory, collector, es, indexNamespace, _transport, indexTimeSynonyms, aiPipeline); + var synonymSetName = $"docs-{indexNamespace}"; + var ns = indexNamespace.ToLowerInvariant(); + var lexicalPrefix = es.IndexNamePrefix.Replace("semantic", "lexical").ToLowerInvariant(); + _lexicalAlias = $"{lexicalPrefix}-{ns}"; - // Initialize shared ES operations - _operations = new ElasticsearchOperations(_transport, _logger, collector); + _lexicalTypeContext = DocumentationAnalysisFactory.CreateContext( + DocumentationMappingContext.DocumentationDocument.Context, + _lexicalAlias, synonymSetName, indexTimeSynonyms, aiPipeline + ); // Initialize AI enrichment services if enabled if (es.EnableAiEnrichment) @@ -100,76 +105,177 @@ IDocumentationConfigurationContext context _llmClient = new ElasticsearchLlmClient(_transport, logFactory.CreateLogger(), _operations); _enrichPolicyManager = new EnrichPolicyManager(_transport, logFactory.CreateLogger(), _enrichmentCache.IndexName); } + + if (!es.NoSemantic) + { + var semanticAlias = $"{es.IndexNamePrefix.ToLowerInvariant()}-{ns}"; + _semanticTypeContext = DocumentationAnalysisFactory.CreateContext( + DocumentationMappingContext.DocumentationDocumentSemantic.Context, + semanticAlias, synonymSetName, indexTimeSynonyms, aiPipeline + ); + + _orchestrator = new IncrementalSyncOrchestrator(_transport, _lexicalTypeContext, _semanticTypeContext) + { + ConfigurePrimary = ConfigureChannelOptions, + ConfigureSecondary = ConfigureChannelOptions, + OnPostComplete = es.EnableAiEnrichment + ? 
async (ctx, ct) => await PostCompleteAsync(ctx, ct) + : null + }; + _ = _orchestrator.AddPreBootstrapTask(async (_, ct) => + { + await InitializeEnrichmentAsync(ct); + await PublishSynonymsAsync(ct); + await PublishQueryRulesAsync(ct); + }); + + _batchIndexDate = _orchestrator.BatchTimestamp; + } + else + { + _batchIndexDate = DateTimeOffset.UtcNow; + var options = new IngestChannelOptions(_transport, _lexicalTypeContext); + ConfigureChannelOptions(options); + _lexicalOnlyChannel = new IngestChannel(options); + } + } + + private void ConfigureChannelOptions(IngestChannelOptions options) + { + options.BufferOptions = new BufferOptions + { + OutboundBufferMaxSize = _endpoint.BufferSize, + ExportMaxConcurrency = _endpoint.IndexNumThreads, + ExportMaxRetries = _endpoint.MaxRetries + }; + options.SerializerContext = SourceGenerationContext.Default; + options.ExportExceptionCallback = e => + { + _logger.LogError(e, "Failed to export document"); + _collector.EmitGlobalError("Elasticsearch export: failed to export document", e); + }; + options.ServerRejectionCallback = items => + { + foreach (var (doc, responseItem) in items) + { + _collector.EmitGlobalError( + $"Server rejection: {responseItem.Status} {responseItem.Error?.Type} {responseItem.Error?.Reason} for document {doc.Url}"); + } + }; } /// public async ValueTask StartAsync(Cancel ctx = default) { - // Initialize AI enrichment cache (pre-loads existing hashes into memory) - if (_enrichmentCache is not null && _enrichPolicyManager is not null) + if (_orchestrator is not null) { - _logger.LogInformation("Initializing AI enrichment cache..."); - await _enrichmentCache.InitializeAsync(ctx); - _logger.LogInformation("AI enrichment cache ready with {Count} existing entries", _enrichmentCache.Count); - - // The enrich pipeline must exist before indexing (used as default_pipeline). - // The pipeline's enrich processor requires the .enrich-* index to exist, - // which is created by executing the policy. 
We execute even with an empty - // cache index - it just creates an empty enrich index that returns no matches. - _logger.LogInformation("Setting up enrich policy and pipeline..."); - await _enrichPolicyManager.ExecutePolicyAsync(ctx); - await _enrichPolicyManager.EnsurePipelineExistsAsync(ctx); + _ = await _orchestrator.StartAsync(BootstrapMethod.Failure, ctx); + _logger.LogInformation("Orchestrator started with {Strategy} strategy", _orchestrator.Strategy); + return; } - _currentLexicalHash = await _lexicalChannel.Channel.GetIndexTemplateHashAsync(ctx) ?? string.Empty; - _currentSemanticHash = await _semanticChannel.Channel.GetIndexTemplateHashAsync(ctx) ?? string.Empty; - + // NoSemantic path + await InitializeEnrichmentAsync(ctx); await PublishSynonymsAsync(ctx); await PublishQueryRulesAsync(ctx); - _ = await _lexicalChannel.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Failure, null, ctx); - - // if the previous hash does not match the current hash, we know already we want to multiplex to a new index - if (_currentLexicalHash != _lexicalChannel.Channel.ChannelHash) - _indexStrategy = IngestStrategy.Multiplex; + _ = await _lexicalOnlyChannel!.BootstrapElasticsearchAsync(BootstrapMethod.Failure, ctx); + } - if (!_endpoint.NoSemantic) + /// + public async ValueTask StopAsync(Cancel ctx = default) + { + if (_orchestrator is not null) { - var semanticWriteAlias = string.Format(_semanticChannel.Channel.Options.IndexFormat, "latest"); - var semanticIndexAvailable = await _transport.HeadAsync(semanticWriteAlias, ctx); - if (!semanticIndexAvailable.ApiCallDetails.HasSuccessfulStatusCode && _endpoint is { ForceReindex: false, NoSemantic: false }) + _ = await _orchestrator.CompleteAsync(null, ctx); + return; + } + + // NoSemantic path — drain, delete stale, refresh, alias + var drained = await _lexicalOnlyChannel!.WaitForDrainAsync(null, ctx); + if (!drained) + _collector.EmitGlobalError("Elasticsearch export: failed to drain in a timely fashion"); + + // Delete 
stale documents not part of this batch + var deleteQuery = PostData.String($$""" { - _indexStrategy = IngestStrategy.Multiplex; - _logger.LogInformation("Index strategy set to multiplex because {SemanticIndex} does not exist, pass --force-reindex to always use reindex", semanticWriteAlias); + "query": { + "range": { + "batch_index_date": { + "lt": "{{_batchIndexDate:o}}" + } + } + } } + """); + await _operations.DeleteByQueryAsync(_lexicalAlias, deleteQuery, ctx); - //try re-use index if we are re-indexing. Multiplex should always go to a new index - _semanticChannel.Channel.Options.TryReuseIndex = _indexStrategy == IngestStrategy.Reindex; - _ = await _semanticChannel.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Failure, null, ctx); - } + _ = await _lexicalOnlyChannel.RefreshAsync(ctx); + _ = await _lexicalOnlyChannel.ApplyAliasesAsync(_lexicalAlias, ctx); + } - var lexicalIndexExists = await IndexExists(_lexicalChannel.Channel.IndexName) ? "existing" : "new"; - var semanticIndexExists = await IndexExists(_semanticChannel.Channel.IndexName) ? 
"existing" : "new"; - if (_currentLexicalHash != _lexicalChannel.Channel.ChannelHash) - { - _indexStrategy = IngestStrategy.Multiplex; - _logger.LogInformation("Multiplexing lexical new index: '{Index}' since current hash on server '{HashCurrent}' does not match new '{HashNew}'", - _lexicalChannel.Channel.IndexName, _currentLexicalHash, _lexicalChannel.Channel.ChannelHash); - } - else - _logger.LogInformation("Targeting {State} lexical: '{Index}'", lexicalIndexExists, _lexicalChannel.Channel.IndexName); + private async Task InitializeEnrichmentAsync(Cancel ctx) + { + if (_enrichmentCache is null || _enrichPolicyManager is null) + return; + + _logger.LogInformation("Initializing AI enrichment cache..."); + await _enrichmentCache.InitializeAsync(ctx); + _logger.LogInformation("AI enrichment cache ready with {Count} existing entries", _enrichmentCache.Count); + + _logger.LogInformation("Setting up enrich policy and pipeline..."); + await _enrichPolicyManager.ExecutePolicyAsync(ctx); + await _enrichPolicyManager.EnsurePipelineExistsAsync(ctx); + } + + private async Task PostCompleteAsync(OrchestratorContext context, Cancel ctx) => + await ExecuteEnrichPolicyIfNeededAsync(context.SecondaryWriteAlias, ctx); + + private async ValueTask ExecuteEnrichPolicyIfNeededAsync(string? 
semanticAlias, Cancel ctx) + { + if (_enrichmentCache is null || _enrichPolicyManager is null) + return; - if (!_endpoint.NoSemantic && _currentSemanticHash != _semanticChannel.Channel.ChannelHash) + _logger.LogInformation( + "AI enrichment complete: {CacheHits} cache hits, {Enrichments} enrichments generated (limit: {Limit})", + _cacheHitCount, _enrichmentCount, _enrichmentOptions.MaxNewEnrichmentsPerRun); + + if (_enrichmentCache.Count > 0) { - _indexStrategy = IngestStrategy.Multiplex; - _logger.LogInformation("Multiplexing new index '{Index}' since current hash on server '{HashCurrent}' does not match new '{HashNew}'", - _semanticChannel.Channel.IndexName, _currentSemanticHash, _semanticChannel.Channel.ChannelHash); + _logger.LogInformation("Executing enrich policy to update internal index with {Count} total entries...", _enrichmentCache.Count); + await _enrichPolicyManager.ExecutePolicyAsync(ctx); + + if (semanticAlias is not null) + await BackfillMissingAiFieldsAsync(semanticAlias, ctx); } - else if (!_endpoint.NoSemantic) - _logger.LogInformation("Targeting {State} semantical: '{Index}'", semanticIndexExists, _semanticChannel.Channel.IndexName); + } + + private async ValueTask BackfillMissingAiFieldsAsync(string semanticAlias, Cancel ctx) + { + if (_endpoint.NoSemantic || _enrichmentCache is null || _llmClient is null) + return; + + var currentPromptHash = ElasticsearchLlmClient.PromptHash; - _logger.LogInformation("Using {IndexStrategy} to sync lexical index to semantic index", _indexStrategy.ToStringFast(true)); + _logger.LogInformation( + "Starting AI backfill for documents missing or stale AI fields (cache has {CacheCount} entries, prompt hash: {PromptHash})", + _enrichmentCache.Count, currentPromptHash[..8]); - async ValueTask IndexExists(string name) => (await _transport.HeadAsync(name, ctx)).ApiCallDetails.HasSuccessfulStatusCode; + var query = $$""" + { + "query": { + "bool": { + "must": { "exists": { "field": "enrichment_key" } }, + "should": [ + { 
"bool": { "must_not": { "exists": { "field": "ai_questions" } } } }, + { "bool": { "must_not": { "term": { "enrichment_prompt_hash": "{{currentPromptHash}}" } } } } + ], + "minimum_should_match": 1 + } + } + } + """; + + await _operations.UpdateByQueryAsync(semanticAlias, PostData.String(query), EnrichPolicyManager.PipelineName, ctx); } private async Task PublishSynonymsAsync(Cancel ctx) @@ -246,236 +352,28 @@ private async Task PutQueryRuleset(QueryRuleset ruleset, string rulesetName, Can _logger.LogInformation("Successfully published query ruleset '{RulesetName}'.", rulesetName); } - private async ValueTask CountAsync(string index, string body, Cancel ctx = default) + internal async ValueTask WriteDocumentAsync(DocumentationDocument doc, Cancel ctx) { - var countResponse = await _operations.WithRetryAsync( - () => _transport.PostAsync($"/{index}/_count", PostData.String(body), ctx), - $"POST {index}/_count", - ctx); - return countResponse.Body.Get("count"); - } - - /// - public async ValueTask StopAsync(Cancel ctx = default) - { - var semanticWriteAlias = string.Format(_semanticChannel.Channel.Options.IndexFormat, "latest"); - var lexicalWriteAlias = string.Format(_lexicalChannel.Channel.Options.IndexFormat, "latest"); - - var stopped = await _lexicalChannel.StopAsync(ctx); - if (!stopped) - throw new Exception($"Failed to stop {_lexicalChannel.GetType().Name}"); - - await QueryIngestStatistics(lexicalWriteAlias, ctx); - - if (_indexStrategy == IngestStrategy.Multiplex) + if (_orchestrator is not null) { - if (!_endpoint.NoSemantic) - _ = await _semanticChannel.StopAsync(ctx); - - // cleanup lexical index of old data - await DoDeleteByQuery(lexicalWriteAlias, ctx); - // need to refresh the lexical index to ensure that the delete by query is available - _ = await _lexicalChannel.RefreshAsync(ctx); - await QueryDocumentCounts(ctx); - // ReSharper disable once ConvertIfStatementToConditionalTernaryExpression - if (_endpoint.NoSemantic) - 
_logger.LogInformation("Finish indexing {IndexStrategy} strategy", _indexStrategy.ToStringFast(true)); - else - _logger.LogInformation("Finish syncing to semantic in {IndexStrategy} strategy", _indexStrategy.ToStringFast(true)); - return; + if (_orchestrator.TryWrite(doc)) + return true; + _ = await _orchestrator.WaitToWriteAsync(doc, ctx); + return true; } - if (_endpoint.NoSemantic) - { - _logger.LogInformation("--no-semantic was specified so exiting early before reindexing to {Index}", lexicalWriteAlias); - return; - } - - var semanticIndex = _semanticChannel.Channel.IndexName; - // check if the alias exists - var semanticIndexHead = await _transport.HeadAsync(semanticWriteAlias, ctx); - if (!semanticIndexHead.ApiCallDetails.HasSuccessfulStatusCode) - { - _logger.LogInformation("No semantic index exists yet, creating index {Index} for semantic search", semanticIndex); - _ = await _semanticChannel.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Failure, null, ctx); - var semanticIndexPut = await _transport.PutAsync(semanticIndex, PostData.String("{}"), ctx); - if (!semanticIndexPut.ApiCallDetails.HasSuccessfulStatusCode) - throw new Exception($"Failed to create index {semanticIndex}: {semanticIndexPut}"); - } - var destinationIndex = _semanticChannel.Channel.IndexName; - - _logger.LogInformation("_reindex updates: '{SourceIndex}' => '{DestinationIndex}'", lexicalWriteAlias, destinationIndex); - var request = PostData.String(@" - { - ""dest"": { - ""index"": """ + destinationIndex + @""" - }, - ""source"": { - ""index"": """ + lexicalWriteAlias + @""", - ""size"": 100, - ""query"": { - ""range"": { - ""last_updated"": { - ""gte"": """ + _batchIndexDate.ToString("o") + @""" - } - } - } - } - }"); - await DoReindex(request, lexicalWriteAlias, destinationIndex, "updates", ctx); - - _logger.LogInformation("_reindex deletions: '{SourceIndex}' => '{DestinationIndex}'", lexicalWriteAlias, destinationIndex); - request = PostData.String(@" - { - ""dest"": { - 
""index"": """ + destinationIndex + @""" - }, - ""script"": { - ""source"": ""ctx.op = \""delete\"""" - }, - ""source"": { - ""index"": """ + lexicalWriteAlias + @""", - ""size"": 100, - ""query"": { - ""range"": { - ""batch_index_date"": { - ""lt"": """ + _batchIndexDate.ToString("o") + @""" - } - } - } - } - }"); - await DoReindex(request, lexicalWriteAlias, destinationIndex, "deletions", ctx); - - await DoDeleteByQuery(lexicalWriteAlias, ctx); - - _ = await _lexicalChannel.Channel.ApplyLatestAliasAsync(ctx); - _ = await _semanticChannel.Channel.ApplyAliasesAsync(ctx); - - _ = await _lexicalChannel.RefreshAsync(ctx); - _ = await _semanticChannel.RefreshAsync(ctx); - - _logger.LogInformation("Finish sync to semantic index using {IndexStrategy} strategy", _indexStrategy.ToStringFast(true)); - await QueryDocumentCounts(ctx); - - // Execute enrich policy so new cache entries are available for next run - await ExecuteEnrichPolicyIfNeededAsync(ctx); + if (_lexicalOnlyChannel!.TryWrite(doc)) + return true; + if (await _lexicalOnlyChannel.WaitToWriteAsync(ctx)) + return _lexicalOnlyChannel.TryWrite(doc); + return false; } - private async ValueTask ExecuteEnrichPolicyIfNeededAsync(Cancel ctx) - { - if (_enrichmentCache is null || _enrichPolicyManager is null) - return; - - _logger.LogInformation( - "AI enrichment complete: {CacheHits} cache hits, {Enrichments} enrichments generated (limit: {Limit})", - _cacheHitCount, _enrichmentCount, _enrichmentOptions.MaxNewEnrichmentsPerRun); - - if (_enrichmentCache.Count > 0) - { - _logger.LogInformation("Executing enrich policy to update internal index with {Count} total entries...", _enrichmentCache.Count); - await _enrichPolicyManager.ExecutePolicyAsync(ctx); - - // Backfill: Apply AI fields to documents that were skipped by hash-based upsert - await BackfillMissingAiFieldsAsync(ctx); - } - } - - private async ValueTask BackfillMissingAiFieldsAsync(Cancel ctx) - { - // Why backfill is needed: - // The exporter uses hash-based 
upsert - unchanged documents are skipped during indexing. - // These skipped documents never pass through the ingest pipeline, so they miss AI fields. - // This backfill runs _update_by_query with the AI pipeline to enrich those documents. - // - // Additionally, when prompts change, existing documents have stale AI fields. - // We detect this by checking if the document's prompt_hash differs from the current one. - // - // Only backfill the semantic index - it's what the search API uses. - // The lexical index is just an intermediate step for reindexing. - if (_endpoint.NoSemantic || _enrichmentCache is null || _llmClient is null) - return; - - var semanticAlias = _semanticChannel.Channel.Options.ActiveSearchAlias; - var currentPromptHash = ElasticsearchLlmClient.PromptHash; - - _logger.LogInformation( - "Starting AI backfill for documents missing or stale AI fields (cache has {CacheCount} entries, prompt hash: {PromptHash})", - _enrichmentCache.Count, currentPromptHash[..8]); - - // Find documents with enrichment_key that either: - // 1. Missing AI fields (never enriched), OR - // 2. 
Have stale/missing enrichment_prompt_hash (enriched with old prompts) - var query = $$""" - { - "query": { - "bool": { - "must": { "exists": { "field": "enrichment_key" } }, - "should": [ - { "bool": { "must_not": { "exists": { "field": "ai_questions" } } } }, - { "bool": { "must_not": { "term": { "enrichment_prompt_hash": "{{currentPromptHash}}" } } } } - ], - "minimum_should_match": 1 - } - } - } - """; - - await RunBackfillQuery(semanticAlias, query, ctx); - } - - private async ValueTask RunBackfillQuery(string indexAlias, string query, Cancel ctx) => - await _operations.UpdateByQueryAsync(indexAlias, PostData.String(query), EnrichPolicyManager.PipelineName, ctx); - - private async ValueTask QueryIngestStatistics(string lexicalWriteAlias, Cancel ctx) - { - var lexicalSearchAlias = _lexicalChannel.Channel.Options.ActiveSearchAlias; - var updated = await CountAsync(lexicalSearchAlias, $$""" { "query": { "range": { "last_updated": { "gte": "{{_batchIndexDate:o}}" } } } }""", ctx); - var total = await CountAsync(lexicalSearchAlias, $$""" { "query": { "range": { "batch_index_date": { "gte": "{{_batchIndexDate:o}}" } } } }""", ctx); - var deleted = await CountAsync(lexicalSearchAlias, $$""" { "query": { "range": { "batch_index_date": { "lt": "{{_batchIndexDate:o}}" } } } }""", ctx); - - // TODO emit these as metrics - _logger.LogInformation("Exported {Total}, Updated {Updated}, Deleted, {Deleted} documents to {LexicalIndex}", total, updated, deleted, lexicalWriteAlias); - _logger.LogInformation("Syncing to semantic index using {IndexStrategy} strategy", _indexStrategy.ToStringFast(true)); - } - - private async ValueTask QueryDocumentCounts(Cancel ctx) - { - var semanticWriteAlias = string.Format(_semanticChannel.Channel.Options.IndexFormat, "latest"); - var lexicalWriteAlias = string.Format(_lexicalChannel.Channel.Options.IndexFormat, "latest"); - var totalLexical = await CountAsync(lexicalWriteAlias, "{}", ctx); - var totalSemantic = await 
CountAsync(semanticWriteAlias, "{}", ctx); - - // TODO emit these as metrics - _logger.LogInformation("Document counts -> Semantic Index: {TotalSemantic}, Lexical Index: {TotalLexical}", totalSemantic, totalLexical); - } - - private async ValueTask DoDeleteByQuery(string lexicalWriteAlias, Cancel ctx) - { - // delete all documents with batch_index_date < _batchIndexDate - // they weren't part of the current export - _logger.LogInformation("Delete data in '{SourceIndex}' not part of batch date: {Date}", lexicalWriteAlias, _batchIndexDate.ToString("o")); - var query = PostData.String(@" - { - ""query"": { - ""range"": { - ""batch_index_date"": { - ""lt"": """ + _batchIndexDate.ToString("o") + @""" - } - } - } - }"); - await _operations.DeleteByQueryAsync(lexicalWriteAlias, query, ctx); - } - - private async ValueTask DoReindex(PostData request, string lexicalWriteAlias, string semanticWriteAlias, string typeOfSync, Cancel ctx) => - await _operations.ReindexAsync(request, lexicalWriteAlias, semanticWriteAlias, typeOfSync, ctx); - /// public void Dispose() { - _lexicalChannel.Dispose(); - _semanticChannel.Dispose(); + _orchestrator?.Dispose(); + _lexicalOnlyChannel?.Dispose(); _llmClient?.Dispose(); GC.SuppressFinalize(this); } diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchOperations.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchOperations.cs index 4f94ae14a..3a3952406 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchOperations.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchOperations.cs @@ -161,22 +161,6 @@ public async Task DeleteByQueryAsync( await PollTaskUntilCompleteAsync(taskId, "_delete_by_query", index, null, ct); } - /// - /// Executes a reindex operation and waits for completion. 
- /// - public async Task ReindexAsync( - PostData request, - string sourceIndex, - string destIndex, - string operationType, - CancellationToken ct) - { - var url = "/_reindex?wait_for_completion=false&scroll=10m"; - var taskId = await PostAsyncTaskAsync(url, request, $"POST _reindex ({operationType})", ct); - if (taskId is not null) - await PollTaskUntilCompleteAsync(taskId, $"_reindex {operationType}", sourceIndex, destIndex, ct); - } - /// /// Executes an update_by_query operation and waits for completion. /// diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs index 18b096b6a..9e696c984 100644 --- a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs +++ b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs @@ -7,7 +7,10 @@ using Documentation.Builder.Diagnostics.Console; using Elastic.Documentation.Aspire; using Elastic.Documentation.Configuration; +using Elastic.Documentation.Search; using Elastic.Ingest.Elasticsearch; +using Elastic.Ingest.Elasticsearch.Indices; +using Elastic.Mapping; using Elastic.Markdown.Exporters.Elasticsearch; using Elastic.Transport; using Elastic.Transport.Products.Elasticsearch; @@ -175,22 +178,23 @@ private async ValueTask IsIndexingNeeded() var loggerFactory = fixture.DistributedApplication.Services.GetRequiredService(); var collector = new ConsoleDiagnosticsCollector(loggerFactory); - // Create semantic exporter to check channel hash (index namespace is 'dev' for tests) - using var semanticExporter = new ElasticsearchSemanticIngestChannel( - loggerFactory, - collector, - endpoint, - "dev", // index namespace - transport, + // Create semantic type context to check channel hash (index namespace is 'dev' for tests) + var semanticTypeContext = DocumentationAnalysisFactory.CreateContext( + 
DocumentationMappingContext.DocumentationDocumentSemantic.Context, + $"{endpoint.IndexNamePrefix.ToLowerInvariant()}-dev", + "docs-dev", [] ); + var options = new IngestChannelOptions(transport, semanticTypeContext); + using var channel = new IngestChannel(options); + // Get the current hash from Elasticsearch index template - var currentSemanticHash = await semanticExporter.Channel.GetIndexTemplateHashAsync(TestContext.Current.CancellationToken) ?? string.Empty; + var currentSemanticHash = await channel.GetIndexTemplateHashAsync(TestContext.Current.CancellationToken) ?? string.Empty; - // Get the expected channel hash from the semantic exporter - await semanticExporter.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Silent, ctx: TestContext.Current.CancellationToken); - var expectedSemanticHash = semanticExporter.Channel.ChannelHash; + // Get the expected channel hash + _ = await channel.BootstrapElasticsearchAsync(BootstrapMethod.Silent, TestContext.Current.CancellationToken); + var expectedSemanticHash = channel.ChannelHash; Console.WriteLine($"Elasticsearch semantic hash: '{currentSemanticHash}'"); Console.WriteLine($"Expected semantic hash: '{expectedSemanticHash}'"); From c9e0a1e3a58fb21406c0dddd09432e48a8492451 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Wed, 18 Feb 2026 11:18:01 +0100 Subject: [PATCH 02/14] Centralize Elasticsearch configuration into DocumentationEndpoints Replaces `ElasticsearchOptions` with `DocumentationEndpoints` as the single source of truth for Elasticsearch configuration across all API apps, MCP server, and integration tests. - Adds `IndexName` property to `ElasticsearchEndpoint` with a field-backed getter defaulting to `{IndexNamePrefix}-dev-latest`. - Creates `ElasticsearchEndpointFactory` in `ServiceDefaults` to centralize user-secrets and environment variable reading, eliminating the duplicated `72f50f33` secrets ID pattern. - Registers `DocumentationEndpoints` as a singleton in `AddDocumentationServiceDefaults`. 
- Updates `ElasticsearchClientAccessor` to accept `DocumentationEndpoints` instead of `ElasticsearchOptions`, supporting both API key and basic authentication. - Updates all gateway consumers (`NavigationSearchGateway`, `FullSearchGateway`, `DocumentGateway`, `ElasticsearchAskAiMessageFeedbackGateway`) to use endpoint properties. - Simplifies all three integration test files (`SearchRelevanceTests`, `McpToolsIntegrationTestsBase`, `SearchBootstrapFixture`) to use `ElasticsearchEndpointFactory` and `ElasticsearchTransportFactory`, removing manual config construction. - Deletes `ElasticsearchOptions.cs` and removes `Microsoft.Extensions.Configuration.UserSecrets` from the Search project. --- .../DocumentationEndpoints.cs | 6 ++ .../AppDefaultsExtensions.cs | 3 + ...astic.Documentation.ServiceDefaults.csproj | 1 + .../ElasticsearchEndpointFactory.cs | 66 +++++++++++++++++ .../Elastic.Documentation.Api.App/Program.cs | 13 ++-- ...lasticsearchAskAiMessageFeedbackGateway.cs | 15 ++-- .../Gateways/DocumentGateway.cs | 4 +- .../Program.cs | 12 ++-- .../Common/ElasticsearchClientAccessor.cs | 22 ++++-- .../Elastic.Documentation.Search.csproj | 1 - .../ElasticsearchOptions.cs | 39 ---------- .../FullSearchGateway.cs | 4 +- .../NavigationSearchGateway.cs | 6 +- .../ServicesExtension.cs | 2 - .../Search/SearchBootstrapFixture.cs | 72 ++----------------- .../Mcp.Remote.IntegrationTests.csproj | 1 + .../McpToolsIntegrationTestsBase.cs | 62 ++++------------ .../Search.IntegrationTests.csproj | 1 + .../SearchRelevanceTests.cs | 37 ++-------- 19 files changed, 150 insertions(+), 217 deletions(-) create mode 100644 src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs delete mode 100644 src/services/Elastic.Documentation.Search/ElasticsearchOptions.cs diff --git a/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs b/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs index 367fe844b..2bec0b94b 100644 --- 
a/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs +++ b/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs @@ -28,6 +28,12 @@ public class ElasticsearchEndpoint // index options public string IndexNamePrefix { get; set; } = "semantic-docs"; + public string IndexName + { + get => field ?? $"{IndexNamePrefix}-dev-latest"; + set; + } + // channel buffer options public int BufferSize { get; set; } = 50; // Reduced for Serverless rate limits public int MaxRetries { get; set; } = 5; // Increased for 429 retries diff --git a/src/Elastic.Documentation.ServiceDefaults/AppDefaultsExtensions.cs b/src/Elastic.Documentation.ServiceDefaults/AppDefaultsExtensions.cs index eae34aeac..4b3a497eb 100644 --- a/src/Elastic.Documentation.ServiceDefaults/AppDefaultsExtensions.cs +++ b/src/Elastic.Documentation.ServiceDefaults/AppDefaultsExtensions.cs @@ -45,6 +45,9 @@ public static TBuilder AddDocumentationServiceDefaults(this TBuilder b _ = builder.Services.AddElasticDocumentationLogging(globalArgs.LogLevel, noConsole: globalArgs.IsMcp); _ = services.AddSingleton(globalArgs); + var endpoints = ElasticsearchEndpointFactory.Create(builder.Configuration); + _ = services.AddSingleton(endpoints); + return builder.AddServiceDefaults(); } diff --git a/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj b/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj index 4357d65ce..ef3d8edd8 100644 --- a/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj +++ b/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj @@ -17,6 +17,7 @@ + diff --git a/src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs b/src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs new file mode 100644 index 000000000..418ff21ef --- /dev/null +++ b/src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs @@ 
-0,0 +1,66 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using Elastic.Documentation.Configuration; +using Microsoft.Extensions.Configuration; + +namespace Elastic.Documentation.ServiceDefaults; + +/// Centralizes user-secrets + env-var reading for Elasticsearch configuration. +public static class ElasticsearchEndpointFactory +{ + private const string UserSecretsId = "72f50f33-6fb9-4d08-bff3-39568fe370b3"; + + /// + /// Creates the documentation endpoints from user secrets and environment variables. + /// Falls back to http://localhost:9200 when no URL is configured. + /// + public static DocumentationEndpoints Create(IConfiguration? appConfiguration = null) + { + var configBuilder = new ConfigurationBuilder(); + _ = configBuilder.AddUserSecrets(UserSecretsId); + _ = configBuilder.AddEnvironmentVariables(); + var config = configBuilder.Build(); + + var url = + config["Parameters:DocumentationElasticUrl"] + ?? config["DOCUMENTATION_ELASTIC_URL"]; + + var apiKey = + config["Parameters:DocumentationElasticApiKey"] + ?? config["DOCUMENTATION_ELASTIC_APIKEY"]; + + var password = + config["Parameters:DocumentationElasticPassword"] + ?? config["DOCUMENTATION_ELASTIC_PASSWORD"]; + + var username = + config["Parameters:DocumentationElasticUsername"] + ?? config["DOCUMENTATION_ELASTIC_USERNAME"] + ?? 
"elastic"; + + if (string.IsNullOrEmpty(url)) + { + return new DocumentationEndpoints + { + Elasticsearch = new ElasticsearchEndpoint { Uri = new Uri("http://localhost:9200") } + }; + } + + var indexName = appConfiguration?["DOCUMENTATION_ELASTIC_INDEX"]; + + var endpoint = new ElasticsearchEndpoint + { + Uri = new Uri(url), + ApiKey = apiKey, + Password = password, + Username = username + }; + + if (indexName is not null) + endpoint.IndexName = indexName; + + return new DocumentationEndpoints { Elasticsearch = endpoint }; + } +} diff --git a/src/api/Elastic.Documentation.Api.App/Program.cs b/src/api/Elastic.Documentation.Api.App/Program.cs index 2165dead6..373ae9a46 100644 --- a/src/api/Elastic.Documentation.Api.App/Program.cs +++ b/src/api/Elastic.Documentation.Api.App/Program.cs @@ -4,8 +4,8 @@ using Elastic.Documentation.Api.Infrastructure; using Elastic.Documentation.Api.Infrastructure.OpenTelemetry; +using Elastic.Documentation.Configuration; using Elastic.Documentation.Configuration.Assembler; -using Elastic.Documentation.Search; using Elastic.Documentation.ServiceDefaults; using Microsoft.AspNetCore.Diagnostics; using Microsoft.AspNetCore.Diagnostics.HealthChecks; @@ -82,17 +82,18 @@ static void LogElasticsearchConfiguration(WebApplication app, ILogger logger) { try { - var esOptions = app.Services.GetService(); - if (esOptions != null) + var endpoints = app.Services.GetService(); + if (endpoints is not null) { + var endpoint = endpoints.Elasticsearch; logger.LogInformation( "Elasticsearch configuration - Url: {Url}, Index: {Index}", - esOptions.Url, - esOptions.IndexName + endpoint.Uri, + endpoint.IndexName ); } else - logger.LogWarning("ElasticsearchOptions could not be resolved from DI"); + logger.LogWarning("DocumentationEndpoints could not be resolved from DI"); } catch (Exception ex) { diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/ElasticsearchAskAiMessageFeedbackGateway.cs 
b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/ElasticsearchAskAiMessageFeedbackGateway.cs index 345d07ba5..99e20e5ec 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/ElasticsearchAskAiMessageFeedbackGateway.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/ElasticsearchAskAiMessageFeedbackGateway.cs @@ -7,7 +7,7 @@ using Elastic.Clients.Elasticsearch.Serialization; using Elastic.Documentation.Api.Core; using Elastic.Documentation.Api.Core.AskAi; -using Elastic.Documentation.Search; +using Elastic.Documentation.Configuration; using Elastic.Transport; using Microsoft.Extensions.Logging; @@ -25,20 +25,27 @@ public sealed class ElasticsearchAskAiMessageFeedbackGateway : IAskAiMessageFeed private bool _disposed; public ElasticsearchAskAiMessageFeedbackGateway( - ElasticsearchOptions elasticsearchOptions, + DocumentationEndpoints endpoints, AppEnvironment appEnvironment, ILogger logger) { _logger = logger; _indexName = $"ask-ai-message-feedback-{appEnvironment.Current.ToStringFast(true)}"; - _nodePool = new SingleNodePool(new Uri(elasticsearchOptions.Url.Trim())); + var endpoint = endpoints.Elasticsearch; + _nodePool = new SingleNodePool(endpoint.Uri); + var auth = endpoint.ApiKey is { } apiKey + ? (AuthorizationHeader)new ApiKey(apiKey) + : endpoint is { Username: { } username, Password: { } password } + ? 
new BasicAuthentication(username, password) + : null!; + using var clientSettings = new ElasticsearchClientSettings( _nodePool, sourceSerializer: (_, settings) => new DefaultSourceSerializer(settings, MessageFeedbackJsonContext.Default) ) .DefaultIndex(_indexName) - .Authentication(new ApiKey(elasticsearchOptions.ApiKey)); + .Authentication(auth); _client = new ElasticsearchClient(clientSettings); } diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs index aef0f15cb..168926126 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs @@ -24,7 +24,7 @@ public class DocumentGateway( try { var response = await clientAccessor.Client.SearchAsync(s => s - .Indices(clientAccessor.Options.IndexName) + .Indices(clientAccessor.Endpoint.IndexName) .Query(q => q.Term(t => t.Field(f => f.Url.Suffix("keyword")).Value(url))) .Size(1) .Source(sf => sf.Filter(f => f.Includes( @@ -101,7 +101,7 @@ public class DocumentGateway( try { var response = await clientAccessor.Client.SearchAsync(s => s - .Indices(clientAccessor.Options.IndexName) + .Indices(clientAccessor.Endpoint.IndexName) .Query(q => q.Term(t => t.Field(f => f.Url.Suffix("keyword")).Value(url))) .Size(1) .Source(sf => sf.Filter(f => f.Includes( diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Program.cs b/src/api/Elastic.Documentation.Mcp.Remote/Program.cs index 42ddc27de..b9ae29394 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Program.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Program.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information using Elastic.Documentation.Api.Infrastructure.OpenTelemetry; +using Elastic.Documentation.Configuration; using Elastic.Documentation.Mcp.Remote.Gateways; using Elastic.Documentation.Mcp.Remote.Tools; using Elastic.Documentation.Search; @@ 
-82,17 +83,18 @@ static void LogElasticsearchConfiguration(WebApplication app, ILogger logger) { try { - var esOptions = app.Services.GetService(); - if (esOptions != null) + var endpoints = app.Services.GetService(); + if (endpoints is not null) { + var endpoint = endpoints.Elasticsearch; logger.LogInformation( "Elasticsearch configuration - Url: {Url}, Index: {Index}", - esOptions.Url, - esOptions.IndexName + endpoint.Uri, + endpoint.IndexName ); } else - logger.LogWarning("ElasticsearchOptions could not be resolved from DI"); + logger.LogWarning("DocumentationEndpoints could not be resolved from DI"); } catch (Exception ex) { diff --git a/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs b/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs index b49b02250..747408968 100644 --- a/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs +++ b/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs @@ -4,6 +4,7 @@ using Elastic.Clients.Elasticsearch; using Elastic.Clients.Elasticsearch.Serialization; +using Elastic.Documentation.Configuration; using Elastic.Documentation.Configuration.Search; using Elastic.Transport; @@ -18,31 +19,38 @@ public class ElasticsearchClientAccessor : IDisposable private readonly ElasticsearchClientSettings _clientSettings; private readonly SingleNodePool _nodePool; public ElasticsearchClient Client { get; } - public ElasticsearchOptions Options { get; } + public ElasticsearchEndpoint Endpoint { get; } public SearchConfiguration SearchConfiguration { get; } public string? 
RulesetName { get; } public IReadOnlyDictionary SynonymBiDirectional { get; } public IReadOnlyCollection DiminishTerms { get; } public ElasticsearchClientAccessor( - ElasticsearchOptions elasticsearchOptions, + DocumentationEndpoints endpoints, SearchConfiguration searchConfiguration) { - Options = elasticsearchOptions; + var endpoint = endpoints.Elasticsearch; + Endpoint = endpoint; SearchConfiguration = searchConfiguration; SynonymBiDirectional = searchConfiguration.SynonymBiDirectional; DiminishTerms = searchConfiguration.DiminishTerms; RulesetName = searchConfiguration.Rules.Count > 0 - ? ExtractRulesetName(elasticsearchOptions.IndexName) + ? ExtractRulesetName(endpoint.IndexName) : null; - _nodePool = new SingleNodePool(new Uri(elasticsearchOptions.Url.Trim())); + _nodePool = new SingleNodePool(endpoint.Uri); + var auth = endpoint.ApiKey is { } apiKey + ? (AuthorizationHeader)new ApiKey(apiKey) + : endpoint is { Username: { } username, Password: { } password } + ? new BasicAuthentication(username, password) + : null!; + _clientSettings = new ElasticsearchClientSettings( _nodePool, sourceSerializer: (_, settings) => new DefaultSourceSerializer(settings, EsJsonContext.Default) ) - .DefaultIndex(elasticsearchOptions.IndexName) - .Authentication(new ApiKey(elasticsearchOptions.ApiKey)); + .DefaultIndex(endpoint.IndexName) + .Authentication(auth); Client = new ElasticsearchClient(_clientSettings); } diff --git a/src/services/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj b/src/services/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj index 27eb575bc..8a350648d 100644 --- a/src/services/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj +++ b/src/services/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj @@ -17,7 +17,6 @@ - diff --git a/src/services/Elastic.Documentation.Search/ElasticsearchOptions.cs b/src/services/Elastic.Documentation.Search/ElasticsearchOptions.cs deleted file mode 100644 index 
9327ae816..000000000 --- a/src/services/Elastic.Documentation.Search/ElasticsearchOptions.cs +++ /dev/null @@ -1,39 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - -using Microsoft.Extensions.Configuration; - -namespace Elastic.Documentation.Search; - -public class ElasticsearchOptions -{ - public ElasticsearchOptions(IConfiguration configuration) - { - // Build a new ConfigurationBuilder to read user secrets - var configBuilder = new ConfigurationBuilder(); - _ = configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); - var userSecretsConfig = configBuilder.Build(); - var elasticUrlFromSecret = userSecretsConfig["Parameters:DocumentationElasticUrl"]; - var elasticApiKeyFromSecret = userSecretsConfig["Parameters:DocumentationElasticApiKey"]; - - Url = GetEnv("DOCUMENTATION_ELASTIC_URL", elasticUrlFromSecret); - ApiKey = GetEnv("DOCUMENTATION_ELASTIC_APIKEY", elasticApiKeyFromSecret); - IndexName = configuration["DOCUMENTATION_ELASTIC_INDEX"] ?? "semantic-docs-dev-latest"; - } - - private static string GetEnv(string name, string? 
defaultValue = null) - { - var value = Environment.GetEnvironmentVariable(name); - if (!string.IsNullOrEmpty(value)) - return value; - if (defaultValue != null) - return defaultValue; - throw new ArgumentException($"Environment variable '{name}' not found."); - } - - // Read from environment variables (set by Terraform from SSM at deploy time) - public string Url { get; } - public string ApiKey { get; } - public string IndexName { get; } -} diff --git a/src/services/Elastic.Documentation.Search/FullSearchGateway.cs b/src/services/Elastic.Documentation.Search/FullSearchGateway.cs index d21dfb05b..af0b5063e 100644 --- a/src/services/Elastic.Documentation.Search/FullSearchGateway.cs +++ b/src/services/Elastic.Documentation.Search/FullSearchGateway.cs @@ -104,7 +104,7 @@ private async Task SearchWithHybridRrf(FullSearchRequest reque var response = await clientAccessor.Client.SearchAsync(s => { _ = s - .Indices(clientAccessor.Options.IndexName) + .Indices(clientAccessor.Endpoint.IndexName) .From(Math.Max(request.PageNumber - 1, 0) * request.PageSize) .Size(request.PageSize) .Query(filteredQuery) @@ -193,7 +193,7 @@ private async Task SearchLexicalOnly(FullSearchRequest request var response = await clientAccessor.Client.SearchAsync(s => { _ = s - .Indices(clientAccessor.Options.IndexName) + .Indices(clientAccessor.Endpoint.IndexName) .From(Math.Max(request.PageNumber - 1, 0) * request.PageSize) .Size(request.PageSize) .Query(filteredQuery) diff --git a/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs b/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs index a8c3710ae..797dd0121 100644 --- a/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs +++ b/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs @@ -46,7 +46,7 @@ public async Task SearchImplementation(string query, int var response = await clientAccessor.Client.SearchAsync(s => { _ = s - .Indices(clientAccessor.Options.IndexName) + 
.Indices(clientAccessor.Endpoint.IndexName) .From(Math.Max(pageNumber - 1, 0) * pageSize) .Size(pageSize) .Query(lexicalQuery) @@ -167,7 +167,7 @@ public async Task ExplainDocumentAsync(string query, string docum { // First, find the document by URL var getDocResponse = await clientAccessor.Client.SearchAsync(s => s - .Indices(clientAccessor.Options.IndexName) + .Indices(clientAccessor.Endpoint.IndexName) .Query(q => q.Term(t => t.Field(f => f.Url).Value(documentUrl))) .Size(1), ctx); @@ -186,7 +186,7 @@ public async Task ExplainDocumentAsync(string query, string docum // Now explain why this document matches (or doesn't match) the query var explainResponse = await clientAccessor.Client.ExplainAsync( - clientAccessor.Options.IndexName, documentId, e => e.Query(combinedQuery), ctx); + clientAccessor.Endpoint.IndexName, documentId, e => e.Query(combinedQuery), ctx); if (!explainResponse.IsValidResponse) { diff --git a/src/services/Elastic.Documentation.Search/ServicesExtension.cs b/src/services/Elastic.Documentation.Search/ServicesExtension.cs index 9b505c6f2..99e6619ae 100644 --- a/src/services/Elastic.Documentation.Search/ServicesExtension.cs +++ b/src/services/Elastic.Documentation.Search/ServicesExtension.cs @@ -25,8 +25,6 @@ public static IServiceCollection AddSearchServices(this IServiceCollection servi var logger = GetLogger(services); logger?.LogInformation("Configuring Search services"); - // Shared Elasticsearch options - DI auto-resolves IConfiguration from primary constructor - _ = services.AddSingleton(); _ = services.AddSingleton(); // Navigation Search (autocomplete/navigation search) diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs index 9e696c984..c7f0db5a8 100644 --- a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs +++ 
b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs @@ -8,6 +8,7 @@ using Elastic.Documentation.Aspire; using Elastic.Documentation.Configuration; using Elastic.Documentation.Search; +using Elastic.Documentation.ServiceDefaults; using Elastic.Ingest.Elasticsearch; using Elastic.Ingest.Elasticsearch.Indices; using Elastic.Mapping; @@ -15,7 +16,6 @@ using Elastic.Transport; using Elastic.Transport.Products.Elasticsearch; using FluentAssertions; -using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; @@ -139,39 +139,12 @@ private async ValueTask IsIndexingNeeded() { try { - // Get Elasticsearch configuration from Aspire - var (elasticsearchUrl, apiKey, password, username) = GetElasticsearchConfiguration(); + var endpoints = ElasticsearchEndpointFactory.Create(); - if (string.IsNullOrEmpty(elasticsearchUrl)) - { - Console.WriteLine("No Elasticsearch URL configured, indexing will be performed."); - Connected = false; - return false; - } + var endpoint = endpoints.Elasticsearch; + Console.WriteLine($"Checking remote Elasticsearch at {endpoint.Uri} for existing data..."); - Console.WriteLine($"Checking remote Elasticsearch at {elasticsearchUrl} for existing data..."); - - // Create Elasticsearch endpoint configuration - var endpoint = new ElasticsearchEndpoint - { - Uri = new Uri(elasticsearchUrl), - ApiKey = apiKey, - Username = username, - Password = password - }; - - // Create transport configuration (similar to ElasticsearchMarkdownExporter) - var configuration = new ElasticsearchConfiguration(endpoint.Uri) - { - Authentication = endpoint.ApiKey is { } eApiKey - ? new ApiKey(eApiKey) - : endpoint is { Username: { } eUsername, Password: { } ePassword } - ? 
new BasicAuthentication(eUsername, ePassword) - : null, - EnableHttpCompression = true - }; - - var transport = new DistributedTransport(configuration); + var transport = ElasticsearchTransportFactory.Create(endpoint); Connected = (await transport.HeadAsync("/", TestContext.Current.CancellationToken)).ApiCallDetails.HasSuccessfulStatusCode; // Create a logger factory and diagnostics collector @@ -236,41 +209,6 @@ private async ValueTask ValidateResourceExitCode(string resourceName) Console.WriteLine($"{resourceName} completed with exit code 0"); } - /// - /// Gets Elasticsearch configuration from Aspire parameters and environment. - /// Manually reads user secrets from the aspire project, then falls back to environment variables. - /// - private (string? Url, string? ApiKey, string? Password, string? Username) GetElasticsearchConfiguration() - { - // Manually read user secrets from the aspire project - // UserSecretsId from aspire.csproj: 72f50f33-6fb9-4d08-bff3-39568fe370b3 - var configBuilder = new ConfigurationBuilder(); - configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); - var userSecretsConfig = configBuilder.Build(); - - // Get URL - try user secrets first, then Aspire configuration, then environment - var url = userSecretsConfig["Parameters:DocumentationElasticUrl"] - ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticUrl"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL"); - - // Get API Key - try user secrets first, then Aspire configuration, then environment - var apiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"] - ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticApiKey"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY"); - - // Get password for local Elasticsearch (when using --start-elasticsearch) - var password = userSecretsConfig["Parameters:DocumentationElasticPassword"] ?? 
Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_PASSWORD"); - - // Get username (defaults to "elastic") - var username = userSecretsConfig["Parameters:DocumentationElasticUsername"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_USERNAME") - ?? "elastic"; - - Console.WriteLine($"Elasticsearch configuration retrieved: URL={url != null}, ApiKey={apiKey != null}, Password={password != null}"); - - return (url, apiKey, password, username); - } - public ValueTask DisposeAsync() { HttpClient?.Dispose(); diff --git a/tests-integration/Mcp.Remote.IntegrationTests/Mcp.Remote.IntegrationTests.csproj b/tests-integration/Mcp.Remote.IntegrationTests/Mcp.Remote.IntegrationTests.csproj index 2896294b0..8889bbcba 100644 --- a/tests-integration/Mcp.Remote.IntegrationTests/Mcp.Remote.IntegrationTests.csproj +++ b/tests-integration/Mcp.Remote.IntegrationTests/Mcp.Remote.IntegrationTests.csproj @@ -10,6 +10,7 @@ + diff --git a/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs b/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs index a6ec7c7dd..d2d0d696e 100644 --- a/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs +++ b/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs @@ -10,7 +10,7 @@ using Elastic.Documentation.Mcp.Remote.Tools; using Elastic.Documentation.Search; using Elastic.Documentation.Search.Common; -using Microsoft.Extensions.Configuration; +using Elastic.Documentation.ServiceDefaults; using Microsoft.Extensions.Logging.Abstractions; namespace Mcp.Remote.IntegrationTests; @@ -25,17 +25,16 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output) /// /// Creates SearchTools with all required dependencies. /// - protected (SearchTools? Tools, ElasticsearchClientAccessor? ClientAccessor) CreateSearchTools() + protected (SearchTools Tools, ElasticsearchClientAccessor? 
ClientAccessor) CreateSearchTools() { var clientAccessor = CreateElasticsearchClientAccessor(); - if (clientAccessor == null) - return (null, null); var productsConfig = CreateProductsConfiguration(); var fullSearchGateway = new FullSearchGateway( clientAccessor, productsConfig, - NullLogger.Instance); + NullLogger.Instance + ); var searchTools = new SearchTools(fullSearchGateway, NullLogger.Instance); return (searchTools, clientAccessor); @@ -44,16 +43,11 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output) /// /// Creates DocumentTools with all required dependencies. /// - protected (DocumentTools? Tools, ElasticsearchClientAccessor? ClientAccessor) CreateDocumentTools() + protected (DocumentTools Tools, ElasticsearchClientAccessor? ClientAccessor) CreateDocumentTools() { var clientAccessor = CreateElasticsearchClientAccessor(); - if (clientAccessor == null) - return (null, null); - - var documentGateway = new DocumentGateway( - clientAccessor, - NullLogger.Instance); + var documentGateway = new DocumentGateway(clientAccessor, NullLogger.Instance); var documentTools = new DocumentTools(documentGateway, NullLogger.Instance); return (documentTools, clientAccessor); } @@ -61,18 +55,12 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output) /// /// Creates CoherenceTools with all required dependencies. /// - protected (CoherenceTools? Tools, ElasticsearchClientAccessor? ClientAccessor) CreateCoherenceTools() + protected (CoherenceTools Tools, ElasticsearchClientAccessor? 
ClientAccessor) CreateCoherenceTools() { var clientAccessor = CreateElasticsearchClientAccessor(); - if (clientAccessor == null) - return (null, null); var productsConfig = CreateProductsConfiguration(); - var fullSearchGateway = new FullSearchGateway( - clientAccessor, - productsConfig, - NullLogger.Instance); - + var fullSearchGateway = new FullSearchGateway(clientAccessor, productsConfig, NullLogger.Instance); var coherenceTools = new CoherenceTools(fullSearchGateway, NullLogger.Instance); return (coherenceTools, clientAccessor); } @@ -80,34 +68,12 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output) /// /// Creates an ElasticsearchClientAccessor using configuration from user secrets and environment variables. /// - private static ElasticsearchClientAccessor? CreateElasticsearchClientAccessor() + private static ElasticsearchClientAccessor CreateElasticsearchClientAccessor() { - var configBuilder = new ConfigurationBuilder(); - configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); - configBuilder.AddEnvironmentVariables(); - var config = configBuilder.Build(); - - var elasticsearchUrl = - config["Parameters:DocumentationElasticUrl"] - ?? config["DOCUMENTATION_ELASTIC_URL"]; - - var elasticsearchApiKey = - config["Parameters:DocumentationElasticApiKey"] - ?? 
config["DOCUMENTATION_ELASTIC_APIKEY"]; - - if (string.IsNullOrEmpty(elasticsearchUrl) || string.IsNullOrEmpty(elasticsearchApiKey)) - return null; - - var testConfig = new ConfigurationBuilder() - .AddInMemoryCollection(new Dictionary - { - ["DOCUMENTATION_ELASTIC_URL"] = elasticsearchUrl, - ["DOCUMENTATION_ELASTIC_APIKEY"] = elasticsearchApiKey, - ["DOCUMENTATION_ELASTIC_INDEX"] = "semantic-docs-dev-latest" - }) - .Build(); - - var options = new ElasticsearchOptions(testConfig); + var endpoints = ElasticsearchEndpointFactory.Create(); + + endpoints.Elasticsearch.IndexName = "semantic-docs-dev-latest"; + var searchConfig = new SearchConfiguration { Synonyms = new Dictionary(), @@ -115,7 +81,7 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output) DiminishTerms = ["plugin", "client", "integration", "glossary"] }; - return new ElasticsearchClientAccessor(options, searchConfig); + return new ElasticsearchClientAccessor(endpoints, searchConfig); } /// diff --git a/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj b/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj index 0754b8a91..4763c5eb0 100644 --- a/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj +++ b/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj @@ -10,6 +10,7 @@ + diff --git a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs index 3d93fd012..9047368e2 100644 --- a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs +++ b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs @@ -5,8 +5,8 @@ using Elastic.Documentation.Configuration.Search; using Elastic.Documentation.Search; using Elastic.Documentation.Search.Common; +using Elastic.Documentation.ServiceDefaults; using FluentAssertions; -using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Logging.Abstractions; 
namespace Search.IntegrationTests; @@ -220,37 +220,12 @@ public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring() /// /// Creates an ElasticsearchGateway instance using configuration from the distributed application. /// - private NavigationSearchGateway? CreateFindPageGateway() + private NavigationSearchGateway CreateFindPageGateway() { - // Build a new ConfigurationBuilder to read user secrets and environment variables - var configBuilder = new ConfigurationBuilder(); - configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); - configBuilder.AddEnvironmentVariables(); - var config = configBuilder.Build(); - - // Get Elasticsearch configuration with fallback chain: user secrets → environment - var elasticsearchUrl = - config["Parameters:DocumentationElasticUrl"] - ?? config["DOCUMENTATION_ELASTIC_URL"]; - - var elasticsearchApiKey = - config["Parameters:DocumentationElasticApiKey"] - ?? config["DOCUMENTATION_ELASTIC_APIKEY"]; - - if (elasticsearchUrl is null or "" || elasticsearchApiKey is null or "") - return null; - - // Create IConfiguration with the required values for ElasticsearchOptions - var testConfig = new ConfigurationBuilder() - .AddInMemoryCollection(new Dictionary - { - ["DOCUMENTATION_ELASTIC_URL"] = elasticsearchUrl, - ["DOCUMENTATION_ELASTIC_APIKEY"] = elasticsearchApiKey, - ["DOCUMENTATION_ELASTIC_INDEX"] = "semantic-docs-dev-latest" - }) - .Build(); + var endpoints = ElasticsearchEndpointFactory.Create(); + + endpoints.Elasticsearch.IndexName = "semantic-docs-dev-latest"; - var options = new ElasticsearchOptions(testConfig); var searchConfig = new SearchConfiguration { Synonyms = new Dictionary(), @@ -278,7 +253,7 @@ public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring() DiminishTerms = ["plugin", "client", "integration", "glossary"] }; - var clientAccessor = new ElasticsearchClientAccessor(options, searchConfig); + var clientAccessor = new ElasticsearchClientAccessor(endpoints, searchConfig); 
return new NavigationSearchGateway(clientAccessor, NullLogger.Instance); } } From 22defec0369278be2ad66c88b01bb1e395318846 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Wed, 18 Feb 2026 16:09:12 +0100 Subject: [PATCH 03/14] Replace hardcoded IndexName with namespace-based index resolution Move mapping context (DocumentationMappingContext, LexicalConfig, SemanticConfig, DocumentationAnalysisFactory) from Elastic.Markdown to Elastic.Documentation so both indexing and search derive index names from the same source. Add ContentHash helper to avoid Elastic.Ingest.Elasticsearch dependency in Elastic.Documentation. Remove IndexName from ElasticsearchEndpoint, add Namespace to DocumentationEndpoints. ElasticsearchEndpointFactory resolves namespace from DOCUMENTATION_ELASTIC_INDEX env var (backward compat), DOTNET_ENVIRONMENT, ENVIRONMENT, or falls back to "dev". ElasticsearchClientAccessor derives SearchIndex and RulesetName from namespace instead of parsing the old IndexName string. Remove ExtractRulesetName and all hardcoded "semantic-docs-dev-latest" assignments from tests and config files. 
--- .../DocumentationEndpoints.cs | 7 +--- .../ElasticsearchEndpointFactory.cs | 38 ++++++++++++++++--- .../Search/ContentHash.cs | 19 ++++++++++ .../Search}/DocumentationMappingConfig.cs | 8 ++-- .../Elastic.Documentation.Api.App/Program.cs | 6 ++- .../Aws/LocalParameterProvider.cs | 4 -- .../Gateways/DocumentGateway.cs | 4 +- .../Program.cs | 6 ++- .../appsettings.development.json | 3 +- .../appsettings.edge.json | 3 +- .../Common/ElasticsearchClientAccessor.cs | 25 +++--------- .../FullSearchGateway.cs | 4 +- .../NavigationSearchGateway.cs | 6 +-- .../McpToolsIntegrationTestsBase.cs | 2 - .../SearchRelevanceTests.cs | 2 - 15 files changed, 79 insertions(+), 58 deletions(-) create mode 100644 src/Elastic.Documentation/Search/ContentHash.cs rename src/{Elastic.Markdown/Exporters/Elasticsearch => Elastic.Documentation/Search}/DocumentationMappingConfig.cs (96%) diff --git a/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs b/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs index 2bec0b94b..69282068f 100644 --- a/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs +++ b/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs @@ -9,6 +9,7 @@ namespace Elastic.Documentation.Configuration; public class DocumentationEndpoints { public required ElasticsearchEndpoint Elasticsearch { get; init; } + public string Namespace { get; set; } = "dev"; } public class ElasticsearchEndpoint @@ -28,12 +29,6 @@ public class ElasticsearchEndpoint // index options public string IndexNamePrefix { get; set; } = "semantic-docs"; - public string IndexName - { - get => field ?? 
$"{IndexNamePrefix}-dev-latest"; - set; - } - // channel buffer options public int BufferSize { get; set; } = 50; // Reduced for Serverless rate limits public int MaxRetries { get; set; } = 5; // Increased for 429 retries diff --git a/src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs b/src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs index 418ff21ef..d5e6cd6ac 100644 --- a/src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs +++ b/src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs @@ -48,8 +48,6 @@ public static DocumentationEndpoints Create(IConfiguration? appConfiguration = n }; } - var indexName = appConfiguration?["DOCUMENTATION_ELASTIC_INDEX"]; - var endpoint = new ElasticsearchEndpoint { Uri = new Uri(url), @@ -58,9 +56,39 @@ public static DocumentationEndpoints Create(IConfiguration? appConfiguration = n Username = username }; - if (indexName is not null) - endpoint.IndexName = indexName; + var ns = ResolveNamespace(config, appConfiguration, endpoint.IndexNamePrefix); + + return new DocumentationEndpoints { Elasticsearch = endpoint, Namespace = ns }; + } + + /// + /// Resolves the deployment namespace using this priority: + /// 1. DOCUMENTATION_ELASTIC_INDEX env var — strip prefix and -latest suffix + /// 2. DOTNET_ENVIRONMENT env var + /// 3. ENVIRONMENT env var + /// 4. Fallback: "dev" + /// + private static string ResolveNamespace(IConfiguration config, IConfiguration? appConfiguration, string indexNamePrefix) + { + var indexName = appConfiguration?["DOCUMENTATION_ELASTIC_INDEX"] + ?? 
config["DOCUMENTATION_ELASTIC_INDEX"]; + + if (!string.IsNullOrEmpty(indexName)) + { + var prefix = $"{indexNamePrefix}-"; + const string suffix = "-latest"; + if (indexName.StartsWith(prefix, StringComparison.OrdinalIgnoreCase) && + indexName.EndsWith(suffix, StringComparison.OrdinalIgnoreCase)) + { + var ns = indexName[prefix.Length..^suffix.Length]; + if (!string.IsNullOrEmpty(ns)) + return ns; + } + } + + var env = config["DOTNET_ENVIRONMENT"] + ?? config["ENVIRONMENT"]; - return new DocumentationEndpoints { Elasticsearch = endpoint }; + return !string.IsNullOrEmpty(env) ? env.ToLowerInvariant() : "dev"; } } diff --git a/src/Elastic.Documentation/Search/ContentHash.cs b/src/Elastic.Documentation/Search/ContentHash.cs new file mode 100644 index 000000000..17eb2e7ae --- /dev/null +++ b/src/Elastic.Documentation/Search/ContentHash.cs @@ -0,0 +1,19 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Security.Cryptography; +using System.Text; + +namespace Elastic.Documentation.Search; + +/// Creates a short hex hash from one or more string components. +public static class ContentHash +{ + /// + /// Concatenates all components, computes SHA-256, and returns the first 16 hex characters (lowercased). + /// Compatible with HashedBulkUpdate.CreateHash. 
+ /// + public static string Create(params string[] components) => + Convert.ToHexString(SHA256.HashData(Encoding.UTF8.GetBytes(string.Join("", components))))[..16].ToLowerInvariant(); +} diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/DocumentationMappingConfig.cs b/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs similarity index 96% rename from src/Elastic.Markdown/Exporters/Elasticsearch/DocumentationMappingConfig.cs rename to src/Elastic.Documentation/Search/DocumentationMappingConfig.cs index 9ae1c7072..b36db5ca4 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/DocumentationMappingConfig.cs +++ b/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs @@ -2,12 +2,10 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information -using Elastic.Documentation.Search; -using Elastic.Ingest.Elasticsearch.Indices; using Elastic.Mapping; using Elastic.Mapping.Analysis; -namespace Elastic.Markdown.Exporters.Elasticsearch; +namespace Elastic.Documentation.Search; [ElasticsearchMappingContext] [Entity( @@ -185,8 +183,8 @@ public static ElasticsearchTypeContext CreateContext( string? 
defaultPipeline = null) { var settingsJson = BuildSettingsJson(synonymSetName, indexTimeSynonyms, defaultPipeline); - var settingsHash = HashedBulkUpdate.CreateHash(settingsJson); - var hash = HashedBulkUpdate.CreateHash(settingsHash, baseContext.MappingsHash); + var settingsHash = ContentHash.Create(settingsJson); + var hash = ContentHash.Create(settingsHash, baseContext.MappingsHash); return baseContext.WithIndexName(indexName) with { diff --git a/src/api/Elastic.Documentation.Api.App/Program.cs b/src/api/Elastic.Documentation.Api.App/Program.cs index 373ae9a46..9ee9ef56d 100644 --- a/src/api/Elastic.Documentation.Api.App/Program.cs +++ b/src/api/Elastic.Documentation.Api.App/Program.cs @@ -86,10 +86,12 @@ static void LogElasticsearchConfiguration(WebApplication app, ILogger logger) if (endpoints is not null) { var endpoint = endpoints.Elasticsearch; + var searchIndex = $"{endpoint.IndexNamePrefix.ToLowerInvariant()}-{endpoints.Namespace}-latest"; logger.LogInformation( - "Elasticsearch configuration - Url: {Url}, Index: {Index}", + "Elasticsearch configuration - Url: {Url}, Namespace: {Namespace}, SearchIndex: {SearchIndex}", endpoint.Uri, - endpoint.IndexName + endpoints.Namespace, + searchIndex ); } else diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs index 04db74d4d..8ec6d6d18 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs @@ -58,10 +58,6 @@ public async Task GetParam(string name, bool withDecryption = true, Canc { return GetEnv("DOCUMENTATION_KIBANA_APIKEY"); } - case "docs-elasticsearch-index": - { - return GetEnv("DOCUMENTATION_ELASTIC_INDEX", "semantic-docs-dev-latest"); - } default: { throw new ArgumentException($"Parameter '{name}' not found in {nameof(LocalParameterProvider)}"); diff --git 
a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs index 168926126..aeba0be88 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs @@ -24,7 +24,7 @@ public class DocumentGateway( try { var response = await clientAccessor.Client.SearchAsync(s => s - .Indices(clientAccessor.Endpoint.IndexName) + .Indices(clientAccessor.SearchIndex) .Query(q => q.Term(t => t.Field(f => f.Url.Suffix("keyword")).Value(url))) .Size(1) .Source(sf => sf.Filter(f => f.Includes( @@ -101,7 +101,7 @@ public class DocumentGateway( try { var response = await clientAccessor.Client.SearchAsync(s => s - .Indices(clientAccessor.Endpoint.IndexName) + .Indices(clientAccessor.SearchIndex) .Query(q => q.Term(t => t.Field(f => f.Url.Suffix("keyword")).Value(url))) .Size(1) .Source(sf => sf.Filter(f => f.Includes( diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Program.cs b/src/api/Elastic.Documentation.Mcp.Remote/Program.cs index b9ae29394..3fdde5ac5 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Program.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Program.cs @@ -87,10 +87,12 @@ static void LogElasticsearchConfiguration(WebApplication app, ILogger logger) if (endpoints is not null) { var endpoint = endpoints.Elasticsearch; + var searchIndex = $"{endpoint.IndexNamePrefix.ToLowerInvariant()}-{endpoints.Namespace}-latest"; logger.LogInformation( - "Elasticsearch configuration - Url: {Url}, Index: {Index}", + "Elasticsearch configuration - Url: {Url}, Namespace: {Namespace}, SearchIndex: {SearchIndex}", endpoint.Uri, - endpoint.IndexName + endpoints.Namespace, + searchIndex ); } else diff --git a/src/api/Elastic.Documentation.Mcp.Remote/appsettings.development.json b/src/api/Elastic.Documentation.Mcp.Remote/appsettings.development.json index 15cac94ee..34f00ef13 100644 --- 
a/src/api/Elastic.Documentation.Mcp.Remote/appsettings.development.json +++ b/src/api/Elastic.Documentation.Mcp.Remote/appsettings.development.json @@ -4,6 +4,5 @@ "Default": "Debug", "Microsoft.AspNetCore": "Information" } - }, - "DOCUMENTATION_ELASTIC_INDEX": "semantic-docs-dev-latest" + } } diff --git a/src/api/Elastic.Documentation.Mcp.Remote/appsettings.edge.json b/src/api/Elastic.Documentation.Mcp.Remote/appsettings.edge.json index fe7d17f7b..0c208ae91 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/appsettings.edge.json +++ b/src/api/Elastic.Documentation.Mcp.Remote/appsettings.edge.json @@ -4,6 +4,5 @@ "Default": "Information", "Microsoft.AspNetCore": "Warning" } - }, - "DOCUMENTATION_ELASTIC_INDEX": "semantic-docs-edge-latest" + } } diff --git a/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs b/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs index 747408968..9ee24d757 100644 --- a/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs +++ b/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs @@ -21,6 +21,7 @@ public class ElasticsearchClientAccessor : IDisposable public ElasticsearchClient Client { get; } public ElasticsearchEndpoint Endpoint { get; } public SearchConfiguration SearchConfiguration { get; } + public string SearchIndex { get; } public string? RulesetName { get; } public IReadOnlyDictionary SynonymBiDirectional { get; } public IReadOnlyCollection DiminishTerms { get; } @@ -34,8 +35,11 @@ public ElasticsearchClientAccessor( SearchConfiguration = searchConfiguration; SynonymBiDirectional = searchConfiguration.SynonymBiDirectional; DiminishTerms = searchConfiguration.DiminishTerms; + + SearchIndex = $"{endpoint.IndexNamePrefix.ToLowerInvariant()}-{endpoints.Namespace}-latest"; + RulesetName = searchConfiguration.Rules.Count > 0 - ? ExtractRulesetName(endpoint.IndexName) + ? 
$"docs-ruleset-{endpoints.Namespace}" : null; _nodePool = new SingleNodePool(endpoint.Uri); @@ -49,29 +53,12 @@ public ElasticsearchClientAccessor( _nodePool, sourceSerializer: (_, settings) => new DefaultSourceSerializer(settings, EsJsonContext.Default) ) - .DefaultIndex(endpoint.IndexName) + .DefaultIndex(SearchIndex) .Authentication(auth); Client = new ElasticsearchClient(_clientSettings); } - /// - /// Extracts the ruleset name from the index name. - /// Index name format: "semantic-docs-{namespace}-latest" -> ruleset: "docs-ruleset-{namespace}" - /// The namespace may contain hyphens (e.g., "codex-engineering"), so we extract everything - /// between the "semantic-docs-" prefix and the "-latest" suffix. - /// - private static string? ExtractRulesetName(string indexName) - { - const string prefix = "semantic-docs-"; - const string suffix = "-latest"; - if (!indexName.StartsWith(prefix, StringComparison.Ordinal) || !indexName.EndsWith(suffix, StringComparison.Ordinal)) - return null; - - var ns = indexName[prefix.Length..^suffix.Length]; - return string.IsNullOrEmpty(ns) ? null : $"docs-ruleset-{ns}"; - } - /// /// Tests connectivity to the Elasticsearch cluster. 
/// diff --git a/src/services/Elastic.Documentation.Search/FullSearchGateway.cs b/src/services/Elastic.Documentation.Search/FullSearchGateway.cs index af0b5063e..7d1d3a3cf 100644 --- a/src/services/Elastic.Documentation.Search/FullSearchGateway.cs +++ b/src/services/Elastic.Documentation.Search/FullSearchGateway.cs @@ -104,7 +104,7 @@ private async Task SearchWithHybridRrf(FullSearchRequest reque var response = await clientAccessor.Client.SearchAsync(s => { _ = s - .Indices(clientAccessor.Endpoint.IndexName) + .Indices(clientAccessor.SearchIndex) .From(Math.Max(request.PageNumber - 1, 0) * request.PageSize) .Size(request.PageSize) .Query(filteredQuery) @@ -193,7 +193,7 @@ private async Task SearchLexicalOnly(FullSearchRequest request var response = await clientAccessor.Client.SearchAsync(s => { _ = s - .Indices(clientAccessor.Endpoint.IndexName) + .Indices(clientAccessor.SearchIndex) .From(Math.Max(request.PageNumber - 1, 0) * request.PageSize) .Size(request.PageSize) .Query(filteredQuery) diff --git a/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs b/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs index 797dd0121..dcb20c3e5 100644 --- a/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs +++ b/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs @@ -46,7 +46,7 @@ public async Task SearchImplementation(string query, int var response = await clientAccessor.Client.SearchAsync(s => { _ = s - .Indices(clientAccessor.Endpoint.IndexName) + .Indices(clientAccessor.SearchIndex) .From(Math.Max(pageNumber - 1, 0) * pageSize) .Size(pageSize) .Query(lexicalQuery) @@ -167,7 +167,7 @@ public async Task ExplainDocumentAsync(string query, string docum { // First, find the document by URL var getDocResponse = await clientAccessor.Client.SearchAsync(s => s - .Indices(clientAccessor.Endpoint.IndexName) + .Indices(clientAccessor.SearchIndex) .Query(q => q.Term(t => t.Field(f => f.Url).Value(documentUrl))) 
.Size(1), ctx); @@ -186,7 +186,7 @@ public async Task ExplainDocumentAsync(string query, string docum // Now explain why this document matches (or doesn't match) the query var explainResponse = await clientAccessor.Client.ExplainAsync( - clientAccessor.Endpoint.IndexName, documentId, e => e.Query(combinedQuery), ctx); + clientAccessor.SearchIndex, documentId, e => e.Query(combinedQuery), ctx); if (!explainResponse.IsValidResponse) { diff --git a/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs b/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs index d2d0d696e..67f8570a6 100644 --- a/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs +++ b/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs @@ -72,8 +72,6 @@ private static ElasticsearchClientAccessor CreateElasticsearchClientAccessor() { var endpoints = ElasticsearchEndpointFactory.Create(); - endpoints.Elasticsearch.IndexName = "semantic-docs-dev-latest"; - var searchConfig = new SearchConfiguration { Synonyms = new Dictionary(), diff --git a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs index 9047368e2..34246dc35 100644 --- a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs +++ b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs @@ -224,8 +224,6 @@ private NavigationSearchGateway CreateFindPageGateway() { var endpoints = ElasticsearchEndpointFactory.Create(); - endpoints.Elasticsearch.IndexName = "semantic-docs-dev-latest"; - var searchConfig = new SearchConfiguration { Synonyms = new Dictionary(), From 736af109875b9a90aa981af67ad627de5d9e95c8 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Sun, 22 Feb 2026 16:26:13 +0100 Subject: [PATCH 04/14] Update Elastic.Ingest.Elasticsearch and Elastic.Mapping to 0.24.0 Enable IndexPatternUseBatchDate now that Elastic.Mapping supports it, and 
pass batchTimestamp to IngestChannelOptions in the lexical-only path so the channel uses the exporter's timestamp for index name computation. --- Directory.Packages.props | 4 +- .../Search/DocumentationMappingConfig.cs | 39 +++++-------------- .../ElasticsearchMarkdownExporter.cs | 2 +- 3 files changed, 12 insertions(+), 33 deletions(-) diff --git a/Directory.Packages.props b/Directory.Packages.props index 04966e308..58aadb514 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -48,8 +48,8 @@ - - + + diff --git a/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs b/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs index b36db5ca4..ec5bb391c 100644 --- a/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs +++ b/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs @@ -146,34 +146,10 @@ public static AnalysisBuilder BuildAnalysis(AnalysisBuilder analysis, string syn .Tokenizer("path_tokenizer", t => t.PathHierarchy() .Delimiter('/')); - /// - /// Creates the index settings JSON with analysis configuration and optional default pipeline. - /// - public static string BuildSettingsJson(string synonymSetName, string[] indexTimeSynonyms, string? defaultPipeline = null) - { - var analysis = BuildAnalysis(new AnalysisBuilder(), synonymSetName, indexTimeSynonyms); - var analysisJson = analysis.Build().ToJsonString(); - - if (defaultPipeline is not null) - { - // Merge default_pipeline into the settings JSON - return $$""" - { - "default_pipeline": "{{defaultPipeline}}", - "analysis": {{analysisJson}} - } - """; - } - - return $$""" - { - "analysis": {{analysisJson}} - } - """; - } - /// /// Creates an ElasticsearchTypeContext with runtime analysis settings and dynamic index name. + /// Analysis is provided via , which + /// Elastic.Ingest.Elasticsearch merges into the settings automatically. 
/// public static ElasticsearchTypeContext CreateContext( ElasticsearchTypeContext baseContext, @@ -182,16 +158,19 @@ public static ElasticsearchTypeContext CreateContext( string[] indexTimeSynonyms, string? defaultPipeline = null) { - var settingsJson = BuildSettingsJson(synonymSetName, indexTimeSynonyms, defaultPipeline); - var settingsHash = ContentHash.Create(settingsJson); + var analysisJson = BuildAnalysis(new AnalysisBuilder(), synonymSetName, indexTimeSynonyms).Build().ToJsonString(); + var settingsHash = ContentHash.Create(analysisJson, defaultPipeline ?? ""); var hash = ContentHash.Create(settingsHash, baseContext.MappingsHash); return baseContext.WithIndexName(indexName) with { - GetSettingsJson = () => settingsJson, + GetSettingsJson = defaultPipeline is not null + ? () => $$"""{ "default_pipeline": "{{defaultPipeline}}" }""" + : () => "{}", SettingsHash = settingsHash, Hash = hash, - ConfigureAnalysis = a => BuildAnalysis(a, synonymSetName, indexTimeSynonyms) + ConfigureAnalysis = a => BuildAnalysis(a, synonymSetName, indexTimeSynonyms), + IndexPatternUseBatchDate = true }; } } diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs index 84c0e5c75..0bfa52ef2 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs @@ -134,7 +134,7 @@ IDocumentationConfigurationContext context else { _batchIndexDate = DateTimeOffset.UtcNow; - var options = new IngestChannelOptions(_transport, _lexicalTypeContext); + var options = new IngestChannelOptions(_transport, _lexicalTypeContext, _batchIndexDate); ConfigureChannelOptions(options); _lexicalOnlyChannel = new IngestChannel(options); } From 9a8799376732ee8a43de576b5c6b614ad2b0020b Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Sun, 22 Feb 2026 16:27:43 +0100 Subject: [PATCH 05/14] 
Use centralized ElasticsearchEndpointFactory and add skipOpenApi parameter Simplify DocumentationTooling endpoint resolution by delegating to ElasticsearchEndpointFactory. Add missing skipOpenApi parameter to IsolatedIndexService.Index call. --- .../IsolatedIndexService.cs | 1 + .../docs-builder/DocumentationTooling.cs | 30 +++---------------- 2 files changed, 5 insertions(+), 26 deletions(-) diff --git a/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs b/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs index c4fcc6c0f..81d389c69 100644 --- a/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs +++ b/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs @@ -114,6 +114,7 @@ public async Task Index(IDiagnosticsCollector collector, return await Build(collector, fileSystem, metadataOnly: true, strict: false, path: path, output: null, pathPrefix: null, force: true, allowIndexing: null, exporters: exporters, canonicalBaseUrl: null, + skipOpenApi: true, ctx: ctx); } } diff --git a/src/tooling/docs-builder/DocumentationTooling.cs b/src/tooling/docs-builder/DocumentationTooling.cs index 298d82cd2..f8b3a952c 100644 --- a/src/tooling/docs-builder/DocumentationTooling.cs +++ b/src/tooling/docs-builder/DocumentationTooling.cs @@ -14,6 +14,7 @@ using Elastic.Documentation.Configuration.Search; using Elastic.Documentation.Configuration.Versions; using Elastic.Documentation.Diagnostics; +using Elastic.Documentation.ServiceDefaults; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Logging; @@ -42,33 +43,10 @@ public static TBuilder AddDocumentationToolingDefaults(this TBuilder b return new DiagnosticsCollector([]); return new ConsoleDiagnosticsCollector(logFactory, githubActionsService); }) - .AddSingleton(sp => + .AddSingleton(_ => { - var resolver = sp.GetRequiredService(); - var elasticsearchUri = ResolveServiceEndpoint(resolver, - () => 
TryEnvVars("http://localhost:9200", "DOCUMENTATION_ELASTIC_URL", "CONNECTIONSTRINGS__ELASTICSEARCH") - ); - var elasticsearchPassword = - elasticsearchUri.UserInfo is { } userInfo && userInfo.Contains(':') - ? userInfo.Split(':')[1] - : TryEnvVarsOptional("DOCUMENTATION_ELASTIC_PASSWORD"); - - var elasticsearchUser = - elasticsearchUri.UserInfo is { } userInfo2 && userInfo2.Contains(':') - ? userInfo2.Split(':')[0] - : TryEnvVars("elastic", "DOCUMENTATION_ELASTIC_USERNAME"); - - var elasticsearchApiKey = TryEnvVarsOptional("DOCUMENTATION_ELASTIC_APIKEY"); - return new DocumentationEndpoints - { - Elasticsearch = new ElasticsearchEndpoint - { - Uri = elasticsearchUri, - Password = elasticsearchPassword, - ApiKey = elasticsearchApiKey, - Username = elasticsearchUser - }, - }; + var endpoints = ElasticsearchEndpointFactory.Create(builder.Configuration); + return endpoints; }) .AddSingleton(sp => { From 3e8034abcc9e0d57a278e66ba3b48795c16e6999 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Sun, 22 Feb 2026 16:53:53 +0100 Subject: [PATCH 06/14] Remove --no-semantic flag entirely The lexical-only code path manually reimplemented drain, delete-stale, refresh, and alias logic that the orchestrator handles automatically. Remove the flag end-to-end: CLI parameters, configuration, exporter branching, and CLI documentation. 
--- docs/cli/assembler/assembler-index.md | 3 - docs/cli/docset/index-command.md | 3 - .../DocumentationEndpoints.cs | 1 - .../ElasticsearchEndpointConfigurator.cs | 3 - .../ElasticsearchMarkdownExporter.cs | 130 +++++------------- .../Indexing/AssemblerIndexService.cs | 3 - .../IsolatedIndexService.cs | 3 - .../Assembler/AssemblerIndexCommand.cs | 6 +- .../Commands/Codex/CodexIndexCommand.cs | 3 - .../docs-builder/Commands/IndexCommand.cs | 6 +- 10 files changed, 38 insertions(+), 123 deletions(-) diff --git a/docs/cli/assembler/assembler-index.md b/docs/cli/assembler/assembler-index.md index 5d551e4b4..8ae72ddcd 100644 --- a/docs/cli/assembler/assembler-index.md +++ b/docs/cli/assembler/assembler-index.md @@ -29,9 +29,6 @@ docs-builder assembler index [options...] [-h|--help] [--version] `--password` `` : Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD (optional) -`--no-semantic` `` -: Index without semantic fields (optional) - `--search-num-threads` `` : The number of search threads the inference endpoint should use. Defaults: 8 (optional) diff --git a/docs/cli/docset/index-command.md b/docs/cli/docset/index-command.md index 32aa3a25b..00e28cf1c 100644 --- a/docs/cli/docset/index-command.md +++ b/docs/cli/docset/index-command.md @@ -25,9 +25,6 @@ docs-builder index [options...] [-h|--help] [--version] `--password` `` : Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD (optional) -`--no-semantic` `` -: Index without semantic fields (optional) - `--search-num-threads` `` : The number of search threads the inference endpoint should use. 
Defaults: 8 (optional) diff --git a/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs b/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs index 69282068f..3636c0478 100644 --- a/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs +++ b/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs @@ -44,7 +44,6 @@ public class ElasticsearchEndpoint public X509Certificate? Certificate { get; set; } public bool CertificateIsNotRoot { get; set; } public int? BootstrapTimeout { get; set; } - public bool NoSemantic { get; set; } public bool ForceReindex { get; set; } /// diff --git a/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs b/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs index e8031368c..035fe559c 100644 --- a/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs +++ b/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs @@ -21,7 +21,6 @@ public record ElasticsearchIndexOptions public string? Password { get; init; } // inference options - public bool? NoSemantic { get; init; } public bool? EnableAiEnrichment { get; init; } public int? SearchNumThreads { get; init; } public int? 
IndexNumThreads { get; init; } @@ -117,8 +116,6 @@ public static async Task ApplyAsync( if (options.BootstrapTimeout.HasValue) cfg.BootstrapTimeout = options.BootstrapTimeout.Value; - if (options.NoSemantic.HasValue) - cfg.NoSemantic = options.NoSemantic.Value; if (options.EnableAiEnrichment.HasValue) cfg.EnableAiEnrichment = options.EnableAiEnrichment.Value; if (options.ForceReindex.HasValue) diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs index 0bfa52ef2..300a1a6bb 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs @@ -30,16 +30,12 @@ public partial class ElasticsearchMarkdownExporter : IMarkdownExporter, IDisposa private readonly string _indexNamespace; private readonly DateTimeOffset _batchIndexDate; - // Ingest: orchestrator for dual-index mode, plain channel for --no-semantic - private readonly IncrementalSyncOrchestrator? _orchestrator; - private readonly IngestChannel? _lexicalOnlyChannel; + // Ingest: orchestrator for dual-index mode + private readonly IncrementalSyncOrchestrator _orchestrator; // Type context hashes for document content hash computation private readonly ElasticsearchTypeContext _lexicalTypeContext; - private readonly ElasticsearchTypeContext? 
_semanticTypeContext; - - // Alias names for queries/statistics - private readonly string _lexicalAlias; + private readonly ElasticsearchTypeContext _semanticTypeContext; private readonly IReadOnlyDictionary _synonyms; private readonly IReadOnlyCollection _rules; @@ -91,11 +87,11 @@ IDocumentationConfigurationContext context var synonymSetName = $"docs-{indexNamespace}"; var ns = indexNamespace.ToLowerInvariant(); var lexicalPrefix = es.IndexNamePrefix.Replace("semantic", "lexical").ToLowerInvariant(); - _lexicalAlias = $"{lexicalPrefix}-{ns}"; + var lexicalAlias = $"{lexicalPrefix}-{ns}"; _lexicalTypeContext = DocumentationAnalysisFactory.CreateContext( DocumentationMappingContext.DocumentationDocument.Context, - _lexicalAlias, synonymSetName, indexTimeSynonyms, aiPipeline + lexicalAlias, synonymSetName, indexTimeSynonyms, aiPipeline ); // Initialize AI enrichment services if enabled @@ -106,38 +102,28 @@ IDocumentationConfigurationContext context _enrichPolicyManager = new EnrichPolicyManager(_transport, logFactory.CreateLogger(), _enrichmentCache.IndexName); } - if (!es.NoSemantic) - { - var semanticAlias = $"{es.IndexNamePrefix.ToLowerInvariant()}-{ns}"; - _semanticTypeContext = DocumentationAnalysisFactory.CreateContext( - DocumentationMappingContext.DocumentationDocumentSemantic.Context, - semanticAlias, synonymSetName, indexTimeSynonyms, aiPipeline - ); - - _orchestrator = new IncrementalSyncOrchestrator(_transport, _lexicalTypeContext, _semanticTypeContext) - { - ConfigurePrimary = ConfigureChannelOptions, - ConfigureSecondary = ConfigureChannelOptions, - OnPostComplete = es.EnableAiEnrichment - ? 
async (ctx, ct) => await PostCompleteAsync(ctx, ct) - : null - }; - _ = _orchestrator.AddPreBootstrapTask(async (_, ct) => - { - await InitializeEnrichmentAsync(ct); - await PublishSynonymsAsync(ct); - await PublishQueryRulesAsync(ct); - }); + var semanticAlias = $"{es.IndexNamePrefix.ToLowerInvariant()}-{ns}"; + _semanticTypeContext = DocumentationAnalysisFactory.CreateContext( + DocumentationMappingContext.DocumentationDocumentSemantic.Context, + semanticAlias, synonymSetName, indexTimeSynonyms, aiPipeline + ); - _batchIndexDate = _orchestrator.BatchTimestamp; - } - else + _orchestrator = new IncrementalSyncOrchestrator(_transport, _lexicalTypeContext, _semanticTypeContext) { - _batchIndexDate = DateTimeOffset.UtcNow; - var options = new IngestChannelOptions(_transport, _lexicalTypeContext, _batchIndexDate); - ConfigureChannelOptions(options); - _lexicalOnlyChannel = new IngestChannel(options); - } + ConfigurePrimary = ConfigureChannelOptions, + ConfigureSecondary = ConfigureChannelOptions, + OnPostComplete = es.EnableAiEnrichment + ? 
async (ctx, ct) => await PostCompleteAsync(ctx, ct) + : null + }; + _ = _orchestrator.AddPreBootstrapTask(async (_, ct) => + { + await InitializeEnrichmentAsync(ct); + await PublishSynonymsAsync(ct); + await PublishQueryRulesAsync(ct); + }); + + _batchIndexDate = _orchestrator.BatchTimestamp; } private void ConfigureChannelOptions(IngestChannelOptions options) @@ -167,51 +153,13 @@ private void ConfigureChannelOptions(IngestChannelOptions /// public async ValueTask StartAsync(Cancel ctx = default) { - if (_orchestrator is not null) - { - _ = await _orchestrator.StartAsync(BootstrapMethod.Failure, ctx); - _logger.LogInformation("Orchestrator started with {Strategy} strategy", _orchestrator.Strategy); - return; - } - - // NoSemantic path - await InitializeEnrichmentAsync(ctx); - await PublishSynonymsAsync(ctx); - await PublishQueryRulesAsync(ctx); - _ = await _lexicalOnlyChannel!.BootstrapElasticsearchAsync(BootstrapMethod.Failure, ctx); + _ = await _orchestrator.StartAsync(BootstrapMethod.Failure, ctx); + _logger.LogInformation("Orchestrator started with {Strategy} strategy", _orchestrator.Strategy); } /// - public async ValueTask StopAsync(Cancel ctx = default) - { - if (_orchestrator is not null) - { - _ = await _orchestrator.CompleteAsync(null, ctx); - return; - } - - // NoSemantic path — drain, delete stale, refresh, alias - var drained = await _lexicalOnlyChannel!.WaitForDrainAsync(null, ctx); - if (!drained) - _collector.EmitGlobalError("Elasticsearch export: failed to drain in a timely fashion"); - - // Delete stale documents not part of this batch - var deleteQuery = PostData.String($$""" - { - "query": { - "range": { - "batch_index_date": { - "lt": "{{_batchIndexDate:o}}" - } - } - } - } - """); - await _operations.DeleteByQueryAsync(_lexicalAlias, deleteQuery, ctx); - - _ = await _lexicalOnlyChannel.RefreshAsync(ctx); - _ = await _lexicalOnlyChannel.ApplyAliasesAsync(_lexicalAlias, ctx); - } + public async ValueTask StopAsync(Cancel ctx = default) => + _ = 
await _orchestrator.CompleteAsync(null, ctx); private async Task InitializeEnrichmentAsync(Cancel ctx) { @@ -251,7 +199,7 @@ private async ValueTask ExecuteEnrichPolicyIfNeededAsync(string? semanticAlias, private async ValueTask BackfillMissingAiFieldsAsync(string semanticAlias, Cancel ctx) { - if (_endpoint.NoSemantic || _enrichmentCache is null || _llmClient is null) + if (_enrichmentCache is null || _llmClient is null) return; var currentPromptHash = ElasticsearchLlmClient.PromptHash; @@ -354,26 +302,16 @@ private async Task PutQueryRuleset(QueryRuleset ruleset, string rulesetName, Can internal async ValueTask WriteDocumentAsync(DocumentationDocument doc, Cancel ctx) { - if (_orchestrator is not null) - { - if (_orchestrator.TryWrite(doc)) - return true; - _ = await _orchestrator.WaitToWriteAsync(doc, ctx); - return true; - } - - if (_lexicalOnlyChannel!.TryWrite(doc)) + if (_orchestrator.TryWrite(doc)) return true; - if (await _lexicalOnlyChannel.WaitToWriteAsync(ctx)) - return _lexicalOnlyChannel.TryWrite(doc); - return false; + _ = await _orchestrator.WaitToWriteAsync(doc, ctx); + return true; } /// public void Dispose() { - _orchestrator?.Dispose(); - _lexicalOnlyChannel?.Dispose(); + _orchestrator.Dispose(); _llmClient?.Dispose(); GC.SuppressFinalize(this); } diff --git a/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs b/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs index 1e44b5c93..cb84652c2 100644 --- a/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs +++ b/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs @@ -32,7 +32,6 @@ ICoreService githubActionsService /// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY /// Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME /// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD - /// Index 
without semantic fields /// Enable AI enrichment of documents using LLM-generated metadata /// The number of search threads the inference endpoint should use. Defaults: 8 /// The number of index threads the inference endpoint should use. Defaults: 8 @@ -60,7 +59,6 @@ public async Task Index(IDiagnosticsCollector collector, string? username = null, string? password = null, // inference options - bool? noSemantic = null, bool? enableAiEnrichment = null, int? searchNumThreads = null, int? indexNumThreads = null, @@ -91,7 +89,6 @@ public async Task Index(IDiagnosticsCollector collector, ApiKey = apiKey, Username = username, Password = password, - NoSemantic = noSemantic, EnableAiEnrichment = enableAiEnrichment, SearchNumThreads = searchNumThreads, IndexNumThreads = indexNumThreads, diff --git a/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs b/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs index 81d389c69..9d996c8ed 100644 --- a/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs +++ b/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs @@ -29,7 +29,6 @@ ICoreService githubActionsService /// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY /// Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME /// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD - /// Index without semantic fields /// Enable AI enrichment of documents using LLM-generated metadata /// The number of search threads the inference endpoint should use. Defaults: 8 /// The number of index threads the inference endpoint should use. Defaults: 8 @@ -57,7 +56,6 @@ public async Task Index(IDiagnosticsCollector collector, string? username = null, string? password = null, // inference options - bool? noSemantic = null, bool? enableAiEnrichment = null, int? searchNumThreads = null, int? 
indexNumThreads = null, @@ -88,7 +86,6 @@ public async Task Index(IDiagnosticsCollector collector, ApiKey = apiKey, Username = username, Password = password, - NoSemantic = noSemantic, EnableAiEnrichment = enableAiEnrichment, SearchNumThreads = searchNumThreads, IndexNumThreads = indexNumThreads, diff --git a/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs b/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs index 115dda5b9..ba3e8ebc9 100644 --- a/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs +++ b/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs @@ -30,7 +30,6 @@ ICoreService githubActionsService /// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY /// Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME /// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD - /// Index without semantic fields /// Enable AI enrichment of documents using LLM-generated metadata /// The number of search threads the inference endpoint should use. Defaults: 8 /// The number of index threads the inference endpoint should use. Defaults: 8 @@ -59,7 +58,6 @@ public async Task Index( string? password = null, // inference options - bool? noSemantic = null, bool? enableAiEnrichment = null, int? searchNumThreads = null, int? 
indexNumThreads = null, @@ -97,7 +95,7 @@ public async Task Index( // endpoint options endpoint, environment, apiKey, username, password, // inference options - noSemantic, enableAiEnrichment, indexNumThreads, searchNumThreads, noEis, bootstrapTimeout, + enableAiEnrichment, indexNumThreads, searchNumThreads, noEis, bootstrapTimeout, // channel and connection options indexNamePrefix, forceReindex, bufferSize, maxRetries, debugMode, // proxy options @@ -110,7 +108,7 @@ static async (s, collector, state, ctx) => await s.Index(collector, state.fs, // endpoint options state.endpoint, state.environment, state.apiKey, state.username, state.password, // inference options - state.noSemantic, state.enableAiEnrichment, state.searchNumThreads, state.indexNumThreads, state.noEis, state.bootstrapTimeout, + state.enableAiEnrichment, state.searchNumThreads, state.indexNumThreads, state.noEis, state.bootstrapTimeout, // channel and connection options state.indexNamePrefix, state.forceReindex, state.bufferSize, state.maxRetries, state.debugMode, // proxy options diff --git a/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs b/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs index 2af22611d..57ba9bdf2 100644 --- a/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs +++ b/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs @@ -35,7 +35,6 @@ ICoreService githubActionsService /// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY /// Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME /// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD - /// Index without semantic fields /// Enable AI enrichment of documents using LLM-generated metadata /// The number of search threads the inference endpoint should use. Defaults: 8 /// The number of index threads the inference endpoint should use. Defaults: 8 @@ -64,7 +63,6 @@ public async Task Index( string? 
password = null, // inference options - bool? noSemantic = null, bool? enableAiEnrichment = null, int? searchNumThreads = null, int? indexNumThreads = null, @@ -126,7 +124,6 @@ public async Task Index( ApiKey = apiKey, Username = username, Password = password, - NoSemantic = noSemantic, EnableAiEnrichment = enableAiEnrichment, SearchNumThreads = searchNumThreads, IndexNumThreads = indexNumThreads, diff --git a/src/tooling/docs-builder/Commands/IndexCommand.cs b/src/tooling/docs-builder/Commands/IndexCommand.cs index efc1af596..ad34f4eaf 100644 --- a/src/tooling/docs-builder/Commands/IndexCommand.cs +++ b/src/tooling/docs-builder/Commands/IndexCommand.cs @@ -28,7 +28,6 @@ ICoreService githubActionsService /// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY /// Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME /// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD - /// Index without semantic fields /// Enable AI enrichment of documents using LLM-generated metadata /// The number of search threads the inference endpoint should use. Defaults: 8 /// The number of index threads the inference endpoint should use. Defaults: 8 @@ -57,7 +56,6 @@ public async Task Index( string? password = null, // inference options - bool? noSemantic = null, bool? enableAiEnrichment = null, int? searchNumThreads = null, int? 
indexNumThreads = null, @@ -95,7 +93,7 @@ public async Task Index( // endpoint options endpoint, apiKey, username, password, // inference options - noSemantic, enableAiEnrichment, indexNumThreads, noEis, searchNumThreads, bootstrapTimeout, + enableAiEnrichment, indexNumThreads, noEis, searchNumThreads, bootstrapTimeout, // channel and connection options indexNamePrefix, forceReindex, bufferSize, maxRetries, debugMode, // proxy options @@ -108,7 +106,7 @@ static async (s, collector, state, ctx) => await s.Index(collector, state.fs, st // endpoint options state.endpoint, state.apiKey, state.username, state.password, // inference options - state.noSemantic, state.enableAiEnrichment, state.searchNumThreads, state.indexNumThreads, state.noEis, state.bootstrapTimeout, + state.enableAiEnrichment, state.searchNumThreads, state.indexNumThreads, state.noEis, state.bootstrapTimeout, // channel and connection options state.indexNamePrefix, state.forceReindex, state.bufferSize, state.maxRetries, state.debugMode, // proxy options From 8f485d1a2aa1fdd7325990e6975b333f7d9dea55 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Sun, 22 Feb 2026 18:22:08 +0100 Subject: [PATCH 07/14] Add Jina v5 dense embeddings alongside ELSER sparse embeddings Add .jina-embeddings-v5-text-small inference on 6 fields (title, abstract, ai_rag_optimized_summary, ai_questions, ai_use_cases, stripped_body) to enable hybrid sparse+dense retrieval. Rename InferenceId to ElserInferenceId for clarity. 
--- .../Search/DocumentationMappingConfig.cs | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs b/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs index ec5bb391c..d3fc5e706 100644 --- a/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs +++ b/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs @@ -88,17 +88,31 @@ internal static DocumentationDocumentMappingsBuilder ConfigureCommonMappings(Doc public static class SemanticConfig { - private const string InferenceId = ".elser-2-elastic"; + private const string ElserInferenceId = ".elser-2-elastic"; + private const string JinaInferenceId = ".jina-embeddings-v5-text-small"; public static AnalysisBuilder ConfigureAnalysis(AnalysisBuilder analysis) => analysis; public static DocumentationDocumentMappingsBuilder ConfigureMappings(DocumentationDocumentMappingsBuilder m) => LexicalConfig.ConfigureCommonMappings(m) - .AddField("title.semantic_text", f => f.SemanticText().InferenceId(InferenceId)) - .AddField("abstract.semantic_text", f => f.SemanticText().InferenceId(InferenceId)) - .AddField("ai_rag_optimized_summary.semantic_text", f => f.SemanticText().InferenceId(InferenceId)) - .AddField("ai_questions.semantic_text", f => f.SemanticText().InferenceId(InferenceId)) - .AddField("ai_use_cases.semantic_text", f => f.SemanticText().InferenceId(InferenceId)); + .StrippedBody(s => s + .Analyzer("synonyms_fixed_analyzer") + .SearchAnalyzer("synonyms_analyzer") + .MultiField("jina", mf => mf.Text().Analyzer(JinaInferenceId)) + ) + // ELSER sparse embeddings + .AddField("title.semantic_text", f => f.SemanticText().InferenceId(ElserInferenceId)) + .AddField("abstract.semantic_text", f => f.SemanticText().InferenceId(ElserInferenceId)) + .AddField("ai_rag_optimized_summary.semantic_text", f => f.SemanticText().InferenceId(ElserInferenceId)) + .AddField("ai_questions.semantic_text", f => 
f.SemanticText().InferenceId(ElserInferenceId)) + .AddField("ai_use_cases.semantic_text", f => f.SemanticText().InferenceId(ElserInferenceId)) + // Jina v5 dense embeddings + .AddField("title.jina", f => f.SemanticText().InferenceId(JinaInferenceId)) + .AddField("abstract.jina", f => f.SemanticText().InferenceId(JinaInferenceId)) + .AddField("ai_rag_optimized_summary.jina", f => f.SemanticText().InferenceId(JinaInferenceId)) + .AddField("ai_questions.jina", f => f.SemanticText().InferenceId(JinaInferenceId)) + .AddField("ai_use_cases.jina", f => f.SemanticText().InferenceId(JinaInferenceId)) + .AddField("stripped_body.jina", f => f.SemanticText().InferenceId(JinaInferenceId)); } /// From d8f4a32a00363f5d864600798220a77409e1b815 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Sun, 22 Feb 2026 19:03:17 +0100 Subject: [PATCH 08/14] fix import ordering --- src/api/Elastic.Documentation.Mcp.Remote/Program.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Program.cs b/src/api/Elastic.Documentation.Mcp.Remote/Program.cs index 23d5495b8..129821347 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Program.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Program.cs @@ -3,9 +3,9 @@ // See the LICENSE file in the project root for more information using Elastic.Documentation.Api.Infrastructure.OpenTelemetry; -using Elastic.Documentation.Configuration; using Elastic.Documentation.Assembler.Links; using Elastic.Documentation.Assembler.Mcp; +using Elastic.Documentation.Configuration; using Elastic.Documentation.LinkIndex; using Elastic.Documentation.Links.InboundLinks; using Elastic.Documentation.Mcp.Remote; From 6d5802f428867efd1600e7c4bcf50774f3ccaa06 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Mon, 23 Feb 2026 11:00:51 +0100 Subject: [PATCH 09/14] Bump ingest libraries --- Directory.Packages.props | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Directory.Packages.props 
b/Directory.Packages.props index ff43854b8..093b318be 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -48,8 +48,8 @@ - - + + From a1a4ea04d2f69cd80835c4c1ba206206d4d2bfbb Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Tue, 24 Feb 2026 09:20:53 +0100 Subject: [PATCH 10/14] Update Elastic.Ingest.Elasticsearch and Elastic.Mapping to 0.27.0 Use source-generated IStaticMappingResolver delegates for auto-stamping BatchIndexDate and LastUpdated instead of manual assignment. Replace DocumentationAnalysisFactory.CreateContext with direct context customization via WithIndexName() and record-with expressions. Pass IndexSettings for default_pipeline conditionally at runtime. --- Directory.Packages.props | 4 +- .../Search/DocumentationDocument.cs | 2 + .../Search/DocumentationMappingConfig.cs | 40 ++--------- .../ElasticsearchMarkdownExporter.Export.cs | 2 - .../ElasticsearchMarkdownExporter.cs | 67 +++++++++++-------- 5 files changed, 49 insertions(+), 66 deletions(-) diff --git a/Directory.Packages.props b/Directory.Packages.props index 093b318be..20f8117f1 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -48,8 +48,8 @@ - - + + diff --git a/src/Elastic.Documentation/Search/DocumentationDocument.cs b/src/Elastic.Documentation/Search/DocumentationDocument.cs index e25ded0ab..bfbaace5d 100644 --- a/src/Elastic.Documentation/Search/DocumentationDocument.cs +++ b/src/Elastic.Documentation/Search/DocumentationDocument.cs @@ -73,10 +73,12 @@ public record DocumentationDocument /// The date of the batch update this document was part of last. /// This date could be higher than the date_last_updated. 
+ [BatchIndexDate] [JsonPropertyName("batch_index_date")] public DateTimeOffset BatchIndexDate { get; set; } /// The date this document was last updated, + [LastUpdated] [Timestamp] [JsonPropertyName("last_updated")] public DateTimeOffset LastUpdated { get; set; } diff --git a/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs b/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs index d3fc5e706..e07f0cbf0 100644 --- a/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs +++ b/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs @@ -34,7 +34,11 @@ public static class LexicalConfig public static AnalysisBuilder ConfigureAnalysis(AnalysisBuilder analysis) => analysis; public static DocumentationDocumentMappingsBuilder ConfigureMappings(DocumentationDocumentMappingsBuilder m) => - ConfigureCommonMappings(m); + ConfigureCommonMappings(m) + .StrippedBody(f => f + .Analyzer("synonyms_fixed_analyzer") + .SearchAnalyzer("synonyms_analyzer") + ); internal static DocumentationDocumentMappingsBuilder ConfigureCommonMappings(DocumentationDocumentMappingsBuilder m) => m // Text fields with custom analyzers and multi-fields @@ -51,9 +55,6 @@ internal static DocumentationDocumentMappingsBuilder ConfigureCommonMappings(Doc .Analyzer("starts_with_analyzer") .SearchAnalyzer("starts_with_analyzer_search")) .MultiField("completion", mf => mf.SearchAsYouType().SearchAnalyzer("synonyms_analyzer"))) - .StrippedBody(f => f - .Analyzer("synonyms_fixed_analyzer") - .SearchAnalyzer("synonyms_analyzer")) .Abstract(f => f .Analyzer("synonyms_fixed_analyzer") .SearchAnalyzer("synonyms_analyzer")) @@ -98,7 +99,6 @@ public static DocumentationDocumentMappingsBuilder ConfigureMappings(Documentati .StrippedBody(s => s .Analyzer("synonyms_fixed_analyzer") .SearchAnalyzer("synonyms_analyzer") - .MultiField("jina", mf => mf.Text().Analyzer(JinaInferenceId)) ) // ELSER sparse embeddings .AddField("title.semantic_text", f => 
f.SemanticText().InferenceId(ElserInferenceId)) @@ -111,8 +111,7 @@ public static DocumentationDocumentMappingsBuilder ConfigureMappings(Documentati .AddField("abstract.jina", f => f.SemanticText().InferenceId(JinaInferenceId)) .AddField("ai_rag_optimized_summary.jina", f => f.SemanticText().InferenceId(JinaInferenceId)) .AddField("ai_questions.jina", f => f.SemanticText().InferenceId(JinaInferenceId)) - .AddField("ai_use_cases.jina", f => f.SemanticText().InferenceId(JinaInferenceId)) - .AddField("stripped_body.jina", f => f.SemanticText().InferenceId(JinaInferenceId)); + .AddField("ai_use_cases.jina", f => f.SemanticText().InferenceId(JinaInferenceId)); } /// @@ -160,31 +159,4 @@ public static AnalysisBuilder BuildAnalysis(AnalysisBuilder analysis, string syn .Tokenizer("path_tokenizer", t => t.PathHierarchy() .Delimiter('/')); - /// - /// Creates an ElasticsearchTypeContext with runtime analysis settings and dynamic index name. - /// Analysis is provided via , which - /// Elastic.Ingest.Elasticsearch merges into the settings automatically. - /// - public static ElasticsearchTypeContext CreateContext( - ElasticsearchTypeContext baseContext, - string indexName, - string synonymSetName, - string[] indexTimeSynonyms, - string? defaultPipeline = null) - { - var analysisJson = BuildAnalysis(new AnalysisBuilder(), synonymSetName, indexTimeSynonyms).Build().ToJsonString(); - var settingsHash = ContentHash.Create(analysisJson, defaultPipeline ?? ""); - var hash = ContentHash.Create(settingsHash, baseContext.MappingsHash); - - return baseContext.WithIndexName(indexName) with - { - GetSettingsJson = defaultPipeline is not null - ? 
() => $$"""{ "default_pipeline": "{{defaultPipeline}}" }""" - : () => "{}", - SettingsHash = settingsHash, - Hash = hash, - ConfigureAnalysis = a => BuildAnalysis(a, synonymSetName, indexTimeSynonyms), - IndexPatternUseBatchDate = true - }; - } } diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs index a4c2172d8..e69dfc9e5 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs @@ -37,8 +37,6 @@ private void AssignDocumentMetadata(DocumentationDocument doc) _fixedSynonymsHash ); doc.Hash = hash; - doc.LastUpdated = _batchIndexDate; - doc.BatchIndexDate = _batchIndexDate; } private static void CommonEnrichments(DocumentationDocument doc, INavigationItem? navigationItem) diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs index 300a1a6bb..965ebd179 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs @@ -28,7 +28,6 @@ public partial class ElasticsearchMarkdownExporter : IMarkdownExporter, IDisposa private readonly ElasticsearchEndpoint _endpoint; private readonly DistributedTransport _transport; private readonly string _indexNamespace; - private readonly DateTimeOffset _batchIndexDate; // Ingest: orchestrator for dual-index mode private readonly IncrementalSyncOrchestrator _orchestrator; @@ -89,10 +88,16 @@ IDocumentationConfigurationContext context var lexicalPrefix = es.IndexNamePrefix.Replace("semantic", "lexical").ToLowerInvariant(); var lexicalAlias = $"{lexicalPrefix}-{ns}"; - _lexicalTypeContext = DocumentationAnalysisFactory.CreateContext( - 
DocumentationMappingContext.DocumentationDocument.Context, - lexicalAlias, synonymSetName, indexTimeSynonyms, aiPipeline - ); + var pipelineSettings = aiPipeline is not null + ? new Dictionary { ["index.default_pipeline"] = aiPipeline } + : null; + + _lexicalTypeContext = DocumentationMappingContext.DocumentationDocument.Context + .WithIndexName(lexicalAlias) with + { + ConfigureAnalysis = a => DocumentationAnalysisFactory.BuildAnalysis(a, synonymSetName, indexTimeSynonyms), + IndexSettings = pipelineSettings + }; // Initialize AI enrichment services if enabled if (es.EnableAiEnrichment) @@ -103,17 +108,23 @@ IDocumentationConfigurationContext context } var semanticAlias = $"{es.IndexNamePrefix.ToLowerInvariant()}-{ns}"; - _semanticTypeContext = DocumentationAnalysisFactory.CreateContext( - DocumentationMappingContext.DocumentationDocumentSemantic.Context, - semanticAlias, synonymSetName, indexTimeSynonyms, aiPipeline - ); + _semanticTypeContext = DocumentationMappingContext.DocumentationDocumentSemantic.Context + .WithIndexName(semanticAlias) with + { + ConfigureAnalysis = a => DocumentationAnalysisFactory.BuildAnalysis(a, synonymSetName, indexTimeSynonyms), + IndexSettings = pipelineSettings + }; - _orchestrator = new IncrementalSyncOrchestrator(_transport, _lexicalTypeContext, _semanticTypeContext) + var resolver = DocumentationMappingContext.DocumentationDocument; + _orchestrator = new IncrementalSyncOrchestrator( + _transport, _lexicalTypeContext, _semanticTypeContext, + setBatchIndexDate: resolver.SetBatchIndexDate, + setLastUpdated: resolver.SetLastUpdated) { ConfigurePrimary = ConfigureChannelOptions, ConfigureSecondary = ConfigureChannelOptions, OnPostComplete = es.EnableAiEnrichment - ? async (ctx, ct) => await PostCompleteAsync(ctx, ct) + ? 
async (ctx, _, ct) => await PostCompleteAsync(ctx, ct) : null }; _ = _orchestrator.AddPreBootstrapTask(async (_, ct) => @@ -122,8 +133,6 @@ IDocumentationConfigurationContext context await PublishSynonymsAsync(ct); await PublishQueryRulesAsync(ct); }); - - _batchIndexDate = _orchestrator.BatchTimestamp; } private void ConfigureChannelOptions(IngestChannelOptions options) @@ -209,19 +218,19 @@ private async ValueTask BackfillMissingAiFieldsAsync(string semanticAlias, Cance _enrichmentCache.Count, currentPromptHash[..8]); var query = $$""" - { - "query": { - "bool": { - "must": { "exists": { "field": "enrichment_key" } }, - "should": [ - { "bool": { "must_not": { "exists": { "field": "ai_questions" } } } }, - { "bool": { "must_not": { "term": { "enrichment_prompt_hash": "{{currentPromptHash}}" } } } } - ], - "minimum_should_match": 1 - } - } - } - """; + { + "query": { + "bool": { + "must": { "exists": { "field": "enrichment_key" } }, + "should": [ + { "bool": { "must_not": { "exists": { "field": "ai_questions" } } } }, + { "bool": { "must_not": { "term": { "enrichment_prompt_hash": "{{currentPromptHash}}" } } } } + ], + "minimum_should_match": 1 + } + } + } + """; await _operations.UpdateByQueryAsync(semanticAlias, PostData.String(query), EnrichPolicyManager.PipelineName, ctx); } @@ -252,7 +261,8 @@ private async Task PutSynonyms(SynonymsSet synonymsSet, string setName, Cancel c ctx); if (!response.ApiCallDetails.HasSuccessfulStatusCode) - _collector.EmitGlobalError($"Failed to publish synonym set '{setName}'. Reason: {response.ApiCallDetails.OriginalException?.Message ?? response.ToString()}"); + _collector.EmitGlobalError( + $"Failed to publish synonym set '{setName}'. Reason: {response.ApiCallDetails.OriginalException?.Message ?? 
response.ToString()}"); else _logger.LogInformation("Successfully published synonym set '{SetName}'.", setName); } @@ -295,7 +305,8 @@ private async Task PutQueryRuleset(QueryRuleset ruleset, string rulesetName, Can ctx); if (!response.ApiCallDetails.HasSuccessfulStatusCode) - _collector.EmitGlobalError($"Failed to publish query ruleset '{rulesetName}'. Reason: {response.ApiCallDetails.OriginalException?.Message ?? response.ToString()}"); + _collector.EmitGlobalError( + $"Failed to publish query ruleset '{rulesetName}'. Reason: {response.ApiCallDetails.OriginalException?.Message ?? response.ToString()}"); else _logger.LogInformation("Successfully published query ruleset '{RulesetName}'.", rulesetName); } From 7f578324ec6fa4b699a7faa1b409cb19c6b8e5a7 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Tue, 24 Feb 2026 18:06:32 +0100 Subject: [PATCH 11/14] Fix template parameter semantics: {type} = build type, {env} = environment Rename indexNamespace to buildType throughout the exporter pipeline so callers pass the build type (assembler, isolated, codex) instead of the environment name. Search services now hardcode "assembler" as the type since they always target assembler indices. ResolveNamespace renamed to ResolveEnvironment and updated to parse the old production index format ({variant}-docs-{env}-{timestamp}) to extract the environment name. 
--- .../Building/CodexBuildService.cs | 2 +- .../ElasticsearchEndpointFactory.cs | 30 +++++++++++-------- .../Search/DocumentationMappingConfig.cs | 2 +- .../ElasticsearchMarkdownExporter.cs | 22 +++++--------- .../Exporters/ExporterExtensions.cs | 4 +-- .../Elastic.Documentation.Api.App/Program.cs | 5 +++- .../Program.cs | 4 ++- .../Building/AssemblerBuildService.cs | 2 +- .../Building/AssemblerBuilder.cs | 4 +-- .../Common/ElasticsearchClientAccessor.cs | 10 +++++-- .../Search/SearchBootstrapFixture.cs | 10 +++---- 11 files changed, 50 insertions(+), 45 deletions(-) diff --git a/src/Elastic.Codex/Building/CodexBuildService.cs b/src/Elastic.Codex/Building/CodexBuildService.cs index 8db6350d2..a502ff52c 100644 --- a/src/Elastic.Codex/Building/CodexBuildService.cs +++ b/src/Elastic.Codex/Building/CodexBuildService.cs @@ -85,7 +85,7 @@ public async Task BuildAll( if (exporters is not null && buildContexts.Count > 0) { var firstContext = buildContexts[0].BuildContext; - sharedExporters = exporters.CreateMarkdownExporters(logFactory, firstContext, context.IndexNamespace).ToArray(); + sharedExporters = exporters.CreateMarkdownExporters(logFactory, firstContext, "codex").ToArray(); var startTasks = sharedExporters.Select(async e => await e.StartAsync(ctx)); await Task.WhenAll(startTasks); } diff --git a/src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs b/src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs index d5e6cd6ac..86def5bbc 100644 --- a/src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs +++ b/src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs @@ -56,39 +56,43 @@ public static DocumentationEndpoints Create(IConfiguration? 
appConfiguration = n Username = username }; - var ns = ResolveNamespace(config, appConfiguration, endpoint.IndexNamePrefix); + var ns = ResolveEnvironment(config, appConfiguration); return new DocumentationEndpoints { Elasticsearch = endpoint, Namespace = ns }; } /// - /// Resolves the deployment namespace using this priority: - /// 1. DOCUMENTATION_ELASTIC_INDEX env var — strip prefix and -latest suffix + /// Resolves the environment name using this priority: + /// 1. DOCUMENTATION_ELASTIC_INDEX env var — parse old format {variant}-docs-{env}-{timestamp} /// 2. DOTNET_ENVIRONMENT env var /// 3. ENVIRONMENT env var /// 4. Fallback: "dev" /// - private static string ResolveNamespace(IConfiguration config, IConfiguration? appConfiguration, string indexNamePrefix) + private static string ResolveEnvironment(IConfiguration config, IConfiguration? appConfiguration) { var indexName = appConfiguration?["DOCUMENTATION_ELASTIC_INDEX"] ?? config["DOCUMENTATION_ELASTIC_INDEX"]; if (!string.IsNullOrEmpty(indexName)) { - var prefix = $"{indexNamePrefix}-"; - const string suffix = "-latest"; - if (indexName.StartsWith(prefix, StringComparison.OrdinalIgnoreCase) && - indexName.EndsWith(suffix, StringComparison.OrdinalIgnoreCase)) + // Old production format: {variant}-docs-{env}-{timestamp} + // e.g. "lexical-docs-edge-2025.10.23.120521" + // Extract the environment segment after "docs-" and before the next "-" followed by digits. + const string marker = "-docs-"; + var markerIndex = indexName.IndexOf(marker, StringComparison.OrdinalIgnoreCase); + if (markerIndex >= 0) { - var ns = indexName[prefix.Length..^suffix.Length]; - if (!string.IsNullOrEmpty(ns)) - return ns; + var afterMarker = indexName[(markerIndex + marker.Length)..]; + var dashIndex = afterMarker.IndexOf('-'); + var env = dashIndex > 0 ? 
afterMarker[..dashIndex] : afterMarker; + if (!string.IsNullOrEmpty(env) && (dashIndex < 0 || char.IsDigit(afterMarker[dashIndex + 1]))) + return env.ToLowerInvariant(); } } - var env = config["DOTNET_ENVIRONMENT"] + var envVar = config["DOTNET_ENVIRONMENT"] ?? config["ENVIRONMENT"]; - return !string.IsNullOrEmpty(env) ? env.ToLowerInvariant() : "dev"; + return !string.IsNullOrEmpty(envVar) ? envVar.ToLowerInvariant() : "dev"; } } diff --git a/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs b/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs index e07f0cbf0..f5101dec4 100644 --- a/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs +++ b/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs @@ -29,7 +29,7 @@ namespace Elastic.Documentation.Search; )] public static partial class DocumentationMappingContext; -public static class LexicalConfig +public static class LexicalConfig : IConfigureElasticsearch { public static AnalysisBuilder ConfigureAnalysis(AnalysisBuilder analysis) => analysis; diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs index 965ebd179..6a6a510dd 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs @@ -27,7 +27,7 @@ public partial class ElasticsearchMarkdownExporter : IMarkdownExporter, IDisposa private readonly ILogger _logger; private readonly ElasticsearchEndpoint _endpoint; private readonly DistributedTransport _transport; - private readonly string _indexNamespace; + private readonly string _buildType; // Ingest: orchestrator for dual-index mode private readonly IncrementalSyncOrchestrator _orchestrator; @@ -56,7 +56,7 @@ public ElasticsearchMarkdownExporter( ILoggerFactory logFactory, IDiagnosticsCollector collector, DocumentationEndpoints endpoints, - 
string indexNamespace, + string buildType, IDocumentationConfigurationContext context ) { @@ -64,7 +64,7 @@ IDocumentationConfigurationContext context _context = context; _logger = logFactory.CreateLogger(); _endpoint = endpoints.Elasticsearch; - _indexNamespace = indexNamespace; + _buildType = buildType; _versionsConfiguration = context.VersionsConfiguration; _synonyms = context.SearchConfiguration.Synonyms; _rules = context.SearchConfiguration.Rules; @@ -83,17 +83,13 @@ IDocumentationConfigurationContext context _fixedSynonymsHash = HashedBulkUpdate.CreateHash(string.Join(",", indexTimeSynonyms)); var aiPipeline = es.EnableAiEnrichment ? EnrichPolicyManager.PipelineName : null; - var synonymSetName = $"docs-{indexNamespace}"; - var ns = indexNamespace.ToLowerInvariant(); - var lexicalPrefix = es.IndexNamePrefix.Replace("semantic", "lexical").ToLowerInvariant(); - var lexicalAlias = $"{lexicalPrefix}-{ns}"; + var synonymSetName = $"docs-{buildType}"; var pipelineSettings = aiPipeline is not null ? 
new Dictionary { ["index.default_pipeline"] = aiPipeline } : null; - _lexicalTypeContext = DocumentationMappingContext.DocumentationDocument.Context - .WithIndexName(lexicalAlias) with + _lexicalTypeContext = DocumentationMappingContext.DocumentationDocument.CreateContext(type: buildType) with { ConfigureAnalysis = a => DocumentationAnalysisFactory.BuildAnalysis(a, synonymSetName, indexTimeSynonyms), IndexSettings = pipelineSettings @@ -107,9 +103,7 @@ IDocumentationConfigurationContext context _enrichPolicyManager = new EnrichPolicyManager(_transport, logFactory.CreateLogger(), _enrichmentCache.IndexName); } - var semanticAlias = $"{es.IndexNamePrefix.ToLowerInvariant()}-{ns}"; - _semanticTypeContext = DocumentationMappingContext.DocumentationDocumentSemantic.Context - .WithIndexName(semanticAlias) with + _semanticTypeContext = DocumentationMappingContext.DocumentationDocumentSemantic.CreateContext(type: buildType) with { ConfigureAnalysis = a => DocumentationAnalysisFactory.BuildAnalysis(a, synonymSetName, indexTimeSynonyms), IndexSettings = pipelineSettings @@ -237,7 +231,7 @@ private async ValueTask BackfillMissingAiFieldsAsync(string semanticAlias, Cance private async Task PublishSynonymsAsync(Cancel ctx) { - var setName = $"docs-{_indexNamespace}"; + var setName = $"docs-{_buildType}"; _logger.LogInformation("Publishing synonym set '{SetName}' to Elasticsearch", setName); var synonymRules = _synonyms.Aggregate(new List(), (acc, synonym) => @@ -275,7 +269,7 @@ private async Task PublishQueryRulesAsync(Cancel ctx) return; } - var rulesetName = $"docs-ruleset-{_indexNamespace}"; + var rulesetName = $"docs-ruleset-{_buildType}"; _logger.LogInformation("Publishing query ruleset '{RulesetName}' with {Count} rules to Elasticsearch", rulesetName, _rules.Count); var rulesetRules = _rules.Select(r => new QueryRulesetRule diff --git a/src/Elastic.Markdown/Exporters/ExporterExtensions.cs b/src/Elastic.Markdown/Exporters/ExporterExtensions.cs index cec7388f3..6deb2a8c0 
100644 --- a/src/Elastic.Markdown/Exporters/ExporterExtensions.cs +++ b/src/Elastic.Markdown/Exporters/ExporterExtensions.cs @@ -15,7 +15,7 @@ public static IReadOnlyCollection CreateMarkdownExporters( this IReadOnlySet exportOptions, ILoggerFactory logFactory, IDocumentationConfigurationContext context, - string indexNamespace + string buildType ) { var markdownExporters = new List(4); @@ -24,7 +24,7 @@ string indexNamespace if (exportOptions.Contains(Exporter.Configuration)) markdownExporters.Add(new ConfigurationExporter(logFactory, context.ConfigurationFileProvider, context)); if (exportOptions.Contains(Exporter.Elasticsearch)) - markdownExporters.Add(new ElasticsearchMarkdownExporter(logFactory, context.Collector, context.Endpoints, indexNamespace, context)); + markdownExporters.Add(new ElasticsearchMarkdownExporter(logFactory, context.Collector, context.Endpoints, buildType, context)); return markdownExporters; } } diff --git a/src/api/Elastic.Documentation.Api.App/Program.cs b/src/api/Elastic.Documentation.Api.App/Program.cs index 9ee9ef56d..ff255c280 100644 --- a/src/api/Elastic.Documentation.Api.App/Program.cs +++ b/src/api/Elastic.Documentation.Api.App/Program.cs @@ -6,6 +6,7 @@ using Elastic.Documentation.Api.Infrastructure.OpenTelemetry; using Elastic.Documentation.Configuration; using Elastic.Documentation.Configuration.Assembler; +using Elastic.Documentation.Search; using Elastic.Documentation.ServiceDefaults; using Microsoft.AspNetCore.Diagnostics; using Microsoft.AspNetCore.Diagnostics.HealthChecks; @@ -86,7 +87,9 @@ static void LogElasticsearchConfiguration(WebApplication app, ILogger logger) if (endpoints is not null) { var endpoint = endpoints.Elasticsearch; - var searchIndex = $"{endpoint.IndexNamePrefix.ToLowerInvariant()}-{endpoints.Namespace}-latest"; + var searchIndex = DocumentationMappingContext.DocumentationDocumentSemantic + .CreateContext(type: "assembler") + .ResolveReadTarget(); logger.LogInformation( "Elasticsearch configuration - 
Url: {Url}, Namespace: {Namespace}, SearchIndex: {SearchIndex}", endpoint.Uri, diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Program.cs b/src/api/Elastic.Documentation.Mcp.Remote/Program.cs index 129821347..41615c7c1 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Program.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Program.cs @@ -146,7 +146,9 @@ static void LogElasticsearchConfiguration(WebApplication app, ILogger logger) if (endpoints is not null) { var endpoint = endpoints.Elasticsearch; - var searchIndex = $"{endpoint.IndexNamePrefix.ToLowerInvariant()}-{endpoints.Namespace}-latest"; + var searchIndex = DocumentationMappingContext.DocumentationDocumentSemantic + .CreateContext(type: "assembler") + .ResolveReadTarget(); logger.LogInformation( "Elasticsearch configuration - Url: {Url}, Namespace: {Namespace}, SearchIndex: {SearchIndex}", endpoint.Uri, diff --git a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs index bd0a6e760..941aa47c8 100644 --- a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs +++ b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs @@ -109,7 +109,7 @@ Cancel ctx var builder = new AssemblerBuilder(logFactory, assembleContext, navigation, htmlWriter, pathProvider, historyMapper); - await builder.BuildAllAsync(assembleContext.Environment, assembleSources.AssembleSets, exporters, ctx); + await builder.BuildAllAsync(assembleSources.AssembleSets, exporters, ctx); if (exporters.Contains(Exporter.LinkMetadata)) await cloner.WriteLinkRegistrySnapshot(checkoutResult.LinkRegistrySnapshot, ctx); diff --git a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuilder.cs b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuilder.cs index e60954294..74b037059 100644 --- a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuilder.cs 
+++ b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuilder.cs @@ -38,7 +38,7 @@ public class AssemblerBuilder( private ILegacyUrlMapper? LegacyUrlMapper { get; } = legacyUrlMapper; - public async Task BuildAllAsync(PublishEnvironment environment, FrozenDictionary assembleSets, IReadOnlySet exportOptions, Cancel ctx) + public async Task BuildAllAsync(FrozenDictionary assembleSets, IReadOnlySet exportOptions, Cancel ctx) { if (context.OutputDirectory.Exists) context.OutputDirectory.Delete(true); @@ -48,7 +48,7 @@ public async Task BuildAllAsync(PublishEnvironment environment, FrozenDictionary var buildTimes = new List<(string Name, int FileCount, TimeSpan Duration)>(); // Create exporters without inferrer - inferrer is created per-repository - var markdownExporters = exportOptions.CreateMarkdownExporters(logFactory, context, environment.Name); + var markdownExporters = exportOptions.CreateMarkdownExporters(logFactory, context, "assembler"); var tasks = markdownExporters.Select(async e => await e.StartAsync(ctx)); await Task.WhenAll(tasks); diff --git a/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs b/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs index 9ee24d757..7c26b7843 100644 --- a/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs +++ b/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs @@ -6,6 +6,7 @@ using Elastic.Clients.Elasticsearch.Serialization; using Elastic.Documentation.Configuration; using Elastic.Documentation.Configuration.Search; +using Elastic.Documentation.Search; using Elastic.Transport; namespace Elastic.Documentation.Search.Common; @@ -28,7 +29,8 @@ public class ElasticsearchClientAccessor : IDisposable public ElasticsearchClientAccessor( DocumentationEndpoints endpoints, - SearchConfiguration searchConfiguration) + SearchConfiguration searchConfiguration + ) { var endpoint = endpoints.Elasticsearch; 
Endpoint = endpoint; @@ -36,10 +38,12 @@ public ElasticsearchClientAccessor( SynonymBiDirectional = searchConfiguration.SynonymBiDirectional; DiminishTerms = searchConfiguration.DiminishTerms; - SearchIndex = $"{endpoint.IndexNamePrefix.ToLowerInvariant()}-{endpoints.Namespace}-latest"; + SearchIndex = DocumentationMappingContext.DocumentationDocumentSemantic + .CreateContext(type: "assembler") + .ResolveReadTarget(); RulesetName = searchConfiguration.Rules.Count > 0 - ? $"docs-ruleset-{endpoints.Namespace}" + ? "docs-ruleset-assembler" : null; _nodePool = new SingleNodePool(endpoint.Uri); diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs index c7f0db5a8..cb9411ec5 100644 --- a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs +++ b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs @@ -152,12 +152,10 @@ private async ValueTask IsIndexingNeeded() var collector = new ConsoleDiagnosticsCollector(loggerFactory); // Create semantic type context to check channel hash (index namespace is 'dev' for tests) - var semanticTypeContext = DocumentationAnalysisFactory.CreateContext( - DocumentationMappingContext.DocumentationDocumentSemantic.Context, - $"{endpoint.IndexNamePrefix.ToLowerInvariant()}-dev", - "docs-dev", - [] - ); + var semanticTypeContext = DocumentationMappingContext.DocumentationDocumentSemantic.CreateContext(type: "assembler") with + { + ConfigureAnalysis = a => DocumentationAnalysisFactory.BuildAnalysis(a, "docs-assembler", []) + }; var options = new IngestChannelOptions(transport, semanticTypeContext); using var channel = new IngestChannel(options); From b190fd84941133f2293655bf4892f6cf186c8420 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Tue, 24 Feb 2026 18:14:58 +0100 Subject: [PATCH 12/14] Remove `IndexNamePrefix` across configuration, 
services, and commands to simplify index naming logic. Update Elasticsearch dependencies to version 0.28.0. --- Directory.Packages.props | 4 +- PLAN-rules-config.md | 194 ------------------ .../DocumentationEndpoints.cs | 3 - .../ElasticsearchEndpointConfigurator.cs | 3 - .../Search/DocumentationMappingConfig.cs | 36 ++-- .../Indexing/AssemblerIndexService.cs | 3 - .../IsolatedIndexService.cs | 3 - .../Assembler/AssemblerIndexCommand.cs | 6 +- .../Commands/Codex/CodexIndexCommand.cs | 3 - .../docs-builder/Commands/IndexCommand.cs | 6 +- 10 files changed, 18 insertions(+), 243 deletions(-) delete mode 100644 PLAN-rules-config.md diff --git a/Directory.Packages.props b/Directory.Packages.props index 20f8117f1..5ca9831a0 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -48,8 +48,8 @@ - - + + diff --git a/PLAN-rules-config.md b/PLAN-rules-config.md deleted file mode 100644 index 94fc33bbd..000000000 --- a/PLAN-rules-config.md +++ /dev/null @@ -1,194 +0,0 @@ -# Improved Rules Configuration Format - -## Context - -The `block` section in `changelog.yml` is being redesigned and renamed to `rules:`. Goals: -1. Explicit matching semantics (`any` vs `all`) -2. Per-field include/exclude modes for types and areas -3. Product overrides nested under the section they affect -4. Clear, scannable log messages prefixed with `[+include]` / `[-exclude]` -5. No backward compat — error if old `block:` key is seen - -## YAML Format - -```yaml -rules: - # Global match default for multi-valued fields (labels, areas). - # any (default) = match if ANY item matches the list - # all = match only if ALL items match the list - # Inherited by create, publish, and all product overrides. - # match: any - - # Create — controls which PRs generate changelog entries. - # exclude: block PRs with these labels (comma-separated) - # include: only create changelogs for PRs with these labels - # Cannot specify both. 
- # - # create: - # exclude: ">non-issue, >test" - # # match: any - # products: - # 'elasticsearch, kibana': - # exclude: ">test" - # 'cloud-serverless': - # exclude: "ILM" - - # Publish — controls which entries appear in rendered output. - # exclude_types / include_types - # exclude_areas / include_areas - # Cannot mix exclude_ and include_ for the same field. - # - # match_areas inherits from rules.match if not specified. - # - # publish: - # # match_areas: any - # exclude_types: - # - deprecation - # - known-issue - # exclude_areas: - # - "Internal" - # products: - # 'elasticsearch, kibana': - # exclude_types: - # - docs - # 'cloud-serverless': - # # match_areas: any - # include_areas: - # - "Search" - # - "Monitoring" -``` - -### Match inheritance - -``` -rules.match (global default, "any" if omitted) - ├─ create.match → create.products.{id}.match - └─ publish.match_areas → publish.products.{id}.match_areas -``` - -### Area matching examples - -| Config | Entry areas: `["Search", "Internal"]` | Result | -|--------|--------------------------------------|--------| -| `exclude_areas: [Internal]`, match `any` | "Internal" matches | **Blocked** | -| `exclude_areas: [Internal]`, match `all` | Not all match | **Allowed** | -| `include_areas: [Search]`, match `any` | "Search" matches | **Allowed** | -| `include_areas: [Search]`, match `all` | "Internal" not in list | **Blocked** | - -## Error Messages - -### Validation (config parsing) - -| Condition | Message | -|-----------|---------| -| Old `block:` key found | `'block' is no longer supported. Rename to 'rules'. See changelog.example.yml.` | -| Both `exclude_types` + `include_types` | `rules.publish: cannot have both 'exclude_types' and 'include_types'. Use one or the other.` | -| Both `exclude_areas` + `include_areas` | Same pattern | -| Both `create.exclude` + `create.include` | `rules.create: cannot have both 'exclude' and 'include'. 
Use one or the other.` | -| Invalid match value | `rules.match: '{value}' is not valid. Use 'any' or 'all'.` | -| Empty list | `rules.publish.exclude_types: list is empty. Add types or remove the field.` | -| Unknown product | `rules.publish.products: '{id}' not in available products. Available: {list}` | - -### Runtime (create/publish time) - -Prefixed with `[-exclude]` or `[+include]` for scanning: - -**Create:** -- `[-exclude] PR #{n}: skipped, label '{label}' matches rules.create.exclude (match: {mode})` -- `[+include] PR #{n}: created, label '{label}' matches rules.create.include (match: {mode})` -- `[+include] PR #{n}: skipped, no labels match rules.create.include [{labels}] (match: {mode})` -- Product: `[-exclude] PR #{n} ({product}): skipped, label '{label}' matches rules.create.products.{product}.exclude` - -**Publish:** -- `[-exclude] PR #{n}: hidden, type '{type}' in rules.publish.exclude_types` -- `[+include] PR #{n}: hidden, type '{type}' not in rules.publish.include_types` -- `[-exclude] PR #{n}: hidden, area '{area}' in rules.publish.exclude_areas (match_areas: {mode})` -- `[-exclude] PR #{n}: hidden, all areas [{areas}] in rules.publish.exclude_areas (match_areas: all)` -- `[+include] PR #{n}: hidden, areas [{areas}] not in rules.publish.include_areas (match_areas: {mode})` -- Product: same patterns with `rules.publish.products.{product}.` prefix - -## Files to Modify - -### 1. Domain model — enums and PublishBlocker -**`src/Elastic.Documentation/ReleaseNotes/PublishBlocker.cs`** - -- Add `MatchMode` enum (`Any`, `All`) -- Add `FieldMode` enum (`Exclude`, `Include`) -- Add to `PublishBlocker`: `MatchAreas` (MatchMode), `TypesMode` (FieldMode), `AreasMode` (FieldMode) - -### 2. Domain model — rename and restructure BlockConfiguration -**`src/Elastic.Documentation.Configuration/Changelog/BlockConfiguration.cs`** - -Rename to `RulesConfiguration` (or new file). 
Structure: -- `RulesConfiguration`: `Match` (MatchMode), `Create` (CreateRules?), `Publish` (PublishRules?) -- `CreateRules`: `Labels` (list), `Mode` (FieldMode), `Match` (MatchMode?), `ByProduct` (dict) -- `PublishRules`: `PublishBlocker` fields + `ByProduct` (dict of product-specific `PublishBlocker`s) -- Delete old `ProductBlockers` record - -### 3. Core blocking logic -**`src/Elastic.Documentation/ReleaseNotes/PublishBlockerExtensions.cs`** - -- `MatchesType()`: type vs list -- `MatchesArea()`: any/all matching -- `ShouldBlock()`: per-field mode (`Exclude` + match → blocked; `Include` + no match → blocked) - -### 4. YAML DTO (CLI path) -**`src/services/Elastic.Changelog/Serialization/ChangelogConfigurationYaml.cs`** - -- Rename `BlockConfigurationYaml` → `RulesConfigurationYaml` -- New `CreateRulesYaml`: `Exclude`/`Include` (string), `Match` (string?), `Products` (dict) -- Update `PublishBlockerYaml`: `MatchAreas`, `ExcludeTypes`/`IncludeTypes`, `ExcludeAreas`/`IncludeAreas`, `Products` (dict) -- Remove old fields (`Types`, `Areas`, `Create` string, root `Product`) -- Update parent `ChangelogConfigurationYaml`: rename `Block` → `Rules` - -### 5. YAML DTO (minimal/inline path) -**`src/Elastic.Documentation.Configuration/ReleaseNotes/ReleaseNotesSerialization.cs`** - -Mirror changes for minimal DTOs. Rename `BlockConfigMinimalDto` → `RulesConfigMinimalDto`, etc. - -### 6. Configuration parsing + validation -**`src/services/Elastic.Changelog/Configuration/ChangelogConfigurationLoader.cs`** - -- Detect old `block:` key → emit error -- Parse `rules:` with new structure -- Validate mutual exclusivity, match values, empty lists -- Resolve match inheritance chain - -### 7. Create blocking logic -Find where create labels are checked and update for include/exclude + match + runtime messages. - -### 8. 
Rendering utilities -**`src/services/Elastic.Changelog/Rendering/ChangelogRenderUtilities.cs`** - -- Update for new `publish.products` structure -- Add `[-exclude]` / `[+include]` prefixed runtime log messages - -### 9. Example config -**`config/changelog.example.yml`** — replace `block:` section with `rules:`. - -### 10. All references to BlockConfiguration -Find and update all code referencing `BlockConfiguration`, `Block`, `ProductBlockers` to use new names. - -### 11. Tests - -**Unit tests** (`PublishBlockerExtensionsTests.cs`): -- All mode/match combinations (exclude×any, exclude×all, include×any, include×all) -- Mixed modes (exclude_types + include_areas) -- Match inheritance (global → section → product) - -**Integration tests** (`BlockConfigurationTests.cs`): -- New format end-to-end -- Validation error messages (mutual exclusivity, invalid match, old `block:` key) -- Product overrides under publish.products and create.products -- Create include/exclude + match -- Runtime message prefixes `[-exclude]` / `[+include]` - -## Verification - -1. New unit tests for all mode/match combinations -2. Integration tests with new config format -3. Validation error tests — verify all error messages -4. Old `block:` key → error test -5. YAML parsing on both CLI and minimal paths -6. Runtime messages at create and publish time with correct prefixes -7. 
Match inheritance chain works correctly diff --git a/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs b/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs index 3636c0478..0e6ee09ce 100644 --- a/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs +++ b/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs @@ -26,9 +26,6 @@ public class ElasticsearchEndpoint public int IndexNumThreads { get; set; } = 4; // Reduced for Serverless rate limits public bool NoElasticInferenceService { get; set; } - // index options - public string IndexNamePrefix { get; set; } = "semantic-docs"; - // channel buffer options public int BufferSize { get; set; } = 50; // Reduced for Serverless rate limits public int MaxRetries { get; set; } = 5; // Increased for 429 retries diff --git a/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs b/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs index 035fe559c..4bd1586c1 100644 --- a/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs +++ b/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs @@ -28,7 +28,6 @@ public record ElasticsearchIndexOptions public int? BootstrapTimeout { get; init; } // index options - public string? IndexNamePrefix { get; init; } public bool? 
ForceReindex { get; init; } // channel buffer options @@ -84,8 +83,6 @@ public static async Task ApplyAsync( cfg.IndexNumThreads = options.IndexNumThreads.Value; if (options.NoEis.HasValue) cfg.NoElasticInferenceService = options.NoEis.Value; - if (!string.IsNullOrEmpty(options.IndexNamePrefix)) - cfg.IndexNamePrefix = options.IndexNamePrefix; if (options.BufferSize.HasValue) cfg.BufferSize = options.BufferSize.Value; if (options.MaxRetries.HasValue) diff --git a/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs b/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs index f5101dec4..0e1acc17f 100644 --- a/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs +++ b/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs @@ -4,43 +4,34 @@ using Elastic.Mapping; using Elastic.Mapping.Analysis; +using Elastic.Mapping.Mappings; namespace Elastic.Documentation.Search; [ElasticsearchMappingContext] -[Entity( - Target = EntityTarget.Index, - Name = "docs-lexical", - WriteAlias = "docs-lexical", - ReadAlias = "docs-lexical", - SearchPattern = "docs-lexical-*", +[Index( + NameTemplate = "docs-{type}.lexical-{env}", DatePattern = "yyyy.MM.dd.HHmmss", Configuration = typeof(LexicalConfig) )] -[Entity( - Target = EntityTarget.Index, - Name = "docs-semantic", +[Index( + NameTemplate = "docs-{type}.semantic-{env}", Variant = "Semantic", - WriteAlias = "docs-semantic", - ReadAlias = "docs-semantic", - SearchPattern = "docs-semantic-*", DatePattern = "yyyy.MM.dd.HHmmss", Configuration = typeof(SemanticConfig) )] public static partial class DocumentationMappingContext; -public static class LexicalConfig : IConfigureElasticsearch +public class LexicalConfig : IConfigureElasticsearch { - public static AnalysisBuilder ConfigureAnalysis(AnalysisBuilder analysis) => analysis; - - public static DocumentationDocumentMappingsBuilder ConfigureMappings(DocumentationDocumentMappingsBuilder m) => - ConfigureCommonMappings(m) + public MappingsBuilder 
ConfigureMappings(MappingsBuilder mappings) => + ConfigureCommonMappings(mappings) .StrippedBody(f => f .Analyzer("synonyms_fixed_analyzer") .SearchAnalyzer("synonyms_analyzer") ); - internal static DocumentationDocumentMappingsBuilder ConfigureCommonMappings(DocumentationDocumentMappingsBuilder m) => m + internal static MappingsBuilder ConfigureCommonMappings(MappingsBuilder m) => m // Text fields with custom analyzers and multi-fields .SearchTitle(f => f .Analyzer("synonyms_fixed_analyzer") @@ -87,15 +78,13 @@ internal static DocumentationDocumentMappingsBuilder ConfigureCommonMappings(Doc .MultiField("keyword", mf => mf.Keyword())); } -public static class SemanticConfig +public class SemanticConfig : IConfigureElasticsearch { private const string ElserInferenceId = ".elser-2-elastic"; private const string JinaInferenceId = ".jina-embeddings-v5-text-small"; - public static AnalysisBuilder ConfigureAnalysis(AnalysisBuilder analysis) => analysis; - - public static DocumentationDocumentMappingsBuilder ConfigureMappings(DocumentationDocumentMappingsBuilder m) => - LexicalConfig.ConfigureCommonMappings(m) + public MappingsBuilder ConfigureMappings(MappingsBuilder mappings) => + LexicalConfig.ConfigureCommonMappings(mappings) .StrippedBody(s => s .Analyzer("synonyms_fixed_analyzer") .SearchAnalyzer("synonyms_analyzer") @@ -158,5 +147,4 @@ public static AnalysisBuilder BuildAnalysis(AnalysisBuilder analysis, string syn .TokenizeOnChars("whitespace", ",", ";", "?", "!", "(", ")", "&", "'", "\"", "/", "[", "]", "{", "}")) .Tokenizer("path_tokenizer", t => t.PathHierarchy() .Delimiter('/')); - } diff --git a/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs b/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs index cb84652c2..323129a40 100644 --- a/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs +++ b/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs @@ 
-37,7 +37,6 @@ ICoreService githubActionsService /// The number of index threads the inference endpoint should use. Defaults: 8 /// Do not use the Elastic Inference Service, bootstrap inference endpoint /// Timeout in minutes for the inference endpoint creation. Defaults: 4 - /// The prefix for the computed index/alias names. Defaults: semantic-docs /// Force reindex strategy to semantic index /// The number of documents to send to ES as part of the bulk. Defaults: 100 /// The number of times failed bulk items should be retried. Defaults: 3 @@ -65,7 +64,6 @@ public async Task Index(IDiagnosticsCollector collector, bool? noEis = null, int? bootstrapTimeout = null, // index options - string? indexNamePrefix = null, bool? forceReindex = null, // channel buffer options int? bufferSize = null, @@ -94,7 +92,6 @@ public async Task Index(IDiagnosticsCollector collector, IndexNumThreads = indexNumThreads, NoEis = noEis, BootstrapTimeout = bootstrapTimeout, - IndexNamePrefix = indexNamePrefix, ForceReindex = forceReindex, BufferSize = bufferSize, MaxRetries = maxRetries, diff --git a/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs b/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs index 9d996c8ed..19e060b1b 100644 --- a/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs +++ b/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs @@ -34,7 +34,6 @@ ICoreService githubActionsService /// The number of index threads the inference endpoint should use. Defaults: 8 /// Do not use the Elastic Inference Service, bootstrap inference endpoint /// Timeout in minutes for the inference endpoint creation. Defaults: 4 - /// The prefix for the computed index/alias names. Defaults: semantic-docs /// Force reindex strategy to semantic index /// The number of documents to send to ES as part of the bulk. Defaults: 100 /// The number of times failed bulk items should be retried. 
Defaults: 3 @@ -62,7 +61,6 @@ public async Task Index(IDiagnosticsCollector collector, bool? noEis = null, int? bootstrapTimeout = null, // index options - string? indexNamePrefix = null, bool? forceReindex = null, // channel buffer options int? bufferSize = null, @@ -91,7 +89,6 @@ public async Task Index(IDiagnosticsCollector collector, IndexNumThreads = indexNumThreads, NoEis = noEis, BootstrapTimeout = bootstrapTimeout, - IndexNamePrefix = indexNamePrefix, ForceReindex = forceReindex, BufferSize = bufferSize, MaxRetries = maxRetries, diff --git a/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs b/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs index ba3e8ebc9..df29d5666 100644 --- a/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs +++ b/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs @@ -34,7 +34,6 @@ ICoreService githubActionsService /// The number of search threads the inference endpoint should use. Defaults: 8 /// The number of index threads the inference endpoint should use. Defaults: 8 /// Do not use the Elastic Inference Service, bootstrap inference endpoint - /// The prefix for the computed index/alias names. Defaults: semantic-docs /// Force reindex strategy to semantic index /// Timeout in minutes for the inference endpoint creation. Defaults: 4 /// The number of documents to send to ES as part of the bulk. Defaults: 100 @@ -65,7 +64,6 @@ public async Task Index( int? bootstrapTimeout = null, // index options - string? indexNamePrefix = null, bool? 
forceReindex = null, // channel buffer options @@ -97,7 +95,7 @@ public async Task Index( // inference options enableAiEnrichment, indexNumThreads, searchNumThreads, noEis, bootstrapTimeout, // channel and connection options - indexNamePrefix, forceReindex, bufferSize, maxRetries, debugMode, + forceReindex, bufferSize, maxRetries, debugMode, // proxy options proxyAddress, proxyPassword, proxyUsername, // certificate options @@ -110,7 +108,7 @@ static async (s, collector, state, ctx) => await s.Index(collector, state.fs, // inference options state.enableAiEnrichment, state.searchNumThreads, state.indexNumThreads, state.noEis, state.bootstrapTimeout, // channel and connection options - state.indexNamePrefix, state.forceReindex, state.bufferSize, state.maxRetries, state.debugMode, + state.forceReindex, state.bufferSize, state.maxRetries, state.debugMode, // proxy options state.proxyAddress, state.proxyPassword, state.proxyUsername, // certificate options diff --git a/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs b/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs index cefabf0d5..ae11fbb96 100644 --- a/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs +++ b/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs @@ -40,7 +40,6 @@ ICoreService githubActionsService /// The number of search threads the inference endpoint should use. Defaults: 8 /// The number of index threads the inference endpoint should use. Defaults: 8 /// Do not use the Elastic Inference Service, bootstrap inference endpoint - /// The prefix for the computed index/alias names. Defaults: semantic-docs /// Force reindex strategy to semantic index /// Timeout in minutes for the inference endpoint creation. Defaults: 4 /// The number of documents to send to ES as part of the bulk. Defaults: 100 @@ -71,7 +70,6 @@ public async Task Index( int? bootstrapTimeout = null, // index options - string? indexNamePrefix = null, bool? 
forceReindex = null, // channel buffer options @@ -137,7 +135,6 @@ public async Task Index( IndexNumThreads = indexNumThreads, NoEis = noEis, BootstrapTimeout = bootstrapTimeout, - IndexNamePrefix = indexNamePrefix, ForceReindex = forceReindex, BufferSize = bufferSize, MaxRetries = maxRetries, diff --git a/src/tooling/docs-builder/Commands/IndexCommand.cs b/src/tooling/docs-builder/Commands/IndexCommand.cs index ad34f4eaf..ff402ce16 100644 --- a/src/tooling/docs-builder/Commands/IndexCommand.cs +++ b/src/tooling/docs-builder/Commands/IndexCommand.cs @@ -31,7 +31,6 @@ ICoreService githubActionsService /// Enable AI enrichment of documents using LLM-generated metadata /// The number of search threads the inference endpoint should use. Defaults: 8 /// The number of index threads the inference endpoint should use. Defaults: 8 - /// The prefix for the computed index/alias names. Defaults: semantic-docs /// Do not use the Elastic Inference Service, bootstrap inference endpoint /// Force reindex strategy to semantic index /// Timeout in minutes for the inference endpoint creation. Defaults: 4 @@ -63,7 +62,6 @@ public async Task Index( int? bootstrapTimeout = null, // index options - string? indexNamePrefix = null, bool? 
forceReindex = null, // channel buffer options @@ -95,7 +93,7 @@ public async Task Index( // inference options enableAiEnrichment, indexNumThreads, noEis, searchNumThreads, bootstrapTimeout, // channel and connection options - indexNamePrefix, forceReindex, bufferSize, maxRetries, debugMode, + forceReindex, bufferSize, maxRetries, debugMode, // proxy options proxyAddress, proxyPassword, proxyUsername, // certificate options @@ -108,7 +106,7 @@ static async (s, collector, state, ctx) => await s.Index(collector, state.fs, st // inference options state.enableAiEnrichment, state.searchNumThreads, state.indexNumThreads, state.noEis, state.bootstrapTimeout, // channel and connection options - state.indexNamePrefix, state.forceReindex, state.bufferSize, state.maxRetries, state.debugMode, + state.forceReindex, state.bufferSize, state.maxRetries, state.debugMode, // proxy options state.proxyAddress, state.proxyPassword, state.proxyUsername, // certificate options From 40b3241a97f1fc133445845df94eb129e18ec234 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Tue, 24 Feb 2026 19:18:40 +0100 Subject: [PATCH 13/14] Bump ingest libraries --- Directory.Packages.props | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Directory.Packages.props b/Directory.Packages.props index 5ca9831a0..39eff5fef 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -48,8 +48,8 @@ - - + + From a19a4cff59b58fa58e00b84cae296d583169be92 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Tue, 24 Feb 2026 19:20:57 +0100 Subject: [PATCH 14/14] Bump ingest libraries --- Directory.Packages.props | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Directory.Packages.props b/Directory.Packages.props index 39eff5fef..87f4bd481 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -48,8 +48,8 @@ - - + +