From 6e47ba019c341aea1f014fa0b43a65dd98275ec6 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Tue, 29 Jul 2025 12:47:08 +0100 Subject: [PATCH 1/2] feat(vscode): adding ability to update columns --- pnpm-lock.yaml | 61 +++++++++++++ sqlmesh/core/schema_loader.py | 32 ++++--- sqlmesh/lsp/commands.py | 1 + sqlmesh/lsp/context.py | 108 +++++++++++++++++++++++- sqlmesh/lsp/main.py | 50 +++++++++++ sqlmesh/utils/lineage.py | 28 +++++- vscode/extension/package.json | 1 + vscode/extension/tests/commands.spec.ts | 96 +++++++++++++++++++++ 8 files changed, 358 insertions(+), 19 deletions(-) create mode 100644 sqlmesh/lsp/commands.py create mode 100644 vscode/extension/tests/commands.spec.ts diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a94b230fc1..3d0a5449f2 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -20,6 +20,9 @@ importers: vscode/extension: dependencies: + '@duckdb/node-api': + specifier: 1.3.2-alpha.25 + version: 1.3.2-alpha.25 '@types/fs-extra': specifier: ^11.0.4 version: 11.0.4 @@ -692,6 +695,37 @@ packages: resolution: {integrity: sha512-Vd/9EVDiu6PPJt9yAh6roZP6El1xHrdvIVGjyBsHR0RYwNHgL7FJPyIIW4fANJNG6FtyZfvlRPpFI4ZM/lubvw==} engines: {node: '>=18'} + '@duckdb/node-api@1.3.2-alpha.25': + resolution: {integrity: sha512-AzDyyjTtnYUxoy/MHDFRwfOggDOkS8RBgGA82OI6nla8B9NDNZeAYJ97T3PvCL8cx7y00EtGVN3g03aoW4fRmw==} + + '@duckdb/node-bindings-darwin-arm64@1.3.2-alpha.25': + resolution: {integrity: sha512-vRjzNgkz2TAYW5c2rzPwcHBctBWr0lxQ4blFASAv0DdeGPOeuCMXJUA3982X7iPNwAppH0VMII6cYzON0GA+RA==} + cpu: [arm64] + os: [darwin] + + '@duckdb/node-bindings-darwin-x64@1.3.2-alpha.25': + resolution: {integrity: sha512-BSg/DZjT25QZe87+pmdMfE1XlHdi2WxtAO+F2PEXN6VnPeLyTdl5bYlnhOGrDKquKDmUEqok5OwF7mR4QfU+Aw==} + cpu: [x64] + os: [darwin] + + '@duckdb/node-bindings-linux-arm64@1.3.2-alpha.25': + resolution: {integrity: sha512-VhjUH/AvolZWDX/URqiIh58JbAB1vYbDgSmQ0wvqhS9jzJ9Sj88urGDw+XWXw49Rr4BhIgDtX70SoARhO2i/Gg==} + cpu: [arm64] + os: 
[linux] + + '@duckdb/node-bindings-linux-x64@1.3.2-alpha.25': + resolution: {integrity: sha512-raav2ypBiV4TlpnKU9hocsuFDO4ipwIcQQmkMIh20/Qd9vkv35QcQYNqStiZVJh2LAaVoQffNvcKMlclblYqUQ==} + cpu: [x64] + os: [linux] + + '@duckdb/node-bindings-win32-x64@1.3.2-alpha.25': + resolution: {integrity: sha512-/fAKax+xYkdRhkUl3PkL3HfFd1ZsezG1yiOkL0StHBdD3xB80Njm1JGHxx1fO3WWE5XTbE1MTJ5I0xjEzPwsfQ==} + cpu: [x64] + os: [win32] + + '@duckdb/node-bindings@1.3.2-alpha.25': + resolution: {integrity: sha512-FkoSaoeRAi6Em0hs0qzr3SN04ykN99R+Qap5kLwhi6GNPnHzWMU1VrNpK9cE4eBj0n+RWlNK0TiO712dn44QzQ==} + '@esbuild/aix-ppc64@0.25.8': resolution: {integrity: sha512-urAvrUedIqEiFR3FYSLTWQgLu5tb+m0qZw0NBEasUeo6wuqatkMDaRT+1uABiGXEu5vqgPd7FGE1BhsAIy9QVA==} engines: {node: '>=18'} @@ -6674,6 +6708,33 @@ snapshots: '@csstools/css-tokenizer@3.0.4': {} + '@duckdb/node-api@1.3.2-alpha.25': + dependencies: + '@duckdb/node-bindings': 1.3.2-alpha.25 + + '@duckdb/node-bindings-darwin-arm64@1.3.2-alpha.25': + optional: true + + '@duckdb/node-bindings-darwin-x64@1.3.2-alpha.25': + optional: true + + '@duckdb/node-bindings-linux-arm64@1.3.2-alpha.25': + optional: true + + '@duckdb/node-bindings-linux-x64@1.3.2-alpha.25': + optional: true + + '@duckdb/node-bindings-win32-x64@1.3.2-alpha.25': + optional: true + + '@duckdb/node-bindings@1.3.2-alpha.25': + optionalDependencies: + '@duckdb/node-bindings-darwin-arm64': 1.3.2-alpha.25 + '@duckdb/node-bindings-darwin-x64': 1.3.2-alpha.25 + '@duckdb/node-bindings-linux-arm64': 1.3.2-alpha.25 + '@duckdb/node-bindings-linux-x64': 1.3.2-alpha.25 + '@duckdb/node-bindings-win32-x64': 1.3.2-alpha.25 + '@esbuild/aix-ppc64@0.25.8': optional: true diff --git a/sqlmesh/core/schema_loader.py b/sqlmesh/core/schema_loader.py index 8df5164a8a..52ab807c78 100644 --- a/sqlmesh/core/schema_loader.py +++ b/sqlmesh/core/schema_loader.py @@ -57,28 +57,17 @@ def create_external_models_file( external_model_fqns -= existing_model_fqns with ThreadPoolExecutor(max_workers=max_workers) as 
pool: - - def _get_columns(table: str) -> t.Optional[t.Dict[str, t.Any]]: - try: - return adapter.columns(table, include_pseudo_columns=True) - except Exception as e: - msg = f"Unable to get schema for '{table}': '{e}'." - if strict: - raise SQLMeshError(msg) from e - get_console().log_warning(msg) - return None - gateway_part = {"gateway": gateway} if gateway else {} schemas = [ { "name": exp.to_table(table).sql(dialect=dialect), - "columns": {c: dtype.sql(dialect=dialect) for c, dtype in columns.items()}, + "columns": columns, **gateway_part, } for table, columns in sorted( pool.map( - lambda table: (table, _get_columns(table)), + lambda table: (table, get_columns(adapter, dialect, table, strict)), external_model_fqns, ) ) @@ -94,3 +83,20 @@ def _get_columns(table: str) -> t.Optional[t.Dict[str, t.Any]]: with open(path, "w", encoding="utf-8") as file: yaml.dump(entries_to_keep + schemas, file) + + +def get_columns( + adapter: EngineAdapter, dialect: DialectType, table: str, strict: bool +) -> t.Optional[t.Dict[str, t.Any]]: + """ + Return the column and their types in a dictionary + """ + try: + columns = adapter.columns(table, include_pseudo_columns=True) + return {c: dtype.sql(dialect=dialect) for c, dtype in columns.items()} + except Exception as e: + msg = f"Unable to get schema for '{table}': '{e}'." 
+ if strict: + raise SQLMeshError(msg) from e + get_console().log_warning(msg) + return None diff --git a/sqlmesh/lsp/commands.py b/sqlmesh/lsp/commands.py new file mode 100644 index 0000000000..bea81f898a --- /dev/null +++ b/sqlmesh/lsp/commands.py @@ -0,0 +1 @@ +EXTERNAL_MODEL_UPDATE_COLUMNS = "sqlmesh.external_model_update_columns" diff --git a/sqlmesh/lsp/context.py b/sqlmesh/lsp/context.py index 43eb9c8f16..934a015bdd 100644 --- a/sqlmesh/lsp/context.py +++ b/sqlmesh/lsp/context.py @@ -1,16 +1,21 @@ from dataclasses import dataclass from pathlib import Path +from pygls.server import LanguageServer from sqlmesh.core.context import Context import typing as t - from sqlmesh.core.linter.rule import Range -from sqlmesh.core.model.definition import SqlModel +from sqlmesh.core.model.definition import SqlModel, ExternalModel from sqlmesh.core.linter.definition import AnnotatedRuleViolation +from sqlmesh.core.schema_loader import get_columns +from sqlmesh.lsp.commands import EXTERNAL_MODEL_UPDATE_COLUMNS from sqlmesh.lsp.custom import ModelForRendering, TestEntry, RunTestResponse from sqlmesh.lsp.custom import AllModelsResponse, RenderModelEntry from sqlmesh.lsp.tests_ranges import get_test_ranges +from sqlmesh.lsp.helpers import to_lsp_range from sqlmesh.lsp.uri import URI from lsprotocol import types +from sqlmesh.utils import yaml +from sqlmesh.utils.lineage import get_yaml_model_name_ranges @dataclass @@ -298,6 +303,36 @@ def get_code_actions( return code_actions if code_actions else None + def get_code_lenses(self, uri: URI) -> t.Optional[t.List[types.CodeLens]]: + models_in_file = self.map.get(uri.to_path()) + if isinstance(models_in_file, ModelTarget): + models = [self.context.get_model(model) for model in models_in_file.names] + if any(isinstance(model, ExternalModel) for model in models): + code_lenses = self._get_external_model_code_lenses(uri) + if code_lenses: + return code_lenses + + return None + + def _get_external_model_code_lenses(self, uri: URI) -> 
t.List[types.CodeLens]: + """Get code lenses for external models YAML files.""" + ranges = get_yaml_model_name_ranges(uri.to_path()) + if ranges is None: + return [] + return [ + types.CodeLens( + range=to_lsp_range(range), + command=types.Command( + title="Update Columns", + command=EXTERNAL_MODEL_UPDATE_COLUMNS, + arguments=[ + name, + ], + ), + ) + for name, range in ranges.items() + ] + def list_of_models_for_rendering(self) -> t.List[ModelForRendering]: """Get a list of models for rendering. @@ -399,3 +434,72 @@ def diagnostic_to_lsp_diagnostic( code=diagnostic.rule.name, code_description=types.CodeDescription(href=rule_uri), ) + + def update_external_model_columns(self, ls: LanguageServer, uri: URI, model_name: str) -> bool: + """ + Update the columns for an external model in the YAML file. Returns True if changed, False if didn't because + of the columns already being up to date. + + Errors still throw exceptions to be handled by the caller. + """ + models = yaml.load(uri.to_path()) + if not isinstance(models, list): + raise ValueError( + f"Expected a list of models in {uri.to_path()}, but got {type(models).__name__}" + ) + + existing_model = next((model for model in models if model.get("name") == model_name), None) + if existing_model is None: + raise ValueError(f"Could not find model {model_name} in {uri.to_path()}") + + existing_model_columns = existing_model.get("columns") + + # Get the adapter and fetch columns + adapter = self.context.engine_adapter + # Get columns for the model + new_columns = get_columns( + adapter=adapter, + dialect=self.context.config.model_defaults.dialect, + table=model_name, + strict=True, + ) + # Compare existing columns and matching types and if they are the same, do not update + if existing_model_columns is not None: + if existing_model_columns == new_columns: + return False + + # Model index to update + model_index = next( + (i for i, model in enumerate(models) if model.get("name") == model_name), None + ) + if model_index 
is None: + raise ValueError(f"Could not find model {model_name} in {uri.to_path()}") + + # Get end of the file to set the edit range + with open(uri.to_path(), "r", encoding="utf-8") as file: + read_file = file.read() + + end_line = read_file.count("\n") + end_character = len(read_file.splitlines()[-1]) if end_line > 0 else 0 + + models[model_index]["columns"] = new_columns + edit = types.TextDocumentEdit( + text_document=types.OptionalVersionedTextDocumentIdentifier( + uri=uri.value, + version=None, + ), + edits=[ + types.TextEdit( + range=types.Range( + start=types.Position(line=0, character=0), + end=types.Position( + line=end_line, + character=end_character, + ), + ), + new_text=yaml.dump(models), + ) + ], + ) + ls.apply_edit(types.WorkspaceEdit(document_changes=[edit])) + return True diff --git a/sqlmesh/lsp/main.py b/sqlmesh/lsp/main.py index 1f53e71861..13e4c5d8f0 100755 --- a/sqlmesh/lsp/main.py +++ b/sqlmesh/lsp/main.py @@ -24,6 +24,7 @@ ApiResponseGetModels, ) +from sqlmesh.lsp.commands import EXTERNAL_MODEL_UPDATE_COLUMNS from sqlmesh.lsp.completions import get_sql_completions from sqlmesh.lsp.context import ( LSPContext, @@ -368,6 +369,44 @@ def function_call(ls: LanguageServer, params: t.Any) -> t.Dict[str, t.Any]: self.server.feature(name)(create_function_call(method)) + @self.server.command(EXTERNAL_MODEL_UPDATE_COLUMNS) + def command_external_models_update_columns(ls: LanguageServer, raw: t.Any) -> None: + try: + if not isinstance(raw, list): + raise ValueError("Invalid command parameters") + if len(raw) != 1: + raise ValueError("Command expects exactly one parameter") + model_name = raw[0] + if not isinstance(model_name, str): + raise ValueError("Command parameter must be a string") + + context = self._context_get_or_load() + if not isinstance(context, LSPContext): + raise ValueError("Context is not loaded or invalid") + model = context.context.get_model(model_name) + if model is None: + raise ValueError(f"External model '{model_name}' not found") 
+ if model._path is None: + raise ValueError(f"External model '{model_name}' does not have a file path") + uri = URI.from_path(model._path) + updated = context.update_external_model_columns( + ls=ls, + uri=uri, + model_name=model_name, + ) + if updated: + ls.show_message( + f"Updated columns for '{model_name}'", + types.MessageType.Info, + ) + else: + ls.show_message( + f"Columns for '{model_name}' are already up to date", + ) + except Exception as e: + ls.show_message(f"Error executing command: {e}", types.MessageType.Error) + return None + @self.server.feature(types.INITIALIZE) def initialize(ls: LanguageServer, params: types.InitializeParams) -> None: """Initialize the server when the client connects.""" @@ -750,6 +789,17 @@ def code_action( ls.log_trace(f"Error getting code actions: {e}") return None + @self.server.feature(types.TEXT_DOCUMENT_CODE_LENS) + def code_lens(ls: LanguageServer, params: types.CodeLensParams) -> t.List[types.CodeLens]: + try: + uri = URI(params.text_document.uri) + context = self._context_get_or_load(uri) + code_lenses = context.get_code_lenses(uri) + return code_lenses if code_lenses else [] + except Exception as e: + ls.log_trace(f"Error getting code lenses: {e}") + return [] + @self.server.feature( types.TEXT_DOCUMENT_COMPLETION, types.CompletionOptions(trigger_characters=["@"]), # advertise "@" for macros diff --git a/sqlmesh/utils/lineage.py b/sqlmesh/utils/lineage.py index 8fcb92f56b..f5b4506c68 100644 --- a/sqlmesh/utils/lineage.py +++ b/sqlmesh/utils/lineage.py @@ -387,6 +387,22 @@ def _get_yaml_model_range(path: Path, model_name: str) -> t.Optional[Range]: Returns: The Range of the model block in the YAML file, or None if not found """ + model_name_ranges = get_yaml_model_name_ranges(path) + if model_name_ranges is None: + return None + return model_name_ranges.get(model_name, None) + + +def get_yaml_model_name_ranges(path: Path) -> t.Optional[t.Dict[str, Range]]: + """ + Get the ranges of all model names in a YAML file. 
+ + Args: + path: Path to the YAML file + + Returns: + A dictionary mapping model names to their ranges in the YAML file. + """ yaml = YAML() with path.open("r", encoding="utf-8") as f: data = yaml.load(f) @@ -394,11 +410,15 @@ def _get_yaml_model_range(path: Path, model_name: str) -> t.Optional[Range]: if not isinstance(data, list): return None + model_name_ranges = {} for item in data: - if isinstance(item, dict) and item.get("name") == model_name: - # Get size of block by taking the earliest line/col in the items block and the last line/col of the block + if isinstance(item, dict): position_data = item.lc.data["name"] # type: ignore start = Position(line=position_data[2], character=position_data[3]) end = Position(line=position_data[2], character=position_data[3] + len(item["name"])) - return Range(start=start, end=end) - return None + name = item.get("name") + if not name: + continue + model_name_ranges[name] = Range(start=start, end=end) + + return model_name_ranges diff --git a/vscode/extension/package.json b/vscode/extension/package.json index 1db45abcf4..a0d853a0a9 100644 --- a/vscode/extension/package.json +++ b/vscode/extension/package.json @@ -134,6 +134,7 @@ "package": "rm -rf ./src_react && mkdir -p ./src_react && cd ../react && pnpm run build && cd ../extension && cp -r ../react/dist/* ./src_react && pnpm run check-types && node esbuild.js --production" }, "dependencies": { + "@duckdb/node-api": "1.3.2-alpha.25", "@types/fs-extra": "^11.0.4", "@vscode/python-extension": "^1.0.5", "fs-extra": "^11.3.0", diff --git a/vscode/extension/tests/commands.spec.ts b/vscode/extension/tests/commands.spec.ts new file mode 100644 index 0000000000..afd926310c --- /dev/null +++ b/vscode/extension/tests/commands.spec.ts @@ -0,0 +1,96 @@ +import { test, expect } from './fixtures' +import path from 'path' +import fs from 'fs-extra' +import os from 'os' +import { + openServerPage, + saveFile, + SUSHI_SOURCE_PATH, + waitForLoadedSQLMesh, +} from './utils' +import { 
createPythonInterpreterSettingsSpecifier } from './utils_code_server' +import { DuckDBInstance } from '@duckdb/node-api' + +test.describe('Update external models columns', () => { + test('New external model', async ({ page, sharedCodeServer }) => { + // Normal setting up + const tempDir = await fs.mkdtemp( + path.join(os.tmpdir(), 'vscode-test-sushi-'), + ) + await fs.copy(SUSHI_SOURCE_PATH, tempDir) + await createPythonInterpreterSettingsSpecifier(tempDir) + + // Changing the config to set the default gateway to use the fixed one. + const configPath = path.join(tempDir, 'config.py') + const configContent = await fs.readFile(configPath, 'utf8') + const original = `default_gateway="duckdb",` + expect(configContent).toContain(original) + const target = `default_gateway="duckdb_persistent",` + const updatedConfigContent = configContent.replace(original, target) + expect(updatedConfigContent).toContain(target) + await fs.writeFile(configPath, updatedConfigContent) + + // Create an additional table in the database + const table = 'raw.test_table' + const databasePath = path.join(tempDir, 'data', 'duckdb.db') + const instance = await DuckDBInstance.create(databasePath) + const connection = await instance.connect() + await connection.run(`CREATE SCHEMA IF NOT EXISTS raw`) + await connection.run( + `CREATE TABLE IF NOT EXISTS ${table}( + id INTEGER, + value VARCHAR + )`, + ) + connection.closeSync() + instance.closeSync() + expect(fs.existsSync(databasePath)).toBe(true) + + // Update the external_models in the config to include the new table but + // not the columns by appending '- name: ${table}' to the external_models.yaml file + const externalModelsPath = path.join(tempDir, 'external_models.yaml') + const externalModelsContent = await fs.readFile(externalModelsPath, 'utf8') + const newExternalModel = `- name: ${table}` + const updatedExternalModelsContent = `${externalModelsContent}\n${newExternalModel}` + await fs.writeFile(externalModelsPath, 
updatedExternalModelsContent) + + // Open the server page + await openServerPage(page, tempDir, sharedCodeServer) + + // Wait for the models folder to be visible + await page.waitForSelector('text=models') + + // Click on the models folder, excluding external_models + await page + .getByRole('treeitem', { name: 'external_models.yaml', exact: true }) + .locator('a') + .click() + + await waitForLoadedSQLMesh(page) + + // Click the update columns button + await page.waitForSelector('text=Update Columns') + const updateColumnButtons = page.getByRole('button', { + name: 'Update Columns', + exact: true, + }) + // Click each one of them + for (const button of await updateColumnButtons.all()) { + await button.click() + await page.waitForTimeout(1_000) // Wait for the action to complete + } + + await page.waitForTimeout(1_000) + await saveFile(page) + await page.waitForTimeout(1_000) + + // Check the file contains the columns + const updatedExternalModelsContentAfterUpdate = await fs.readFile( + externalModelsPath, + 'utf8', + ) + expect(updatedExternalModelsContentAfterUpdate).toContain( + `- name: ${table}\n columns:\n id: INT\n value: TEXT`, + ) + }) +}) From 7c1fabe58476c33fa8e873df06d3055d24ba0f37 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Tue, 29 Jul 2025 15:20:57 +0100 Subject: [PATCH 2/2] add documentation --- sqlmesh/lsp/context.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sqlmesh/lsp/context.py b/sqlmesh/lsp/context.py index 934a015bdd..c3026da2e6 100644 --- a/sqlmesh/lsp/context.py +++ b/sqlmesh/lsp/context.py @@ -440,6 +440,8 @@ def update_external_model_columns(self, ls: LanguageServer, uri: URI, model_name Update the columns for an external model in the YAML file. Returns True if changed, False if didn't because of the columns already being up to date. + In this case, the model name is the name of the external model as is defined in the YAML file, not any other version of it.
+ Errors still throw exceptions to be handled by the caller. """ models = yaml.load(uri.to_path())