Skip to content

Commit a74905c

Browse files
author
Naoya Kanai
authored
Fix MotherDuck multi-catalog configs (#4001)
1 parent ae5ae2c commit a74905c

File tree

2 files changed

+66
-32
lines changed

2 files changed

+66
-32
lines changed

sqlmesh/core/config/connection.py

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
# Nullable types are problematic
4242
"clickhouse",
4343
}
44-
MOTHERDUCK_TOKEN_REGEX = re.compile(r"(\?motherduck_token=)(\S*)")
44+
MOTHERDUCK_TOKEN_REGEX = re.compile(r"(\?|\&)(motherduck_token=)(\S*)")
4545

4646

4747
class ConnectionConfig(abc.ABC, BaseConfig):
@@ -134,7 +134,6 @@ class DuckDBAttachOptions(BaseConfig):
134134
type: str
135135
path: str
136136
read_only: bool = False
137-
token: t.Optional[str] = None
138137

139138
def to_sql(self, alias: str) -> str:
140139
options = []
@@ -144,14 +143,18 @@ def to_sql(self, alias: str) -> str:
144143
options.append(f"TYPE {self.type.upper()}")
145144
if self.read_only:
146145
options.append("READ_ONLY")
146+
options_sql = f" ({', '.join(options)})" if options else ""
147+
alias_sql = ""
147148
# TODO: Add support for Postgres schema. Currently adding it blocks access to the information_schema
148-
alias_sql = (
149+
if self.type == "motherduck":
149150
# MotherDuck does not support aliasing
150-
f" AS {alias}" if not (self.type == "motherduck" or self.path.startswith("md:")) else ""
151-
)
152-
options_sql = f" ({', '.join(options)})" if options else ""
153-
token_sql = "?motherduck_token=" + self.token if self.token else ""
154-
return f"ATTACH '{self.path}{token_sql}'{alias_sql}{options_sql}"
151+
if (md_db := self.path.replace("md:", "")) != alias.replace('"', ""):
152+
raise ConfigError(
153+
f"MotherDuck does not support assigning an alias different from the database name {md_db}."
154+
)
155+
else:
156+
alias_sql += f" AS {alias}"
157+
return f"ATTACH '{self.path}'{alias_sql}{options_sql}"
155158

156159

157160
class BaseDuckDBConnectionConfig(ConnectionConfig):
@@ -293,16 +296,20 @@ def init(cursor: duckdb.DuckDBPyConnection) -> None:
293296
query = f"ATTACH '{path_options}'"
294297
if not path_options.startswith("md:"):
295298
query += f" AS {alias}"
296-
elif self.token:
297-
query += f"?motherduck_token={self.token}"
298299
cursor.execute(query)
299300
except BinderException as e:
300301
# If a user tries to create a catalog pointing at `:memory:` and with the name `memory`
301302
# then we don't want to raise since this happens by default. They are just doing this to
302303
# set it as the default catalog.
303-
if not (
304-
'database with name "memory" already exists' in str(e)
305-
and path_options == ":memory:"
304+
# If a user tried to attach a MotherDuck database/share which has already by attached via
305+
# `ATTACH 'md:'`, then we don't want to raise since this is expected.
306+
if (
307+
not (
308+
'database with name "memory" already exists' in str(e)
309+
and path_options == ":memory:"
310+
)
311+
and f"""database with name "{path_options.path.replace('md:', '')}" already exists"""
312+
not in str(e)
306313
):
307314
raise e
308315
if i == 0 and not getattr(self, "database", None):
@@ -355,7 +362,9 @@ def get_catalog(self) -> t.Optional[str]:
355362
return None
356363

357364
def _mask_motherduck_token(self, string: str) -> str:
358-
return MOTHERDUCK_TOKEN_REGEX.sub(lambda m: f"{m.group(1)}{'*' * len(m.group(2))}", string)
365+
return MOTHERDUCK_TOKEN_REGEX.sub(
366+
lambda m: f"{m.group(1)}{m.group(2)}{'*' * len(m.group(3))}", string
367+
)
359368

360369

361370
class MotherDuckConnectionConfig(BaseDuckDBConnectionConfig):
@@ -373,11 +382,12 @@ def _static_connection_kwargs(self) -> t.Dict[str, t.Any]:
373382
from sqlmesh import __version__
374383

375384
custom_user_agent_config = {"custom_user_agent": f"SQLMesh/{__version__}"}
376-
if not self.database:
377-
return {"config": custom_user_agent_config}
378-
connection_str = f"md:{self.database or ''}"
385+
connection_str = "md:"
386+
if self.database:
387+
# Attach single MD database instead of all databases on the account
388+
connection_str += f"{self.database}?attach_mode=single"
379389
if self.token:
380-
connection_str += f"?motherduck_token={self.token}"
390+
connection_str += f"{'&' if self.database else '?'}motherduck_token={self.token}"
381391
return {"database": connection_str, "config": custom_user_agent_config}
382392

383393

tests/core/test_connection_config.py

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -647,42 +647,66 @@ def _thread_connection():
647647

648648

649649
def test_motherduck_token_mask(make_config):
650-
config = make_config(
650+
config_1 = make_config(
651+
type="motherduck",
652+
token="short",
653+
database="whodunnit",
654+
)
655+
config_2 = make_config(
656+
type="motherduck",
657+
token="longtoken123456789",
658+
database="whodunnit",
659+
)
660+
config_3 = make_config(
651661
type="motherduck",
662+
token="secret1235",
652663
catalogs={
653-
"test2": DuckDBAttachOptions(
654-
type="motherduck",
655-
path="md:whodunnit?motherduck_token=short",
656-
),
657664
"test1": DuckDBAttachOptions(
658-
type="motherduck", path="md:whodunnit", token="longtoken123456789"
665+
type="motherduck",
666+
path="md:whodunnit",
659667
),
660668
},
661669
)
662-
assert isinstance(config, MotherDuckConnectionConfig)
663670

664-
assert config._mask_motherduck_token(config.catalogs["test1"].path) == "md:whodunnit"
671+
assert isinstance(config_1, MotherDuckConnectionConfig)
672+
assert isinstance(config_2, MotherDuckConnectionConfig)
673+
assert isinstance(config_3, MotherDuckConnectionConfig)
674+
assert config_1._mask_motherduck_token(config_1.database) == "whodunnit"
665675
assert (
666-
config._mask_motherduck_token(config.catalogs["test2"].path)
676+
config_1._mask_motherduck_token(f"md:{config_1.database}?motherduck_token={config_1.token}")
667677
== "md:whodunnit?motherduck_token=*****"
668678
)
669679
assert (
670-
config._mask_motherduck_token("?motherduck_token=secret1235")
680+
config_1._mask_motherduck_token(
681+
f"md:{config_1.database}?attach_mode=single&motherduck_token={config_1.token}"
682+
)
683+
== "md:whodunnit?attach_mode=single&motherduck_token=*****"
684+
)
685+
assert (
686+
config_2._mask_motherduck_token(f"md:{config_2.database}?motherduck_token={config_2.token}")
687+
== "md:whodunnit?motherduck_token=******************"
688+
)
689+
assert (
690+
config_3._mask_motherduck_token(f"md:?motherduck_token={config_3.token}")
691+
== "md:?motherduck_token=**********"
692+
)
693+
assert (
694+
config_1._mask_motherduck_token("?motherduck_token=secret1235")
671695
== "?motherduck_token=**********"
672696
)
673697
assert (
674-
config._mask_motherduck_token("md:whodunnit?motherduck_token=short")
698+
config_1._mask_motherduck_token("md:whodunnit?motherduck_token=short")
675699
== "md:whodunnit?motherduck_token=*****"
676700
)
677701
assert (
678-
config._mask_motherduck_token("md:whodunnit?motherduck_token=longtoken123456789")
702+
config_1._mask_motherduck_token("md:whodunnit?motherduck_token=longtoken123456789")
679703
== "md:whodunnit?motherduck_token=******************"
680704
)
681705
assert (
682-
config._mask_motherduck_token("md:whodunnit?motherduck_token=")
706+
config_1._mask_motherduck_token("md:whodunnit?motherduck_token=")
683707
== "md:whodunnit?motherduck_token="
684708
)
685-
assert config._mask_motherduck_token(":memory:") == ":memory:"
709+
assert config_1._mask_motherduck_token(":memory:") == ":memory:"
686710

687711

688712
def test_bigquery(make_config):

0 commit comments

Comments
 (0)