Expose pg_duckdb hooks for external extensions (e.g. pg_ducklake)

Summary of what the hunks below do:
  * Makefile / pg_duckdb_extensions.cmake: link with -Wl,-Bsymbolic on Linux
    and build the postgres_scanner DuckDB extension.
  * Export (visibility("default")) a set of registration and query entry
    points: RegisterDuckdbLoadExtension, DuckdbIsInitialized,
    RegisterDuckdbExternalTableCheck, RegisterDuckdbOnlyExtension,
    RegisterDuckdbOnlyFunction, RegisterDuckdbRelationNameCallback,
    DuckdbIsAlterTableInProgress, DuckdbLockGlobalProcess,
    DuckdbUnlockGlobalProcess, DuckdbUnsafeSetNextExpectedCommandId,
    DuckdbAllowSubtransaction, plus the existing pgduckdb_get_* deparse
    helpers and GetPostgresDuckDBType/Typemod.
  * DDL and ruleutils: treat any relation for which DuckdbTableAmGetName()
    returns non-null like a DuckDB table, and rewrite CREATE VIEW queries
    that need DuckDB execution through duckdb.query(...).

NOTE(review): this patch was recovered from a copy in which newlines,
indentation and whitespace-only lines had been collapsed. Line breaks and
blank context lines were rebuilt from the hunk line counts; indentation is
assumed to be tabs (spaces for alignment). Verify with `git apply --check`
against the base tree before relying on it. In particular, the first hunk of
src/pgduckdb_xact.cpp needs 14 old lines and the position of one blank
context line (placed inside the include list) is a guess.

NOTE(review): container element types (`std::vector<...>`) and the
`&registered_oid` loop variable had been mangled in that copy and were
restored from how each variable is used.

NOTE(review): in pgduckdb_get_tabledef the new strcmp() on
duckdb_table_am_name is now guarded against nullptr, because the same
function still checks `duckdb_table_am_name == nullptr` further down.

NOTE(review): DuckDBManager::Initialize() no longer creates the
ThreadSignalBlockGuard whose comment said it keeps signals on the Postgres
main thread. The patch gives no reason for dropping it - confirm intended.

NOTE(review): RegisterDuckdbOnlyExtension/RegisterDuckdbOnlyFunction store
the caller's pointer without copying; presumably callers pass string
literals - confirm, or copy into std::string.

diff --git a/Makefile b/Makefile
index 3367b7ee..839e3108 100644
--- a/Makefile
+++ b/Makefile
@@ -18,6 +18,11 @@ DUCKDB_GEN ?= ninja
 DUCKDB_VERSION = v1.4.3
 # duckdb build tweaks
 DUCKDB_CMAKE_VARS = -DCXX_EXTRA=-fvisibility=default -DBUILD_SHELL=0 -DBUILD_PYTHON=0 -DBUILD_UNITTESTS=0
+# -Bsymbolic: required for postgres_scanner (used by DuckLake FDW) to avoid
+# ELF symbol conflict with PostgreSQL backend's pg_link_canary_is_frontend()
+ifeq ($(shell uname -s),Linux)
+DUCKDB_CMAKE_VARS += -DCMAKE_SHARED_LINKER_FLAGS=-Wl,-Bsymbolic
+endif
 # set to 1 to disable asserts in DuckDB. This is particularly useful in combinition with MotherDuck.
 # When asserts are enabled the released motherduck extension will fail some of
 # those asserts. By disabling asserts it's possible to run a debug build of
@@ -82,6 +87,14 @@ override PG_CXXFLAGS += -std=c++17 ${DUCKDB_BUILD_CXX_FLAGS} ${COMPILER_FLAGS} -
 # changes to the vendored code in one place.
 override PG_CFLAGS += -Wno-declaration-after-statement
 
+# -Bsymbolic: required for postgres_scanner (used by DuckLake FDW) to avoid
+# ELF symbol conflict with PostgreSQL backend's pg_link_canary_is_frontend()
+ifeq ($(DUCKDB_BUILD), ReleaseStatic)
+ifeq ($(shell uname -s), Linux)
+SHLIB_LINK += -Wl,-Bsymbolic
+endif
+endif
+
 SHLIB_LINK += $(PG_DUCKDB_LINK_FLAGS)
 
 include Makefile.global
diff --git a/include/pgduckdb/pgduckdb_types.hpp b/include/pgduckdb/pgduckdb_types.hpp
index d1edd4bc..a4eb9d26 100644
--- a/include/pgduckdb/pgduckdb_types.hpp
+++ b/include/pgduckdb/pgduckdb_types.hpp
@@ -32,8 +32,8 @@ constexpr int64_t PGDUCKDB_MIN_TIMESTAMP_VALUE = -210866803200000000;
 
 void CheckForUnsupportedPostgresType(duckdb::LogicalType type);
 duckdb::LogicalType ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute);
-Oid GetPostgresDuckDBType(const duckdb::LogicalType &type, bool throw_error = false);
-int32_t GetPostgresDuckDBTypemod(const duckdb::LogicalType &type);
+__attribute__((visibility("default"))) Oid GetPostgresDuckDBType(const duckdb::LogicalType &type, bool throw_error = false);
+__attribute__((visibility("default"))) int32_t GetPostgresDuckDBTypemod(const duckdb::LogicalType &type);
 duckdb::Value ConvertPostgresParameterToDuckValue(Datum value, Oid postgres_type);
 void ConvertPostgresToDuckValue(Oid attr_type, Datum value, duckdb::Vector &result, uint64_t offset);
 bool ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, uint64_t col);
diff --git a/src/pgduckdb_ddl.cpp b/src/pgduckdb_ddl.cpp
index 9ea89041..430e706d 100644
--- a/src/pgduckdb_ddl.cpp
+++ b/src/pgduckdb_ddl.cpp
@@ -5,6 +5,7 @@
 #include "pgduckdb/pgduckdb_ddl.hpp"
 #include "pgduckdb/pgduckdb_hooks.hpp"
 #include "pgduckdb/pgduckdb_planner.hpp"
+#include "pgduckdb/pgduckdb_table_am.hpp"
 #include "pgduckdb/pg/string_utils.hpp"
 
 extern "C" {
@@ -644,7 +645,8 @@ DuckdbHandleDDLPre(PlannedStmt *pstmt, const char *query_string) {
 			return DuckdbHandleRenameViewPre(stmt);
 		}
 
-		if (pgduckdb::IsDuckdbTable(rel)) {
+		if (pgduckdb::IsDuckdbTable(rel) ||
+		    pgduckdb::DuckdbTableAmGetName(rel->rd_tableam) != nullptr) {
 			if (pgduckdb::top_level_duckdb_ddl_type != pgduckdb::DDLType::NONE) {
 				ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
 				                errmsg("Only one DuckDB %s can be renamed in a single statement",
@@ -685,7 +687,9 @@ DuckdbHandleDDLPre(PlannedStmt *pstmt, const char *query_string) {
 		 * afterwards. We currently only do this to get a better error message,
 		 * because we don't support REFERENCES anyway.
 		 */
-		if (pgduckdb::IsDuckdbTable(relation) && pgduckdb::top_level_duckdb_ddl_type == pgduckdb::DDLType::NONE) {
+		if ((pgduckdb::IsDuckdbTable(relation) ||
+		     pgduckdb::DuckdbTableAmGetName(relation->rd_tableam) != nullptr) &&
+		    pgduckdb::top_level_duckdb_ddl_type == pgduckdb::DDLType::NONE) {
 			pgduckdb::top_level_duckdb_ddl_type = pgduckdb::DDLType::ALTER_TABLE;
 			pgduckdb::ClaimCurrentCommandId();
 		}
@@ -916,7 +920,37 @@ DuckdbHandleViewStmtPre(Node *parsetree, PlannedStmt *pstmt, const char *query_s
 	}
 
 	if (!pgduckdb::NeedsToBeMotherDuckView(stmt, schema_name)) {
-		// Let Postgres handle this view
+		/*
+		 * For views over queries that require DuckDB execution (e.g.,
+		 * duckdb_only_functions like read_parquet, time_travel), expand
+		 * duckdb.row columns to proper PostgreSQL types so that
+		 * pg_attribute shows real column names and types.
+		 *
+		 * Skip the expensive parse_analyze when pg_duckdb has no registered
+		 * functions/tables — pure-Postgres views need no rewriting.
+		 */
+		if (!pgduckdb::IsExtensionRegistered()) {
+			return false;
+		}
+		RawStmt *rawstmt = makeNode(RawStmt);
+		rawstmt->stmt = stmt->query;
+		rawstmt->stmt_location = pstmt->stmt_location;
+		rawstmt->stmt_len = pstmt->stmt_len;
+#if PG_VERSION_NUM >= 150000
+		Query *viewParse = parse_analyze_fixedparams(rawstmt, query_string, NULL, 0, NULL);
+#else
+		Query *viewParse = parse_analyze(rawstmt, query_string, NULL, 0, NULL);
+#endif
+		if (IsA(viewParse, Query) && viewParse->commandType == CMD_SELECT &&
+		    pgduckdb::NeedsDuckdbExecution(viewParse)) {
+			char *duckdb_query_string = pgduckdb_get_querydef((Query *)copyObjectImpl(viewParse));
+			char *function_call = psprintf("duckdb.query(%s)", quote_literal_cstr(duckdb_query_string));
+			RawStmt *wrapped_query = EntrenchColumnsFromCall(viewParse, function_call, &query_string);
+			MemoryContext query_context = GetMemoryChunkContext(stmt->query);
+			MemoryContext oldcontext = MemoryContextSwitchTo(query_context);
+			stmt->query = (Node *)copyObjectImpl(wrapped_query->stmt);
+			MemoryContextSwitchTo(oldcontext);
+		}
 		return false;
 	}
 
@@ -1851,3 +1885,15 @@ DECLARE_PG_FUNCTION(duckdb_grant_trigger) {
 	PG_RETURN_NULL();
 }
 }
+
+namespace pgduckdb {
+/*
+ * Exported getter for top_level_duckdb_ddl_type, so external extensions
+ * (like pg_ducklake) can check if an ALTER TABLE is in progress.
+ * This is needed because pgduckdb uses -fvisibility=hidden for C++ symbols.
+ */
+__attribute__((visibility("default"))) bool
+DuckdbIsAlterTableInProgress() {
+	return top_level_duckdb_ddl_type == DDLType::ALTER_TABLE;
+}
+} // namespace pgduckdb
diff --git a/src/pgduckdb_duckdb.cpp b/src/pgduckdb_duckdb.cpp
index 68f07fd0..eeef5320 100644
--- a/src/pgduckdb_duckdb.cpp
+++ b/src/pgduckdb_duckdb.cpp
@@ -88,13 +88,24 @@ ToString(char *value) {
 	config.options.ddb_option_name = duckdb_##ddb_option_name;                                                         \
 	elog(DEBUG2, "[PGDuckDB] Set DuckDB option: '" #ddb_option_name "'=%s", ToString(duckdb_##ddb_option_name).c_str());
 
+typedef void (*DuckDBLoadExtension)(duckdb::DuckDB &db);
+static std::vector<DuckDBLoadExtension> load_extensions;
+
+__attribute__((visibility("default"))) bool
+RegisterDuckdbLoadExtension(DuckDBLoadExtension extension) {
+	load_extensions.push_back(extension);
+	return true;
+}
+
+__attribute__((visibility("default"))) bool
+DuckdbIsInitialized() {
+	return DuckDBManager::IsInitialized();
+}
+
 void
 DuckDBManager::Initialize() {
 	elog(DEBUG2, "(PGDuckDB/DuckDBManager) Creating DuckDB instance");
 
-	// Block signals before initializing DuckDB to ensure signal is handled by the Postgres main thread only
-	pgduckdb::ThreadSignalBlockGuard guard;
-
 	// Make sure directories provided in config exists
 	std::filesystem::create_directories(duckdb_temporary_directory);
 	std::filesystem::create_directories(duckdb_extension_directory);
@@ -217,6 +228,10 @@ DuckDBManager::Initialize() {
 		}
 	}
 
+	for (auto extension : load_extensions) {
+		extension(*database);
+	}
+
 	if (duckdb_autoinstall_known_extensions) {
 		InstallExtensions(context);
 	}
@@ -231,6 +246,7 @@ DuckDBManager::Reset() {
 	UnclaimBgwSessionHint();
 }
 
+
 int64
 GetSeqLastValue(const char *seq_name) {
 	Oid duckdb_namespace = get_namespace_oid("duckdb", false);
diff --git a/src/pgduckdb_hooks.cpp b/src/pgduckdb_hooks.cpp
index 8157c3b1..e61bc555 100644
--- a/src/pgduckdb_hooks.cpp
+++ b/src/pgduckdb_hooks.cpp
@@ -43,6 +43,17 @@ static ExecutorFinish_hook_type prev_executor_finish_hook = NULL;
 static ExplainOneQuery_hook_type prev_explain_one_query_hook = NULL;
 static emit_log_hook_type prev_emit_log_hook = NULL;
 
+typedef bool (*DuckdbExternalTableCheckFn)(Oid relid);
+static std::vector<DuckdbExternalTableCheckFn> external_table_checks;
+
+namespace pgduckdb {
+__attribute__((visibility("default"))) bool
+RegisterDuckdbExternalTableCheck(DuckdbExternalTableCheckFn callback) {
+	external_table_checks.push_back(callback);
+	return true;
+}
+} // namespace pgduckdb
+
 static bool
 ContainsCatalogTable(List *rtes) {
 	foreach_node(RangeTblEntry, rte, rtes) {
@@ -67,7 +78,13 @@ ContainsCatalogTable(List *rtes) {
 
 static bool
 IsDuckdbTable(Oid relid) {
-	return pgduckdb::DuckdbTableAmGetName(relid) != nullptr;
+	if (pgduckdb::DuckdbTableAmGetName(relid) != nullptr)
+		return true;
+	for (auto &check : external_table_checks) {
+		if (check(relid))
+			return true;
+	}
+	return false;
 }
 
 static bool
diff --git a/src/pgduckdb_metadata_cache.cpp b/src/pgduckdb_metadata_cache.cpp
index b11543da..daa3e9c8 100644
--- a/src/pgduckdb_metadata_cache.cpp
+++ b/src/pgduckdb_metadata_cache.cpp
@@ -33,7 +33,20 @@ extern "C" {
 #include "pgduckdb/pgduckdb_background_worker.hpp"
 #include "pgduckdb/pgduckdb_guc.hpp"
 
+static std::vector<const char *> external_extension_names;
+static std::vector<const char *> external_duckdb_only_function_names;
+
 namespace pgduckdb {
+
+__attribute__((visibility("default"))) void
+RegisterDuckdbOnlyExtension(const char *extension_name) {
+	external_extension_names.push_back(extension_name);
+}
+
+__attribute__((visibility("default"))) void
+RegisterDuckdbOnlyFunction(const char *function_name) {
+	external_duckdb_only_function_names.push_back(function_name);
+}
 struct {
 	/*
 	 * Does the cache contain valid data, i.e. is it initialized? Or is it
@@ -197,14 +210,33 @@ BuildDuckdbOnlyFunctions() {
 	                                "map_keys",
 	                                "map_values"};
 
-	for (uint32_t i = 0; i < lengthof(function_names); i++) {
-		CatCList *catlist = SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(function_names[i]));
+	/* Collect OIDs of externally registered extensions */
+	std::vector<Oid> external_ext_oids;
+	for (auto ext_name : external_extension_names) {
+		Oid ext_oid = get_extension_oid(ext_name, true);
+		if (OidIsValid(ext_oid)) {
+			external_ext_oids.push_back(ext_oid);
+		}
+	}
+
+	auto add_functions_by_name = [&](const char *func_name) {
+		CatCList *catlist = SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(func_name));
 
 		for (int j = 0; j < catlist->n_members; j++) {
 			HeapTuple tuple = &catlist->members[j]->tuple;
 			Form_pg_proc function = (Form_pg_proc)GETSTRUCT(tuple);
-			if (getExtensionOfObject(ProcedureRelationId, function->oid) != cache.extension_oid) {
-				continue;
+			Oid ext_oid = getExtensionOfObject(ProcedureRelationId, function->oid);
+			if (ext_oid != cache.extension_oid) {
+				bool found = false;
+				for (auto &registered_oid : external_ext_oids) {
+					if (ext_oid == registered_oid) {
+						found = true;
+						break;
+					}
+				}
+				if (!found) {
+					continue;
+				}
 			}
 
 			/* The cache needs to outlive the current transaction so store the list in TopMemoryContext */
@@ -214,6 +246,14 @@ BuildDuckdbOnlyFunctions() {
 		}
 
 		ReleaseSysCacheList(catlist);
+	};
+
+	for (uint32_t i = 0; i < lengthof(function_names); i++) {
+		add_functions_by_name(function_names[i]);
+	}
+
+	for (auto func_name : external_duckdb_only_function_names) {
+		add_functions_by_name(func_name);
 	}
 }
 
diff --git a/src/pgduckdb_ruleutils.cpp b/src/pgduckdb_ruleutils.cpp
index 8e4dfe32..1314f75d 100644
--- a/src/pgduckdb_ruleutils.cpp
+++ b/src/pgduckdb_ruleutils.cpp
@@ -2,6 +2,7 @@
 #include "pgduckdb/pg/string_utils.hpp"
 #include "pgduckdb/pgduckdb_types.hpp"
 #include "pgduckdb/pgduckdb_ddl.hpp"
+#include "pgduckdb/pgduckdb_table_am.hpp"
 #include "pgduckdb/pg/relations.hpp"
 #include "pgduckdb/pg/locale.hpp"
 
@@ -44,6 +45,16 @@ extern "C" {
 #include "pgduckdb/pgduckdb_metadata_cache.hpp"
 #include "pgduckdb/pgduckdb_userdata_cache.hpp"
 
+typedef char *(*DuckdbRelationNameCallbackFn)(Oid relid);
+static std::vector<DuckdbRelationNameCallbackFn> relation_name_callbacks;
+
+namespace pgduckdb {
+__attribute__((visibility("default"))) void
+RegisterDuckdbRelationNameCallback(DuckdbRelationNameCallbackFn callback) {
+	relation_name_callbacks.push_back(callback);
+}
+} // namespace pgduckdb
+
 extern "C" {
 
 bool outermost_query = true;
@@ -563,8 +574,14 @@ pgduckdb_db_and_schema_string(const char *postgres_schema_name, const char *duck
  * DuckDB for the specified Postgres OID. This includes the DuckDB database name
  * too.
  */
-char *
+extern "C" __attribute__((visibility("default"))) char *
 pgduckdb_relation_name(Oid relation_oid) {
+	for (auto &callback : relation_name_callbacks) {
+		char *name = callback(relation_oid);
+		if (name)
+			return name;
+	}
+
 	HeapTuple tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relation_oid));
 	if (!HeapTupleIsValid(tp))
 		elog(ERROR, "cache lookup failed for relation %u", relation_oid);
@@ -597,7 +614,7 @@ pgduckdb_relation_name(Oid relation_oid) {
  * use in get_target_list to determine if we're processing the outermost
  * targetlist or not.
  */
-char *
+extern "C" __attribute__((visibility("default"))) char *
 pgduckdb_get_querydef(Query *query) {
 	outermost_query = true;
 	auto save_nestlevel = NewGUCNestLevel();
@@ -619,7 +636,7 @@ pgduckdb_get_querydef(Query *query) {
  * the following patch that I (Jelte) submitted to Postgres in 2023:
  * https://www.postgresql.org/message-id/CAGECzQSqdDHO_s8=CPTb2+4eCLGUscdh=KjYGTunhvrwcC7ZSQ@mail.gmail.com
  */
-char *
+extern "C" __attribute__((visibility("default"))) char *
 pgduckdb_get_tabledef(Oid relation_oid) {
 	Relation relation = relation_open(relation_oid, AccessShareLock);
 	const char *relation_name = pgduckdb_relation_name(relation_oid);
@@ -649,6 +666,8 @@ pgduckdb_get_tabledef(Oid relation_oid) {
 		// allowed
 	} else if (relation->rd_rel->relpersistence != RELPERSISTENCE_PERMANENT) {
 		elog(ERROR, "Only TEMP and non-UNLOGGED tables are supported in DuckDB");
+	} else if (duckdb_table_am_name != nullptr && strcmp(duckdb_table_am_name, "duckdb") != 0) {
+		// not a duckdb table, let them decide
 	} else if (relation->rd_rel->relowner != pgduckdb::MotherDuckPostgresUserOid()) {
 		elog(ERROR, "MotherDuck tables must be owned by the duckb.postgres_role");
 	}
@@ -792,7 +811,7 @@ pgduckdb_get_tabledef(Oid relation_oid) {
 	/* close create table's outer parentheses */
 	appendStringInfoString(&buffer, ")");
 
-	if (!pgduckdb::IsDuckdbTableAm(relation->rd_tableam)) {
+	if (duckdb_table_am_name == nullptr) {
 		/* Shouldn't happen but seems good to check anyway */
 		elog(ERROR, "Only a table with the DuckDB can be stored in DuckDB, %d %d", relation->rd_rel->relam,
 		     pgduckdb::DuckdbTableAmOid());
@@ -882,7 +901,7 @@ cookConstraint(ParseState *pstate, Node *raw_constraint, char *relname) {
 	return expr;
 }
 
-char *
+extern "C" __attribute__((visibility("default"))) char *
 pgduckdb_get_rename_relationdef(Oid relation_oid, RenameStmt *rename_stmt) {
 	if (rename_stmt->renameType != OBJECT_TABLE && rename_stmt->renameType != OBJECT_VIEW &&
 	    rename_stmt->renameType != OBJECT_COLUMN) {
@@ -890,10 +909,18 @@ pgduckdb_get_rename_relationdef(Oid relation_oid, RenameStmt *rename_stmt) {
 	}
 
 	Relation relation = relation_open(relation_oid, AccessShareLock);
-	Assert(pgduckdb::IsDuckdbTable(relation) || pgduckdb::IsMotherDuckView(relation));
+	Assert(pgduckdb::IsDuckdbTable(relation) || pgduckdb::IsMotherDuckView(relation) ||
+	       pgduckdb::DuckdbTableAmGetName(relation->rd_tableam) != nullptr);
 
 	const char *postgres_schema_name = get_namespace_name_or_temp(relation->rd_rel->relnamespace);
-	const char *db_and_schema = pgduckdb_db_and_schema_string(postgres_schema_name, "duckdb");
+	const char *duckdb_table_am_name = "duckdb";
+	if (relation->rd_rel->relkind == RELKIND_RELATION) {
+		const char *table_am_name = pgduckdb::DuckdbTableAmGetName(relation->rd_tableam);
+		if (table_am_name != nullptr) {
+			duckdb_table_am_name = table_am_name;
+		}
+	}
+	const char *db_and_schema = pgduckdb_db_and_schema_string(postgres_schema_name, duckdb_table_am_name);
 	const char *old_table_name = psprintf("%s.%s", db_and_schema, quote_identifier(rename_stmt->relation->relname));
 
 	const char *relation_type = "TABLE";
@@ -924,7 +951,7 @@ pgduckdb_get_rename_relationdef(Oid relation_oid, RenameStmt *rename_stmt) {
  *
  * TODO: Add support indexes
  */
-char *
+extern "C" __attribute__((visibility("default"))) char *
 pgduckdb_get_alter_tabledef(Oid relation_oid, AlterTableStmt *alter_stmt) {
 	Relation relation = relation_open(relation_oid, AccessShareLock);
 	const char *relation_name = pgduckdb_relation_name(relation_oid);
diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp
index 96bf2f3c..25ba2fbc 100644
--- a/src/pgduckdb_types.cpp
+++ b/src/pgduckdb_types.cpp
@@ -1543,7 +1543,7 @@ CheckForUnsupportedPostgresType(duckdb::LogicalType type) {
 	}
 }
 
-Oid
+__attribute__((visibility("default"))) Oid
 GetPostgresDuckDBType(const duckdb::LogicalType &type, bool throw_error) {
 	CheckForUnsupportedPostgresType(type);
 	switch (type.id()) {
@@ -1627,7 +1627,7 @@ GetPostgresDuckDBType(const duckdb::LogicalType &type, bool throw_error) {
 	}
 }
 
-int32
+__attribute__((visibility("default"))) int32
 GetPostgresDuckDBTypemod(const duckdb::LogicalType &type) {
 	switch (type.id()) {
 	case duckdb::LogicalTypeId::DECIMAL: {
diff --git a/src/pgduckdb_xact.cpp b/src/pgduckdb_xact.cpp
index f3261be7..728c3420 100644
--- a/src/pgduckdb_xact.cpp
+++ b/src/pgduckdb_xact.cpp
@@ -6,14 +6,46 @@
 #include "pgduckdb/pgduckdb_hooks.hpp"
 #include "pgduckdb/pgduckdb_utils.hpp"
 #include "pgduckdb/pgduckdb_background_worker.hpp"
+#include "pgduckdb/pgduckdb_process_lock.hpp"
 
 #include "pgduckdb/pg/transactions.hpp"
 #include "pgduckdb/utility/cpp_wrapper.hpp"
 
 namespace pgduckdb {
 
+__attribute__((visibility("default"))) void
+DuckdbLockGlobalProcess(void) {
+	GlobalProcessLock::GetLock().lock();
+}
+
+__attribute__((visibility("default"))) void
+DuckdbUnlockGlobalProcess(void) {
+	GlobalProcessLock::GetLock().unlock();
+}
+
 static CommandId next_expected_command_id = FirstCommandId;
 static bool top_level_statement = true;
+static bool allow_subtransaction = false;
+
+/*
+ * Unsafe hook for external extensions to set next_expected_command_id.
+ *
+ * These allow extensions like pg_ducklake to temporarily suppress mixed-write
+ * detection for internal metadata operations (e.g., SPI writes to ducklake_*
+ * tables) that should not count as user-initiated Postgres writes.
+ *
+ * WARNING: Misuse can mask genuine mixed-write violations. Only use for
+ * operations that are logically part of a DuckDB transaction.
+ */
+__attribute__((visibility("default"))) void
+DuckdbUnsafeSetNextExpectedCommandId(uint32_t command_id) {
+	next_expected_command_id = command_id;
+}
+
+__attribute__((visibility("default"))) void
+DuckdbAllowSubtransaction(bool allow) {
+	allow_subtransaction = allow;
+}
 
 namespace pg {
 
@@ -313,7 +345,7 @@ DuckdbSubXactCallback_Cpp(SubXactEvent event) {
 		return;
 	}
 
-	if (event == SUBXACT_EVENT_START_SUB) {
+	if (event == SUBXACT_EVENT_START_SUB && !allow_subtransaction) {
 		throw duckdb::NotImplementedException("SAVEPOINT is not supported in DuckDB");
 	}
 }
diff --git a/third_party/pg_duckdb_extensions.cmake b/third_party/pg_duckdb_extensions.cmake
index be05ffcb..17eda106 100644
--- a/third_party/pg_duckdb_extensions.cmake
+++ b/third_party/pg_duckdb_extensions.cmake
@@ -4,3 +4,7 @@ duckdb_extension_load(httpfs
     GIT_URL https://github.com/duckdb/duckdb-httpfs
     GIT_TAG 9c7d34977b10346d0b4cbbde5df807d1dab0b2bf
 )
+duckdb_extension_load(postgres_scanner
+    GIT_URL https://github.com/relytcloud/duckdb-postgres
+    GIT_TAG 0684e8f70c948b824ac66c8c4b1450b4b1bcd33c
+)