From a49a6a8f469490c9040f136986bf70873e4ca3c2 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 23 Jul 2024 19:45:20 +0400 Subject: [PATCH 1/6] fix dump and vacuum of vector indices --- libsql-sqlite3/src/build.c | 11 +- libsql-sqlite3/src/vectorIndex.c | 211 ++++++++++++++++++++-------- libsql-sqlite3/src/vectorIndexInt.h | 2 +- 3 files changed, 161 insertions(+), 63 deletions(-) diff --git a/libsql-sqlite3/src/build.c b/libsql-sqlite3/src/build.c index afba9b58d7..86767ed1c2 100644 --- a/libsql-sqlite3/src/build.c +++ b/libsql-sqlite3/src/build.c @@ -4000,6 +4000,7 @@ void sqlite3CreateIndex( int nExtraCol; /* Number of extra columns needed */ char *zExtra = 0; /* Extra space after the Index object */ Index *pPk = 0; /* PRIMARY KEY index for WITHOUT ROWID tables */ + int vectorIdxRc = 0, skipRefill = 0; assert( db->pParse==pParse ); if( pParse->nErr ){ @@ -4309,9 +4310,13 @@ void sqlite3CreateIndex( #ifndef SQLITE_OMIT_VECTOR - if( vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing) != SQLITE_OK ) { + vectorIdxRc = vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing); + if( vectorIdxRc < 0 ){ goto exit_create_index; } + if( vectorIdxRc == 1 ){ + skipRefill = 1; + } idxType = pIndex->idxType; // vectorIndexCreate can update idxType to 4 (VECTOR INDEX) #endif @@ -4515,7 +4520,9 @@ void sqlite3CreateIndex( ** to invalidate all pre-compiled statements. 
*/ if( pTblName ){ - sqlite3RefillIndex(pParse, pIndex, iMem); + if( !skipRefill ){ + sqlite3RefillIndex(pParse, pIndex, iMem); + } sqlite3ChangeCookie(pParse, iDb); sqlite3VdbeAddParseSchemaOp(v, iDb, sqlite3MPrintf(db, "name='%q' AND type='index'", pIndex->zName), 0); diff --git a/libsql-sqlite3/src/vectorIndex.c b/libsql-sqlite3/src/vectorIndex.c index 2fc7657e84..efcd0be563 100644 --- a/libsql-sqlite3/src/vectorIndex.c +++ b/libsql-sqlite3/src/vectorIndex.c @@ -30,10 +30,30 @@ #include "sqliteInt.h" #include "vectorIndexInt.h" +/* + * The code which glues SQLite internals to the pure DiskANN implementation resides here + * Main internal API methods are: + * vectorIndexCreate() + * vectorIndexClear() + * vectorIndexDrop() + * vectorIndexSearch() + * vectorIndexCursorInit() + * vectorIndexCursorClose() + * + * + cursor operations: + * vectorIndexInsert(cursor) + * vectorIndexDelete(cursor) +*/ + /************************************************************************** ** VectorIdxParams utilities ****************************************************************************/ +// VACUUM creates tables and indices first and only then populates data +// we need to ignore inserts from 'INSERT INTO vacuum.t SELECT * FROM t' statements because +// all shadow tables will be populated by the VACUUM process during the regular process of table copy +#define IsVacuum(db) ((db->mDbFlags&DBFLAG_Vacuum)!=0) + void vectorIdxParamsInit(VectorIdxParams *pParams, u8 *pBinBuf, int nBinSize) { assert( nBinSize <= VECTOR_INDEX_PARAMS_BUF_SIZE ); @@ -592,9 +612,11 @@ int insertIndexParameters(sqlite3* db, const char *zDbSName, const char *zName, goto clear_and_exit; } rc = sqlite3_step(pStatement); - if( rc != SQLITE_DONE ){ + if( rc == SQLITE_CONSTRAINT ){ + rc = SQLITE_CONSTRAINT; + }else if( rc != SQLITE_DONE ){ rc = SQLITE_ERROR; - } else { + }else{ rc = SQLITE_OK; } clear_and_exit: @@ -633,51 +655,25 @@ int removeIndexParameters(sqlite3* db, const char *zName) { return rc; } -int 
vectorIndexGetParameters( - sqlite3 *db, - const char *zIndexName, - VectorIdxParams *pParams -) { +int vectorIndexTryGetParametersFromTableFormat(sqlite3 *db, const char *zSql, const char *zIdxName, VectorIdxParams *pParams) { int rc = SQLITE_OK; sqlite3_stmt *pStmt = NULL; int nBinSize; - static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; - static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE type = ? AND name = ?"; - rc = sqlite3_prepare_v2(db, zSelectSql, -1, &pStmt, 0); - if( rc == SQLITE_OK ) { - rc = sqlite3_bind_text(pStmt, 1, zIndexName, -1, SQLITE_STATIC); - if( rc != SQLITE_OK ){ - goto out_free; - } + vectorIdxParamsInit(pParams, NULL, 0); - if( sqlite3_step(pStmt) == SQLITE_ROW ){ - assert( sqlite3_column_type(pStmt, 0) == SQLITE_BLOB ); - nBinSize = sqlite3_column_bytes(pStmt, 0); - if( nBinSize > VECTOR_INDEX_PARAMS_BUF_SIZE ){ - rc = SQLITE_ERROR; - goto out_free; - } - vectorIdxParamsInit(pParams, (u8*)sqlite3_column_blob(pStmt, 0), nBinSize); - goto out_free; - } - } - if( pStmt ){ - sqlite3_finalize(pStmt); - pStmt = NULL; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc != SQLITE_OK ){ + goto out; } - - rc = sqlite3_prepare_v2(db, zSelectSqlPekkaLegacy, -1, &pStmt, 0); + rc = sqlite3_bind_text(pStmt, 1, zIdxName, -1, SQLITE_STATIC); if( rc != SQLITE_OK ){ - goto out_free; + goto out; } - sqlite3_bind_text(pStmt, 1, "diskann", -1, SQLITE_STATIC); - sqlite3_bind_text(pStmt, 2, zIndexName, -1, SQLITE_STATIC); if( sqlite3_step(pStmt) != SQLITE_ROW ){ rc = SQLITE_ERROR; - goto out_free; + goto out; } - vectorIdxParamsPutU64(pParams, VECTOR_FORMAT_PARAM_ID, 1); vectorIdxParamsPutU64(pParams, VECTOR_INDEX_TYPE_PARAM_ID, VECTOR_INDEX_TYPE_DISKANN); vectorIdxParamsPutU64(pParams, VECTOR_TYPE_PARAM_ID, VECTOR_TYPE_FLOAT32); @@ -685,20 +681,81 @@ int vectorIndexGetParameters( vectorIdxParamsPutU64(pParams, 
VECTOR_METRIC_TYPE_PARAM_ID, VECTOR_METRIC_TYPE_COS); if( vectorIdxParamsPutU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID, sqlite3_column_int(pStmt, 1)) != 0 ){ rc = SQLITE_ERROR; + goto out; + } + assert( sqlite3_step(pStmt) == SQLITE_DONE ); + rc = SQLITE_OK; +out: + if( pStmt != NULL ){ + sqlite3_finalize(pStmt); + } + return rc; +} + +int vectorIndexTryGetParametersFromBinFormat(sqlite3 *db, const char *zSql, const char *zIdxName, VectorIdxParams *pParams) { + int rc = SQLITE_OK; + sqlite3_stmt *pStmt = NULL; + int nBinSize; + + vectorIdxParamsInit(pParams, NULL, 0); + + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc != SQLITE_OK ){ + goto out; + } + rc = sqlite3_bind_text(pStmt, 1, zIdxName, -1, SQLITE_STATIC); + if( rc != SQLITE_OK ){ + goto out; + } + if( sqlite3_step(pStmt) != SQLITE_ROW ){ + rc = SQLITE_ERROR; + goto out; + } + assert( sqlite3_column_type(pStmt, 0) == SQLITE_BLOB ); + nBinSize = sqlite3_column_bytes(pStmt, 0); + if( nBinSize > VECTOR_INDEX_PARAMS_BUF_SIZE ){ + rc = SQLITE_ERROR; + goto out; } -out_free: + vectorIdxParamsInit(pParams, (u8*)sqlite3_column_blob(pStmt, 0), nBinSize); + assert( sqlite3_step(pStmt) == SQLITE_DONE ); + rc = SQLITE_OK; +out: if( pStmt != NULL ){ sqlite3_finalize(pStmt); } return rc; } +int vectorIndexGetParameters( + sqlite3 *db, + const char *zIdxName, + VectorIdxParams *pParams +) { + int rc = SQLITE_OK; + + static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE name = ?"; + rc = vectorIndexTryGetParametersFromBinFormat(db, zSelectSql, zIdxName, pParams); + if( rc == SQLITE_OK ){ + return SQLITE_OK; + } + rc = vectorIndexTryGetParametersFromTableFormat(db, zSelectSqlPekkaLegacy, zIdxName, pParams); + if( rc == SQLITE_OK ){ + return SQLITE_OK; + } + return SQLITE_ERROR; +} int vectorIndexDrop(sqlite3 *db, const char *zDbSName, 
const char *zIdxName) { // we want to try delete all traces of index on every attempt // this is done to prevent unrecoverable situations where index were dropped but index parameters deletion failed and second attempt will fail on first step int rcIdx, rcParams; + if( IsVacuum(db) ){ + return SQLITE_OK; + } + assert( zDbSName != NULL ); rcIdx = diskAnnDropIndex(db, zDbSName, zIdxName); @@ -708,15 +765,27 @@ int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { assert( zDbSName != NULL ); + + if( IsVacuum(db) ){ + return SQLITE_OK; + } + return diskAnnClearIndex(db, zDbSName, zIdxName); } +/* + * +*/ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const IdList *pUsing) { int i, rc = SQLITE_OK; int dims, type; int hasLibsqlVectorIdxFn = 0, hasCollation = 0; const char *pzErrMsg; + if( IsVacuum(pParse->db) ){ + return SQLITE_OK; + } + assert( zDbSName != NULL ); sqlite3 *db = pParse->db; @@ -732,7 +801,7 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pParse->eParseMode ){ // scheme can be re-parsed by SQLite for different reasons (for example, to check schema after // ALTER COLUMN statements) - so we must skip creation in such cases - goto ignored; + goto ignore; } // backward compatibility: preserve old indices with deprecated syntax but forbid creation of new indices with this syntax @@ -742,15 +811,15 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } else { sqlite3ErrorMsg(pParse, "USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX xxx ON yyy ( " VECTOR_INDEX_MARKER_FUNCTION "(zzz) )"); } - return SQLITE_ERROR; + goto fail; } if( db->init.busy == 1 && pUsing != NULL ){ - goto succeed; + goto ok; } // vector index must have expressions over column if( pIdx->aColExpr == NULL ) { - goto ignored; + goto ignore; } pListItem = pIdx->aColExpr->a; 
@@ -765,20 +834,20 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } } if( !hasLibsqlVectorIdxFn ) { - goto ignored; + goto ignore; } if( hasCollation ){ sqlite3ErrorMsg(pParse, "vector index can't have collation"); - return SQLITE_ERROR; + goto fail; } if( pIdx->aColExpr->nExpr != 1 ) { sqlite3ErrorMsg(pParse, "vector index must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function"); - return SQLITE_ERROR; + goto fail; } // we are able to support this but I doubt this works for now - more polishing required to make this work if( pIdx->pPartIdxWhere != NULL ) { sqlite3ErrorMsg(pParse, "partial vector index is not supported"); - return SQLITE_ERROR; + goto fail; } pArgsList = pIdx->aColExpr->a[0].pExpr->x.pList; @@ -786,61 +855,73 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pArgsList->nExpr < 1 ){ sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " must contain at least one argument"); - return SQLITE_ERROR; + goto fail; } if( pListItem[0].pExpr->op != TK_COLUMN ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be a column token"); - return SQLITE_ERROR; + goto fail; } iEmbeddingColumn = pListItem[0].pExpr->iColumn; if( iEmbeddingColumn < 0 ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be column with vector type"); - return SQLITE_ERROR; + goto fail; } assert( iEmbeddingColumn >= 0 && iEmbeddingColumn < pTable->nCol ); zEmbeddingColumnTypeName = sqlite3ColumnType(&pTable->aCol[iEmbeddingColumn], ""); if( vectorIdxParseColumnType(zEmbeddingColumnTypeName, &type, &dims, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "%s: %s", pzErrMsg, zEmbeddingColumnTypeName); - return SQLITE_ERROR; + goto fail; } // schema is locked while db is initializing and we need to just proceed here if( db->init.busy == 1 ){ - goto succeed; + goto ok; } rc = initVectorIndexMetaTable(db, zDbSName); if( rc != SQLITE_OK ){ 
- return rc; + sqlite3ErrorMsg(pParse, "failed to init vector index meta table: %s", sqlite3_errmsg(db)); + goto fail; } rc = parseVectorIdxParams(pParse, &idxParams, type, dims, pListItem + 1, pArgsList->nExpr - 1); if( rc != SQLITE_OK ){ - return rc; + sqlite3ErrorMsg(pParse, "failed to parse vector idx params"); + goto fail; } if( vectorIdxKeyGet(pTable, &idxKey, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "failed to detect underlying table key: %s", pzErrMsg); - return SQLITE_ERROR; + goto fail; } if( idxKey.nKeyColumns != 1 ){ sqlite3ErrorMsg(pParse, "vector index for tables without ROWID and composite primary key are not supported"); - return SQLITE_ERROR; + goto fail; } rc = diskAnnCreateIndex(db, zDbSName, pIdx->zName, &idxKey, &idxParams); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to initialize diskann vector index"); - return rc; + goto fail; } rc = insertIndexParameters(db, zDbSName, pIdx->zName, &idxParams); + if( rc == SQLITE_CONSTRAINT ){ + // we are violating unique constraint here which means that someone inserted parameters in the table before us + // taking aside corruption scenarios, this can be in case of loading dump (because tables are loaded before indices) or vacuum-ing DB + // both these cases are valid and we must proceed with index creating but avoid index-refill step as it is already filled + goto skip_refill; + } if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to update global metadata table"); - return rc; + goto fail; } -succeed: +ok: pIdx->idxType = SQLITE_IDXTYPE_VECTOR; - return SQLITE_OK; -ignored: - return SQLITE_OK; +ignore: + return 0; +skip_refill: + pIdx->idxType = SQLITE_IDXTYPE_VECTOR; + return 1; +fail: + return -1; } int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value **argv, VectorOutRows *pRows, char **pzErrMsg) { @@ -854,6 +935,7 @@ int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value VectorIdxParams idxParams; vectorIdxParamsInit(&idxParams, 
NULL, 0); + assert( !IsVacuum(db) ); assert( zDbSName != NULL ); if( argc != 3 ){ @@ -936,6 +1018,10 @@ int vectorIndexInsert( int rc; VectorInRow vectorInRow; + if( IsVacuum(pCur->db) ){ + return SQLITE_OK; + } + rc = vectorInRowAlloc(pCur->db, pRecord, &vectorInRow, pzErrMsg); if( rc != SQLITE_OK ){ return rc; @@ -954,6 +1040,11 @@ int vectorIndexDelete( char **pzErrMsg ){ VectorInRow payload; + + if( IsVacuum(pCur->db) ){ + return SQLITE_OK; + } + payload.pVector = NULL; payload.nKeys = r->nField - 1; payload.pKeyValues = r->aMem + 1; diff --git a/libsql-sqlite3/src/vectorIndexInt.h b/libsql-sqlite3/src/vectorIndexInt.h index 34b1a8ab24..2ee4b7b7eb 100644 --- a/libsql-sqlite3/src/vectorIndexInt.h +++ b/libsql-sqlite3/src/vectorIndexInt.h @@ -227,11 +227,11 @@ int vectorIdxParseColumnType(const char *, int *, int *, const char **); int vectorIndexCreate(Parse*, Index*, const char *, const IdList*); int vectorIndexClear(sqlite3 *, const char *, const char *); int vectorIndexDrop(sqlite3 *, const char *, const char *); +int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); int vectorIndexCursorInit(sqlite3 *, const char *, const char *, VectorIdxCursor **); void vectorIndexCursorClose(sqlite3 *, VectorIdxCursor *); int vectorIndexInsert(VectorIdxCursor *, const UnpackedRecord *, char **); int vectorIndexDelete(VectorIdxCursor *, const UnpackedRecord *, char **); -int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); #ifdef __cplusplus } /* end of the 'extern "C"' block */ From acec55b4bbb33521aff46c148b20c1bc816a3b50 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 23 Jul 2024 19:59:16 +0400 Subject: [PATCH 2/6] fix comment a bit --- libsql-sqlite3/src/vectorIndex.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libsql-sqlite3/src/vectorIndex.c b/libsql-sqlite3/src/vectorIndex.c index efcd0be563..be451d6c63 100644 --- a/libsql-sqlite3/src/vectorIndex.c +++ 
b/libsql-sqlite3/src/vectorIndex.c @@ -774,7 +774,17 @@ int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { } /* + * vectorIndexCreate analyzes any index creation expression and creates a vector index if needed + * it tolerates the situation when the insert into VECTOR_INDEX_GLOBAL_META_TABLE fails with a conflict + * this is done intentionally in order to natively support upload of SQLite dumps * + * a dump populates tables first and creates indices afterwards + * so we must skip the index refill because shadow tables are already filled + * + * 1. if vector index must not be created : 0 returned and pIdx is unchanged + * 2. if vector index must be created and refilled from base table: 0 returned and pIdx->idxType set to SQLITE_IDXTYPE_VECTOR + * 3. if vector index must be created but refill must be skipped : 1 returned and pIdx->idxType set to SQLITE_IDXTYPE_VECTOR + * 4. in case of any error : -1 returned (and pParse errMsg is populated with some error message) */ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const IdList *pUsing) { int i, rc = SQLITE_OK; From 87799e0bced76abc01d52c15e3457abe773c4244 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 23 Jul 2024 20:03:42 +0400 Subject: [PATCH 3/6] build bundles --- .../SQLite3MultipleCiphers/src/sqlite3.c | 234 +++++++++++++----- libsql-ffi/bundled/src/sqlite3.c | 234 +++++++++++++----- 2 files changed, 342 insertions(+), 126 deletions(-) diff --git a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c index c840cb8032..30302d1411 100644 --- a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c +++ b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c @@ -85133,11 +85133,11 @@ int vectorIdxParseColumnType(const char *, int *, int *, const char **); int vectorIndexCreate(Parse*, Index*, const char *, const IdList*); int vectorIndexClear(sqlite3 *, const char *, const char *); int vectorIndexDrop(sqlite3 *, const char *, 
const char *); +int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); int vectorIndexCursorInit(sqlite3 *, const char *, const char *, VectorIdxCursor **); void vectorIndexCursorClose(sqlite3 *, VectorIdxCursor *); int vectorIndexInsert(VectorIdxCursor *, const UnpackedRecord *, char **); int vectorIndexDelete(VectorIdxCursor *, const UnpackedRecord *, char **); -int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); #if 0 } /* end of the 'extern "C"' block */ @@ -125791,6 +125791,7 @@ SQLITE_PRIVATE void sqlite3CreateIndex( int nExtraCol; /* Number of extra columns needed */ char *zExtra = 0; /* Extra space after the Index object */ Index *pPk = 0; /* PRIMARY KEY index for WITHOUT ROWID tables */ + int vectorIdxRc = 0, skipRefill = 0; assert( db->pParse==pParse ); if( pParse->nErr ){ @@ -126100,9 +126101,13 @@ SQLITE_PRIVATE void sqlite3CreateIndex( #ifndef SQLITE_OMIT_VECTOR - if( vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing) != SQLITE_OK ) { + vectorIdxRc = vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing); + if( vectorIdxRc < 0 ){ goto exit_create_index; } + if( vectorIdxRc == 1 ){ + skipRefill = 1; + } idxType = pIndex->idxType; // vectorIndexCreate can update idxType to 4 (VECTOR INDEX) #endif @@ -126306,7 +126311,9 @@ SQLITE_PRIVATE void sqlite3CreateIndex( ** to invalidate all pre-compiled statements. 
*/ if( pTblName ){ - sqlite3RefillIndex(pParse, pIndex, iMem); + if( !skipRefill ){ + sqlite3RefillIndex(pParse, pIndex, iMem); + } sqlite3ChangeCookie(pParse, iDb); sqlite3VdbeAddParseSchemaOp(v, iDb, sqlite3MPrintf(db, "name='%q' AND type='index'", pIndex->zName), 0); @@ -211447,10 +211454,30 @@ int vectorF64ParseSqliteBlob( /* #include "sqliteInt.h" */ /* #include "vectorIndexInt.h" */ +/* + * The code which glue SQLite internals with pure DiskANN implementation resides here + * Main internal API methods are: + * vectorIndexCreate() + * vectorIndexClear() + * vectorIndexDrop() + * vectorIndexSearch() + * vectorIndexCursorInit() + * vectorIndexCursorClose() + * + * + cursor operations: + * vectorIndexInsert(cursor) + * vectorIndexDelete(cursor) +*/ + /************************************************************************** ** VectorIdxParams utilities ****************************************************************************/ +// VACUUM creates tables and indices first and only then populate data +// we need to ignore inserts from 'INSERT INTO vacuum.t SELECT * FROM t' statements because +// all shadow tables will be populated by VACUUM process during regular process of table copy +#define IsVacuum(db) ((db->mDbFlags&DBFLAG_Vacuum)!=0) + void vectorIdxParamsInit(VectorIdxParams *pParams, u8 *pBinBuf, int nBinSize) { assert( nBinSize <= VECTOR_INDEX_PARAMS_BUF_SIZE ); @@ -212009,9 +212036,11 @@ int insertIndexParameters(sqlite3* db, const char *zDbSName, const char *zName, goto clear_and_exit; } rc = sqlite3_step(pStatement); - if( rc != SQLITE_DONE ){ + if( rc == SQLITE_CONSTRAINT ){ + rc = SQLITE_CONSTRAINT; + }else if( rc != SQLITE_DONE ){ rc = SQLITE_ERROR; - } else { + }else{ rc = SQLITE_OK; } clear_and_exit: @@ -212050,51 +212079,25 @@ int removeIndexParameters(sqlite3* db, const char *zName) { return rc; } -int vectorIndexGetParameters( - sqlite3 *db, - const char *zIndexName, - VectorIdxParams *pParams -) { +int 
vectorIndexTryGetParametersFromTableFormat(sqlite3 *db, const char *zSql, const char *zIdxName, VectorIdxParams *pParams) { int rc = SQLITE_OK; sqlite3_stmt *pStmt = NULL; int nBinSize; - static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; - static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE type = ? AND name = ?"; - rc = sqlite3_prepare_v2(db, zSelectSql, -1, &pStmt, 0); - if( rc == SQLITE_OK ) { - rc = sqlite3_bind_text(pStmt, 1, zIndexName, -1, SQLITE_STATIC); - if( rc != SQLITE_OK ){ - goto out_free; - } + vectorIdxParamsInit(pParams, NULL, 0); - if( sqlite3_step(pStmt) == SQLITE_ROW ){ - assert( sqlite3_column_type(pStmt, 0) == SQLITE_BLOB ); - nBinSize = sqlite3_column_bytes(pStmt, 0); - if( nBinSize > VECTOR_INDEX_PARAMS_BUF_SIZE ){ - rc = SQLITE_ERROR; - goto out_free; - } - vectorIdxParamsInit(pParams, (u8*)sqlite3_column_blob(pStmt, 0), nBinSize); - goto out_free; - } - } - if( pStmt ){ - sqlite3_finalize(pStmt); - pStmt = NULL; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc != SQLITE_OK ){ + goto out; } - - rc = sqlite3_prepare_v2(db, zSelectSqlPekkaLegacy, -1, &pStmt, 0); + rc = sqlite3_bind_text(pStmt, 1, zIdxName, -1, SQLITE_STATIC); if( rc != SQLITE_OK ){ - goto out_free; + goto out; } - sqlite3_bind_text(pStmt, 1, "diskann", -1, SQLITE_STATIC); - sqlite3_bind_text(pStmt, 2, zIndexName, -1, SQLITE_STATIC); if( sqlite3_step(pStmt) != SQLITE_ROW ){ rc = SQLITE_ERROR; - goto out_free; + goto out; } - vectorIdxParamsPutU64(pParams, VECTOR_FORMAT_PARAM_ID, 1); vectorIdxParamsPutU64(pParams, VECTOR_INDEX_TYPE_PARAM_ID, VECTOR_INDEX_TYPE_DISKANN); vectorIdxParamsPutU64(pParams, VECTOR_TYPE_PARAM_ID, VECTOR_TYPE_FLOAT32); @@ -212102,20 +212105,81 @@ int vectorIndexGetParameters( vectorIdxParamsPutU64(pParams, VECTOR_METRIC_TYPE_PARAM_ID, VECTOR_METRIC_TYPE_COS); if( vectorIdxParamsPutU64(pParams, 
VECTOR_BLOCK_SIZE_PARAM_ID, sqlite3_column_int(pStmt, 1)) != 0 ){ rc = SQLITE_ERROR; + goto out; } -out_free: + assert( sqlite3_step(pStmt) == SQLITE_DONE ); + rc = SQLITE_OK; +out: if( pStmt != NULL ){ sqlite3_finalize(pStmt); } return rc; } +int vectorIndexTryGetParametersFromBinFormat(sqlite3 *db, const char *zSql, const char *zIdxName, VectorIdxParams *pParams) { + int rc = SQLITE_OK; + sqlite3_stmt *pStmt = NULL; + int nBinSize; + + vectorIdxParamsInit(pParams, NULL, 0); + + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc != SQLITE_OK ){ + goto out; + } + rc = sqlite3_bind_text(pStmt, 1, zIdxName, -1, SQLITE_STATIC); + if( rc != SQLITE_OK ){ + goto out; + } + if( sqlite3_step(pStmt) != SQLITE_ROW ){ + rc = SQLITE_ERROR; + goto out; + } + assert( sqlite3_column_type(pStmt, 0) == SQLITE_BLOB ); + nBinSize = sqlite3_column_bytes(pStmt, 0); + if( nBinSize > VECTOR_INDEX_PARAMS_BUF_SIZE ){ + rc = SQLITE_ERROR; + goto out; + } + vectorIdxParamsInit(pParams, (u8*)sqlite3_column_blob(pStmt, 0), nBinSize); + assert( sqlite3_step(pStmt) == SQLITE_DONE ); + rc = SQLITE_OK; +out: + if( pStmt != NULL ){ + sqlite3_finalize(pStmt); + } + return rc; +} + +int vectorIndexGetParameters( + sqlite3 *db, + const char *zIdxName, + VectorIdxParams *pParams +) { + int rc = SQLITE_OK; + + static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE name = ?"; + rc = vectorIndexTryGetParametersFromBinFormat(db, zSelectSql, zIdxName, pParams); + if( rc == SQLITE_OK ){ + return SQLITE_OK; + } + rc = vectorIndexTryGetParametersFromTableFormat(db, zSelectSqlPekkaLegacy, zIdxName, pParams); + if( rc == SQLITE_OK ){ + return SQLITE_OK; + } + return SQLITE_ERROR; +} int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { // we want to try delete all traces of index on every attempt // this 
is done to prevent unrecoverable situations where index were dropped but index parameters deletion failed and second attempt will fail on first step int rcIdx, rcParams; + if( IsVacuum(db) ){ + return SQLITE_OK; + } + assert( zDbSName != NULL ); rcIdx = diskAnnDropIndex(db, zDbSName, zIdxName); @@ -212125,15 +212189,37 @@ int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { assert( zDbSName != NULL ); + + if( IsVacuum(db) ){ + return SQLITE_OK; + } + return diskAnnClearIndex(db, zDbSName, zIdxName); } +/* + * vectorIndexCreate analyzes any index creation expression and create vector index if needed + * it tolerates the situation when insert into VECTOR_INDEX_GLOBAL_META_TABLE failed with conflict + * this made intentionally in order to natively support upload of SQLite dumps + * + * dump populates tables first and create indices after + * so we must omit them because shadow tables already filled + * + * 1. if vector index must not be created : 0 returned and pIdx is unchanged + * 2. if vector index must be created and refilled from base table: 0 returned and pIdx->idxType set to SQLITE_IDXTYPE_VECTOR + * 3. if vector index must be created but refill must be skipped : 1 returned and pIdx->idxType set to SQLITE_IDXTYPE_VECTOR + * 4. 
in case of any error :-1 returned (and pParse errMsg is populated with some error message) +*/ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const IdList *pUsing) { int i, rc = SQLITE_OK; int dims, type; int hasLibsqlVectorIdxFn = 0, hasCollation = 0; const char *pzErrMsg; + if( IsVacuum(pParse->db) ){ + return SQLITE_OK; + } + assert( zDbSName != NULL ); sqlite3 *db = pParse->db; @@ -212149,7 +212235,7 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pParse->eParseMode ){ // scheme can be re-parsed by SQLite for different reasons (for example, to check schema after // ALTER COLUMN statements) - so we must skip creation in such cases - goto ignored; + goto ignore; } // backward compatibility: preserve old indices with deprecated syntax but forbid creation of new indices with this syntax @@ -212159,15 +212245,15 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } else { sqlite3ErrorMsg(pParse, "USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX xxx ON yyy ( " VECTOR_INDEX_MARKER_FUNCTION "(zzz) )"); } - return SQLITE_ERROR; + goto fail; } if( db->init.busy == 1 && pUsing != NULL ){ - goto succeed; + goto ok; } // vector index must have expressions over column if( pIdx->aColExpr == NULL ) { - goto ignored; + goto ignore; } pListItem = pIdx->aColExpr->a; @@ -212182,20 +212268,20 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } } if( !hasLibsqlVectorIdxFn ) { - goto ignored; + goto ignore; } if( hasCollation ){ sqlite3ErrorMsg(pParse, "vector index can't have collation"); - return SQLITE_ERROR; + goto fail; } if( pIdx->aColExpr->nExpr != 1 ) { sqlite3ErrorMsg(pParse, "vector index must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function"); - return SQLITE_ERROR; + goto fail; } // we are able to support this but I doubt this works for now - more polishing required to make this work 
if( pIdx->pPartIdxWhere != NULL ) { sqlite3ErrorMsg(pParse, "partial vector index is not supported"); - return SQLITE_ERROR; + goto fail; } pArgsList = pIdx->aColExpr->a[0].pExpr->x.pList; @@ -212203,61 +212289,73 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pArgsList->nExpr < 1 ){ sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " must contain at least one argument"); - return SQLITE_ERROR; + goto fail; } if( pListItem[0].pExpr->op != TK_COLUMN ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be a column token"); - return SQLITE_ERROR; + goto fail; } iEmbeddingColumn = pListItem[0].pExpr->iColumn; if( iEmbeddingColumn < 0 ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be column with vector type"); - return SQLITE_ERROR; + goto fail; } assert( iEmbeddingColumn >= 0 && iEmbeddingColumn < pTable->nCol ); zEmbeddingColumnTypeName = sqlite3ColumnType(&pTable->aCol[iEmbeddingColumn], ""); if( vectorIdxParseColumnType(zEmbeddingColumnTypeName, &type, &dims, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "%s: %s", pzErrMsg, zEmbeddingColumnTypeName); - return SQLITE_ERROR; + goto fail; } // schema is locked while db is initializing and we need to just proceed here if( db->init.busy == 1 ){ - goto succeed; + goto ok; } rc = initVectorIndexMetaTable(db, zDbSName); if( rc != SQLITE_OK ){ - return rc; + sqlite3ErrorMsg(pParse, "failed to init vector index meta table: %s", sqlite3_errmsg(db)); + goto fail; } rc = parseVectorIdxParams(pParse, &idxParams, type, dims, pListItem + 1, pArgsList->nExpr - 1); if( rc != SQLITE_OK ){ - return rc; + sqlite3ErrorMsg(pParse, "failed to parse vector idx params"); + goto fail; } if( vectorIdxKeyGet(pTable, &idxKey, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "failed to detect underlying table key: %s", pzErrMsg); - return SQLITE_ERROR; + goto fail; } if( idxKey.nKeyColumns != 1 ){ sqlite3ErrorMsg(pParse, "vector index for tables without 
ROWID and composite primary key are not supported"); - return SQLITE_ERROR; + goto fail; } rc = diskAnnCreateIndex(db, zDbSName, pIdx->zName, &idxKey, &idxParams); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to initialize diskann vector index"); - return rc; + goto fail; } rc = insertIndexParameters(db, zDbSName, pIdx->zName, &idxParams); + if( rc == SQLITE_CONSTRAINT ){ + // we are violating unique constraint here which means that someone inserted parameters in the table before us + // taking aside corruption scenarios, this can be in case of loading dump (because tables are loaded before indices) or vacuum-ing DB + // both these cases are valid and we must proceed with index creating but avoid index-refill step as it is already filled + goto skip_refill; + } if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to update global metadata table"); - return rc; + goto fail; } -succeed: +ok: pIdx->idxType = SQLITE_IDXTYPE_VECTOR; - return SQLITE_OK; -ignored: - return SQLITE_OK; +ignore: + return 0; +skip_refill: + pIdx->idxType = SQLITE_IDXTYPE_VECTOR; + return 1; +fail: + return -1; } int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value **argv, VectorOutRows *pRows, char **pzErrMsg) { @@ -212271,6 +212369,7 @@ int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value VectorIdxParams idxParams; vectorIdxParamsInit(&idxParams, NULL, 0); + assert( !IsVacuum(db) ); assert( zDbSName != NULL ); if( argc != 3 ){ @@ -212353,6 +212452,10 @@ int vectorIndexInsert( int rc; VectorInRow vectorInRow; + if( IsVacuum(pCur->db) ){ + return SQLITE_OK; + } + rc = vectorInRowAlloc(pCur->db, pRecord, &vectorInRow, pzErrMsg); if( rc != SQLITE_OK ){ return rc; @@ -212371,6 +212474,11 @@ int vectorIndexDelete( char **pzErrMsg ){ VectorInRow payload; + + if( IsVacuum(pCur->db) ){ + return SQLITE_OK; + } + payload.pVector = NULL; payload.nKeys = r->nField - 1; payload.pKeyValues = r->aMem + 1; diff --git 
a/libsql-ffi/bundled/src/sqlite3.c b/libsql-ffi/bundled/src/sqlite3.c index c840cb8032..30302d1411 100644 --- a/libsql-ffi/bundled/src/sqlite3.c +++ b/libsql-ffi/bundled/src/sqlite3.c @@ -85133,11 +85133,11 @@ int vectorIdxParseColumnType(const char *, int *, int *, const char **); int vectorIndexCreate(Parse*, Index*, const char *, const IdList*); int vectorIndexClear(sqlite3 *, const char *, const char *); int vectorIndexDrop(sqlite3 *, const char *, const char *); +int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); int vectorIndexCursorInit(sqlite3 *, const char *, const char *, VectorIdxCursor **); void vectorIndexCursorClose(sqlite3 *, VectorIdxCursor *); int vectorIndexInsert(VectorIdxCursor *, const UnpackedRecord *, char **); int vectorIndexDelete(VectorIdxCursor *, const UnpackedRecord *, char **); -int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); #if 0 } /* end of the 'extern "C"' block */ @@ -125791,6 +125791,7 @@ SQLITE_PRIVATE void sqlite3CreateIndex( int nExtraCol; /* Number of extra columns needed */ char *zExtra = 0; /* Extra space after the Index object */ Index *pPk = 0; /* PRIMARY KEY index for WITHOUT ROWID tables */ + int vectorIdxRc = 0, skipRefill = 0; assert( db->pParse==pParse ); if( pParse->nErr ){ @@ -126100,9 +126101,13 @@ SQLITE_PRIVATE void sqlite3CreateIndex( #ifndef SQLITE_OMIT_VECTOR - if( vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing) != SQLITE_OK ) { + vectorIdxRc = vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing); + if( vectorIdxRc < 0 ){ goto exit_create_index; } + if( vectorIdxRc == 1 ){ + skipRefill = 1; + } idxType = pIndex->idxType; // vectorIndexCreate can update idxType to 4 (VECTOR INDEX) #endif @@ -126306,7 +126311,9 @@ SQLITE_PRIVATE void sqlite3CreateIndex( ** to invalidate all pre-compiled statements. 
*/ if( pTblName ){ - sqlite3RefillIndex(pParse, pIndex, iMem); + if( !skipRefill ){ + sqlite3RefillIndex(pParse, pIndex, iMem); + } sqlite3ChangeCookie(pParse, iDb); sqlite3VdbeAddParseSchemaOp(v, iDb, sqlite3MPrintf(db, "name='%q' AND type='index'", pIndex->zName), 0); @@ -211447,10 +211454,30 @@ int vectorF64ParseSqliteBlob( /* #include "sqliteInt.h" */ /* #include "vectorIndexInt.h" */ +/* + * The code which glue SQLite internals with pure DiskANN implementation resides here + * Main internal API methods are: + * vectorIndexCreate() + * vectorIndexClear() + * vectorIndexDrop() + * vectorIndexSearch() + * vectorIndexCursorInit() + * vectorIndexCursorClose() + * + * + cursor operations: + * vectorIndexInsert(cursor) + * vectorIndexDelete(cursor) +*/ + /************************************************************************** ** VectorIdxParams utilities ****************************************************************************/ +// VACUUM creates tables and indices first and only then populate data +// we need to ignore inserts from 'INSERT INTO vacuum.t SELECT * FROM t' statements because +// all shadow tables will be populated by VACUUM process during regular process of table copy +#define IsVacuum(db) ((db->mDbFlags&DBFLAG_Vacuum)!=0) + void vectorIdxParamsInit(VectorIdxParams *pParams, u8 *pBinBuf, int nBinSize) { assert( nBinSize <= VECTOR_INDEX_PARAMS_BUF_SIZE ); @@ -212009,9 +212036,11 @@ int insertIndexParameters(sqlite3* db, const char *zDbSName, const char *zName, goto clear_and_exit; } rc = sqlite3_step(pStatement); - if( rc != SQLITE_DONE ){ + if( rc == SQLITE_CONSTRAINT ){ + rc = SQLITE_CONSTRAINT; + }else if( rc != SQLITE_DONE ){ rc = SQLITE_ERROR; - } else { + }else{ rc = SQLITE_OK; } clear_and_exit: @@ -212050,51 +212079,25 @@ int removeIndexParameters(sqlite3* db, const char *zName) { return rc; } -int vectorIndexGetParameters( - sqlite3 *db, - const char *zIndexName, - VectorIdxParams *pParams -) { +int 
vectorIndexTryGetParametersFromTableFormat(sqlite3 *db, const char *zSql, const char *zIdxName, VectorIdxParams *pParams) { int rc = SQLITE_OK; sqlite3_stmt *pStmt = NULL; int nBinSize; - static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; - static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE type = ? AND name = ?"; - rc = sqlite3_prepare_v2(db, zSelectSql, -1, &pStmt, 0); - if( rc == SQLITE_OK ) { - rc = sqlite3_bind_text(pStmt, 1, zIndexName, -1, SQLITE_STATIC); - if( rc != SQLITE_OK ){ - goto out_free; - } + vectorIdxParamsInit(pParams, NULL, 0); - if( sqlite3_step(pStmt) == SQLITE_ROW ){ - assert( sqlite3_column_type(pStmt, 0) == SQLITE_BLOB ); - nBinSize = sqlite3_column_bytes(pStmt, 0); - if( nBinSize > VECTOR_INDEX_PARAMS_BUF_SIZE ){ - rc = SQLITE_ERROR; - goto out_free; - } - vectorIdxParamsInit(pParams, (u8*)sqlite3_column_blob(pStmt, 0), nBinSize); - goto out_free; - } - } - if( pStmt ){ - sqlite3_finalize(pStmt); - pStmt = NULL; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc != SQLITE_OK ){ + goto out; } - - rc = sqlite3_prepare_v2(db, zSelectSqlPekkaLegacy, -1, &pStmt, 0); + rc = sqlite3_bind_text(pStmt, 1, zIdxName, -1, SQLITE_STATIC); if( rc != SQLITE_OK ){ - goto out_free; + goto out; } - sqlite3_bind_text(pStmt, 1, "diskann", -1, SQLITE_STATIC); - sqlite3_bind_text(pStmt, 2, zIndexName, -1, SQLITE_STATIC); if( sqlite3_step(pStmt) != SQLITE_ROW ){ rc = SQLITE_ERROR; - goto out_free; + goto out; } - vectorIdxParamsPutU64(pParams, VECTOR_FORMAT_PARAM_ID, 1); vectorIdxParamsPutU64(pParams, VECTOR_INDEX_TYPE_PARAM_ID, VECTOR_INDEX_TYPE_DISKANN); vectorIdxParamsPutU64(pParams, VECTOR_TYPE_PARAM_ID, VECTOR_TYPE_FLOAT32); @@ -212102,20 +212105,81 @@ int vectorIndexGetParameters( vectorIdxParamsPutU64(pParams, VECTOR_METRIC_TYPE_PARAM_ID, VECTOR_METRIC_TYPE_COS); if( vectorIdxParamsPutU64(pParams, 
VECTOR_BLOCK_SIZE_PARAM_ID, sqlite3_column_int(pStmt, 1)) != 0 ){ rc = SQLITE_ERROR; + goto out; } -out_free: + assert( sqlite3_step(pStmt) == SQLITE_DONE ); + rc = SQLITE_OK; +out: if( pStmt != NULL ){ sqlite3_finalize(pStmt); } return rc; } +int vectorIndexTryGetParametersFromBinFormat(sqlite3 *db, const char *zSql, const char *zIdxName, VectorIdxParams *pParams) { + int rc = SQLITE_OK; + sqlite3_stmt *pStmt = NULL; + int nBinSize; + + vectorIdxParamsInit(pParams, NULL, 0); + + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc != SQLITE_OK ){ + goto out; + } + rc = sqlite3_bind_text(pStmt, 1, zIdxName, -1, SQLITE_STATIC); + if( rc != SQLITE_OK ){ + goto out; + } + if( sqlite3_step(pStmt) != SQLITE_ROW ){ + rc = SQLITE_ERROR; + goto out; + } + assert( sqlite3_column_type(pStmt, 0) == SQLITE_BLOB ); + nBinSize = sqlite3_column_bytes(pStmt, 0); + if( nBinSize > VECTOR_INDEX_PARAMS_BUF_SIZE ){ + rc = SQLITE_ERROR; + goto out; + } + vectorIdxParamsInit(pParams, (u8*)sqlite3_column_blob(pStmt, 0), nBinSize); + assert( sqlite3_step(pStmt) == SQLITE_DONE ); + rc = SQLITE_OK; +out: + if( pStmt != NULL ){ + sqlite3_finalize(pStmt); + } + return rc; +} + +int vectorIndexGetParameters( + sqlite3 *db, + const char *zIdxName, + VectorIdxParams *pParams +) { + int rc = SQLITE_OK; + + static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE name = ?"; + rc = vectorIndexTryGetParametersFromBinFormat(db, zSelectSql, zIdxName, pParams); + if( rc == SQLITE_OK ){ + return SQLITE_OK; + } + rc = vectorIndexTryGetParametersFromTableFormat(db, zSelectSqlPekkaLegacy, zIdxName, pParams); + if( rc == SQLITE_OK ){ + return SQLITE_OK; + } + return SQLITE_ERROR; +} int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { // we want to try delete all traces of index on every attempt // this 
is done to prevent unrecoverable situations where index were dropped but index parameters deletion failed and second attempt will fail on first step int rcIdx, rcParams; + if( IsVacuum(db) ){ + return SQLITE_OK; + } + assert( zDbSName != NULL ); rcIdx = diskAnnDropIndex(db, zDbSName, zIdxName); @@ -212125,15 +212189,37 @@ int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { assert( zDbSName != NULL ); + + if( IsVacuum(db) ){ + return SQLITE_OK; + } + return diskAnnClearIndex(db, zDbSName, zIdxName); } +/* + * vectorIndexCreate analyzes any index creation expression and create vector index if needed + * it tolerates the situation when insert into VECTOR_INDEX_GLOBAL_META_TABLE failed with conflict + * this made intentionally in order to natively support upload of SQLite dumps + * + * dump populates tables first and create indices after + * so we must omit them because shadow tables already filled + * + * 1. if vector index must not be created : 0 returned and pIdx is unchanged + * 2. if vector index must be created and refilled from base table: 0 returned and pIdx->idxType set to SQLITE_IDXTYPE_VECTOR + * 3. if vector index must be created but refill must be skipped : 1 returned and pIdx->idxType set to SQLITE_IDXTYPE_VECTOR + * 4. 
in case of any error :-1 returned (and pParse errMsg is populated with some error message) +*/ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const IdList *pUsing) { int i, rc = SQLITE_OK; int dims, type; int hasLibsqlVectorIdxFn = 0, hasCollation = 0; const char *pzErrMsg; + if( IsVacuum(pParse->db) ){ + return SQLITE_OK; + } + assert( zDbSName != NULL ); sqlite3 *db = pParse->db; @@ -212149,7 +212235,7 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pParse->eParseMode ){ // scheme can be re-parsed by SQLite for different reasons (for example, to check schema after // ALTER COLUMN statements) - so we must skip creation in such cases - goto ignored; + goto ignore; } // backward compatibility: preserve old indices with deprecated syntax but forbid creation of new indices with this syntax @@ -212159,15 +212245,15 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } else { sqlite3ErrorMsg(pParse, "USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX xxx ON yyy ( " VECTOR_INDEX_MARKER_FUNCTION "(zzz) )"); } - return SQLITE_ERROR; + goto fail; } if( db->init.busy == 1 && pUsing != NULL ){ - goto succeed; + goto ok; } // vector index must have expressions over column if( pIdx->aColExpr == NULL ) { - goto ignored; + goto ignore; } pListItem = pIdx->aColExpr->a; @@ -212182,20 +212268,20 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } } if( !hasLibsqlVectorIdxFn ) { - goto ignored; + goto ignore; } if( hasCollation ){ sqlite3ErrorMsg(pParse, "vector index can't have collation"); - return SQLITE_ERROR; + goto fail; } if( pIdx->aColExpr->nExpr != 1 ) { sqlite3ErrorMsg(pParse, "vector index must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function"); - return SQLITE_ERROR; + goto fail; } // we are able to support this but I doubt this works for now - more polishing required to make this work 
if( pIdx->pPartIdxWhere != NULL ) { sqlite3ErrorMsg(pParse, "partial vector index is not supported"); - return SQLITE_ERROR; + goto fail; } pArgsList = pIdx->aColExpr->a[0].pExpr->x.pList; @@ -212203,61 +212289,73 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pArgsList->nExpr < 1 ){ sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " must contain at least one argument"); - return SQLITE_ERROR; + goto fail; } if( pListItem[0].pExpr->op != TK_COLUMN ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be a column token"); - return SQLITE_ERROR; + goto fail; } iEmbeddingColumn = pListItem[0].pExpr->iColumn; if( iEmbeddingColumn < 0 ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be column with vector type"); - return SQLITE_ERROR; + goto fail; } assert( iEmbeddingColumn >= 0 && iEmbeddingColumn < pTable->nCol ); zEmbeddingColumnTypeName = sqlite3ColumnType(&pTable->aCol[iEmbeddingColumn], ""); if( vectorIdxParseColumnType(zEmbeddingColumnTypeName, &type, &dims, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "%s: %s", pzErrMsg, zEmbeddingColumnTypeName); - return SQLITE_ERROR; + goto fail; } // schema is locked while db is initializing and we need to just proceed here if( db->init.busy == 1 ){ - goto succeed; + goto ok; } rc = initVectorIndexMetaTable(db, zDbSName); if( rc != SQLITE_OK ){ - return rc; + sqlite3ErrorMsg(pParse, "failed to init vector index meta table: %s", sqlite3_errmsg(db)); + goto fail; } rc = parseVectorIdxParams(pParse, &idxParams, type, dims, pListItem + 1, pArgsList->nExpr - 1); if( rc != SQLITE_OK ){ - return rc; + sqlite3ErrorMsg(pParse, "failed to parse vector idx params"); + goto fail; } if( vectorIdxKeyGet(pTable, &idxKey, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "failed to detect underlying table key: %s", pzErrMsg); - return SQLITE_ERROR; + goto fail; } if( idxKey.nKeyColumns != 1 ){ sqlite3ErrorMsg(pParse, "vector index for tables without 
ROWID and composite primary key are not supported"); - return SQLITE_ERROR; + goto fail; } rc = diskAnnCreateIndex(db, zDbSName, pIdx->zName, &idxKey, &idxParams); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to initialize diskann vector index"); - return rc; + goto fail; } rc = insertIndexParameters(db, zDbSName, pIdx->zName, &idxParams); + if( rc == SQLITE_CONSTRAINT ){ + // we are violating unique constraint here which means that someone inserted parameters in the table before us + // taking aside corruption scenarios, this can be in case of loading dump (because tables are loaded before indices) or vacuum-ing DB + // both these cases are valid and we must proceed with index creating but avoid index-refill step as it is already filled + goto skip_refill; + } if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to update global metadata table"); - return rc; + goto fail; } -succeed: +ok: pIdx->idxType = SQLITE_IDXTYPE_VECTOR; - return SQLITE_OK; -ignored: - return SQLITE_OK; +ignore: + return 0; +skip_refill: + pIdx->idxType = SQLITE_IDXTYPE_VECTOR; + return 1; +fail: + return -1; } int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value **argv, VectorOutRows *pRows, char **pzErrMsg) { @@ -212271,6 +212369,7 @@ int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value VectorIdxParams idxParams; vectorIdxParamsInit(&idxParams, NULL, 0); + assert( !IsVacuum(db) ); assert( zDbSName != NULL ); if( argc != 3 ){ @@ -212353,6 +212452,10 @@ int vectorIndexInsert( int rc; VectorInRow vectorInRow; + if( IsVacuum(pCur->db) ){ + return SQLITE_OK; + } + rc = vectorInRowAlloc(pCur->db, pRecord, &vectorInRow, pzErrMsg); if( rc != SQLITE_OK ){ return rc; @@ -212371,6 +212474,11 @@ int vectorIndexDelete( char **pzErrMsg ){ VectorInRow payload; + + if( IsVacuum(pCur->db) ){ + return SQLITE_OK; + } + payload.pVector = NULL; payload.nKeys = r->nField - 1; payload.pKeyValues = r->aMem + 1; From 
6cf9853e109af828d4c5cc942ae06edd2bb403c6 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Wed, 24 Jul 2024 16:19:48 +0400 Subject: [PATCH 4/6] slightly improve test --- libsql-sqlite3/test/libsql_vector_index.test | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libsql-sqlite3/test/libsql_vector_index.test b/libsql-sqlite3/test/libsql_vector_index.test index f46e2a0799..01498121d9 100644 --- a/libsql-sqlite3/test/libsql_vector_index.test +++ b/libsql-sqlite3/test/libsql_vector_index.test @@ -225,8 +225,10 @@ do_execsql_test vector-vacuum { CREATE TABLE t_vacuum ( emb FLOAT32(2) ); INSERT INTO t_vacuum VALUES (vector('[1,2]')), (vector('[3,4]')); CREATE INDEX t_vacuum_idx ON t_vacuum(libsql_vector_idx(emb)); - VACUUM INTO ':memory:'; -} {} + VACUUM; + SELECT COUNT(*) FROM t_vacuum; + SELECT COUNT(*) FROM t_vacuum_idx_shadow; +} {2 2} proc error_messages {sql} { set ret "" From d665e15b38ce03d2b464d24874e29813bcdaa6ff Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Wed, 24 Jul 2024 18:09:30 +0400 Subject: [PATCH 5/6] review fixes --- libsql-sqlite3/src/build.c | 5 +- libsql-sqlite3/src/vectorIndex.c | 74 ++++++++++++++--------------- libsql-sqlite3/src/vectorIndexInt.h | 2 +- 3 files changed, 42 insertions(+), 39 deletions(-) diff --git a/libsql-sqlite3/src/build.c b/libsql-sqlite3/src/build.c index 86767ed1c2..245503127f 100644 --- a/libsql-sqlite3/src/build.c +++ b/libsql-sqlite3/src/build.c @@ -4314,10 +4314,13 @@ void sqlite3CreateIndex( if( vectorIdxRc < 0 ){ goto exit_create_index; } + if( vectorIdxRc >= 1 ){ + idxType = SQLITE_IDXTYPE_VECTOR; + pIndex->idxType = idxType; + } if( vectorIdxRc == 1 ){ skipRefill = 1; } - idxType = pIndex->idxType; // vectorIndexCreate can update idxType to 4 (VECTOR INDEX) #endif /* Append the table key to the end of the index. 
For WITHOUT ROWID diff --git a/libsql-sqlite3/src/vectorIndex.c b/libsql-sqlite3/src/vectorIndex.c index be451d6c63..67bfbb916b 100644 --- a/libsql-sqlite3/src/vectorIndex.c +++ b/libsql-sqlite3/src/vectorIndex.c @@ -735,6 +735,9 @@ int vectorIndexGetParameters( int rc = SQLITE_OK; static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + // zSelectSqlPekkaLegacy handles the case when user created DB before 04 July 2024 (https://discord.com/channels/933071162680958986/1225560924526477322/1258367912402489397) + // when instead of table with binary parameters rigid schema was used for index settings + // we should drop this eventually - but for now we postponed this decision static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE name = ?"; rc = vectorIndexTryGetParametersFromBinFormat(db, zSelectSql, zIdxName, pParams); if( rc == SQLITE_OK ){ @@ -781,19 +784,24 @@ int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { * dump populates tables first and create indices after * so we must omit them because shadow tables already filled * - * 1. if vector index must not be created : 0 returned and pIdx is unchanged - * 2. if vector index must be created and refilled from base table: 0 returned and pIdx->idxType set to SQLITE_IDXTYPE_VECTOR - * 3. if vector index must be created but refill must be skipped : 1 returned and pIdx->idxType set to SQLITE_IDXTYPE_VECTOR - * 4. in case of any error :-1 returned (and pParse errMsg is populated with some error message) + * 1. in case of any error :-1 returned (and pParse errMsg is populated with some error message) + * 2. if vector index must not be created : 0 returned + * 3. if vector index must be created but refill must be skipped : 1 returned + * 4. 
if vector index must be created and refilled from base table: 2 returned */ -int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const IdList *pUsing) { +int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, const IdList *pUsing) { + static const int CREATE_FAIL = -1; + static const int CREATE_IGNORE = 0; + static const int CREATE_OK_SKIP_REFILL = 1; + static const int CREATE_OK = 2; + int i, rc = SQLITE_OK; int dims, type; int hasLibsqlVectorIdxFn = 0, hasCollation = 0; const char *pzErrMsg; if( IsVacuum(pParse->db) ){ - return SQLITE_OK; + return CREATE_IGNORE; } assert( zDbSName != NULL ); @@ -811,7 +819,7 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pParse->eParseMode ){ // scheme can be re-parsed by SQLite for different reasons (for example, to check schema after // ALTER COLUMN statements) - so we must skip creation in such cases - goto ignore; + return CREATE_IGNORE; } // backward compatibility: preserve old indices with deprecated syntax but forbid creation of new indices with this syntax @@ -821,15 +829,15 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } else { sqlite3ErrorMsg(pParse, "USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX xxx ON yyy ( " VECTOR_INDEX_MARKER_FUNCTION "(zzz) )"); } - goto fail; + return CREATE_FAIL; } if( db->init.busy == 1 && pUsing != NULL ){ - goto ok; + return CREATE_OK; } // vector index must have expressions over column if( pIdx->aColExpr == NULL ) { - goto ignore; + return CREATE_IGNORE; } pListItem = pIdx->aColExpr->a; @@ -844,20 +852,20 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } } if( !hasLibsqlVectorIdxFn ) { - goto ignore; + return CREATE_IGNORE; } if( hasCollation ){ sqlite3ErrorMsg(pParse, "vector index can't have collation"); - goto fail; + return CREATE_FAIL; } if( pIdx->aColExpr->nExpr != 1 ) { sqlite3ErrorMsg(pParse, "vector 
index must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function"); - goto fail; + return CREATE_FAIL; } // we are able to support this but I doubt this works for now - more polishing required to make this work if( pIdx->pPartIdxWhere != NULL ) { sqlite3ErrorMsg(pParse, "partial vector index is not supported"); - goto fail; + return CREATE_FAIL; } pArgsList = pIdx->aColExpr->a[0].pExpr->x.pList; @@ -865,73 +873,65 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pArgsList->nExpr < 1 ){ sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " must contain at least one argument"); - goto fail; + return CREATE_FAIL; } if( pListItem[0].pExpr->op != TK_COLUMN ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be a column token"); - goto fail; + return CREATE_FAIL; } iEmbeddingColumn = pListItem[0].pExpr->iColumn; if( iEmbeddingColumn < 0 ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be column with vector type"); - goto fail; + return CREATE_FAIL; } assert( iEmbeddingColumn >= 0 && iEmbeddingColumn < pTable->nCol ); zEmbeddingColumnTypeName = sqlite3ColumnType(&pTable->aCol[iEmbeddingColumn], ""); if( vectorIdxParseColumnType(zEmbeddingColumnTypeName, &type, &dims, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "%s: %s", pzErrMsg, zEmbeddingColumnTypeName); - goto fail; + return CREATE_FAIL; } // schema is locked while db is initializing and we need to just proceed here if( db->init.busy == 1 ){ - goto ok; + return CREATE_OK; } rc = initVectorIndexMetaTable(db, zDbSName); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "failed to init vector index meta table: %s", sqlite3_errmsg(db)); - goto fail; + return CREATE_FAIL; } rc = parseVectorIdxParams(pParse, &idxParams, type, dims, pListItem + 1, pArgsList->nExpr - 1); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "failed to parse vector idx params"); - goto fail; + return CREATE_FAIL; } if( 
vectorIdxKeyGet(pTable, &idxKey, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "failed to detect underlying table key: %s", pzErrMsg); - goto fail; + return CREATE_FAIL; } if( idxKey.nKeyColumns != 1 ){ sqlite3ErrorMsg(pParse, "vector index for tables without ROWID and composite primary key are not supported"); - goto fail; + return CREATE_FAIL; } rc = diskAnnCreateIndex(db, zDbSName, pIdx->zName, &idxKey, &idxParams); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to initialize diskann vector index"); - goto fail; + return CREATE_FAIL; } rc = insertIndexParameters(db, zDbSName, pIdx->zName, &idxParams); if( rc == SQLITE_CONSTRAINT ){ // we are violating unique constraint here which means that someone inserted parameters in the table before us - // taking aside corruption scenarios, this can be in case of loading dump (because tables are loaded before indices) or vacuum-ing DB - // both these cases are valid and we must proceed with index creating but avoid index-refill step as it is already filled - goto skip_refill; + // taking aside corruption scenarios, this can be in case of loading dump (because tables and data are loaded before indices) + // this case is valid and we must proceed with index creating but avoid index-refill step as it is already filled + return CREATE_OK_SKIP_REFILL; } if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to update global metadata table"); - goto fail; + return CREATE_FAIL; } -ok: - pIdx->idxType = SQLITE_IDXTYPE_VECTOR; -ignore: - return 0; -skip_refill: - pIdx->idxType = SQLITE_IDXTYPE_VECTOR; - return 1; -fail: - return -1; + return CREATE_OK; } int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value **argv, VectorOutRows *pRows, char **pzErrMsg) { diff --git a/libsql-sqlite3/src/vectorIndexInt.h b/libsql-sqlite3/src/vectorIndexInt.h index 2ee4b7b7eb..a2c1ccfb17 100644 --- a/libsql-sqlite3/src/vectorIndexInt.h +++ b/libsql-sqlite3/src/vectorIndexInt.h @@ -224,7 +224,7 @@ typedef struct 
VectorIdxCursor VectorIdxCursor; int vectorIdxParseColumnType(const char *, int *, int *, const char **); -int vectorIndexCreate(Parse*, Index*, const char *, const IdList*); +int vectorIndexCreate(Parse*, const Index*, const char *, const IdList*); int vectorIndexClear(sqlite3 *, const char *, const char *); int vectorIndexDrop(sqlite3 *, const char *, const char *); int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); From c599602bcef44c6e68b53e302e5135411f7b0edc Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Thu, 25 Jul 2024 10:54:30 +0400 Subject: [PATCH 6/6] build bundles --- .../SQLite3MultipleCiphers/src/sqlite3.c | 81 ++++++++++--------- libsql-ffi/bundled/src/sqlite3.c | 81 ++++++++++--------- 2 files changed, 84 insertions(+), 78 deletions(-) diff --git a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c index 30302d1411..362008e23f 100644 --- a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c +++ b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c @@ -85130,7 +85130,7 @@ typedef struct VectorIdxCursor VectorIdxCursor; int vectorIdxParseColumnType(const char *, int *, int *, const char **); -int vectorIndexCreate(Parse*, Index*, const char *, const IdList*); +int vectorIndexCreate(Parse*, const Index*, const char *, const IdList*); int vectorIndexClear(sqlite3 *, const char *, const char *); int vectorIndexDrop(sqlite3 *, const char *, const char *); int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); @@ -126105,10 +126105,13 @@ SQLITE_PRIVATE void sqlite3CreateIndex( if( vectorIdxRc < 0 ){ goto exit_create_index; } + if( vectorIdxRc >= 1 ){ + idxType = SQLITE_IDXTYPE_VECTOR; + pIndex->idxType = idxType; + } if( vectorIdxRc == 1 ){ skipRefill = 1; } - idxType = pIndex->idxType; // vectorIndexCreate can update idxType to 4 (VECTOR INDEX) #endif /* Append the table key to the end of the 
index. For WITHOUT ROWID @@ -212159,6 +212162,9 @@ int vectorIndexGetParameters( int rc = SQLITE_OK; static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + // zSelectSqlPekkaLegacy handles the case when user created DB before 04 July 2024 (https://discord.com/channels/933071162680958986/1225560924526477322/1258367912402489397) + // when instead of table with binary parameters rigid schema was used for index settings + // we should drop this eventually - but for now we postponed this decision static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE name = ?"; rc = vectorIndexTryGetParametersFromBinFormat(db, zSelectSql, zIdxName, pParams); if( rc == SQLITE_OK ){ @@ -212205,19 +212211,24 @@ int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { * dump populates tables first and create indices after * so we must omit them because shadow tables already filled * - * 1. if vector index must not be created : 0 returned and pIdx is unchanged - * 2. if vector index must be created and refilled from base table: 0 returned and pIdx->idxType set to SQLITE_IDXTYPE_VECTOR - * 3. if vector index must be created but refill must be skipped : 1 returned and pIdx->idxType set to SQLITE_IDXTYPE_VECTOR - * 4. in case of any error :-1 returned (and pParse errMsg is populated with some error message) + * 1. in case of any error :-1 returned (and pParse errMsg is populated with some error message) + * 2. if vector index must not be created : 0 returned + * 3. if vector index must be created but refill must be skipped : 1 returned + * 4. 
if vector index must be created and refilled from base table: 2 returned */ -int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const IdList *pUsing) { +int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, const IdList *pUsing) { + static const int CREATE_FAIL = -1; + static const int CREATE_IGNORE = 0; + static const int CREATE_OK_SKIP_REFILL = 1; + static const int CREATE_OK = 2; + int i, rc = SQLITE_OK; int dims, type; int hasLibsqlVectorIdxFn = 0, hasCollation = 0; const char *pzErrMsg; if( IsVacuum(pParse->db) ){ - return SQLITE_OK; + return CREATE_IGNORE; } assert( zDbSName != NULL ); @@ -212235,7 +212246,7 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pParse->eParseMode ){ // scheme can be re-parsed by SQLite for different reasons (for example, to check schema after // ALTER COLUMN statements) - so we must skip creation in such cases - goto ignore; + return CREATE_IGNORE; } // backward compatibility: preserve old indices with deprecated syntax but forbid creation of new indices with this syntax @@ -212245,15 +212256,15 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } else { sqlite3ErrorMsg(pParse, "USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX xxx ON yyy ( " VECTOR_INDEX_MARKER_FUNCTION "(zzz) )"); } - goto fail; + return CREATE_FAIL; } if( db->init.busy == 1 && pUsing != NULL ){ - goto ok; + return CREATE_OK; } // vector index must have expressions over column if( pIdx->aColExpr == NULL ) { - goto ignore; + return CREATE_IGNORE; } pListItem = pIdx->aColExpr->a; @@ -212268,20 +212279,20 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } } if( !hasLibsqlVectorIdxFn ) { - goto ignore; + return CREATE_IGNORE; } if( hasCollation ){ sqlite3ErrorMsg(pParse, "vector index can't have collation"); - goto fail; + return CREATE_FAIL; } if( pIdx->aColExpr->nExpr != 1 ) { 
sqlite3ErrorMsg(pParse, "vector index must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function"); - goto fail; + return CREATE_FAIL; } // we are able to support this but I doubt this works for now - more polishing required to make this work if( pIdx->pPartIdxWhere != NULL ) { sqlite3ErrorMsg(pParse, "partial vector index is not supported"); - goto fail; + return CREATE_FAIL; } pArgsList = pIdx->aColExpr->a[0].pExpr->x.pList; @@ -212289,73 +212300,65 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pArgsList->nExpr < 1 ){ sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " must contain at least one argument"); - goto fail; + return CREATE_FAIL; } if( pListItem[0].pExpr->op != TK_COLUMN ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be a column token"); - goto fail; + return CREATE_FAIL; } iEmbeddingColumn = pListItem[0].pExpr->iColumn; if( iEmbeddingColumn < 0 ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be column with vector type"); - goto fail; + return CREATE_FAIL; } assert( iEmbeddingColumn >= 0 && iEmbeddingColumn < pTable->nCol ); zEmbeddingColumnTypeName = sqlite3ColumnType(&pTable->aCol[iEmbeddingColumn], ""); if( vectorIdxParseColumnType(zEmbeddingColumnTypeName, &type, &dims, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "%s: %s", pzErrMsg, zEmbeddingColumnTypeName); - goto fail; + return CREATE_FAIL; } // schema is locked while db is initializing and we need to just proceed here if( db->init.busy == 1 ){ - goto ok; + return CREATE_OK; } rc = initVectorIndexMetaTable(db, zDbSName); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "failed to init vector index meta table: %s", sqlite3_errmsg(db)); - goto fail; + return CREATE_FAIL; } rc = parseVectorIdxParams(pParse, &idxParams, type, dims, pListItem + 1, pArgsList->nExpr - 1); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "failed to parse vector idx params"); - goto fail; 
+ return CREATE_FAIL; } if( vectorIdxKeyGet(pTable, &idxKey, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "failed to detect underlying table key: %s", pzErrMsg); - goto fail; + return CREATE_FAIL; } if( idxKey.nKeyColumns != 1 ){ sqlite3ErrorMsg(pParse, "vector index for tables without ROWID and composite primary key are not supported"); - goto fail; + return CREATE_FAIL; } rc = diskAnnCreateIndex(db, zDbSName, pIdx->zName, &idxKey, &idxParams); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to initialize diskann vector index"); - goto fail; + return CREATE_FAIL; } rc = insertIndexParameters(db, zDbSName, pIdx->zName, &idxParams); if( rc == SQLITE_CONSTRAINT ){ // we are violating unique constraint here which means that someone inserted parameters in the table before us - // taking aside corruption scenarios, this can be in case of loading dump (because tables are loaded before indices) or vacuum-ing DB - // both these cases are valid and we must proceed with index creating but avoid index-refill step as it is already filled - goto skip_refill; + // taking aside corruption scenarios, this can be in case of loading dump (because tables and data are loaded before indices) + // this case is valid and we must proceed with index creating but avoid index-refill step as it is already filled + return CREATE_OK_SKIP_REFILL; } if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to update global metadata table"); - goto fail; + return CREATE_FAIL; } -ok: - pIdx->idxType = SQLITE_IDXTYPE_VECTOR; -ignore: - return 0; -skip_refill: - pIdx->idxType = SQLITE_IDXTYPE_VECTOR; - return 1; -fail: - return -1; + return CREATE_OK; } int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value **argv, VectorOutRows *pRows, char **pzErrMsg) { diff --git a/libsql-ffi/bundled/src/sqlite3.c b/libsql-ffi/bundled/src/sqlite3.c index 30302d1411..362008e23f 100644 --- a/libsql-ffi/bundled/src/sqlite3.c +++ b/libsql-ffi/bundled/src/sqlite3.c @@ -85130,7 +85130,7 @@ 
typedef struct VectorIdxCursor VectorIdxCursor; int vectorIdxParseColumnType(const char *, int *, int *, const char **); -int vectorIndexCreate(Parse*, Index*, const char *, const IdList*); +int vectorIndexCreate(Parse*, const Index*, const char *, const IdList*); int vectorIndexClear(sqlite3 *, const char *, const char *); int vectorIndexDrop(sqlite3 *, const char *, const char *); int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); @@ -126105,10 +126105,13 @@ SQLITE_PRIVATE void sqlite3CreateIndex( if( vectorIdxRc < 0 ){ goto exit_create_index; } + if( vectorIdxRc >= 1 ){ + idxType = SQLITE_IDXTYPE_VECTOR; + pIndex->idxType = idxType; + } if( vectorIdxRc == 1 ){ skipRefill = 1; } - idxType = pIndex->idxType; // vectorIndexCreate can update idxType to 4 (VECTOR INDEX) #endif /* Append the table key to the end of the index. For WITHOUT ROWID @@ -212159,6 +212162,9 @@ int vectorIndexGetParameters( int rc = SQLITE_OK; static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + // zSelectSqlPekkaLegacy handles the case when user created DB before 04 July 2024 (https://discord.com/channels/933071162680958986/1225560924526477322/1258367912402489397) + // when instead of table with binary parameters rigid schema was used for index settings + // we should drop this eventually - but for now we postponed this decision static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE name = ?"; rc = vectorIndexTryGetParametersFromBinFormat(db, zSelectSql, zIdxName, pParams); if( rc == SQLITE_OK ){ @@ -212205,19 +212211,24 @@ int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { * dump populates tables first and create indices after * so we must omit them because shadow tables already filled * - * 1. if vector index must not be created : 0 returned and pIdx is unchanged - * 2. 
if vector index must be created and refilled from base table: 0 returned and pIdx->idxType set to SQLITE_IDXTYPE_VECTOR - * 3. if vector index must be created but refill must be skipped : 1 returned and pIdx->idxType set to SQLITE_IDXTYPE_VECTOR - * 4. in case of any error :-1 returned (and pParse errMsg is populated with some error message) + * 1. in case of any error :-1 returned (and pParse errMsg is populated with some error message) + * 2. if vector index must not be created : 0 returned + * 3. if vector index must be created but refill must be skipped : 1 returned + * 4. if vector index must be created and refilled from base table: 2 returned */ -int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const IdList *pUsing) { +int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, const IdList *pUsing) { + static const int CREATE_FAIL = -1; + static const int CREATE_IGNORE = 0; + static const int CREATE_OK_SKIP_REFILL = 1; + static const int CREATE_OK = 2; + int i, rc = SQLITE_OK; int dims, type; int hasLibsqlVectorIdxFn = 0, hasCollation = 0; const char *pzErrMsg; if( IsVacuum(pParse->db) ){ - return SQLITE_OK; + return CREATE_IGNORE; } assert( zDbSName != NULL ); @@ -212235,7 +212246,7 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pParse->eParseMode ){ // scheme can be re-parsed by SQLite for different reasons (for example, to check schema after // ALTER COLUMN statements) - so we must skip creation in such cases - goto ignore; + return CREATE_IGNORE; } // backward compatibility: preserve old indices with deprecated syntax but forbid creation of new indices with this syntax @@ -212245,15 +212256,15 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } else { sqlite3ErrorMsg(pParse, "USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX xxx ON yyy ( " VECTOR_INDEX_MARKER_FUNCTION "(zzz) )"); } - goto fail; + return CREATE_FAIL; } 
if( db->init.busy == 1 && pUsing != NULL ){ - goto ok; + return CREATE_OK; } // vector index must have expressions over column if( pIdx->aColExpr == NULL ) { - goto ignore; + return CREATE_IGNORE; } pListItem = pIdx->aColExpr->a; @@ -212268,20 +212279,20 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } } if( !hasLibsqlVectorIdxFn ) { - goto ignore; + return CREATE_IGNORE; } if( hasCollation ){ sqlite3ErrorMsg(pParse, "vector index can't have collation"); - goto fail; + return CREATE_FAIL; } if( pIdx->aColExpr->nExpr != 1 ) { sqlite3ErrorMsg(pParse, "vector index must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function"); - goto fail; + return CREATE_FAIL; } // we are able to support this but I doubt this works for now - more polishing required to make this work if( pIdx->pPartIdxWhere != NULL ) { sqlite3ErrorMsg(pParse, "partial vector index is not supported"); - goto fail; + return CREATE_FAIL; } pArgsList = pIdx->aColExpr->a[0].pExpr->x.pList; @@ -212289,73 +212300,65 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pArgsList->nExpr < 1 ){ sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " must contain at least one argument"); - goto fail; + return CREATE_FAIL; } if( pListItem[0].pExpr->op != TK_COLUMN ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be a column token"); - goto fail; + return CREATE_FAIL; } iEmbeddingColumn = pListItem[0].pExpr->iColumn; if( iEmbeddingColumn < 0 ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be column with vector type"); - goto fail; + return CREATE_FAIL; } assert( iEmbeddingColumn >= 0 && iEmbeddingColumn < pTable->nCol ); zEmbeddingColumnTypeName = sqlite3ColumnType(&pTable->aCol[iEmbeddingColumn], ""); if( vectorIdxParseColumnType(zEmbeddingColumnTypeName, &type, &dims, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "%s: %s", pzErrMsg, 
zEmbeddingColumnTypeName); - goto fail; + return CREATE_FAIL; } // schema is locked while db is initializing and we need to just proceed here if( db->init.busy == 1 ){ - goto ok; + return CREATE_OK; } rc = initVectorIndexMetaTable(db, zDbSName); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "failed to init vector index meta table: %s", sqlite3_errmsg(db)); - goto fail; + return CREATE_FAIL; } rc = parseVectorIdxParams(pParse, &idxParams, type, dims, pListItem + 1, pArgsList->nExpr - 1); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "failed to parse vector idx params"); - goto fail; + return CREATE_FAIL; } if( vectorIdxKeyGet(pTable, &idxKey, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "failed to detect underlying table key: %s", pzErrMsg); - goto fail; + return CREATE_FAIL; } if( idxKey.nKeyColumns != 1 ){ sqlite3ErrorMsg(pParse, "vector index for tables without ROWID and composite primary key are not supported"); - goto fail; + return CREATE_FAIL; } rc = diskAnnCreateIndex(db, zDbSName, pIdx->zName, &idxKey, &idxParams); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to initialize diskann vector index"); - goto fail; + return CREATE_FAIL; } rc = insertIndexParameters(db, zDbSName, pIdx->zName, &idxParams); if( rc == SQLITE_CONSTRAINT ){ // we are violating unique constraint here which means that someone inserted parameters in the table before us - // taking aside corruption scenarios, this can be in case of loading dump (because tables are loaded before indices) or vacuum-ing DB - // both these cases are valid and we must proceed with index creating but avoid index-refill step as it is already filled - goto skip_refill; + // taking aside corruption scenarios, this can be in case of loading dump (because tables and data are loaded before indices) + // this case is valid and we must proceed with index creating but avoid index-refill step as it is already filled + return CREATE_OK_SKIP_REFILL; } if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to 
update global metadata table"); - goto fail; + return CREATE_FAIL; } -ok: - pIdx->idxType = SQLITE_IDXTYPE_VECTOR; -ignore: - return 0; -skip_refill: - pIdx->idxType = SQLITE_IDXTYPE_VECTOR; - return 1; -fail: - return -1; + return CREATE_OK; } int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value **argv, VectorOutRows *pRows, char **pzErrMsg) {