diff --git a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c index c840cb8032..362008e23f 100644 --- a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c +++ b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c @@ -85130,14 +85130,14 @@ typedef struct VectorIdxCursor VectorIdxCursor; int vectorIdxParseColumnType(const char *, int *, int *, const char **); -int vectorIndexCreate(Parse*, Index*, const char *, const IdList*); +int vectorIndexCreate(Parse*, const Index*, const char *, const IdList*); int vectorIndexClear(sqlite3 *, const char *, const char *); int vectorIndexDrop(sqlite3 *, const char *, const char *); +int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); int vectorIndexCursorInit(sqlite3 *, const char *, const char *, VectorIdxCursor **); void vectorIndexCursorClose(sqlite3 *, VectorIdxCursor *); int vectorIndexInsert(VectorIdxCursor *, const UnpackedRecord *, char **); int vectorIndexDelete(VectorIdxCursor *, const UnpackedRecord *, char **); -int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); #if 0 } /* end of the 'extern "C"' block */ @@ -125791,6 +125791,7 @@ SQLITE_PRIVATE void sqlite3CreateIndex( int nExtraCol; /* Number of extra columns needed */ char *zExtra = 0; /* Extra space after the Index object */ Index *pPk = 0; /* PRIMARY KEY index for WITHOUT ROWID tables */ + int vectorIdxRc = 0, skipRefill = 0; assert( db->pParse==pParse ); if( pParse->nErr ){ @@ -126100,10 +126101,17 @@ SQLITE_PRIVATE void sqlite3CreateIndex( #ifndef SQLITE_OMIT_VECTOR - if( vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing) != SQLITE_OK ) { + vectorIdxRc = vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing); + if( vectorIdxRc < 0 ){ goto exit_create_index; } - idxType = pIndex->idxType; // vectorIndexCreate can update idxType to 4 (VECTOR INDEX) + if( vectorIdxRc >= 1 ){ + idxType = SQLITE_IDXTYPE_VECTOR; + pIndex->idxType = idxType; + } + if( vectorIdxRc == 1 ){ + skipRefill = 1; + } #endif /* Append the table key to the end of the index. For WITHOUT ROWID @@ -126306,7 +126314,9 @@ SQLITE_PRIVATE void sqlite3CreateIndex( ** to invalidate all pre-compiled statements. */ if( pTblName ){ - sqlite3RefillIndex(pParse, pIndex, iMem); + if( !skipRefill ){ + sqlite3RefillIndex(pParse, pIndex, iMem); + } sqlite3ChangeCookie(pParse, iDb); sqlite3VdbeAddParseSchemaOp(v, iDb, sqlite3MPrintf(db, "name='%q' AND type='index'", pIndex->zName), 0); @@ -211447,10 +211457,30 @@ int vectorF64ParseSqliteBlob( /* #include "sqliteInt.h" */ /* #include "vectorIndexInt.h" */ +/* + * The code which glue SQLite internals with pure DiskANN implementation resides here + * Main internal API methods are: + * vectorIndexCreate() + * vectorIndexClear() + * vectorIndexDrop() + * vectorIndexSearch() + * vectorIndexCursorInit() + * vectorIndexCursorClose() + * + * + cursor operations: + * vectorIndexInsert(cursor) + * vectorIndexDelete(cursor) +*/ + /************************************************************************** ** VectorIdxParams utilities ****************************************************************************/ +// VACUUM creates tables and indices first and only then populate data +// we need to ignore inserts from 'INSERT INTO vacuum.t SELECT * FROM t' statements because +// all shadow tables will be populated by VACUUM process during regular process of table copy +#define IsVacuum(db) ((db->mDbFlags&DBFLAG_Vacuum)!=0) + void vectorIdxParamsInit(VectorIdxParams *pParams, u8 *pBinBuf, int nBinSize) { assert( nBinSize <= VECTOR_INDEX_PARAMS_BUF_SIZE ); @@ -212009,9 +212039,11 @@ int insertIndexParameters(sqlite3* db, const char *zDbSName, const char *zName, goto clear_and_exit; } rc = sqlite3_step(pStatement); - if( rc != SQLITE_DONE ){ + if( rc == SQLITE_CONSTRAINT ){ + rc = SQLITE_CONSTRAINT; + }else if( rc != SQLITE_DONE ){ rc = SQLITE_ERROR; - } else { + }else{ rc = SQLITE_OK; } clear_and_exit: @@ -212050,51 +212082,25 @@ int removeIndexParameters(sqlite3* db, const char *zName) { return rc; } -int vectorIndexGetParameters( - sqlite3 *db, - const char *zIndexName, - VectorIdxParams *pParams -) { +int vectorIndexTryGetParametersFromTableFormat(sqlite3 *db, const char *zSql, const char *zIdxName, VectorIdxParams *pParams) { int rc = SQLITE_OK; sqlite3_stmt *pStmt = NULL; int nBinSize; - static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; - static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE type = ? AND name = ?"; - rc = sqlite3_prepare_v2(db, zSelectSql, -1, &pStmt, 0); - if( rc == SQLITE_OK ) { - rc = sqlite3_bind_text(pStmt, 1, zIndexName, -1, SQLITE_STATIC); - if( rc != SQLITE_OK ){ - goto out_free; - } + vectorIdxParamsInit(pParams, NULL, 0); - if( sqlite3_step(pStmt) == SQLITE_ROW ){ - assert( sqlite3_column_type(pStmt, 0) == SQLITE_BLOB ); - nBinSize = sqlite3_column_bytes(pStmt, 0); - if( nBinSize > VECTOR_INDEX_PARAMS_BUF_SIZE ){ - rc = SQLITE_ERROR; - goto out_free; - } - vectorIdxParamsInit(pParams, (u8*)sqlite3_column_blob(pStmt, 0), nBinSize); - goto out_free; - } - } - if( pStmt ){ - sqlite3_finalize(pStmt); - pStmt = NULL; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc != SQLITE_OK ){ + goto out; } - - rc = sqlite3_prepare_v2(db, zSelectSqlPekkaLegacy, -1, &pStmt, 0); + rc = sqlite3_bind_text(pStmt, 1, zIdxName, -1, SQLITE_STATIC); if( rc != SQLITE_OK ){ - goto out_free; + goto out; } - sqlite3_bind_text(pStmt, 1, "diskann", -1, SQLITE_STATIC); - sqlite3_bind_text(pStmt, 2, zIndexName, -1, SQLITE_STATIC); if( sqlite3_step(pStmt) != SQLITE_ROW ){ rc = SQLITE_ERROR; - goto out_free; + goto out; } - vectorIdxParamsPutU64(pParams, VECTOR_FORMAT_PARAM_ID, 1); vectorIdxParamsPutU64(pParams, VECTOR_INDEX_TYPE_PARAM_ID, VECTOR_INDEX_TYPE_DISKANN); vectorIdxParamsPutU64(pParams, VECTOR_TYPE_PARAM_ID, VECTOR_TYPE_FLOAT32); @@ -212102,20 +212108,84 @@ int vectorIndexGetParameters( vectorIdxParamsPutU64(pParams, VECTOR_METRIC_TYPE_PARAM_ID, VECTOR_METRIC_TYPE_COS); if( vectorIdxParamsPutU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID, sqlite3_column_int(pStmt, 1)) != 0 ){ rc = SQLITE_ERROR; + goto out; } -out_free: + assert( sqlite3_step(pStmt) == SQLITE_DONE ); + rc = SQLITE_OK; +out: if( pStmt != NULL ){ sqlite3_finalize(pStmt); } return rc; } +int vectorIndexTryGetParametersFromBinFormat(sqlite3 *db, const char *zSql, const char *zIdxName, VectorIdxParams *pParams) { + int rc = SQLITE_OK; + sqlite3_stmt *pStmt = NULL; + int nBinSize; + + vectorIdxParamsInit(pParams, NULL, 0); + + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc != SQLITE_OK ){ + goto out; + } + rc = sqlite3_bind_text(pStmt, 1, zIdxName, -1, SQLITE_STATIC); + if( rc != SQLITE_OK ){ + goto out; + } + if( sqlite3_step(pStmt) != SQLITE_ROW ){ + rc = SQLITE_ERROR; + goto out; + } + assert( sqlite3_column_type(pStmt, 0) == SQLITE_BLOB ); + nBinSize = sqlite3_column_bytes(pStmt, 0); + if( nBinSize > VECTOR_INDEX_PARAMS_BUF_SIZE ){ + rc = SQLITE_ERROR; + goto out; + } + vectorIdxParamsInit(pParams, (u8*)sqlite3_column_blob(pStmt, 0), nBinSize); + assert( sqlite3_step(pStmt) == SQLITE_DONE ); + rc = SQLITE_OK; +out: + if( pStmt != NULL ){ + sqlite3_finalize(pStmt); + } + return rc; +} + +int vectorIndexGetParameters( + sqlite3 *db, + const char *zIdxName, + VectorIdxParams *pParams +) { + int rc = SQLITE_OK; + + static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + // zSelectSqlPekkaLegacy handles the case when user created DB before 04 July 2024 (https://discord.com/channels/933071162680958986/1225560924526477322/1258367912402489397) + // when instead of table with binary parameters rigid schema was used for index settings + // we should drop this eventually - but for now we postponed this decision + static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE name = ?"; + rc = vectorIndexTryGetParametersFromBinFormat(db, zSelectSql, zIdxName, pParams); + if( rc == SQLITE_OK ){ + return SQLITE_OK; + } + rc = vectorIndexTryGetParametersFromTableFormat(db, zSelectSqlPekkaLegacy, zIdxName, pParams); + if( rc == SQLITE_OK ){ + return SQLITE_OK; + } + return SQLITE_ERROR; +} int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { // we want to try delete all traces of index on every attempt // this is done to prevent unrecoverable situations where index were dropped but index parameters deletion failed and second attempt will fail on first step int rcIdx, rcParams; + if( IsVacuum(db) ){ + return SQLITE_OK; + } + assert( zDbSName != NULL ); rcIdx = diskAnnDropIndex(db, zDbSName, zIdxName); @@ -212125,15 +212195,42 @@ int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { assert( zDbSName != NULL ); + + if( IsVacuum(db) ){ + return SQLITE_OK; + } + return diskAnnClearIndex(db, zDbSName, zIdxName); } -int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const IdList *pUsing) { +/* + * vectorIndexCreate analyzes any index creation expression and create vector index if needed + * it tolerates the situation when insert into VECTOR_INDEX_GLOBAL_META_TABLE failed with conflict + * this made intentionally in order to natively support upload of SQLite dumps + * + * dump populates tables first and create indices after + * so we must omit them because shadow tables already filled + * + * 1. in case of any error :-1 returned (and pParse errMsg is populated with some error message) + * 2. if vector index must not be created : 0 returned + * 3. if vector index must be created but refill must be skipped : 1 returned + * 4. if vector index must be created and refilled from base table: 2 returned +*/ +int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, const IdList *pUsing) { + static const int CREATE_FAIL = -1; + static const int CREATE_IGNORE = 0; + static const int CREATE_OK_SKIP_REFILL = 1; + static const int CREATE_OK = 2; + int i, rc = SQLITE_OK; int dims, type; int hasLibsqlVectorIdxFn = 0, hasCollation = 0; const char *pzErrMsg; + if( IsVacuum(pParse->db) ){ + return CREATE_IGNORE; + } + assert( zDbSName != NULL ); sqlite3 *db = pParse->db; @@ -212149,7 +212246,7 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pParse->eParseMode ){ // scheme can be re-parsed by SQLite for different reasons (for example, to check schema after // ALTER COLUMN statements) - so we must skip creation in such cases - goto ignored; + return CREATE_IGNORE; } // backward compatibility: preserve old indices with deprecated syntax but forbid creation of new indices with this syntax @@ -212159,15 +212256,15 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } else { sqlite3ErrorMsg(pParse, "USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX xxx ON yyy ( " VECTOR_INDEX_MARKER_FUNCTION "(zzz) )"); } - return SQLITE_ERROR; + return CREATE_FAIL; } if( db->init.busy == 1 && pUsing != NULL ){ - goto succeed; + return CREATE_OK; } // vector index must have expressions over column if( pIdx->aColExpr == NULL ) { - goto ignored; + return CREATE_IGNORE; } pListItem = pIdx->aColExpr->a; @@ -212182,20 +212279,20 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } } if( !hasLibsqlVectorIdxFn ) { - goto ignored; + return CREATE_IGNORE; } if( hasCollation ){ sqlite3ErrorMsg(pParse, "vector index can't have collation"); - return SQLITE_ERROR; + return CREATE_FAIL; } if( pIdx->aColExpr->nExpr != 1 ) { sqlite3ErrorMsg(pParse, "vector index must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function"); - return SQLITE_ERROR; + return CREATE_FAIL; } // we are able to support this but I doubt this works for now - more polishing required to make this work if( pIdx->pPartIdxWhere != NULL ) { sqlite3ErrorMsg(pParse, "partial vector index is not supported"); - return SQLITE_ERROR; + return CREATE_FAIL; } pArgsList = pIdx->aColExpr->a[0].pExpr->x.pList; @@ -212203,61 +212300,65 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pArgsList->nExpr < 1 ){ sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " must contain at least one argument"); - return SQLITE_ERROR; + return CREATE_FAIL; } if( pListItem[0].pExpr->op != TK_COLUMN ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be a column token"); - return SQLITE_ERROR; + return CREATE_FAIL; } iEmbeddingColumn = pListItem[0].pExpr->iColumn; if( iEmbeddingColumn < 0 ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be column with vector type"); - return SQLITE_ERROR; + return CREATE_FAIL; } assert( iEmbeddingColumn >= 0 && iEmbeddingColumn < pTable->nCol ); zEmbeddingColumnTypeName = sqlite3ColumnType(&pTable->aCol[iEmbeddingColumn], ""); if( vectorIdxParseColumnType(zEmbeddingColumnTypeName, &type, &dims, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "%s: %s", pzErrMsg, zEmbeddingColumnTypeName); - return SQLITE_ERROR; + return CREATE_FAIL; } // schema is locked while db is initializing and we need to just proceed here if( db->init.busy == 1 ){ - goto succeed; + return CREATE_OK; } rc = initVectorIndexMetaTable(db, zDbSName); if( rc != SQLITE_OK ){ - return rc; + sqlite3ErrorMsg(pParse, "failed to init vector index meta table: %s", sqlite3_errmsg(db)); + return CREATE_FAIL; } rc = parseVectorIdxParams(pParse, &idxParams, type, dims, pListItem + 1, pArgsList->nExpr - 1); if( rc != SQLITE_OK ){ - return rc; + sqlite3ErrorMsg(pParse, "failed to parse vector idx params"); + return CREATE_FAIL; } if( vectorIdxKeyGet(pTable, &idxKey, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "failed to detect underlying table key: %s", pzErrMsg); - return SQLITE_ERROR; + return CREATE_FAIL; } if( idxKey.nKeyColumns != 1 ){ sqlite3ErrorMsg(pParse, "vector index for tables without ROWID and composite primary key are not supported"); - return SQLITE_ERROR; + return CREATE_FAIL; } rc = diskAnnCreateIndex(db, zDbSName, pIdx->zName, &idxKey, &idxParams); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to initialize diskann vector index"); - return rc; + return CREATE_FAIL; } rc = insertIndexParameters(db, zDbSName, pIdx->zName, &idxParams); + if( rc == SQLITE_CONSTRAINT ){ + // we are violating unique constraint here which means that someone inserted parameters in the table before us + // taking aside corruption scenarios, this can be in case of loading dump (because tables and data are loaded before indices) + // this case is valid and we must proceed with index creating but avoid index-refill step as it is already filled + return CREATE_OK_SKIP_REFILL; + } if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to update global metadata table"); - return rc; + return CREATE_FAIL; } -succeed: - pIdx->idxType = SQLITE_IDXTYPE_VECTOR; - return SQLITE_OK; -ignored: - return SQLITE_OK; + return CREATE_OK; } int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value **argv, VectorOutRows *pRows, char **pzErrMsg) { @@ -212271,6 +212372,7 @@ int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value VectorIdxParams idxParams; vectorIdxParamsInit(&idxParams, NULL, 0); + assert( !IsVacuum(db) ); assert( zDbSName != NULL ); if( argc != 3 ){ @@ -212353,6 +212455,10 @@ int vectorIndexInsert( int rc; VectorInRow vectorInRow; + if( IsVacuum(pCur->db) ){ + return SQLITE_OK; + } + rc = vectorInRowAlloc(pCur->db, pRecord, &vectorInRow, pzErrMsg); if( rc != SQLITE_OK ){ return rc; @@ -212371,6 +212477,11 @@ int vectorIndexDelete( char **pzErrMsg ){ VectorInRow payload; + + if( IsVacuum(pCur->db) ){ + return SQLITE_OK; + } + payload.pVector = NULL; payload.nKeys = r->nField - 1; payload.pKeyValues = r->aMem + 1; diff --git a/libsql-ffi/bundled/src/sqlite3.c b/libsql-ffi/bundled/src/sqlite3.c index c840cb8032..362008e23f 100644 --- a/libsql-ffi/bundled/src/sqlite3.c +++ b/libsql-ffi/bundled/src/sqlite3.c @@ -85130,14 +85130,14 @@ typedef struct VectorIdxCursor VectorIdxCursor; int vectorIdxParseColumnType(const char *, int *, int *, const char **); -int vectorIndexCreate(Parse*, Index*, const char *, const IdList*); +int vectorIndexCreate(Parse*, const Index*, const char *, const IdList*); int vectorIndexClear(sqlite3 *, const char *, const char *); int vectorIndexDrop(sqlite3 *, const char *, const char *); +int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); int vectorIndexCursorInit(sqlite3 *, const char *, const char *, VectorIdxCursor **); void vectorIndexCursorClose(sqlite3 *, VectorIdxCursor *); int vectorIndexInsert(VectorIdxCursor *, const UnpackedRecord *, char **); int vectorIndexDelete(VectorIdxCursor *, const UnpackedRecord *, char **); -int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); #if 0 } /* end of the 'extern "C"' block */ @@ -125791,6 +125791,7 @@ SQLITE_PRIVATE void sqlite3CreateIndex( int nExtraCol; /* Number of extra columns needed */ char *zExtra = 0; /* Extra space after the Index object */ Index *pPk = 0; /* PRIMARY KEY index for WITHOUT ROWID tables */ + int vectorIdxRc = 0, skipRefill = 0; assert( db->pParse==pParse ); if( pParse->nErr ){ @@ -126100,10 +126101,17 @@ SQLITE_PRIVATE void sqlite3CreateIndex( #ifndef SQLITE_OMIT_VECTOR - if( vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing) != SQLITE_OK ) { + vectorIdxRc = vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing); + if( vectorIdxRc < 0 ){ goto exit_create_index; } - idxType = pIndex->idxType; // vectorIndexCreate can update idxType to 4 (VECTOR INDEX) + if( vectorIdxRc >= 1 ){ + idxType = SQLITE_IDXTYPE_VECTOR; + pIndex->idxType = idxType; + } + if( vectorIdxRc == 1 ){ + skipRefill = 1; + } #endif /* Append the table key to the end of the index. For WITHOUT ROWID @@ -126306,7 +126314,9 @@ SQLITE_PRIVATE void sqlite3CreateIndex( ** to invalidate all pre-compiled statements. */ if( pTblName ){ - sqlite3RefillIndex(pParse, pIndex, iMem); + if( !skipRefill ){ + sqlite3RefillIndex(pParse, pIndex, iMem); + } sqlite3ChangeCookie(pParse, iDb); sqlite3VdbeAddParseSchemaOp(v, iDb, sqlite3MPrintf(db, "name='%q' AND type='index'", pIndex->zName), 0); @@ -211447,10 +211457,30 @@ int vectorF64ParseSqliteBlob( /* #include "sqliteInt.h" */ /* #include "vectorIndexInt.h" */ +/* + * The code which glue SQLite internals with pure DiskANN implementation resides here + * Main internal API methods are: + * vectorIndexCreate() + * vectorIndexClear() + * vectorIndexDrop() + * vectorIndexSearch() + * vectorIndexCursorInit() + * vectorIndexCursorClose() + * + * + cursor operations: + * vectorIndexInsert(cursor) + * vectorIndexDelete(cursor) +*/ + /************************************************************************** ** VectorIdxParams utilities ****************************************************************************/ +// VACUUM creates tables and indices first and only then populate data +// we need to ignore inserts from 'INSERT INTO vacuum.t SELECT * FROM t' statements because +// all shadow tables will be populated by VACUUM process during regular process of table copy +#define IsVacuum(db) ((db->mDbFlags&DBFLAG_Vacuum)!=0) + void vectorIdxParamsInit(VectorIdxParams *pParams, u8 *pBinBuf, int nBinSize) { assert( nBinSize <= VECTOR_INDEX_PARAMS_BUF_SIZE ); @@ -212009,9 +212039,11 @@ int insertIndexParameters(sqlite3* db, const char *zDbSName, const char *zName, goto clear_and_exit; } rc = sqlite3_step(pStatement); - if( rc != SQLITE_DONE ){ + if( rc == SQLITE_CONSTRAINT ){ + rc = SQLITE_CONSTRAINT; + }else if( rc != SQLITE_DONE ){ rc = SQLITE_ERROR; - } else { + }else{ rc = SQLITE_OK; } clear_and_exit: @@ -212050,51 +212082,25 @@ int removeIndexParameters(sqlite3* db, const char *zName) { return rc; } -int vectorIndexGetParameters( - sqlite3 *db, - const char *zIndexName, - VectorIdxParams *pParams -) { +int vectorIndexTryGetParametersFromTableFormat(sqlite3 *db, const char *zSql, const char *zIdxName, VectorIdxParams *pParams) { int rc = SQLITE_OK; sqlite3_stmt *pStmt = NULL; int nBinSize; - static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; - static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE type = ? AND name = ?"; - rc = sqlite3_prepare_v2(db, zSelectSql, -1, &pStmt, 0); - if( rc == SQLITE_OK ) { - rc = sqlite3_bind_text(pStmt, 1, zIndexName, -1, SQLITE_STATIC); - if( rc != SQLITE_OK ){ - goto out_free; - } + vectorIdxParamsInit(pParams, NULL, 0); - if( sqlite3_step(pStmt) == SQLITE_ROW ){ - assert( sqlite3_column_type(pStmt, 0) == SQLITE_BLOB ); - nBinSize = sqlite3_column_bytes(pStmt, 0); - if( nBinSize > VECTOR_INDEX_PARAMS_BUF_SIZE ){ - rc = SQLITE_ERROR; - goto out_free; - } - vectorIdxParamsInit(pParams, (u8*)sqlite3_column_blob(pStmt, 0), nBinSize); - goto out_free; - } - } - if( pStmt ){ - sqlite3_finalize(pStmt); - pStmt = NULL; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc != SQLITE_OK ){ + goto out; } - - rc = sqlite3_prepare_v2(db, zSelectSqlPekkaLegacy, -1, &pStmt, 0); + rc = sqlite3_bind_text(pStmt, 1, zIdxName, -1, SQLITE_STATIC); if( rc != SQLITE_OK ){ - goto out_free; + goto out; } - sqlite3_bind_text(pStmt, 1, "diskann", -1, SQLITE_STATIC); - sqlite3_bind_text(pStmt, 2, zIndexName, -1, SQLITE_STATIC); if( sqlite3_step(pStmt) != SQLITE_ROW ){ rc = SQLITE_ERROR; - goto out_free; + goto out; } - vectorIdxParamsPutU64(pParams, VECTOR_FORMAT_PARAM_ID, 1); vectorIdxParamsPutU64(pParams, VECTOR_INDEX_TYPE_PARAM_ID, VECTOR_INDEX_TYPE_DISKANN); vectorIdxParamsPutU64(pParams, VECTOR_TYPE_PARAM_ID, VECTOR_TYPE_FLOAT32); @@ -212102,20 +212108,84 @@ int vectorIndexGetParameters( vectorIdxParamsPutU64(pParams, VECTOR_METRIC_TYPE_PARAM_ID, VECTOR_METRIC_TYPE_COS); if( vectorIdxParamsPutU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID, sqlite3_column_int(pStmt, 1)) != 0 ){ rc = SQLITE_ERROR; + goto out; } -out_free: + assert( sqlite3_step(pStmt) == SQLITE_DONE ); + rc = SQLITE_OK; +out: if( pStmt != NULL ){ sqlite3_finalize(pStmt); } return rc; } +int vectorIndexTryGetParametersFromBinFormat(sqlite3 *db, const char *zSql, const char *zIdxName, VectorIdxParams *pParams) { + int rc = SQLITE_OK; + sqlite3_stmt *pStmt = NULL; + int nBinSize; + + vectorIdxParamsInit(pParams, NULL, 0); + + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc != SQLITE_OK ){ + goto out; + } + rc = sqlite3_bind_text(pStmt, 1, zIdxName, -1, SQLITE_STATIC); + if( rc != SQLITE_OK ){ + goto out; + } + if( sqlite3_step(pStmt) != SQLITE_ROW ){ + rc = SQLITE_ERROR; + goto out; + } + assert( sqlite3_column_type(pStmt, 0) == SQLITE_BLOB ); + nBinSize = sqlite3_column_bytes(pStmt, 0); + if( nBinSize > VECTOR_INDEX_PARAMS_BUF_SIZE ){ + rc = SQLITE_ERROR; + goto out; + } + vectorIdxParamsInit(pParams, (u8*)sqlite3_column_blob(pStmt, 0), nBinSize); + assert( sqlite3_step(pStmt) == SQLITE_DONE ); + rc = SQLITE_OK; +out: + if( pStmt != NULL ){ + sqlite3_finalize(pStmt); + } + return rc; +} + +int vectorIndexGetParameters( + sqlite3 *db, + const char *zIdxName, + VectorIdxParams *pParams +) { + int rc = SQLITE_OK; + + static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + // zSelectSqlPekkaLegacy handles the case when user created DB before 04 July 2024 (https://discord.com/channels/933071162680958986/1225560924526477322/1258367912402489397) + // when instead of table with binary parameters rigid schema was used for index settings + // we should drop this eventually - but for now we postponed this decision + static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE name = ?"; + rc = vectorIndexTryGetParametersFromBinFormat(db, zSelectSql, zIdxName, pParams); + if( rc == SQLITE_OK ){ + return SQLITE_OK; + } + rc = vectorIndexTryGetParametersFromTableFormat(db, zSelectSqlPekkaLegacy, zIdxName, pParams); + if( rc == SQLITE_OK ){ + return SQLITE_OK; + } + return SQLITE_ERROR; +} int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { // we want to try delete all traces of index on every attempt // this is done to prevent unrecoverable situations where index were dropped but index parameters deletion failed and second attempt will fail on first step int rcIdx, rcParams; + if( IsVacuum(db) ){ + return SQLITE_OK; + } + assert( zDbSName != NULL ); rcIdx = diskAnnDropIndex(db, zDbSName, zIdxName); @@ -212125,15 +212195,42 @@ int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { assert( zDbSName != NULL ); + + if( IsVacuum(db) ){ + return SQLITE_OK; + } + return diskAnnClearIndex(db, zDbSName, zIdxName); } -int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const IdList *pUsing) { +/* + * vectorIndexCreate analyzes any index creation expression and create vector index if needed + * it tolerates the situation when insert into VECTOR_INDEX_GLOBAL_META_TABLE failed with conflict + * this made intentionally in order to natively support upload of SQLite dumps + * + * dump populates tables first and create indices after + * so we must omit them because shadow tables already filled + * + * 1. in case of any error :-1 returned (and pParse errMsg is populated with some error message) + * 2. if vector index must not be created : 0 returned + * 3. if vector index must be created but refill must be skipped : 1 returned + * 4. if vector index must be created and refilled from base table: 2 returned +*/ +int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, const IdList *pUsing) { + static const int CREATE_FAIL = -1; + static const int CREATE_IGNORE = 0; + static const int CREATE_OK_SKIP_REFILL = 1; + static const int CREATE_OK = 2; + int i, rc = SQLITE_OK; int dims, type; int hasLibsqlVectorIdxFn = 0, hasCollation = 0; const char *pzErrMsg; + if( IsVacuum(pParse->db) ){ + return CREATE_IGNORE; + } + assert( zDbSName != NULL ); sqlite3 *db = pParse->db; @@ -212149,7 +212246,7 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pParse->eParseMode ){ // scheme can be re-parsed by SQLite for different reasons (for example, to check schema after // ALTER COLUMN statements) - so we must skip creation in such cases - goto ignored; + return CREATE_IGNORE; } // backward compatibility: preserve old indices with deprecated syntax but forbid creation of new indices with this syntax @@ -212159,15 +212256,15 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } else { sqlite3ErrorMsg(pParse, "USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX xxx ON yyy ( " VECTOR_INDEX_MARKER_FUNCTION "(zzz) )"); } - return SQLITE_ERROR; + return CREATE_FAIL; } if( db->init.busy == 1 && pUsing != NULL ){ - goto succeed; + return CREATE_OK; } // vector index must have expressions over column if( pIdx->aColExpr == NULL ) { - goto ignored; + return CREATE_IGNORE; } pListItem = pIdx->aColExpr->a; @@ -212182,20 +212279,20 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } } if( !hasLibsqlVectorIdxFn ) { - goto ignored; + return CREATE_IGNORE; } if( hasCollation ){ sqlite3ErrorMsg(pParse, "vector index can't have collation"); - return SQLITE_ERROR; + return CREATE_FAIL; } if( pIdx->aColExpr->nExpr != 1 ) { sqlite3ErrorMsg(pParse, "vector index must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function"); - return SQLITE_ERROR; + return CREATE_FAIL; } // we are able to support this but I doubt this works for now - more polishing required to make this work if( pIdx->pPartIdxWhere != NULL ) { sqlite3ErrorMsg(pParse, "partial vector index is not supported"); - return SQLITE_ERROR; + return CREATE_FAIL; } pArgsList = pIdx->aColExpr->a[0].pExpr->x.pList; @@ -212203,61 +212300,65 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pArgsList->nExpr < 1 ){ sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " must contain at least one argument"); - return SQLITE_ERROR; + return CREATE_FAIL; } if( pListItem[0].pExpr->op != TK_COLUMN ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be a column token"); - return SQLITE_ERROR; + return CREATE_FAIL; } iEmbeddingColumn = pListItem[0].pExpr->iColumn; if( iEmbeddingColumn < 0 ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be column with vector type"); - return SQLITE_ERROR; + return CREATE_FAIL; } assert( iEmbeddingColumn >= 0 && iEmbeddingColumn < pTable->nCol ); zEmbeddingColumnTypeName = sqlite3ColumnType(&pTable->aCol[iEmbeddingColumn], ""); if( vectorIdxParseColumnType(zEmbeddingColumnTypeName, &type, &dims, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "%s: %s", pzErrMsg, zEmbeddingColumnTypeName); - return SQLITE_ERROR; + return CREATE_FAIL; } // schema is locked while db is initializing and we need to just proceed here if( db->init.busy == 1 ){ - goto succeed; + return CREATE_OK; } rc = initVectorIndexMetaTable(db, zDbSName); if( rc != SQLITE_OK ){ - return rc; + sqlite3ErrorMsg(pParse, "failed to init vector index meta table: %s", sqlite3_errmsg(db)); + return CREATE_FAIL; } rc = parseVectorIdxParams(pParse, &idxParams, type, dims, pListItem + 1, pArgsList->nExpr - 1); if( rc != SQLITE_OK ){ - return rc; + sqlite3ErrorMsg(pParse, "failed to parse vector idx params"); + return CREATE_FAIL; } if( vectorIdxKeyGet(pTable, &idxKey, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "failed to detect underlying table key: %s", pzErrMsg); - return SQLITE_ERROR; + return CREATE_FAIL; } if( idxKey.nKeyColumns != 1 ){ sqlite3ErrorMsg(pParse, "vector index for tables without ROWID and composite primary key are not supported"); - return SQLITE_ERROR; + return CREATE_FAIL; } rc = diskAnnCreateIndex(db, zDbSName, pIdx->zName, &idxKey, &idxParams); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to initialize diskann vector index"); - return rc; + return CREATE_FAIL; } rc = insertIndexParameters(db, zDbSName, pIdx->zName, &idxParams); + if( rc == SQLITE_CONSTRAINT ){ + // we are violating unique constraint here which means that someone inserted parameters in the table before us + // taking aside corruption scenarios, this can be in case of loading dump (because tables and data are loaded before indices) + // this case is valid and we must proceed with index creating but avoid index-refill step as it is already filled + return CREATE_OK_SKIP_REFILL; + } if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to update global metadata table"); - return rc; + return CREATE_FAIL; } -succeed: - pIdx->idxType = SQLITE_IDXTYPE_VECTOR; - return SQLITE_OK; -ignored: - return SQLITE_OK; + return CREATE_OK; } int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value **argv, VectorOutRows *pRows, char **pzErrMsg) { @@ -212271,6 +212372,7 @@ int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value VectorIdxParams idxParams; vectorIdxParamsInit(&idxParams, NULL, 0); + assert( !IsVacuum(db) ); assert( zDbSName != NULL ); if( argc != 3 ){ @@ -212353,6 +212455,10 @@ int vectorIndexInsert( int rc; VectorInRow vectorInRow; + if( IsVacuum(pCur->db) ){ + return SQLITE_OK; + } + rc = vectorInRowAlloc(pCur->db, pRecord, &vectorInRow, pzErrMsg); if( rc != SQLITE_OK ){ return rc; @@ -212371,6 +212477,11 @@ int vectorIndexDelete( char **pzErrMsg ){ VectorInRow payload; + + if( IsVacuum(pCur->db) ){ + return SQLITE_OK; + } + payload.pVector = NULL; payload.nKeys = r->nField - 1; payload.pKeyValues = r->aMem + 1; diff --git a/libsql-sqlite3/src/build.c b/libsql-sqlite3/src/build.c index afba9b58d7..245503127f 100644 --- a/libsql-sqlite3/src/build.c +++ b/libsql-sqlite3/src/build.c @@ -4000,6 +4000,7 @@ void sqlite3CreateIndex( int nExtraCol; /* Number of extra columns needed */ char *zExtra = 0; /* Extra space after the Index object */ Index *pPk = 0; /* PRIMARY KEY index for WITHOUT ROWID tables */ + int vectorIdxRc = 0, skipRefill = 0; assert( db->pParse==pParse ); if( pParse->nErr ){ @@ -4309,10 +4310,17 @@ void sqlite3CreateIndex( #ifndef SQLITE_OMIT_VECTOR - if( vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing) != SQLITE_OK ) { + vectorIdxRc = vectorIndexCreate(pParse, pIndex, db->aDb[iDb].zDbSName, pUsing); + if( vectorIdxRc < 0 ){ goto exit_create_index; } - idxType = pIndex->idxType; // vectorIndexCreate can update idxType to 4 (VECTOR INDEX) + if( vectorIdxRc >= 1 ){ + idxType = SQLITE_IDXTYPE_VECTOR; + pIndex->idxType = idxType; + } + if( vectorIdxRc == 1 ){ + skipRefill = 1; + } #endif /* Append the table key to the end of the index. For WITHOUT ROWID @@ -4515,7 +4523,9 @@ void sqlite3CreateIndex( ** to invalidate all pre-compiled statements. */ if( pTblName ){ - sqlite3RefillIndex(pParse, pIndex, iMem); + if( !skipRefill ){ + sqlite3RefillIndex(pParse, pIndex, iMem); + } sqlite3ChangeCookie(pParse, iDb); sqlite3VdbeAddParseSchemaOp(v, iDb, sqlite3MPrintf(db, "name='%q' AND type='index'", pIndex->zName), 0); diff --git a/libsql-sqlite3/src/vectorIndex.c b/libsql-sqlite3/src/vectorIndex.c index 2fc7657e84..67bfbb916b 100644 --- a/libsql-sqlite3/src/vectorIndex.c +++ b/libsql-sqlite3/src/vectorIndex.c @@ -30,10 +30,30 @@ #include "sqliteInt.h" #include "vectorIndexInt.h" +/* + * The code which glue SQLite internals with pure DiskANN implementation resides here + * Main internal API methods are: + * vectorIndexCreate() + * vectorIndexClear() + * vectorIndexDrop() + * vectorIndexSearch() + * vectorIndexCursorInit() + * vectorIndexCursorClose() + * + * + cursor operations: + * vectorIndexInsert(cursor) + * vectorIndexDelete(cursor) +*/ + /************************************************************************** ** VectorIdxParams utilities ****************************************************************************/ +// VACUUM creates tables and indices first and only then populate data +// we need to ignore inserts from 'INSERT INTO vacuum.t SELECT * FROM t' statements because +// all shadow tables will be populated by VACUUM process during regular process of table copy +#define IsVacuum(db) ((db->mDbFlags&DBFLAG_Vacuum)!=0) + void vectorIdxParamsInit(VectorIdxParams *pParams, u8 *pBinBuf, int nBinSize) { assert( nBinSize <= VECTOR_INDEX_PARAMS_BUF_SIZE ); @@ -592,9 +612,11 @@ int insertIndexParameters(sqlite3* db, const char *zDbSName, const char *zName, goto clear_and_exit; } rc = sqlite3_step(pStatement); - if( rc != SQLITE_DONE ){ + if( rc == SQLITE_CONSTRAINT ){ + rc = SQLITE_CONSTRAINT; + }else if( rc != SQLITE_DONE ){ rc = SQLITE_ERROR; - } else { + }else{ rc = SQLITE_OK; } clear_and_exit: @@ -633,51 +655,25 @@ int removeIndexParameters(sqlite3* db, const char *zName) { return rc; } -int vectorIndexGetParameters( - sqlite3 *db, - const char *zIndexName, - VectorIdxParams *pParams -) { +int vectorIndexTryGetParametersFromTableFormat(sqlite3 *db, const char *zSql, const char *zIdxName, VectorIdxParams *pParams) { int rc = SQLITE_OK; sqlite3_stmt *pStmt = NULL; int nBinSize; - static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; - static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE type = ? AND name = ?"; - rc = sqlite3_prepare_v2(db, zSelectSql, -1, &pStmt, 0); - if( rc == SQLITE_OK ) { - rc = sqlite3_bind_text(pStmt, 1, zIndexName, -1, SQLITE_STATIC); - if( rc != SQLITE_OK ){ - goto out_free; - } + vectorIdxParamsInit(pParams, NULL, 0); - if( sqlite3_step(pStmt) == SQLITE_ROW ){ - assert( sqlite3_column_type(pStmt, 0) == SQLITE_BLOB ); - nBinSize = sqlite3_column_bytes(pStmt, 0); - if( nBinSize > VECTOR_INDEX_PARAMS_BUF_SIZE ){ - rc = SQLITE_ERROR; - goto out_free; - } - vectorIdxParamsInit(pParams, (u8*)sqlite3_column_blob(pStmt, 0), nBinSize); - goto out_free; - } - } - if( pStmt ){ - sqlite3_finalize(pStmt); - pStmt = NULL; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc != SQLITE_OK ){ + goto out; } - - rc = sqlite3_prepare_v2(db, zSelectSqlPekkaLegacy, -1, &pStmt, 0); + rc = sqlite3_bind_text(pStmt, 1, zIdxName, -1, SQLITE_STATIC); if( rc != SQLITE_OK ){ - goto out_free; + goto out; } - sqlite3_bind_text(pStmt, 1, "diskann", -1, SQLITE_STATIC); - sqlite3_bind_text(pStmt, 2, zIndexName, -1, SQLITE_STATIC); if( sqlite3_step(pStmt) != SQLITE_ROW ){ rc = SQLITE_ERROR; - goto out_free; + goto out; } - vectorIdxParamsPutU64(pParams, VECTOR_FORMAT_PARAM_ID, 1); vectorIdxParamsPutU64(pParams, VECTOR_INDEX_TYPE_PARAM_ID, VECTOR_INDEX_TYPE_DISKANN); vectorIdxParamsPutU64(pParams, VECTOR_TYPE_PARAM_ID, VECTOR_TYPE_FLOAT32); @@ -685,20 +681,84 @@ int vectorIndexGetParameters( vectorIdxParamsPutU64(pParams, VECTOR_METRIC_TYPE_PARAM_ID, VECTOR_METRIC_TYPE_COS); if( vectorIdxParamsPutU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID, sqlite3_column_int(pStmt, 1)) != 0 ){ rc = SQLITE_ERROR; + goto out; } -out_free: + assert( sqlite3_step(pStmt) == SQLITE_DONE ); + rc = SQLITE_OK; +out: if( pStmt != NULL ){ sqlite3_finalize(pStmt); } return rc; } +int vectorIndexTryGetParametersFromBinFormat(sqlite3 *db, const char *zSql, const char *zIdxName, VectorIdxParams *pParams) { + int rc = SQLITE_OK; + sqlite3_stmt *pStmt = NULL; + int nBinSize; + + vectorIdxParamsInit(pParams, NULL, 0); + + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc != SQLITE_OK ){ + goto out; + } + rc = sqlite3_bind_text(pStmt, 1, zIdxName, -1, SQLITE_STATIC); + if( rc != SQLITE_OK ){ + goto out; + } + if( sqlite3_step(pStmt) != SQLITE_ROW ){ + rc = SQLITE_ERROR; + goto out; + } + assert( sqlite3_column_type(pStmt, 0) == SQLITE_BLOB ); + nBinSize = sqlite3_column_bytes(pStmt, 0); + if( nBinSize > VECTOR_INDEX_PARAMS_BUF_SIZE ){ + rc = SQLITE_ERROR; + goto out; + } + vectorIdxParamsInit(pParams, (u8*)sqlite3_column_blob(pStmt, 0), nBinSize); + assert( sqlite3_step(pStmt) == SQLITE_DONE ); + rc = SQLITE_OK; +out: + if( pStmt != NULL ){ + sqlite3_finalize(pStmt); + } + return rc; +} + +int vectorIndexGetParameters( + sqlite3 *db, + const char *zIdxName, + VectorIdxParams *pParams +) { + int rc = SQLITE_OK; + + static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + // zSelectSqlPekkaLegacy handles the case when user created DB before 04 July 2024 (https://discord.com/channels/933071162680958986/1225560924526477322/1258367912402489397) + // when instead of table with binary parameters rigid schema was used for index settings + // we should drop this eventually - but for now we postponed this decision + static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE name = ?"; + rc = vectorIndexTryGetParametersFromBinFormat(db, zSelectSql, zIdxName, pParams); + if( rc == SQLITE_OK ){ + return SQLITE_OK; + } + rc = vectorIndexTryGetParametersFromTableFormat(db, zSelectSqlPekkaLegacy, zIdxName, pParams); + if( rc == SQLITE_OK ){ + return SQLITE_OK; + } + return SQLITE_ERROR; +} int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { // we want to try delete all traces of index on every attempt // this is done to prevent unrecoverable situations where index were dropped but index parameters deletion failed and second attempt will fail on first step int rcIdx, rcParams; + if( IsVacuum(db) ){ + return SQLITE_OK; + } + assert( zDbSName != NULL ); rcIdx = diskAnnDropIndex(db, zDbSName, zIdxName); @@ -708,15 +768,42 @@ int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { assert( zDbSName != NULL ); + + if( IsVacuum(db) ){ + return SQLITE_OK; + } + return diskAnnClearIndex(db, zDbSName, zIdxName); } -int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const IdList *pUsing) { +/* + * vectorIndexCreate analyzes any index creation expression and create vector index if needed + * it tolerates the situation when insert into VECTOR_INDEX_GLOBAL_META_TABLE failed with conflict + * this made intentionally in order to natively support upload of SQLite dumps + * + * dump populates tables first and create indices after + * so we must omit them because shadow tables already filled + * + * 1. in case of any error :-1 returned (and pParse errMsg is populated with some error message) + * 2. if vector index must not be created : 0 returned + * 3. if vector index must be created but refill must be skipped : 1 returned + * 4. if vector index must be created and refilled from base table: 2 returned +*/ +int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, const IdList *pUsing) { + static const int CREATE_FAIL = -1; + static const int CREATE_IGNORE = 0; + static const int CREATE_OK_SKIP_REFILL = 1; + static const int CREATE_OK = 2; + int i, rc = SQLITE_OK; int dims, type; int hasLibsqlVectorIdxFn = 0, hasCollation = 0; const char *pzErrMsg; + if( IsVacuum(pParse->db) ){ + return CREATE_IGNORE; + } + assert( zDbSName != NULL ); sqlite3 *db = pParse->db; @@ -732,7 +819,7 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pParse->eParseMode ){ // scheme can be re-parsed by SQLite for different reasons (for example, to check schema after // ALTER COLUMN statements) - so we must skip creation in such cases - goto ignored; + return CREATE_IGNORE; } // backward compatibility: preserve old indices with deprecated syntax but forbid creation of new indices with this syntax @@ -742,15 +829,15 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } else { sqlite3ErrorMsg(pParse, "USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX xxx ON yyy ( " VECTOR_INDEX_MARKER_FUNCTION "(zzz) )"); } - return SQLITE_ERROR; + return CREATE_FAIL; } if( db->init.busy == 1 && pUsing != NULL ){ - goto succeed; + return CREATE_OK; } // vector index must have expressions over column if( pIdx->aColExpr == NULL ) { - goto ignored; + return CREATE_IGNORE; } pListItem = pIdx->aColExpr->a; @@ -765,20 +852,20 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id } } if( !hasLibsqlVectorIdxFn ) { - goto ignored; + return CREATE_IGNORE; } if( hasCollation ){ sqlite3ErrorMsg(pParse, "vector index can't have collation"); - return SQLITE_ERROR; + return CREATE_FAIL; } if( pIdx->aColExpr->nExpr != 1 ) { sqlite3ErrorMsg(pParse, "vector index must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function"); - return SQLITE_ERROR; + return CREATE_FAIL; } // we are able to support this but I doubt this works for now - more polishing required to make this work if( pIdx->pPartIdxWhere != NULL ) { sqlite3ErrorMsg(pParse, "partial vector index is not supported"); - return SQLITE_ERROR; + return CREATE_FAIL; } pArgsList = pIdx->aColExpr->a[0].pExpr->x.pList; @@ -786,61 +873,65 @@ int vectorIndexCreate(Parse *pParse, Index *pIdx, const char *zDbSName, const Id if( pArgsList->nExpr < 1 ){ sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " must contain at least one argument"); - return SQLITE_ERROR; + return CREATE_FAIL; } if( pListItem[0].pExpr->op != TK_COLUMN ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be a column token"); - return SQLITE_ERROR; + return CREATE_FAIL; } iEmbeddingColumn = pListItem[0].pExpr->iColumn; if( iEmbeddingColumn < 0 ) { sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be column with vector type"); - return SQLITE_ERROR; + return CREATE_FAIL; } assert( iEmbeddingColumn >= 0 && iEmbeddingColumn < pTable->nCol ); zEmbeddingColumnTypeName = sqlite3ColumnType(&pTable->aCol[iEmbeddingColumn], ""); if( vectorIdxParseColumnType(zEmbeddingColumnTypeName, &type, &dims, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "%s: %s", pzErrMsg, zEmbeddingColumnTypeName); - return SQLITE_ERROR; + return CREATE_FAIL; } // schema is locked while db is initializing and we need to just proceed here if( db->init.busy == 1 ){ - goto succeed; + return CREATE_OK; } rc = initVectorIndexMetaTable(db, zDbSName); if( rc != SQLITE_OK ){ - return rc; + sqlite3ErrorMsg(pParse, "failed to init vector index meta table: %s", sqlite3_errmsg(db)); + return CREATE_FAIL; } rc = parseVectorIdxParams(pParse, &idxParams, type, dims, pListItem + 1, pArgsList->nExpr - 1); if( rc != SQLITE_OK ){ - return rc; + sqlite3ErrorMsg(pParse, "failed to parse vector idx params"); + return CREATE_FAIL; } if( vectorIdxKeyGet(pTable, &idxKey, &pzErrMsg) != 0 ){ sqlite3ErrorMsg(pParse, "failed to detect underlying table key: %s", pzErrMsg); - return SQLITE_ERROR; + return CREATE_FAIL; } if( idxKey.nKeyColumns != 1 ){ sqlite3ErrorMsg(pParse, "vector index for tables without ROWID and composite primary key are not supported"); - return SQLITE_ERROR; + return CREATE_FAIL; } rc = diskAnnCreateIndex(db, zDbSName, pIdx->zName, &idxKey, &idxParams); if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to initialize diskann vector index"); - return rc; + return CREATE_FAIL; } rc = insertIndexParameters(db, zDbSName, pIdx->zName, &idxParams); + if( rc == SQLITE_CONSTRAINT ){ + // we are violating unique constraint here which means that someone inserted parameters in the table before us + // taking aside corruption scenarios, this can be in case of loading dump (because tables and data are loaded before indices) + // this case is valid and we must proceed with index creating but avoid index-refill step as it is already filled + return CREATE_OK_SKIP_REFILL; + } if( rc != SQLITE_OK ){ sqlite3ErrorMsg(pParse, "unable to update global metadata table"); - return rc; + return CREATE_FAIL; } -succeed: - pIdx->idxType = SQLITE_IDXTYPE_VECTOR; - return SQLITE_OK; -ignored: - return SQLITE_OK; + return CREATE_OK; } int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value **argv, VectorOutRows *pRows, char **pzErrMsg) { @@ -854,6 +945,7 @@ int vectorIndexSearch(sqlite3 *db, const char* zDbSName, int argc, sqlite3_value VectorIdxParams idxParams; vectorIdxParamsInit(&idxParams, NULL, 0); + assert( !IsVacuum(db) ); assert( zDbSName != NULL ); if( argc != 3 ){ @@ -936,6 +1028,10 @@ int vectorIndexInsert( int rc; VectorInRow vectorInRow; + if( IsVacuum(pCur->db) ){ + return SQLITE_OK; + } + rc = vectorInRowAlloc(pCur->db, pRecord, &vectorInRow, pzErrMsg); if( rc != SQLITE_OK ){ return rc; @@ -954,6 +1050,11 @@ int vectorIndexDelete( char **pzErrMsg ){ VectorInRow payload; + + if( IsVacuum(pCur->db) ){ + return SQLITE_OK; + } + payload.pVector = NULL; payload.nKeys = r->nField - 1; payload.pKeyValues = r->aMem + 1; diff --git a/libsql-sqlite3/src/vectorIndexInt.h b/libsql-sqlite3/src/vectorIndexInt.h index 34b1a8ab24..a2c1ccfb17 100644 --- a/libsql-sqlite3/src/vectorIndexInt.h +++ b/libsql-sqlite3/src/vectorIndexInt.h @@ -224,14 +224,14 @@ typedef struct VectorIdxCursor VectorIdxCursor; int vectorIdxParseColumnType(const char *, int *, int *, const char **); -int vectorIndexCreate(Parse*, Index*, const char *, const IdList*); +int vectorIndexCreate(Parse*, const Index*, const char *, const IdList*); int vectorIndexClear(sqlite3 *, const char *, const char *); int vectorIndexDrop(sqlite3 *, const char *, const char *); +int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); int vectorIndexCursorInit(sqlite3 *, const char *, const char *, VectorIdxCursor **); void vectorIndexCursorClose(sqlite3 *, VectorIdxCursor *); int vectorIndexInsert(VectorIdxCursor *, const UnpackedRecord *, char **); int vectorIndexDelete(VectorIdxCursor *, const UnpackedRecord *, char **); -int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, char **); #ifdef __cplusplus } /* end of the 'extern "C"' block */ diff --git a/libsql-sqlite3/test/libsql_vector_index.test b/libsql-sqlite3/test/libsql_vector_index.test index f46e2a0799..01498121d9 100644 --- a/libsql-sqlite3/test/libsql_vector_index.test +++ b/libsql-sqlite3/test/libsql_vector_index.test @@ -225,8 +225,10 @@ do_execsql_test vector-vacuum { CREATE TABLE t_vacuum ( emb FLOAT32(2) ); INSERT INTO t_vacuum VALUES (vector('[1,2]')), (vector('[3,4]')); CREATE INDEX t_vacuum_idx ON t_vacuum(libsql_vector_idx(emb)); - VACUUM INTO ':memory:'; -} {} + VACUUM; + SELECT COUNT(*) FROM t_vacuum; + SELECT COUNT(*) FROM t_vacuum_idx_shadow; +} {2 2} proc error_messages {sql} { set ret ""