Bug 1953266 - Vendor sqlite-vec extension for Firefox. r=asuth,glandium,tjr

Sqlite-vec is a SQLite extension that allows storing and retrieving
vectors and computing various distances between them. It is a
bring-your-own-vectors kind of solution.

Differential Revision: https://phabricator.services.mozilla.com/D244347
This commit is contained in:
Marco Bonardo
2025-04-22 16:38:14 +00:00
parent e5239ed2cc
commit a7368f6216
13 changed files with 10067 additions and 0 deletions

View File

@@ -101,6 +101,11 @@ if CONFIG["MOZ_THUNDERBIRD"] or CONFIG["MOZ_SUITE"]:
if not CONFIG["MOZ_AVOID_DISK_REMNANT_ON_CLOSE"]:
DEFINES["MOZ_SQLITE_PERSIST_AUXILIARY_FILES"] = 1
# The vector extension is enabled only for Firefox, it could be opened up
# to other products in the future.
if CONFIG["MOZ_BUILD_APP"] == "browser":
DEFINES["MOZ_SQLITE_VEC0_EXT"] = 1
LOCAL_INCLUDES += [
"/dom/base",
"/third_party/sqlite3/ext",

View File

@@ -2786,6 +2786,9 @@ Connection::LoadExtension(const nsACString& aExtensionName,
static constexpr nsLiteralCString sSupportedExtensions[] = {
// clang-format off
"fts5"_ns,
#ifdef MOZ_SQLITE_VEC0_EXT
"vec"_ns,
#endif
// clang-format on
};
if (std::find(std::begin(sSupportedExtensions),

View File

@@ -0,0 +1,151 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// This file tests support for the sqlite-vec extension.
/**
 * Converts a list of numbers into the raw bytes of their 32-bit float
 * representation, suitable for binding as a blob parameter.
 *
 * @param {Iterable<number>|ArrayLike<number>} tensor
 *   The vector components; each one is stored as a Float32.
 * @returns {Uint8ClampedArray}
 *   A byte view over the Float32 storage (4 bytes per component, in the
 *   platform's native byte order).
 */
function tensorToBlob(tensor) {
  // Destructure the backing buffer so the byte view shares the float storage.
  const { buffer } = new Float32Array(tensor);
  return new Uint8ClampedArray(buffer);
}
// Run the shared cleanup helper once before any task in this file executes.
add_setup(async () => {
  cleanup();
});
// Exercises the vec extension on a synchronous connection: creating a vec0
// virtual table must fail before the extension is loaded and succeed after it,
// then a vector is inserted and looked up again through a MATCH query.
add_task(async function test_synchronous() {
info("Testing synchronous connection");
let conn = getOpenedUnsharedDatabase();
// Before loading the extension, creating a vec0 table is expected to throw.
Assert.throws(
() =>
conn.executeSimpleSQL(
`CREATE VIRTUAL TABLE test USING vec0(
embedding FLOAT[4]
);`
),
/NS_ERROR_FAILURE/,
"Should not be able to use vec without loading the extension"
);
await loadExtension(conn);
// With the extension loaded, the same kind of table can now be created.
conn.executeSimpleSQL(
`
CREATE VIRTUAL TABLE test USING vec0(
embedding FLOAT[4]
)
`
);
// Store a single 4-component vector under rowid 1.
let stmt = conn.createStatement(
`
INSERT INTO test(rowid, embedding)
VALUES (1, :vector)
`
);
stmt.bindBlobByName("vector", tensorToBlob([0.3, 0.3, 0.3, 0.3]));
stmt.executeStep();
stmt.reset();
stmt.finalize();
// Query for the closest row to the very same vector.
stmt = conn.createStatement(
`
SELECT
rowid,
distance
FROM test
WHERE embedding MATCH :vector
ORDER BY distance
LIMIT 1
`
);
stmt.bindBlobByName("vector", tensorToBlob([0.3, 0.3, 0.3, 0.3]));
Assert.ok(stmt.executeStep());
// The stored row must be returned; since the query vector is identical to
// the stored one, the reported distance is expected to be exactly zero.
Assert.equal(stmt.getInt32(0), 1);
Assert.equal(stmt.getDouble(1), 0.0);
stmt.reset();
stmt.finalize();
cleanup();
});
// Exercises the vec extension on an asynchronous connection: creating a vec0
// virtual table must be rejected before the extension is loaded and must
// succeed once it has been loaded.
add_task(async function test_asynchronous() {
info("Testing asynchronous connection");
let conn = await openAsyncDatabase(getTestDB());
// Before loading the extension, the statement is expected to reject with an
// error whose message starts with "no such module".
await Assert.rejects(
executeSimpleSQLAsync(
conn,
`
CREATE VIRTUAL TABLE test USING vec0(
embedding float[4]
)
`
),
err => err.message.startsWith("no such module"),
"Should not be able to use vec without loading the extension"
);
await loadExtension(conn);
// With the extension loaded, the same statement must now succeed.
await executeSimpleSQLAsync(
conn,
`
CREATE VIRTUAL TABLE test USING vec0(
embedding float[4]
)
`
);
// Close the connection and remove the database file; a missing file is
// tolerated (ignoreAbsent).
await asyncClose(conn);
await IOUtils.remove(getTestDB().path, { ignoreAbsent: true });
});
// Checks that a synchronous clone of a connection can use the vec extension
// that was loaded only on the original connection.
add_task(async function test_clone() {
info("Testing cloning synchronous connection loads extensions in clone");
let conn1 = getOpenedUnsharedDatabase();
await loadExtension(conn1);
// NOTE(review): the `false` argument presumably requests a non read-only
// clone - confirm against the clone() API.
let conn2 = conn1.clone(false);
// The extension is never loaded explicitly on conn2; creating a vec0 table
// through it only works if the clone picked the extension up from conn1.
conn2.executeSimpleSQL(
`
CREATE VIRTUAL TABLE test USING vec0(
embedding float[4]
)
`
);
conn2.close();
cleanup();
});
// Checks that an asynchronously created clone of a connection can use the
// vec extension that was loaded only on the original connection.
add_task(async function test_asyncClone() {
info("Testing asynchronously cloning connection loads extensions in clone");
let conn1 = getOpenedUnsharedDatabase();
await loadExtension(conn1);
// NOTE(review): the `false` argument presumably requests a non read-only
// clone - confirm against the asyncClone() helper.
let conn2 = await asyncClone(conn1, false);
// The extension is never loaded explicitly on conn2; creating a vec0 table
// through it only works if the clone picked the extension up from conn1.
await executeSimpleSQLAsync(
conn2,
`
CREATE VIRTUAL TABLE test USING vec0(
embedding float[4]
)
`
);
// Close both connections and remove the database file; a missing file is
// tolerated (ignoreAbsent).
await asyncClose(conn2);
await asyncClose(conn1);
await IOUtils.remove(getTestDB().path, { ignoreAbsent: true });
});
/**
 * Loads a SQLite extension into a connection, adapting the callback-based
 * `conn.loadExtension()` API to a promise.
 *
 * @param {object} conn
 *   Connection exposing `loadExtension(name, callback)`; the callback is
 *   invoked with a status code.
 * @param {string} [ext="vec"]
 *   Name of the extension to load.
 * @returns {Promise<void>}
 *   Resolves once the callback reports a success status.
 *   Rejects with an `Error` whose `result` property carries the failing
 *   status code, so that test failures come with a message and a stack
 *   instead of a bare number.
 */
async function loadExtension(conn, ext = "vec") {
  await new Promise((resolve, reject) => {
    conn.loadExtension(ext, status => {
      if (Components.isSuccessCode(status)) {
        resolve();
        return;
      }
      const error = new Error(
        `Failed to load the "${ext}" extension, status: ${status}`
      );
      // Keep the raw status available to callers that want to inspect it.
      error.result = status;
      reject(error);
    });
  });
}

View File

@@ -79,6 +79,9 @@ skip-if = ["appname != 'thunderbird' && appname != 'seamonkey'"]
["test_storage_ext_fts5.js"]
["test_storage_ext_vec.js"]
run-if = ["buildapp == 'browser'"]
["test_storage_function.js"]
["test_storage_progresshandler.js"]

View File

@@ -3,3 +3,4 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
sqlite3_fts5_init
sqlite3_vec_init

View File

@@ -18,6 +18,7 @@ else:
SOURCES += [
"fts5.c",
"sqlite-vec/sqlite-vec.c",
]
if CONFIG["OS_TARGET"] == "Linux" or CONFIG["OS_TARGET"] == "Android":

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 Alex Garcia
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1 @@
0.1.7-alpha.2

View File

@@ -0,0 +1,50 @@
schema: 1
bugzilla:
product: Toolkit
component: Storage
origin:
name: sqlite-vec
description: >
An extremely small, "fast enough" vector search SQLite extension that runs
anywhere!
url: https://github.com/asg017/sqlite-vec
release: v0.1.7-alpha.2 (2025-01-10T14:54:13-08:00).
revision: v0.1.7-alpha.2
license: MIT
license-file: LICENSE-MIT
notes: >
Please check for possible limitations of this extension in the official
documentation on its GitHub page.
vendoring:
url: https://github.com/asg017/sqlite-vec
source-hosting: github
tracking: tag
vendor-directory: third_party/sqlite3/ext/sqlite-vec
skip-vendoring-steps:
- update-moz-build
exclude:
- "**"
include:
- LICENSE-MIT
- VERSION
- sqlite-vec.c
keep:
- moz.yaml
- vendor.sh
update-actions:
- action: move-file
from: '{vendor_dir}/VERSION'
to: '{vendor_dir}/VERSION.txt'
# Linking multiple extensions in the same library ends up defining
# sqlite3_api multiple times, INIT3 is just an extern declaration.
- action: replace-in-file
pattern: 'SQLITE_EXTENSION_INIT1'
with: 'SQLITE_EXTENSION_INIT3'
file: '{vendor_dir}/sqlite-vec.c'
- action: run-script
script: '{yaml_dir}/vendor.sh'
cwd: '{vendor_dir}'

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,41 @@
/* Public header of the sqlite-vec extension: version macros and the
 * declaration of the sqlite3_vec_init() function. */
#ifndef SQLITE_VEC_H
#define SQLITE_VEC_H
/* Unless SQLITE_CORE is defined, use the extension header (sqlite3ext.h);
 * otherwise use the regular SQLite header (sqlite3.h). */
#ifndef SQLITE_CORE
#include "sqlite3ext.h"
#else
#include "sqlite3.h"
#endif
/* SQLITE_VEC_API decorates the exported function: it expands to
 * __declspec(dllexport) only on _WIN32 when SQLITE_VEC_STATIC is not
 * defined, and to nothing in every other case. */
#ifdef SQLITE_VEC_STATIC
#define SQLITE_VEC_API
#else
#ifdef _WIN32
#define SQLITE_VEC_API __declspec(dllexport)
#else
#define SQLITE_VEC_API
#endif
#endif
/* Version information of this sqlite-vec release. */
#define SQLITE_VEC_VERSION "v0.1.7-alpha.2"
// TODO rm
#define SQLITE_VEC_DATE "2025-01-10T23:18:50Z+0000"
#define SQLITE_VEC_SOURCE "bdc336d1cf2a2222b6227784bd30c6631603279b"
#define SQLITE_VEC_VERSION_MAJOR 0
#define SQLITE_VEC_VERSION_MINOR 1
#define SQLITE_VEC_VERSION_PATCH 7
#ifdef __cplusplus
extern "C" {
#endif
/* NOTE(review): presumably the extension entry point, following the usual
 * SQLite loadable-extension init signature (db handle, error message out
 * parameter, API routines table) - confirm against sqlite-vec.c. */
SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
const sqlite3_api_routines *pApi);
#ifdef __cplusplus
} /* end of the 'extern "C"' block */
#endif
#endif /* ifndef SQLITE_VEC_H */

View File

@@ -0,0 +1,28 @@
#!/bin/bash
# IMPORTANT: use `./mach vendor third_party/sqlite3/ext/sqlite-vec/moz.yaml`,
# don't invoke this script directly.
# Script to download header from sqlite-vec extension amalgamation.
#
# Expects to run from the vendor directory, where VERSION.txt has already been
# created by the preceding vendoring step. Writes sqlite-vec.h next to it.
set -e
# Retrieve latest version value.
echo ""
echo "Get extension version."
version="$(cat VERSION.txt)"
echo "Github version: $version"
# Retrieve files and update sources.
echo ""
echo "Retrieving amalgamation..."
amalgamation_url="https://github.com/asg017/sqlite-vec/releases/download/v$version/sqlite-vec-$version-amalgamation.zip"
# Remove the downloaded archive on every exit path, including the early exits
# triggered by `set -e` when the download or the extraction fails.
trap 'rm -f "amalgamation.zip"' EXIT
wget -t 3 --retry-connrefused -w 5 --random-wait "$amalgamation_url" -qO amalgamation.zip
echo "Unpacking source files..."
unzip -p "amalgamation.zip" "sqlite-vec.h" > "sqlite-vec.h"
echo ""
echo "Update complete, please commit and check in your changes."
echo ""

11
third_party/sqlite3/readme.txt vendored Normal file
View File

@@ -0,0 +1,11 @@
To vendor a new version of SQLite:
./mach vendor third_party/sqlite3/moz.yaml
To vendor new versions of SQLite extensions, check for moz.yaml
files in ext/ subfolders. For example, to vendor sqlite-vec:
./mach vendor third_party/sqlite3/ext/sqlite-vec/moz.yaml
Vendoring tracks GitHub tags; a specific tag can be targeted
using the --revision option.