From af92207932db36dd59cb36cc7bda3eb9114d7b08 Mon Sep 17 00:00:00 2001 From: Marco Mastrodonato Date: Sun, 15 Jun 2025 11:15:50 +0200 Subject: [PATCH 1/4] Implemented Variable Width Allocation - Allocated embeddings with `rb_data_typed_object_zalloc` and marked the type as `RUBY_TYPED_EMBEDDABLE` in the C extension --- README.md | 1 + ext/rag_embeddings/embedding.c | 19 +++++++------------ lib/rag_embeddings/version.rb | 2 +- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 8176e0c..e8659ae 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ - Embedding + text storage in SQLite (BLOB) - Retrieve top-K most similar texts to a query using cosine similarity - Memory-safe and 100% Ruby compatible +- ⭐️ Efficient variable-width allocation with embeddable typed data for compact memory usage - Plug-and-play for RAG, semantic search, and retrieval AI --- diff --git a/ext/rag_embeddings/embedding.c b/ext/rag_embeddings/embedding.c index dc683a5..c2a1386 100644 --- a/ext/rag_embeddings/embedding.c +++ b/ext/rag_embeddings/embedding.c @@ -11,11 +11,6 @@ typedef struct { } embedding_t; // Callback for freeing memory when Ruby's GC collects our object -static void embedding_free(void *ptr) { - if (ptr) { - xfree(ptr); // Ruby's memory free function (with null check) - } -} // Callback to report memory usage to Ruby's GC static size_t embedding_memsize(const void *ptr) { @@ -27,9 +22,9 @@ static size_t embedding_memsize(const void *ptr) { // Tells Ruby how to manage our C data structure static const rb_data_type_t embedding_type = { "RagEmbeddings/Embedding", // Type name - {0, embedding_free, embedding_memsize,}, // Functions: mark, free, size + {0, RUBY_DEFAULT_FREE, embedding_memsize,}, // Functions: mark, free, size (default free releases the heap fallback used when a vector is too large to embed; it is a no-op for embedded objects) 0, 0, // Parent type, data - RUBY_TYPED_FREE_IMMEDIATELY // Flags for immediate cleanup + RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE }; // Class method: RagEmbeddings::Embedding.from_array([1.0, 2.0, ...]) @@ -51,8 +46,10 
@@ static VALUE embedding_from_array(VALUE klass, VALUE rb_array) { uint16_t dim = (uint16_t)array_len; - // Allocate memory for struct + array of floats - embedding_t *ptr = xmalloc(sizeof(embedding_t) + dim * sizeof(float)); + // Allocate Ruby object with embedded memory for the vector + size_t total = sizeof(embedding_t) + dim * sizeof(float); + VALUE obj = rb_data_typed_object_zalloc(klass, total, &embedding_type); + embedding_t *ptr = (embedding_t *)RTYPEDDATA_GET_DATA(obj); ptr->dim = dim; // Copy values from Ruby array to our C array @@ -63,15 +60,13 @@ static VALUE embedding_from_array(VALUE klass, VALUE rb_array) { // Ensure the value is numeric if (!RB_FLOAT_TYPE_P(val) && !RB_INTEGER_TYPE_P(val)) { - xfree(ptr); // Clean up allocated memory before raising exception rb_raise(rb_eTypeError, "Array element at index %d is not numeric", i); } ptr->values[i] = (float)NUM2DBL(val); } - // Wrap our C struct in a Ruby object - VALUE obj = TypedData_Wrap_Struct(klass, &embedding_type, ptr); + // obj already wraps the allocated memory return obj; } diff --git a/lib/rag_embeddings/version.rb b/lib/rag_embeddings/version.rb index cc509bb..298fe97 100644 --- a/lib/rag_embeddings/version.rb +++ b/lib/rag_embeddings/version.rb @@ -1,3 +1,3 @@ module RagEmbeddings - VERSION = "0.2.1" + VERSION = "0.3.0" end \ No newline at end of file From 10316a4843e5e94782208b366260de75a780b05d Mon Sep 17 00:00:00 2001 From: Marco Mastrodonato Date: Sun, 15 Jun 2025 13:39:02 +0200 Subject: [PATCH 2/4] Fixed VERSION --- lib/rag_embeddings/version.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rag_embeddings/version.rb b/lib/rag_embeddings/version.rb index 298fe97..7b70ac6 100644 --- a/lib/rag_embeddings/version.rb +++ b/lib/rag_embeddings/version.rb @@ -1,3 +1,3 @@ module RagEmbeddings - VERSION = "0.3.0" + VERSION = "0.3.0".freeze end \ No newline at end of file From 112d5ab78d8265bf1a2ca9dbe0be29509ea1f488 Mon Sep 17 00:00:00 2001 From: Marco Mastrodonato 
Date: Sun, 15 Jun 2025 13:41:28 +0200 Subject: [PATCH 3/4] Added required_ruby_version --- rag_embeddings.gemspec | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rag_embeddings.gemspec b/rag_embeddings.gemspec index 1db5139..5c9b8d3 100644 --- a/rag_embeddings.gemspec +++ b/rag_embeddings.gemspec @@ -18,6 +18,8 @@ Gem::Specification.new do |spec| spec.metadata["homepage_uri"] = spec.homepage spec.metadata["source_code_uri"] = "https://github.com/marcomd/rag_embeddings" + spec.required_ruby_version = '>= 3.3' + spec.add_runtime_dependency "sqlite3" spec.add_runtime_dependency "langchainrb" spec.add_runtime_dependency "faraday" From 7077238027546582f53fa66a50a749880b5e2843 Mon Sep 17 00:00:00 2001 From: Marco Mastrodonato Date: Sun, 15 Jun 2025 13:42:46 +0200 Subject: [PATCH 4/4] Updated README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e8659ae..bbfc363 100644 --- a/README.md +++ b/README.md @@ -185,7 +185,7 @@ The library uses a **hybrid memory-storage approach**: 1. **In-Memory Processing**: All vector operations (cosine similarity calculations, embedding manipulations) happen entirely in memory using optimized C code 2. **Persistent Storage**: SQLite serves as a simple, portable storage layer for embeddings and associated text -3. **Dynamic C Objects**: Embeddings are managed as native C structures with automatic memory management +3. **Dynamic C Objects**: Embeddings are managed as native C structures with automatic memory management, using variable-width allocation sized to each vector ### Key Components