Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
- Embedding + text storage in SQLite (BLOB)
- Retrieve top-K most similar texts to a query using cosine similarity
- Memory-safe and 100% Ruby compatible
- ⭐️ Efficient variable-width allocation with embeddable typed data for compact memory usage
- Plug-and-play for RAG, semantic search, and retrieval AI

---
Expand Down Expand Up @@ -184,7 +185,7 @@ The library uses a **hybrid memory-storage approach**:

1. **In-Memory Processing**: All vector operations (cosine similarity calculations, embedding manipulations) happen entirely in memory using optimized C code
2. **Persistent Storage**: SQLite serves as a simple, portable storage layer for embeddings and associated text
3. **Dynamic C Objects**: Embeddings are managed as native C structures with automatic memory management
3. **Dynamic C Objects**: Embeddings are managed as native C structures with automatic memory management using variable-width allocation for each vector

### Key Components

Expand Down
19 changes: 7 additions & 12 deletions ext/rag_embeddings/embedding.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,6 @@ typedef struct {
} embedding_t;

// Callback for freeing memory when Ruby's GC collects our object.
// ptr: the data pointer handed back by Ruby for this object (may be NULL).
// No explicit NULL guard is needed: Ruby's xfree() is a no-op for NULL.
static void embedding_free(void *ptr) {
xfree(ptr); // Ruby's memory free function
}

// Callback to report memory usage to Ruby's GC
static size_t embedding_memsize(const void *ptr) {
Expand All @@ -27,9 +22,9 @@ static size_t embedding_memsize(const void *ptr) {
// Tells Ruby how to manage our C data structure.
// This span shows two variants of the function-table row and of the flags
// row back to back (one wiring embedding_free, one with no free callback
// plus RUBY_TYPED_EMBEDDABLE); only one of each pair can be kept for the
// initializer to compile.
// NOTE(review): with the free slot set to 0, a payload that Ruby cannot embed
// in the object slot would presumably never be released -- confirm whether
// RUBY_DEFAULT_FREE is required in that row.
static const rb_data_type_t embedding_type = {
"RagEmbeddings/Embedding", // Type name
{0, embedding_free, embedding_memsize,}, // Functions: mark, free, size
{0, 0, embedding_memsize,}, // No free needed when embedded
0, 0, // Parent type, data
RUBY_TYPED_FREE_IMMEDIATELY // Flags for immediate cleanup
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE
};

// Class method: RagEmbeddings::Embedding.from_array([1.0, 2.0, ...])
Expand All @@ -51,8 +46,10 @@ static VALUE embedding_from_array(VALUE klass, VALUE rb_array) {

uint16_t dim = (uint16_t)array_len;

// Allocate memory for struct + array of floats
embedding_t *ptr = xmalloc(sizeof(embedding_t) + dim * sizeof(float));
// Allocate Ruby object with embedded memory for the vector
size_t total = sizeof(embedding_t) + dim * sizeof(float);
VALUE obj = rb_data_typed_object_zalloc(klass, total, &embedding_type);
embedding_t *ptr = (embedding_t *)RTYPEDDATA_GET_DATA(obj);
ptr->dim = dim;

// Copy values from Ruby array to our C array
Expand All @@ -63,15 +60,13 @@ static VALUE embedding_from_array(VALUE klass, VALUE rb_array) {

// Ensure the value is numeric
if (!RB_FLOAT_TYPE_P(val) && !RB_INTEGER_TYPE_P(val)) {
xfree(ptr); // Clean up allocated memory before raising exception
rb_raise(rb_eTypeError, "Array element at index %d is not numeric", i);
}

ptr->values[i] = (float)NUM2DBL(val);
}

// Wrap our C struct in a Ruby object
VALUE obj = TypedData_Wrap_Struct(klass, &embedding_type, ptr);
// obj already wraps the allocated memory
return obj;
}

Expand Down
2 changes: 1 addition & 1 deletion lib/rag_embeddings/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Gem namespace; VERSION holds the release number as a frozen string.
# Two assignments appear here ("0.2.2", then "0.3.0"); the change summary for
# this file reports one addition and one deletion, so only one is meant to stay.
module RagEmbeddings
VERSION = "0.2.2".freeze
VERSION = "0.3.0".freeze
end
2 changes: 2 additions & 0 deletions rag_embeddings.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ Gem::Specification.new do |spec|
spec.metadata["homepage_uri"] = spec.homepage
spec.metadata["source_code_uri"] = "https://github.com/marcomd/rag_embeddings"

spec.required_ruby_version = '>= 3.3'

spec.add_runtime_dependency "sqlite3"
spec.add_runtime_dependency "langchainrb"
spec.add_runtime_dependency "faraday"
Expand Down