Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,15 @@ puts "Most similar text: #{result.first[1]}, score: #{result.first[2]}"

## 🔢 Embeddings dimension

The size of embeddings is dynamic and fits with what the LLM provides.
In previous versions the embedding size was dynamic. It is now fixed at compile time so that the C extension can use Ruby's `RUBY_TYPED_EMBEDDABLE` flag.

- The embedding size is currently set to 3072 and is defined in two places, which must be kept identical:
- C side: `ext/rag_embeddings/embedding_config.h`
- Ruby side: `lib/rag_embeddings/config.rb`

Remember to recompile the C extension after changing the size:

`rake compile`

## 👷 Requirements

Expand Down
Binary file modified ext/rag_embeddings/embedding.bundle
Binary file not shown.
Binary file not shown.
86 changes: 55 additions & 31 deletions ext/rag_embeddings/embedding.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,102 +2,125 @@
#include <stdint.h> // For integer types like uint16_t
#include <stdlib.h> // For memory allocation functions
#include <math.h> // For math functions like sqrt
#include "embedding_config.h" // Import the configuration

// Storage record for one embedding: a used-length counter plus the float values.
// NOTE(review): this span appears to carry BOTH sides of a rendered diff — the
// earlier flexible-array layout and the later fixed-size layout — so `dim` and
// `values` are each declared twice. Only one pair can remain in a compilable
// struct; confirm which side is intended before building.
typedef struct {
uint16_t dim; // Dimension of the embedding vector (flexible-array layout)
float values[]; // Flexible array member: storage sized at allocation time
uint16_t dim; // Number of slots actually in use (from_array rejects lengths above EMBEDDING_DIMENSION)
float values[EMBEDDING_DIMENSION]; // Fixed-size storage; capacity comes from embedding_config.h
} embedding_t;

// Free callback listed in embedding_type; Ruby's GC invokes it when an
// embedding object is collected.
// NOTE(review): the xfree() call and the comment stating that no explicit free
// is needed look like the two sides of a rendered diff (before/after adopting
// RUBY_TYPED_EMBEDDABLE). They contradict each other — confirm which applies.
static void embedding_free(void *ptr) {
xfree(ptr); // Release the struct through Ruby's allocator
// Per the author's note: with RUBY_TYPED_EMBEDDABLE and TypedData_Make_Struct
// the storage is released together with the Ruby object, so this callback
// is expected to do nothing.
}

// Memsize callback listed in embedding_type: reports the number of bytes an
// embedding occupies.
// NOTE(review): two return statements are present, apparently the two sides of
// a rendered diff. As written, the first one always returns and the second is
// unreachable — confirm which formula is intended.
static size_t embedding_memsize(const void *ptr) {
const embedding_t *emb = (const embedding_t *)ptr;
// Variable-length formula: header plus `dim` floats, or 0 for a NULL pointer
return emb ? sizeof(embedding_t) + emb->dim * sizeof(float) : 0;
// Fixed-size formula: the whole struct, regardless of how many slots are used
return sizeof(embedding_t);
}

// Typed-data descriptor handed to Ruby for embedding objects: type name,
// GC callbacks, and flags.
// NOTE(review): the initializer list appears twice, apparently the two sides
// of a rendered diff; the only difference is the added RUBY_TYPED_EMBEDDABLE
// flag in the second copy. Keep exactly one copy before building.
static const rb_data_type_t embedding_type = {
"RagEmbeddings/Embedding", // Type name
{0, embedding_free, embedding_memsize,}, // Callbacks: mark (none), free, size
0, 0, // Parent type, data
RUBY_TYPED_FREE_IMMEDIATELY // Flags (first copy, without embedding)
"RagEmbeddings/Embedding", // Type name
{0, embedding_free, embedding_memsize,}, // Callbacks: mark (none), free, size
0, 0, // Parent type, data
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE // Flags (second copy, adds RUBY_TYPED_EMBEDDABLE)
};

// Class method: RagEmbeddings::Embedding.from_array([1.0, 2.0, ...])
// Builds an embedding object from a Ruby array of numbers.
// - rb_array must be a Ruby Array (enforced by Check_Type).
// - Raises ArgumentError when the array has more than EMBEDDING_DIMENSION elements.
// - Each element is converted with NUM2DBL and narrowed to float.
// NOTE(review): this span appears to carry BOTH sides of a rendered diff: an
// xmalloc + TypedData_Wrap_Struct path and a TypedData_Make_Struct path. As a
// result `dim`, `ptr` and `obj` are each declared twice. Keep one path only.
static VALUE embedding_from_array(VALUE klass, VALUE rb_array) {
Check_Type(rb_array, T_ARRAY); // Ensure argument is a Ruby array
uint16_t dim = (uint16_t)RARRAY_LEN(rb_array);
long array_len = RARRAY_LEN(rb_array);

// Reject arrays longer than the fixed capacity before narrowing the length
if (array_len > EMBEDDING_DIMENSION) {
rb_raise(rb_eArgError, "Embedding dimension %ld exceeds maximum %d",
array_len, EMBEDDING_DIMENSION);
}

uint16_t dim = (uint16_t)array_len;

// TypedData_Make_Struct path: creates the Ruby object and its embedding_t
// storage in one step and points `ptr` at that storage
embedding_t *ptr;
VALUE obj = TypedData_Make_Struct(klass, embedding_t, &embedding_type, ptr);

// xmalloc path: allocate header plus `dim` floats by hand
embedding_t *ptr = xmalloc(sizeof(embedding_t) + dim * sizeof(float));
ptr->dim = dim;

// Copy values from the Ruby array into the C array
for (int i = 0; i < dim; ++i)
ptr->values[i] = (float)NUM2DBL(rb_ary_entry(rb_array, i));

// xmalloc path: wrap the hand-allocated struct in a Ruby object
VALUE obj = TypedData_Wrap_Struct(klass, &embedding_type, ptr);
// Fixed-size path: zero the unused tail so every slot has a defined value
for (int i = dim; i < EMBEDDING_DIMENSION; ++i)
ptr->values[i] = 0.0f;

return obj;
}

// Class method: RagEmbeddings::Embedding.max_dimension
// Returns EMBEDDING_DIMENSION, the compile-time capacity of an embedding,
// converted to a Ruby number. The receiver `klass` is not used.
static VALUE embedding_max_dimension(VALUE klass) {
  const int capacity = EMBEDDING_DIMENSION;
  return INT2NUM(capacity);
}

// Instance method: embedding.dim
// Reports how many values this embedding actually holds (its `dim` field).
static VALUE embedding_dim(VALUE self) {
  embedding_t *emb;

  // Unwrap the C struct behind the Ruby object, checked against embedding_type
  TypedData_Get_Struct(self, embedding_t, &embedding_type, emb);

  const int used = emb->dim;
  return INT2NUM(used);
}

// Instance method: embedding.to_a
// Builds a new Ruby array containing the first `dim` stored values, each
// converted with DBL2NUM. Slots at or beyond `dim` are not included.
static VALUE embedding_to_a(VALUE self) {
  embedding_t *emb;
  TypedData_Get_Struct(self, embedding_t, &embedding_type, emb);

  // Size the result for exactly the values that are in use
  const int count = emb->dim;
  VALUE result = rb_ary_new2(count);

  // Append in index order so the Ruby array mirrors the C array
  for (int idx = 0; idx < count; idx++) {
    rb_ary_push(result, DBL2NUM(emb->values[idx]));
  }

  return result;
}

// Instance method: embedding.cosine_similarity(other_embedding)
// Computes dot(a, b) / (|a| * |b| + 1e-8) over the first `dim` values of both
// embeddings and returns it as a Ruby Float. Both operands are unwrapped with
// TypedData_Get_Struct against embedding_type.
// NOTE(review): this span appears to carry BOTH sides of a rendered diff. As
// written: (1) the second rb_raise is outside the `if` and would run
// unconditionally, (2) every term is accumulated twice per iteration, and
// (3) the statements after the first `return` are unreachable. Keep one side
// of each pair before building.
static VALUE embedding_cosine_similarity(VALUE self, VALUE other) {
embedding_t *a, *b;
// Unwrap the C structs for both embeddings
TypedData_Get_Struct(self, embedding_t, &embedding_type, a);
TypedData_Get_Struct(other, embedding_t, &embedding_type, b);

// Ensure dimensions match
if (a->dim != b->dim)
rb_raise(rb_eArgError, "Dimension mismatch");
rb_raise(rb_eArgError, "Dimension mismatch: %d vs %d", a->dim, b->dim);

float dot = 0.0f, norm_a = 0.0f, norm_b = 0.0f;

// Accumulate the dot product and both squared magnitudes in one pass
for (int i = 0; i < a->dim; ++i) {
dot += a->values[i] * b->values[i]; // Dot product
norm_a += a->values[i] * a->values[i]; // Square of magnitude for vector a
norm_b += b->values[i] * b->values[i]; // Square of magnitude for vector b
float val_a = a->values[i];
float val_b = b->values[i];
dot += val_a * val_b; // Dot product
norm_a += val_a * val_a; // Square of magnitude for vector a
norm_b += val_b * val_b; // Square of magnitude for vector b
}

// Apply cosine similarity formula: dot(a,b)/(|a|*|b|)
// Small epsilon (1e-8) added to prevent division by zero
return DBL2NUM(dot / (sqrt(norm_a) * sqrt(norm_b) + 1e-8));
float magnitude_product = sqrt(norm_a) * sqrt(norm_b);
return DBL2NUM(dot / (magnitude_product + 1e-8f));
}

// Ruby extension initialization function
Expand All @@ -109,6 +132,7 @@ void Init_embedding(void) {

// Register class methods
rb_define_singleton_method(cEmbedding, "from_array", embedding_from_array, 1);
rb_define_singleton_method(cEmbedding, "max_dimension", embedding_max_dimension, 0);

// Register instance methods
rb_define_method(cEmbedding, "dim", embedding_dim, 0);
Expand Down
3 changes: 3 additions & 0 deletions ext/rag_embeddings/embedding_config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Configuration: compile-time size of every embedding handled by the C extension.
// Change EMBEDDING_DIMENSION to match your LLM embedding size, then recompile.
// Common sizes: 768 (BERT), 1536 (OpenAI text-embedding-ada-002), 3072 (text-embedding-3-large)
#ifndef RAG_EMBEDDINGS_EMBEDDING_CONFIG_H
#define RAG_EMBEDDINGS_EMBEDDING_CONFIG_H

#define EMBEDDING_DIMENSION 3072 // <--- this must be the same as what is set in ruby lib/rag_embeddings/config.rb

// embedding.c keeps the used length in a uint16_t and casts the array length
// to that type, so a larger capacity would silently truncate. Fail the build instead.
#if EMBEDDING_DIMENSION < 1 || EMBEDDING_DIMENSION > 65535
#error "EMBEDDING_DIMENSION must be between 1 and 65535"
#endif

#endif // RAG_EMBEDDINGS_EMBEDDING_CONFIG_H
1 change: 1 addition & 0 deletions lib/rag_embeddings.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
require_relative "rag_embeddings/config"
require_relative "rag_embeddings/version"
require_relative "rag_embeddings/engine"
require_relative "rag_embeddings/database"
Expand Down
3 changes: 3 additions & 0 deletions lib/rag_embeddings/config.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module RagEmbeddings
# Expected number of values in every embedding vector.
# Database#top_k_similar raises when an embedding's size differs from this value.
EMBEDDING_DIMENSION = 3072 # Must equal EMBEDDING_DIMENSION in ext/rag_embeddings/embedding_config.h
end
4 changes: 4 additions & 0 deletions lib/rag_embeddings/database.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,13 @@ def all
# "Raw" search: returns the N texts most similar to the query
def top_k_similar(query_text, k: 5)
query_embedding = RagEmbeddings.embed(query_text)
raise "Wrong embedding size #{query_embedding.size}, #{RagEmbeddings::EMBEDDING_DIMENSION} was expected! Change the configuration." unless query_embedding.size == RagEmbeddings::EMBEDDING_DIMENSION

query_obj = RagEmbeddings::Embedding.from_array(query_embedding)

all.map do |id, content, emb|
raise "Wrong embedding size #{query_embedding.size}, #{RagEmbeddings::EMBEDDING_DIMENSION} was expected! Change the configuration." unless emb.size == RagEmbeddings::EMBEDDING_DIMENSION

emb_obj = RagEmbeddings::Embedding.from_array(emb)
similarity = emb_obj.cosine_similarity(query_obj)
[id, content, similarity]
Expand Down
2 changes: 1 addition & 1 deletion spec/performance_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
let(:text1) { "Performance test one" }
let(:text2) { "Performance test two" }
let(:n) { 10_000 }
let(:embedding_size) { 3072 }
let(:embedding_size) { RagEmbeddings::EMBEDDING_DIMENSION }

let(:emb1) { Array.new(embedding_size) { rand } }
let(:emb2) { Array.new(embedding_size) { rand } }
Expand Down