diff --git a/README.md b/README.md index eae542d..dc3acae 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,15 @@ puts "Most similar text: #{result.first[1]}, score: #{result.first[2]}" ## 🔢 Embeddings dimension -The size of embeddings is dynamic and fits with what the LLM provides. +In previous versions the size of embeddings was dynamic. The size is now fixed at compile time so that the C extension can use Ruby's `RUBY_TYPED_EMBEDDABLE` flag. + +- The size of embeddings is currently set to 3072 and is defined in two places, which must be kept in sync: + - the C side is defined in `ext/rag_embeddings/embedding_config.h` + - the Ruby side is defined in `lib/rag_embeddings/config.rb` + +Remember to recompile the C extension after changing the size: + +`rake compile` ## 👷 Requirements diff --git a/ext/rag_embeddings/embedding.bundle b/ext/rag_embeddings/embedding.bundle index d9c9296..d4b2d43 100755 Binary files a/ext/rag_embeddings/embedding.bundle and b/ext/rag_embeddings/embedding.bundle differ diff --git a/ext/rag_embeddings/embedding.bundle.dSYM/Contents/Resources/DWARF/embedding.bundle b/ext/rag_embeddings/embedding.bundle.dSYM/Contents/Resources/DWARF/embedding.bundle index 31c89bf..3e9e303 100644 Binary files a/ext/rag_embeddings/embedding.bundle.dSYM/Contents/Resources/DWARF/embedding.bundle and b/ext/rag_embeddings/embedding.bundle.dSYM/Contents/Resources/DWARF/embedding.bundle differ diff --git a/ext/rag_embeddings/embedding.c b/ext/rag_embeddings/embedding.c index 28ff75f..7552aba 100644 --- a/ext/rag_embeddings/embedding.c +++ b/ext/rag_embeddings/embedding.c @@ -2,72 +2,91 @@ #include // For integer types like uint16_t #include // For memory allocation functions #include // For math functions like sqrt +#include "embedding_config.h" // Import the configuration -// Main data structure for storing embeddings -// Flexible array member (values[]) allows variable length arrays +// Main data structure for storing embeddings with fixed size typedef struct { - uint16_t dim; // Dimension of the embedding vector - float 
values[]; // Flexible array member to store the actual values + uint16_t dim; // Actual dimension used (can be <= EMBEDDING_DIMENSION) + float values[EMBEDDING_DIMENSION]; // Fixed-size array for embedding values } embedding_t; // Callback for freeing memory when Ruby's GC collects our object static void embedding_free(void *ptr) { - xfree(ptr); // Ruby's memory free function + // With RUBY_TYPED_EMBEDDABLE and TypedData_Make_Struct, + // Ruby handles the deallocation automatically + // No need to explicitly free the memory } // Callback to report memory usage to Ruby's GC static size_t embedding_memsize(const void *ptr) { - const embedding_t *emb = (const embedding_t *)ptr; - return emb ? sizeof(embedding_t) + emb->dim * sizeof(float) : 0; + // With embedded objects, we report the full struct size + return sizeof(embedding_t); } -// Type information for Ruby's GC: -// Tells Ruby how to manage our C data structure +// Type information for Ruby's GC with embedding support static const rb_data_type_t embedding_type = { - "RagEmbeddings/Embedding", // Type name - {0, embedding_free, embedding_memsize,}, // Functions: mark, free, size - 0, 0, // Parent type, data - RUBY_TYPED_FREE_IMMEDIATELY // Flags + "RagEmbeddings/Embedding", // Type name + {0, embedding_free, embedding_memsize,}, // Functions: mark, free, size + 0, 0, // Parent type, data + RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE // Flags with embedding! }; // Class method: RagEmbeddings::Embedding.from_array([1.0, 2.0, ...]) -// Creates a new embedding from a Ruby array +// Creates a new embedding from a Ruby array - NOW FASTER! 
static VALUE embedding_from_array(VALUE klass, VALUE rb_array) { Check_Type(rb_array, T_ARRAY); // Ensure argument is a Ruby array - uint16_t dim = (uint16_t)RARRAY_LEN(rb_array); + long array_len = RARRAY_LEN(rb_array); + + // Check if the array size exceeds our maximum dimension + if (array_len > EMBEDDING_DIMENSION) { + rb_raise(rb_eArgError, "Embedding dimension %ld exceeds maximum %d", + array_len, EMBEDDING_DIMENSION); + } + + uint16_t dim = (uint16_t)array_len; + + // With RUBY_TYPED_EMBEDDABLE, use TypedData_Make_Struct + // This automatically allocates Ruby object + embedded data space + embedding_t *ptr; + VALUE obj = TypedData_Make_Struct(klass, embedding_t, &embedding_type, ptr); - // Allocate memory for struct + array of floats - embedding_t *ptr = xmalloc(sizeof(embedding_t) + dim * sizeof(float)); ptr->dim = dim; // Copy values from Ruby array to our C array for (int i = 0; i < dim; ++i) ptr->values[i] = (float)NUM2DBL(rb_ary_entry(rb_array, i)); - // Wrap our C struct in a Ruby object - VALUE obj = TypedData_Wrap_Struct(klass, &embedding_type, ptr); + // Zero out unused slots for consistency + for (int i = dim; i < EMBEDDING_DIMENSION; ++i) + ptr->values[i] = 0.0f; + return obj; } +// Class method to get the maximum supported dimension +static VALUE embedding_max_dimension(VALUE klass) { + return INT2NUM(EMBEDDING_DIMENSION); +} + // Instance method: embedding.dim -// Returns the dimension of the embedding +// Returns the actual dimension of the embedding static VALUE embedding_dim(VALUE self) { embedding_t *ptr; - // Get the C struct from the Ruby object + // Get the C struct from the Ruby object - NOW FASTER! 
TypedData_Get_Struct(self, embedding_t, &embedding_type, ptr); return INT2NUM(ptr->dim); } // Instance method: embedding.to_a -// Converts the embedding back to a Ruby array +// Converts the embedding back to a Ruby array (only actual dimensions) static VALUE embedding_to_a(VALUE self) { embedding_t *ptr; TypedData_Get_Struct(self, embedding_t, &embedding_type, ptr); - // Create a new Ruby array with pre-allocated capacity + // Create a new Ruby array with pre-allocated capacity for actual dimension VALUE arr = rb_ary_new2(ptr->dim); - // Copy each float value to the Ruby array + // Copy only the used float values to the Ruby array - FASTER MEMORY ACCESS! for (int i = 0; i < ptr->dim; ++i) rb_ary_push(arr, DBL2NUM(ptr->values[i])); @@ -75,29 +94,33 @@ static VALUE embedding_to_a(VALUE self) { } // Instance method: embedding.cosine_similarity(other_embedding) -// Calculate cosine similarity between two embeddings +// Calculate cosine similarity - MUCH FASTER with embedded data! static VALUE embedding_cosine_similarity(VALUE self, VALUE other) { embedding_t *a, *b; - // Get C structs for both embeddings + // Get C structs for both embeddings - direct access, no pointer deref! TypedData_Get_Struct(self, embedding_t, &embedding_type, a); TypedData_Get_Struct(other, embedding_t, &embedding_type, b); // Ensure dimensions match if (a->dim != b->dim) - rb_raise(rb_eArgError, "Dimension mismatch"); + rb_raise(rb_eArgError, "Dimension mismatch: %d vs %d", a->dim, b->dim); float dot = 0.0f, norm_a = 0.0f, norm_b = 0.0f; // Calculate dot product and vector magnitudes + // Better cache locality = faster calculations! 
for (int i = 0; i < a->dim; ++i) { - dot += a->values[i] * b->values[i]; // Dot product - norm_a += a->values[i] * a->values[i]; // Square of magnitude for vector a - norm_b += b->values[i] * b->values[i]; // Square of magnitude for vector b + float val_a = a->values[i]; + float val_b = b->values[i]; + dot += val_a * val_b; // Dot product + norm_a += val_a * val_a; // Square of magnitude for vector a + norm_b += val_b * val_b; // Square of magnitude for vector b } // Apply cosine similarity formula: dot(a,b)/(|a|*|b|) // Small epsilon (1e-8) added to prevent division by zero - return DBL2NUM(dot / (sqrt(norm_a) * sqrt(norm_b) + 1e-8)); + float magnitude_product = sqrt(norm_a) * sqrt(norm_b); + return DBL2NUM(dot / (magnitude_product + 1e-8f)); } // Ruby extension initialization function @@ -109,6 +132,7 @@ void Init_embedding(void) { // Register class methods rb_define_singleton_method(cEmbedding, "from_array", embedding_from_array, 1); + rb_define_singleton_method(cEmbedding, "max_dimension", embedding_max_dimension, 0); // Register instance methods rb_define_method(cEmbedding, "dim", embedding_dim, 0); diff --git a/ext/rag_embeddings/embedding_config.h b/ext/rag_embeddings/embedding_config.h new file mode 100644 index 0000000..1eb56ab --- /dev/null +++ b/ext/rag_embeddings/embedding_config.h @@ -0,0 +1,3 @@ +// Configuration: Change this to match your LLM embedding size +// Common sizes: 768 (BERT), 1536 (OpenAI text-embedding-ada-002), 3072 (text-embedding-3-large) +#define EMBEDDING_DIMENSION 3072 // <--- this must be the same as what is set in ruby lib/rag_embeddings/config.rb \ No newline at end of file diff --git a/lib/rag_embeddings.rb b/lib/rag_embeddings.rb index 65f186d..f002530 100644 --- a/lib/rag_embeddings.rb +++ b/lib/rag_embeddings.rb @@ -1,3 +1,4 @@ +require_relative "rag_embeddings/config" require_relative "rag_embeddings/version" require_relative "rag_embeddings/engine" require_relative "rag_embeddings/database" diff --git 
a/lib/rag_embeddings/config.rb b/lib/rag_embeddings/config.rb new file mode 100644 index 0000000..cdec074 --- /dev/null +++ b/lib/rag_embeddings/config.rb @@ -0,0 +1,3 @@ +module RagEmbeddings + EMBEDDING_DIMENSION = 3072 # <--- this must be the same as the value set on the C side in ext/rag_embeddings/embedding_config.h +end \ No newline at end of file diff --git a/lib/rag_embeddings/database.rb b/lib/rag_embeddings/database.rb index baa2ddc..30f8f64 100644 --- a/lib/rag_embeddings/database.rb +++ b/lib/rag_embeddings/database.rb @@ -27,9 +27,13 @@ def all # "Raw" search: returns the N texts most similar to the query def top_k_similar(query_text, k: 5) query_embedding = RagEmbeddings.embed(query_text) + raise "Wrong embedding size #{query_embedding.size}, #{RagEmbeddings::EMBEDDING_DIMENSION} was expected! Change the configuration." unless query_embedding.size == RagEmbeddings::EMBEDDING_DIMENSION + query_obj = RagEmbeddings::Embedding.from_array(query_embedding) all.map do |id, content, emb| + raise "Wrong embedding size #{emb.size}, #{RagEmbeddings::EMBEDDING_DIMENSION} was expected! Change the configuration." unless emb.size == RagEmbeddings::EMBEDDING_DIMENSION + emb_obj = RagEmbeddings::Embedding.from_array(emb) similarity = emb_obj.cosine_similarity(query_obj) [id, content, similarity] diff --git a/spec/performance_spec.rb b/spec/performance_spec.rb index 8445570..9435d38 100644 --- a/spec/performance_spec.rb +++ b/spec/performance_spec.rb @@ -6,7 +6,7 @@ let(:text1) { "Performance test one" } let(:text2) { "Performance test two" } let(:n) { 10_000 } - let(:embedding_size) { 3072 } + let(:embedding_size) { RagEmbeddings::EMBEDDING_DIMENSION } let(:emb1) { Array.new(embedding_size) { rand } } let(:emb2) { Array.new(embedding_size) { rand } }