-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathexample.cpp
More file actions
167 lines (143 loc) · 6.38 KB
/
example.cpp
File metadata and controls
167 lines (143 loc) · 6.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "dalotia.h"
#include "dalotia.hpp"
// application code
int main(int argc, char *argv[]) {
char filename[] = "data/model.safetensors";
char tensor_name[] = "embedding_firstchanged";
DalotiaTensorFile *file = dalotia_open_file(filename);
bool tensor_is_sparse =
dalotia_is_sparse(file, tensor_name); //...repeat later
char *tensor;
int permutation[3] = {0, 1, 2};
constexpr dalotia_WeightFormat weightFormat =
dalotia_WeightFormat::dalotia_float_64;
dalotia_Ordering ordering = dalotia_Ordering::dalotia_C_ordering;
std::cout << "tensor is sparse: " << tensor_is_sparse << std::endl;
if (!tensor_is_sparse) {
// get the tensor extents
int extents[10]; // ? or call get_num_dimensions before?
// file formats: gguf, safetensors, onnx? channel
// orders? gguf with quantized ops?
// -> look at dnnl, darknet, safetensors-cpp, tinyml?
// torch: named dimensions tensor name data format
// data shape offsets
int num_dimensions =
dalotia_get_tensor_extents(file, tensor_name, extents);
std::cout << "num_dim: " << num_dimensions << std::endl;
// calculate the total number of elements
int total_size = 1;
for (int i = 0; i < 10; i++) {
if (extents[i] == -1) {
assert(i > 0);
break;
}
total_size *= extents[i];
}
assert(total_size ==
dalotia_get_num_tensor_elements(file, tensor_name));
std::cout << "total size: " << total_size << std::endl;
// I want to store the tensor as a very long array
// allocate memory for the tensor
tensor = (char *)malloc(dalotia::sizeof_weight_format<weightFormat>() *
total_size);
// load the tensor
dalotia_load_tensor_dense_with_permutation(
file, tensor_name, tensor, weightFormat, ordering, permutation);
// load_tensor_dense(file, tensor_name, tensor, weightFormat, ordering);
} else {
dalotia_SparseFormat format = dalotia_SparseFormat::dalotia_CSR;
// get the tensor extents
int extents[10];
int num_dimensions =
dalotia_get_tensor_extents(file, tensor_name, extents);
for (int i = 0; i < 10; i++) {
if (extents[i] == -1) {
assert(i > 0);
break;
}
std::cout << extents[i] << " ";
}
int sparse_extents[10];
dalotia_get_sparse_tensor_extents(file, tensor_name, sparse_extents,
dalotia_CSR);
for (int i = 0; i < 10; i++) {
if (sparse_extents[i] == -1) {
assert(i > 0);
break;
}
std::cout << sparse_extents[i] << " ";
}
// I want to store the tensor as compressed sparse row
char *values = reinterpret_cast<char *>(
new float[sparse_extents[0]]); // blah blah malloc...
int *first_indices = new int[sparse_extents[1]];
int *second_indices = new int[sparse_extents[2]];
dalotia_load_tensor_sparse(file, tensor_name, values, first_indices,
second_indices, format, weightFormat,
ordering);
}
dalotia_close_file(file);
// print
if (!tensor_is_sparse) {
double *tensor_double = reinterpret_cast<double *>(tensor);
for (int i = 0; i < 256; i++) {
std::cout << tensor_double[i] << " ";
}
}
std::cout << std::endl;
std::cout << std::endl;
// alternative: the C++ version (implicitly creates a file object)
auto [extents, tensor_cpp] =
dalotia::load_tensor_dense(filename, tensor_name, weightFormat);
// typed return values and permutations!
auto vector_permutation = std::vector<int>{1, 2, 0};
auto file_cpp = std::unique_ptr<dalotia::TensorFile>(dalotia::make_tensor_file(filename));
auto [extents_file_obj, tensor_cpp_file_obj] = file_cpp->load_tensor_dense(tensor_name, weightFormat,
ordering, vector_permutation);
#ifdef DALOTIA_WITH_SAFETENSORS_CPP
// if we create a derived file on the stack, we have less template magic available
auto stack_file = dalotia::SafetensorsFile(filename);
// for instance, this will call the non-template overload and fail:
// auto [extents_safetensors, tensor_cpp_safetensors] = stack_file.load_tensor_dense(tensor_name, weightFormat,
// ordering, vector_permutation);
// but we can call the base-class' method directly
auto [extents_safetensors, tensor_cpp_safetensors] =
stack_file.dalotia::TensorFile::load_tensor_dense(
tensor_name, weightFormat,
ordering, vector_permutation);
auto [extents_derived_float, tensor_cpp_derived_float] =
file_cpp->load_tensor_dense<float>(tensor_name, ordering, vector_permutation);
#endif
#ifdef DALOTIA_WITH_CPP_PMR
// C++17 pmr -> small tensors can even live on the stack
std::array<double, 300> storage_array;
std::pmr::monotonic_buffer_resource storage_resource(
storage_array.data(), storage_array.size() * sizeof(double));
std::pmr::polymorphic_allocator<dalotia_byte> storage_allocator(
&storage_resource);
auto [extents2, tensor_cpp2] = dalotia::load_tensor_dense<double>(
filename, tensor_name, weightFormat, dalotia_C_ordering,
vector_permutation, storage_allocator);
for (int i = 0; i < storage_array.size(); ++i) {
std::cout << storage_array[i] << " ";
}
std::cout << std::endl;
#else // DALOTIA_WITH_CPP_PMR
auto [extents2, tensor_cpp2] = dalotia::load_tensor_dense<double>(
filename, tensor_name, weightFormat, ordering, vector_permutation);
#endif // DALOTIA_WITH_CPP_PMR
for (int i = 0; i < extents2.size(); ++i) {
std::cout << extents2[i] << " ";
}
std::cout << std::endl;
for (int i = 0; i < tensor_cpp2.size(); ++i) {
std::cout << tensor_cpp2[i] << " ";
}
std::cout << std::endl;
return 0;
}