-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathexample.cpp
More file actions
167 lines (143 loc) · 6.38 KB
/
example.cpp
File metadata and controls
167 lines (143 loc) · 6.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "dalotia.h"
#include "dalotia.hpp"
// application code
int main(int argc, char *argv[]) {
char filename[] = "data/model.safetensors";
char tensor_name[] = "embedding_firstchanged";
DalotiaTensorFile *file = dalotia_open_file(filename);
bool tensor_is_sparse =
dalotia_is_sparse(file, tensor_name); //...repeat later
char *tensor;
int permutation[3] = {0, 1, 2};
constexpr dalotia_WeightFormat weightFormat =
dalotia_WeightFormat::dalotia_float_64;
dalotia_Ordering ordering = dalotia_Ordering::dalotia_C_ordering;
std::cout << "tensor is sparse: " << tensor_is_sparse << std::endl;
if (!tensor_is_sparse) {
// get the tensor extents
int extents[10]; // ? or call get_num_dimensions before?
// file formats: gguf, safetensors, onnx? channel
// orders? gguf with quantized ops?
// -> look at dnnl, darknet, safetensors-cpp, tinyml?
// torch: named dimensions tensor name data format
// data shape offsets
int num_dimensions =
dalotia_get_tensor_extents(file, tensor_name, extents);
std::cout << "num_dim: " << num_dimensions << std::endl;
// calculate the total number of elements
int total_size = 1;
for (int i = 0; i < 10; i++) {
if (extents[i] == -1) {
assert(i > 0);
break;
}
total_size *= extents[i];
}
assert(total_size ==
dalotia_get_num_tensor_elements(file, tensor_name));
std::cout << "total size: " << total_size << std::endl;
// I want to store the tensor as a very long array
// allocate memory for the tensor
tensor = (char *)malloc(dalotia::sizeof_weight_format<weightFormat>() *
total_size);
// load the tensor
dalotia_load_tensor_dense_with_permutation(
file, tensor_name, tensor, weightFormat, ordering, permutation);
// load_tensor_dense(file, tensor_name, tensor, weightFormat, ordering);
} else {
dalotia_SparseFormat format = dalotia_SparseFormat::dalotia_CSR;
// get the tensor extents
int extents[10];
int num_dimensions =
dalotia_get_tensor_extents(file, tensor_name, extents);
for (int i = 0; i < 10; i++) {
if (extents[i] == -1) {
assert(i > 0);
break;
}
std::cout << extents[i] << " ";
}
int sparse_extents[10];
dalotia_get_sparse_tensor_extents(file, tensor_name, sparse_extents,
dalotia_CSR);
for (int i = 0; i < 10; i++) {
if (sparse_extents[i] == -1) {
assert(i > 0);
break;
}
std::cout << sparse_extents[i] << " ";
}
// I want to store the tensor as compressed sparse row
char *values = reinterpret_cast<char *>(
new float[sparse_extents[0]]); // blah blah malloc...
int *first_indices = new int[sparse_extents[1]];
int *second_indices = new int[sparse_extents[2]];
dalotia_load_tensor_sparse(file, tensor_name, values, first_indices,
second_indices, format, weightFormat,
ordering);
}
dalotia_close_file(file);
// print
if (!tensor_is_sparse) {
double *tensor_double = reinterpret_cast<double *>(tensor);
for (int i = 0; i < 256; i++) {
std::cout << tensor_double[i] << " ";
}
}
std::cout << std::endl;
std::cout << std::endl;
// alternative: the C++ version (implicitly creates a file object)
auto [extents, tensor_cpp] =
dalotia::load_tensor_dense(filename, tensor_name, weightFormat);
// typed return values and permutations!
auto vector_permutation = std::vector<int>{1, 2, 0};
auto file_cpp = std::unique_ptr<dalotia::TensorFile>(dalotia::make_tensor_file(filename));
auto [extents_file_obj, tensor_cpp_file_obj] = file_cpp->load_tensor_dense(tensor_name, weightFormat,
ordering, vector_permutation);
#ifdef DALOTIA_WITH_SAFETENSORS_CPP
// if we create a derived file on the stack, we have less template magic available
auto stack_file = dalotia::SafetensorsFile(filename);
// for instance, this will call the non-template overload and fail:
// auto [extents_safetensors, tensor_cpp_safetensors] = stack_file.load_tensor_dense(tensor_name, weightFormat,
// ordering, vector_permutation);
// but we can call the base-class' method directly
auto [extents_safetensors, tensor_cpp_safetensors] =
stack_file.dalotia::TensorFile::load_tensor_dense(
tensor_name, weightFormat,
ordering, vector_permutation);
auto [extents_derived_float, tensor_cpp_derived_float] =
file_cpp->load_tensor_dense<float>(tensor_name, ordering, vector_permutation);
#endif
#ifdef DALOTIA_WITH_CPP_PMR
// C++17 pmr -> small tensors can even live on the stack
std::array<double, 300> storage_array;
std::pmr::monotonic_buffer_resource storage_resource(
storage_array.data(), storage_array.size() * sizeof(double));
std::pmr::polymorphic_allocator<dalotia_byte> storage_allocator(
&storage_resource);
auto [extents2, tensor_cpp2] = dalotia::load_tensor_dense<double>(
filename, tensor_name, weightFormat, dalotia_C_ordering,
vector_permutation, storage_allocator);
for (int i = 0; i < storage_array.size(); ++i) {
std::cout << storage_array[i] << " ";
}
std::cout << std::endl;
#else // DALOTIA_WITH_CPP_PMR
auto [extents2, tensor_cpp2] = dalotia::load_tensor_dense<double>(
filename, tensor_name, weightFormat, ordering, vector_permutation);
#endif // DALOTIA_WITH_CPP_PMR
for (int i = 0; i < extents2.size(); ++i) {
std::cout << extents2[i] << " ";
}
std::cout << std::endl;
for (int i = 0; i < tensor_cpp2.size(); ++i) {
std::cout << tensor_cpp2[i] << " ";
}
std::cout << std::endl;
return 0;
}