Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# llama.cpp.zig

A `build.zig` for [llama.cpp](https://github.com/ggml-org/llama.cpp), with Vulkan.
A `build.zig` for [llama.cpp](https://github.com/ggml-org/llama.cpp), with Vulkan and Metal.

You can use llama.cpp from Zig projects.

Expand All @@ -14,11 +14,13 @@ Supported targets are:
- Linux aarch64
- Windows x86_64
- Windows aarch64
- macOS aarch64 (Apple Silicon)

Supported backends are:

- CPU
- Vulkan
- Metal (macOS only)

Other targets and backends can be added with time and test devices.

Expand All @@ -29,6 +31,7 @@ Other targets and backends can be added with time and test devices.
- Raspberry Pi 5 (aarch64 linux): CPU works, Vulkan compiles but doesn't run due to a lack of memory.
- Surface Pro X SQ2 (aarch64 windows): CPU works, Vulkan compiles but doesn't run due to a missing feature.
- Termux (aarch64 android/linux): CPU works, Vulkan compiles but doesn't run.
- M4 Pro (aarch64 macOS): All good.

## How to build

Expand All @@ -45,6 +48,7 @@ You can choose the backend used:
```sh
zig build install -Dbackend=vulkan
zig build install -Dbackend=cpu #default
zig build install -Dbackend=metal
```

And choose a target architecture and OS:
Expand Down Expand Up @@ -77,6 +81,22 @@ you_module.linkLibrary(llama_cpp_lib);

Refer to [src/demo.zig] for a usage example.

### Metal Backend (macOS)

Metal requires the Metal Toolchain to compile shaders at build time:

```sh
xcodebuild -downloadComponent MetalToolchain
```
When building with Metal, the output includes a `default.metallib` file that must be distributed alongside your binaries:

```
zig-out/bin/
├── llama-run
├── llama-server
└── default.metallib # required for Metal to work
```

## Licenses

MIT
38 changes: 38 additions & 0 deletions build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ const Options = struct {
/// Compute backend that llama.cpp is built against.
const Backend = enum(u8) {
/// CPU backend.
cpu,
/// Vulkan backend.
vulkan,
/// Metal backend; when selected, `buildLlamaCpp` also compiles and installs
/// `bin/default.metallib`.
metal,
};

fn buildLlamaCpp(
Expand Down Expand Up @@ -124,6 +125,12 @@ fn buildLlamaCpp(
mod.linkLibrary(ggml_lib);
mod.lib_paths.appendSlice(b.allocator, ggml_lib.root_module.lib_paths.items) catch unreachable;

// Install Metal shader library if using Metal backend
if (options.backend == .metal) {
const metallib = compileMetalLib(b, ggml_dep);
b.getInstallStep().dependOn(&b.addInstallFile(metallib, "bin/default.metallib").step);
}

const common_lib = buildCommon(b, ggml_lib, options);
mod.linkLibrary(common_lib);

Expand Down Expand Up @@ -580,3 +587,34 @@ const cppflags: []const []const u8 = &.{
"-std=c++17",
"-O3",
};

/// Builds `default.metallib` from the ggml Metal shader source at build time
/// and returns the lazy path of the generated file.
///
/// `ggml_dep` is the wrapping dependency; the shader sources are taken from
/// its own nested "ggml" dependency. Both stages shell out to `xcrun` with the
/// `macosx` SDK.
fn compileMetalLib(b: *std.Build, ggml_dep: *std.Build.Dependency) std.Build.LazyPath {
    const upstream = ggml_dep.builder.dependency("ggml", .{});
    const shader_source = upstream.path("ggml/src/ggml-metal/ggml-metal.metal");

    // Header search directories passed to the Metal compiler, in order.
    const include_dirs = [_]std.Build.LazyPath{
        upstream.path("ggml/src"),
        upstream.path("ggml/src/ggml-metal"),
    };

    // Stage 1: compile the .metal source into a .air object.
    const to_air = b.addSystemCommand(&.{ "xcrun", "-sdk", "macosx", "metal", "-c", "-O3" });
    for (include_dirs) |dir| {
        to_air.addPrefixedDirectoryArg("-I", dir);
    }
    to_air.addArg("-o");
    const air_file = to_air.addOutputFileArg("ggml.air");
    to_air.addFileArg(shader_source);

    // Stage 2: package the .air object into a .metallib.
    const to_metallib = b.addSystemCommand(&.{ "xcrun", "-sdk", "macosx", "metallib", "-o" });
    const metallib_file = to_metallib.addOutputFileArg("default.metallib");
    to_metallib.addFileArg(air_file);

    return metallib_file;
}
79 changes: 79 additions & 0 deletions ggml/build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const Options = struct {
/// Compute backend that ggml is built against.
const Backend = enum(u8) {
/// CPU backend.
cpu,
/// Vulkan backend.
vulkan,
/// Metal backend; `buildGGML` panics unless the target OS is macOS.
metal,
};

fn buildGGML(
Expand Down Expand Up @@ -69,6 +70,18 @@ fn buildGGML(
mod.linkLibrary(vulkan_lib);
mod.lib_paths.appendSlice(b.allocator, vulkan_lib.root_module.lib_paths.items) catch unreachable;
},
.metal => {
if (options.target.result.os.tag != .macos) {
@panic("Metal backend is only supported on macOS");
}
mod.addCMacro("GGML_USE_METAL", "1");
mod.linkFramework("Metal", .{});
mod.linkFramework("MetalKit", .{});
mod.linkFramework("Foundation", .{});
mod.linkFramework("Accelerate", .{});
const metal_lib = buildGGMLMetal(b, options);
mod.linkLibrary(metal_lib);
},
else => {},
}

Expand Down Expand Up @@ -250,6 +263,72 @@ fn buildGGMLCpu(
return lib;
}

/// Builds and installs the static `ggml_metal` library from the Metal backend
/// sources of the "ggml" dependency, and returns its compile step.
///
/// The module is created with the target, optimize mode and strip setting
/// from `options`. The shader library is not embedded here: per the original
/// note, a precompiled metallib is used and it is compiled and installed by
/// the main build.zig.
fn buildGGMLMetal(
    b: *std.Build,
    options: Options,
) *std.Build.Step.Compile {
    const dep = b.dependency("ggml", .{});

    // Directory holding every Metal backend source; also an include path.
    const metal_dir = dep.path(src_prefix ++ "src/ggml-metal");

    const mod = b.addModule("ggml_metal", .{
        .target = options.target,
        .optimize = options.optimize,
        .strip = options.strip,
        .link_libc = true,
        .link_libcpp = true,
    });

    mod.addIncludePath(dep.path(src_prefix ++ "src"));
    mod.addIncludePath(dep.path(src_prefix ++ "include"));
    mod.addIncludePath(metal_dir);

    // C++ translation units share the file-wide C++ flags.
    const cpp_sources: []const []const u8 = &.{
        "ggml-metal.cpp",
        "ggml-metal-device.cpp",
        "ggml-metal-common.cpp",
        "ggml-metal-ops.cpp",
    };
    mod.addCSourceFiles(.{
        .root = metal_dir,
        .files = cpp_sources,
        .flags = cppflags,
    });

    // Objective-C translation units are built without ARC because upstream
    // manages memory manually.
    const objc_sources: []const []const u8 = &.{
        "ggml-metal-device.m",
        "ggml-metal-context.m",
    };
    mod.addCSourceFiles(.{
        .root = metal_dir,
        .files = objc_sources,
        .flags = &.{
            "-fPIC",
            "-O3",
            "-fno-objc-arc",
        },
    });

    // Apple frameworks the backend links against.
    const frameworks = [_][]const u8{ "Metal", "MetalKit", "Foundation", "Accelerate" };
    for (frameworks) |framework| {
        mod.linkFramework(framework, .{});
    }

    const lib = b.addLibrary(.{
        .name = "ggml_metal",
        .root_module = mod,
        .linkage = .static,
    });
    b.installArtifact(lib);

    return lib;
}

fn buildGGMLVulkan(
b: *std.Build,
options: Options,
Expand Down