From 25c0380b54516c13580603b4affc8d96c264beac Mon Sep 17 00:00:00 2001
From: Paul Miller
Date: Tue, 27 Jan 2026 23:37:45 -0600
Subject: [PATCH] add metal support

---
 README.md      | 22 +++++++++++++-
 build.zig      | 38 ++++++++++++++++++++++++
 ggml/build.zig | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 138 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 458c75a..974de74 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # llama.cpp.zig
 
-A `build.zig` for [llama.cpp](https://github.com/ggml-org/llama.cpp), with Vulkan.
+A `build.zig` for [llama.cpp](https://github.com/ggml-org/llama.cpp), with Vulkan and Metal.
 
 You can use llama.cpp from Zig projects.
 
@@ -14,11 +14,13 @@ Supported targets are:
 - Linux aarch64
 - Windows x86_64
 - Windows aarch64
+- macOS aarch64 (Apple Silicon)
 
 Supported backends are:
 
 - CPU
 - Vulkan
+- Metal (macOS only)
 
 Other targets and backends can be added with time and test devices.
 
@@ -29,6 +31,7 @@ Other targets and backends can be added with time and test devices.
 - Raspberry Pi 5 (aarch64 linux): CPU works, Vulkan compiles but don't due to some lack of memory.
 - Surface pro X SQ2 (aarch64 windows): CPU works, vulkan compiles but don't run due to some missing feature.
 - Termux (aarch64 android/linux): CPU works, vulkan compiles but don't run.
+- M4 Pro (aarch64 macOS): All good.
 
 ## How to build
 
@@ -45,6 +48,7 @@ You can choose the backend used:
 ```sh
 zig build install -Dbackend=vulkan
 zig build install -Dbackend=cpu #default
+zig build install -Dbackend=metal
 ```
 
 And choose a target architecture and OS:
@@ -77,6 +81,22 @@ you_module.linkLibrary(llama_cpp_lib);
 
 Refer to [src/demo.zig] for an usage example.
 
+### Metal Backend (macOS)
+
+Metal requires the Metal Toolchain to compile shaders at build time:
+
+```sh
+xcodebuild -downloadComponent MetalToolchain
+```
+When building with Metal, the output includes a `default.metallib` file that must be distributed alongside your binaries:
+
+```
+zig-out/bin/
+├── llama-run
+├── llama-server
+└── default.metallib # required for Metal to work
+```
+
 ## Licenses
 
 MIT
diff --git a/build.zig b/build.zig
index b36b86a..b1ba3cc 100644
--- a/build.zig
+++ b/build.zig
@@ -35,6 +35,7 @@ const Options = struct {
 const Backend = enum(u8) {
     cpu,
     vulkan,
+    metal,
 };
 
 fn buildLlamaCpp(
@@ -124,6 +125,12 @@ fn buildLlamaCpp(
     mod.linkLibrary(ggml_lib);
     mod.lib_paths.appendSlice(b.allocator, ggml_lib.root_module.lib_paths.items) catch unreachable;
 
+    // Metal backend only: build default.metallib with compileMetalLib and make the install step copy it to bin/default.metallib
+    if (options.backend == .metal) {
+        const metallib = compileMetalLib(b, ggml_dep);
+        b.getInstallStep().dependOn(&b.addInstallFile(metallib, "bin/default.metallib").step);
+    }
+
     const common_lib = buildCommon(b, ggml_lib, options);
     mod.linkLibrary(common_lib);
 
@@ -580,3 +587,34 @@ const cppflags: []const []const u8 = &.{
     "-std=c++17",
     "-O3",
 };
+
+fn compileMetalLib(b: *std.Build, ggml_dep: *std.Build.Dependency) std.Build.LazyPath {
+    // Builds default.metallib from ggml-metal.metal in two xcrun steps (metal -c, then metallib) and returns the LazyPath of the generated file.
+    // The shader source is taken from the "ggml" dependency of ggml_dep's own builder; ggml/src and ggml/src/ggml-metal are passed as -I directories.
+    const ggml_src = ggml_dep.builder.dependency("ggml", .{});
+    const metal_src = ggml_src.path("ggml/src/ggml-metal/ggml-metal.metal");
+    const include_path = ggml_src.path("ggml/src");
+    const metal_include_path = ggml_src.path("ggml/src/ggml-metal");
+
+    // Step 1: compile ggml-metal.metal to ggml.air (Metal Intermediate Representation) via `xcrun -sdk macosx metal -c -O3`
+    const compile_cmd = b.addSystemCommand(&.{
+        "xcrun", "-sdk", "macosx", "metal",
+        "-c",
+        "-O3",
+    });
+    compile_cmd.addPrefixedDirectoryArg("-I", include_path);
+    compile_cmd.addPrefixedDirectoryArg("-I", metal_include_path);
+    compile_cmd.addArg("-o");
+    const air_output = compile_cmd.addOutputFileArg("ggml.air");
+    compile_cmd.addFileArg(metal_src);
+
+    // Step 2: link ggml.air into default.metallib via `xcrun -sdk macosx metallib -o`
+    const link_cmd = b.addSystemCommand(&.{
+        "xcrun", "-sdk", "macosx", "metallib",
+        "-o",
+    });
+    const metallib_output = link_cmd.addOutputFileArg("default.metallib");
+    link_cmd.addFileArg(air_output);
+
+    return metallib_output;
+}
diff --git a/ggml/build.zig b/ggml/build.zig
index 028cc44..482a77d 100644
--- a/ggml/build.zig
+++ b/ggml/build.zig
@@ -33,6 +33,7 @@ const Options = struct {
 const Backend = enum(u8) {
     cpu,
     vulkan,
+    metal,
 };
 
 fn buildGGML(
@@ -69,6 +70,18 @@ fn buildGGML(
             mod.linkLibrary(vulkan_lib);
             mod.lib_paths.appendSlice(b.allocator, vulkan_lib.root_module.lib_paths.items) catch unreachable;
         },
+        .metal => {
+            if (options.target.result.os.tag != .macos) {
+                @panic("Metal backend is only supported on macOS");
+            }
+            mod.addCMacro("GGML_USE_METAL", "1");
+            mod.linkFramework("Metal", .{});
+            mod.linkFramework("MetalKit", .{});
+            mod.linkFramework("Foundation", .{});
+            mod.linkFramework("Accelerate", .{});
+            const metal_lib = buildGGMLMetal(b, options);
+            mod.linkLibrary(metal_lib);
+        },
         else => {},
     }
 
@@ -250,6 +263,72 @@ fn buildGGMLCpu(
     return lib;
 }
 
+fn buildGGMLMetal(
+    b: *std.Build,
+    options: Options,
+) *std.Build.Step.Compile {
+    const dep = b.dependency("ggml", .{});
+
+    var mod = b.addModule(
+        "ggml_metal",
+        .{
+            .target = options.target,
+            .optimize = options.optimize,
+            .strip = options.strip,
+            .link_libc = true,
+            .link_libcpp = true,
+        },
+    );
+
+    // Include directories for the Metal backend sources; shaders are not embedded here, a precompiled metallib is used instead
+    mod.addIncludePath(dep.path(src_prefix ++ "src"));
+    mod.addIncludePath(dep.path(src_prefix ++ "include"));
+    mod.addIncludePath(dep.path(src_prefix ++ "src/ggml-metal"));
+
+    // C++ sources from src/ggml-metal, compiled with this file's shared cppflags
+    mod.addCSourceFiles(.{
+        .root = dep.path(src_prefix ++ "src/ggml-metal"),
+        .files = &.{
+            "ggml-metal.cpp",
+            "ggml-metal-device.cpp",
+            "ggml-metal-common.cpp",
+            "ggml-metal-ops.cpp",
+        },
+        .flags = cppflags,
+    });
+
+    // Objective-C sources, built with -fno-objc-arc (no ARC - upstream uses manual memory management)
+    const objc_flags: []const []const u8 = &.{
+        "-fPIC",
+        "-O3",
+        "-fno-objc-arc",
+    };
+    mod.addCSourceFiles(.{
+        .root = dep.path(src_prefix ++ "src/ggml-metal"),
+        .files = &.{
+            "ggml-metal-device.m",
+            "ggml-metal-context.m",
+        },
+        .flags = objc_flags,
+    });
+
+    // Apple frameworks linked into the ggml_metal module
+    mod.linkFramework("Metal", .{});
+    mod.linkFramework("MetalKit", .{});
+    mod.linkFramework("Foundation", .{});
+    mod.linkFramework("Accelerate", .{});
+
+    // Static ggml_metal library, installed as an artifact; note the metallib itself is compiled and installed by the main build.zig
+    const lib = b.addLibrary(.{
+        .name = "ggml_metal",
+        .root_module = mod,
+        .linkage = .static,
+    });
+    b.installArtifact(lib);
+
+    return lib;
+}
+
 fn buildGGMLVulkan(
     b: *std.Build,
     options: Options,