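# Nightly build of llama.cpp (https://github.com/ggml-org/llama.cpp), packaged
# nixpkgs-style with selectable GPU backends: CUDA, ROCm/HIP, Metal, Vulkan,
# OpenCL (CLBlast), and SYCL, falling back to BLAS when none is enabled.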
{
  lib,
  autoAddDriverRunpath,
  cmake,
  fetchFromGitHub,
  stdenv,
  ninja,
  pkg-config,
  curl,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },
  rocmGpuTargets ? rocmPackages.clr.localGpuTargets or rocmPackages.clr.gpuTargets,

  openclSupport ? false,
  clblast,

  blasSupport ? builtins.all (x: !x) [
    cudaSupport
    metalSupport
    openclSupport
    rocmSupport
    syclSupport
    vulkanSupport
  ],
  blas,

  metalSupport ? stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64 && !openclSupport,
  vulkanSupport ? false,
  rpcSupport ? false,
  shaderc,
  vulkan-headers,
  vulkan-loader,

  syclSupport ? false,
  mkl ? null,
  oneDNN ? null,
  syclStdenv ? null,
  syclF16Support ? false,
  syclDeviceArch ? "",
}:
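# Every backend toggle above is an ordinary function argument, so consumers
# can flip them with `.override`. A minimal sketch, assuming this expression
# is instantiated via `callPackage` under a hypothetical attribute name
# `llama-cpp-nightly`:
#
#   llama-cpp-nightly.override {
#     cudaSupport = true;
#     rpcSupport = true;
#   }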
let
  # CUDA needs the nvcc-compatible stdenv from cudaPackages; SYCL builds use
  # the caller-supplied SYCL toolchain when one is provided.
  effectiveStdenv =
    if cudaSupport then
      cudaPackages.backendStdenv
    else if syclSupport && syclStdenv != null then
      syclStdenv
    else
      stdenv;

  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    optionalString
    ;

  # Per-backend dependency sets, spliced into buildInputs below.
  cudaBuildInputs = with cudaPackages; [
    cuda_cccl
    cuda_cudart
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    shaderc
    vulkan-headers
    vulkan-loader
  ];
in
effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp-nightly";
  version = "8763";

  src = fetchFromGitHub {
    owner = "ggml-org";
    repo = "llama.cpp";
    tag = "b${finalAttrs.version}";
    hash = "sha256-bDI7a7OMCbuZyaJX4o22fmQIyrGdzYkoIeVvxBYlnRI=";
    # Keep .git just long enough to record the short commit hash, then strip
    # it so the fixed-output derivation stays deterministic.
    leaveDotGit = true;
    postFetch = ''
      git -C "$out" rev-parse --short HEAD > $out/COMMIT
      find "$out" -name .git -print0 | xargs -0 rm -rf
    '';
  };

  nativeBuildInputs = [
    cmake
    ninja
    pkg-config
  ]
  ++ optionals cudaSupport [
    cudaPackages.cuda_nvcc
    autoAddDriverRunpath
  ];

  # curl is unconditional because LLAMA_CURL is always enabled below.
  buildInputs =
    optionals cudaSupport cudaBuildInputs
    ++ optionals openclSupport [ clblast ]
    ++ optionals rocmSupport rocmBuildInputs
    ++ optionals blasSupport [ blas ]
    ++ optionals vulkanSupport vulkanBuildInputs
    ++ optionals syclSupport ([ mkl ] ++ optionals (oneDNN != null) [ oneDNN ])
    ++ [ curl ];

  # Embed the commit recorded at fetch time into the build metadata.
  preConfigure = ''
    prependToVar cmakeFlags "-DLLAMA_BUILD_COMMIT:STRING=$(cat COMMIT)"
  '';

  cmakeFlags = [
    (cmakeBool "GGML_NATIVE" false)
    (cmakeBool "LLAMA_BUILD_EXAMPLES" false)
    (cmakeBool "LLAMA_BUILD_SERVER" true)
    (cmakeBool "LLAMA_BUILD_TESTS" false)
    (cmakeBool "LLAMA_CURL" true)
    (cmakeBool "BUILD_SHARED_LIBS" true)
    (cmakeBool "GGML_BLAS" blasSupport)
    (cmakeBool "GGML_CLBLAST" openclSupport)
    (cmakeBool "GGML_CUDA" cudaSupport)
    (cmakeBool "GGML_HIP" rocmSupport)
    (cmakeBool "GGML_METAL" metalSupport)
    (cmakeBool "GGML_RPC" rpcSupport)
    (cmakeBool "GGML_VULKAN" vulkanSupport)
    (cmakeBool "GGML_SYCL" syclSupport)
    (cmakeFeature "LLAMA_BUILD_NUMBER" finalAttrs.version)
  ]
  ++ optionals cudaSupport [
    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
  ]
  ++ optionals rocmSupport [
    (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.clr.hipClangPath}/clang++")
    (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmGpuTargets))
  ]
  ++ optionals metalSupport [
    (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
    (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" true)
  ]
  ++ optionals rpcSupport [
    (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
  ]
  ++ optionals syclSupport [
    (cmakeFeature "GGML_SYCL_TARGET" "INTEL")
    (cmakeBool "GGML_SYCL_DNN" (oneDNN != null))
    (cmakeBool "GGML_SYCL_F16" syclF16Support)
  ]
  ++ optionals (syclSupport && syclDeviceArch != "") [
    (cmakeFeature "GGML_SYCL_DEVICE_ARCH" syclDeviceArch)
  ];

  postInstall = ''
    ln -sf $out/bin/llama-cli $out/bin/llama
    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  ''
  + optionalString rpcSupport "cp bin/rpc-server $out/bin/llama-rpc-server";

  doCheck = false;

  meta = {
    description = "Inference of Meta's LLaMA model (and others) in pure C/C++";
    homepage = "https://github.com/ggml-org/llama.cpp";
    license = lib.licenses.mit;
    mainProgram = "llama";
    platforms = if syclSupport then [ "x86_64-linux" ] else lib.platforms.unix;
    badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin;
    broken =
      (metalSupport && !effectiveStdenv.hostPlatform.isDarwin) || (syclSupport && syclStdenv == null);
  };
})
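# Build-and-run sketch, again assuming a hypothetical `llama-cpp-nightly`
# attribute exposed by the surrounding package set:
#
#   nix-build -A llama-cpp-nightly
#   ./result/bin/llama --version   # prints the embedded build number/commit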