From b200beb7acdfa348429dec33157aa8276ea1cafd Mon Sep 17 00:00:00 2001
From: Adrian Gunnar Lauterer
Date: Tue, 7 Apr 2026 17:48:27 +0200
Subject: [PATCH] ai updated

---
 modules/llama-swap.nix                 | 22 +++++++++++-----------
 packages/llama-cpp-nightly/default.nix |  4 ++--
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/modules/llama-swap.nix b/modules/llama-swap.nix
index 9595e69..4b8c940 100644
--- a/modules/llama-swap.nix
+++ b/modules/llama-swap.nix
@@ -77,22 +77,22 @@
       };
     };
 
-    # "glm4.7-flash" = {
-    #   cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
-    #   ttl = 900;
-    #   aliases = [ "coder" ];
-    #   macros = {
-    #     hf_repo = "unsloth/GLM-4.7-Flash-REAP-23B-A3B-GGUF"; # Reap should allow more context in gpu mem
-    #     ctx = 48000;
-    #   };
-    # };
+    "glm4.7-flash" = {
+      cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -fa 0 -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
+      ttl = 900;
+      aliases = [ "coder" ];
+      macros = {
+        hf_repo = "unsloth/GLM-4.7-Flash-REAP-23B-A3B-GGUF"; # Reap should allow more context in gpu mem
+        ctx = 32000;
+      };
+    };
 
     "gemma4" = {
-      cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
+      cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -fa 0 -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
       ttl = 900;
       macros = {
         hf_repo = "unsloth/gemma-4-26B-A4B-it-GGUF:UD-IQ3_XXS";
-        ctx = 64000;
+        ctx = 36000;
       };
     };
 
diff --git a/packages/llama-cpp-nightly/default.nix b/packages/llama-cpp-nightly/default.nix
index 6e3d7f1..ef15581 100644
--- a/packages/llama-cpp-nightly/default.nix
+++ b/packages/llama-cpp-nightly/default.nix
@@ -65,7 +65,7 @@ let
 in
 effectiveStdenv.mkDerivation (finalAttrs: {
   pname = "llama-cpp-nightly";
-  version = "8667";
+  version = "8690";
 
   src = fetchFromGitHub {
     owner = "ggml-org";
@@ -142,7 +142,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
   doCheck = false;
 
   meta = {
-    description = "Inference of Meta's LLaMA model (and others) in pure C/C++ (nightly b8667)";
+    description = "Inference of Meta's LLaMA model (and others) in pure C/C++";
     homepage = "https://github.com/ggml-org/llama.cpp";
     license = lib.licenses.mit;
     mainProgram = "llama";