ai updated

This commit is contained in:
2026-04-07 17:48:27 +02:00
parent 0cd2f1ea6d
commit b200beb7ac
2 changed files with 13 additions and 13 deletions

View File

@@ -77,22 +77,22 @@
};
};
# "glm4.7-flash" = {
# cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
# ttl = 900;
# aliases = [ "coder" ];
# macros = {
# hf_repo = "unsloth/GLM-4.7-Flash-REAP-23B-A3B-GGUF"; # Reap should allow more context in gpu mem
# ctx = 48000;
# };
# };
"glm4.7-flash" = {
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -fa 0 -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
ttl = 900;
aliases = [ "coder" ];
macros = {
hf_repo = "unsloth/GLM-4.7-Flash-REAP-23B-A3B-GGUF"; # Reap should allow more context in gpu mem
ctx = 32000;
};
};
"gemma4" = {
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -fa 0 -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
ttl = 900;
macros = {
hf_repo = "unsloth/gemma-4-26B-A4B-it-GGUF:UD-IQ3_XXS";
ctx = 64000;
ctx = 36000;
};
};

View File

@@ -65,7 +65,7 @@ let
in
effectiveStdenv.mkDerivation (finalAttrs: {
pname = "llama-cpp-nightly";
version = "8667";
version = "8690";
src = fetchFromGitHub {
owner = "ggml-org";
@@ -142,7 +142,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
doCheck = false;
meta = {
description = "Inference of Meta's LLaMA model (and others) in pure C/C++ (nightly b8667)";
	description = "Inference of Meta's LLaMA model (and others) in pure C/C++";
homepage = "https://github.com/ggml-org/llama.cpp";
license = lib.licenses.mit;
mainProgram = "llama";