ai updated
This commit is contained in:
@@ -77,22 +77,22 @@
|
||||
};
|
||||
};
|
||||
|
||||
# "glm4.7-flash" = {
|
||||
# cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
|
||||
# ttl = 900;
|
||||
# aliases = [ "coder" ];
|
||||
# macros = {
|
||||
# hf_repo = "unsloth/GLM-4.7-Flash-REAP-23B-A3B-GGUF"; # REAP variant should allow more context in GPU memory
|
||||
# ctx = 48000;
|
||||
# };
|
||||
# };
|
||||
"glm4.7-flash" = {
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -fa 0 -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
|
||||
ttl = 900;
|
||||
aliases = [ "coder" ];
|
||||
macros = {
|
||||
hf_repo = "unsloth/GLM-4.7-Flash-REAP-23B-A3B-GGUF"; # REAP variant should allow more context in GPU memory
|
||||
ctx = 32000;
|
||||
};
|
||||
};
|
||||
|
||||
"gemma4" = {
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -fa 0 -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
|
||||
ttl = 900;
|
||||
macros = {
|
||||
hf_repo = "unsloth/gemma-4-26B-A4B-it-GGUF:UD-IQ3_XXS";
|
||||
ctx = 64000;
|
||||
ctx = 36000;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ let
|
||||
in
|
||||
effectiveStdenv.mkDerivation (finalAttrs: {
|
||||
pname = "llama-cpp-nightly";
|
||||
version = "8667";
|
||||
version = "8690";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "ggml-org";
|
||||
@@ -142,7 +142,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
|
||||
doCheck = false;
|
||||
|
||||
meta = {
|
||||
description = "Inference of Meta's LLaMA model (and others) in pure C/C++ (nightly b8667)";
|
||||
description = "Inference of Meta's LLaMA model (and others) in pure C/C++";
|
||||
homepage = "https://github.com/ggml-org/llama.cpp";
|
||||
license = lib.licenses.mit;
|
||||
mainProgram = "llama";
|
||||
|
||||
Reference in New Issue
Block a user