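ai updated: enable glm4.7-flash, replace the kv_cache macro with -fa 0, lower context sizes, bump llama-cpp-nightly to 8690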

2026-04-07 17:48:27 +02:00
parent 0cd2f1ea6d
commit b200beb7ac
2 changed files with 13 additions and 13 deletions
@@ -77,22 +77,22 @@
            };
          };

-          # "glm4.7-flash" = {
-          #   cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\}  $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
-          #   ttl = 900;
-          #   aliases = [ "coder" ];
-          #   macros = {
-          #     hf_repo = "unsloth/GLM-4.7-Flash-REAP-23B-A3B-GGUF"; # Reap should allow more context in gpu mem
-          #     ctx = 48000;
-          #   };
-          # };
+          "glm4.7-flash" = {
+            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -fa 0 -ngl $\{ngl\} --hf-repo $\{hf_repo\}"; # NOTE(review): "-fa 0" presumably disables flash attention; confirm against llama-server --help
+            ttl = 900; # NOTE(review): presumably an idle timeout in seconds; confirm
+            aliases = [ "coder" ];
+            macros = { # NOTE(review): presumably fills the same-named placeholders in cmd; PORT and ngl are not set here
+              hf_repo = "unsloth/GLM-4.7-Flash-REAP-23B-A3B-GGUF"; # REAP variant: expected to leave room for more context in GPU memory
+              ctx = 32000; # feeds --ctx-size via the ctx placeholder in cmd
+            };
+          };

          "gemma4" = {
-            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\}  $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
+            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -fa 0 -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
            ttl = 900;
            macros = {
              hf_repo = "unsloth/gemma-4-26B-A4B-it-GGUF:UD-IQ3_XXS";
-              ctx = 64000;
+              ctx = 36000;
            };
          };

@@ -65,7 +65,7 @@ let
 in
 effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp-nightly";
-  version = "8667";
+  version = "8690";

  src = fetchFromGitHub {
    owner = "ggml-org";
@@ -142,7 +142,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
  doCheck = false;

  meta = {
-    description = "Inference of Meta's LLaMA model (and others) in pure C/C++ (nightly b8667)";
+    description = "Inference of Meta's LLaMA model (and others) in pure C/C++"; # build number is carried by version, not repeated here
    homepage = "https://github.com/ggml-org/llama.cpp";
    license = lib.licenses.mit;
    mainProgram = "llama";