unique: further work

2025-04-08 13:55:50 +02:00
parent 71dc449b19
commit 18f15a2285

@@ -2,12 +2,43 @@ lib:
let
# O(n^2): for each of the n elements, builtins.elem scans acc in O(n), and ++ copies acc (assuming it reallocates every time)
uniqueOld = builtins.foldl' (acc: e: if builtins.elem e acc then acc else acc ++ [ e ]) [];
# uniqueNew = xs: let
# entries = lib.genAttrs xs (_: null);
# in builtins.concatMap (x: if );
bigdataMostlyDuplicates = (lib.replicate 999999 null) ++ ["unique"];
bigdataMostlyUnique = lib.imap0 (i: _: toString i) (lib.replicate 999999 null) ++ ["123" "456"];
# Not sure about the runtime of duplicate removal via attrsets and/or genericClosure,
# but it could theoretically be dominated by the time it takes to create the identifiers.
# O(a + b), where a is the duplicate-removal runtime and b is the identifier-creation runtime.
#
# This function performs well for large inputs with mostly unique values.
uniqueHashable = let
identifier = x: {
"int" = "int:${toString x}";
"bool" = "bool:${toString x}";
"string" = "string:${x}";
"path" = "path:${toString x}";
"null" = "null";
"set" = "set:{${toString (lib.mapAttrsToList (name: value: "${name}=${identifier value},") x)}}";
"list" = "list:[${toString (map identifier x)}]";
"float" = "float:${toString x}";
# "lambda"
}.${builtins.typeOf x};
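# Note that string contents are not escaped, so distinct values can collide:
# identifier [ "a" "b" ] == identifier [ "a string:b" ] == "list:[string:a string:b]"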
# Does not preserve order, since attrValues returns values sorted by attribute name
# in list: builtins.attrValues (builtins.listToAttrs (map (x: {
# name = identifier x;
# value = x;
# }) list));
# Seemingly preserves order: genericClosure processes startSet in order and keeps only the first element for each key
in list: builtins.catAttrs "value" (builtins.genericClosure {
startSet = map (x: { key = identifier x; value = x; }) list;
operator = _: [];
});
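# For reference, a minimal sketch of the genericClosure behaviour relied on above
# (duplicate keys are dropped, the first occurrence wins, input order is kept):
#   builtins.genericClosure {
#     startSet = [ { key = "a"; value = 1; } { key = "a"; value = 2; } { key = "b"; value = 3; } ];
#     operator = _: [ ];
#   }
#   == [ { key = "a"; value = 1; } { key = "b"; value = 3; } ]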
bigdataCount = 9999;
bigdataMostlyDuplicates = (lib.replicate bigdataCount "") ++ ["unique" {a = 1;} [1 2 3]];
bigdataMostlyUnique = lib.imap0 (i: _: toString i) (lib.replicate bigdataCount null) ++ ["123" "456" {a = 1;} [1 2 3]];
in {
old = uniqueOld bigdataMostlyDuplicates;
oldMostlyDuplicates = uniqueOld bigdataMostlyDuplicates;
oldMostlyUnique = uniqueOld bigdataMostlyUnique;
hashableMostlyDuplicates = uniqueHashable bigdataMostlyDuplicates;
hashableMostlyUnique = uniqueHashable bigdataMostlyUnique;
}
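
To compare the implementations on the test data, the file (assumed here to be saved as unique.nix) can be evaluated with nixpkgs' lib passed in; --strict forces the resulting lists to be fully evaluated:

time nix-instantiate --eval --strict --arg lib 'import <nixpkgs/lib>' unique.nix >/dev/null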