unique: further work
This commit is contained in:
@@ -2,12 +2,43 @@ lib:
|
||||
let
|
||||
# O(n^2), assuming foldl' is O(n) and ++ causes a reallocation every time
|
||||
# Baseline de-duplication: walk the list left to right and keep an element only
# if an equal one has not been kept already (first occurrence wins).
uniqueOld =
  let
    # Fold step: `seen` is the result so far, `item` the element being visited.
    keepFirst = seen: item:
      if !(builtins.elem item seen) then seen ++ [ item ] else seen;
  in
  builtins.foldl' keepFirst [ ];
|
||||
# uniqueNew = xs: let
|
||||
# entries = lib.genAttrs xs (_: null);
|
||||
# in builtins.concatMap (x: if );
|
||||
|
||||
bigdataMostlyDuplicates = (lib.replicate 999999 null) ++ ["unique"];
|
||||
bigdataMostlyUnique = lib.imap0 (i: _: toString i) (lib.replicate 999999 null) ++ ["123" "456"];
|
||||
# Not sure about the runtime of removing duplicates in attrsets and/or genericClosure,
|
||||
# but it could theoretically be surpassed by the time it takes to create identifiers.
|
||||
# O(a + b) where a is duplication removal runtime and b is identifier creation runtime
|
||||
#
|
||||
# This function performs well for large inputs with mostly unique values
|
||||
# De-duplicates `list` by deriving a string key from each element's type and
# contents and keeping one element per key.
#
# Supported element types: int, bool, string, path, null, float, and lists /
# attribute sets built from those. "lambda" has no entry in the table below,
# so computing the key of a function fails with a missing-attribute error.
# Keys of lists and sets are built recursively, so every nested value is
# evaluated.
#
# NOTE(review): keys are type-tagged, so an int and a float that compare equal
# under `==` still get different keys here — confirm this is intended.
uniqueHashable = let
  # JSON-quote a string so that spaces, commas, brackets or type tags inside it
  # cannot be confused with the structure of an enclosing list/set key.
  # Without this, [ "a" "b" ] and [ "a string:b" ] produced the same key.
  quote = builtins.toJSON;

  # Key fragment for one attribute: quoted name, "=", key of its value.
  attrEntry = attrs: name: "${quote name}=${identifier attrs.${name}},";

  # Type-tagged key for `x`. Only the entry selected by `typeOf x` is
  # evaluated, because attribute values are lazy.
  identifier = x: {
    "int" = "int:${toString x}";
    "bool" = "bool:${quote x}";
    "string" = "string:${quote x}";
    # Convert to a string first so the path itself is not imported anywhere.
    "path" = "path:${quote (toString x)}";
    "null" = "null";
    # attrNames is sorted, so equal sets always yield the same key.
    "set" = "set:{${toString (map (attrEntry x) (builtins.attrNames x))}}";
    "list" = "list:[${toString (map identifier x)}]";
    # toString rounds to six decimals; the JSON rendering is added so that
    # floats differing only in later digits do not share a key.
    "float" = "float:${toString x}/${quote x}";
    # "lambda"
  }.${builtins.typeOf x};

  # Does not preserve order
  # in list: builtins.attrValues (builtins.listToAttrs (map (x: {
  #   name = identifier x;
  #   value = x;
  # }) list));

  # genericClosure keeps one item per distinct `key`; with an operator that
  # never adds items it only filters the start set. Seemingly preserves order.
in list: builtins.catAttrs "value" (builtins.genericClosure {
  startSet = map (x: { key = identifier x; value = x; }) list;
  operator = _: [];
});
|
||||
|
||||
# Size parameter shared by both benchmark inputs below.
bigdataCount = 9999;
|
||||
# Input dominated by duplicates: `bigdataCount` empty strings (per
# lib.replicate) followed by three further values: a string, an attribute set
# and a list.
bigdataMostlyDuplicates = (lib.replicate bigdataCount "") ++ ["unique" {a = 1;} [1 2 3]];
|
||||
# Input dominated by distinct values: each position of a `bigdataCount`-long
# list is replaced by its index rendered as a string (presumably "0" up to
# "9998" — confirm against lib.imap0), then "123" and "456" are appended again
# together with an attribute set and a list.
bigdataMostlyUnique = lib.imap0 (i: _: toString i) (lib.replicate bigdataCount null) ++ ["123" "456" {a = 1;} [1 2 3]];
|
||||
# Result: one attribute per (implementation, input) pair, so each combination
# can be evaluated on its own.
in {
|
||||
# NOTE(review): same expression as `oldMostlyDuplicates` below — presumably
# the pre-rename line of this diff; confirm whether it should remain.
old = uniqueOld bigdataMostlyDuplicates;
|
||||
# uniqueOld applied to the duplicate-heavy input.
oldMostlyDuplicates = uniqueOld bigdataMostlyDuplicates;
|
||||
# uniqueOld applied to the mostly-distinct input.
oldMostlyUnique = uniqueOld bigdataMostlyUnique;
|
||||
# uniqueHashable applied to the duplicate-heavy input.
hashableMostlyDuplicates = uniqueHashable bigdataMostlyDuplicates;
|
||||
# uniqueHashable applied to the mostly-distinct input.
hashableMostlyUnique = uniqueHashable bigdataMostlyUnique;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user