diff --git a/src/unique.nix b/src/unique.nix
index d77380a..e54d17f 100644
--- a/src/unique.nix
+++ b/src/unique.nix
@@ -2,12 +2,43 @@ lib:
 let
   # O(n^2), assuming foldl' is O(n) and ++ causes a reallocation every time
   uniqueOld = builtins.foldl' (acc: e: if builtins.elem e acc then acc else acc ++ [ e ]) [];
 
-  # uniqueNew = xs: let
-  #   entries = lib.genAttrs xs (_: null);
-  # in builtins.concatMap (x: if );
-  bigdataMostlyDuplicates = (lib.replicate 999999 null) ++ ["unique"];
-  bigdataMostlyUnique = lib.imap0 (i: _: toString i) (lib.replicate 999999 null) ++ ["123" "456"];
+  # Not sure about the runtime of removing duplicates via attrsets and/or genericClosure,
+  # but identifier creation could theoretically dominate it:
+  # O(a + b), where a is the duplicate-removal runtime and b is the identifier-creation runtime.
+  #
+  # This function performs well for large inputs with mostly unique values
+  uniqueHashable = let
+    identifier = x: {
+      "int" = "int:${toString x}";
+      "bool" = "bool:${toString x}";
+      "string" = "string:${x}";
+      "path" = "path:${toString x}";
+      "null" = "null";
+      "set" = "set:{${toString (lib.mapAttrsToList (name: value: "${name}=${identifier value},") x)}}";
+      "list" = "list:[${toString (map identifier x)}]";
+      "float" = "float:${toString x}";
+      # "lambda": functions have no stable identifier, so they are unsupported here
+    }.${builtins.typeOf x};
+
+    # Does not preserve order:
+    # in list: builtins.attrValues (builtins.listToAttrs (map (x: {
+    #   name = identifier x;
+    #   value = x;
+    # }) list));
+
+    # Seemingly preserves order
+  in list: builtins.catAttrs "value" (builtins.genericClosure {
+    startSet = map (x: { key = identifier x; value = x; }) list;
+    operator = _: [];
+  });
+
+  bigdataCount = 9999;
+  bigdataMostlyDuplicates = (lib.replicate bigdataCount "") ++ ["unique" {a = 1;} [1 2 3]];
+  bigdataMostlyUnique = lib.imap0 (i: _: toString i) (lib.replicate bigdataCount null) ++ ["123" "456" {a = 1;} [1 2 3]];
 in {
-  old = uniqueOld bigdataMostlyDuplicates;
+  oldMostlyDuplicates = uniqueOld bigdataMostlyDuplicates;
+  oldMostlyUnique = uniqueOld bigdataMostlyUnique;
+  hashableMostlyDuplicates = uniqueHashable bigdataMostlyDuplicates;
+  hashableMostlyUnique = uniqueHashable bigdataMostlyUnique;
 }
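
For a quick sanity check of the new function, something like the following works (a sketch: it assumes `lib` is nixpkgs' lib and that <nixpkgs> is on NIX_PATH; the diff itself does not show how `lib` is wired in):

    $ nix-instantiate --eval --strict -E \
        '(import ./src/unique.nix (import <nixpkgs/lib>)).hashableMostlyDuplicates'
    [ "" "unique" { a = 1; } [ 1 2 3 ] ]

Prefixing the same command with `time` and swapping the attribute name between the four outputs gives a rough benchmark of uniqueOld vs uniqueHashable on the mostly-duplicate and mostly-unique datasets.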
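
The dedup mechanism here is builtins.genericClosure, which skips any work-set element whose `key` it has already seen; with an operator that returns no successors, it degenerates into "unique by key, keeping the first occurrence". A minimal standalone illustration (not part of the diff):

    builtins.genericClosure {
      startSet = [
        { key = "a"; value = 1; }
        { key = "b"; value = 2; }
        { key = "a"; value = 3; }  # skipped: key "a" was already seen
      ];
      operator = _: [ ];           # no successors, so this is pure dedup
    }
    # => [ { key = "a"; value = 1; } { key = "b"; value = 2; } ]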
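
One caveat worth flagging (my observation, not something the diff addresses): `identifier` does not escape its separator characters, so distinct values can collide and be merged incorrectly. Since toString joins list elements with a single space, both lists below serialise to the same key "list:[string:a string:b]":

    uniqueHashable [ [ "a" "b" ] [ "a string:b" ] ]
    # both elements get the identifier "list:[string:a string:b]",
    # so the second list is dropped as a false duplicate

Escaping (or length-prefixing) the strings embedded in the "string", "set", and "list" cases would make the identifiers injective.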