diff --git a/default.nix b/default.nix index 4bad62c8ac3..ac38a76f615 100644 --- a/default.nix +++ b/default.nix @@ -98,6 +98,11 @@ rec { "test/.*.sh" "samples/" "samples/.*" + "stdlib/" + "stdlib/.*Makefile.*" + "stdlib/.*.as" + "stdlib/examples/" + "stdlib/examples/.*.as" ]; buildInputs = @@ -112,6 +117,7 @@ rec { buildPhase = '' patchShebangs . asc --version + make -C stdlib ASC=asc all make -C samples ASC=asc all make -C test/run VERBOSE=1 ASC=asc all make -C test/fail VERBOSE=1 ASC=asc all diff --git a/src/Makefile b/src/Makefile index fd811340b00..bfcd4a47372 100644 --- a/src/Makefile +++ b/src/Makefile @@ -49,6 +49,7 @@ clean: $(MAKE) -C ../test clean test: $(NAME) + $(MAKE) -C ../stdlib ASC=$(ASC) all $(MAKE) -C ../test ASC=$(ASC) all $(MAKE) -C ../samples ASC=$(ASC) all diff --git a/stdlib/.gitignore b/stdlib/.gitignore new file mode 100644 index 00000000000..c1d18d8a8b2 --- /dev/null +++ b/stdlib/.gitignore @@ -0,0 +1 @@ +_out diff --git a/stdlib/Makefile b/stdlib/Makefile new file mode 100644 index 00000000000..7f51ebb7cfb --- /dev/null +++ b/stdlib/Makefile @@ -0,0 +1,110 @@ +ASC=../src/asc +OUTDIR=_out + +## VT100 stuff +HRULE="\x1b[2;34m----------------------------------------------------------------\x1b[0m" +MODULE_NAME="\x1b[1;32mModule:\x1b[1;34m" +BEGIN="\x1b[0;1mBegin...\x1b[0m" +DONE="\x1b[1mDone.\n"$(HRULE) +MODULE_NAME_COLOR="\x1b[0;1;34m" +NO_COLOR="\x1b[0m" + +# Add new module targets here: +MODULES=\ + List \ + ListTest \ + Trie \ + Set \ + SetDb \ + SetDbTest \ + ProduceExchange \ + + +OUTFILES=$(addsuffix .out, $(MODULES)) + +OUTPATHS=$(addprefix $(OUTDIR)/, $(OUTFILES)) + +.PHONY: default all clean startmsg + +default: all + +startmsg: + @echo Begin build: $(MODULE_NAME_COLOR)$(MODULES)$(NO_COLOR)... + @echo $(HRULE) + +all: $(OUTDIR) startmsg $(OUTPATHS) + @echo Build done : $(MODULE_NAME_COLOR)$(MODULES)$(NO_COLOR) + +clean: + rm -rf $(OUTDIR) + +$(OUTDIR): + @mkdir $(OUTDIR) + + +$(OUTDIR)/List.out: $(OUTDIR) list.as + @echo $(MODULE_NAME) $(basename $(notdir $@)) + @echo $(BEGIN) + $(ASC) -r $(filter-out $(OUTDIR), $^) > $@ + @echo $(DONE) + +$(OUTDIR)/ListTest.out: $(OUTDIR) list.as listTest.as + @echo $(MODULE_NAME) $(basename $(notdir $@)) + @echo $(BEGIN) + $(ASC) -r $(filter-out $(OUTDIR), $^) > $@ + @echo $(DONE) + +$(OUTDIR)/Trie.out: $(OUTDIR) list.as trie.as + @echo $(MODULE_NAME) $(basename $(notdir $@)) + @echo $(BEGIN) + $(ASC) -r $(filter-out $(OUTDIR), $^) > $@ + @echo $(DONE) + +$(OUTDIR)/Set.out: $(OUTDIR) list.as trie.as set.as + @echo $(MODULE_NAME) $(basename $(notdir $@)) + @echo $(BEGIN) + $(ASC) -r $(filter-out $(OUTDIR), $^) > $@ + @echo $(DONE) + +$(OUTDIR)/SetDb.out: $(OUTDIR) list.as trie.as set.as setDb.as + @echo $(MODULE_NAME) $(basename $(notdir $@)) + @echo $(BEGIN) + $(ASC) -r $(filter-out $(OUTDIR), $^) > $@ + @echo $(DONE) + +$(OUTDIR)/SetDbTest.out: $(OUTDIR) list.as trie.as set.as setDb.as setDbTest.as + @echo $(MODULE_NAME) $(basename $(notdir $@)) + @echo $(BEGIN) + $(ASC) -r $(filter-out $(OUTDIR), $^) > $@ + @echo $(DONE) + +$(OUTDIR)/ProduceExchange.out: $(OUTDIR) list.as trie.as examples/produceExchange.as + @echo $(MODULE_NAME) $(basename $(notdir $@)) + @echo $(BEGIN) + $(ASC) -r $(filter-out $(OUTDIR), $^) > $@ + @echo $(DONE) + + +######################################################################################### +# TODO(Matthew): Figure out why this "compressed" version of the rules doesn't work. + +# $(OUTDIR)/List.out: $(OUTDIR) list.as ; @$(doModule) + +# $(OUTDIR)/ListTest.out: $(OUTDIR) list.as listTest.as ; @$(doModule) + +# $(OUTDIR)/Trie.out: $(OUTDIR) list.as trie.as ; @$(doModule) + +# $(OUTDIR)/Set.out: $(OUTDIR) list.as trie.as set.as ; @$(doModule) + +# $(OUTDIR)/SetDb.out: $(OUTDIR) list.as trie.as set.as setDb.as ; @$(doModule) + +# $(OUTDIR)/SetDbTest.out: $(OUTDIR) list.as trie.as set.as setDb.as setDbTest.as ; @$(doModule) + +# $(OUTDIR)/ProduceExchange.out: $(OUTDIR) list.as trie.as produceExchange.as ; @$(doModule) + +# define doModule = +# @echo $(MODULE_NAME) $(basename $(notdir $@)) +# @echo $(BEGIN) +# $(ASC) -r $(filter-out $(OUTDIR), $^) > $@ +# @echo $(DONE) +# endef diff --git a/stdlib/README.md b/stdlib/README.md new file mode 100644 index 00000000000..1be6257d30c --- /dev/null +++ b/stdlib/README.md @@ -0,0 +1,20 @@ +[See #127](https://github.com/dfinity-lab/actorscript/issues/127) + +Critical modules +================== + - [x] **List**: See [`List` module from SML Basis library](http://sml-family.org/Basis/list.html). + - [x] **Hashtrie**: Persistent maps, as functional hash tries. + - [x] **Set**: Persistent sets, based directly on persistent maps. + - [ ] **Hashtable**: Mutable maps, as imperative hash tables. + +Secondary modules +================== +These modules _may_ be useful in the collections library: + - [ ] **Stream**: Type def done; most operations are pending... + +Other modules +================== +These modules are merely exercises (toys/examples), and _not_ essential to the collections library: + - [ ] **Thunk**: Type def and some operations are done. + + diff --git a/stdlib/examples/produceExchange.as b/stdlib/examples/produceExchange.as new file mode 100644 index 00000000000..dfc40a2ca7b --- /dev/null +++ b/stdlib/examples/produceExchange.as @@ -0,0 +1,214 @@ +// Produce Exchange Dapp +// ===================== +// +// Start here: +// - Detailed examples: https://dfinity.atlassian.net/wiki/x/joXUBg +// - More background: https://dfinity.atlassian.net/wiki/x/4gg5Bg +// + +// Open Questions: +// ------------------------------------------------- + +// 1. Massive result messages: +// How do we represent and send these? +// +// - lazy lists? (seems "easy" from AS programmer perspective, but +// requires non-first-order data in the IDL) +// +// - list iterators? (almost as good as lazy lists, but requires +// references in the IDL, and complicates the GC story). +// +// - arrays? (expensive to build and send; can become way *too big*). +// + +// 2. For now, wan we assume that the canister is maintained by the +// central authority? + +//////////////////////////////////////////////////////////////// + +// Use the standard library of AS: +// =============================== +// + +// Collections implement internal tables: +// -------------------------------------- +// import Table (same as Trie?) xxx + +// import Date +// xxx Dates, eventually from a standard library: +type Date = Nat; + +// xxx standard weight units? +type Weight = Nat; + +// xxx standard price units? +type Price = Nat; + +///////////////////////////////////////////////////////////////// + +// Fixed types +// =============================== +// +// We assume some fixed types (for now). +// Updating these types requires a canister upgrade. +// +// ?? defined by the central authority, aka, the "canister maintainer"? +// + +type Unit = Nat; // xxx replace with a variant type +type Grade = Nat; // xxx replace with a variant type + +type TruckType = Nat; // ??? replace with a variant type + +type TruckCapacity = Weight; + +type Quantity = Nat; + +type PricePerUnit = Price; // needed to calculate prices +type PriceTotal = Price; + +type WeightPerUnit = Weight; // needed to meet truck constraints + +type RegionId = Nat; // xxx variant type? + + +// +// Unique Ids +// ---------- +// Internally, each type of Id serves as a "row key" for a table (or two). +// + +type ProduceId = Nat; +type ProducerId = Nat; +type RetailerId = Nat; +type TruckTypeId = Nat; +type InventoryId = Nat; +type TransporterId = Nat; +type RouteId = Nat; +type OrderId = Nat; + +// +// Query parameters and results +// ---------------------------- +// + +type OrderInfo = shared { + produce: ProduceId; + producer: ProducerId; + quant: Quantity; + ppu: PricePerUnit; + transporter: TransporterId; + truck_type: TruckTypeId; + weight: Weight; + region_begin:RegionId; + region_end: RegionId; + date_begin: Date; + date_end: Date; + prod_cost: PriceTotal; + trans_cost: PriceTotal; +}; + +// xxx same as an OrderInfo? If different, then how? +type QueryAllResult = shared { + produce: ProduceId; + producer: ProducerId; + quant: Quantity; + ppu: PricePerUnit; + transporter: TransporterId; + truck_type: TruckTypeId; + weight: Weight; + region_begin:RegionId; + region_end: RegionId; + date_begin: Date; + date_end: Date; + prod_cost: PriceTotal; + trans_cost: PriceTotal; +}; + +// xxx how to represent huge result messages? +type QueryAllResults = [QueryAllResult]; + +// the "Service" +actor ProduceExchange { + + // Producer-based ingress messages: + // ================================ + + producerAddInventory( + prod: ProduceId, + quant:Quantity, + ppu: PricePerUnit, + begin:Date, + end: Date, + ) : async ?InventoryId { + // xxx + null + }; + + producerRemInventory(id:InventoryId) : async ?() { + // xxx + null + }; + + producerOrders(id:ProducerId) : async ?[OrderId] { + // xxx + null + }; + + // Transporter-based ingress messages: + // =================================== + + transporterAddRoute( + trans: TransporterId, + rstart: RegionId, + rend: RegionId, + start: Date, + end: Date, + cost: Price, + tt: TruckTypeId + ) : async ?RouteId { + // xxx + null + }; + + transporterRemRoute(id:RouteId) : async ?() { + // xxx + null + }; + + transporterOrders(id:TransporterId) : async ?[OrderId] { + // xxx + null + }; + + // Retailer-based ingress messages: + // =================================== + + retailerQueryAll(id:RetailerId) : async ?QueryAllResults { + // xxx + null + }; + + retailerPlaceOrder( + id:RetailerId, + inventory:InventoryId, + route:RouteId) : async ?OrderId + { + // xxx + null + }; + + retailerOrders(id:RetailerId) : async ?[OrderId] { + // xxx + null + }; + + // (Producer/Transporter/Retailer) ingress messages: + // ======================================================== + + orderInfo(id:OrderId) : async ?OrderInfo { + // xxx + null + }; + +}; diff --git a/stdlib/list.as b/stdlib/list.as new file mode 100644 index 00000000000..d16b2f78902 --- /dev/null +++ b/stdlib/list.as @@ -0,0 +1,339 @@ +/* + * Lists, a la functional programming, in ActorScript. + */ + +// Done: +// +// - standard list definition +// - standard list recursors: foldl, foldr, iter +// - standard higher-order combinators: map, filter, etc. +// - (Every function here: http://sml-family.org/Basis/list.html) + +// TODO-Matthew: File issues: +// +// - 'assert_unit' vs 'assert_any' (related note: 'any' vs 'none') +// - apply type args, but no actual args? (should be ok, and zero cost, right?) +// - unhelpful error message around conditional parens (search for XXX below) + +// TODO-Matthew: Write: +// +// - iterator objects, for use in 'for ... in ...' patterns +// - lists+pairs: zip, split, etc +// - regression tests for everything that is below + + +// polymorphic linked lists +type List = ?(T, List); + +let List = new { + + // empty list + func nil() : List = + null; + + // test for empty list + func isNil(l : List) : Bool { + switch l { + case null { true }; + case _ { false }; + } + }; + + // aka "list cons" + func push(x : T, l : List) : List = + ?(x, l); + + // last element, optionally; tail recursive + func last(l : List) : ?T = { + switch l { + case null { null }; + case (?(x,null)) { ?x }; + case (?(_,t)) { last(t) }; + } + }; + + // treat the list as a stack; combines 'hd' and (non-failing) 'tl' into one operation + func pop(l : List) : (?T, List) = { + switch l { + case null { (null, null) }; + case (?(h, t)) { (?h, t) }; + } + }; + + // length; tail recursive + func len(l : List) : Nat = { + func rec(l : List, n : Nat) : Nat { + switch l { + case null { n }; + case (?(_,t)) { rec(t,n+1) }; + } + }; + rec(l,0) + }; + + // array-like list access, but in linear time; tail recursive + func nth(l : List, n : Nat) : ?T = { + switch (n, l) { + case (_, null) { null }; + case (0, (?(h,t))) { ?h }; + case (_, (?(_,t))) { nth(t, n - 1) }; + } + }; + + // reverse; tail recursive + func rev(l : List) : List = { + func rec(l : List, r : List) : List { + switch l { + case null { r }; + case (?(h,t)) { rec(t,?(h,r)) }; + } + }; + rec(l, null) + }; + + // Called "app" in SML Basis, and "iter" in OCaml; tail recursive + func iter(l : List, f:T -> ()) : () = { + func rec(l : List) : () { + switch l { + case null { () }; + case (?(h,t)) { f(h) ; rec(t) }; + } + }; + rec(l) + }; + + // map; non-tail recursive + // (Note: need mutable Cons tails for tail-recursive map) + func map(l : List, f:T -> S) : List = { + func rec(l : List) : List { + switch l { + case null { null }; + case (?(h,t)) { ?(f(h),rec(t)) }; + } + }; + rec(l) + }; + + // filter; non-tail recursive + // (Note: need mutable Cons tails for tail-recursive version) + func filter(l : List, f:T -> Bool) : List = { + func rec(l : List) : List { + switch l { + case null { null }; + case (?(h,t)) { if (f(h)){ ?(h,rec(t)) } else { rec(t) } }; + } + }; + rec(l) + }; + + // map-and-filter; non-tail recursive + // (Note: need mutable Cons tails for tail-recursive version) + func mapFilter(l : List, f:T -> ?S) : List = { + func rec(l : List) : List { + switch l { + case null { null }; + case (?(h,t)) { + switch (f(h)) { + case null { rec(t) }; + case (?h_){ ?(h_,rec(t)) }; + } + }; + } + }; + rec(l) + }; + + // append; non-tail recursive + // (Note: need mutable Cons tails for tail-recursive version) + func append(l : List, m : List) : List = { + func rec(l : List) : List { + switch l { + case null { m }; + case (?(h,t)) {?(h,rec(l))}; + } + }; + rec(l) + }; + + // concat (aka "list join"); tail recursive, but requires "two passes" + func concat(l : List>) : List = { + // 1/2: fold from left to right, reverse-appending the sublists... + let r = + { let f = func(a:List, b:List) : List { revAppend(a,b) }; + foldLeft, List>(l, null, f) + }; + // 2/2: ...re-reverse the elements, to their original order: + rev(r) + }; + + // (See SML Basis library); tail recursive + func revAppend(l1 : List, l2 : List) : List = { + switch l1 { + case null { l2 }; + case (?(h,t)) { revAppend(t, ?(h,l2)) }; + } + }; + + // take; non-tail recursive + // (Note: need mutable Cons tails for tail-recursive version) + func take(l : List, n:Nat) : List = { + switch (l, n) { + case (_, 0) { null }; + case (null,_) { null }; + case (?(h, t), m) {?(h, take(t, m - 1))}; + } + }; + + // drop; tail recursive + func drop(l : List, n:Nat) : List = { + switch (l, n) { + case (l_, 0) { l_ }; + case (null, _) { null }; + case ((?(h,t)), m) { drop(t, m - 1) }; + } + }; + + // fold list left-to-right using f; tail recursive + func foldLeft(l : List, a:S, f:(T,S) -> S) : S = { + func rec(l:List, a:S) : S = { + switch l { + case null { a }; + case (?(h,t)) { rec(t, f(h,a)) }; + } + }; + rec(l,a) + }; + + // fold list right-to-left using f; non-tail recursive + func foldRight(l : List, a:S, f:(T,S) -> S) : S = { + func rec(l:List) : S = { + switch l { + case null { a }; + case (?(h,t)) { f(h, rec(t)) }; + } + }; + rec(l) + }; + + // test if there exists list element for which given predicate is true + func find(l: List, f:T -> Bool) : ?T = { + func rec(l:List) : ?T { + switch l { + case null { null }; + case (?(h,t)) { if (f(h)) { ?h } else { rec(t) } }; + } + }; + rec(l) + }; + + // test if there exists list element for which given predicate is true + func exists(l: List, f:T -> Bool) : Bool = { + func rec(l:List) : Bool { + switch l { + case null { false }; + // XXX/minor --- Missing parens on condition leads to unhelpful error: + //case (?(h,t)) { if f(h) { true } else { rec(t) } }; + case (?(h,t)) { if (f(h)) { true } else { rec(t) } }; + } + }; + rec(l) + }; + + // test if given predicate is true for all list elements + func all(l: List, f:T -> Bool) : Bool = { + func rec(l:List) : Bool { + switch l { + case null { true }; + case (?(h,t)) { if (f(h)) { false } else { rec(t) } }; + } + }; + rec(l) + }; + + // Given two ordered lists, merge them into a single ordered list + func merge(l1: List, l2: List, lte:(T,T) -> Bool) : List { + func rec(l1: List, l2: List) : List { + switch (l1, l2) { + case (null, _) { l2 }; + case (_, null) { l1 }; + case (?(h1,t1), ?(h2,t2)) { + if (lte(h1,h2)) { + ?(h1, rec(t1, ?(h2,t2))) + } else { + ?(h2, rec(?(h1,t1), t2)) + } + }; + } + }; + rec(l1, l2) + }; + + // Compare two lists lexicographic` ordering. tail recursive. + // XXX: Eventually, follow `collate` design from SML Basis, with real sum types, use 3-valued `order` type here. + // + func lessThanEq(l1: List, l2: List, lte:(T,T) -> Bool) : Bool { + func rec(l1: List, l2: List) : Bool { + switch (l1, l2) { + case (null, _) { true }; + case (_, null) { false }; + case (?(h1,t1), ?(h2,t2)) { + if (lte(h1,h2)) { + rec(t1, t2) + } else { + false + } + }; + } + }; + rec(l1, l2) + }; + + // Compare two lists for equality. tail recursive. + // `isEq(l1, l2)` =equiv= `lessThanEq(l1,l2) && lessThanEq(l2,l1)`, but the former is more efficient. + func isEq(l1: List, l2: List, eq:(T,T) -> Bool) : Bool { + func rec(l1: List, l2: List) : Bool { + switch (l1, l2) { + case (null, null) { true }; + case (null, _) { false }; + case (_, null) { false }; + case (?(h1,t1), ?(h2,t2)) { + if (eq(h1,h2)) { + rec(t1, t2) + } else { + false + } + }; + } + }; + rec(l1, l2) + }; + + // using a predicate, create two lists from one: the "true" list, and the "false" list. + // (See SML basis library); non-tail recursive + func partition(l: List, f:T -> Bool) : (List, List) { + func rec(l: List) : (List, List) { + switch l { + case null { (null, null) }; + case (?(h,t)) { + let (pl,pr) = rec(t); + if (f(h)) { + (?(h, pl), pr) + } else { + (pl, ?(h, pr)) + } + }; + } + }; + rec(l) + }; + + // generate a list based on a length, and a function from list index to list element; + // (See SML basis library); non-tail recursive + func tabulate(n:Nat, f:Nat -> T) : List { + func rec(i:Nat) : List { + if (i == n) { null } else { ?(f(i), rec(i+1)) } + }; + rec(0) + }; + +}; diff --git a/stdlib/listTest.as b/stdlib/listTest.as new file mode 100644 index 00000000000..4d84bd6e6ec --- /dev/null +++ b/stdlib/listTest.as @@ -0,0 +1,65 @@ +// import List + +type X = Nat; + +func List__tests() { + + func opnatEq(a : ?Nat, b : ?Nat) : Bool { + switch (a, b) { + case (null, null) { true }; + case (?aaa, ?bbb) { aaa == bbb }; + case (_, _ ) { false }; + } + }; + func opnat_isnull(a : ?Nat) : Bool { + switch a { + case (null) { true }; + case (?aaa) { false }; + } + }; + + // ## Construction + let l1 = List.nil(); + let l2 = List.push(2, l1); + let l3 = List.push(3, l2); + + // ## Projection -- use nth + assert (opnatEq(List.nth(l3, 0), ?3)); + assert (opnatEq(List.nth(l3, 1), ?2)); + assert (opnatEq(List.nth(l3, 2), null)); + //assert (opnatEq (hd(l3), ?3)); + //assert (opnatEq (hd(l2), ?2)); + //assert (opnat_isnull(hd(l1))); + + /* + // ## Projection -- use nth + assert (opnatEq(nth(l3, 0), ?3)); + assert (opnatEq(nth(l3, 1), ?2)); + assert (opnatEq(nth(l3, 2), null)); + assert (opnatEq (hd(l3), ?3)); + assert (opnatEq (hd(l2), ?2)); + assert (opnat_isnull(hd(l1))); + */ + + // ## Deconstruction + let (a1, t1) = List.pop(l3); + assert (opnatEq(a1, ?3)); + let (a2, t2) = List.pop(l2); + assert (opnatEq(a2, ?2)); + let (a3, t3) = List.pop(l1); + assert (opnatEq(a3, null)); + assert (List.isNil(t3)); + + // ## List functions + assert (List.len(l1) == 0); + assert (List.len(l2) == 1); + assert (List.len(l3) == 2); + + // ## List functions + assert (List.len(l1) == 0); + assert (List.len(l2) == 1); + assert (List.len(l3) == 2); +}; + +// Run the tests +List__tests(); diff --git a/stdlib/nonCriticalPath/stream.as b/stdlib/nonCriticalPath/stream.as new file mode 100644 index 00000000000..9945016de04 --- /dev/null +++ b/stdlib/nonCriticalPath/stream.as @@ -0,0 +1,109 @@ +/* + * Streams, a la functional programming, in ActorScript. + * + * Streams are lazy lists that may or may not end. + * If non-empty, a stream contains a value "today", + * and a thunk for the value "tomorrow", if any. + * + */ + +// Done: +// +// - standard stream definition (well, two versions) +// - standard higher-order combinators: map, mapfilter, merge + +// TODO-Matthew: Write: +// +// - (more) stream combinators: take, drop, sort, etc... +// - iterator objects, for use in 'for ... in ...' patterns +// - streams+pairs: zip, split, etc +// - regression tests for everything that is below + +// TODO-Matthew: File issues: +// +// - unhelpful error message around variable shadowing (search for XXX below) +// + +// Thunks are not primitive in AS, +// ..but we can encode them as objects with a force method: +type Thk = {force:() -> T}; + +// A "Stream Head" ("Sh") is the head of the stream, which _always_ +// contains a value "today"; Its "tail" is a thunk that produces the +// next stream head ("tomorrow"), or `null`. +type Sh = (T, ?Thk>); + +// "Optional Stream Head" (Osh) is the optional head of the stream. +// This type is related to Sh, but is not equivalent. +type Osh = ?(T, Thk>); + +// type Stream = +// ??? Sh or Osh +// Q: Which is more more "conventional?" +// + +// stream map; trivially tail recursive. lazy. +func map(l : Osh, f:T -> S) : Osh = { + func rec(l : Osh) : Osh { + switch l { + case null { null }; + case (?(h,t)) { + let s = new{force():Osh{ rec(t.force()) }}; + ?(f(h),s) + }; + } + }; + rec(l) +}; + +// stream merge (aka "collate"); trivially tail recursive. lazy. +func merge(s1 : Osh, s2 : Osh, f:(T,T) -> Bool) : Osh = { + func rec(s1 : Osh, s2 : Osh) : Osh { + switch (s1, s2) { + case (null, _) { s2 }; + case (_, null) { s1 }; + case (?(h1,t1), ?(h2,t2)) { + if (f(h1,h2)) { + // case: h1 is "today", h2 is "later"... + let s = new{force():Osh{ rec(t1.force(), s2) }}; + ?(h1,s) + } else { + // case: h2 is "today", h2 is "later"... + let s = new{force():Osh{ rec(s1, t2.force()) }}; + ?(h2,s) + } + } + } + }; + rec(s1, s2) +}; + +// stream map-and-filter; tail recursive. +// acts eagerly when the predicate fails, +// and lazily when it succeeds. +func mapfilter(l : Osh, f:T -> ?S) : Osh = { + func rec(s : Osh) : Osh { + switch s { + case null { null }; + case (?(h,t)) { + switch (f(h)) { + case null { rec(t.force()) }; + case (?h_){ + // XXX -- When we shadow `t` we get a strange/wrong type error: + // + // type error, expression of type + // Osh = ?(S/3, Thk>) + // cannot produce expected type + // ?(T/28, Thk>) + // + // let t = new{force():Osh{ rec(t.force()) }}; + // ?(h_,t) + let s = new{force():Osh{ rec(t.force()) }}; + ?(h_,s) + }; + } + }; + } + }; + rec(l) +} diff --git a/stdlib/nonCriticalPath/thunk.as b/stdlib/nonCriticalPath/thunk.as new file mode 100644 index 00000000000..c13612b7b01 --- /dev/null +++ b/stdlib/nonCriticalPath/thunk.as @@ -0,0 +1,31 @@ +/* + * Thunks, a la functional programming, in ActorScript. + */ + +// Thunks are not primitive in AS, +// ..but we can encode them as objects with a force method: +type Thk = {force:() -> T}; + +// lift a value into a "value-producing thunk" +func lift(a:T) : Thk = + new { force() : T { a } }; + +// apply a function to a thunk's value +func app(f:T->S, x:Thk) : Thk { + new { force() : S { f(x.force()) } } +}; + +// pair two thunks' values +func pair(x:Thk, y:Thk) : Thk<(T,S)> { + new { force() : ((T,S)) { (x.force(), y.force()) } } +}; + +// project first from a pair-valued thunk +func fst(x:Thk<(T,S)>) : Thk { + new { force() : T { x.force().0 } } +}; + +// project second from a pair-valued thunk +func snd(x:Thk<(T,S)>) : Thk { + new { force() : S { x.force().1 } } +}; diff --git a/stdlib/set.as b/stdlib/set.as new file mode 100644 index 00000000000..00ebf8e664c --- /dev/null +++ b/stdlib/set.as @@ -0,0 +1,75 @@ +// import Trie; + +/////////////////////////////////////////////////////////////////////// + +/* + Sets are partial maps from element type to unit type, + i.e., the partial map represents the set with its domain. +*/ + +// TODO-Matthew: +// +// - for now, we pass a hash value each time we pass an element value; +// in the future, we might avoid passing element hashes with each element in the API; +// related to: https://dfinity.atlassian.net/browse/AST-32 +// +// - similarly, we pass an equality function when we do some operations. +// in the future, we might avoid this via https://dfinity.atlassian.net/browse/AST-32 +// + +type Set = Trie; + +let Set = new { + + func empty():Set = + Trie.empty(); + + func insert(s:Set, x:T, xh:Hash):Set = { + let (s2, _) = Trie.insert(s, x, xh, ()); + s2 + }; + + func remove(s:Set, x:T, xh:Hash):Set = { + let (s2, _) = Trie.remove(s, x, xh); + s2 + }; + + func eq(s1:Set, s2:Set, eq:(T,T)->Bool):Bool { + // XXX: Todo: use a smarter check + Trie.equalStructure(s1, s2, eq, unitEq) + }; + + func card(s:Set) : Nat { + Trie.foldUp + (s, + func(n:Nat,m:Nat):Nat{n+m}, + func(_:T,_:()):Nat{1}, + 0) + }; + + func mem(s:Set, x:T, xh:Hash, eq:(T,T)->Bool):Bool { + switch (Trie.find(s, x, xh, eq)) { + case null { false }; + case (?_) { true }; + } + }; + + func union(s1:Set, s2:Set):Set { + let s3 = Trie.merge(s1, s2); + s3 + }; + + func diff(s1:Set, s2:Set, eq:(T,T)->Bool):Set { + let s3 = Trie.diff(s1, s2, eq); + s3 + }; + + func intersect(s1:Set, s2:Set, eq:(T,T)->Bool):Set { + let noop : ((),())->(()) = func (_:(),_:()):(())=(); + let s3 = Trie.conj(s1, s2, eq, noop); + s3 + }; + + func unitEq (_:(),_:()):Bool{ true }; + +}; \ No newline at end of file diff --git a/stdlib/setDb.as b/stdlib/setDb.as new file mode 100644 index 00000000000..96a7d9a99cc --- /dev/null +++ b/stdlib/setDb.as @@ -0,0 +1,107 @@ +// import Set + +//////////////////////////////////////////////////////////////////// +let SetDb = new { + + private func setDbPrint(s:Set) { + func rec(s:Set, ind:Nat, bits:Hash) { + func indPrint(i:Nat) { + if (i == 0) { } else { print "| "; indPrint(i-1) } + }; + func bitsPrintRev(bits:Bits) { + switch bits { + case null { print "" }; + case (?(bit,bits_)) { + bitsPrintRev(bits_); + if bit { print "1R." } + else { print "0L." } + } + } + }; + switch s { + case null { + //indPrint(ind); + //bitsPrintRev(bits); + //print "(null)\n"; + }; + case (?n) { + switch (n.key) { + case null { + //indPrint(ind); + //bitsPrintRev(bits); + //print "bin \n"; + rec(n.right, ind+1, ?(true, bits)); + rec(n.left, ind+1, ?(false,bits)); + //bitsPrintRev(bits); + //print ")\n" + }; + case (?k) { + //indPrint(ind); + bitsPrintRev(bits); + print "(leaf "; + printInt k; + print ")\n"; + }; + } + }; + } + }; + rec(s, 0, null); + }; + + //////////////////////////////////////////////////////////////////////////////// + + private func natEq(n:Nat,m:Nat):Bool{ n == m}; + + func insert(s:Set, x:Nat, xh:Hash):Set = { + print " setInsert("; + printInt x; + print ")"; + let r = Set.insert(s,x,xh); + print ";\n"; + setDbPrint(r); + r + }; + + func mem(s:Set, sname:Text, x:Nat, xh:Hash):Bool = { + print " setMem("; + print sname; + print ", "; + printInt x; + print ")"; + let b = Set.mem(s,x,xh,natEq); + if b { print " = true" } else { print " = false" }; + print ";\n"; + b + }; + + func union(s1:Set, s1name:Text, s2:Set, s2name:Text):Set = { + print " setUnion("; + print s1name; + print ", "; + print s2name; + print ")"; + // also: test that merge agrees with disj: + let r1 = Set.union(s1, s2); + let r2 = Trie.disj(s1, s2, natEq, func (_:?(),_:?()):(())=()); + assert(Trie.equalStructure(r1, r2, natEq, Set.unitEq)); + print ";\n"; + setDbPrint(r1); + print "=========\n"; + setDbPrint(r2); + r1 + }; + + func intersect(s1:Set, s1name:Text, s2:Set, s2name:Text):Set = { + print " setIntersect("; + print s1name; + print ", "; + print s2name; + print ")"; + let r = Set.intersect(s1, s2, natEq); + print ";\n"; + setDbPrint(r); + r + }; + +}; \ No newline at end of file diff --git a/stdlib/setDbTest.as b/stdlib/setDbTest.as new file mode 100644 index 00000000000..193e54e7772 --- /dev/null +++ b/stdlib/setDbTest.as @@ -0,0 +1,175 @@ +// import SetDb + +func SetDb__test() { + let hash_0 = ?(false,?(false,?(false,?(false, null)))); + let hash_1 = ?(false,?(false,?(false,?(true, null)))); + let hash_2 = ?(false,?(false,?(true, ?(false, null)))); + let hash_3 = ?(false,?(false,?(true, ?(true, null)))); + let hash_4 = ?(false,?(true, ?(false,?(false, null)))); + let hash_5 = ?(false,?(true, ?(false,?(true, null)))); + let hash_6 = ?(false,?(true, ?(true, ?(false, null)))); + let hash_7 = ?(false,?(true, ?(true, ?(true, null)))); + let hash_8 = ?(true, ?(false,?(false,?(false, null)))); + + print "inserting...\n"; + // Insert numbers [0..8] into the set, using their bits as their hashes: + let s0 : Set = Set.empty(); + assert(Set.card(s0) == 0); + + let s1 : Set = SetDb.insert(s0, 0, hash_0); + assert(Set.card(s1) == 1); + + let s2 : Set = SetDb.insert(s1, 1, hash_1); + assert(Set.card(s2) == 2); + + let s3 : Set = SetDb.insert(s2, 2, hash_2); + assert(Set.card(s3) == 3); + + let s4 : Set = SetDb.insert(s3, 3, hash_3); + assert(Set.card(s4) == 4); + + let s5 : Set = SetDb.insert(s4, 4, hash_4); + assert(Set.card(s5) == 5); + + let s6 : Set = SetDb.insert(s5, 5, hash_5); + assert(Set.card(s6) == 6); + + let s7 : Set = SetDb.insert(s6, 6, hash_6); + assert(Set.card(s7) == 7); + + let s8 : Set = SetDb.insert(s7, 7, hash_7); + assert(Set.card(s8) == 8); + + let s9 : Set = SetDb.insert(s8, 8, hash_8); + assert(Set.card(s9) == 9); + print "done.\n"; + + print "unioning...\n"; + let s1s2 : Set = SetDb.union(s1, "s1", s2, "s2"); + let s2s1 : Set = SetDb.union(s2, "s2", s1, "s1"); + let s3s2 : Set = SetDb.union(s3, "s3", s2, "s2"); + let s4s2 : Set = SetDb.union(s4, "s4", s2, "s2"); + let s1s5 : Set = SetDb.union(s1, "s1", s5, "s5"); + let s0s2 : Set = SetDb.union(s0, "s0", s2, "s2"); + print "done.\n"; + + print "intersecting...\n"; + let s3is6 : Set = SetDb.intersect(s3, "s3", s6, "s6"); + let s2is1 : Set = SetDb.intersect(s2, "s2", s1, "s1"); + print "done.\n"; + + + print "testing membership...\n"; + + // Element 0: Test memberships of each set defined above for element 0 + assert( not( SetDb.mem(s0, "s0", 0, hash_0 ) )); + assert( SetDb.mem(s1, "s1", 0, hash_0 ) ); + assert( SetDb.mem(s2, "s2", 0, hash_0 ) ); + assert( SetDb.mem(s3, "s3", 0, hash_0 ) ); + assert( SetDb.mem(s4, "s4", 0, hash_0 ) ); + assert( SetDb.mem(s5, "s5", 0, hash_0 ) ); + assert( SetDb.mem(s6, "s6", 0, hash_0 ) ); + assert( SetDb.mem(s7, "s7", 0, hash_0 ) ); + assert( SetDb.mem(s8, "s8", 0, hash_0 ) ); + assert( SetDb.mem(s9, "s9", 0, hash_0 ) ); + + // Element 1: Test memberships of each set defined above for element 1 + assert( not(SetDb.mem(s0, "s0", 1, hash_1 )) ); + assert( not(SetDb.mem(s1, "s1", 1, hash_1 )) ); + assert( SetDb.mem(s2, "s2", 1, hash_1 ) ); + assert( SetDb.mem(s3, "s3", 1, hash_1 ) ); + assert( SetDb.mem(s4, "s4", 1, hash_1 ) ); + assert( SetDb.mem(s5, "s5", 1, hash_1 ) ); + assert( SetDb.mem(s6, "s6", 1, hash_1 ) ); + assert( SetDb.mem(s7, "s7", 1, hash_1 ) ); + assert( SetDb.mem(s8, "s8", 1, hash_1 ) ); + assert( SetDb.mem(s9, "s9", 1, hash_1 ) ); + + // Element 2: Test memberships of each set defined above for element 2 + assert( not(SetDb.mem(s0, "s0", 2, hash_2 )) ); + assert( not(SetDb.mem(s1, "s1", 2, hash_2 )) ); + assert( not(SetDb.mem(s2, "s2", 2, hash_2 )) ); + assert( SetDb.mem(s3, "s3", 2, hash_2 ) ); + assert( SetDb.mem(s4, "s4", 2, hash_2 ) ); + assert( SetDb.mem(s5, "s5", 2, hash_2 ) ); + assert( SetDb.mem(s6, "s6", 2, hash_2 ) ); + assert( SetDb.mem(s7, "s7", 2, hash_2 ) ); + assert( SetDb.mem(s8, "s8", 2, hash_2 ) ); + assert( SetDb.mem(s9, "s9", 2, hash_2 ) ); + + // Element 3: Test memberships of each set defined above for element 3 + assert( not(SetDb.mem(s0, "s0", 3, hash_3 )) ); + assert( not(SetDb.mem(s1, "s1", 3, hash_3 )) ); + assert( not(SetDb.mem(s2, "s2", 3, hash_3 )) ); + assert( not(SetDb.mem(s3, "s3", 3, hash_3 )) ); + assert( SetDb.mem(s4, "s4", 3, hash_3 ) ); + assert( SetDb.mem(s5, "s5", 3, hash_3 ) ); + assert( SetDb.mem(s6, "s6", 3, hash_3 ) ); + assert( SetDb.mem(s7, "s7", 3, hash_3 ) ); + assert( SetDb.mem(s8, "s8", 3, hash_3 ) ); + assert( SetDb.mem(s9, "s9", 3, hash_3 ) ); + + // Element 4: Test memberships of each set defined above for element 4 + assert( not(SetDb.mem(s0, "s0", 4, hash_4 )) ); + assert( not(SetDb.mem(s1, "s1", 4, hash_4 )) ); + assert( not(SetDb.mem(s2, "s2", 4, hash_4 )) ); + assert( not(SetDb.mem(s3, "s3", 4, hash_4 )) ); + assert( not(SetDb.mem(s4, "s4", 4, hash_4 )) ); + assert( SetDb.mem(s5, "s5", 4, hash_4 ) ); + assert( SetDb.mem(s6, "s6", 4, hash_4 ) ); + assert( SetDb.mem(s7, "s7", 4, hash_4 ) ); + assert( SetDb.mem(s8, "s8", 4, hash_4 ) ); + assert( SetDb.mem(s9, "s9", 4, hash_4 ) ); + + // Element 5: Test memberships of each set defined above for element 5 + assert( not(SetDb.mem(s0, "s0", 5, hash_5 )) ); + assert( not(SetDb.mem(s1, "s1", 5, hash_5 )) ); + assert( not(SetDb.mem(s2, "s2", 5, hash_5 )) ); + assert( not(SetDb.mem(s3, "s3", 5, hash_5 )) ); + assert( not(SetDb.mem(s4, "s4", 5, hash_5 )) ); + assert( not(SetDb.mem(s5, "s5", 5, hash_5 )) ); + assert( SetDb.mem(s6, "s6", 5, hash_5 ) ); + assert( SetDb.mem(s7, "s7", 5, hash_5 ) ); + assert( SetDb.mem(s8, "s8", 5, hash_5 ) ); + assert( SetDb.mem(s9, "s9", 5, hash_5 ) ); + + // Element 6: Test memberships of each set defined above for element 6 + assert( not(SetDb.mem(s0, "s0", 6, hash_6 )) ); + assert( not(SetDb.mem(s1, "s1", 6, hash_6 )) ); + assert( not(SetDb.mem(s2, "s2", 6, hash_6 )) ); + assert( not(SetDb.mem(s3, "s3", 6, hash_6 )) ); + assert( not(SetDb.mem(s4, "s4", 6, hash_6 )) ); + assert( not(SetDb.mem(s5, "s5", 6, hash_6 )) ); + assert( not(SetDb.mem(s6, "s6", 6, hash_6 )) ); + assert( SetDb.mem(s7, "s7", 6, hash_6 ) ); + assert( SetDb.mem(s8, "s8", 6, hash_6 ) ); + assert( SetDb.mem(s9, "s9", 6, hash_6 ) ); + + // Element 7: Test memberships of each set defined above for element 7 + assert( not(SetDb.mem(s0, "s0", 7, hash_7 )) ); + assert( not(SetDb.mem(s1, "s1", 7, hash_7 )) ); + assert( not(SetDb.mem(s2, "s2", 7, hash_7 )) ); + assert( not(SetDb.mem(s3, "s3", 7, hash_7 )) ); + assert( not(SetDb.mem(s4, "s4", 7, hash_7 )) ); + assert( not(SetDb.mem(s5, "s5", 7, hash_7 )) ); + assert( not(SetDb.mem(s6, "s6", 7, hash_7 )) ); + assert( not(SetDb.mem(s7, "s7", 7, hash_7 )) ); + assert( SetDb.mem(s8, "s8", 7, hash_7 ) ); + assert( SetDb.mem(s9, "s9", 7, hash_7 ) ); + + // Element 8: Test memberships of each set defined above for element 8 + assert( not(SetDb.mem(s0, "s0", 8, hash_8 )) ); + assert( not(SetDb.mem(s1, "s1", 8, hash_8 )) ); + assert( not(SetDb.mem(s2, "s2", 8, hash_8 )) ); + assert( not(SetDb.mem(s3, "s3", 8, hash_8 )) ); + assert( not(SetDb.mem(s4, "s4", 8, hash_8 )) ); + assert( not(SetDb.mem(s6, "s6", 8, hash_8 )) ); + assert( not(SetDb.mem(s6, "s6", 8, hash_8 )) ); + assert( not(SetDb.mem(s7, "s7", 8, hash_8 )) ); + assert( not(SetDb.mem(s8, "s8", 8, hash_8 )) ); + assert( SetDb.mem(s9, "s9", 8, hash_8 ) ); + + print "done.\n"; +}; + +SetDb__test(); \ No newline at end of file diff --git a/stdlib/trie.as b/stdlib/trie.as new file mode 100644 index 00000000000..aaec442f09c --- /dev/null +++ b/stdlib/trie.as @@ -0,0 +1,721 @@ +/* + Hash Tries in ActorScript + ------------------------- + + Functional maps (and sets) whose representation is "canonical", and + history independent. + + See this POPL 1989 paper (Section 6): + - "Incremental computation via function caching", Pugh & Teitelbaum. + - https://dl.acm.org/citation.cfm?id=75305 + - Public copy here: http://matthewhammer.org/courses/csci7000-s17/readings/Pugh89.pdf + + By contrast, other usual functional representations of maps (AVL + Trees, Red-Black Trees) do not enjoy history independence, and are + each more complex to implement (e.g., each requires "rebalancing"; + these trees never do). + + */ + +// Done: +// +// - (hacky) type definition; XXX: need real sum types to clean it up +// - find operation +// - insert operation +// - remove operation +// - replace operation (remove+insert via a single traversal) +// - basic encoding of sets, and some set operations +// - basic tests (and primitive debugging) for set operations +// - write trie operations that operate over pairs of tries: +// for set union, difference and intersection. + +// TODO-Matthew: +// +// - (more) regression tests for everything that is below +// +// - handle hash collisions gracefully; +// ==> Blocked on AS module support, for using List module. +// +// - adapt the path length of each subtree to its cardinality; avoid +// needlessly long paths, or paths that are too short for their +// subtree's size. +// +// - iterator objects, for use in 'for ... in ...' patterns + + +// import List + +// TEMP: A "bit string" as a linked list of bits: +type Bits = ?(Bool, Bits); + +// TODO: Replace this definition WordX, for some X, once we have these types in AS. +type Hash = Bits; +//type Hash = Word16; +//type Hash = Word8; + +// Uniform depth assumption: +// +// We make a simplifying assumption, for now: All defined paths in the +// trie have a uniform length, the same as the number of bits of a +// hash, starting at the LSB, that we use for indexing. +// +// - If the number is too low, our expected O(log n) bounds become +// expected O(n). +// +// - If the number is too high, we waste constant factors for +// representing small sets/maps. +// +// TODO: Make this more robust by making this number adaptive for each +// path, and based on the content of the trie along that path. +// +let HASH_BITS = 4; + +// XXX: See AST-42 +type Node = { + left:Trie; + right:Trie; + key:?K; + val:?V +}; +type Trie = ?Node; + +/* See AST-42 (sum types); we want this type definition instead: + +// Use a sum type (AST-42) +type Trie = { #leaf : LeafNode; #bin : BinNode; #empty }; +type BinNode = { left:Trie; right:Trie }; +type LeafNode = { key:K; val:V }; + +*/ + +let Trie = new { + + // XXX: until AST-42: + func isNull(x : ?X) : Bool { + switch x { + case null { true }; + case (?_) { false }; + }; + }; + + // XXX: until AST-42: + func assertIsNull(x : ?X) { + switch x { + case null { assert(true) }; + case (?_) { assert(false) }; + }; + }; + + // XXX: until AST-42: + func makeEmpty() : Trie + = null; + + // Note: More general version of this operation below, which tests for + // "deep emptiness" (subtrees that have branching structure, but no + // leaves; these can result from naive filtering operations, for + // instance). + // + // // XXX: until AST-42: + // func isEmpty(t:Trie) : Bool { + // switch t { + // case null { true }; + // case (?_) { false }; + // }; + // }; + + // XXX: until AST-42: + func assertIsEmpty(t : Trie) { + switch t { + case null { assert(true) }; + case (?_) { assert(false) }; + }; + }; + + // XXX: until AST-42: + func makeBin(l:Trie, r:Trie) : Trie { + ?(new {left=l; right=r; key=null; val=null }) + }; + + // XXX: until AST-42: + func isBin(t:Trie) : Bool { + switch t { + case null { false }; + case (?t_) { + switch (t_.key) { + case null { true }; + case _ { false }; + }; + }; + } + }; + + // XXX: until AST-42: + func makeLeaf(k:K, v:V) : Trie { + ?(new {left=null; right=null; key=?k; val=?v }) + }; + + // XXX: until AST-42: + func matchLeaf(t:Trie) : ?(K,V) { + switch t { + case null { null }; + case (?t_) { + switch (t_.key, t_.val) { + case (?k,?v) ?(k,v); + case (_) null; + } + }; + } + }; + + // XXX: until AST-42: + func isLeaf(t:Trie) : Bool { + switch t { + case null { false }; + case (?t_) { + switch (t_.key) { + case null { false }; + case _ { true }; + } + }; + } + }; + // XXX: until AST-42: + func assertIsBin(t : Trie) { + switch t { + case null { assert(false) }; + case (?n) { + assertIsNull(n.key); + assertIsNull(n.val); + }; + } + }; + + // XXX: until AST-42: + func getLeafKey(t : Node) : K { + assertIsNull>(t.left); + assertIsNull>(t.right); + switch (t.key) { + case (?k) { k }; + case null { getLeafKey(t) }; + } + }; + + // XXX: this helper is an ugly hack; we need real sum types to avoid it, I think: + func getLeafVal(t : Node) : ?V { + assertIsNull>(t.left); + assertIsNull>(t.right); + t.val + }; + + // TODO: Replace with bitwise operations on Words, once we have each of those in AS. + // For now, we encode hashes as lists of booleans. + func getHashBit(h:Hash, pos:Nat) : Bool { + switch h { + case null { + // XXX: Should be an error case; it shouldn't happen in our tests if we set them up right. + false + }; + case (?(b, h_)) { + if (pos == 0) { b } + else { getHashBit(h_, pos-1) } + }; + } + }; + + // part of "public interface": + func empty() : Trie = makeEmpty(); + + // helper function for constructing new paths of uniform length + func buildNewPath(bitpos:Nat, k:K, k_hash:Hash, ov:?V) : Trie { + func rec(bitpos:Nat) : Trie { + if ( bitpos < HASH_BITS ) { + // create new bin node for this bit of the hash + let path = rec(bitpos+1); + let bit = getHashBit(k_hash, bitpos); + if (not bit) { + ?(new {left=path; right=null; key=null; val=null}) + } + else { + ?(new {left=null; right=path; key=null; val=null}) + } + } else { + // create new leaf for (k,v) pair + ?(new {left=null; right=null; key=?k; val=ov }) + } + }; + rec(bitpos) + }; + + // replace the given key's value option with the given one, returning the previous one + func replace(t : Trie, k:K, k_hash:Hash, v:?V) : (Trie, ?V) { + // For `bitpos` in 0..HASH_BITS, walk the given trie and locate the given value `x`, if it exists. + func rec(t : Trie, bitpos:Nat) : (Trie, ?V) { + if ( bitpos < HASH_BITS ) { + switch t { + case null { (buildNewPath(bitpos, k, k_hash, v), null) }; + case (?n) { + assertIsBin(t); + let bit = getHashBit(k_hash, bitpos); + // rebuild either the left or right path with the inserted (k,v) pair + if (not bit) { + let (l, v_) = rec(n.left, bitpos+1); + (?(new {left=l; right=n.right; key=null; val=null }), v_) + } + else { + let (r, v_) = rec(n.right, bitpos+1); + (?(new {left=n.left; right=r; key=null; val=null }), v_) + } + }; + } + } else { + // No more walking; we should be at a leaf now, by construction invariants. + switch t { + case null { (buildNewPath(bitpos, k, k_hash, v), null) }; + case (?l) { + // TODO: Permit hash collisions by walking a list/array of KV pairs in each leaf: + (?(new{left=null;right=null;key=?k;val=v}), l.val) + }; + } + } + }; + rec(t, 0) + }; + + // insert the given key's value in the trie; return the new trie + func insert(t : Trie, k:K, k_hash:Hash, v:V) : (Trie, ?V) { + replace(t, k, k_hash, ?v) + }; + + // remove the given key's value in the trie; return the new trie + func remove(t : Trie, k:K, k_hash:Hash) : (Trie, ?V) { + replace(t, k, k_hash, null) + }; + + // find the given key's value in the trie, or return null if nonexistent + func find(t : Trie, k:K, k_hash:Hash, keq:(K,K) -> Bool) : ?V { + // For `bitpos` in 0..HASH_BITS, walk the given trie and locate the given value `x`, if it exists. + func rec(t : Trie, bitpos:Nat) : ?V { + if ( bitpos < HASH_BITS ) { + switch t { + case null { + // the trie may be "sparse" along paths leading to no keys, and may end early. + null + }; + case (?n) { + assertIsBin(t); + let bit = getHashBit(k_hash, bitpos); + if (not bit) { rec(n.left, bitpos+1) } + else { rec(n.right, bitpos+1) } + }; + } + } else { + // No more walking; we should be at a leaf now, by construction invariants. + switch t { + case null { null }; + case (?l) { + // TODO: Permit hash collisions by walking a list/array of KV pairs in each leaf: + if (keq(getLeafKey(l), k)) { + getLeafVal(l) + } else { + null + } + }; + } + } + }; + rec(t, 0) + }; + + // merge tries, preferring the right trie where there are collisions + // in common keys. note: the `disj` operation generalizes this `merge` + // operation in various ways, and does not (in general) loose + // information; this operation is a simpler, special case. + func merge(tl:Trie, tr:Trie) : Trie { + switch (tl, tr) { + case (null, _) { return tr }; + case (_, null) { return tl }; + case (?nl,?nr) { + switch (isBin(tl), + isBin(tr)) { + case (true, true) { + let t0 = merge(nl.left, nr.left); + let t1 = merge(nl.right, nr.right); + makeBin(t0, t1) + }; + case (false, true) { + assert(false); + // XXX impossible, until we lift uniform depth assumption + tr + }; + case (true, false) { + assert(false); + // XXX impossible, until we lift uniform depth assumption + tr + }; + case (false, false) { + /// XXX: handle hash collisions here. + tr + }; + } + }; + } + }; + + // The key-value pairs of the final trie consists of those pairs of + // the left trie whose keys are not present in the right trie; the + // values of the right trie are irrelevant. + func diff(tl:Trie, tr:Trie, keq:(K,K)->Bool) : Trie { + func rec(tl:Trie, tr:Trie) : Trie { + switch (tl, tr) { + case (null, _) { return makeEmpty() }; + case (_, null) { return tl }; + case (?nl,?nr) { + switch (isBin(tl), + isBin(tr)) { + case (true, true) { + let t0 = rec(nl.left, nr.left); + let t1 = rec(nl.right, nr.right); + makeBin(t0, t1) + }; + case (false, true) { + assert(false); + // XXX impossible, until we lift uniform depth assumption + tl + }; + case (true, false) { + assert(false); + // XXX impossible, until we lift uniform depth assumption + tl + }; + case (false, false) { + /// XXX: handle hash collisions here. + switch (nl.key, nr.key) { + case (?kl, ?kr) { + if (keq(kl, kr)) { + makeEmpty(); + } else { + tl + }}; + // XXX impossible, and unnecessary with AST-42. + case _ { tl } + } + }; + } + }; + }}; + rec(tl, tr) + }; + + // This operation generalizes the notion of "set union" to finite maps. + // Produces a "disjunctive image" of the two tries, where the values of + // matching keys are combined with the given binary operator. + // + // For unmatched key-value pairs, the operator is still applied to + // create the value in the image. To accomodate these various + // situations, the operator accepts optional values, but is never + // applied to (null, null). + // + func disj(tl:Trie, tr:Trie, + keq:(K,K)->Bool, vbin:(?V,?W)->X) + : Trie + { + func recL(t:Trie) : Trie { + switch t { + case (null) null; + case (? n) { + switch (matchLeaf(t)) { + case (?(k,v)) { makeLeaf(k, vbin(?v, null)) }; + case _ { makeBin(recL(n.left), recL(n.right)) } + } + }; + }}; + func recR(t:Trie) : Trie { + switch t { + case (null) null; + case (? n) { + switch (matchLeaf(t)) { + case (?(k,w)) { makeLeaf(k, vbin(null, ?w)) }; + case _ { makeBin(recR(n.left), recR(n.right)) } + } + }; + }}; + func rec(tl:Trie, tr:Trie) : Trie { + switch (tl, tr) { + // empty-empty terminates early, all other cases do not. + case (null, null) { makeEmpty() }; + case (null, _ ) { recR(tr) }; + case (_, null) { recL(tl) }; + case (? nl, ? nr) { + switch (isBin(tl), + isBin(tr)) { + case (true, true) { + let t0 = rec(nl.left, nr.left); + let t1 = rec(nl.right, nr.right); + makeBin(t0, t1) + }; + case (false, true) { + assert(false); + // XXX impossible, until we lift uniform depth assumption + makeEmpty() + }; + case (true, false) { + assert(false); + // XXX impossible, until we lift uniform depth assumption + makeEmpty() + }; + case (false, false) { + assert(isLeaf(tl)); + assert(isLeaf(tr)); + switch (nl.key, nl.val, nr.key, nr.val) { + // leaf-leaf case + case (?kl, ?vl, ?kr, ?vr) { + if (keq(kl, kr)) { + makeLeaf(kl, vbin(?vl, ?vr)); + } else { + // XXX: handle hash collisions here. + makeEmpty() + } + }; + // XXX impossible, and unnecessary with AST-42. + case _ { makeEmpty() }; + } + }; + } + }; + }}; + rec(tl, tr) + }; + + // This operation generalizes the notion of "set intersection" to + // finite maps. Produces a "conjuctive image" of the two tries, where + // the values of matching keys are combined with the given binary + // operator, and unmatched key-value pairs are not present in the output. + func conj(tl:Trie, tr:Trie, + keq:(K,K)->Bool, vbin:(V,W)->X) + : Trie + { + func rec(tl:Trie, tr:Trie) : Trie { + switch (tl, tr) { + case (null, null) { return makeEmpty() }; + case (null, ? nr) { return makeEmpty() }; + case (? nl, null) { return makeEmpty() }; + case (? nl, ? nr) { + switch (isBin(tl), + isBin(tr)) { + case (true, true) { + let t0 = rec(nl.left, nr.left); + let t1 = rec(nl.right, nr.right); + makeBin(t0, t1) + }; + case (false, true) { + assert(false); + // XXX impossible, until we lift uniform depth assumption + makeEmpty() + }; + case (true, false) { + assert(false); + // XXX impossible, until we lift uniform depth assumption + makeEmpty() + }; + case (false, false) { + assert(isLeaf(tl)); + assert(isLeaf(tr)); + switch (nl.key, nl.val, nr.key, nr.val) { + // leaf-leaf case + case (?kl, ?vl, ?kr, ?vr) { + if (keq(kl, kr)) { + makeLeaf(kl, vbin(vl, vr)); + } else { + // XXX: handle hash collisions here. + makeEmpty() + } + }; + // XXX impossible, and unnecessary with AST-42. + case _ { makeEmpty() }; + } + }; + } + } + }}; + rec(tl, tr) + }; + + // This operation gives a recursor for the internal structure of + // tries. Many common operations are instantiations of this function, + // either as clients, or as hand-specialized versions (e.g., see map, + // mapFilter, exists and forAll below). + func foldUp(t:Trie, bin:(X,X)->X, leaf:(K,V)->X, empty:X) : X { + func rec(t:Trie) : X { + switch t { + case (null) { empty }; + case (?n) { + switch (matchLeaf(t)) { + case (?(k,v)) { leaf(k,v) }; + case null { bin(rec(n.left), rec(n.right)) }; + } + }; + }}; + rec(t) + }; + + // Fold over the key-value pairs of the trie, using an accumulator. + // The key-value pairs have no reliable or meaningful ordering. + func fold(t:Trie, f:(K,V,X)->X, x:X) : X { + func rec(t:Trie, x:X) : X { + switch t { + case (null) x; + case (?n) { + switch (matchLeaf(t)) { + case (?(k,v)) { f(k,v,x) }; + case null { rec(n.left,rec(n.right,x)) }; + } + }; + }}; + rec(t, x) + }; + + // specialized foldUp operation. + func exists(t:Trie, f:(K,V)->Bool) : Bool { + func rec(t:Trie) : Bool { + switch t { + case (null) { false }; + case (?n) { + switch (matchLeaf(t)) { + case (?(k,v)) { f(k,v) }; + case null { rec(n.left) or rec(n.right) }; + } + }; + }}; + rec(t) + }; + + // specialized foldUp operation. + func forAll(t:Trie, f:(K,V)->Bool) : Bool { + func rec(t:Trie) : Bool { + switch t { + case (null) { true }; + case (?n) { + switch (matchLeaf(t)) { + case (?(k,v)) { f(k,v) }; + case null { rec(n.left) and rec(n.right) }; + } + }; + }}; + rec(t) + }; + + // specialized foldUp operation. + // Test for "deep emptiness": subtrees that have branching structure, + // but no leaves. These can result from naive filtering operations; + // filter uses this function to avoid creating such subtrees. + func isEmpty(t:Trie) : Bool { + func rec(t:Trie) : Bool { + switch t { + case (null) { true }; + case (?n) { + switch (matchLeaf(t)) { + case (?(k,v)) { false }; + case null { rec(n.left) and rec(n.right) }; + } + }; + } + }; + rec(t) + }; + + func filter(t:Trie, f:(K,V)->Bool) : Trie { + func rec(t:Trie) : Trie { + switch t { + case (null) { null }; + case (?n) { + switch (matchLeaf(t)) { + case (?(k,v)) { + // XXX-Typechecker: + // This version of the next line gives _really_ + // strange type errors, and no parse errors. + // if f(k,v) { + if (f(k,v)) { + makeLeaf(k,v) + } else { + null + } + }; + case null { + let l = rec(n.left); + let r = rec(n.right); + switch (isEmpty(l), + isEmpty(r)) { + case (true, true) null; + case (false, true) r; + case (true, false) l; + case (false, false) makeBin(l, r); + } + }; + } + }; + } + }; + rec(t) + }; + + func mapFilter(t:Trie, f:(K,V)->?(K,W)) : Trie { + func rec(t:Trie) : Trie { + switch t { + case (null) { null }; + case (?n) { + switch (matchLeaf(t)) { + case (?(k,v)) { + switch (f(k,v)) { + case (null) null; + case (?(k,w)) { makeLeaf(k,w) }; + }}; + case null { + let l = rec(n.left); + let r = rec(n.right); + switch (isEmpty(l), + isEmpty(r)) { + case (true, true) null; + case (false, true) r; + case (true, false) l; + case (false, false) makeBin(l, r); + } + }; + } + }; + } + }; + rec(t) + }; + + // Test for equality, but naively, based on structure. + // Does not attempt to remove "junk" in the tree; + // For instance, a "smarter" approach would equate + // `#bin{left=#empty;right=#empty}` + // with + // `#empty`. + // We do not observe that equality here. + func equalStructure( + tl:Trie, + tr:Trie, + keq:(K,K)->Bool, + veq:(V,V)->Bool + ) : Bool { + func rec(tl:Trie, tr:Trie) : Bool { + switch (tl, tr) { + case (null, null) { true }; + case (_, null) { false }; + case (null, _) { false }; + case (?nl, ?nr) { + switch (matchLeaf(tl), + matchLeaf(tr)) { + case (?(kl,vl), ?(kr,vr)) { keq(kl,kr) and veq(vl,vr) }; + case (null, null) { rec(nl.left, nr.left) + and rec(nl.right, nr.right) }; + case _ { false } + } + }; + }}; + rec(tl, tr) + }; + +};