From 794b3e6c61ed30e9022903ed2588b1d5782b6ecb Mon Sep 17 00:00:00 2001 From: Shachar Itzhaky Date: Mon, 10 Jun 2019 12:48:16 +0300 Subject: [PATCH 1/7] Runtime: more efficient (space-wise) marshalling. Modified caml_output_val to correctly reflects sharing. As a result, it avoids duplication and saves a lot of space. --- runtime/marshal.js | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/runtime/marshal.js b/runtime/marshal.js index c2490b3fa0..e39a43a015 100644 --- a/runtime/marshal.js +++ b/runtime/marshal.js @@ -354,6 +354,11 @@ var caml_output_val = function (){ for (var i = size - 8;i >= 0;i -= 8) this.chunk[this.chunk_idx++] = (value >> i) & 0xFF; }, + write_shared:function (offset) { + if (offset < (1 << 8)) this.write_code(8, 0x04 /*cst.CODE_SHARED8*/, offset); + else if (offset < (1 << 16)) this.write_code(16, 0x05 /*cst.CODE_SHARED16*/, offset); + else this.write_code(32, 0x06 /*cst.CODE_SHARED32*/, offset); + }, finalize:function () { this.block_len = this.chunk_idx - 20; this.chunk_idx = 0; @@ -368,10 +373,26 @@ var caml_output_val = function (){ return function (v) { var writer = new Writer (); var stack = []; + var intern_obj_table = []; + + function store(v) { intern_obj_table.push(v); writer.obj_counter = intern_obj_table.length; } + function recall(v) { + for (var i = 0; i < intern_obj_table.length; i++) { + if (intern_obj_table[i] === v) return intern_obj_table.length - i; + } + } + + function memo(v) { + var existing_offset = recall(v); + if (existing_offset) { writer.write_shared(existing_offset); return existing_offset; } + else store(v); + } + function extern_rec (v) { if (v instanceof Array && v[0] === (v[0]|0)) { if (v[0] == 255) { // Int64 + if (memo(v)) return; writer.write (8, 0x12 /*cst.CODE_CUSTOM*/); for (var i = 0; i < 3; i++) writer.write (8, "_j\0".charCodeAt(i)); var b = caml_int64_to_bytes (v); @@ -383,6 +404,7 @@ var caml_output_val = function (){ if (v[0] == 251) { caml_failwith("output_value: abstract value (Abstract)"); } + if (v.length > 1 && memo(v)) return; if (v[0] < 16 && v.length - 1 < 8) writer.write (8, 0x80 /*cst.PREFIX_SMALL_BLOCK*/ + v[0] + ((v.length - 1)<<4)); else @@ -391,6 +413,7 @@ var caml_output_val = function (){ writer.size_64 += v.length; if (v.length > 1) stack.push (v, 1); } else if (v instanceof MlBytes) { + if (memo(v)) return; var len = caml_ml_string_length(v); if (len < 0x20) writer.write (8, 0x20 /*cst.PREFIX_SMALL_STRING*/ + len); From af0903b23b956b7538907457038023521c735943 Mon Sep 17 00:00:00 2001 From: Shachar Itzhaky Date: Mon, 10 Jun 2019 23:34:33 +0300 Subject: [PATCH 2/7] Runtime: added test for marshal. Test sharing and cyclic data. --- compiler/tests/std_marshal.ml | 76 +++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 compiler/tests/std_marshal.ml diff --git a/compiler/tests/std_marshal.ml b/compiler/tests/std_marshal.ml new file mode 100644 index 0000000000..e97317a541 --- /dev/null +++ b/compiler/tests/std_marshal.ml @@ -0,0 +1,76 @@ +(* Js_of_ocaml tests + * http://www.ocsigen.org/js_of_ocaml/ + * Copyright (C) 2019 Hugo Heuzard + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +(* https://github.com/ocsigen/js_of_ocaml/pull/814 *) + +let%expect_test _ = + Util.compile_and_run + {| + type sign = SPlus | SMinus + type raw_numeral = int + + type prim_token = + | Numeral of sign * raw_numeral + | String of string + + type operator_token = Add | Sub | Times | PlusPlus + + type expr = + | Literal of prim_token + | Op of operator_token * expr list + + let write_out chan v = + let start = pos_out chan in + Marshal.to_channel chan v []; + pos_out chan - start + + let _ = + let tmp_filename = Filename.temp_file "out" "txt" in + let chan = open_out tmp_filename in + let v1 = Op (Add, [Literal (Numeral (SPlus, 5)); Literal (Numeral (SMinus, 7))]) in + let v2 = Op (Times, [v1; v1]) (* shared *) in + let v1_sz = write_out chan v1 in + let v2_sz = write_out chan v2 in + flush chan; + Format.printf "sizes = %d %d (|v2| %s 2|v1|)\n%!" v1_sz v2_sz + (if v2_sz < 2 * v1_sz then "<" else ">="); + + let chan = open_in tmp_filename in + let v1' = Marshal.from_channel chan in + let v2' = Marshal.from_channel chan in + Format.printf "readback = %B %B\n%!" (v1 = v1') (v2 = v2') +|}; + [%expect {| + sizes = 33 40 (|v2| < 2|v1|) + readback = true true |}] + +(* https://github.com/ocsigen/js_of_ocaml/issues/359 *) + +let%expect_test _ = + Util.compile_and_run + {| + type loop = { mutable pointer : loop option } + let l = { pointer = None } + let () = l.pointer <- Some l + + let _ = + let s = Marshal.to_string l [] in + Format.printf "not stuck! %d\n%!" (String.length s) +|}; + [%expect {| not stuck! 24 |}] From 87085e9ca96f906e5c07be4815d678384b66275a Mon Sep 17 00:00:00 2001 From: Shachar Itzhaky Date: Tue, 11 Jun 2019 15:21:43 +0300 Subject: [PATCH 3/7] Runtime: Use WeakMap in marshal. Where available; fallback to list is used for pre-2015 envs. --- runtime/marshal.js | 54 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/runtime/marshal.js b/runtime/marshal.js index e39a43a015..d4a7051b28 100644 --- a/runtime/marshal.js +++ b/runtime/marshal.js @@ -337,10 +337,46 @@ function caml_marshal_data_size (s, ofs) { return (get32(s, ofs + 4)); } +//Provides: MlObjectTable +var MlObjectTable; +if (typeof joo_global_object.WeakMap === 'undefined') { + MlObjectTable = function() { + /* polyfill */ + function NaiveLookup(objs) { this.objs = objs; } + NaiveLookup.prototype.get = function(v) { + for (var i = 0; i < this.objs.length; i++) { + if (this.objs[i] === v) return i; + } + }; + NaiveLookup.prototype.set = function() { }; + + return function MlObjectTable() { + this.objs = []; this.lookup = new NaiveLookup(this.objs); + }; + }(); +} +else { + MlObjectTable = function MlObjectTable() { + this.objs = []; this.lookup = new joo_global_object.WeakMap(); + }; +} + +MlObjectTable.prototype.store = function(v) { + this.lookup.set(v, this.objs.length); + this.objs.push(v); +} + +MlObjectTable.prototype.recall = function(v) { + var i = this.lookup.get(v); + return (i === undefined) + ? undefined : this.objs.length - i; /* index is relative */ +} + //Provides: caml_output_val //Requires: caml_int64_to_bytes, caml_failwith //Requires: caml_int64_bits_of_float //Requires: MlBytes, caml_ml_string_length, caml_string_unsafe_get +//Requires: MlObjectTable var caml_output_val = function (){ function Writer () { this.chunk = []; } Writer.prototype = { @@ -373,19 +409,12 @@ var caml_output_val = function (){ return function (v) { var writer = new Writer (); var stack = []; - var intern_obj_table = []; + var intern_obj_table = new MlObjectTable(); - function store(v) { intern_obj_table.push(v); writer.obj_counter = intern_obj_table.length; } - function recall(v) { - for (var i = 0; i < intern_obj_table.length; i++) { - if (intern_obj_table[i] === v) return intern_obj_table.length - i; - } - } - function memo(v) { - var existing_offset = recall(v); - if (existing_offset) { writer.write_shared(existing_offset); return existing_offset; } - else store(v); + var existing_offset = intern_obj_table.recall(v); + if (existing_offset) { writer.write_shared(existing_offset); return existing_offset; } + else intern_obj_table.store(v); } function extern_rec (v) { @@ -460,7 +489,8 @@ var caml_output_val = function (){ if (i + 1 < v.length) stack.push (v, i + 1); extern_rec (v[i]); } - writer.finalize (); + writer.obj_counter = intern_obj_table.objs.length; + writer.finalize(); return writer.chunk; } } (); From 2709d6d6ecf92bd923325f2ad068ca140c756504 Mon Sep 17 00:00:00 2001 From: Shachar Itzhaky Date: Wed, 12 Jun 2019 12:46:35 +0300 Subject: [PATCH 4/7] Runtime: support Marshal flags. Only Marshal.No_sharing makes sense in this context. Marshal.Closures cannot be supported currently. Marshal.Compat_32 is meaningless since all JavaScript integers are 32 bit. --- compiler/tests/std_marshal.ml | 12 +++++++++--- runtime/io.js | 4 ++-- runtime/marshal.js | 32 +++++++++++++++++++------------- runtime/stdlib.js | 9 +++++++++ 4 files changed, 39 insertions(+), 18 deletions(-) diff --git a/compiler/tests/std_marshal.ml b/compiler/tests/std_marshal.ml index e97317a541..08cd9ce073 100644 --- a/compiler/tests/std_marshal.ml +++ b/compiler/tests/std_marshal.ml @@ -40,6 +40,11 @@ let%expect_test _ = Marshal.to_channel chan v []; pos_out chan - start + let write_out_noshare chan v = + let start = pos_out chan in + Marshal.to_channel chan v [Marshal.No_sharing]; + pos_out chan - start + let _ = let tmp_filename = Filename.temp_file "out" "txt" in let chan = open_out tmp_filename in @@ -47,9 +52,10 @@ let%expect_test _ = let v2 = Op (Times, [v1; v1]) (* shared *) in let v1_sz = write_out chan v1 in let v2_sz = write_out chan v2 in + let v2_ns_sz = write_out_noshare chan v2 in flush chan; - Format.printf "sizes = %d %d (|v2| %s 2|v1|)\n%!" v1_sz v2_sz - (if v2_sz < 2 * v1_sz then "<" else ">="); + Format.printf "sizes = %d %d %d (|v2| %s |v2_ns|)\n%!" v1_sz v2_sz v2_ns_sz + (if v2_sz < v2_ns_sz then "<" else ">="); let chan = open_in tmp_filename in let v1' = Marshal.from_channel chan in @@ -57,7 +63,7 @@ let%expect_test _ = Format.printf "readback = %B %B\n%!" (v1 = v1') (v2 = v2') |}; [%expect {| - sizes = 33 40 (|v2| < 2|v1|) + sizes = 33 40 51 (|v2| < |v2_ns|) readback = true true |}] (* https://github.com/ocsigen/js_of_ocaml/issues/359 *) diff --git a/runtime/io.js b/runtime/io.js index e351c2e63d..75fa9740a4 100644 --- a/runtime/io.js +++ b/runtime/io.js @@ -386,8 +386,8 @@ function caml_ml_output_char (chanid,c) { //Provides: caml_output_value //Requires: caml_output_value_to_string, caml_ml_output,caml_ml_string_length -function caml_output_value (chanid,v,_flags) { - var s = caml_output_value_to_string(v); +function caml_output_value (chanid,v,flags) { + var s = caml_output_value_to_string(v, flags); caml_ml_output(chanid,s,0,caml_ml_string_length(s)); return 0; } diff --git a/runtime/marshal.js b/runtime/marshal.js index d4a7051b28..76ceb088c4 100644 --- a/runtime/marshal.js +++ b/runtime/marshal.js @@ -376,7 +376,7 @@ MlObjectTable.prototype.recall = function(v) { //Requires: caml_int64_to_bytes, caml_failwith //Requires: caml_int64_bits_of_float //Requires: MlBytes, caml_ml_string_length, caml_string_unsafe_get -//Requires: MlObjectTable +//Requires: MlObjectTable, caml_list_to_js_array var caml_output_val = function (){ function Writer () { this.chunk = []; } Writer.prototype = { @@ -406,13 +406,22 @@ var caml_output_val = function (){ return this.chunk; } } - return function (v) { + return function (v, flags) { + flags = caml_list_to_js_array(flags); + + var no_sharing = (flags.indexOf(0 /*Marshal.No_sharing*/) !== -1), + closures = (flags.indexOf(1 /*Marshal.Closures*/) !== -1); + /* Marshal.Compat_32 is redundant since integers are 32-bit anyway */ + + if (closures) + joo_global_object.console.warn("in caml_output_val: flag Marshal.Closures is not supported."); + var writer = new Writer (); var stack = []; var intern_obj_table = new MlObjectTable(); - + function memo(v) { - var existing_offset = intern_obj_table.recall(v); + var existing_offset = no_sharing ? undefined : intern_obj_table.recall(v); if (existing_offset) { writer.write_shared(existing_offset); return existing_offset; } else intern_obj_table.store(v); } @@ -497,23 +506,20 @@ var caml_output_val = function (){ //Provides: caml_output_value_to_string mutable //Requires: caml_output_val, caml_string_of_array -function caml_output_value_to_string (v, _fl) { - /* ignores flags... */ - return caml_string_of_array (caml_output_val (v)); +function caml_output_value_to_string (v, flags) { + return caml_string_of_array (caml_output_val (v, flags)); } //Provides: caml_output_value_to_bytes mutable //Requires: caml_output_val, caml_string_of_array -function caml_output_value_to_bytes (v, _fl) { - /* ignores flags... */ - return caml_string_of_array (caml_output_val (v)); +function caml_output_value_to_bytes (v, flags) { + return caml_string_of_array (caml_output_val (v, flags)); } //Provides: caml_output_value_to_buffer //Requires: caml_output_val, caml_failwith, caml_blit_bytes -function caml_output_value_to_buffer (s, ofs, len, v, _fl) { - /* ignores flags... */ - var t = caml_output_val (v); +function caml_output_value_to_buffer (s, ofs, len, v, flags) { + var t = caml_output_val (v, flags); if (t.length > len) caml_failwith ("Marshal.to_buffer: buffer overflow"); caml_blit_bytes(t, 0, s, ofs, t.length); return 0; diff --git a/runtime/stdlib.js b/runtime/stdlib.js index fc213a6e12..0f881659af 100644 --- a/runtime/stdlib.js +++ b/runtime/stdlib.js @@ -1215,6 +1215,15 @@ function caml_list_of_js_array(a){ return l } +//Provides: caml_list_to_js_array const (const) +function caml_list_to_js_array(l){ + var a = []; + for(; l !== 0; l = l[2]) { + a.push(l[1]); + } + return a; +} + //Provides: caml_runtime_warnings var caml_runtime_warnings = 0; From 9ddd401992295a8805dc01bc5acfeab8ea0e1d8d Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Sat, 15 Jun 2019 12:41:55 +0800 Subject: [PATCH 5/7] tuning --- .../tests/{std_marshal.ml => test_marshal.ml} | 28 ++++++++++++++++--- runtime/marshal.js | 1 + 2 files changed, 25 insertions(+), 4 deletions(-) rename compiler/tests/{std_marshal.ml => test_marshal.ml} (80%) diff --git a/compiler/tests/std_marshal.ml b/compiler/tests/test_marshal.ml similarity index 80% rename from compiler/tests/std_marshal.ml rename to compiler/tests/test_marshal.ml index 08cd9ce073..c0f3ff0854 100644 --- a/compiler/tests/std_marshal.ml +++ b/compiler/tests/test_marshal.ml @@ -1,6 +1,6 @@ (* Js_of_ocaml tests * http://www.ocsigen.org/js_of_ocaml/ - * Copyright (C) 2019 Hugo Heuzard + * Copyright (C) 2019 Shachar Itzhaky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -62,12 +62,29 @@ let%expect_test _ = let v2' = Marshal.from_channel chan in Format.printf "readback = %B %B\n%!" (v1 = v1') (v2 = v2') |}; - [%expect {| + [%expect {| sizes = 33 40 51 (|v2| < |v2_ns|) readback = true true |}] (* https://github.com/ocsigen/js_of_ocaml/issues/359 *) +let%expect_test _ = + let module M = struct + type loop = {mutable pointer : loop option} + + let l = {pointer = None} + + let () = l.pointer <- Some l + + let _ = + let s = Marshal.to_string l [] in + Format.printf "%d\n%S\n%!" (String.length s) s + end in + [%expect + {| + 24 + "\132\149\166\190\000\000\000\004\000\000\000\002\000\000\000\004\000\000\000\004\144\144\004\002" |}] + let%expect_test _ = Util.compile_and_run {| @@ -77,6 +94,9 @@ let%expect_test _ = let _ = let s = Marshal.to_string l [] in - Format.printf "not stuck! %d\n%!" (String.length s) + Format.printf "%d\n%S\n%!" (String.length s) s |}; - [%expect {| not stuck! 24 |}] + [%expect + {| + 24 + "\132\149\166\190\000\000\000\004\000\000\000\002\000\000\000\004\000\000\000\004\144\144\004\002" |}] diff --git a/runtime/marshal.js b/runtime/marshal.js index 76ceb088c4..1b2901246e 100644 --- a/runtime/marshal.js +++ b/runtime/marshal.js @@ -421,6 +421,7 @@ var caml_output_val = function (){ var intern_obj_table = new MlObjectTable(); function memo(v) { + if(no_sharing) return undefined; var existing_offset = no_sharing ? undefined : intern_obj_table.recall(v); if (existing_offset) { writer.write_shared(existing_offset); return existing_offset; } else intern_obj_table.store(v); From e5d7cd30ed2381dacccf39610b6b1822c1d6a4ac Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Sat, 15 Jun 2019 12:51:47 +0800 Subject: [PATCH 6/7] comment --- runtime/marshal.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/runtime/marshal.js b/runtime/marshal.js index 1b2901246e..c833ff610f 100644 --- a/runtime/marshal.js +++ b/runtime/marshal.js @@ -348,7 +348,9 @@ if (typeof joo_global_object.WeakMap === 'undefined') { if (this.objs[i] === v) return i; } }; - NaiveLookup.prototype.set = function() { }; + NaiveLookup.prototype.set = function() { + // Do nothing here. [MlObjectTable.store] will push to [this.objs] directly. + }; return function MlObjectTable() { this.objs = []; this.lookup = new NaiveLookup(this.objs); From 68e11d79e2680b01e440fa10d5bb9be7a69d48fb Mon Sep 17 00:00:00 2001 From: Shachar Itzhaky Date: Sat, 15 Jun 2019 13:53:32 +0300 Subject: [PATCH 7/7] more tuning. --- runtime/marshal.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/runtime/marshal.js b/runtime/marshal.js index c833ff610f..a9b35fb91d 100644 --- a/runtime/marshal.js +++ b/runtime/marshal.js @@ -341,7 +341,7 @@ function caml_marshal_data_size (s, ofs) { var MlObjectTable; if (typeof joo_global_object.WeakMap === 'undefined') { MlObjectTable = function() { - /* polyfill */ + /* polyfill (using linear search) */ function NaiveLookup(objs) { this.objs = objs; } NaiveLookup.prototype.get = function(v) { for (var i = 0; i < this.objs.length; i++) { @@ -420,13 +420,13 @@ var caml_output_val = function (){ var writer = new Writer (); var stack = []; - var intern_obj_table = new MlObjectTable(); + var intern_obj_table = no_sharing ? null : new MlObjectTable(); function memo(v) { - if(no_sharing) return undefined; - var existing_offset = no_sharing ? undefined : intern_obj_table.recall(v); - if (existing_offset) { writer.write_shared(existing_offset); return existing_offset; } - else intern_obj_table.store(v); + if (no_sharing) return false; + var existing_offset = intern_obj_table.recall(v); + if (existing_offset) { writer.write_shared(existing_offset); return true; } + else { intern_obj_table.store(v); return false; } } function extern_rec (v) { @@ -501,7 +501,7 @@ var caml_output_val = function (){ if (i + 1 < v.length) stack.push (v, i + 1); extern_rec (v[i]); } - writer.obj_counter = intern_obj_table.objs.length; + if (intern_obj_table) writer.obj_counter = intern_obj_table.objs.length; writer.finalize(); return writer.chunk; }