-
Notifications
You must be signed in to change notification settings - Fork 1
/
schemer.sch
1821 lines (1655 loc) · 59.6 KB
/
schemer.sch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
;; Copyright 2012 J. Aaron Pendergrass
;; This file is part of toy-bytecode.
;; toy-bytecode is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; toy-bytecode is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with toy-bytecode. If not, see <http://www.gnu.org/licenses/>.
;; Section 0: General Utility Functions
;;
;; These seem more or less standard lib-like but don't exist in the
;; scheme standard lib.
;; Equivalent to CL last function, given a pair if the cdr is a pair
;; or a pair with null cdr, return the it else return last of the cdr.
(define last
(lambda (l)
(if (and (not (null? l))
(pair? l)
(pair? (cdr l))
(not (null? (cdr l))))
(last (cdr l))
l)))
;; Return true if a string represents a decimal number with an
;; optional +/- prefix
(define string-is-numeric?
(lambda (str)
(let ((first-char (string-ref str 0))
(strlen (string-length str))
(test (lambda (c v) (if v (char-is-digit? c) v))))
(and (< 0 strlen)
(if (< 1 strlen)
(and
(or (char=? first-char #\-)
(char=? first-char #\+)
(char-is-digit? first-char))
(string-fold test #t str 1 (string-length str)))
(char-is-digit? first-char))))))
;; Section 1: Assembly Code Generation Helpers
;;
;; First we define symbols for all the commonly used instructions,
;; then some helper functions to generate ASM syntax for the assembler
;; executable.
;; String constants for common instructions
(define ins-push "PUSH")
(define ins-pop "POP")
(define ins-swap "SWAP")
(define ins-dup "DUP")
(define ins-rot "ROT")
(define ins-call "CALL")
(define ins-ret "RET")
(define ins-jmp "JMP")
(define ins-jtrue "JTRUE")
(define ins-end "END")
(define ins-add "ADD")
(define ins-sub "SUB")
(define ins-eq "EQ")
(define ins-lt "LT")
(define ins-stor "STOR")
(define ins-load "LOAD")
(define ins-aloc "ALOC")
(define ins-rdrr "RDRR")
(define ins-wtrr "WTRR")
(define ins-isnum "ISNUM")
(define ins-isptr "ISPTR")
(define ins-shl "SHL")
(define ins-shr "SHR")
(define ins-mul "MUL")
(define ins-bor "BOR")
(define ins-band "BAND")
;; The instructions below are all used at most once in the compiler so
;; its cheaper to include them as string literals then to clutter up
;; the environment with them.
;; (define ins-div "DIV")
;; (define ins-mod "MOD")
;; (define ins-getc "GETC")
;; (define ins-dump "DUMP")
;; (define ins-pint "PINT")
;; (define ins-pchr "PCHR")
;; (define ins-islconst "ISLCONST")
;; (define ins-ischr "ISCHR")
;; (define ins-isins "ISINS")
;; (define ins-pbin "PBIN")
;; (define ins-pblconsti "PBLCONSTI")
;; (define ins-pblvconsti "PBLVCONSTI")
;; (define ins-pbptri "PBPTRI")
;; (define ins-brk "BRK")
;; The underlying VM uses a tagged memory model that differentiates between
;; numbers, pointers, vm constants, and language constants.
;; Numeric values in the assembly must be tagged with an appropriate identifier
;; to convince the assembler to tag the cell with the appropriate type.
;; Language constants
(define false-value "FALSE")
(define true-value "TRUE")
;; Return a string containing the asm representation of a number
(define asm-number (lambda (x) (string-append "n" (if (number? x) (number->string x) x))))
; return a string containing the asm representation of a pointer
(define asm-pointer (lambda (x y) (string-append "p" (if (number? x) (number->string x) x)
"," (if (number? x) (number->string y) y)
)))
;; Return a string containing the asm representation of a language constant
(define asm-lang-const (lambda (x) (string-append "l" (if (number? x) (number->string x) x))))
;; Return a string containing the asm representation of a label reference
(define asm-label-reference (lambda (name) (string-append "@" name)))
;; Return a string containing the asm representation of a label with undefined size
(define asm-label-definition (lambda (name) (string-append ":" name)))
;; Return a string containing the asm representation of a label with defined size
(define asm-label-definition-sz (lambda (name sz)
(string-append (asm-label-definition name)
(string-append "," (number->string sz)))))
;;
;; Section 2: Assembly and Compiler Constants for Scheme-Level Datastructure
;;
;; We support three (and pretend we support four) datatypes at the
;; scheme level and we want to make sure we're able to distinguish
;; them at the bytecode level:
;; * pairs (consboxes)
;; * vectors
;; * strings
;; * symbols
;; When we create Scheme level objects (strings, vectors, symbols),
;; we store a type tag at offset zero from the object base.
(define asm-ptr-type-offset (asm-number 0))
;; Consboxes don't get the type field, we just store the car at offset
;; 0 and cdr at offset 1
(define consbox-size 2)
(define asm-consbox-size (asm-number consbox-size))
(define asm-consbox-car-offset (asm-number 0))
(define asm-consbox-cdr-offset (asm-number 1))
;; Vectors, Strings, and Symbols are all the same under the hood here,
;; except we don't actually deal with symbols.
(define vector-type-flag (asm-lang-const 2))
(define asm-vector-length-offset (asm-number 1))
(define raw-vector-elems-offset 2)
(define asm-vector-elems-offset (asm-number raw-vector-elems-offset))
(define string-type-flag (asm-lang-const 3))
(define asm-string-length-offset asm-vector-length-offset)
(define asm-string-chars-offset asm-vector-elems-offset)
;; We should actually support symbols
(define symbol-type-flag (asm-lang-const 3))
;;
;; Section 3: Label Generation
;;
;; We should probably be a bit more careful to ensure the user can't
;; possibly introduce symbols that collide with our compiler generated
;; symbols, but instead we just claim ownership of the __ prefix.
;; Check if the given string is "safe" as an assembly label. Meaning
;; all chars are alphanumeric+'_'
(define asm-safe
(lambda (s)
(list->string
(reverse
(string-fold (lambda (c acc)
(if (char-is-asm-safe? c) (cons c acc)
(cons #\_ acc))) '() s 0 (string-length s))))))
;; Keep track of how many labels we've generated, we'll append and
;; increment this every time we create a new label.
(define label-counter 0)
;; Generate a fresh label to be used in the assembly. Primarily used
;; for lambdas and constants appearing in the source code (e.g.,
;; strings or vectors). The argument `lbl` may be nil, or may be some
;; related string that the calling code would like included in the
;; generated label. This is ued to include when compiling
;; `(define foo (lambda ...))` expressions to include `foo` in the
;; label of generated for the lambda to make it slightly easier to read
;; the assembly.
(define fresh-label (lambda (lbl)
(set! label-counter (+ label-counter 1))
(string-append (if lbl
(string-append "__anonymous_"
(asm-safe lbl))
"__anonymous")
(number->string label-counter))
))
;;
;; Section 4: Special Forms Preamble
;;
;; A few builtin forms are handled specially by the compiler. Some of
;; these will later be subsumed by macro capabilities but for right
;; now they are just special voodoo.
;;
;; This list associates the symbols with special compiler
;; functions. The handlers are actually defined below with other
;; compilation functions.
(define special-forms
(lambda ()
(list
(cons "set!" compile-set!)
(cons "lambda" compile-lambda)
(cons "let" compile-let)
(cons "letrec" compile-letrec)
(cons "if" compile-if)
(cons "define" compile-define)
(cons "begin" compile-begin)
(cons "quote" compile-quote)
(cons "and" compile-and)
(cons "or" compile-or)
)
))
;; Search the list of builtin forms for a
;; particular form
(define find-special
(lambda (f)
(let ((x (assoc f (special-forms))))
(and x (cdr x)))))
;;
;; Section 5: The Environment
;;
;; The top-level-env is a list containing the list of symbols defined
;; by the compiler at the top level.
;;
;; The runtime environment is represented as a list of vectors
;; references to symbols are replaced with a traversal of this
;; structure based on the level of enclosing scope where the symbol
;; was defined, and the index of the variable in that scope. This is
;; taken directly from the SECD machine's representation of the
;; environment.
;;
;; For example suppose we have something like:
;; (((lambda (x)
;; (lambda (z) (+ x z)))
;; 5)
;; 7)
;;
;; The inner lambda (that gets returned and applied to the arg 7)
;; refers to three different symbols: '+' 'x' and 'z' that are each
;; declared at a different depth in the enclosing environment. When
;; this lambda is evaluated (with the argument 7) the environment will
;; look like:
;; ([7]
;; [5]
;; ["=" "null?" "cons" "car" "cdr" "+" ...])
;;
;; So the reference to symbol 'z' will be compiled to (vector-ref (car env) 0)
;; the reference to symbol 'x' will be compiled to (vector-ref (car (cdr env)) 0) and
;; the reference to symbol '+' will be compiled to (vector-ref (car (cdr (cdr env))) 5)
;;
;; Note: this isn't strictly accurate since the symbols 'vector-ref',
;; 'car' and 'cdr' are themselves defined in the environment and
;; would thus require lookups making this expansion impossible.
;; What really happens is that a non-closure form of the car and
;; cdr procedures are invoked directly. See the functions
;; u-call-* below.
(define top-level-env
(quote ((("equal?" "equal")
("=" "equal")
("<" "less_than")
("null?" "null_q")
("cons" "cons")
("car" "car")
("cdr" "cdr")
("+" "add")
("-" "subtract")
("*" "multiply")
("%" "modulo")
("/" "divide")
("quotient" "divide")
("remainder" "modulo")
("ash" "arithmetic_shift")
("logior" "logior")
("logand" "logand")
("print-char" "print_char")
("print-num" "print_num")
("string?" "string_q")
("number?" "number_q")
("char?" "char_q")
("pair?" "pair_q")
("read-char" "read_char")
("quit" "quit")
("set-car!" "set_car")
("set-cdr!" "set_cdr")
("make-vector" "make_vector")
("vector-ref" "vector_ref")
("vector-set!" "vector_set")
("vector-length" "vector_length")
("make-string" "make_string")
("string-set!" "vector_set")
("string-ref" "vector_ref")
("string-length" "vector_length")
("vapply" "vapply")
("char=?" "equal") ;; for characters
("char<?" "less_than") ;; for characters
("eof-object?" "eof_object_q")
("print-binary" "print_binary")
("print-lconst" "print_lconst")
("print-vconst" "print_vconst")
("print-pointer" "print_pointer")
))))
(define top-level-env-endptr (last (car top-level-env)))
(define initial-env-label "__initial_environment")
;;
;; Section 6: Assembly Output Helpers
;;
;; Functions for outputting assembly code. These get used by the
;; compilation routines to actually generate assembly.
;; Output a named literal consbox definition to the assembly stream
;; Returns the name of the consbox
(define append-named-consbox
(lambda (name car-value cdr-value)
(append-instructions
(asm-label-definition-sz name consbox-size)
car-value
cdr-value)
name))
;; Output a literal "anonymous" consbox defintion by generating a
;; fresh label. Returns the label used so that subsequent code can
;; refer to it.
(define append-consbox
(lambda (car-value cdr-value)
(append-named-consbox (fresh-label #f) car-value cdr-value)))
;; Given a list of assembly values (numbers, constants, or symbols),
;; output a literal vector definition with the same elements to the
;; assembly code.
;; Returns a fresh label genearted to refer to the vector
(define append-list-as-vector
(lambda (vec-list)
(let ((lbl (fresh-label #f)))
(append-instructions
(asm-label-definition-sz lbl (+ (length vec-list) raw-vector-elems-offset))
vector-type-flag
(asm-number (length vec-list)))
(apply append-instructions vec-list)
lbl)))
;; Append the initial environment as a literal list with one element
;; that is a vector (as described above) to the assembly stream.
;;
;; This should only be called once.
(define append-initial-env
(lambda ()
(append-named-consbox "__nil"
(asm-label-reference "__nil")
(asm-label-reference "__nil"))
(append-named-consbox
initial-env-label
(asm-label-reference
(append-list-as-vector (map (lambda (l) (asm-label-reference (cadr l))) (car top-level-env))))
(asm-label-reference "__nil"))))
;; Append an instruction to the ouptut stream
(define append-instruction
(lambda (ins)
(begin
(display ins)
(display "\n"))))
;; Append a list of instructions to the output stream
(define append-instructions
(lambda inss
(letrec ((helper (lambda (inss)
(if (null? inss) (quote ())
(begin
(append-instruction (car inss))
(helper (cdr inss)))))))
(helper inss))))
;; Section 7: Intrinsics and Compilation
;;
;; We're now actually into the guts of the compiler.
;; The conventions are as follows:
;; Functions of the form
;; - 'assembly-foo' take no arguments, append asm instructions
;; to the output stream for completing the task
;; foo, and return no useful result.
;;
;; - 'u-call-foo' serve as a wrapper around the assembly-foo
;; functions. For larger blocks of assembly code
;; the u-call-foo insert a CALL to the definitions,
;; shorter ones are inlined. Again, no useful result
;; is returned.
;;
;; - 'compile-foo' these are the main compiler functions. All of
;; these take atleast two arguments, the s-expression
;; to compile and the symbolic environment list used
;; to resolve references. Some of these take a boolean
;; 'rest' argument which is a bad hack to support tail-call
;; optimization. If 'rest' is false it means their is
;; definitely no continuation to this expression and so
;; a closure invocation can be optimized by not storing the
;; return environment and using a JMP rather than a CALL
;; (see assembly-funcall vs. assembly-tailcall below).
;; All compile-* functions must return either 0-arity function
;; or false. The 0-arity function represents work that is
;; being delayed until after the compilation of the main
;; program body, e.g., the body of lambda expressions.
;; It is vital that these return values be propagated out
;; to the main compiler loop 'do-compiler-task'
;;
;; When generating non-trivial sequences of assembly functions, we'll
;; track the stack as a list in comments with the bottom of the stack
;; as the first element and the top in the last.
;;
;; Subsection 7.1: Consbox Primitives ASM
;;
;; Assembly for the primitive list functions car, cdr, and cons
(define assembly-car (lambda ()
(append-instructions ; (pair)
ins-push asm-consbox-car-offset ; (pair car-offset)
ins-load))) ; (car)
(define assembly-cdr (lambda ()
(append-instructions ; (pair)
ins-push asm-consbox-cdr-offset ; (pair cdr-offset)
ins-load))) ; (cdr)
(define assembly-cons (lambda ()
(append-instructions ; (car cdr)
ins-push asm-consbox-size ; (car cdr 2)
ins-aloc ; (car cdr hp)
ins-push asm-consbox-cdr-offset ; (car cdr hp 1)
ins-stor ; (car hp) cdr stored
ins-push asm-consbox-car-offset ; (car hp 0)
ins-stor) ; (hp) cdr stored
))
(define assembly-set-car
(lambda ()
(append-instructions ; (value pair)
ins-push asm-consbox-car-offset ; (value pair car-offset)
ins-stor))) ; (pair)
(define assembly-set-cdr
(lambda ()
(append-instructions ; (value pair)
ins-push asm-consbox-cdr-offset ; (value pair car-offset)
ins-stor))) ; (pair)
;;
;; Subsection 7.2: Consbox Primitive Invocation
;;
;; These define how to call the primitives car, cdr, set-car, set-cdr,
;; cons, and make-vector as part of larger compiler generated
;; sequences (e.g., function application) for car, cdr, set-car, and
;; set-cdr we just inline the assembly. For cons and make-vector we
;; do a machine level call into a function.
(define u-call-car (lambda () (assembly-car))) ; car is 3 instructions, a function call is the same length
; so there is no reason not to inline it.
(define u-call-cdr (lambda () (assembly-cdr))) ; same with cdr.
(define u-call-cons ; cons is really big (13 instructions)! we'll never inline it
(lambda ()
;; (assembly-cons)
(append-instructions ins-push (asm-label-reference "__u_cons") ins-call)
))
(define u-call-set-car (lambda () (assembly-set-car)))
(define u-call-set-cdr (lambda () (assembly-set-cdr)))
(define u-call-make-vector
(lambda ()
(append-instructions
ins-push (asm-label-reference "__u_make_vector_nofill")
ins-call)))
;;
;; Section 7.3: Function Invocation and Argument Handling
;;
;; The convention is that the top of stack is the closure to apply,
;; then the arguments this is tricky. We need to cons the argument
;; list onto the closure's environment, store the existing environment
;; pointer to the stack, set the environment pointer to the new list,
;; invoke the closure's code, then restore the environment pointer on
;; return.
;;
(define assembly-make-args-helper (lambda (nr-args)
(if (= nr-args 0) #f
(begin
(append-instructions
ins-push (asm-number (+ (+ raw-vector-elems-offset nr-args) -1))
ins-stor)
(assembly-make-args-helper (- nr-args 1))))))
(define assembly-make-args (lambda (nr-args)
(append-instructions
ins-push (asm-number nr-args))
(u-call-make-vector)
(assembly-make-args-helper nr-args)))
;; Special case for referencing arguments to this function (i.e., depth = 0).
(define assembly-get-arg
(lambda (idx)
(append-instruction ins-rdrr)
(u-call-car)
(append-instructions
ins-push (asm-number (+ raw-vector-elems-offset idx))
ins-load)))
(define assembly-set-arg
(lambda (idx)
(append-instruction ins-rdrr)
(u-call-car)
(append-instructions
ins-push (asm-number (+ raw-vector-elems-offset idx))
ins-stor
ins-pop)))
(define assembly-nrargs
(lambda ()
(append-instruction ins-rdrr)
(u-call-car)
(append-instructions
ins-push asm-vector-length-offset
ins-load)))
;; Actual assembly code for performing a scheme level function
;; invocation. This is relatively long, so actual callsites will do a
;; macihne level CALL to this stub to perform the functioncall.
(define assembly-funcall (lambda ()
(append-instructions ; (args clos rp)
(asm-label-definition "__funcall_tramp")
ins-rot ; (rp args clos)
ins-dup) ; (rp args clos clos)
(u-call-car) ; (rp args clos env)
(append-instructions
ins-swap ; (rp args env clos)
ins-rot) ; (rp clos args env)
(u-call-cons) ; (rp clos (args . env)*)
(append-instructions
ins-rdrr ; (rp clos (args . env) renv)
ins-swap ; (rp clos renv (args . env) )
ins-wtrr ; (rp clos renv) rr = (args . env)
ins-rot) ; (renv rp clos)
(u-call-cdr) ; (renv rp clos-code)
(append-instruction ins-jmp)))
(define u-call-funcall
(lambda ()
(append-instructions ; (args clos)
ins-push (asm-label-reference "__funcall_tramp") ; (args clos __funcall_tramp)
ins-call ; (envptr retval)
ins-swap ; (retval envptr)
ins-wtrr))) ; (retval)
;; Tail calls are sneakier because we avoid saving the current env pointer.
(define assembly-tailcall
(lambda ()
(append-instructions
(asm-label-definition "__tailcall_tramp")
ins-dup) ; (renv rp args clos clos)
(u-call-car) ; (renv rp args clos env)
(append-instructions
ins-swap ; (renv rp args env clos)
ins-rot) ; (renv rp clos args env)
(u-call-cons) ; (renv rp clos (args . env)* )
(append-instruction ins-wtrr) ; (renv rp clos) rr = (args . env)
; note that we didn't store the current env
; this is a tail call so we'll return straight
; to the current renv/rp!
(u-call-cdr) ; (renv rp code)
(append-instruction ins-jmp) ; we jump into the call with
; (renv rp)
; on return we'll have pc = rp, and
; (renv rval) on the stack
; just as on return from non-tail call above.
))
(define u-call-tailcall
(lambda ()
(append-instructions ; (renv rp args clos)
ins-push (asm-label-reference "__tailcall_tramp") ; (renv rp args clos __tailcal_tramp)
ins-jmp))) ; never comes back
; returning is simple since cleanup is handled by the caller
(define assembly-funret (lambda () (append-instruction ins-ret)))
;; Assembly for loading a cell from the environment.
;; assembly-env-cell places the cons box whose car is at the desired
;; offsets on the stack. assembly-env-val actually loads the value.
(define assembly-env-vec
(lambda (depth)
(append-instructions
ins-rdrr ; (env)
ins-push (asm-number depth) ; (env d)
ins-push (asm-label-reference "__u_nth_cell") ; (env d u_nth)
ins-call)
(u-call-car)))
(define assembly-env-val
(lambda (env-length depth idx)
(if (= env-length (+ depth 1)) ;; getting something from top-level-env
(begin
(append-instructions
ins-push (asm-label-reference initial-env-label))
(u-call-car)
(append-instructions
ins-push (asm-number (+ raw-vector-elems-offset idx))
ins-load))
(if (= depth 0)
(assembly-get-arg idx)
(begin
(assembly-env-vec depth)
(append-instructions
ins-push (asm-number (+ raw-vector-elems-offset idx))
ins-load))))))
(define assembly-set-env-val
(lambda (env-length depth idx)
(if (= env-length (+ depth 1))
(begin
(append-instructions
ins-push (asm-label-reference initial-env-label))
(u-call-car)
(append-instructions
ins-push (asm-number (+ raw-vector-elems-offset idx))
ins-stor))
(begin
(assembly-env-vec depth)
(append-instructions
ins-push (asm-number (+ raw-vector-elems-offset idx)) ins-stor)))))
(define assembly-nil
(lambda ()
(append-instructions ins-push (asm-label-reference "__nil") )))
;;
;; Subsection 7.3: Symbolic Environment
;;
;; As noted above, the runtime environment is indexed by integers
;; corresponding to when things are declared. The compiler tracks the
;; "symbolic environment" mapping between symbol names and their index
;; in the runtime environment. These routines deal with that mapping.
;;
;; Unlike the runtime environment, the symbolic environment is a list
;; of lists (rather than a list of vectors. Symbols from the most
;; local scope are stored in the car of the list working out to the
;; top-level env. The 'depth' of a symbol is the index of its scsope
;; in the environment, while the 'offset' is the index within this
;; scope.
;; Lookup the index of a symbol in an list of symbols return #f if not
;; found.
;; * r is the symbol name we're looking up
;; * e is the environment to search in
;; * cont is the continuation to pass the offset to
;;
;; For some reason this handles the case where the element in the list
;; is a list whose car is the symbol we're looking for. Not sure if
;; this is important.
(define lookup-reference-offset
(lambda (r e cont)
(if (null? e) (cont #f)
(if (string=? r (if (list? (car e)) (car (car e)) (car e)))
(cont 0)
(lookup-reference-offset r (cdr e)
(lambda (z)
(cont (if z (+ z 1) z))))))))
;; Lookup the depth and offset of a symbol name in the
;; environment. Arguments are the same as for lookup-reference-offset
;; except that `e` is the full (or a tail of) the symbolic
;; environment.
;;
;; Return value is either a conspair of (depth . offset) or #f
(define lookup-reference-depth
(lambda (r e cont)
(if (null? e) (cont #f)
(lookup-reference-offset r (car e)
(lambda (z)
(cont
(if z
(cons 0 z)
(lookup-reference-depth r (cdr e)
(lambda (w)
(if w
(cons (+ (car w) 1) (cdr w))
#f))))))))))
(define lookup-reference
(lambda (r e)
(lookup-reference-depth r e (lambda (x) x))))
;;
;; Subsection 7.4: Actually Compiling Stuff
;;
;; The structure/flow of the compilation process is a little
;; convoluted to ensure all (define ...) forms are introduced before
;; their bodies are compiled to allow them to refer to eachother.
;;
;; * compiler-run that reads sexps (via the reader) until EOF is
;; hit and calls compile-sexp on each
;; * compile-sexp takes a sexp, compiles it, and returns a 0-arity
;; function for any deferred compilation tasks (e.g., lambda
;; bodies). Actual compilation is performed by various
;; `compile-foo` functions specializing on different forms
;; (lists, numbers, special forms defined above, etc...)
;;
;; * do-compile-task is used to recursively evaluate a compilation
;; task expressed as a 0-arity function that returns either a new
;; compilation task or #f on completion.
;;
;; do-compile-task is the main compiler loop. it takes a 0-arity
;; function to invoke (or false), and recurs on the result of invoking
;; the function.
(define do-compile-task
(lambda (t) (if t (do-compile-task (t)) #f)))
(define compile-number
(lambda (c env) (append-instructions ins-push (asm-number c)) #f))
(define calculate-string-list-length
(lambda (strl n)
(if (null? strl) n
(calculate-string-list-length
(if (char=? (car strl) #\\) (cdr (cdr strl)) (cdr strl))
(+ n 1)))))
(define calculate-string-length
(lambda (str)
(calculate-string-list-length (string->list str) -2)))
(define compile-string
(lambda (s env)
(let ((strlabel (fresh-label #f))
(strlen (calculate-string-length s)))
(append-instructions ins-push (asm-label-reference strlabel))
(lambda ()
(append-instructions
(asm-label-definition-sz strlabel (+ strlen 2))
string-type-flag
(asm-number strlen)
s)
#f))))
(define calculate-symbol-length
(lambda (s) 0))
(define compile-symbol
(lambda (s env)
(let ((symlabel (fresh-label #f))
(symlen (calculate-symbol-length s)))
(append-instructions ins-push (asm-label-reference symlabel))
(lambda ()
(append-instructions
(asm-label-definition-sz symlabel (+ symlen 2))
symbol-type-flag
(asm-number symlen)
(string-append "\"" (string-append s "\"")))
#f))))
; this doesn't handle escaped chars except newline, tab, quote, double quote and backslash
(define compile-char
(lambda (s env)
(append-instructions
ins-push
(string-append "'"
(string-append
(if (string=? s "#\\tab")
"\\t"
(if (string=? s "#\\newline")
"\\n"
(if (string=? s "#\\\\")
"\\\\"
(if (string=? s "#\\'")
"\\'"
(if (string=? s "#\\\"") "\\\""
(if (string=? s "#\\space")
" "
(substring s 2 3)))))))
"'"))) #f))
(define compile-reference
(lambda (r env)
(let ((i (lookup-reference r env)))
(if i
(begin
(append-instruction (string-append ";; Resolving symbol " r))
(assembly-env-val (length env) (car i) (cdr i))
(append-instruction (string-append ";; Resolved symbol " r)))
;; this is an error
(begin
;; this should really write to stderr.
(display (string-append "Undefined symbol: " r))
(newline)
(quit))
)
#f
)))
(define compile-atom
(lambda (x env quoted)
(if (string=? x "#t")
(begin (append-instructions ins-push true-value) #f)
(if (string=? x "#f")
(begin (append-instructions ins-push false-value) #f)
(if (string-is-numeric? x)
(compile-number x env)
(if (char=? (car (string->list x)) #\")
(compile-string x env)
(if (char=? (car (string->list x)) #\#)
(compile-char x env)
(if (string=? "nil" x)
(begin (assembly-nil) #f)
(if quoted
(compile-symbol x env)
(compile-reference x env))))))))))
;; Return the prefix of a list that is a "proper" list. e.g., given
;; `(1 2 3 . 4)` returns `(1 2 3)`
(define list-part
(lambda (l)
(letrec ((helper (lambda (l acc)
(if (pair? l)
(helper (cdr l) (cons (car l) acc))
(reverse acc)))))
(helper l '()))))
(define process-params
(lambda (plist)
(if (list? plist)
(begin ;; (display "plist is list")
;; (newline)
plist)
(let ((fixed-params (list-part plist))
(variadic-param (if (pair? plist) (cdr (last plist)) plist)))
(let ((nr-fixed-params (length fixed-params)))
(append-instructions
ins-push (asm-number nr-fixed-params)
ins-push (asm-label-reference "__u_make_varargs_list")
ins-call
ins-pop)
(append fixed-params (list variadic-param)))))))
; Hm, we should probably be flagging code pointers with something
; so that we can avoid gc'ing them. Right now the VM just assumes the
; code is statically defined below initial heap pointer but in order
; to support eval we'll have to do something more clever later.
(define compile-lambda
(lambda (l env rest lbl)
(let ((label (fresh-label lbl)))
(append-instructions
ins-rdrr ins-push (asm-label-reference label) )
(u-call-cons)
(lambda ()
(append-instruction (asm-label-definition label))
(let ((r (compile-sequence (cddr l)
(cons
(process-params (cadr l))
env) #f)))
(assembly-funret)
r)))))
(define compile-let-bindings
(lambda (bs env)
(if (null? bs) #f
(let ((r2 (compile-sexp (car (cdr (car bs))) env #t #f)))
(let ((r1 (compile-let-bindings (cdr bs) env)))
(lambda ()
(do-compile-task r1)
(do-compile-task r2)))))))
(define compile-let
(lambda (l env rest lbl)
(let ((r1 (compile-let-bindings (car (cdr l)) env))
(e (map (lambda (x) (car x)) (car (cdr l)))))
(assembly-make-args (length (cadr l)))
(append-instruction ins-rdrr)
(u-call-cons)
(append-instruction ins-wtrr)
(let ((r2 (compile-sequence (cdr (cdr l)) (cons e env) rest)))
(if rest
(begin
(append-instruction ins-rdrr)
(u-call-cdr)
(append-instruction ins-wtrr))
#f)
(lambda ()
(do-compile-task r1)
(do-compile-task r2))
))))
(define compile-set!
(lambda (l env rest lbl)
(let ((cell-id (lookup-reference (cadr l) env)))
(let ((r (compile-sexp (caddr l) env #t #f)))
(assembly-set-env-val (length env) (car cell-id) (cdr cell-id))
r))))
(define compile-letrec
(lambda (l env rest lbl)
(letrec ((empty-binders (map (lambda (b) (list (car b) (list "quote" '())))
(cadr l)))
(helper (lambda (binders body)
(if (null? binders) body
(helper (cdr binders)
(cons (cons "set!" (car binders))
body))))))
(compile-sexp
(cons "let"
(cons empty-binders
(helper (reverse (cadr l))
(cddr l))))
env rest #f))))
(define compile-begin
(lambda (l env rest lbl) (compile-sequence (cdr l) env rest)))
(define compile-sequence
(lambda (l env rest)
(if (null? l) #f
(let ((r1 (compile-sexp (car l) env (if (null? (cdr l)) rest #t) #f)))
(if (not (null? (cdr l)))
(append-instruction ins-pop)
#f
)
(let ((r2 (compile-sequence (cdr l) env rest)))
(lambda ()
(do-compile-task r1)
(do-compile-task r2)
))))))
; define is really sneaky in that it has to modify
; the environment (both symbolic and the non) so that
; whatever symbol is being defined can be referenced in
; lambda bodies that were declared previously (which is
; part of why lambda body compilation is delayed until
; after the main compilation). This involves using set-car!
; to modify both environment pointers such that
; (car post-env) == (cons v (car pre-env))
; where v is the value of the defined symbol and pre-env and
; post-env are the environments before and after the call.
(define compile-define
(lambda (l env rest lbl)
(append-instruction (string-append ";; Definition of " (car (cdr l))))
(let ((v (lookup-reference (car (cdr l)) env))
(r (compile-sexp (car (cdr (cdr l))) env #t (car (cdr l)))))
(if v
(begin
(append-instruction (string-append ";; Updating binding " (car (cdr l))))
(assembly-set-env-val (length env) (car v) (cdr v)))
(begin
(assembly-set-env-val (length env) (- (length env) 1) (length (car top-level-env)))
(set-cdr! top-level-env-endptr
(cons (list (car (cdr l)) "__nil")
(cdr top-level-env-endptr)))
(set! top-level-env-endptr (cdr top-level-env-endptr))))
(append-instruction ins-pop)
r)))
(define compile-and
(lambda (l env rest lbl)
(let ((out-label (fresh-label #f)))
(letrec ((helper (lambda (es rs)
(let ((r (compile-sexp (car es) env #t #f))
(es (cdr es)))
(if (null? es)
(begin
(append-instruction (asm-label-definition out-label))
(lambda () (do-compile-task r) (rs)))
(begin
(append-instructions ins-dup
ins-push false-value
ins-eq
ins-push (asm-label-reference out-label)
ins-jtrue