8.3

Homework 13

home work!

Programming Language ISL+

Due Date Friday 12/10 at 9:00pm (Week 13)

Purpose To extend the type checker and explore StackLang and compilers. This is the complete assignment, including Extra Credit.

Graded Exercises

Type Checker

We are starting from code used in class:

; SIMPLE TYPED LANGUAGE
 
; An Expression is a:
; - Number
; - Boolean
; - (list AopName Expression Expression)
; - (list BopName Expression Expression)
; - (list CmpopName Expression Expression)
; - (list 'if Expression Expression Expression)
; - (list 'var Symbol)
; - (list 'lam Symbol Type Expression)
; - (list 'app Expression Expression)
 
(define AOPS '(+ - *))
; An AopName is a member of AOPS, all of which have type: Number Number -> Number
(define BOPS '(and or))
; An BopName is a member of BOPS, all of which have type: Boolean Boolean -> Boolean
(define CmpOPS '(> < =))
; An CmpopName is a member of CmpOPS, all of which have type: Number Number -> Boolean
 
; A Type is one of:
; - 'Number
; - 'Boolean
(define-struct funty [arg ret])
; - (make-funty Type Type)
 
(define (Fun ta tr) (make-funty ta tr))
 
; ensuretype : Environment Expression Type -> Boolean
; Check that expression e has type t, error if e's type does not match t
(define (ensuretype env e t)
  (local ((define ty-e (typecheck-env env e)))
    (if (equal? ty-e t)
        #true
        (error "Expression " e " has type " ty-e " but was expected to have type " t))))
 
(define-struct var:ty [var ty])
; An Environment is a [List of (make-var:ty Symbol Type)]
; Interp: A mapping from variables to types
 
; typecheck-env : Environment Expression -> Type
; returns the type of the expression e in environment env, or errors if the expression is ill-typed
; ACCUM: env is the accumulator which keeps track of all var:ty lambda-bindings encountered so far
(define (typecheck-env env e)
  (cond [(number? e) 'Number]
        [(boolean? e) 'Boolean]
        [(member? (first e) AOPS)
         (local [(define ty-1 (ensuretype env (second e) 'Number))
                 (define ty-2 (ensuretype env (third e) 'Number))]
           'Number)]
        [(member? (first e) BOPS)
         (local [(define ty-1 (ensuretype env (second e) 'Boolean))
                 (define ty-2 (ensuretype env (third e) 'Boolean))]
           'Boolean)]
        [(member? (first e) CmpOPS)
         (local [(define ty-1 (ensuretype env (second e) 'Number))
                 (define ty-2 (ensuretype env (third e) 'Number))]
           'Boolean)]
        [(symbol=? (first e) 'if)
         (local [(define ty-0 (ensuretype env (second e) 'Boolean))
                 (define ty-1 (typecheck-env env (third e)))
                 (define ty-2 (typecheck-env env (fourth e)))]
           (if (equal? ty-1 ty-2)
               ty-1
               (error "Branches of if expression " e "do not match")))]
        [(symbol=? (first e) 'var)
         (lookup-env env (second e))]
        [(symbol=? (first e) 'lam)
         (Fun (third e)
          (typecheck-env (upd-env env (second e) (third e))
                         (fourth e)))]
        [(symbol=? (first e) 'app)
         (local [(define ty-e1 (typecheck-env env (second e)))
                 (define ty-e2 (typecheck-env env (third e)))]
           (if (not (funty? ty-e1))
               (error "Trying to apply a non-function expression " (second e) " of type" ty-e1)
               (if (equal? (funty-arg ty-e1) ty-e2)
                   (funty-ret ty-e1)
                   (error "Trying to apply a function of type " ty-e1 " to an argument of type " ty-e2))))]))
 
; upd-env : Environment Symbol Type -> Environment
; Adds a variable:type binding to the environment env
(define (upd-env env var ty)
  (cons (make-var:ty var ty) env))
 
; lookup-env : Environment Symbol -> Type
; lookup the type associated with var in env
(define (lookup-env env var)
  (cond [(empty? env) (error "Variable not found " var)]
        [(cons? env) (if (symbol=? (var:ty-var (first env)) var)
                         (var:ty-ty (first env))
                         (lookup-env (rest env) var))]))
 
(check-error (lookup-env '() 'x))
(check-expect (lookup-env (list (make-var:ty 'a 'Number)
                                (make-var:ty 'c 'Boolean))
                          'c)
              'Boolean)
(check-expect (lookup-env (list (make-var:ty 'a 'Number)
                                (make-var:ty 'c 'Boolean)
                                (make-var:ty 'a 'Boolean))
                          'a)
              'Number)
 
; typecheck : Expression -> Type
; returns the type of the expression e or errors if the expression is ill-typed
(define (typecheck e)
  (typecheck-env empty e))
 
(check-expect (typecheck 1) 'Number)
(check-expect (typecheck #false) 'Boolean)
(check-expect (typecheck (list '+ 1 2)) 'Number)
(check-expect (typecheck (list '- (list '+ 1 2) 5)) 'Number)
(check-error (typecheck (list '+ #false 2)))
 
(check-error (typecheck (list 'if 1 #true #false)))
(check-error (typecheck '(if (> 3 9) 1 #true)))
(check-expect (typecheck '(if (> 3 9) 1 4)) 'Number)
(check-expect (typecheck '(and #false (> 2 2))) 'Boolean)
(check-error (typecheck '(and #false (+ 2 2))))

For reference, here are the typing rules we showed on the board for variables, lambda, and application:

 Γ(x) = t
---------- Var
Γ  x : t
 
 Γ,x:t1  e : t2
--------------------- Lam
Γ  λx:t1.e : t1  t2
 
Γ  e1 : t1  t2   Γ  e2 : t1
------------------------------ App
        Γ  e1 e2 : t2

Exercise 1 Your first task is to write tests for typecheck that involve typechecking lambda and application expressions. Provide at least four tests for each.

Exercise 2 A pair can be represented in our expression language as (e1, e2). where we can get the two components (first and second, respectively) with fst and snd. Pairs should have the pair type t1 x t2.

t ::= ... | t1 x t2 e ::= ... | (e1, e2) | fst e | snd e

We can write typing rules for all three expressions as follows:

Γ  e1 : t1   Γ  e2 : t2
------------------------- Pair
   Γ  (e1,e2) : t1 x t2
 
Γ  e : t1 x t2
--------------- Fst
Γ  fst e : t1
 
Γ  e : t1 x t2
--------------- Snd
Γ  snd e : t2
 

Extend the expression data definition, type data definition, and typechecker to handle pairs, fst, and snd.

A common type that we use in our signatures is an enumeration, e.g.,

; A Pet is one of
; - Dog
; - Cat

If Dog and Cat were themselves types, we said we could represent Pet as [Union Dog Cat]. This is an example of an "untagged" union, because there is no value (or tag) that allows us to distinguish that a given Dog is indeed a Pet. A more disciplined approach results in tagged unions (which we will call Sum types and write t1 + t2), which we will add in a restricted form to our language. We will add:

t ::= ... | t1 + t2 e ::= ... | inleft(t1,t2) e | inright(t1,t2) e | case e of inleft x => e1, inright y => e2

Where inleft and inright are how we construct values of our sum type, and case is how we use a value that has type t1 + t2. When evaluating a case expression, if the expression e evaluates to an inleft(t1,t2) v, we will bind v to x and evaluate e1, whereas if e evaluates to an inright v, we will bind v to y and evaluate e2, and the resulting types of e1 and e2 must be the same (though they can depend on values of different types: t1 and t2 respectively). Note that x and y here are placeholders for any variable, which can then we used in the corresponding branch.

NOTE: Since when you have inleft e (same for inright), you don’t know what the other type is, we explicitly annotate with both types (that’s what we write in the parentheses). In a language with type inference, writing these would not be necessary, but that would require a more complicated type checker.

The typing rules for inleft, inright, and case are as follows:

       Γ  e : t1
---------------------- Inleft
Γ  inleft(t1,t2) e : t1 + t2
 
       Γ  e : t2
----------------------- Inright
Γ  inright(t1,t2) e : t1 + t2
 
Γ  e : t1 + t2    Γ,x:t1  e1 : t    Γ,y:t2  e2 : t
----------------------------------------------------- Case
Γ  case e of inleft x => e1, inright y => e2 : t
 

Here in an example of how case evaluates:

case (inleft(Num,Bool) 10) of inleft x => x + 2, inright y => if y 3 4
====>
x=10 in x + 2 (this is a fake step just to show the substitution of x)
====>
10 + 2
====>
12
case (inright(Num,Bool) true) of inleft x => x + 2, inright y => if y 3 4
====>
y=true in if y 3 4 (this is a fake step just to show the substitution of y)
====>
if true 3 4
====>
3

Exercise 3 Extend the expression data definition and the type-checker to handle sum types.

You may have noticed that handling sum types shares some aspects with how if was handled. In fact, you can implement booleans in terms of a sum type where the actual values are ignored (as the important part is whether the boolean is an inleft or an inright, not the values that are carried). A helpful value to introduce in this case is the so-called "unit" value, which is called void in some languages. This value has the type Unit, and it is the only value of that type. In that sense, it carries no interesting information.

We can write the unit value as unit and its type as Unit, as follows:

t ::= ... | Unit e ::= ... | unit

And the typing rule is similarly uninteresting:

---------------
Γ  unit : Unit

Exercise 4 Extend the source language with cases for unit, and extend the typechecker to handle unit.

Exercise 5 Show how booleans can be translated into values of type Unit + Unit, and how an if expression can be translated into a case expression. Please write an expression for each of the following cases that does not use booleans or if, but captures the same meaning:

true
false
if e0 then e1 else e2

StackLang and Compilers

Here is part of an interpreter for StackLang, which we started working on in class. We’ve extended that code below, adding support for interpreting call and lam instructions, as well as the substitution that is required to deal with lam. We will go over the design of interp-call, interp-lam, and substitute (and substitute helpers) in class on Monday. Look over the code over the weekend and bring your questions to class.

; 
; StackLang ;;
; 
 
; P ::= (i-1,...,i-n)
; i ::= push v | sub | add | mul | if0 P P | call | lam x.P
; v ::= 'x | n | thunk P
; 
; S ::= (v-1, ..., v-n)  (where v-i are closed values)
; 
; (P,S) ==> S'    (corresponds to interp)
; 
; (i,P,S) ==> S'  (corresponds to interp-instr)
 
; A Program is a [List-of Instruction]
 
; An Instruction is a:
; - (list 'push Value)
; - (list 'sub)
; - (list 'add)
; - (list 'mul)
; - (list 'if0 Program Program)
; - (list 'call)
; - (list 'lam Symbol Program)
 
; A Value is a:
; - Symbol
; - (list 'num Number)
; - (list 'thunk Program)
 
; A Stack is a [List-of ClosedValue]
 
; A ClosedValue is a Value with no free variables.
 
; 
; INTERPRETER ;;
; 
 
; interp : Program Stack -> Stack
; runs a program with a given starting stack, producing the final stack or erroring.
(define (interp prog stk)
  (cond [(empty? prog) stk]
        [(cons? prog) (interp-instr (first prog) (rest prog) stk)]))
 
(check-expect (interp (list '(push (num 2)))
                      (list))
              (list '(num 2)))
(check-expect (interp (list '(push (num 2)) '(push (num 1)) '(sub))
                      (list))
              (list '(num -1)))
(check-error (interp '((push x))
                     (list)))
 
(check-expect (interp (list '(push (thunk ((push (num 1))))) '(call))
                      (list))
              (list '(num 1)))
 
(check-expect (interp (list '(push (num 0)) '(if0 ((push (num 10))) ((push (num 20)))))
                      (list))
              (list '(num 10)))
 
(define p1 (list '(push (num 1)) '(push (thunk ((lam x ((push x)))))) '(call)))
(check-expect (interp p1 (list))
              (list '(num 1)))
(define p2 (list '(push (num 2)) '(push (num 1)) '(push (num 3)) '(push (thunk ((lam x ((sub)))))) '(call)))
(check-expect (interp p2 (list))
              (list '(num -1)))
 
; interp-instr : Instruction Program Stack -> Stack
; runs an instruction with a stack and rest of program, producing the final stack or erroring.
(define (interp-instr i prog stk)
  (cond
    [(symbol=? (first i) 'push)
     (if (symbol? (second i))
         (error "Trying to interp a free variable: " (second i))
         (interp prog (cons (second i) stk)))]
    [(symbol=? (first i) 'sub) (interp-sub prog stk)]
    [(symbol=? (first i) 'add) (interp-add prog stk)]
    [(symbol=? (first i) 'mul) (interp-mul prog stk)]
    [(symbol=? (first i) 'if0) (interp-if0 (second i) (third i) prog stk)]
    [(symbol=? (first i) 'call) (interp-call prog stk)]
    [(symbol=? (first i) 'lam) (interp-lam (second i) (third i) prog stk)]))
 
(check-expect (interp-instr '(sub)
                            (list)
                            '((num 1) (num 2)))
              (list '(num -1)))
(check-expect (interp-instr '(push (num 1))
                            (list '(sub))
                            '((num 1)))
              (list '(num 0)))
(check-expect (interp-instr '(push (num 1))
                            (list)
                            '((num 1)))
              '((num 1) (num 1)))
 
 
; interp-sub : Program Stack -> Stack
; applies subtraction and then continues running the rest of the program on resulting stack
(define (interp-sub prog stk)
  ; DEFINE ME!
  stk)
 
; interp-add : Program Stack -> Stack
; applies addition and then continues running the rest of the program on resulting stack
(define (interp-add prog stk)
  ; DEFINE ME!
  stk)
 
; interp-mul : Program Stack -> Stack
; applies multiplication and then continues running the rest of the program on resulting stack
(define (interp-mul prog stk)
  ; DEFINE ME!
  stk)
 
; interp-call : Program Stack -> Stack
; pops a Program off the top of the stack and continues running the program, erroring if no thunk.
(define (interp-call prog stk)
  (cond
    [(< (length stk) 1)
     (error "Could not apply 'call, as the stack was empty.")]
    [(or (not (list? (first stk)))
         (not (equal? 'thunk (first (first stk)))))
     (error "Could not apply 'call, as the top of the stack was not a thunk.")]
    [else (interp (append (second (first stk)) prog)
                  (rest stk))]))
 
(check-expect (interp-call (list)
                           (list (list 'thunk '((push (num 1))))))
              (list '(num 1)))
(check-expect (interp-call (list)
                           (list (list 'thunk '((sub))) '(num 2) '(num 1)))
              (list '(num 1)))
 
; interp-if0 : Program Program Program Stack -> Stack
; pops a number off the stack;
; if number is 0, run thn Program followed by prog on the resulting stack,
; otherwise run els Program and then prog on the resulting stack;
; error if no number on top of stack.
(define (interp-if0 thn els prog stk)
 ; DEFINE ME!
  stk)
 
; -
; interp-lam : Symbol Program Program Stack -> Stack
; pops a value from the stack, substitutes it for x in lambda body,
; and runs body followed by prog on the rest of the stack
(define (interp-lam x body prog stk)
  (cond
    [(< (length stk) 1)
     (error "could not apply 'lam, as the stack was empty")]
    [else (interp (append (substitute x (first stk) body) prog)
                  (rest stk))]))
 
(check-expect (interp-lam 'x (list '(push x)) '()
                          (list '(num 1)))
              (list '(num 1)))
 
; substitute : Symbol ClosedValue Program -> Program
; substitutes free occurrences of x with v in prog
(define (substitute x v prog)
  (cond [(empty? prog) prog]
        [(cons? prog) (cons (substitute-instr x v (first prog)) (substitute x v (rest prog)))]))
 
(check-expect (substitute 'x '(num 1) (list '(push x) '(push (num 2))))
              (list '(push (num 1)) '(push (num 2))))
(check-expect (substitute 'x '(num 1) (list '(push x) '(lam x ((push x)))))
              (list '(push (num 1)) '(lam x ((push x)))))
 
; substitute-instr : Symbol ClosedValue Instruction -> Instruction
; substitutes free occurrences of x with v in instruction i
(define (substitute-instr x v i)
  (cond
    [(symbol=? (first i) 'push)
     (list 'push (substitute-val x v (second i)))]
    [(symbol=? (first i) 'sub) i]
    [(symbol=? (first i) 'mul) i]
    [(symbol=? (first i) 'add) i]
    [(symbol=? (first i) 'if0)
     (list 'if0 (substitute x v (second i))
           (substitute x v (third i)))]
    [(symbol=? (first i) 'call) i]
    [(symbol=? (first i) 'lam)
     (if (not (symbol=? (second i) x))
         (list 'lam (second i) (substitute x v (third i)))
         i)]))
 
(check-expect (substitute-instr 'x '(num 1) '(push x))
              '(push (num 1)))
(check-expect (substitute-instr 'x '(num 1) '(lam x ((push x))))
              '(lam x ((push x))))
 
; substitute-val : Symbol ClosedValue Value -> Value
; substitutes free occurrences of x with v in original value v0
(define (substitute-val x v v0)
  (cond [(symbol? v0)
         (if (symbol=? x v0) v v0)]
        [(symbol=? (first v0) 'num) v0]
        [(symbol=? (first v0) 'thunk) (list 'thunk (substitute x v (second v0)))]))

Exercise 6 In the above interpreter, we are missing the implementation and test cases for the 'sub,'add,'mul, and 'if0 cases. Fill in the definitions and add appropriate test cases.

Exercise 7 When writing programs in stack languages (or, more likely, when generating stack programs, as they are less likely to be written by hand), there are certain common stack "bookkeeping" functions that we want to be able to use. Please implement the following as StackLang Programs (i.e., a [List-of Instruction]). These should not handle errors (i.e., if they are applied when there are not enough values on the stack, the interpreter will report the error).

; dup : Program
; dup duplicates the top value on the stack,
; transforming a stack from v1,v2,v3... to v1,v1,v2,v3...
(define dup ...)
 
; drop : Program
; drop drops the top value from the stack,
; transforming a stack from v1,v2,v3... to v2,v3...
(define drop ...)
 
; swap : Program
; swap swaps the top two values on the stack,
; transforming a stack from v1,v2,v3... to v2,v1,v3...
(define swap ...)

Exercise 8 In Exercise 2 above you added pairs to the Simply Typed Language, and we would like to add them as values to StackLang as well.

Please extend the Value and ClosedValue definitions and add a new instruction unwrap which expects a pair to be on the top of the stack, removes it, and pushes the two elements of the pair (right _below_ left) onto the stack. Update any other aspects of the interpreter (in particular, substitution) that you need.

Exercise 9 Implement a StackLang Program (call it diverge), that does not terminate when we run it with interp on an empty stack.
; diverge : Program
; diverge runs forever when run with an empty stack.
(define diverge ...)

Exercise 10 (EXTRA CREDIT)

The Extra Credit involves extending the compiler we developed in lecture.

In Exercise 2 and 3 above, you extended the Simply Typed Language with pair and sum types along with expressions for pair, fst, snd, inleft, inright, and case. We want to be able to use these expressions in the programs that we write in the Simply Typed Language, which means we need to be able to compile them to StackLang so that we can run them with our interpreter.

First, extend StackLang with one final instruction, lt, which expects two numbers at the top of the stack, removes them, and if the first number from the top of the stack is less than the second number, it pushes 1 onto the stack, otherwise it pushes 0 onto the stack.

Below is the partial code for compile. Your next task is to complete the function: specifically, complete the cases for compling BOPS, CmpOPS, pair, fst, snd, inleft, inright, and case expressions into StackLang, and write corresponding test cases. You may find unwrap useful when compiling some of these expressions. To understand the behavior of case, please re-read the section before Exercise 3 above.

; An Expression is a:
; - Number
; - Boolean
; - (list AopName Expression Expression)
; - (list BopName Expression Expression)
; - (list CmpopName Expression Expression)
; - (list 'if Expression Expression Expression)
; - (list 'var Symbol)
; - (list 'lam Symbol Type Expression)
; - (list 'app Expression Expression)
 
; compile : Expression -> Program
; translates a Simply Typed Expression into a StackLang Program
(define (compile e)
  (cond [(number? e) `((push (num ,e)))]
        [(boolean? e) `((push (num ,(if e 1 0))))] ; true compiles to 1, false compiles to 0
        [(member? (first e) AOPS)
         (append (compile (third e))
                 (compile (second e))
                 (list (compile-aop (first e))))]
        [(symbol=? (first e) 'if) (append
                                   (compile (second e)) ; second e is bool
                                  `((if0
                                    ,(compile (fourth e))     ; else branch of e
                                    ,(compile (third e)))))]  ; then branch of e
        [(member? (first e) BOPS) ...]
        [(member? (first e) CmpOPS) ...]
        [(symbol=? (first e) 'var) `((push ,(second e)))]
        [(symbol=? (first e) 'lam) `((push
                                      (thunk
                                       ((lam ,(second e)
                                             ,(compile (fourth e)))))))]
        [(symbol=? (first e) 'app) (append (compile (third e))
                                           (compile (second e))
                                           '((call)))]
         [(symbol=? (first e) 'pair) ...]
         [(symbol=? (first e) 'fst) ...]
         [(symbol=? (first e) 'snd) ...]
         [(symbol=? (first e) 'inleft) ...]
         [(symbol=? (first e) 'inright) ...]
         [(symbol=? (first e) 'case) ...]))
 
; compile-aop : AopName -> Instruction
; translates an AopName into the corresponding instruction
(define (compile-aop aop)
  (cond [(symbol=? aop '+) '(add)]
        [(symbol=? aop '-) '(sub)]
        [(symbol=? aop '*) '(mul)]))