Lecture 4.2: Generic Testing via Polymorphism

Last class, we saw polymorphism, which allows us to implement functions that work for different types of inputs:

let rec map (f : 'a -> 'b) (xs : 'a list) : 'b list =
    match xs with
    | [] -> []
    | x :: xs' -> f x :: map f xs'

let ys = map (fun x -> if x then 1 else 2) [false; true; true] (* OK *)
let zs = map (fun x -> float_of_int (x + 1)) [0; 1; 2] (* OK *)

We also saw type parameters for data types, that allows us to talk about trees in general:

type 'a tree = 
    | Leaf of 'a
    | Node : 'a tree * 'a tree

type 'a my_option = 
    | MyNone
    | MySome of 'a

type 'a my_list = 
    | MyNil
    | MyCons of 'a * 'a my_list

(* Using multiple type variables: use tuple syntax *)
type ('a, 'e) result = 
    | Ok of 'a
    | Err of 'e

By combining these two ideas, a very powerful programming pattern emerges. Suppose you want to implement a set data structure holding strings. Thus, you will choose a particular underlying representation:

type my_set = 
    | Leaf 
    | Node of my_map * string * my_map 


let ms_mem : my_set -> string -> bool = ...
let ms_insert : my_set -> string -> my_set = ...
let ms_remove : my_set -> string -> my_set = ...

OK, great. Now, you write client code that uses the map:

let rec build_set (elts : string list) : my_set = 
    match kvs with
    | [] -> Leaf (* Empty my_set *)
    | v :: elts' -> ms_insert (build_set elts') v

let use_set (s : my_map) =
    match ms_mem s "A" with
    | false -> 1
    | true -> 2

Everything works smoothly so far. You can build sets up, and everything type checks. But now, let's say you want to support a new kind of set new_set (say, with trees that have three branches instead of two, or we store the strings via their hashes, which would have a different type). What would we do? One possibility is to just duplicate the functionality of build_set/use_set for new_set, but that is dangerous --- now we have multiple implementations that can go out of sync. This is made harder by the fact that build_set depends on certain internal details of my_set (it uses Leaf to return an empty set); because of this, we would need to refactor the code as well. Thus, we want to do something else: we want to make build_set/use_set polymorphic, and work for any type of set we pass in.

To make a more general build_set/use_set that can take any map, we need to define what a "set" data structure is. We can do this first on paper: given a type of values \(V\), a set of \(V\)s is given by a type \(S\) (the underlying representation) along with some operations on it:

Empty: \(S\)
Insert: \(S \to V \to S\)
Remove: \(S \to V \to S\)
Mem: \(S \to K \to \mathsf{bool}\).

We can translate this into OCaml by packing these operations together into a record.

type 's set_ops = {
    empty : 's;
    insert : 's -> string -> 's;
    remove : 's -> string -> 's;
    mem : 's -> string -> bool 
}

Then we can refactor our code to use any set data structure, by explicitly using the functions inside of set_ops:

let rec build_set (ops : 's set_ops) (elts : string list) : 's = 
    match elts with
    | [] -> ops.empty
    | v :: elts' -> ops.insert (build_set ops elts') v

let use_set (ops : 's set_ops) (s : 's) = 
    match ops.mem s "A" with
    | false -> 1
    | true -> 2

Here, build_set/use_set can use any set data structure, as long as they implement set_ops. We can then glue these functions to our implementation of my_set by providing set_ops:

let my_set_ops : my_set set_ops = {
    empty = Leaf;
    insert = ms_insert;
    remove = ms_remove;
    mem = ms_mem
}

let my_build_set : string list -> my_set = build_set my_set_ops

In this way, we have successfully decoupled the client of the set (the code that uses the set datatype) with the provider of the set (the code that defines my_set). Note that our refactoring forced us to call ops.empty in build_set, instead of making direct reference to Leaf. This is great for modularity: allowing different pieces of code to be put together in new combinations.

Testing Unknown Set Implementations

A curious thing happens when we compose build_set and use_set:

let build_and_use_set (ops : 's set_ops) (elts : string list) : int = 
    use_set ops (build_set ops elts)

While build_set returns an element of type 's, and use_set consumes an element of type 's, the combination doesn't touch anything of type 's directly at all: it has type 's set_ops -> string list -> int. This means we can test it. Remember our friend forall from last class:

let rec forall (gen : unit -> 'a) (prop : 'a -> bool) (trials : int) : 'a option =
    if trials <= 0 then     
        None (* No counter-example found *)
    else 
        let xs = gen () in 
        if prop xs then 
            forall gen prop (trials - 1)
        else Some xs

Let's use forall to write a formal spec for build_and_use_set. For example, that for any list of elts, build_and_use_set returns 2 only if "A" is in the list. Because we haven't yet chosen an actual implementation for set_ops, we will pass it in to our function:

(* Throws exception if given empty list *)
let random_elt (xs : 'a list) : unit -> 'a = fun _ ->
    List.nth xs (Random.int (List.length xs))


let gen_elts : unit -> string list = fun _ -> 
    let elts = ["A"; "B"; "C"; "D"; "E"; "F"; "G"; "H"; "I"; "J"] in 
    List.init 5 (

let test_build_and_use_set (ops : 's set_ops) =
    forall gen_elts (fun elts -> (build_and_use_set ops elts = 2) = (List.mem 2 elts)) 100

Let's test it out by actually implementing sets using lists:

let list_set_ops : (string list) set_ops = {
    empty = [];
    insert = (fun s x -> x :: s);
    remove = (fun s x -> List.filter (fun y -> y <> x) s); (* <> = "not equal" *)
    mem = (fun s x -> List.mem s x)
}

let list_set_tests : unit -> bool = fun _ -> 
    test_build_and_use_set list_set_ops

A Specification for Set

While the above test on build_and_use_set is a silly test (what if it behaves well on every string except "A"?), we can turn it into a very realistic test suite for sets. What should be true of a set?

If we add an element to a set, it should be present when we look it up: forall s, forall x, mem (insert s x) x = true;
If we remove an element from a set, it should be absent: forall s, forall x, mem (remove s x) x = false;
Nothing should be in the empty set: forall s, mem empty x = false.

Any other properties?

Adding one element shouldn't add other ones: forall s, forall x, forall y, mem s y = false -> y <> x -> mem (insert s x) y = false.
Removing one element shouldn't remove other ones: forall s, forall x, forall y, mem s y = true -> y <> x -> mem (remove s x) y = true.

How should we formalize this? First, because we are quantifying over multiple things, we should pull in gen_pair so we just need to use forall once:

let gen_pair (genA : unit -> 'a) (genB : unit -> 'b) : unit -> ('a * 'b) = fun _ -> 
    let x = genA () in 
    let y = genB () in 
    (x, y)

Next, we can write our specifications which take in the quantified variables as arguments. This is easy to do, since we've already written them in OCaml syntax:

let mem_insert (ops : 's set_ops) (s : 's) (x : string) : bool : 
    ops.mem (ops.insert s x) x = true

let mem_remove (ops : 's set_ops) (s : 's) (x : string) : bool : 
    ops.mem (ops.remove s x) x = false

let mem_empty (ops : 's set_ops) (x : string) : bool = 
    ops.mem ops.empty x = false

(We will refrain from formalizing the other two properties, but the same ideas will apply.)

Now, how should we use forall to check that they're true? We can use random_elt to sample a string from our random list of possible strings. But what about generating something of type 's?

Our trick here is to expand upon our build_set function. First, one thing we could do is randomly add a bunch of strings to an empty set:

let elts = ["A"; "B"; "C"; "D"; "E"; "F"; "G"; "H"; "I"; "J"]

let gen_string () : string = 
    List.nth elts (Random.int (List.length elts))

let rec mk_set (ops : 's set_ops) (n : int) = 
    if n <= 0 then ops.empty else
        ops.insert (mk_set ops (n - 1)) (gen_string ())

However, I could create a set that would pass the tests for mem_insert and mem_remove but still be wrong. Here's how the set would work: if I do two remove's in a row, return the empty set; otherwise, behave as normal. Because mem_insert and mem_remove only will call remove once, and mk_set never calls remove, we won't hit this bad case. Thus, instead, let's make mk_set call insert and remove.

let rec mk_set (ops : 's set_ops) (n : int) = 
    if n <= 0 then ops.empty else
        if Random.bool () then 
            ops.insert (mk_set ops (n - 1)) (gen_string ())
        else 
            ops.remove (mk_set ops (n - 1)) (gen_string ())

Now, we can put things together:

let check_mem_insert (ops : 's set_ops) =
    forall (gen_pair (mk_set ops 10) gen_string) (fun (s, x) -> mem_insert ops s x) 100

let check_mem_remove (ops : 's set_ops) =
    forall (gen_pair (mk_set ops 10) gen_string) (fun (s, x) -> mem_remove ops s x) 100

let check_mem_empty (ops : 's set_ops) = 
    forall gen_string (fun x -> mem_empty ops x)

let test_suite (ops : 's set_ops) : bool =
    check_mem_insert ops = None && 
    check_mem_remove ops = None && 
    check_mem_empty ops = None

Writing and Implementing Other Sets

Now that we have a good test suite for what a "set" is, we can evaluate it against some implementations.

let fun_set : (string -> bool) set_ops = {
    empty = (fun _ -> false);
    insert = (fun f s s' -> if s = s' then true else f s');
    remove = (fun f s s' -> if s = s' then false else f s');
    mem = (fun s -> f s);
}

let fun_set_ok () : bool = 
    test_suite fun_set

(* Below, 
    String.hash : string -> int 
    hashes the string, returning a unique integer per string
*)
let hash_set : (int list) set_ops = {
    empty = [];
    insert = (fun s x -> (String.hash s) :: x);
    remove = (fun s x -> List.filter (fun y -> y <> String.hash x) s);
    mem = (fun s x -> List.mem (String.hash x) s)
}

let fun_set_ok () : bool = 
    test_suite hash_set