Lecture 2.2: Lists, Pairs, Option types; Recursion; Formalizing a property using lists

Recursion

Given what we have seen so far, it is easy for us to print a given string a fixed number of times in a row:

let print_three_times (s : string) = 
    print_endline s;
    print_endline s;
    print_endline s;
    ()

But how would we print something n times in a row, where n is an arbitrary integer? If you were writing in Python or Java, you may be tempted to use a loop. While this is technically possible, OCaml programmers generally prefer to use recursion. Here is an example:

let rec print_n_times (n : int) (s : string) : unit = 
    if n > 0 then 
        (print_endline s; print_n_times (n - 1) s)
    else 
        ()

Whenever we use recursion, we need to write let rec instead of let. print_n_times, when given n and s, first checks if n is bigger than zero; if it is, we print s and continue with n - 1; otherwise, we stop and return ().

Parsing ; and if/then/else

Note above that we had to wrap the "then" branch above with parentheses. If we didn't, we would get a syntax error, because OCaml would get confused about where the "then" branch begins and ends.

To make a function recursive in OCaml, it needs be written as a let (as opposed to fun args -> ...). Then, you need to annotate it with rec after the let.

Exercise

let rec f (n : int) : int = 
    if n = 0 then 1 else 
    if n = 1 then 1 else 
    f (n - 2) + f (n - 1)

(* What does this evaluate to? *)
f 5

Exercise

let rec f (n : int) : string = 
    if n = 0 then "" else 
    (string_of_int n) ^ " " ^ (f (n - 1))

(* What does this evaluate to? *)
f 3

More types

So far, we have only worked with int, string, unit, bool, and function types. Let's now see some more types we can use.

Lists

Basic lists in OCaml are created by using the following syntax:

utop # [1; 2; 3];;
- : int list = [1; 2; 3]

Note a few things:

Lists are separated by ;, rather than , as in other languages.
Given a type t (e.g., int), the type of lists of t is t list.
Lists can only hold a single type of value, so [1; "hi"] is ill-typed.

Given a list, you can add an element to the front by using the cons operator:

utop # let x = [1; 2; 3];;
- : int list = [1; 2; 3]

utop # 0 :: x;;
- : int list = [0; 1; 2; 3]

Lists in OCaml are either empty, written [], or a cons of two elements: x :: xs. Given a list, we can destruct it via pattern matching:

let rec sum (xs : int list) : int = 
    match xs with
    | [] -> 0
    | x::xs' -> x + sum xs'

More examples

let rec len (xs : int list) : int = 
match xs with 
| [] -> 0
| _x::xs' -> 1 + sum xs' (* We use variables with underscores if we don't use the result. *)

let rec add1 (xs : int list) : int list = 
match xs with 
| [] -> []
| x::xs' -> (1 + x) :: add1 xs'

let rec mul2 (xs : int list) : int list = 
match xs with 
| [] -> []
| x::xs' -> (2 * x) :: add1 xs'

(* Generalizing add1 and mul2, by applying an arbitrary function to each element. *)
let rec map (f : int -> int) (xs : int list) : int list = 
match xs with 
| [] -> []
| x::xs' -> (f x) :: map f xs'

(* We can recover add1 and mul2 by instantiating the function. *)
let add1 = map (fun (x : int) -> x + 1)
let mul2 = map (fun (x : int) -> x * 2)

(* Generalizing sum, by using an arbitrary function with an accumulator. *)
let rec fold (f : int -> int -> int) (acc : int) (xs : int list) : int = 
match xs with 
| [] -> acc
| x::xs' -> 
    let acc' = f acc x in 
    fold f acc' xs'

(* sum is then given by instantiating the function and the initial value of the accumulator. *)
let sum = fold (fun (acc : int) (x : int) -> acc + x) 0
let mul = fold (fun (acc : int) (x : int) -> acc * x) 1

let max_nonneg = fold (fun (acc : int) (x : int) -> if acc <= x then x else acc) 0
(* TODO: What if we got the accumulator wrong for mul? What if it was 0? *)

let rec append (xs : int list) (ys : int list) : int list = 
match xs with 
| [] -> ys
| x::xs' -> x :: append xs' ys

let rec reverse (xs : int list) : int list = 
match xs with 
| [] -> [] (* Reverse of empty is empty *)
| x::xs -> append (reverse xs) [x] (* Reverse the tail; append x at the end of it *)

Pairs

We can create pairs by using tuple syntax:

utop # (2, 3);;
- : int * int = (2, 3)

Note

Strictly speaking, the parentheses are not needed:

utop # 2,3;;
- : int * int = (2, 3)

but we will always use them.

We can see that the type of a pair of two ints is int * int. Given a pair, we can get the components by using fst and snd:

utop # let x = (2, 3);;
val x : int * int = (2, 3)

utop # fst x;;
- : int = 2

utop # snd x;;
- : int = 3

We can also pattern match on pairs:

utop # let p = (2, 3) in 
        match p with 
        | (x, y) -> x + y ;; 
- : int = 5

Additionally, we can destruct pairs through let-bindings:

utop # let p = (2, 3) in 
       let (x, y) = p in 
       x;;
- : int = 2

Pairs can have more than two elements, but fst and snd doesn't work for them, so we must use pattern matching:

utop # let p = (1, 2, 3, 4) in 
       let (x, y, z, w) = p in 
       x;;
- : int = 1

Options

If you want to write a function that could fail --- or simply not return a result --- you can use an option type: for any type t, a value of type t option can either be None (indicating no value), or Some v for some v of type t. Let's see option types in action:

let rec last (xs : int list) : int option = 
    match xs with
    | [] -> None
    | x::xs -> 
        match xs with 
        | [] -> Some x
        | _ -> last xs

To compute the last element of a list, we first pattern match on it; if the list is empty, it has no last element, so we return None. If the list is nonempty, it is of the form x::xs; we then pattern match on the rest of the list: if the rest of the list is empty, we return Some x (since x must be the last element); otherwise, we return last xs, since we haven't yet hit the end of the list.

Just like with lists and pairs, we inspect option types via pattern matching:

let print_last (xs : int list) : unit = 
    match last xs with
    | None -> print_endline "List has no last element!"
    | Some x -> print_string "Last element is: "; print_int x; print_string "\n"

Let's see it in action:

utop # print_last [1;2;3];;
Last element is: 3
- : unit = ()

utop # print_last [];;
List has no last element!
- : unit = ()

Checked arithmetic

In OCaml (like other languages), the type of integer division is int -> int -> int. If we pass 0 as the second argument, we get an exception:

utop # 3 / 0;;
Exception: Division_by_zero.

One way to avoid throwing an exception is to write a checked version that refuses to run if the second argument is zero:

let divide_checked (i : int) (j : int) : int option = 
    if j = 0 then None else Some (i / j)

Evaluating specifications on lists

Earlier, we wrote two functions: sum : int list -> int, which sums up the list; and reverse : int list -> int list, which reverses the list. Last time, we showed via max3 how you can specify the correctness of an implementation by constructing a relation between inputs and outputs. This time, we will show a different kind of specification: one that relates two different implementations. Often, by composing two different functions together, you can express properties that are likely to catch bugs that you wouldn't catch by just looking at the functions in isolation.

How are sum and reverse related? Well, what happens if I compose them?

let sum_rev (xs : int list) = sum (reverse xs)

This should give me the same answer as just sum xs, because + is commutative and associative (so it doesn't matter in what order I sum elements together).

We can express this as a mathematical property:

\[ \forall xs : \mathsf{int\ list}, \mathsf{sum}(\mathsf{reverse}(xs)) = \mathsf{sum}(xs). \]

We can then implement it as a test on inputs: sum (reverse xs) = sum xs.

Thus, our desired property is now that sum_rev_test should return true on all inputs. To evaluate whether this is actually true, we can write a "tester" like we did last time for max3, which randomly generates an input to test. But, we've hit a hurdle: how do we randomly generate inputs to sum_rev_test?

We can do so by writing a generator for int list, which generates random numbers.This generator will first generate a length, and then generate a random list of that length:

let rec mk_list_of_length (len : int) : int list = 
    if len <= 0 then [] else 
        (* Each element is random from 0..500 *)
        let x = Random.int 500 in 
        let rest = mk_list_of_length (len - 1) in 
        x :: rest

let list_gen () : int list = 
    (* Length is random from 0..300 *)
    let len = Random.int 300 in 
    mk_list_of_length len

Finally, we can write our tester:

let string_of_int_list (xs : int list) = 
    let rec go (xs : int list) = 
        match xs with 
        | [] -> "]"
        | x::xs' -> string_of_int x ^ "; " ^ go xs'
    in
    "[" ^ go xs

let sum_rev_test (sum : int list -> int) (rev : int list -> int list) = 
    let xs = list_gen () in 
    if sum (rev xs) = sum xs then
        () (* OK *)
    else 
        print_endline ("Found a counter-example: " ^ string_of_int_list xs)

Of course, running one test in isolation won't always find bugs in programs. However, we can modify this example to run many tests until one succeeds:

let sum_rev_test (sum : int list -> int) (rev : int list -> int list) : string option = 
    let xs = list_gen () in 
    if sum (rev xs) = sum xs then
        None (* OK, no bug *)
    else 
        Some ("Found a counter-example: " ^ string_of_int_list xs)

let rec do_tests (sum : int list -> int) (rev : int list -> int list) (n : int) = 
    if n <= 0 then 
        print_endline "All tests succeeded!"
    else 
        match sum_rev_test sum rev with
        | None -> do_tests sum rev (n - 1) (* OK, continue *)
        | Some s -> print_endline s (* Found a bug; stop *)