TC-6 Canonicalization Samples (The Tiger Compiler Project Assignment)

The first task in TC-6 is getting rid of all the eseq. To do this, you have to move the statement part of an eseq at the end of the current sequence point, and keeping the expression part in place.

let function print_ints(a: int, b: int) =
    (print_int(a); print(", "); print_int(b); print("\n"))
    var a := 0
in
  print_ints(1, (a := a + 1; a))
end

$ tc -eH preincr-1.tig
/* == High Level Intermediate representation. == */
label l1
        ", "
label l2
        "\n"
# Routine: print_ints
label l0
# Prologue
move
  temp t2
  temp fp
move
  temp fp
  temp sp
move
  temp sp
  binop sub
    temp sp
    const 4
move
  mem
    temp fp
  temp i0
move
  temp t0
  temp i1
move
  temp t1
  temp i2
# Body
seq
  sxp
    call
      name print_int
      temp t0
    call end
  sxp
    call
      name print
      name l1
    call end
  sxp
    call
      name print_int
      temp t1
    call end
  sxp
    call
      name print
      name l2
    call end
seq end
# Epilogue
move
  temp sp
  temp fp
move
  temp fp
  temp t2
label end

# Routine: _main
label main
# Prologue
# Body
seq
  seq
    move
      temp t3
      const 0
    sxp
      call
        name l0
        temp fp
        const 1
        eseq
          move
            temp t3
            binop add
              temp t3
              const 1
          temp t3
      call end
  seq end
  sxp
    const 0
seq end
# Epilogue
label end

$ tc -eL preincr-1.tig
/* == Low Level Intermediate representation. == */
label l1
        ", "
label l2
        "\n"
# Routine: print_ints
label l0
# Prologue
move
  temp t2
  temp fp
move
  temp fp
  temp sp
move
  temp sp
  binop sub
    temp sp
    const 4
move
  mem
    temp fp
  temp i0
move
  temp t0
  temp i1
move
  temp t1
  temp i2
# Body
seq
  label l3
  sxp
    call
      name print_int
      temp t0
    call end
  sxp
    call
      name print
      name l1
    call end
  sxp
    call
      name print_int
      temp t1
    call end
  sxp
    call
      name print
      name l2
    call end
  label l4
seq end
# Epilogue
move
  temp sp
  temp fp
move
  temp fp
  temp t2
label end

# Routine: _main
label main
# Prologue
# Body
seq
  label l5
  move
    temp t3
    const 0
  move
    temp t5
    temp fp
  move
    temp t3
    binop add
      temp t3
      const 1
  sxp
    call
      name l0
      temp t5
      const 1
      temp t3
    call end
  label l6
seq end
# Epilogue
label end

The example above is simple because ‘1’ commutes with ‘(a := a + 1; a)’: the order does not matter. But if you change the ‘1’ into ‘a’, then you cannot exchange ‘a’ and ‘(a := a + 1; a)’, so the translation is different. Compare the previous LIR with the following, and pay attention to

let function print_ints(a: int, b: int) =
    (print_int(a); print(", "); print_int(b); print("\n"))
    var a := 0
in
  print_ints(a, (a := a + 1; a))
end

$ tc -eL preincr-2.tig
/* == Low Level Intermediate representation. == */
label l1
        ", "
label l2
        "\n"
# Routine: print_ints
label l0
# Prologue
move
  temp t2
  temp fp
move
  temp fp
  temp sp
move
  temp sp
  binop sub
    temp sp
    const 4
move
  mem
    temp fp
  temp i0
move
  temp t0
  temp i1
move
  temp t1
  temp i2
# Body
seq
  label l3
  sxp
    call
      name print_int
      temp t0
    call end
  sxp
    call
      name print
      name l1
    call end
  sxp
    call
      name print_int
      temp t1
    call end
  sxp
    call
      name print
      name l2
    call end
  label l4
seq end
# Epilogue
move
  temp sp
  temp fp
move
  temp fp
  temp t2
label end

# Routine: _main
label main
# Prologue
# Body
seq
  label l5
  move
    temp t3
    const 0
  move
    temp t5
    temp fp
  move
    temp t6
    temp t3
  move
    temp t3
    binop add
      temp t3
      const 1
  sxp
    call
      name l0
      temp t5
      temp t6
      temp t3
    call end
  label l6
seq end
# Epilogue
label end

$ tc -eH preincr-2.tig >preincr-2.hir

$ havm preincr-2.hir
0, 1

$ tc -eL preincr-2.tig >preincr-2.lir

$ havm preincr-2.lir
0, 1

Be very careful when dealing with mem. For instance, rewriting something like:

call(foo, eseq(move(temp t, const 51), temp t))

move temp t1, temp t
move temp t, const 51
call(foo, temp t)

is wrong: ‘temp t’ is not a subexpression, rather it is being defined here. You should produce:

move temp t, const 51
call(foo, temp t)

move(temp t, foo)
move(mem(temp t), x)

move(temp t, mem(foo))
move(temp t, x)

In other words, the first subexpression of ‘move(mem(foo), )’ is ‘foo’, not ‘mem(foo)’. The following example is a good crash test against this problem:

let type int_array = array of int
    var tab := int_array [2] of 51
in
  tab[0] := 100;
  tab[1] := 200;
  print_int(tab[0]); print("\n");
  print_int(tab[1]); print("\n")
end

$ tc -eL move-mem.tig >move-mem.lir

$ havm move-mem.lir
100
200

print(chr(ord("\n")))

$ tc -L nested-calls.tig
/* == Low Level Intermediate representation. == */
label l0
        "\n"
# Routine: _main
label main
# Prologue
# Body
seq
  label l1
  move
    temp t1
    call
      name ord
      name l0
    call end
  move
    temp t2
    call
      name chr
      temp t1
    call end
  sxp
    call
      name print
      temp t2
    call end
  label l2
seq end
# Epilogue
label end

There are only two valid call forms: ‘sxp(call(...))’, and ‘move(temp(...), call(...))’.

Contrary to C, the HIR and LIR always denote the same value. For instance the following Tiger code:

let
  var a := 1
  function a(t: int) : int =
     (a := a + 1;
      print_int(t); print(" -> "); print_int(a); print("\n");
      a)
  var b := a(1) + a(2) * a(3)
in
  print_int(b); print("\n")
end

$ tc -L seq-point.tig >seq-point.lir

$ havm seq-point.lir
1 -> 2
2 -> 3
3 -> 4
14

independently of the what IR you ran. It has nothing to do with operator precedence!

In C, you have no such guarantee: the following program can give different results with different compilers and/or on different architectures.

#include <stdio.h>

int a_ = 1;
int
a(int t)
{
  ++a_;
  printf("%d -> %d\n", t, a_);
  return a_;
}

int
main(void)
{
  int b = a(1) + a(2) * a(3);
  printf("%d\n", b);
  return 0;
}

4.15.2.1 TC-6 Canonicalization Samples