summaryrefslogtreecommitdiff
path: root/docs/opt-coercion/fc-normalization-rta.tex
diff options
context:
space:
mode:
Diffstat (limited to 'docs/opt-coercion/fc-normalization-rta.tex')
-rwxr-xr-xdocs/opt-coercion/fc-normalization-rta.tex1627
1 files changed, 1627 insertions, 0 deletions
diff --git a/docs/opt-coercion/fc-normalization-rta.tex b/docs/opt-coercion/fc-normalization-rta.tex
new file mode 100755
index 0000000000..a1e7d4201d
--- /dev/null
+++ b/docs/opt-coercion/fc-normalization-rta.tex
@@ -0,0 +1,1627 @@
+\documentclass[a4paper,UKenglish]{lipics}
+
+\usepackage{enumerate}
+%% \usepackage{abbrev}
+\usepackage{xspace}
+\usepackage{denot}
+\usepackage{prooftree}
+\usepackage{afterpage}
+\usepackage{float}
+%% \usepackage{pstricks}
+\usepackage{url}
+
+\usepackage{amsthm}
+
+\usepackage{latexsym}
+%% %% less space consuming enumerates and itemizes
+\usepackage{mdwlist}
+\usepackage{stmaryrd}
+
+%% \usepackage{amsfonts}
+%% \usepackage{amssymb}
+%% % Local packages
+\usepackage{code}
+
+%% % \newcommand{\text}[1]{\mbox{#1}}
+
+%% \newcommand{\reach}[2]{\widehat{#1}(#2)}
+%% \newcommand{\ftv}[2]{ftv^{#1}(#2)}
+
+\usepackage{color}
+%% %% \newcommand{\color}[1]{}
+
+%% % \newcommand{\scf}{\sigma^\dagger}
+%% % \newcommand{\rcf}{\rho^\dagger}
+%% % \newcommand{\tcf}{\tau^\dagger}
+
+\newcommand{\simon}[1]{{\bf SPJ:}\begin{color}{blue} #1 \end{color}}
+\newcommand{\dv}[1]{{\bf DV:}\begin{color}{red} #1 \end{color}}
+\def\fiddle#1{\hspace*{-0.8ex}\raisebox{0.1ex}{$\scriptscriptstyle#1$}}
+
+\newcommand{\highlight}[1]{\colorbox{green}{\ensuremath{#1}}}
+
+
+\def\twiddleiv{\endprooftree\qquad\prooftree} % ~~~~
+\def\twiddlev{\endprooftree\\ \\ \prooftree} % ~~~~~...
+\def\rulename#1{\textsc{#1}}
+\def\minusv#1{\using\text{\rulename{#1}}\justifies} % \minusv...
+
+
+\newcommand{\OK}[2]{#1 \vdash^{\fiddle{\sf{E}}} #2}
+\newcommand{\wfe}{\vdash^{\fiddle{\sf{E}}}}
+\newcommand{\wfco}{\vdash^{\fiddle{\sf{co}}}}
+\newcommand{\wftm}{\vdash^{\fiddle{\sf{tm}}}}
+\newcommand{\wfty}{\vdash^{\fiddle{\sf{ty}}}}
+\newcommand{\unboxed}[1]{\mathop{unboxed}(#1)}
+
+\newcommand{\psim}{\mathrel{\sim_{\tiny \#}}}
+\newcommand{\static}{\textsf{$Constraint_{\#}$}}
+
+%% %% \newtheorem{theorem}{Theorem}[section]
+%% %% %% \newtheorem{proof}{Proof}[]
+%% %% \newtheorem{lemma}[theorem]{Lemma}
+%% %% \newtheorem{proposition}[theorem]{Proposition}
+%% %% \newtheorem{corollary}[theorem]{Corollary}
+
+%% %% bibliography guidelines
+%% \usepackage{natbib}
+%% \bibpunct();A{},
+%% \let\cite=\citep
+
+\def\rulename#1{\textsc{#1}}
+\def\ruleform#1{\fbox{$#1$}}
+
+
+%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% % Floats
+
+%% %% \renewcommand{\textfraction}{0.1}
+%% %% \renewcommand{\topfraction}{0.95}
+%% %% \renewcommand{\dbltopfraction}{0.95}
+%% %% \renewcommand{\floatpagefraction}{0.8}
+%% %% \renewcommand{\dblfloatpagefraction}{0.8}
+
+%% %% \setlength{\floatsep}{16pt plus 4pt minus 4pt}
+%% %% \setlength{\textfloatsep}{16pt plus 4pt minus 4pt}
+
+%% % Figures should be boxed
+%% % *** Uncomment the next two lines to box the floats ***
+\floatstyle{boxed}
+\restylefloat{figure}
+
+%% %% % Keep footnotes on one page
+%% %% \interfootnotelinepenalty=10000
+
+%% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% %% % Indentation
+%% %% \setlength{\parskip}{0.35\baselineskip plus 0.2\baselineskip minus 0.1\baselineskip}
+%% %% \setlength{\parsep}{\parskip}
+%% %% \setlength{\topsep}{0cm}
+%% %% \setlength{\parindent}{0cm}
+
+%% \renewcommand{\phi}{\varphi}
+
+\newcommand{\E}{{\cal E}}
+\newcommand{\ol}[1]{\overline{#1}}
+\newcommand{\sym}[1]{\mathop{sym}\, #1}
+\newcommand{\bnd}{\mathop{bnd}}
+\newcommand{\cval}{\textsf{cv}}
+\newcommand{\nfval}{\textsf{nf}}
+\newcommand{\tval}{\textsf{tv}}
+\newcommand{\val}{\textsf{val}}
+\newcommand{\clift}[1]{\lfloor#1\rfloor}
+\newcommand{\lifting}[2]{[#1]{\uparrow}(#2)}
+\newcommand{\erase}[2]{\{#2\}_{#1}}
+\newcommand{\nth}[2]{\mathop{nth} #1\;#2}
+\newcommand{\inst}[2]{#1@#2}
+\newcommand{\refl}[1]{\langle#1\rangle} % Reflexivity
+
+\newcommand{\tcase}[2]{\mathbf{case}\;#1\;\mathbf{of}\;\ol{#2}}
+\newcommand{\tlet}[4]{\mathbf{let}\;#1{:}#2 = #3\;\mathbf{in}\;#4}
+\newcommand{\tcast}[2]{#1\;\triangleright\;#2}
+\newcommand{\rsa}[1]{\rightsquigarrow_{#1}}
+\newcommand{\as}{\ol{a}}
+\newcommand{\bs}{\ol{b}}
+\newcommand{\cs}{\ol{c}}
+\newcommand{\ds}{\ol{d}}
+\newcommand{\es}{\ol{e}}
+\newcommand{\fs}{\ol{f}}
+\newcommand{\gs}{\ol{g}}
+
+\newcommand{\alphas}{\ol{\alpha}}
+\newcommand{\betas}{\ol{\beta}}
+\newcommand{\gammas}{\ol{\gamma}}
+\newcommand{\deltas}{\ol{\delta}}
+\newcommand{\epsilons}{\ol{\epsilon}}
+\newcommand{\zetas}{\ol{\zeta}}
+\newcommand{\etas}{\ol{\eta}}
+
+\newcommand{\phis}{\ol{\phi}}
+
+\newcommand{\sigmas}{\ol{\sigma}}
+\newcommand{\taus}{\ol{\tau}}
+
+\newcommand{\xs}{\ol{x}}
+
+%% %% \theoremstyle{plain}
+%% %% \newtheorem{definition}{Definition}[section]
+
+
+\title{Evidence normalization in System FC}
+
+\author{Dimitrios Vytiniotis}
+\author{Simon Peyton Jones}
+\affil{Microsoft Research, Cambridge}
+
+ %% \{\{dimitris,simonpj@microsoft.com}}
+
+\authorrunning{D. Vytiniotis and S. Peyton Jones}
+% \Copyright{TO BE PROVIDED}
+
+\subjclass{F.4.2 Grammars and Other Rewriting Systems}
+
+
+\begin{document}
+
+
+
+%% \preprintfooter{\textbf{--- DRAFT submitted to ICFP 2011 ---}}
+
+%% \conferenceinfo{ICFP'08,} {September 22--24, 2008, Victoria, BC, Canada.}
+%% \CopyrightYear{2008}
+%% \copyrightdata{978-1-59593-919-7/08/09}
+
+
+
+%% \category{D.3.3}{Language Constructs and Features}{Abstract data types}
+%% \category{F.3.3}{Studies of Program Constructs}{Type structure}
+
+%% \terms{Design, Languages}
+
+%% \keywords{Type equalities, Deferred type errors, System FC}
+
+
+%% Theory of computation → Rewrite systems
+%% Software and its engineering → Data types and structures
+
+
+\maketitle
+\makeatactive
+
+\begin{abstract}
+System FC is an explicitly typed language that serves as the target language for Haskell
+source programs. System FC is based on System F with the addition of erasable but explicit type equality
+proof witnesses. Equality proof witnesses are generated from type inference performed on source Haskell
+programs. Such witnesses may be very large objects, which causes performance degradation in later stages of
+compilation, and makes it hard to debug the results of type inference and subsequent program transformations.
+In this paper we present an equality proof simplification algorithm, implemented in GHC, which greatly reduces
+the size of the target System FC programs.
+\end{abstract}
+%% \category{D.3.3}{Language Constructs and Features}{Abstract data types}
+%% \category{F.3.3}{Studies of Program Constructs}{Type structure}
+%% \terms{Design,Languages}
+%% \keywords{Haskell, Type functions, System FC}
+
+\section{Introduction}\label{s:intro}
+
+A statically-typed intermediate language brings a lot of benefits to a compiler: it is free
+from the design trade-offs that come with source language features; types
+can inform optimisations; and type checking
+programs in the intermediate language provides a powerful consistency check on
+each stage of the compiler.
+
+%% Type checking the intermediate programs
+%% that result from further program transformation and optimization passes
+%% checks
+%% that these stages
+%% the results of further program transformation and optimization passes.
+%% A typed intermediate language provides a firm place for a compiler to stand,
+%% free from the design trade-offs of a complex source language. Moreover,
+%% type-checking the intermediate program provides a
+%% powerful consistency check on the earlier stages of elaboration,
+%% desugaring, and optimization.
+
+The Glasgow Haskell Compiler (GHC) has just such an intermediate language,
+which has evolved from System F to System FC
+\cite{sulzmann+:fc-paper,weirich+:fc2} to accommodate the
+source-language features of
+\emph{GADTs}~\cite{cheney-hinze:phantom-types,sheard:omega,spj+:gadt}
+and \emph{type families}~\cite{Kiselyov09funwith,chak+:synonyms}.
+The key feature that allows System FC to accomodate GADTs and type
+families is its use of explicit \emph{coercions} that witness the
+equality of two syntactically-different types. Coercions are erased
+before runtime but, like types, serve as a static consistency
+proof that the program will not ``go wrong''.
+
+In GHC, coercions are produced by a fairly complex
+type inference (and proof inference) algorithm
+that elaborates source Haskell programs into FC programs \cite{pjv:modular}.
+Furthermore, coercions undergo major transformations during subsequent program
+optimization passes. As a consequence, they can become very large,
+making the compiler bog down. This paper describes how we fixed the problem:
+\begin{itemize*}
+\item Our main contribution is a novel coercion simplification algorithm, expressed
+as a rewrite system, that allows the compiler to replace a coercion
+with an equivalent but much smaller one
+(Section~\ref{s:normalization}).
+ \item Coercion simplification is important in practice.
+ We encountered programs whose un-simplified
+ coercion terms grow to many times the size of the actual executable terms,
+ to the point where GHC choked and ran out of heap. When the simplifier
+ is enabled, coercions simplify to a small fraction of their
+ size (Section~\ref{ssect:ghc}).
+ \item To get these benefits, coercion simplification must take user-declared equality axioms
+ into account, but the simplifier {\em must never loop} while optimizing a coercion -- no matter
+ which axioms are declared by users. Proof normalization theorems are notoriously hard,
+ but we present such a theorem for our coercion simplification. (Section~\ref{ssect:termination})
+ \end{itemize*}
+Equality proof normalization was first studied in the context of monoidal categories and we give
+pointers to early work in Section~\ref{s:related} -- this work in addition addresses the simplification
+of open coercions containing variables and arbitrary user-declared axioms.
+
+%% Coercion simplification has been studied in the context
+%% Despite its great practical importance,
+%% coercion simplification did not appear to be well-studied in the coercion
+%% literature, but we give some connections to related work in Section~\ref{s:related}.
+
+% --------------------------------------------------------------------------
+\section{An overview of System FC} \label{s:intro-coercions}
+
+\begin{figure}\small
+\[\begin{array}{l}
+\begin{array}{lrll}
+%% \multicolumn{3}{l}{\text{Terms}} \\
+c & \in & \text{Coercion variables} \\
+x & \in & \text{Term variables} \\
+e,u & ::= & x \mid l \mid \lambda x{:}\sigma @.@ e \mid e\;u \\
+ & \mid & \Lambda a{:}\eta @.@ e \mid e\;\phi & \text{Type polymorphism} \\
+ & \mid & \lambda c{:}\tau @.@ e \mid e\;\gamma & \text{Coercion abstraction/application} \\
+ & \mid & K \mid \tcase{e}{p \to u} & \text{Constructors and case expressions} \\
+ & \mid & \tlet{x}{\tau}{e}{u} & \text{Let binding} \\
+ & \mid & \tcast{e}{\gamma} & \text{Cast} \\
+%% & \mid & \text{\sout{\ensuremath{\lambda c{:}\tau_1 \psim \tau_2 @.@ e}}} & \text{\sout{Coercion abstraction}} \\[2mm]
+p & ::= & K\;\ol{c{:}\tau}\;\ol{x{:}\tau} & \text{Patterns} \\[3mm]
+\end{array}
+\end{array}\]
+\caption{Syntax of System FC (Terms)}\label{fig:syntax1}
+\end{figure}
+
+
+We begin by reviewing the role of an intermediate
+language. GHC desugars a rich, complex source language (Haskell) into
+a small, simple intermediate language. The source language, Haskell, is \emph{implicitly typed},
+and a type inference engine figures out the type of every binder and sub-expression.
+To make type inference feasible, Haskell embodies many somewhat ad-hoc design
+compromises; for example, $\lambda$-bound variables are assigned monomorphic types.
+By contrast, the intermediate language is simple, uniform, and \emph{explicitly typed}.
+It can be typechecked by a simple, linear time algorithm. The type inference
+engine \emph{elaborates} the implicitly-typed Haskell program into an
+explicitly-typed FC program.
+
+To make this concrete, Figure~\ref{fig:syntax1} gives the syntax of
+System FC, the calculus implemented by GHC's intermediate language.
+The term language is mostly conventional, consisting of System F,
+together with let bindings, data constructors and case expressions.
+The syntax of a term encodes its typing derivation: every binder
+carries its type, and type abstractions $\Lambda a{:}\eta@.@e$ and
+type applications $e\,\phi$ are explicit.
+
+\begin{figure}\small
+\[\begin{array}{l|l}
+\begin{array}{lrll}
+\multicolumn{3}{l}{\text{Types}} \\
+\phi,\sigma,\tau,\upsilon & ::= & a & \text{Variables} \\
+ & \mid & H & \text{Constants} \\
+ & \mid & F & \text{Type functions} \\
+ & \mid & \phi_1\;\phi_2 & \text{Application} \\
+%% & \mid & \phi\;\kappa & \text{Kind application} \\
+ & \mid & \forall a{:}\eta @.@ \phi & \text{Polymorphic types} \\
+%% & \mid & \forall\kvar @.@ \tau & \text{Kind-polymorphic types}\\[2mm]
+\multicolumn{3}{l}{\text{Type constants}} \\
+H & ::= & T & \text{Datatypes} \\
+ & \mid & (\to) & \text{Arrow} \\
+ & \mid & (\psim) & \text{Coercion} \\
+%% & \mid & (\psim) & \text{Primitive equality type} \\
+\multicolumn{3}{l}{\text{Kinds}} \\
+ \kappa,\eta & ::= & \star \mid \kappa \to \kappa \\
+%% & \mid & \forall\kvar @.@ \kappa & \text{Polymorphic kinds} \\
+ & \mid & \static & \text{Coercion kind} \\[2mm]
+\end{array} &
+\begin{array}{lrll}
+\multicolumn{3}{l}{\text{Coercion values}} \\
+\gamma,\delta & ::= & c & \text{Variables} \\
+%% & \mid & \text{\sout{\ensuremath{c}}} & \text{\sout{Coercion variables}} \\
+%% & \mid & C\;\ol{\kappa}\;\gammas & \text{Axiom application} \\
+%% & \mid & \gamma_1\;\kappa & \text{Kind application} \\
+ & \mid & \refl{\phi} & \text{Reflexivity} \\
+ & \mid & \gamma_1;\gamma_2 & \text{Transitivity} \\
+ & \mid & \sym{\gamma} & \text{Symmetry} \\
+ & \mid & \nth{k}{\gamma} & \text{Injectivity} \\
+ & \mid & \gamma_1\;\gamma_2 & \text{Application} \\
+ & \mid & C\;\gammas & \text{Type family axiom} \\
+ & \mid & \forall a{:}\eta @.@ \gamma & \text{Polym. coercion} \\
+ & \mid & \inst{\gamma}{\phi} & \text{Instantiation} \\ \\ \\ \\
+%% & \mid & \forall\kvar @.@ \gamma & \text{Kind polymorphic coercion} \\
+%% & \mid & \inst{\gamma}{\kappa} & \text{Kind instantiation}
+\end{array}
+\end{array}\]
+\caption{Syntax of System FC (types and coercions)}\label{fig:syntax2}
+\end{figure}
+
+The types and kinds of the language are given in Figure~\ref{fig:syntax2}. Types include variables ($a$)
+and constants $H$ (such as $@Int@$ and @Maybe@), type applications (such as $@Maybe@\;@Int@$),
+and polymorphic types ($\forall a{:}\eta @.@ \phi$). The syntax of types also includes {\em type functions}
+(or {\em type families} in the Haskell jargon), which are used to express type level computation.
+For instance the following declaration in source Haskell:
+\begin{code}
+ type family F (a :: *) :: a
+ type instance F [a] = a
+\end{code}
+introduces a type function $F$ at the level of System FC. The accompanying @instance@ line asserts
+that any expression of type @F [a]@ can be viewed as having type @a@. We shall see in
+Section~\ref{sec:type-funs} how this fact
+is expressed in FC. Finally type constants include datatype
+constructors ($T$) but also arrow ($\to$) as well as a special type constructor $\psim$ whose
+role we explain in the following section. The kind language includes the familiar $\star$ and
+$\kappa_1 \to \kappa_2$ kinds but also a special kind called $\static$ that we explain along with
+the $\psim$ constructor.
+
+The typing rules for System FC are given in Figure~\ref{fig:wftm}. We
+urge the reader to consult \cite{sulzmann+:fc-paper,weirich+:fc2} for
+more examples and intuition.
+
+\begin{figure}\small
+\[\begin{array}{l}
+\begin{array}{lrll}
+\multicolumn{3}{l}{\text{Environments}} \\
+\Gamma,\Delta & ::= & \cdot \mid \Gamma,\bnd \\
+\bnd & ::= %% & \kvar & \text{Kind variable} \\
+ & a : \eta & \text{Type variable} \\
+ & \mid & c : \sigma \psim \phi & \text{Coercion variable}\\
+ & \mid & x : \sigma & \text{Term variable}\\
+ & \mid & T : \ol{\kappa} \to \star & \text{Data type} \\
+ & \mid & K : \forall (\ol{a{:}\eta}) @.@ \taus \to T\;\as & \text{Data constructor} \\
+ & \mid & F^n : \ol{\kappa}^n \to \kappa & \text{Type families (of arity $n$)} \\
+ & \mid & C \,\ol{(a{:}\eta)} : \sigma \psim \phi & \text{Axioms} \\
+%% & \mid & F_n : \kappa_1 \to \ldots \to \kappa_n \to \kappa & \text{Type family, arity $n$} \\
+%% & \mid & C\;\ol{\kvar}\;(\ol{a{:}\kappa}) : \peqt{\sigma}{\phi} & \text{Axiom} \\
+\multicolumn{3}{l}{\text{Notation}} \\
+T\;\ol{\tau} & \equiv & T\;\tau_1 \ldots \tau_n \\
+\taus \to \tau & \equiv & \tau_1 \to \ldots \to \tau_n \to \tau \\
+\taus^{1..n} & \equiv & \tau_1,\ldots,\tau_n
+ %% & & \text{for $\alpha$ either $\kappa$ or $\tau$}\\
+% \Gamma_0 & \equiv & \text{initial (closed) environment} \\
+% & \ni & \multicolumn{2}{l}{(\psim) : \forall\kvar @.@ \kvar \to \kvar \to \static} \\
+\end{array}
+\end{array}\]
+\caption{Syntax of System FC (Auxiliary definitions) }\label{fig:syntax3}
+\end{figure}
+
+\begin{figure}\small
+\[\begin{array}{c}\ruleform{\Gamma \wftm e : \tau } \\ \\
+\prooftree
+ (x{:}\tau) \in \Gamma
+ \minusv{EVar}
+ \Gamma \wftm x : \tau
+ \twiddleiv
+ (K{:}\sigma) \in \Gamma
+ \minusv{ECon}
+ \Gamma \wftm K : \sigma
+ \twiddlev
+ \begin{array}{c}
+ \Gamma,(x{:}\sigma) \wftm e : \tau \quad
+ \Gamma \wfty \sigma : \star \end{array}
+ \minusv{EAbs}
+ \Gamma \wftm \lambda x{:}\sigma @.@ e : \sigma \to \tau
+ \twiddleiv
+ \begin{array}{c}
+ \Gamma \wftm e : \sigma \to \tau \quad \Gamma \wftm u : \sigma
+ \end{array}
+ \minusv{EApp}
+ \Gamma \wftm e\;u : \tau
+ \twiddlev
+ \begin{array}{c}
+ \Gamma,(c{:}\sigma) \wftm e : \tau \\
+ \Gamma \wfty \sigma : \static{}
+ \end{array}
+ \minusv{ECAbs}
+ \Gamma \wftm \lambda c{:}\sigma @.@ e : \sigma \to \tau
+ \twiddleiv
+ \begin{array}{c}
+ \Gamma \wftm e : (\sigma_1 \psim \sigma_2) \to \tau \\
+ \Gamma \wfco \gamma : \sigma_1 \psim \sigma_2
+ \end{array}
+ \minusv{ECApp}
+ \Gamma \wftm e\;\gamma : \tau
+ \twiddlev
+ \begin{array}{c} \phantom{\Gamma}
+%% \Gamma \wfk \eta \\
+ \Gamma,(a{:}\eta) \wftm e : \tau
+ \end{array}
+ \minusv{ETabs}
+ \Gamma \wftm \Lambda a{:}\eta @.@ e : \forall a{:}\eta @.@ \tau
+ \twiddleiv
+ \begin{array}{c}
+ \Gamma \wftm e : \forall a{:}\eta @.@ \tau \quad
+ \Gamma \wfty \phi : \eta
+ \end{array}
+ \minusv{ETApp}
+ \Gamma \wftm e\;\phi : \tau[\phi/a]
+ \twiddlev
+ %% \begin{array}{c} \phantom{\Gamma} \\
+ %% \Gamma,\kvar \wftm e : \tau
+ %% \end{array}
+ %% -----------------------------------------{EKabs}
+ %% \Gamma \wftm \Lambda\kvar @.@ e : \forall\kvar @.@ \tau
+ %% \twiddleiv
+ %% \begin{array}{c}
+ %% \Gamma \wftm e : \forall\kvar @.@ \tau \\
+ %% \Gamma \wfk \kappa
+ %% \end{array}
+ %% -----------------------------------------{EKApp}
+ %% \Gamma \wftm e\;\kappa : \tau[\kappa/\kvar]
+ %% \twiddlev
+ \begin{array}{c}
+ \Gamma,(x{:}\sigma) \wftm u : \sigma \quad
+ \Gamma,(x{:}\sigma) \wftm e : \tau
+ \end{array}
+ \minusv{ELet}
+ \Gamma \wftm \tlet{x}{\sigma}{u}{e} : \tau
+%% \twiddleiv
+%% \begin{array}{c}
+%% \Gamma \wftm u : \sigma \\
+%% \Gamma,(x{:}\sigma) \wftm e : \tau
+%% \end{array}
+%% -------------------------------------------{ELet}
+%% \Gamma \wftm \tlet{x}{\sigma}{u}{e} : \tau
+ \twiddleiv\hspace{-5pt}
+ \begin{array}{c}
+ \Gamma \wftm e : \tau \quad
+ \Gamma \wfco \gamma : \tau \psim \phi
+ \end{array}
+ \minusv{ECast}
+ \Gamma \wftm \tcast{e}{\gamma} : \phi
+ \twiddlev
+ \begin{array}{l}
+ \Gamma \wftm e : T\;\ol{\kappa}\;\sigmas \\
+ \text{For each branch } K\;\ol{x{:}\tau} \to u \\
+ \quad (K{:}\forall (\ol{a{:}\eta_a}) @.@ \ol{\sigma_1\psim\sigma_2} \to \taus \to T\;\as) \in \Gamma \\
+ \quad \phi_i = \tau_i[\sigmas/\as] \\
+ \quad \phi_{1i} = \sigma_{1i}[\sigmas/\as] \\
+ \quad \phi_{2i} = \sigma_{2i}[\sigmas/\as]
+ \quad \Gamma,\ol{c{:}\phi_1\psim\phi_2}\;\ol{x{:}\phi} \wftm u : \sigma
+ \end{array}
+ \minusv{ECase}
+ \Gamma \wftm \tcase{e}{K\;(\ol{c{:}\sigma_1\psim\sigma_2})\;(\ol{x{:}\tau}) \to u} : \sigma
+\endprooftree
+\end{array}\]\caption{Well-formed terms}\label{fig:wftm}
+\end{figure}
+
+
+
+\begin{figure}\small
+\[\begin{array}{c}\ruleform{\Gamma \wfty \tau : \kappa } \\ \\
+\prooftree
+ (a{:}\eta) \in \Gamma
+ \minusv{TVar}
+ \Gamma \wfty a : \eta
+ \twiddleiv
+ (T{:}\kappa) \in \Gamma
+ \minusv{TData}
+ \Gamma \wfty T : \kappa
+ \twiddleiv
+ (F{:}\kappa) \in \Gamma
+ \minusv{TFun}
+ \Gamma \wfty F : \kappa
+ \twiddlev
+ \kappa_1,\kappa_2 \in \{ \static, \star \}
+ \minusv{TArr}
+ \Gamma \wfty (\to) : \kappa_1 \to \kappa_2 \to \star
+ \twiddleiv
+ \phantom{\Gamma}
+ \minusv{TEqPred}
+ \Gamma \wfty (\psim) : \kappa \to \kappa \to \static
+ \twiddlev
+ \begin{array}{c}
+ \Gamma \wfty \phi_1 : \kappa_1 \to \kappa_2 \quad
+ \Gamma \wfty \phi_2 : \kappa_1
+ \end{array} \vspace{2pt}
+ \minusv{TApp}
+ \Gamma \wfty \phi_1\;\phi_2 : \kappa_2
+ \twiddleiv
+ \begin{array}{c} \phantom{\Gamma} \quad
+ \Gamma,(a{:}\eta) \wfty \tau : \star
+ \end{array} \vspace{2pt}
+ \minusv{TAll}
+ \Gamma \wfty \forall a{:}\eta @.@ \tau : \star
+\endprooftree
+\end{array}\]\caption{Well-formed types}\label{fig:wfty}
+\end{figure}
+
+\subsection{Coercions}
+
+The unusual feature of FC is the use of coercions.
+The term $\tcast{e}{\gamma}$ is a cast, that converts a term $e$ of
+type $\tau$ to one of type $\phi$ (rule \rulename{ECast} in
+Figure~\ref{fig:wftm}). The coercion $\gamma$ is a \emph{witness},
+or \emph{proof}, providing
+evidence that $\tau$ and $\phi$ are equal types -- that is, $\gamma$ has
+type $\tau \psim \phi$.
+We use the symbol ``$\psim$'' to denote type equality\footnote{The ``$\#$'' subscript
+is irrelevant for this paper; the interested reader may consult
+\cite{deferred-type-errors} to understand the related type equality $\sim$, and
+the relationship between $\sim$ and $\psim$.}.
+The syntax of coercions $\gamma$ is given in
+Figure~\ref{fig:syntax2}, and their typing rules in Figure~\ref{fig:wfco}.
+For uniformity we treat $\psim$ as an ordinary type constructor, with
+kind $\kappa \to \kappa \to \static$ (Figure~\ref{fig:wfty}).
+
+To see casts in action, consider this Haskell program which uses GADTs:
+\begin{code}
+ data T a where f :: T a -> [a]
+ T1 :: Int -> T Int f (T1 x) = [x+1]
+ T2 :: a -> T a f (T2 v) = [v]
+
+ main = f (T1 4)
+\end{code}
+We regard the GADT data constructor @T1@ as having the type
+$$ @T1@ : \forall a. (a \psim @Int@) \to @Int@ \to @T@\;a $$
+So in FC, @T1@ takes three arguments: a type argument to instantiate $a$,
+a coercion witnessing the equivalence of $a$ and @Int@, and a value of type @Int@.
+Here is the FC elaboration of @main@:
+\begin{code}
+ main = f Int (T1 Int <Int> 4)
+\end{code}
+The coercion argument has kind $(@Int@\psim@Int@)$, for which the evidence
+is just $\refl{@Int@}$ (reflexivity).
+Similarly, pattern-matching on @T1@ binds two variables:
+a coercion variable, and a term variable. Here is the FC elaboration
+of function @f@:
+\begin{code}
+ f = /\(a:*). \(x:T a).
+ case x of
+ T1 (c:a ~# Int) (n:Int) -> (Cons (n+1) Nil) |> sym [c]
+ T2 (v:a) -> Cons v Nil
+\end{code}
+The cast converts the type of the result from @[Int]@ to @[a]@.
+The coercion $\sym{[c]}$ is evidence for (or a proof of)
+the equality of these types, using coercion @c@, of type $(@a@\psim@Int@)$.
+
+\subsection{Typing coercions} \label{sec:newtype} \label{sec:type-funs}
+
+\begin{figure*}\small
+\[\begin{array}{c}\ruleform{\Gamma \wfco \gamma : \sigma_1 \psim \sigma_2 } \\ \\
+\prooftree
+ \begin{array}{c} \phantom{G} \\
+ (c{:}\sigma_1 \psim \sigma_2) \in \Gamma
+ \end{array}
+ \minusv{CVar}
+ \Gamma \wfco c : \sigma_1 \psim \sigma_2
+ \twiddleiv
+ \begin{array}{c}
+ (C\, \ol{a{:}\eta} : \tau_1 \psim \tau_2) \in \Gamma \\
+ \Gamma \wfco \gamma_i : \sigma_i \psim \phi_i \vspace{1pt}
+ \end{array}
+ \minusv{CAx}
+ \Gamma \wfco C\;\gammas : \tau_1[\sigmas/\as]{\psim}\tau_2[\phis/\as]
+ \twiddleiv
+ \begin{array}{c} \phantom{G} \\
+ \Gamma \wfty \phi : \kappa
+ \end{array}
+ \minusv{CRefl}
+ \Gamma \wfco \refl{\phi} : \sigma \psim \sigma
+ \twiddlev
+ \begin{array}{c}
+ \Gamma \wfco \gamma_1 : \sigma_1 \psim \sigma_2 \\
+ \Gamma \wfco \gamma_2 : \sigma_2 \psim \sigma_3 \vspace{1pt}
+ \end{array}
+ \minusv{CTrans}
+ \Gamma \wfco \gamma_1;\gamma_2 : \sigma_1{\psim}\sigma_3
+ \twiddleiv
+ \Gamma \wfco \gamma : \sigma_1 \psim \sigma_2
+ \minusv{CSym}
+ \Gamma \wfco \sym{\gamma} : \sigma_2 \psim \sigma_1
+ \twiddleiv
+ \Gamma \wfco \gamma : H\;\sigmas \psim H\;\taus
+ \minusv{CNth}
+ \Gamma \wfco \nth{k}{\gamma} : \sigma_k \psim \tau_k
+ \twiddlev
+ \Gamma,(a{:}\eta) \wfco \gamma : \sigma_1 \psim \sigma_2
+ \minusv{CAll}
+ \Gamma \wfco \forall a{:}\eta @.@ \gamma : (\forall a{:}\eta @.@ \sigma_1) \psim (\forall a{:}\eta @.@ \sigma_2)
+ %% \twiddleiv
+ %% \Gamma \wfco \gamma_i : \tau_i \psim \sigma_i \quad i \in 1..n
+ %% \minusv{CFun}
+ %% \Gamma \wfco F^n\;\gammas : F\;\taus \psim F\;\sigmas
+ \twiddlev
+ \begin{array}{c}
+ \Gamma \wfco \gamma_1 : \sigma_1 \psim \sigma_2 \\
+ \Gamma \wfco \gamma_2 : \phi_1 \psim \phi_2 \quad \Gamma \wfty : \sigma_1\;\phi_1 : \kappa
+ \end{array}
+ \minusv{CApp}
+ \Gamma \wfco \gamma_1\;\gamma_2 : \sigma_1\;\phi_1 \psim \sigma_2\;\phi_2
+ \twiddleiv
+ \begin{array}{c}
+ \Gamma \wfty \phi : \eta \\
+ \Gamma \wfco \gamma : (\forall a{:}\eta @.@ \sigma_1) \psim (\forall a{:}\eta @.@ \sigma_2) \vspace{1pt}
+ \end{array}
+ \minusv{CInst}
+ \Gamma \wfco \inst{\gamma}{\phi} : \sigma_1[\phi/a] \psim \sigma_2[\phi/a]
+\endprooftree
+\end{array}\]\caption{Well-formed coercions}\label{fig:wfco}
+\end{figure*}
+
+Figure~\ref{fig:wfco} gives the typing rules for coercions. The rules include unsurprising cases
+for reflexivity (\rulename{CRefl}), symmetry (\rulename{CSym}), and transitivity (\rulename{CTrans}).
+Rules \rulename{CAll} and \rulename{CApp} allow us to construct coercions on more complex types from
+coercions on simpler types. Rule \rulename{CInst} instantiates a coercion between two $\forall$-types,
+to get a coercion between two instantiated types. Rule \rulename{CVar} allows us to use a coercion
+that has been introduced to the context by a coercion abstraction $(\lambda c{:}\tau{\psim}\phi @.@ e)$,
+or a pattern match against a GADT (as in the example above).
+
+Rule \rulename{CAx} refers to instantiations of {\em axioms}. In GHC, axioms can arise as a result of {\em newtype} or {\em type family} declarations. Consider the following code:
+\begin{code}
+ newtype N a = MkN (a -> Int)
+
+ type family F (x :: *) :: *
+ type instance F [a] = a
+ type instance F Bool = Char
+\end{code}
+$N$ is a \emph{newtype} (part of the original Haskell 98 definition), and is desugared to
+the following FC coercion axiom:
+\[\begin{array}{rcl}
+C_N \, a& : & N\,a \psim a \rightarrow @Int@
+\end{array}\]
+which provides evidence of the equality of types $(N\,a)$ and $(a \rightarrow @Int@)$.
+
+In the above Haskell code, $F$ is a {\em type family} \cite{chak+:types, chak+:synonyms},
+and the two @type@ @instance@ declarations above introduce two FC coercion axioms:
+\[\begin{array}{rcl}
+C_1 \, a & : & F\;[a] \psim a \\
+C_2 & : & F\;@Bool@ \psim @Char@
+\end{array}
+\]
+Rule \rulename{CAx} describes how these axioms may be used to create coercions. In this
+particular example, if we have $\gamma : \tau \psim \sigma$, then we can prove that
+$ C_1\;\gamma : F\;[\tau] \psim \sigma$. Using such coercions we can get, for example, that
+$(\tcast{3}{\sym{(C_1\;\refl{@Int@})}}) : F\;[@Int@]$.
+
+Axioms always appear saturated in System FC, hence the syntax $C\,\overline{\gamma}$ in Figure~\ref{fig:syntax2}.
+
+%% Lifting and one push rule, in case we need it
+%% \begin{figure*}\small
+%% \[\begin{array}{c}
+%% \ruleform{\lifting{a\mapsto\gamma}{\tau} = \gamma'} \\ \\
+%% \begin{array}{lcl}
+%% \lifting{a \mapsto \gamma}{a} & = & \gamma \\
+%% \lifting{a \mapsto \gamma}{b} & = & \refl{b} \\
+%% \lifting{a \mapsto \gamma}{H} & = & \refl{H} \\
+%% \lifting{a \mapsto \gamma}{F} & = & \refl{F} \\
+%% \lifting{a \mapsto \gamma}{\tau_1\;\tau_2} & = &
+%% \left\{\begin{array}{l}
+%% \refl{\phi_1\;\phi_2} \text{ when } \lifting{a\mapsto\gamma}{\tau_i} = \refl{\phi_i} \\
+%% (\lifting{a \mapsto \gamma}{\tau_1})\;(\lifting{a\mapsto\gamma}{\tau_2}) \text{ otherwise }
+%% \end{array}\right. \\
+%% \lifting{a\mapsto\gamma}{\forall a{:}\eta @.@ \tau} & = &
+%% \left\{\begin{array}{l}
+%% \refl{\forall a{:}\eta @.@ \phi} \text{ when } \lifting{a\mapsto\gamma}{\tau} = \refl{\phi} \\
+%% \forall a{:}\eta @.@ (\lifting{a\mapsto\gamma}{\tau}) \text{ otherwise}
+%% \end{array}\right.
+%% \end{array} \\ \\
+%% \begin{array}{llcl}
+%% & \tcase{\tcast{K\;\taus\;\gammas\;\es}{\gamma}}{p \to u} & \rightsquigarrow & \tcase{K\;\taus'\;\gammas'\;\es'}{p \to u} \\
+%% & & & \hspace{-4pt}\begin{array}{lll}
+%% \text{when } \\ %% & \cval(K\;\taus\;\phis\;\es) \\
+%% & \wfco \gamma : T\;\taus \psim T\;\taus' \\
+%% & K{:}\forall\ol{a{:}\eta} @.@ \ol{\sigma_1 \psim \sigma_2} \to \sigmas \to T\;\as \in \Gamma_0 \\
+%% & e_i' = \tcast{e_i}{\lifting{\as \mapsto \deltas}{\sigma_i[\phis/\cs]}} \\
+%% & \delta_j = \nth{j}{\gamma} \\
+%% & \gamma_j' = \ldots
+%% \end{array}
+%% \end{array}
+%% \end{array} \]
+%% \caption{Coercion pushing in simplification}\label{fig:opsem}
+%% \end{figure*}
+
+
+% --------------------------------------------------------------------------
+\section{The problem with large coercions}\label{ssect:large}
+
+System FC terms arise as the result of elaboration of source language
+terms, through type inference. Type inference typically
+relies on a \emph{constraint solver} \cite{pjv:modular}
+which produces System FC witnesses of
+equality (coercions), that in turn decorate the elaborated term. The constraint solver is
+not typically concerned with producing small or readable witnesses;
+indeed GHC's constraint solver can produce large and complex coercions.
+These complex coercions can make the
+elaborated term practically impossible to understand and debug.
+
+Moreover, GHC's optimiser transforms well-typed FC terms.
+Insofar as these transformations involve coercions, the coercions \emph{themselves}
+may need to be transformed. If you think of the coercions as little proofs that
+fragments of the program are well-typed, then the optimiser must maintain the proofs
+as it transforms the terms.
+
+\subsection{How big coercions arise}
+
+The trouble is that \emph{term-level optimisation tends to make
+coercions bigger}. The full details of these transformations are given in the so called {\em push}
+rules in our previous work~\cite{weirich+:fc2}, but we illustrate them here with an example.
+Consider this term:
+$$
+ (\tcast{\lambda x.e}{\gamma})\, a
+$$
+where
+$$
+\begin{array}{rcl}
+ \gamma & : & (\sigma_1 \rightarrow \tau_1) \psim (\sigma_2 \rightarrow \tau_2) \\
+ a & : & \sigma_2
+\end{array}
+$$
+We would like to perform the beta reduction, but the cast is getting in
+the way. No matter! We can transform thus:
+$$\begin{array}{ll}
+ & (\tcast{\lambda x.e}{\gamma})\, a \\
+= & \tcast{((\lambda x.e)\, (\tcast{a}{\sym{(\nth{0}{\gamma})}}))}{\nth{1}{\gamma}}
+\end{array}
+$$
+From the coercion $\gamma$ we have derived two coercions whose syntactic form
+is larger, but whose types are smaller:
+$$
+\begin{array}{rcl}
+ \gamma & : & (\sigma_1 \rightarrow \tau_1) \psim (\sigma_2 \rightarrow \tau_2) \\
+\sym{(\nth{0}{\gamma})} & : & \sigma_2 \psim \sigma_1 \\
+\nth{1}{\gamma} & : & \tau_1 \psim \tau_2
+\end{array}
+$$
+Here we make use of the coercion combinators $sym$, which reverses the sense of
+the proof; and $nth\,i$, which from a proof of $T\,\overline{\sigma} \psim T \, \overline{\tau}$
+gives a proof of $\sigma_i \psim \tau_i$. Finally, we use the derived coercions to
+cast the argument and result of the function separately. Now the lambda is
+applied directly to an argument (without a cast in the way), so
+$\beta$-reduction can proceed as desired.
+Since $\beta$-reduction is absolutely
+crucial to the optimiser, this ability to ``push coercions out of the way'' is
+fundamental. Without it, the optimiser is hopelessly compromised.
+
+A similar situation arises with @case@ expressions:
+$$@case@\,(\tcast{K\,e_1}{\gamma})\,@of@\,\{\ldots;\,K\,x \rightarrow e_2; \ldots \}$$
+where $K$ is a data constructor.
+Here we want to simplify the @case@ expression, by picking the correct alternative
+$K\,x \rightarrow e_2$, and substituting $e_1$ for $x$. Again the coercion gets in the way, but
+again it is possible to push the coercion out of way.
+
+\subsection{How coercions can be simplified}
+
+Our plan is to simplify complicated coercion terms into simpler ones, using rewriting.
+Here are some obvious rewrites we might think of immediately:
+$$
+\begin{array}{rcll}
+\sym{(\sym{\gamma})} & \rsa{} & \gamma \\
+\gamma ; \sym{\gamma} & \rsa{} & \refl{\tau} & \text{if}\,\gamma : \tau \psim \phi
+\end{array}
+$$
+But ther are much more complicated rewrites to consider.
+Consider these coercions, where $C_N$ is the axiom generated by the newtype coercion in
+Section~\ref{sec:newtype}:
+$$
+\begin{array}{rcl}
+\gamma_1 & : & \tau_1 \psim \tau_2 \\
+\gamma_2 = \sym{(C_N\,\refl{\tau_1})} & : & (\tau_1 \rightarrow @Int@) \psim (N\,\tau_1) \\
+\gamma_3 = N\,\refl{\gamma_1} & : & (N\,\tau_1) \psim (N\,\tau_2) \\
+\gamma_4 = C_N\,\refl{\tau_2} & : & (N\,\tau_2) \psim (\tau_2 \rightarrow @Int@) \\
+\\
+\gamma_5 = \gamma_2 ; \gamma_3 ; \gamma_4 & : & (\tau_1 \rightarrow @Int@) \psim (\tau_2 \rightarrow @Int@)
+\end{array}
+$$
+Here $\gamma_2$ takes a function, and wraps it in the newtype; then $\gamma_3$ coerces that newtype from
+$N\,\tau_1$ to $N\,\tau_2$; and $\gamma_4$ unwraps the newtype.
+Composing the three gives a rather large, complicated
+coercion $\gamma_2 ; \gamma_3 ; \gamma_4$. \emph{But its type
+is pretty simple}, and indeed the coercion $\gamma_1 \to \refl{@Int@}$ is a much simpler
+witness of the same equality. The rewrite system we present shortly will rewrite
+the former to the latter.
+
+Finally, here is an actual example taken from a real program compiled by GHC
+(don't look at the details!):
+$$
+\begin{array}{ll}
+& @Mut@\, \refl{v}\, (\sym{(C_{StateT} \, \refl{s})})\, \refl{a} \\
+& ; \sym{(\nth{0}{(\inst{\inst{\inst{(\forall
+ w % :*\rightarrow *
+ t % :*
+ b % : *
+.\,
+ @Mut@\, \refl{w}\, (\sym{(C_{StateT}\, \refl{t})})\, \refl{b}
+ \rightarrow \refl{@ST@\, t\,(w\, b)})}{v}}{s}}{a})}} \\
+\rsa{} & \refl{@Mut@\, v\,s\,a}
+\end{array}
+$$
+As you can see, the shrinkage in coercion size can be dramatic.
+
+%% \dv{Simon will update some of the examples here. Not sure if we should
+%% include the push figure then if we can explain it with a couple of
+%% examples. The figure will introduce lifting etc and maybe that is a
+%% distraction. However Simon, note that we refer to this example from
+%% a later section and describe how exactly it was optimized, so probably
+%% you do not want to eliminate it entirely, but just add more examples?}
+
+
+\section{Coercion simplification}\label{s:normalization}
+\newcommand{\G}{{\cal G}}
+
+We now proceed to the details of our coercion simplification algorithm. We note that the design of the algorithm
+is guided by empirical evidence of its effectiveness on actual programs and that other choices might be possible.
+Nevertheless, we formally study the properties of this algorithm, namely we will show that it preserves validity
+of coercions and terminates -- even when the rewrite system induced by the axioms is not strongly normalizing.
+
+\subsection{Simplification rules}\label{ssect:rules}
+
+Coercion simplification is given as a non-deterministic relation in Figure~\ref{fig:optimization1} and Figure~\ref{fig:optimization2}
+In these two figures we use some syntactic conventions: Namely, for sequences of coercions $\gammas_1$ and $\gammas_2$,
+we write $\ol{\gamma_1;\gamma_2}$ for the sequence of pointwise transitive compositions and $\sym{\gammas_1}$ for pointwise
+application of symmetry. We write $nontriv(\gamma)$ iff $\gamma$ {\em contains} some variable $c$ or axiom application $C\;\gammas$.
+
+\begin{figure}\small
+\[\begin{array}{l}
+\text{Coercion evaluation contexts} \quad\quad \G ::= \Box \mid \G\;\gamma \mid \gamma\;\G \mid C\;\gammas_1\G\gammas_2 \mid \sym{\G} \mid \forall a{:}\eta @.@ \G \mid \inst{\G}{\tau} \mid \G;\gamma \mid \gamma;\G \\ \\
+\prooftree
+ \begin{array}{c}
+ \gamma \approx \G[\gamma_1] \text{ modulo associativity of } ({;}) \quad
+ \Delta \wfco \gamma_1 : \sigma \psim \phi \quad \Delta \vdash \gamma_1 \rsa{} \gamma_2 \vspace{2pt}
+ \end{array}
+ \minusv{CoEval}
+ \gamma \longrightarrow \G[\gamma_2]
+\endprooftree \\ \\
+\ruleform{\Delta \vdash \gamma_1 \rsa{} \gamma_2} \\ \\
+\begin{array}{llcl}
+\multicolumn{4}{l}{\text{Reflexivity rules}} \\
+\rulename{ReflApp} & \Delta \vdash \refl{\phi_1}\;\refl{\phi_2} & \rightsquigarrow & \refl{\phi_1\;\phi_2} \\
+\rulename{ReflAll} & \Delta \vdash \forall a{:}\eta @.@ \refl{\phi} & \rightsquigarrow & \refl{\forall a{:}\eta @.@ \phi} \\
+\rulename{ReflElimL} & \Delta \vdash \refl{\phi};\gamma & \rsa{} & \gamma \\
+\rulename{ReflElimR} & \Delta \vdash \gamma;\refl{\phi} & \rsa{} & \gamma \\ \phantom{\Delta}
+\end{array} \\
+\begin{array}{llcl}
+\multicolumn{4}{l}{\text{Eta rules}} \\
+\rulename{EtaAllL} & \Delta \vdash \inst{((\forall a{:}\eta @.@ \gamma_1);\gamma_2)}{\phi} & \rsa{} & \gamma_1[\phi/a];(\inst{\gamma_2}{\phi}) \\
+\rulename{EtaAllR} & \Delta \vdash \inst{(\gamma_1;(\forall a{:}\eta @.@ \gamma_2))}{\phi} & \rsa{} & \inst{\gamma_1}{\phi};\gamma_2[\phi/a] \\
+\rulename{EtaNthL} & \Delta \vdash \nth{k}{(\refl{H\;\taus^{1..\ell}}\;\gammas;\gamma)} & \rsa{} & \left\{\begin{array}{ll} \nth{k}{\gamma} & \text{ if } k \leq \ell \\
+ \gamma_{k-\ell};\nth{k}{\gamma} & \text{ otherwise }
+
+ \end{array}\right. \\
+\rulename{EtaNthR} & \Delta \vdash \nth{k}{(\gamma;\refl{H\;\taus^{1..\ell}}\;\gammas)} & \rsa{} & \left\{\begin{array}{ll}
+ \nth{k}{\gamma} & \text{ if } k \leq \ell \\
+ \nth{k}{\gamma};\gamma_{k-\ell} & \text{ otherwise }
+ \end{array}\right.
+\end{array} \\
+\begin{array}{llcl}
+\multicolumn{4}{l}{\text{Symmetry rules}} \\
+\rulename{SymRefl} & \Delta \vdash \sym{\refl{\phi}} & \rightsquigarrow & \refl{\phi} \\
+\rulename{SymAll} & \Delta \vdash \sym{(\forall a{:}\eta @.@ \gamma)} & \rightsquigarrow & \forall a{:}\eta @.@ \sym{\gamma} \\
+\rulename{SymApp} & \Delta \vdash \sym{(\gamma_1\;\gamma_2)} & \rightsquigarrow & (\sym{\gamma_1})\;(\sym{\gamma_2}) \\
+\rulename{SymTrans} & \Delta \vdash \sym{(\gamma_1;\gamma_2)} & \rightsquigarrow & (\sym{\gamma_2}){;}(\sym{\gamma_1}) \\
+\rulename{SymSym} & \Delta \vdash \sym{(\sym{\gamma})} & \rightsquigarrow & \gamma
+\end{array} \\ \\
+\begin{array}{llcl}
+\multicolumn{4}{l}{\text{Reduction rules}} \\
+\rulename{RedNth} & \Delta \vdash \nth{k}{(\refl{H\;\taus^{1..\ell}}\;\gammas)} & \rightsquigarrow & \left\{\begin{array}{ll} \refl{\tau_k} & \text{ if }k \leq \ell \\
+ \gamma_{k-\ell} & \text{ otherwise }
+ \end{array}\right. \\
+\rulename{RedInstCo} & \Delta \vdash \inst{(\forall a{:}\eta @.@ \gamma)}{\phi} & \rsa{} & \gamma[\phi/a] \\
+\rulename{RedInstTy} & \Delta \vdash \inst{\refl{\forall a{:}\eta @.@ \tau}}{\phi} & \rsa{} & \refl{\tau[\phi/a]}
+\end{array} \\ \\
+\begin{array}{llcll}
+\multicolumn{4}{l}{\text{Push transitivity rules }} \\
+\rulename{PushApp} & \Delta \vdash (\gamma_1\;\gamma_2);(\gamma_3\;\gamma_4) & \rsa{} & (\gamma_1;\gamma_3)\;(\gamma_2;\gamma_4) \\
+\rulename{PushAll} & \Delta \vdash (\forall a{:}\eta @.@ \gamma_1); (\forall a{:}\eta @.@ \gamma_2) & \rsa{} & \forall a{:}\eta @.@ \gamma_1;\gamma_2 \\
+\rulename{PushInst}& \Delta \vdash (\inst{\gamma_1}{\tau});(\inst{\gamma_2}{\tau}) & \rsa{} & \inst{(\gamma_1;\gamma_2)}{\tau}
+ & \text{ when } \Delta \wfco \gamma_1;\gamma_2 : \sigma_1 \psim \sigma_2 \\
+\rulename{PushNth} & \Delta \vdash (\nth{k}{\gamma_1});(\nth{k}{\gamma_2}) & \rsa{} & \nth{k}{(\gamma_1;\gamma_2)}
+ & \text{ when } \Delta \wfco \gamma_1;\gamma_2 : \sigma_1 \psim \sigma_2
+\end{array}
+\end{array}\]\caption{Coercion simplification (I)}\label{fig:optimization1}
+\end{figure}
+
+We define coercion evaluation contexts, $\G$, as coercion terms with holes inside them. The syntax of $\G$ allows us to rewrite anywhere
+inside a coercion. The main coercion evaluation rule is \rulename{CoEval}. If we are given a coercion $\gamma$, we first decompose it to some
+evaluation context $\G$ with $\gamma_1$ in its hole. Rule \rulename{CoEval} works up to associativity of transitive composition;
+% -- for example, the compiler may use a flat list of coercions
+%that are transitively composed to each other instead of the binary composition operator $(;)$.
+for example, we will
+allow the term $(\gamma_1;\gamma_2;);\gamma_3$ to be written as $\G[\gamma_2;\gamma_3]$ where $\G = \gamma_1;\Box$. This treatment of
+transitivity is extremely convenient, but we must be careful to ensure that our argument for termination
+remains robust under associativity (Section~\ref{ssect:termination}). Once we
+have figured out a decomposition $\G[\gamma_1]$, \rulename{CoEval} performs
+a single step of rewriting $\Delta \vdash \gamma_1 \rsa{} \gamma_2$ and simply return $\G[\gamma_2]$.
+Since we are allowed to rewrite coercions under a type environment ($\forall a{:}\eta @.@ \G$ is a valid coercion
+evaluation context), $\Delta$ (somewhat informally) enumerates the type variables bound by $\G$. For instance we
+should be allowed to rewrite $\forall a{:}\eta @.@ \gamma_1$ to $\forall a{:}\eta @.@ \gamma_2$. This can happen
+if $(a{:}\eta) |- \gamma_1 \rsa{} \gamma_2$. The precondition $\Delta \wfco \gamma_1 : \sigma \psim \phi$ of rule
+\rulename{CoEval} ensures that this context corresponds to the decomposition of $\gamma$ into a context and $\gamma_1$.
+Moreover, the $\Delta$ is passed on to the $\rsa{}$ relation, since some of the rules of the $\rsa{}$ relation that we will present
+later may have to consult the context $\Delta$ to establish preconditions for rewriting.
+
+The soundness property for the $\longrightarrow$ relation is given by the following theorem.
+\begin{theorem}[Coercion subject reduction]\label{thm:sr-theorem}
+If $\wfco \gamma_1 : \sigma \psim \phi$ and $\gamma_1 \longrightarrow \gamma_2$ then $\wfco \gamma_2 : \sigma \psim \phi$.
+\end{theorem}
+The rewriting judgement $\Delta \vdash \gamma_1 \rsa{} \gamma_2$ satisfies a similar property.
+\begin{lemma}\label{lem:sr-lemma}
+If $\Delta \wfco \gamma_1 : \sigma \psim \phi$ and $\Delta \vdash \gamma_1 \rsa{} \gamma_2$ then $\Delta \wfco \gamma_2 : \sigma \psim \phi$.
+\end{lemma}
+
+To explain coercion simplification, we now present the reaction rules
+for the $\rsa{}$ relation, organized in several groups.
+
+\subsubsection{Pulling reflexivity up}
+Rules \rulename{ReflApp}, \rulename{ReflAll}, \rulename{ReflElimL}, and \rulename{ReflElimR}, deal with
+uses of reflexivity. Rules \rulename{ReflApp} and \rulename{ReflAll} ``swallow'' constructors from the
+coercion language (coercion application, and quantification respectively) into the type language
+(type application, and quantification respectively). Hence they pull reflexivity as high as
+possible in the tree structure of a coercion term. Rules \rulename{ReflElimL} and \rulename{ReflElimR}
+simply eliminate reflexivity uses that are composed with other coercions.
+
+\subsubsection{Pushing symmetry down}
+Uses of symmetry, contrary to reflexivity, are pushed as close to the leaves as possible or eliminated,
+(rules \rulename{SymRefl}, \rulename{SymAll}, \rulename{SymApp}, \rulename{SymTrans}, and \rulename{SymSym})
+only getting stuck at terms of the form
+$\sym{x}$ and $\sym{(C\;\gammas)}$.
+The idea is that by pushing uses of symmetry towards the leaves,
+the rest of the rules may completely ignore symmetry, except where
+symmetry-pushing gets stuck (variables or axiom applications).
+
+\subsubsection{Reducing coercions}
+Rules \rulename{RedNth}, \rulename{RedInstCo}, and \rulename{RedInstTy} comprise the first interesting group of rules.
+They eliminate uses of injectivity and instantiation. Rule \rulename{RedNth} is concerned with the case where
+we wish to decompose a coercion of type $H\;\phis \psim H\;\sigmas$, where the coercion term contains $H$ in its head.
+Notice that $H$ is a type and may already be applied to some type arguments $\taus^{1..\ell}$, and hence the rule
+has to account for selection from the first $\ell$ arguments, or a later argument. Rule \rulename{RedInstCo} deals
+with instantiation of a polymorphic coercion with a type. Notice that in rule \rulename{RedInstCo} the quantified variable
+may only appear ``protected'' under some $\refl{\sigma}$ inside $\gamma$, and hence simply substituting $\gamma[\phi/a]$ is
+guaranteed to produce a syntactically well-formed coercion. Rule \rulename{RedInstTy} deals with the instantiation of a
+polymorphic coercion that is {\em just} a type.
+
+\subsubsection{Eta expanding and subsequent reducing}
+Redexes of \rulename{RedNth} and \rulename{RedInstCo} or \rulename{RedInstTy} may not be directly visible.
+Consider $\nth{k}{(\refl{H\;\taus^{1..\ell}}\;\gammas;\gamma)}$. The use of transitivity stands in our way for the
+firing of rule \rulename{RedNth}. To the rescue, rules \rulename{EtaAllL}, \rulename{EtaAllR}, \rulename{EtaNthL},
+and \rulename{EtaNthR}, push decomposition or instantiation through transitivity and eliminate such redexes.
+We call these rules ``eta'' because in effect we are $\eta$-expanding and immediately reducing one of the components of the transitive composition.
+Here is a decomposition of \rulename{EtaAllL} in smaller steps that involve an $\eta$-expansion (of $\gamma_2$ in the second line):
+\[\begin{array}{ll}
+ & \inst{((\forall a{:}\eta @.@ \gamma_1);\gamma_2)}{\phi} \\
+ \rsa{} & \inst{((\forall a{:}\eta @.@ \gamma_1);(\forall a{:}\eta @.@ \inst{\gamma_2}{a}))}{\phi} \\
+ \rsa{} & \inst{(\forall a{:}\eta @.@ \gamma_1;\inst{\gamma_2}{a})}{\phi} \;\;\rsa{}\;\; \gamma_1[\phi/a] ; \inst{\gamma_2}{\phi}
+\end{array}\]
+We have merged these steps in a single rule to facilitate the proof of
+termination. In doing this, we do not lose any reactions, since all of the intermediate terms can also reduce to the final coercion.
+
+There are many design possibilities for rules that look like our $\eta$-rules. For instance one may wonder why
+we are not always expanding terms of the form $\gamma_1;(\forall a{:}\eta @.@ \gamma_2)$ to $\forall a{:}\eta @.@ \inst{\gamma_1}{a} ; \gamma_2$,
+whenever $\gamma_1$ is of type $\forall a{:}\eta @.@ \tau \psim \forall a{:}\eta @.@ \phi$. We experimented with several variations like this, but we found
+that such expansions either complicated the termination argument, or did not result in smaller coercion terms. Our rules in
+effect perform $\eta$-expansion {\em only} when there is a firing reduction directly after the expansion.
+
+%% Finally, one may wonder if we are missing a rule that reduces $\inst{(\gamma_1;(\forall a{:}\eta @.@ \gamma_2);\gamma_3)}{\phi}$ (and similarly for $\mathrel{nth}$).
+%% However, if $\gamma_1$ or $\gamma_3$ are $\forall$-coercions then the ``push rules'' (to be described next) will recover this reduction. If not, then
+%% we are not gaining anything if we push the instantiation inwards to get: $\inst{\gamma_1}{\phi}; \gamma_2[\phi/a]; \inst{\gamma_3}{\phi}$. In fact, the
+%% resulting term is bigger than the one we started with!
+
+\subsubsection{Pushing transitivity down}
+Rules \rulename{PushApp}, \rulename{PushAll}, \rulename{PushNth}, and \rulename{PushInst} push uses of transitivity
+{\em down} the structure of a coercion term, towards the leaves. These rules aim to reveal more redexes
+at the leaves, that will be reduced by the next (and final) set of rules. Notice that rules \rulename{PushInst} and \rulename{PushNth}
+impose side conditions on the transitive composition $\gamma_1;\gamma_2$. Without these conditions, the resulting coercion may not be well-formed.
+Take $\gamma_1 = \forall a{:}\eta @.@ \refl{T\;a\;a}$ and $\gamma_2 = \forall a{:}\eta @.@ \refl{T\;a\;@Int@}$. It is
+certainly the case that $(\inst{\gamma_1}{@Int@});(\inst{\gamma_2}{@Int@})$ is well formed. However, $\wfco \gamma_1 : \forall a{:}\eta @.@ T\;a\;a \psim \forall a{:}\eta @.@ T\;a\;a$
+and $\wfco \gamma_2 : \forall a{:}\eta @.@ T\;a\;@Int@ \psim \forall a{:}\eta @.@ T\;a\;@Int@$, and hence $\inst{(\gamma_1;\gamma_2)}{@Int@}$ is not well-formed. A similar argument
+applies to rule \rulename{PushNth}.
+
+\begin{figure}[t]\small
+\[\begin{array}{c}
+%% \text{Leaf rules} \\ \\
+\prooftree
+ \Delta \wfco c : \tau \psim \upsilon
+ \minusv{VarSym}
+ \Delta \vdash c ; \sym{c} \rsa{} \refl{\tau}
+ \twiddleiv
+ \Delta \wfco c : \tau \psim \upsilon
+ \minusv{SymVar}
+ \Delta \vdash \sym{c} ; c \rsa{} \refl{\upsilon}
+ \twiddlev
+ (C\, \ol{(a{:}\eta)} : \tau \psim \upsilon) \in \Gamma \quad \as \subseteq ftv(\upsilon)
+ \minusv{AxSym}
+ \begin{array}{l}
+ \Delta \vdash C\;\gammas_1;\sym{(C\;\gammas_2)} \rsa{} \\
+ \qquad\quad \lifting{\as \mapsto \ol{\gamma_1;\sym{\gamma_2}}}{\tau}
+ \end{array}
+ \twiddleiv
+ (C\,\ol{(a{:}\eta)} : \tau \psim \upsilon) \in \Gamma \quad \as \subseteq ftv(\tau)
+ \minusv{SymAx}
+ \begin{array}{l}
+ \Delta \vdash \sym{(C\;\gammas_1)};C\;\gammas_2 \rsa{} \\
+ \qquad\quad \lifting{\as \mapsto \ol{\sym{\gamma_1};\gamma_2}}{\upsilon}
+ \end{array}
+ \twiddlev
+ \begin{array}{c}
+ (C\,\ol{(a{:}\eta)} : \tau \psim \upsilon) \in \Gamma \\ \as \subseteq ftv(\upsilon) \quad
+ nontriv(\delta) \\ \delta = \lifting{\as \mapsto\gammas_2}{\upsilon}
+ \end{array}
+ \minusv{AxSuckR}
+ \Delta \vdash (C\;\gammas_1) ; \delta \rsa{} C\;\ol{\gamma_1{;}\gamma_2}
+ \twiddleiv
+ \begin{array}{c}
+ (C \ol{(a{:}\eta)} : \tau \psim \upsilon) \in \Gamma \\ \as \subseteq ftv(\tau) \quad
+ nontriv(\delta) \\ \delta = \lifting{\as \mapsto\gammas_1}{\tau} \vspace{2pt}
+ \end{array}
+ \minusv{AxSuckL}
+ \Delta \vdash \delta ; (C\;\gammas_2) \rsa{} C\;\ol{\gamma_1{;}\gamma_2}
+ \twiddlev
+ \begin{array}{c}
+ (C\, \ol{(a{:}\eta)} : \tau \psim \upsilon) \in \Gamma \quad \as \subseteq ftv(\tau) \\
+ nontriv(\delta) \quad \delta = \lifting{\as \mapsto\gammas_2}{\tau} \vspace{2pt}
+ \end{array}
+ \minusv{SymAxSuckR}
+ \Delta \vdash \sym{(C\;\gammas_1)} ; \delta \rsa{} \sym{(C\;\ol{\sym{\gamma_2}{;}\gamma_1})}
+ \twiddlev
+ \begin{array}{c}
+ (C\,\ol{(a{:}\eta)} : \tau \psim \upsilon) \in \Gamma \quad \as \subseteq ftv(\upsilon) \\
+ nontriv(\delta) \quad \delta = \lifting{\as \mapsto\gammas_1}{\upsilon}
+ \end{array}
+ \minusv{SymAxSuckL}
+ \Delta \vdash \delta ; \sym{(C\;\gammas_2)} \rsa{} \sym{(C\;\ol{\gamma_2{;}\sym{\gamma_1}})}
+\endprooftree
+\end{array}\]\caption{Coercion simplification (II)}\label{fig:optimization2}
+\end{figure}
+
+\subsubsection{Leaf reactions}
+When transitivity and symmetry have been pushed as low as possible, new redexes may appear, for which we introduce
+rules \rulename{VarSym}, \rulename{SymVar}, \rulename{AxSym}, \rulename{SymAx}, \rulename{AxSuckR}, \rulename{AxSuckL},
+\rulename{SymAxSuckR}, \rulename{SymAxSuckL}. (Figure~\ref{fig:optimization2})
+\begin{itemize*}
+ \item Rules \rulename{VarSym} and \rulename{SymVar} are entirely straightforward: a coercion variable (or its symmetric coercion) meets its
+symmetric coercion (or the variable) and the result is the identity.
+
+ \item Rules \rulename{AxSym} and \rulename{SymAx} are more involved. Assume that the axiom $(C\;(\ol{a{:}\eta}) {:} \tau \psim \upsilon) \in \Gamma$, and a
+ well-formed coercion of the form: $C\;\gammas_1; \sym{(C\;\gammas_2)}$. Moreover $\Delta \wfco \gammas_1 : \sigmas_1 \psim \phis_1$ and $\Delta \wfco \gammas_2 : \sigmas_2 \psim \phis_2$.
+Then we know that $\Delta \wfco C\;\gammas_1; \sym{(C\;\gammas_2)} : \tau[\sigmas_1/\as] \psim \tau[\sigmas_2/\as]$. Since the composition is
+well-formed, it must be the case that $\upsilon[\phis_1/\as] = \upsilon[\phis_2/\as]$. If $\as \subseteq ftv(\upsilon)$ then it must be $\phis_1 = \phis_2$. Hence,
+the pointwise composition $\ol{\gamma_1;\sym{\gamma_2}}$ is well-formed and of type $\sigmas_1 \psim \sigmas_2$. Consequently, we may replace the original coercion
+with the {\em lifting} of $\tau$ over a substitution that maps $\as$ to $\ol{\gamma_1;\sym{\gamma_2}}$: $\lifting{\as \mapsto \ol{\gamma_1;\sym{\gamma_2}}}{\tau}$.
+
+What is this lifting operation, of a substitution from type variables to coercions, over a type?
+Its result is a new coercion, and the definition of the operation is given in Figure~\ref{fig:lifting}.
+The easiest way to understand it is by its effect on a type:
+\begin{lemma}[Lifting]
+If $\Delta,(a{:}\eta) \wfty \tau : \eta$ and
+$\Delta \wfco \gamma : \sigma \sim \phi$ such that $\Delta \wfty \sigma : \eta$ and $\Delta \wfty \phi : \eta$,
+then $\Delta \wfco \lifting{a \mapsto \gamma}{\tau} : \tau[\sigma/a] \psim \tau[\phi/a]$
+\end{lemma}
+Notice that we have made sure that lifting pulls reflexivity as high as possible in the syntax tree -- the only
+significance of this on-the-fly normalization was that it appeared to simplify the argument we have given for termination of
+coercion normalization.
+
+\begin{figure}\small
+\[\begin{array}{l}
+\ruleform{\lifting{a\mapsto\gamma}{\tau} = \gamma'} \\ \\
+\begin{array}{lcl}
+\lifting{a \mapsto \gamma}{a} & = & \gamma \\
+\lifting{a \mapsto \gamma}{b} & = & \refl{b} \\
+\lifting{a \mapsto \gamma}{H} & = & \refl{H} \\
+\lifting{a \mapsto \gamma}{F} & = & \refl{F} \\
+\lifting{a \mapsto \gamma}{\tau_1\;\tau_2} & = &
+ \left\{\begin{array}{l}
+ \refl{\phi_1\;\phi_2} \text{ when } \lifting{a\mapsto\gamma}{\tau_i} = \refl{\phi_i} \\
+ (\lifting{a \mapsto \gamma}{\tau_1})\;(\lifting{a\mapsto\gamma}{\tau_2}) \text{ otherwise }
+ \end{array}\right. \\
+\lifting{a\mapsto\gamma}{\forall b{:}\eta @.@ \tau} & = &
+ \left\{\begin{array}{l}
+ \refl{\forall a{:}\eta @.@ \phi} \text{ when } \lifting{a\mapsto\gamma}{\tau} = \refl{\phi} \\
+ \forall b{:}\eta @.@ (\lifting{a\mapsto\gamma}{\tau}) \text{ otherwise} \quad (b \notin ftv(\gamma), b \neq a)
+ \end{array}\right.
+\end{array}
+%% \begin{array}{llcl}
+%% & \tcase{\tcast{K\;\taus\;\gammas\;\es}{\gamma}}{p \to u} & \rightsquigarrow & \tcase{K\;\taus'\;\gammas'\;\es'}{p \to u} \\
+%% & & & \hspace{-4pt}\begin{array}{lll}
+%% \text{when } \\ %% & \cval(K\;\taus\;\phis\;\es) \\
+%% & \wfco \gamma : T\;\taus \psim T\;\taus' \\
+%% & K{:}\forall\ol{a{:}\eta} @.@ \ol{\sigma_1 \psim \sigma_2} \to \sigmas \to T\;\as \in \Gamma \\
+%% & e_i' = \tcast{e_i}{\lifting{\as \mapsto \deltas}{\sigma_i[\phis/\cs]}} \\
+%% & \delta_j = \nth{j}{\gamma} \\
+%% & \gamma_j' = \ldots
+%% \end{array}
+%% \end{array}
+\end{array} \]
+\caption{Lifting}\label{fig:lifting}
+\end{figure}
+
+Returning to rules \rulename{AxSym} and \rulename{SymAx}, we stress that the side condition is essential for the rule to be sound. Consider the following example:
+\[ C (a{:}\star): F\;[a] \psim @Int@ \in \Gamma \]
+Then $(C\;\refl{@Int@});\sym{(C\;\refl{@Bool@})}$ is well-formed and of
+type $F\;[@Int@] \psim F\;[@Bool@]$, but $\refl{F}\;(\refl{@Int@};\sym{\refl{@Bool@}})$ is not well-formed!
+Rule \rulename{SymAx} is symmetric and has a similar soundness side condition on the free variables of $\tau$ this time.
+
+ \item The rest of the rules deal with the case when an axiom meets a lifted type -- the reaction swallows the lifted type
+ inside the axiom application. For instance, here is rule \rulename{AxSuckR}:
+ \[\small\prooftree
+ \begin{array}{c}
+ (C\;(\ol{a{:}\eta}) {:} \tau \psim \upsilon) \in \Gamma \quad \as \subseteq ftv(\upsilon) \\
+ nontriv(\delta) \quad \delta = \lifting{\as \mapsto\gammas_2}{\upsilon} \vspace{2pt}
+ \end{array}
+ \minusv{AxSuckR}
+ \Delta \vdash (C\;\gammas_1) ; \delta \rsa{} C\;\ol{\gamma_1{;}\gamma_2}
+ \endprooftree\]
+ This time let us assume that $\Delta \wfco \gammas_1 : \sigmas_1 \psim \phis_1$. Consequently
+ $\Delta \wfco C\;\gammas_1 : \tau[\sigmas_1/\as] \psim \upsilon[\phis_1/\as]$. Since $\as \subseteq ftv(\upsilon)$ it
+ must be that $\Delta \wfco \gammas_2 : \phis_1 \psim \phis_3$ for some $\phis_3$ and we can
+ pointwise compose $\ol{\gamma_1{;}\gamma_2}$ to get coercions between $\sigmas_1 \psim \phis_3$.
+ The resulting coercion $C\;\ol{\gamma_1{;}\gamma_2}$ is well-formed and of type $\tau[\sigmas_1/\as] \psim \upsilon[\phis_3/\as]$.
+ Rules \rulename{AxSuckL}, \rulename{SymAxSuckL}, and \rulename{SymAxSuckR} involve a similar reasoning.
+
+ The side condition $nontriv(\delta)$ is not restrictive in any way -- it merely requires that $\delta$ contains some variable
+ $c$ or axiom application. If not, then $\delta$ can be converted to reflexivity:
+ \begin{lemma}\label{lem:coherence}
+ If $\wfco \delta : \sigma{\psim}\phi$ and $\lnot nontriv(\delta)$, then $\delta {\longrightarrow^{*}} \refl{\phi}$.
+ \end{lemma}
+ Reflexivity, when transitively composed with any other coercion, is eliminable via \rulename{ReflElimL/R} or and consequently the side condition is not preventing any
+ reactions from firing. It will, however, be useful in the simplification termination proof in Section~\ref{ssect:termination}.
+\end{itemize*}
+
+The purpose of rules \rulename{AxSuckL/R} and \rulename{SymAxSuckL/R} is to eliminate intermediate coercions in a
+big transitive composition chain, to give the opportunity to an axiom to meet its symmetric version and react
+with rules \rulename{AxSym} and \rulename{SymAx}. In fact this rule is {\em precisely} what we need for the
+impressive simplifications from Section~\ref{ssect:large}. Consider that example again:
+\[\begin{array}{lrll}
+ \gamma_5 & = & \gamma_2;\gamma_3;\gamma_4 \\
+ & = & \sym{(C_N\;\refl{\tau_1})};(\refl{N}\;\gamma_1);(C_N\;\refl{\tau_2}) & (\rulename{AxSucL} \text{ with } \delta := (\refl{N}\;\gamma_1)) \\
+ & \longrightarrow & \sym{(C_N\;\refl{\tau_1})};(C_N\;(\gamma_1;\refl{\tau_2})) & (\rulename{ReflElimR} \text{ with } \gamma := \gamma_1, \phi := \tau_2) \\
+ & \longrightarrow & \sym{(C_N\;\refl{\tau_1})};(C_N\;\gamma_1) & (\rulename{SymAx}) \\
+ & \longrightarrow & \refl{\to}\;(\refl{\tau_1};\gamma_1)\;\refl{@Int@} & (\rulename{ReflElimL} \text{ with } \phi := \tau_1,\gamma := \gamma_1) \\
+ & \longrightarrow & \refl{\to}\;\gamma_1\;\refl{@Int@}
+\end{array}\]
+
+Notably, rules \rulename{AxSuckL/R} and \rulename{SymAxSuckL/R} generate
+axiom applications of the form $C\;\gammas$ (with a coercion as argument).
+In our previous papers, the syntax of axiom applications was $C\;\taus$, with \emph{types}
+as arugments. But we need the additional generality to allow coercions rewriting to
+proceed without getting stuck.
+
+%% which we now give in mathematical notation. The
+%% relevant axioms are:
+%% \[\begin{array}{lll}
+%% C_n : \forall a{:}\star \to \star @.@ N\;a \psim \forall xy @.@ a\;x \to a\;y & \in & \Gamma \\
+%% C_f : F\;() \psim Maybe & \in & \Gamma
+%% \end{array}\]
+%% The coercion term is:
+%% \[ \nth{2}{(
+%% \inst{(\inst{(\sym{C_n\;\refl{Maybe}};\refl{N}\;(\sym{C_f});C_n\;\refl{F\;()})}{x_{a}})}{y_{a}})} \]
+
+%% Its simplification is given in Figure~\ref{fig:optimization-example}.
+%% Notably, rules \rulename{AxSuckL/R} and \rulename{SymAxSuckL/R} rely
+%% on axiom applications be of the form $C\;\gammas$ instead of the simpler
+%% $C\;\taus$ found in previous FC papers.
+
+%% \begin{figure*}\small
+%% \[\begin{array}{ll}
+%% \nth{2}{(
+%% \inst{(\inst{(\sym{C_n\;\refl{Maybe}};\highlight{\refl{N}\;(\sym{C_f});C_n\;\refl{F\;()}})}{x_{a}})}{y_{a}})} \vspace{3pt} \\
+%% (\rulename{AxSuckL}) \rsa{} \vspace{3pt}\\
+%% \nth{2}{(
+%% \inst{(\inst{\highlight{(\sym{C_n\;\refl{Maybe}};C_n\;((\sym{C_f});\refl{F\;()}))}}{x_{a}})}{y_{a}})} \vspace{3pt} \\
+%% (\rulename{SymAx}) \rsa{} \vspace{3pt}\\
+%% \nth{2}{(
+%% \inst{(\inst{(\forall xy. (\sym{\refl{Maybe}};\sym{C_f};\refl{F\;()})\;\refl{x}\;\refl{\to}\;
+%% (\sym{\refl{Maybe}};\sym{C_f};\refl{F\;()})\;\refl{y})}{x_{a}})}{y_{a}})} \vspace{3pt}\\
+%% (\rulename{ReflElimL},\rulename{ReflElimR},\rulename{SymRefl}) \rsa{}^{*} \vspace{3pt}\\
+%% \nth{2}{(
+%% \inst{(\inst{(\forall x y @.@ (\sym{C_f})\;\refl{x}\;\refl{\to}\;
+%% (\sym{C_f})\;\refl{y})}{x_{a}})}{y_{a}})} \vspace{3pt}\\
+%% (\rulename{RedInstCo}) \rsa{}^{*} \quad
+%% \nth{2}{((\sym{C_f})\;\refl{x_a}\;\refl{\to}\;(\sym{C_f})\;\refl{y_a})} \quad
+%% (\rulename{RedNth}) \rsa{} \quad
+%% (\sym{C_f})\;\refl{y_a}
+%% \end{array}\]
+%% \caption{Simplification example}\label{fig:optimization-example}
+%% \end{figure*}
+
+\section{Coercion simplification in GHC}\label{ssect:ghc}
+
+To assess the usefulness of coercion simplification we added it to GHC.
+For Haskell programs that make no use of GADTs or type families, the
+effect will be precisely zero, so we took measurements on two bodies of code.
+First, our regression suite of 151 tests for GADTs and type families; these are
+all very small programs. Second, the @Data.Accelerate@ library that we know makes use
+of type families \cite{chakravarty+:accelerate}. This library consists
+of 18 modules, containing 8144 lines of code.
+
+We compiled each of these programs with and without coercion simplification,
+and measured the percentage reduction in size of the coercion terms with
+simplification enabled. This table shows the minimum, maximum, and
+aggregate reduction, taken over the 151 tests and 18 modules respectively.
+The ``aggregate reduction'' is obtained by combining all the programs
+in the group (testsuite or @Accelerate@) into one giant ``program'', and computing
+the reduction in coercion size.
+$$
+\begin{array}{rrr}
+& \text{Testsuite} & \text{Accelerate} \\
+\text{Minimum} & -97\% & -81\%\\
+\text{Maximum} & +14\% & 0\% \\
+\text{\bf Aggregate} & {\bf -58\% } & {\bf -69\%}
+\end{array}
+$$
+There is a substantial aggregate decrease of 58\% in the testsuite
+and 69\% in @Accelerate@, with a massive 97\% decrease
+in special cases. These special cases should not be taken lightly:
+in one program the types and coercions taken together were five times
+bigger than the term they decorated; after simplification they were ``only''
+twice as big. The coercion simplifier makes the compiler less vulnerable to
+falling off a cliff.
+
+Only one program showed an increase in coercion size, of 14\%, which turned out to be the
+effect of this rewrite:
+$$ \sym{(C ; D)} \quad \longrightarrow \quad (\sym{D});(\sym{C}) $$
+Smaller coercion terms make the
+compiler faster, but the normalization algorithm itself consumes some time.
+However, the effect on compile time is barely measurable (less than 1\%), and we
+do not present detailed figures.
+
+Of course none of this would matter if coercions were always tiny, so that they
+took very little space in the first place. And indeed that is often the case.
+But for programs that make heavy use of type functions, un-optimised coercions
+can dominate compile time. For example, the @Accelerate@ library makes
+heavy use of type functions. The time and memory consumption of compiling
+all 21 modules of the library are as follows:
+$$
+\begin{array}{lrrr}
+ & \text{Compile time} & \text{Memory allocated} & \text{Max residency} \\
+\text{With coercion optimisation} & 68s & 31\, Gbyte & 153\, Mbyte \\
+\text{Without coercion optimisation} & 291s & 51\, Gbyte & 2,000\, Mbyte
+\end{array}
+$$
+As you can see, the practical effects can be extreme; the cliff is very real.
+
+\section{Termination and confluence}\label{ssect:termination}
+\newcommand{\ps}{\ol{p}}
+
+We have demonstrated the effectiveness of the algorithm in practice, but we must also
+establish termination. This is important, since it would not be acceptable
+for a compiler to loop while simplifying a coercion, no matter what axioms are declared by users.
+Since the rules fire non-deterministically, and some of the rules (such as \rulename{RedInstCo} or \rulename{AxSym})
+create potentially larger coercion trees, termination is not obvious.
+
+\subsection{Termination}
+
+\begin{figure*}\small
+\[\begin{array}{c}
+ \begin{array}{lcl}
+ \multicolumn{3}{l}{\text{Axiom polynomial}} \\
+ p(\sym{\gamma}) & = & p(\gamma) \\
+ p(C\;\gammas) & = & z \cdot \Sigma p(\gamma_i) + z + 1 \\
+ p(c) & = & 1 \\
+ p(\gamma_1;\gamma_2) & = & p(\gamma_1) + p(\gamma_2) + p(\gamma_1)\cdot p(\gamma_2) \\
+ p(\refl{\phi}) & = & 0 \\
+ p(\nth{k}{\gamma}) & = & p(\gamma) \\
+ p(\inst{\gamma}{\phi}) & = & p(\gamma) \\
+ p(\gamma_1\;\gamma_2) & = & p(\gamma_1) + p(\gamma_2) \\
+ p(\forall a{:}\eta @.@ \gamma) & = & p(\gamma)
+ \end{array}
+ \begin{array}{lcl}
+ \multicolumn{3}{l}{\text{Coercion weight}} \\
+ w(\sym{\gamma}) & = & w(\gamma) \\
+ w(C\;\gammas) & = & \Sigma w(\gamma_i) + 1 \\
+ w(c) & = & 1 \\
+ w(\gamma_1;\gamma_2) & = & 1 + w(\gamma_1) + w(\gamma_2) \\
+ w(\refl{\phi}) & = & 1 \\
+ w(\nth{k}{\gamma}) & = & 1 + w(\gamma) \\
+ w(\inst{\gamma}{\phi}) & = & 1 + w(\gamma) \\
+ w(\gamma_1\;\gamma_2) & = & 1 + w(\gamma_1) + w(\gamma_2) \\
+ w(\forall a{:}\eta @.@ \gamma) & = & 1 + w(\gamma)
+ \end{array} \\
+\begin{array}{lcl}
+ \multicolumn{3}{l}{\text{Symmetry weight}} \\
+ sw(\sym{\gamma}) & = & w(\gamma) + sw(\gamma) \\
+ sw(C\;\gammas) & = & \Sigma sw(\gamma_i) \\
+ sw(c) & = & 0 \\
+ sw(\gamma_1;\gamma_2) & = & sw(\gamma_1) + sw(\gamma_2) \\
+ sw(\refl{\phi}) & = & 0 \\
+ sw(\nth{k}{\gamma}) & = & sw(\gamma) \\
+ sw(\inst{\gamma}{\phi}) & = & sw(\gamma) \\
+ sw(\gamma_1\;\gamma_2) & = & sw(\gamma_1) + sw(\gamma_2) \\
+ sw(\forall a{:}\eta @.@ \gamma) & = & sw(\gamma)
+ \end{array}
+\end{array}\]\caption{Metrics on coercion terms}\label{fig:metrics}
+\end{figure*}
+
+To formalize a termination argument, we introduce several definitions in Figure~\ref{fig:metrics}.
+The {\em axiom polynomial} of a coercion over a distinguished variable $z$, $p(\cdot)$, returns a polynomial with natural number coefficients that can be compared
+to any other polynomial over $z$. The {\em coercion weight} of a coercion is defined as the function $w(\cdot)$ and
+the {\em symmetry weight} of a coercion is defined with the function $sw(\cdot)$ in
+ Figure~\ref{fig:metrics}. Unlike the polynomial and coercion weights of a coercion,
+ $sw(\cdot)$ does take symmetry into account.
+Finally, we will also use the {\em number of coercion applications and coercion $\forall$-introductions}, denoted with $intros(\cdot)$ in what follows.
+
+Our termination argument comprises of the lexicographic left-to-right ordering of:
+\[ \mu(\cdot) = \langle p(\cdot),w(\cdot),intros(\cdot),sw(\cdot)\rangle \]
+We will show that each of the $\rsa{}$ reductions reduces this tuple.
+For this to be a valid termination argument for $(\longrightarrow)$ we need two more facts about
+{\em each} component measure, namely that (i)~$(=)$ and $(<)$ are preserved under arbitrary contexts,
+and (ii)~each component is invariant with respect to the associativity of $(;)$.
+
+\begin{lemma} If $\Delta \wfco \gamma_1 : \tau \psim \sigma$ and $\gamma_1 \approx \gamma_2$ modulo associativity
+of $(;)$, then $p(\gamma_1) = p(\gamma_2)$, $w(\gamma_1) = w(\gamma_2)$, $intros(\gamma_1) = intros(\gamma_2)$, and $sw(\gamma_1) = sw(\gamma_2)$.
+\end{lemma}
+\vspace{-10pt}\begin{proof} This is a simple inductive argument, the only interesting case is the case for
+$p(\cdot)$ where the reader can calculate that $p(\gamma_1;(\gamma_2;\gamma_3)) = p((\gamma_1;\gamma_2);\gamma_3)$ and by induction we are done.
+\end{proof}
+
+\begin{lemma} If $\Gamma,\Delta \wfco \gamma_i : \tau \psim \sigma$ (for $i=1,2$) and $p(\gamma_1) < p(\gamma_2)$ then
+$p(\G[\gamma_1]) < p(\G[\gamma_2])$ for any $\G$ with $\Gamma \wfco \G[\gamma_i] : \phi \psim \phi'$.
+Similarly if we replace $(<)$ with $(=)$.
+\end{lemma}
+\vspace{-10pt}\begin{proof} By induction on the shape of $\G$. The only interesting case is the transitivity case
+again. Let $\G = \gamma ; \G'$. Then $p(\gamma;\G'[\gamma_1]) = p(\gamma) + p(\G'[\gamma_1]) + p(\gamma)\cdot p(\G'[\gamma_1])$ whereas
+$p(\gamma;\G'[\gamma_2]) = p(\gamma) + p(\G'[\gamma_2]) + p(\gamma)\cdot p(\G'[\gamma_2])$. Now, either $p(\gamma) = 0$, in which case we are done
+by induction hypothesis for $\G'[\gamma_1]$ and $\G'[\gamma_2]$, or $p(\gamma) \neq 0$ in which case again induction
+hypothesis gives us the result since we are multiplying $p(\G'[\gamma_1])$ and $p(\G'[\gamma_2])$ by the same polynomial.
+The interesting ``trick'' is that the polynomial for transitivity contains both the product
+of the components {\em and} their sum (since product alone is not preserved by contexts!).
+\end{proof}
+\begin{lemma} If $\Gamma,\Delta \wfco \gamma_i : \tau \psim \sigma$ and $w(\gamma_1) < w(\gamma_2)$ then
+$w(\G[\gamma_1]) < w(\G[\gamma_2])$ for any $\G$ with $\Gamma \wfco \G[\gamma_i] : \phi \psim \phi'$.
+Similarly if we replace $(<)$ with $(=)$.
+\end{lemma}
+
+\begin{lemma} If $\Gamma,\Delta \wfco \gamma_i : \tau \psim \sigma$ and $intros(\gamma_1) < intros(\gamma_2)$ then
+$intros(\G[\gamma_1]) < intros(\G[\gamma_2])$ for any $\G$ with $\Gamma \wfco \G[\gamma_i] : \phi \psim \phi'$.
+Similarly if we replace $(<)$ with $(=)$.
+\end{lemma}
+
+
+\begin{lemma} If $\Gamma,\Delta \wfco \gamma_i : \tau \psim \sigma$, $w(\gamma_1) \leq w(\gamma_2)$, and $sw(\gamma_1) < sw(\gamma_2)$ then
+$sw(\G[\gamma_1]) < sw(\G[\gamma_2])$ for any $\G$ with $\Gamma \wfco \G[\gamma_i] : \phi \psim \phi'$.
+\end{lemma}
+\vspace{-10pt}\begin{proof} The only interesting case is when $\G = \sym{\G'}$ and hence we have that
+$sw(\G[\gamma_1]) = sw(\sym{\G'[\gamma_1]}) = w(\G'[\gamma_1]) + sw(\G'[\gamma_1])$.
+Similarly $sw(\G[\gamma_2]) = w(\G'[\gamma_2]) + sw(\G'[\gamma_2])$. By the precondition for the weights and induction
+hypothesis we are done. The precondition on the weights is not restrictive, since
+$w(\cdot)$ has higher precedence than $sw(\cdot)$ inside $\mu(\cdot)$.
+\end{proof}
+
+The conclusion is the following theorem.
+\begin{theorem}
+If $\gamma \approx \G[\gamma_1]$ modulo associativity of $(;)$ and $\Delta \wfco \gamma_1 : \sigma \psim \phi$, and
+$\Delta \vdash \gamma_1 \rsa{} \gamma_2$ such that $\mu(\gamma_2) < \mu(\gamma_1)$, it is the case that $\mu(\G[\gamma_2]) < \mu(\gamma)$.
+\end{theorem}
+
+\begin{corollary}
+$(\longrightarrow)$ terminates on well-formed coercions if each of the $\rsa{}$ transitions reduces $\mu(\cdot)$.
+\end{corollary}
+
+Note that often the term rewrite literature requires similar
+conditions (preservation under contexts and associativity), but also
+{\em stability under substitution} (e.g. see~\cite{Baader:1998:TR:280474},
+Chapter 5). In our setting, variables are essentially treated as
+constants and this is the reason that we do not rely on stability
+under substitutions. For instance the rule \rulename{ReflElimR}
+$\Delta |- \gamma;\refl{\phi} \rsa{} \gamma$ is {\em not} expressed as
+$\Delta |- c;\refl{\phi} \rsa{} c$, as would be customary in a more
+traditional term-rewrite system presentation.
+
+We finally show that indeed each of the $\rsa{}$ steps reduces $\mu(\cdot)$.
+
+\begin{theorem}[Termination]
+If $\Delta \wfco \gamma_1 : \sigma \psim \phi$ and $\Delta \vdash \gamma_1 \rsa{} \gamma_2$ then $\mu(\gamma_2) < \mu(\gamma_1)$.
+\end{theorem}
+\vspace{-10pt}\begin{proof}
+It is easy to see that the reflexivity rules, the symmetry rules, the reduction rules, and the
+$\eta$-rules preserve or reduce the polynomial component $p(\cdot)$. The same is true for the push rules
+but the proof is slightly more interesting. Let us consider \rulename{PushApp}, and let us write
+$p_i$ for $p(\gamma_i)$. We have that
+$p((\gamma_1\;\gamma_2);(\gamma_3\;\gamma_4)) = p_1 + p_2 + p_3 + p_4 + p_1p_3 + p_2p_3 + p_1p_4 + p_2p_4$. On
+the other hand $p((\gamma_1;\gamma_3)\;(\gamma_2;\gamma_4)) = p_1 + p_3 + p_1p_3 + p_2 + p_4 + p_2p_4$ which is a smaller
+or equal polynomial than the left-hand side polynomial.
+Rule \rulename{PushAll} is easier. Rules \rulename{PushInst} and \rulename{PushNth} have exactly the same polynomials on the left-hand and
+the right-hand side so they are ok. Rules \rulename{VarSym} and \rulename{SymVar} reduce $p(\cdot)$.
+The interesting bit is with rules \rulename{AxSym}, \rulename{SymAx}, and \rulename{AxSuckR/L}
+and \rulename{SymAxSuckR/L}. We will only show the cases for \rulename{AxSym} and \rulename{AxSuckR}
+as the rest of the rules involve very similar calculations:
+\begin{itemize*}
+ \item Case \rulename{SymAx}. We will use the notational convention $\ps_1$ for $p(\gammas_1)$ (a vector of polynomials)
+ and similarly $\ps_2$ for $p(\gammas_2)$. Then the left-hand side polynomial is:
+ \[\begin{array}{l}
+ (z\Sigma\ps_1{+}z{+}1) + (z\Sigma\ps_2{+}z{+}1) + \\
+ \quad\quad\quad\quad\quad\quad (z\Sigma\ps_1{+}z{+}1)\cdot(z\Sigma\ps_2{+}z{+}1) = \\
+ (z^2{+}2z)\Sigma\ps_1 + (z^2{+}2z)\Sigma\ps_2 + z^2\Sigma\ps_1\Sigma\ps_2 + (z^2{+}4z{+}3)
+ \end{array}\]
+ For the right-hand side polynomial we know that each $\gamma_{1i};\sym{\gamma_{2i}}$ will have polynomial
+ $p_{1i}+p_{2i}+p_{1i} p_{2i}$ and it cannot be repeated inside the lifted type more than a finite
+ number of times (bounded by the maximum number of occurrences of a type variable from $\as$ in
+ type $\tau$), call it $k$. Hence the right-hand side polynomial is smaller or equal to:
+ \[\begin{array}{ll}
+ k\Sigma\ps_1 + k\Sigma\ps_2 + k\Sigma(p_{1i}p_{2i}) \leq
+ k\Sigma\ps_1 + k\Sigma\ps_2 + k\Sigma\ps_1\Sigma\ps_2
+ \end{array}\]
+ But that polynomial is strictly smaller than the left-hand side polynomial, hence we are done.
+ \item Case \rulename{AxSuckR}. In this case the left-hand side polynomial is going to be greater
+ or equal to (because of reflexivity inside $\delta$ and because some of the $\as$ variables may
+ appear more than once inside $\upsilon$ it is not exactly equal to) the following:
+ \[\begin{array}{l}
+ (z\Sigma\ps_1+z+1) + \Sigma\ps_2 + (z\Sigma\ps_1+z+1)\Sigma\ps2 = \\
+ \quad\quad\quad\quad\quad z\Sigma\ps_1\Sigma\ps_2 + z\Sigma\ps_1 + z\Sigma\ps_2 + 2\Sigma\ps_2 + z + 1
+ \end{array}\]
+ On the other hand, the right-hand side polynomial is:
+$$
+ z\Sigma(p_{1i}+p_{2i}+p_{1i}p_{2i})+z+1 \, \leq \,
+ z\Sigma\ps_1+z\Sigma\ps_2+z\Sigma\ps_1\Sigma\ps_2+z+1
+$$
+ We observe that there is a difference of $2\Sigma\ps_2$, but we know that
+ $\delta$ satisfies $nontriv(\delta)$, and consequently there must exist some variable or
+ axiom application inside one of the $\gammas_2$. Therefore, $\Sigma\ps_2$ is
+ {\em non-zero} and the case is finished.
+\end{itemize*}
+It is the arbitrary copying of coercions $\gammas_1$ and $\gammas_2$ in rules \rulename{AxSym} and \rulename{SymAx}
+that prevents simpler measures that only involve summation of coercions for axioms or transitivity. Other reasonable
+measures such as the height of transitivity uses from the leaves would not be preserved from contexts,
+due to \rulename{AxSym} again.
+
+So far we've shown that all rules but the axiom rules preserve the polynomials, and the axiom rules
+reduce them. We next show that in the remaining rules, some other component reduces, lexicographically.
+Reflexivity rules reduce $w(\cdot)$. Symmetry rules preserve $w(\cdot)$ and $intros(\cdot)$
+but reduce $sw(\cdot)$. Reduction rules and $\eta$-rules reduce $w(\cdot)$.
+Rules \rulename{PushApp} and \rulename{PushAll} preserve or reduce $w(\cdot)$ but certainly
+reduce $intros(\cdot)$. Rules \rulename{PushInst} and \rulename{PushNth} reduce $w(\cdot)$.
+\end{proof}
+We conclude that $(\longrightarrow)$ terminates.
+
+\subsection{Confluence}
+
+Due to the arbitrary types of axioms and coercion variables in the context, we do not expect
+confluence to be true. Here is a short example that demonstrates the lack of
+confluence; assume we have the following in our context:
+\[\begin{array}{lcl}
+C_1 \, (a{:}\star \to \star) : F\;a \psim a & \\
+C_2 \, (a{:}\star \to \star) : G\;a \psim a &
+\end{array}\]
+Consider the coercion:
+\[ (C_1\;\refl{\sigma});\sym{(C_2\;\refl{\sigma})} \]
+of type $F\;\sigma \psim G\;\sigma$. In one reduction possibility, using
+rule \rulename{AxSuckR}, we may get
+\[ C_1\;(\sym{(C_2\;\refl{\sigma})}) \]
+In another possibility, using \rulename{SymAxSuckL}, we
+may get
+\[ \sym{(C_2\;(\sym{(C_1\;\refl{\sigma})}))} \]
+Although the two normal forms are different, it is unclear if one of them is ``better'' than the other.
+
+Despite this drawback, confluence or syntactic characterization of normal forms is, for our purposes,
+of secondary importance (if possible at all for open coercions in such an under-constrained problem!),
+since we never reduce coercions for the purpose of comparing their normal forms. That said, we acknowledge
+that experimental results may vary with respect to the actual evaluation strategy, but we do not expect
+wild variations.
+
+
+%% \begin{array}{l}
+%% \nth{2}{(
+%% \inst{(\inst{(\sym{N\;\refl{Maybe}};C\;\sym{TF};N\;(F\;()))}{x_{a}})}{y_{a}})} \\
+%% \rightsquigarrow
+
+
+
+
+%% axiom N a :: C a ~ forall xy. a x -> a y
+%% axiom TF :: F () ~ Maybe
+%% nth 2
+%% (inst
+%% (inst
+%% (trans
+%% (sym
+%% (N Maybe)
+%% )
+%% (trans (C (sym TF()))
+%% (N (F ()))
+%% ))
+%% xabL)
+%% yabM)
+%% :: Maybe yabM ~ F () yabM
+%% Here is another motivating example from GHC: \dv{Add example.}
+
+
+
+
+
+
+
+%% \section{Discussion}\label{s:discuss}
+%% \dv{Is there anything we want to write here, at all?}
+
+\section{Related and future work}\label{s:related}
+
+%% \subsection{Coercion erasure}
+%% There is a substantial volume of related work on proof erasure in the
+%% context of dependent type theory. Our method for sound, runtime, but
+%% zero-cost equality proof terms lies in the middle ground between two
+%% other general methodologies.
+
+%% \paragraph{Type-based erasure}
+%% On the one hand, Coq~\cite{coq} uses a {\em
+%% type-based} erasure process by introducing a special universe for
+%% propositions, {\em Prop}. Terms whose type lives in {\em
+%% Prop} are erased even when they are applications of functions
+%% (lemmas) to computational terms. This is sound since in Coq
+%% the computation language is also strongly normalizing. As we have seen,
+%% this is not sound in FC.
+%% \paragraph{Irrelevance-based erasure}
+%% On the other hand, {\em irrelevance-based} erasure is another
+%% methodology proposed in the context of pure type systems and type
+%% theory. In the context of Epigram, \cite{DBLP:conf/types/BradyMM03} present an erasure
+%% technique where term-level indices of inductive types can be erased
+%% even when they are deconstructed inside the body of a function, since
+%% values of the indexed inductive datatype will be simultaneously
+%% deconstructed and hence the indices are irrelevant for the
+%% computation. In the Agda language~\cite{norell:thesis} there exist plans to adopt a similar
+%% irrelevance-based erasure strategy. Other related work~\cite{mishra:erasure,abel:fossacs11}
+%% proposes erasure in the context of PTSs guided with lightweight programmer annotations.
+
+%% Finally, our approach of separating the ``computational part'' of a
+%% proof, which always has to run before we get to a zero-cost ``logical part''
+%% is reminiscent of the separation that A-normal forms introduce in refinement
+%% type systems, for instance~\cite{bengtson+:f7}. It is interesting future work to determine
+%% whether our treatment of coercions is also applicable
+%% to types and hopefully paves the way towards full-spectrum dependent types.
+%% \subsection{Coercion simplification}
+
+Traditionally, work on proof theory is concerned with proof normalization theorems, namely cut-elimination.
+Category and proof theory has studied the commutativity of diagrams in {\em monoidal
+categories}~\cite{MacLaneS:catwm}, establishing coherence theorems. In our setting Lemma~\ref{lem:coherence}
+expresses such a result: any coercion that does not include axioms or free coercion variables is equivalent
+to reflexivity. More work on proof theory is concerned with cut-elimination theorems -- in our setting
+eliminating transitivity completely is plainly impossible due to the presence of axioms.
+Recent work on {\em 2-dimensional type theory}~\cite{Licata:2012:CTT:2103656.2103697} provides an equivalence
+relation on equality proofs (and terms),
+which suffices to establish that types enjoy canonical forms. Although that work does not provide an algorithm
+for checking equivalence (this is harder to do because of actual computation embedded with isomorphisms), that
+definition shares many rules with our normalization algorithm. Finally there is a large literature in associative
+commutative rewrite systems~\cite{Dershowitz:1983:AR:1623516.1623594,Bachmair:1985:TOA:6947.6948}.
+
+To our knowledge, most programming languages literature on coercions is not concerned with coercion
+simplification but rather with inferring the placement of coercions in
+source-level programs. Some recent examples are~\cite{luo:coercions} and~\cite{Swamy:2009:TTC:1596550.1596598}.
+A comprehensive study of coercions {\em and their normalization} in programming languages
+is that of~\cite{henglein:coercions}, motivated by coercion placement in a language with
+{\em type dynamic}. Henglein's coercion language differs to ours in that (i)~coercions there
+are not symmetric, (ii)~do not involve polymorphic axiom schemes and (iii)~may have computational significance.
+Unlike us, Henglein is concerned with characterizations of minimal coercions and confluence,
+fixes an equational theory of coercions, and presents a normalization algorithm for that equational theory.
+In our case, in the absence of a denotational semantics for System FC and its coercions,
+such an axiomatization would be no more ad-hoc than the algorithm and
+hence not particularly useful: for instance we could consider adding type-directed
+equations like $\Delta \vdash \gamma \rsa{} \refl{\tau}$ when $\Delta \wfco \gamma : \tau \psim \tau$, or other equations
+that only hold in consistent or confluent axiom sets. It is certainly an
+interesting direction for future work to determine whether
+there even exists a maximal syntactic axiomatization of
+equalities between coercions with respect to some denotational semantics
+of System FC.
+
+In the space of typed intermediate languages, {\sf xMLF}\cite{Remy-Yakobowski:xmlf} is
+a calculus with coercions that capture {\em instantiation} instead of equality, and which serves
+as target for the {\sf MLF} language. Although the authors are not
+directly concerned with normalization as part of an intermediate
+language simplifier, their translation of the graph-based instantiation
+witnesses does produce {\sf xMLF} normal proofs.
+
+Finally, another future work direction would be to
+determine whether we can encode coercions as $\lambda$-terms, and derive coercion
+simplification by normalization in some suitable $\lambda$-calculus.
+
+%% \dv{Related work seems a bit thin at the moment.}
+
+\paragraph*{Acknowledgments}
+Thanks to Tom Schrijvers for early discussions
+and for contributing a first implementation. We would particularly
+like to thank Thomas Str\"{o}der for his insightful and detailed feedback
+in the run-up to submitting the final paper.
+
+\bibliographystyle{plain}
+\bibliography{fc-normalization-rta}
+
+\end{document}