% $Id: editorial.tex,v 1.6 2006/05/01 11:46:55 spitters Exp $
% EDIT Convention:
% when adding comments please use your initials followed by colon, eg: "MN:"
% when you want somebody to work on or look at a piece use his name followed by question mark "Milad?"
\documentclass{mscs}
\usepackage{url}
\usepackage{xspace}
\usepackage{amsmath, amssymb}
\usepackage{theorem} % theorem like environement
\usepackage[all]{xy} % xypic generic package
\usepackage{alltt}
\usepackage[comma]{natbib}
\bibpunct{(}{)}{;}{a}{}{,}
\let\cite=\citep
\newcommand{\weg}[1]{}
\newcommand{\todo}[1]{\textbf{ToDo}\marginpar{#1}}
\newcommand*{\TTE}{\textrm{TTE}\xspace}
\newcommand*{\martinlof}{\textrm{Martin-L\"{o}f}\xspace}
\newcommand*{\coq}{\textrm{Coq}\xspace}
\newcommand*{\role}{\textrm{r\^{o}le}\xspace}
\newcommand*{\mobius}{\textrm{M\"{o}bius}\xspace}
\newcommand*{\naive}{\textrm{na\"{i}ve}\xspace}
\newcommand*{\lub}{\sqcup}
\newcommand*{\Lub}{\bigsqcup}
\newcommand*{\sqleq}{\leq}
\newcommand*{\IN}{\ensuremath{\mathbb{N}}\xspace}
\newcommand*{\ZZ}{\ensuremath{\mathbb{Z}}\xspace}
\newcommand*{\QQ}{\ensuremath{\mathbb{Q}}\xspace}
\newcommand*{\IR}{\mathbb{R}}
\newcommand*{\CC}{\mathbb{C}}
\newcommand{\Baire}{\mathbb{B}}
\newcommand*{\appr}{<\!\!<}
\newcommand*{\sK}{{\sf K}}
\newcommand*{\nat}{{\sf nat}}
\newcommand*{\sreal}{{\sf real}}
\newcommand*{\realpcf}{\textrm{RealPCF}\xspace}
\newcommand*{\pcf}{\textrm{PCF}\xspace}
\newcommand*{\bool}{{\sf bool}}
\newcommand*{\true}{{\sf true}}
\newcommand*{\false}{{\sf false}}
\newcommand*{\leaf}{{\sf leaf}\xspace}
\newcommand*{\join}{{\sf join}\xspace}
\newcommand*{\Tree}{{\sf Tree}}
\newcommand*{\Fixp}{{\sf Fixpoint}\xspace}
\newcommand*{\NCnt}{{\sf NCnt}\xspace}
\newcommand*{\struct}{{\sf struct}\xspace}
\newcommand*{\match}{{\sf match}\xspace}
\newcommand*{\with}{{\sf with}\xspace}
\newcommand*{\send}{{\sf end}}
\newcommand*{\ind}{{\sf ind}\xspace}
\newcommand*{\Gcd}{{\sf Gcd}\xspace}
\newcommand*{\sright}{{\sf right}\xspace}
\newcommand*{\sleft}{{\sf left}\xspace}
\newcommand*{\sle}{{\sf le}\xspace}
\newcommand*{\spair}{{\sf pair}\xspace}
\newcommand*{\oftype}{{:}}
\newcommand*{\Set}{{\sf Set}\xspace}
\newcommand*{\Prop}{{\sf Prop}\xspace}
\newcommand*{\larrx}{\lambda{\rightarrow}{\times}}
%\usepackage[active]{srcltx}%To make kile work with Kdvi, should be removed
% before submitting. Please stop changing this, unless it really bothers you.
% I will remove it before submitting. (BS)
\newcommand{\Mod}{\operatorname{Mod}}
\usepackage{pb-diagram}
\newcommand*{\map}[3]{\ensuremath{#1\colon #2\longrightarrow #3}}
\newcommand*{\DIG}{\ensuremath{\Phi}\xspace}
\newcommand*{\DIGinf}{\ensuremath{\DIG^{\omega}}\xspace}
\newcommand*{\DIGinfin}{\ensuremath{\DIG^{\leq\omega}}\xspace}
\newcommand*{\Iclosed}{\ensuremath{I}\xspace}
\newcommand*{\category}{\ensuremath{\mathcal{C}}\xspace} % ambient category
\newcommand*{\functor}{\ensuremath{\mathrm{F}}\xspace} % arbitrary functor
\newcommand*{\pair}[2]{\ensuremath{\langle #1,\,#2\rangle}}
\newcommand*{\xyto}{\ar @{->}} % ---->
\newcommand*{\xywant}{\ar @{.>}} % ....>
\newcommand*{\fincoalg}[1]{\ensuremath{\nu{#1}}\xspace}
\newcommand*{\destructor}{\ensuremath{\nu\textsf{-out}}\xspace} % structure map of *final* coalgebra
\newcommand*{\coiterator}[1][]{\ensuremath{{\nu\textsf{-it}}(#1)}\xspace}
\newcommand*{\arr}{\ensuremath{\mathord{\to}}\xspace}
\newcommand*{\uniquexists}{\ensuremath{\mathpunct{\exists!}}}
\newcommand*{\textcons}{\ensuremath{\textsf{cons}}\xspace}
\newcommand*{\hd}{\ensuremath{\operatorname{\mathsf{hd}}}\xspace}
\newcommand*{\tl}{\ensuremath{\operatorname{\mathsf{tl}}}\xspace}
\newcommand*{\Sets}{\ensuremath{\textbf{\upshape Set}}\xspace} % category Sets
{\theorembodyfont{\rmfamily} \newtheorem{definition}{Definition}[section]}
\newtheorem{theorem}{Theorem}[section]
% werktitel
\title{Constructive analysis, types and exact real numbers}
% BS: Vraag: Voor wie schrijven we nu eigenlijk?
% Deelnemers van de workshop
% Algemeen wiskundig publiek?
% (volgens FW):
% theoretische informatici en wiskundigen die zekerheid willen over numerieke berekeningen
% toevoegen (HG):
% Dom.Th./Cat.Th. onderzoekers die willen zien waar hun werk nou echt toegepast kan worden.
%Topics include, but are not limited to:
% * the development of constructive analysis in type theory
% * program extraction from such developments
% * exact real number computation
% * co-inductive methods for continuous structures
% * semantics for real computations (e.g. domain theory, formal topology)
% Hier de "outline" van de editorial zoals we die afgesproken hadden:
%
% Computable Analysis / Constructive Analysis
% I ---> Semantiek
% (domein theorie, coalgebras, realizability)
% domein th+TTE+formele topology
% II ---> Bewijzen
% (typetheorie, coinductie, programma extractie[verwijst naar III]
% + const. anal (internal logic)
% III---> Berekeningen
% (representaties, algoritmen, implementaties, programma extractie)
\author[Herman Geuvers, Milad Niqui, Bas Spitters, Freek Wiedijk]{Herman Geuvers, Milad Niqui, Bas Spitters, Freek Wiedijk\\
Radboud University Nijmegen, NL}
\date{16 December 2006; Revised \today}
\begin{document}
\maketitle
\begin{abstract}
In the present paper, we will discuss various aspects of
computable/con\-struc\-tive analysis, namely semantics, proofs and
computations. We will present some of the problems and solutions of
exact real arithmetic varying from concrete implementations,
representation and algorithms to various models for real
computation. We then put these models in a uniform framework using
realisability, opening the door for the use of type theoretic and
coalgebraic constructions both in computing and reasoning about
these computations. We will indicate that it is often natural to use
constructive logic to reason about these computations.
\end{abstract}
\section{Introduction}
Computing with real numbers is usually done via floating point
approximations; it is well-known that the build-up of the rounding off
that is inherent in these computations can lead to catastrophic
errors~\cite{kramer.97}. As a first attempt to prevent this problem
one may use interval arithmetic~\cite{kearfott.96}. A different
approach to computing with real numbers is \emph{exact real
arithmetic} which provides a precision-driven approach to
computation with real numbers~\cite{yap.95}. Exact real arithmetic is
motivated by the need for unbounded precision in numerical
calculations. Real numbers are infinite objects of which arbitrary
good \emph{finite approximations} can be given. A computable function
over the reals is given by an algorithm that given the desired
accuracy of the output, asks for a sufficiently good approximation of
the input to be able to compute the result. Domain theory provides a
systematic approach to interval computations and exact real
arithmetic, using the higher order features of modern programming
languages. A related, but slightly more concrete approach is
Weihrauch's Type Two theory of Effectivity (\TTE). In \TTE one
considers (Turing machine) computations on streams. Yet another
approach (Markov's) is to use the function type $\IN\to\IN$, which is
present in functional languages, to work directly with
Cauchy sequences.
We will illustrate the spectrum between floating point computation and
exact real arithmetic with a small example. Exact real arithmetic has found its
main applications when one wants to answer precise mathematical questions
by means of computation, and therefore we will use an example from
mathematics.
Define a sequence of real numbers by iterating the \emph{logistic
map}:
$$x_0 = 0.5\enspace,\; x_{n + 1} = 3.999 \times x_n (1 - x_n)\enspace.$$
(Note that the number $3.999$ should not be taken to be an
approximation of some number from the real world, but should exactly
have this value.) Now we want to determine a good approximation of
the 10000th element in this sequence, $x_{10000}$.\label{10000} There
are four ways to proceed with this, of increasing sophistication:
\begin{enumerate}
\item First we can use floating point arithmetic, using the IEEE 754
numbers implemented \cite{IEEE:1985:ISBa} in the
floating point unit of our computer. For instance, we might run the
following small C program.
\begin{quote}
\small
\def\{{\char`\{}
\def\}{\char`\}}
\def\\{\char`\\}
\begin{alltt}
main() \{
int n; double x = 0.5;
for (n = 0; n < 10000; n++)
x = 3.999 * x * (1 - x);
printf("%f\\n", x);
\}
\end{alltt}
\end{quote}
This will then give the output {{0.780738}}. Now this number is
totally unrelated to the correct answer, which (rounded to 6
decimals) is $0.354494$. The sequence that the program calculates,
because of rounding errors and the chaotic nature of the logistic
map, will after the first few terms become essentially unrelated to
the actual values of the sequence. This also becomes very clear if
one runs the same program on Intel hardware which does not exactly
follow the IEEE 754 standard. In that case, the program will print
{{0.999336}}.
Note that interval arithmetic implemented using floating point
numbers for the bounds does not help here. In that case the result
of the program will be the interval $[0,0.99975]$. While
mathematically correct, this is not very informative.
\item The second approach to this problem, which \emph{will} give the
correct answer, is to use the methods of numerical analysis. One
still calculates using floating point numbers, but with a greater
precision. This is the method that the Maple computer algebra
package uses. In the case of this example it turns out that
calculating using $10^{4^2}$ digits will give the correct answer for
$x_{10^4}$.
Note that with this approach the numerical analyst is the one that
will need to determine the necessary precision, this will not be
automatically done by the computer. For this specific problem
determining this precision might not be very difficult, but for more
involved problems it might easily become the time bottleneck in
obtaining the answer (instead of the computation time by the
computer.) Also, if this numerical analyst makes a mistake in his
error estimates without this being noticed, then the answer will be
incorrect without this being noticed. Therefore the correctness of
the answer will not only depend on the correctness of the
calculation software, but also on the correctness of the way that
one poses the question.
\item The third approach for this problem is to have the computer keep
track of the errors when running the calculation, and then have it
rerun the program using a larger precision as long as the precision
of the output is not good enough. That way the computer instead of
the human will be the one to determine what precision will be
needed.
This approach can both be implemented using interval arithmetic
(using bounds with sufficiently large precision), as well as using
sufficiently precise floating point numbers together with an error
estimate. It will be clear that both methods are essentially the
same.
However, with this approach we will have various intervals in our
program that are related in the sense that they correspond to the
same exact real number, but without this correspondence necessarily
being reflected in the organisation of the program. This makes
programming using this method more difficult than necessary.
\item The fourth approach is similar to the previous one, but this
time one uses a functional (`higher order') data-type. Instead of
using the (`first order') data-type of intervals one then uses
functions that map desired precisions to intervals. That way the
intervals that correspond to the same real number, but with
different precisions, all become part of one data object in the
program.
Note that when using this approach it is important to \emph{cache}
the intervals that all these functions calculate. Otherwise the
same interval might be recalculated many times, leading to a very
bad complexity.
\end{enumerate}
When looking at these four approaches, it will be clear that the last
three all calculate the correct answer, and that they all use a
similar amount of running time. This means that the increase of
sophistication between the various methods that we have described does
not correspond to a more efficient way of obtaining the answer to the
problem. Instead it is primarily an improvement in
\emph{correctness}: the ease of getting a correct answer, and the ease
of establishing that this answer is indeed correct.
Several approaches to exact real arithmetic have been implemented, as
described in Section~\ref{sec:implementations} below. An interesting
question is what are the applications of these programs. As in the
real world all data is inherently imprecise, it can be disputed
whether for real-world applications exact real arithmetic will be an
essential improvement over floating point computation. However, for
mathematics its usefulness seems quite clear.
We come back to the question of correctness of algorithms
for
real arithmetic. In some modern systematic approaches to program
correctness one uses a realisability interpretation to get a precise
and tight connection between proofs and programs. It turns out that
the same can be done here. Most `higher order' approaches, such as Domains, TTE
and Markov's CRM which we will discuss later, can be unified in a realisability
framework. This means that there is a clear notion of an
internal logic to reason about such computations. As usual when
reasoning about computations, this internal logic is constructive. We
will expand on this in section~\ref{sec:realizability}.
It should be noted that in the transition from floats to a language
for exact real arithmetic with data types there is the usual friction
between craft and technology: should these issues be treated carefully
on an individual basis, or do we use the apparatus of, say, domain theory? A
similar tension exists for proofs: do we treat them individually, or
do we use the technology of category theory, realisability and
constructive mathematics?
The paper is organised as follows. We will focus on three important
aspects of computing with real number: computations, semantics and
proofs. Section~\ref{sec:computations} discusses representations and
implementations. Section~\ref{sec:semantics} discusses domain theory,
Markov's recursive analysis, Type Two Effectivity, coalgebras and
realisability. Section~\ref{sec:proofs} contains type theory, program
extraction and constructive analysis. We finish with a short
conclusion.
\section{Computations}\label{sec:computations}
\subsection{Representations} \label{subsec:representations}
% Milad?
% MN: done
While in theoretical models of computation real numbers can be
considered as anything between a subset of the rationals and an object
in a category, when it comes to practical computations, we must have a
representation of real numbers (or of approximations to real numbers)
that is easily understood by humans and computers. Usually this boils
down to representing real numbers with decimals or bits; even though
the intermediate steps can use other representations, the syntax for
input and output real numbers (or approximations of real numbers)
should not be far from the standard representations used in practice.
This brings up a serious problem: it is well-known that the standard
decimal representation is not suitable for real computations. When
multiplying the stream $x=0.333\ldots$ (considered as an infinite
input) by $3$, the multiplication algorithm cannot give the first
digit of the output: there is no way of deciding whether eventually
the digit $2$ may come (and then the first digit should be $0$) or
eventually a $4$ may come (and then the first digit should be $1$).
Therefore, with the standard decimal representation, the deadlock is
inevitable in calculating the outcome of multiplication, while
multiplication is universally considered to be a computable function.
This implies that the standard decimal representation is not
computationally suitable. This shortcoming of the decimal
representation was already known to Brouwer, who
in~\cite{Brouwer:decimal} showed --- by means of a so-called weak
counter example --- that there are real numbers with no standard
decimal representation. In modern terms one might state this result as
there is no computable map from, say, the Cauchy representation to the
decimal representation of the real numbers. Or as we will express it
in Section~\ref{subsec:tte}, the decimal representation is not
admissible. As another consequence of the example above we
see that the real numbers do not allow an effective way to compare
real numbers, since the problem above arises precisely because we
can not decide whether $x<\frac13$ or $x\geq\frac13$. Similarly, one
does not have an effective equality test.
% This should not be a footnote. It's essential later on.
With the advent of computers, and partly due to this theoretical
shortcoming, partly in order to have a more efficient and
hardware-compatible internal representation, other representations for
real numbers were considered. Some of these non-standard
representations were already known for centuries and others were
discovered and further developed by computer scientists in the course
of the 20th century. Knuth~\cite[\S~4]{knuth.97} gives a thorough
historical account of various representations for different number
systems (integers, rationals and real numbers). While we do not intend
to mention all the different representation systems we focus on some
of the main ideas that are theoretically and practically important.
Furthermore, while a study of different representations for integers
and rational numbers is relevant for approximative computations with
real numbers, we
focus on the various approaches for representing real numbers since this is our
main interest in this paper.
Even though there are plenty of different representations for real
numbers, in the broader context of computable analysis they can be
seen as an instance of one of the few basic approaches. For example
many representations that are used as basis of exact arithmetic
implementations are based on the Cauchy sequences. It is fine-tuning
of the details (such as modulus of convergence, or the representations
for rational numbers) that makes the difference between such
implementations.
Here we mention the three main classes of representations. Related
(but different) classifications can be found
in~\cite{weihrauch.87,weihrauch.00,gowland.01}:
\begin{enumerate}
\item \textbf{Cauchy Sequences.} Cauchy sequences are traditionally
the way that real numbers are represented in mathematics. In this
approach real numbers are represented by Cauchy sequences of
rational numbers (or some other dense countable Archimedean subset
of the real numbers such as the dyadic numbers). The real number
described by this sequence is the limit under the usual Euclidean
metric. The most general case is the \emph{\naive Cauchy
representation} in which there is no modulus of convergence
required. Although this is quite inefficient, its theoretical
importance and its suitability for formalisation has made this
representation to be the basis of the first full implementation of
constructive real numbers in a proof assistant and have been used in
formalising the proof of the fundamental theorem of
algebra~\cite{milad.02}. Other constructive formalisations which
exist in the literature usually use a modulus of
convergence~\cite{troelstra.88,bishop.85,weihrauch.87}.
An important variation on the theme of Cauchy sequences are the so
called \emph{functional representations}. In this approach real
numbers are represented by functions on some fixed countable set,
where the codomain of the function (the elements of the sequence)
need not be rational numbers. The semantics of such a function is
always (in some way) a Cauchy sequence, but the choice of a
different domain or codomain can improve the representation. An
example of such representation was proposed by Boehm et
al.~\cite{boehm.86} where \ZZ is used for both domain and codomain
of the function representing a real number. Implicit semantic
assumptions make sure that these sequences (which are not indexed by
\IN, rather by \ZZ) can be mapped to Cauchy sequences with a
predetermined modulus of convergence. This approach forms the basis
of several exact arithmetic packages, especially inside functional
programming languages.
To give a specific example of this representation, if we represent
the number $x_{10000} = 0.35449383309125298131\dots$ which we
defined on page \pageref{10000}, then in a decimal variant on
Boehm's functional
representation, three examples of possible representations for this number are\label{3544}
$$
\begin{array}{rcl}
&\vdots& \\
-1 &\mapsto& 0 \\
0 &\mapsto& 0 \\
1 &\mapsto& 3 \\
2 &\mapsto& 35 \\
3 &\mapsto& 354 \\
4 &\mapsto& 3544 \\
&\vdots&
\end{array}
\qquad
\begin{array}{rcl}
&\vdots& \\
-1 &\mapsto& 0 \\
0 &\mapsto& 0 \\
1 &\mapsto& 4 \\
2 &\mapsto& 35 \\
3 &\mapsto& 354 \\
4 &\mapsto& 3545 \\
&\vdots&
\end{array}
\qquad
\begin{array}{rcl}
&\vdots& \\
-1 &\mapsto& 1 \\
0 &\mapsto& 1 \\
1 &\mapsto& 4 \\
2 &\mapsto& 36 \\
3 &\mapsto& 355 \\
4 &\mapsto& 3545 \\
&\vdots&
\end{array}
$$
and of course there are uncountably many more.
These essentially are all maps that for each $n$ gives $n$ digits after
the decimal point (either rounded up or rounded down).
\item \textbf{Dedekind cuts.} Dedekind cuts are an alternative
approach to representing real numbers which is based on the least
upper bound property of the reals rather than the Cauchy
completeness. A key feature of Dedekind cuts, as compared to other
representations, is their uniqueness: any real number is represented
by precisely one cut. This feature, which is convenient to reason
about cuts, makes it difficult to compute with them. In
computational approaches to Dedekind cuts a set of rational numbers
with additional computational structure is used to represent a real
number which is the least upper bound (or greatest lower bound) of
that subset; see~\cite[p.~95]{weihrauch.00} for details. Variations
on this class of representations include choosing the characteristic
function of a chosen dense subset of the reals such as dyadic
numbers~\cite{weihrauch.87}.
Such representations have not been used for practical
implementations but have been considered for reasoning about real
numbers in mechanised reasoning. Again (as was the case with the
\naive Cauchy representation) this is due to the theoretical
importance of these representations and their adaptability for use
in formal mathematics. Examples of use of such representations
include the formalisation of real numbers in the HOL theorem
prover~\cite{harrison.94} and the formalisation of the real numbers
that is used in the formalisation of the 4-colour theorem in the
\coq proof assistant~\cite{gonthier.05}.
\item \textbf{Streams of nested intervals.} The most standard way of
representing real numbers is the decimal representation. This is a
positional representation that falls under the more general case of
radix representations in which a real number is represented by a
stream of digits. In the base $b$ radix representation ($b$-ary
representation) starting from the first digit and moving to right,
the effect of each digit can be seen as refining the interval
containing the real number represented by the whole stream. Thus the
$b$-ary representation can be seen as an instance of representing
real numbers with a stream of shrinking nested intervals. Any such
stream for which the diameter of the intervals converges to $0$
represents a real number: the one that inhabits the infinite
intersection of the intervals.
If we return to our example, in a stream representation the number
$x_{10000}$ could be represented by the infinite stream of intervals:
$$
\begin{array}{c}
\vdots \\
{}[-10,10]\phantom{-} \\
{}[-1,1]\phantom{-} \\
{}[0.3,0.5] \\
{}[0.34,0.36] \\
{}[0.353,0.355] \\
{}[0.3544,0.3546] \\
\vdots
\end{array}
$$
These intervals correspond to the second functional representation
on page \pageref{3544}. The other representations there also
correspond to a (different) stream of intervals.
By considering each interval as the image of the previous interval
under a continuous real function one can encode the whole nested
collection as an infinite composition of real maps applied to a base
interval. This can be made formal using the following
definition~\cite[\S~5.3]{milad.04b}.
\begin{definition}[Generalised Digit Set] \label{def:gen_digit_set}
Let \Iclosed be a closed subinterval of the compactification of
the real numbers $[-\infty,+\infty]$. A set \DIG of continuous
functions on \Iclosed is a \emph{generalised digit set for
interval \Iclosed} if there exists a total and surjective map
$\map {\rho} {\DIGinf} \Iclosed$ (note that \DIGinf denotes the
set of streams of \DIG) such that for all streams
$f_0f_1\dots\in\DIGinf$ we have
\begin{displaymath}
\{\rho(f_0f_1\dots)\}=\bigcap\limits_{i=0}^{\infty} f_0\circ\dots\circ f_i(\Iclosed)\enspace.
\end{displaymath}
In other words, if each element $x$ of \Iclosed is the solitary
element of some infinite composition of elements of \DIG and each
infinite composition of elements of \DIG is a singleton. We call
each element of \DIG a \emph{digit}.
\end{definition}
The various representations of this family are characterised by the
different restrictions that are put on the choice of the digits. In
practice usually a finite set of \mobius maps satisfying some property
is chosen as the set of digits while in literature the larger class of
$d$-contractions is also studied~\cite{konecny.00}. \emph{\mobius
maps} are maps of the form
\begin{displaymath}
x\longmapsto \frac{ax+b}{cx+d}\enspace,
\end{displaymath}
where $a,b,c,d\in\CC$ and $ad-bc\neq 0$. In the context of stream
representation for real numbers the \mobius maps with integer
coefficients --- also known as \emph{linear fractional
transformations} (LFT) or \emph{homographic maps}--- are considered.
Taking the \mobius map with integer coefficients to be
\Iclosed-refining (i.e. mapping the closed interval \Iclosed to
itself) forms the basis of Edalat and Potts' approach to lazy exact
arithmetic~\cite{edalat.97,potts.98,edalat.99b}. Restricting this
further by taking $c=0$ one arrives at representations by a finite set
of \emph{affine maps}, a subclass which includes the standard $b$-ary
representations.
As stated the standard representation is not computationally suitable
but the shortcoming is easily fixed by using one of the variants of
radix representation. Different `fixes' include changing the base to a
non-integer base (eg. the golden ratio base~\cite{digianantonio.96})
or increasing the set of digits by introducing negative digits (eg.
redundant $b$-ary representation~\cite{edalat.97}). Both these
workarounds have been used long before the advent of computers. For
example the introduction of negative digits is traced back to the
number system implicit in the work of the 11th century mathematician
Tabari and even used in mechanical computing devices in the
19th century. A detailed historical survey can be found in
~\cite[pp.~205--208]{knuth.97}.
Our example number $x_{10000}$, when using a representation with
negative digits, has many different representations. Next to its
normal decimal representation $$0.35449383309125298131\dots$$ it can
for instance also be written as
$$0.3545(-1)38331(-1)1253(-1)8131\dots$$
It is also possible to use an infinite set of digits. The most
important example where the set of digits is infinite is a
representation based on continued fractions which we consider as a
separate class of representations.
\item\textbf{Continued fractions} For centuries continued fractions
have been used to represent rational and real numbers and elementary
functions~\cite{brezinski91}. As a result some implementations of
exact real arithmetic are based on continued fraction
representations. Some of such representation can be considered as a
subclass of stream representations, but the most standard continued
fraction representation (the so called \IN-fraction) uses finite
lists to represent rational numbers and streams for representing
irrational numbers. The digits of the \IN-fraction representation
can be considered to be the \mobius maps of the form
\begin{displaymath}
x \longmapsto n+\frac{1}{x}\qquad n\in\IN\enspace.
\end{displaymath}
Taking finite lists and streams of such digits leads to a
representation for real numbers larger than $1$ that can be
considered for exact arithmetic~\cite{vuillemin.90,menissier.94}.
One can allow for negative integers (and disallow $0$,$1$ and $-1$)
obtaining the \ZZ-fractions~\cite{vuillemin.90,menissier.94}. In the
\ZZ-fractions representation too, the rational numbers are
represented by finite lists.
The above (\IN-fraction and \ZZ-fraction) continued fraction
representations serve well in the context of exact \emph{rational}
arithmetic; however for representing real numbers they suffer from
the same computational shortcoming that exist for standard decimal
representation. In order to overcome this some modifications of
these representations have been studied and used for implementing
exact arithmetic~\cite{vuillemin.90,lester.01}
There are several ways to obtain a representation based on continued
fractions which uses infinite streams for rational and irrational
numbers alike and therefore fall under stream representations. One
way is to use the redundant Euclidean continued
fractions~\cite{vuillemin.90} which can be seen as an instance of
\mobius maps~\cite[\S~7.3]{potts.98}. Another way would be to use
the Stern--Brocot representation, again expressible in terms of
\mobius maps~\cite[\S~5.7.1]{milad.04b}.
One can extend the notion of generalised digit set to include maps
$$\map {\rho} {\DIGinfin} \Iclosed$$ where \DIGinfin is the set of
finite lists and (infinite) streams of elements of \DIG to obtain a
class of representations that includes the continued fraction
representations. But since such representations are only considered
in the context of continued fractions we preferred to classify them
under a separate class.
The example number $x_{10000}$ is a rational number, of which the
\IN-fraction starts:
$$\displaystyle{\displaystyle 1\over\displaystyle 2 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle 1 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle 4 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle 1 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle 1 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle 2 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle 2 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle 7 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle 1 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle 1 + {\rlap{$\;\dots\phantom{{\displaystyle 1\over\displaystyle 0}}$}}}$
}}\;}$
}}\;}$
}}\;}$
}}\;}$
}}\;}$
}}\;}$
}}\;}$
}}\;}$
}}\;}\hspace{20em}$$
The \ZZ-fraction of this number starts:
$$\displaystyle{\displaystyle 1\over\displaystyle 3 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle -6 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle 2 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle 2 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle 2 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle 8 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle -2 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle -7 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle -3 + \!\!{\rlap{
$\displaystyle{\displaystyle 1\over\displaystyle 16 + {\rlap{$\;\dots\phantom{{\displaystyle 1\over\displaystyle 0}}$}}}$
}}\;}$
}}\;}$
}}\;}$
}}\;}$
}}\;}$
}}\;}$
}}\;}$
}}\;}$
}}\;}\hspace{20em}$$
In this case the coefficients come from the set $\{\dots,-3,-2,2,3,\dots\}$
instead of from the set $\{1,2,3,\dots\}$.
If one allows both negative numbers as well as the numbers $1$ and $-1$
for the coefficients (this is needed to make the representation admissible),
then this number gets many more representations.
\end{enumerate}
\subsection{Implementations}\label{sec:implementations}
In recent years several approaches to exact real arithmetic have been
implemented in various programming languages. Some of them have been
considered for real-world applications especially in the field of
visualisation and computational geometry~\cite{yap.95,du.02}. There
have also been studies to enhance the existing hardware architectures
in the direction of exact real
arithmetic~\cite{kornerup.88,mencer.00}.
There are three groups of implementations of exact real arithmetic.
They are all based on the ideas that were presented above.
\begin{itemize}
\item The first group is an implementation of exact real arithmetic as
part of a generic computer algebra package. Examples of such
systems are commercial systems like Mathematica~\cite{wol:96} and
Maple~\cite{mon:ged:hea:lab:vor:97}. These are generally very fast
at basic computations, but sometimes miss the features that are
needed for involved problems.
\item The second group are systems and libraries that have
specifically been designed for exact real arithmetic, and that try
to be as fast as possible at it. Examples of this are MPFR from
LORIA~\cite{zim:lef:pel:fou:hia:whi:05}, GiNaC/CLN by
Kreckel~\cite{kre:05}, iRRAM by M\"uller~\cite{muller.01,mul:05} and
RealLib by Lambov~\cite{lam:05}. Programs from this group generally
are implemented in C++.
\item The third group are system that also have been designed for
exact real arithmetic, but not for speed. Mostly these systems are
part of an effort to move toward a provably correct implementation
of exact real arithmetic. Examples of systems like this are Cr by
Filli\^atre~\cite{fil:05} (an ML reimplementation of CR, a Java
system by Boehm~\cite{boehm.86,schwarz.89,boe:05}), ERA by
Lester~\cite{les:05},
%COMP by Kieffer,
Few Digits by O'Connor~\cite{oco:05}, Bignum by Guy~\cite{guy:05}
and ICReals by Edalat e.a.~\cite{potts.97,potts.98,eda:05}. These
systems generally are implemented in a functional programming
language like ML or Haskell. Like the systems in the first group
they generally miss the features that are needed to do advanced
computations.
\end{itemize}
In practice the first group of systems is the fastest at basic
problems, and the second group of systems are the only ones that are
suitable for involved problems.
\section{Semantics}\label{sec:semantics}
\subsection{Domain Theory}
% Herman?
Domains are used to describe the semantics of programming languages,
both the data types and the programs that are definable over them.
They also provide a denotational model for computability, in the sense
that the set of continuous functions from one domain to another is the
mathematical counterpart to the set of computable functions (in some
language or computational model). The basic structure in domain
theory is that of a {\em directed-complete partial order} (dcpo),
i.e.\ a partially ordered set in which every directed subset has a
least upper-bound. A set $A$ is {\em directed\/} if it is non-empty and
every pair of its elements has an upper-bound in $A$; the least
upper-bound of $A$ is usually denoted by $\lub A$. Sometimes the notion
of `domain' is identified with that of a dcpo, but mostly, authors
reserve `domain' for a specific type of dcpo (with additional
structure), depending on the application one has in mind, see
~\cite{abramsky.94} for an overview on domain theory and the various
notions of domains.
The interesting functions on dcpos are the {\em Scott continuous\/}
ones: the monotone $f : D_1 \rightarrow D_2$ such that $f(\lub A) =
\lub(f (A))$ for every directed set $A$ (where the first $\lub$ is in
$D_1$ and the second in $D_2$). The definable functions in a
programming language (i.e.\ the computable functions in that language)
are Scott continuous when interpreted as functions over a dcpo. The
real `power' of Scott continuity lies in the fact that Scott
continuous functions have a least fixed point. Thus, a recursive
definition of a (functional) program can be given a meaning as the
least fixed point of the (Scott continuous) functional it gives rise
to.
The ordering on a dcpo is best viewed as an `information ordering' or
a `definedness ordering'. A simple example is the {\em flat\/} dcpo of
natural numbers $\IN_{\bot}$, i.e.\ the set $\IN \cup \{\bot\}$ made
into a poset by letting $\bot \sqleq n$ for all $n\in \IN$. Here, the
elements are either `totally defined' (we have full information on
them) or they are `totally undefined' (we have no information on
them). A more interesting example is the set $[\IN_{\bot} \rightarrow
\IN_{\bot}]$ of Scott continuous functions from $\IN_{\bot}$ to
$\IN_{\bot}$ ordered point-wise. The everywhere undefined function
$\lambda x.\bot$ is the least element of this set and $f\sqleq g$ for
functions $f$ and $g$ if $g$ is `at least as defined' as $f$ in every
element of $\IN_{\bot}$. An important fact is that the set of Scott
continuous functions between two dcpos forms a dcpo again.
The idea of `approximation' is important in dcpos: $a$ {\em
approximates\/} $b$ (or $a$ is `way below' $b$), notation $a\appr
b$, if, whenever $b\sqleq \lub X$, then $a\sqleq x$ for some $x\in X$
(where $X$ of course ranges over the directed subsets). An element $a$
is {\em compact} (or {\em finite}) if $a\appr a$. The compact elements
of a dcpo form an important set, which is usually written as $\sK(D)$.
For $f,g\in[\IN_{\bot} \rightarrow \IN_{\bot}]$, $f\appr g$ implies that $f$
is defined only on a finite set of elements ($f(x) \neq \bot$ for
finitely many $x$). The collection of the functions with a property
like $f$ are also the compact elements of this dcpo. A dcpo is {\em
continuous\/} if there is a {\em basis\/} $B$: a set of elements
such that $x = \lub\{ y \in B \mid y \appr x\}$ for all $x$. Thus, in
a continuous dcpo, all elements can be written as the lub of the basis
elements that approximate it. A dcpo $D$ is called {\em algebraic\/}
if the set of its compact elements forms a basis. The adjective
`$\omega$' is added to say that the basis is countable, as in
$\omega$-continuous dcpo and $\omega$-algebraic dcpo. In
~\cite{GunterScott90}, the notion of domain is identified with
$\omega$-algebraic dcpo. That $\omega$-algebraic dcpos are of special
interest comes from the fact that a continuous function $f: D
\rightarrow E$ between two $\omega$-algebraic dcpos can be fully
characterised (in a countable way) by its compact elements as follows.
$$ f(x) = \Lub \{ y\in \sK(E) \mid y \sqleq f(x')\mbox{ for some
}x'\in\sK(D){\mbox{ with }}x'\sqleq x \}$$
\weg{
The exact definition of what a domain is changes slightly from author to author:
~\cite{GunScott} call an algebraic dcpo a domain, whereas~\cite{AbrJung} speak of
`continuous domain' (for a continuous dcpo) an
}
\weg{
The solution to a so called `domain equation' gives a possible
interpretation for an algebraic data-type definable in a programming
language.
}
In the case of computability over the real numbers, some dcpos are of
specific interest. One is the interval dcpo of nested intervals
$I(\IR)$, consisting of $\IR$ and the non-empty closed real intervals,
ordered by reverse inclusion: $I\sqleq J$ if $ I \supseteq J$. This
domain was first proposed, in a slightly different form, by
\cite{scott.72}. The intervals should be understood as approximations of
real numbers, so a smaller interval gives more information, $\IR$ is
the $\bot$-element of the information order and singleton intervals
$\{a\}$ are maximal elements. For the `way below' relation we have
that $a\appr b$ iff $b$ is a subset of the interior of $a$. The
rational intervals together with $\IR$ form a countable basis for
$I(\IR)$. A directed subset of $I(\IR)$ is a collection of intervals
$A$ such that $\cap A
\neq\emptyset$; the lub of such a directed set $A$ is its
intersection: $\lub A := \cap A$, which is a closed non-empty interval
again. The nice thing about this dcpo is that it generalizes functions
from $\IR$ to $\IR$ in a simple way to incorporate partial functions,
in such a way that partiality is not just undefinedness, but may
involve some partial information (an interval approximation). A
continuous function $f:\IR\rightarrow \IR$ extends in the
straightforward way to a function $\hat{f} : I(\IR)\rightarrow
I(\IR)$; the other way around, a function $g: I(\IR)
\rightarrow I(\IR)$ represents a partial function $\bar{g}$ from $\IR$
to $\IR$ given by $\bar{g}(x) := y$ if $g(\{x\}) = \{y\}$ and
undefined otherwise. See~\cite{edalat.02,edalat.04} for a more
detailed study.
\realpcf, a programming language with \sreal\ as a basic
data type of \cite{escardo.96}, was designed in such a way that the
nested interval semantics would be its denotational semantics. An
interesting feature of \realpcf is that it contains a parallel `if'
construct. It is shown in \cite{escardo.04} that this
non-sequentiality is an inherent feature of the nested interval
domain. See \cite{escardo.99,edalat.escardo.96} for a further
discussion of the expressivity of \realpcf.
A second example of a dcpo of interest in this context is the dcpo of
finite and infinite lists over some given pointed dcpo. Typically,
the pointed dcpo is $A_{\bot}$, the flat dcpo over some finite set
$A$, where the elements of $A$ are the `digits'. The dcpo of finite
and infinite lists is then denoted by $[A]$, and the intuitive
understanding of an element $[a_1, a_2, \ldots, a_n]$ is that of a
finite approximation of a real number. The representations based on
streams of nested intervals
% All the representations discussed before
% Continued fractions
%~\cite{contfrac}, Stern-Brocot representations~\cite[\S~5]{milad.04b}\todo{Find
% good reference to continued fractions}
% and generalised decimal representations~\cite{digianantonio.96}
are instances of this.
% and so are decimal representations, although the
% latter are not satisfactory for computational purposes, as discussed
% in Section~\ref{subsec:representations}.
The ordering on $[A]$ is the ordering on $A_{\bot}$ extended to a
prefix ordering. Stated differently, $[A]$ is the solution to the
domain equation $X = A_{\bot}\times X$ in the category of
dcpos~\cite[\S~4.5]{milad.04b}.
%Given an admissible
%representation of the real numbers as digit streams with digits from
% $A$ (see Definition \ref{def:admissible} for what we mean with
% `admissible'), computability of real valued functions, as given by the
The \TTE model of computable real valued functions (see Section
\ref{subsec:tte}) uses as representation of the reals exactly the
data-type $[A]$ (under the proviso that it's `admissible' -- see
Definition \ref{def:admissible} -- which excludes the decimal
representation). So in \TTE a real number is a digit stream with
digits from $A$ and the \TTE-computable functions are all continuous.
% \todo{Work out the
% example of streams to show the connection with \TTE}
%
% Include Formal topology as the way to do Domain theory in type theory
% TTE as concrete branch of Domain theory?
% Further things to mention: dcpo's, interval domains, PCF
Similar to the interval domain, which is used as the semantics for
programming in \realpcf, the domain $[A]$ can also be used as the
semantics of programming with real numbers. This has been shown by
\cite{simpson.98}, who uses \pcf (of \cite{plotkin.77}) as a
programming language and the data type of streams over $\{-1,0,1\}$ as
the type of the reals. In \cite{bauer.02}, this approach is compared
with the one of \realpcf and it is shown that the expressivity is the
same up to second order types. An interesting aspect is that the \pcf
based approach is purely sequential. This is due to the fact that
\realpcf uses \sreal\ as an abstract primitive data type, so special
primitive programming constructs are required to compare two reals,
whereas in the \pcf based approach one programs directly with the
representations.
\subsection{Markov's Recursive Analysis}
% Bas?
Here we will shortly introduce Markov's constructive recursive
mathematics (CRM). Historically, this seems to be the first concrete
model for exact real computations. We refer
to~\cite{TvD,BridgesRichman,Beeson:1985,Aberth:Computable} for more
information. In CRM one represents an object by a certain partial
\emph{recursive} function. In this way most of the representations
in section~\ref{subsec:representations} may be treated. For concreteness let us
consider the Cauchy
representation of the real numbers. A real number $x$ is thus
represented by a partial recursive function which on input $n$ returns
the $n$th element of a Cauchy sequence with limit $x$. One thus
computes not with all the real numbers, but only with the recursive
ones. Fortunately, the well-known constants $e, \pi, \phi$ are
recursive and so are, e.g.\ the trigonometric functions. In this way one can
develop exact real arithmetic in CRM.
% \todo{this paragraph
% is not clear to me, after reading it it is still counterintuitive.}
Since there are only countably many codes for recursive functions
there are only countably many (recursive) real numbers in CRM. This may be
counterintuitive at first, however, given any recursive sequence of
real numbers, Cantor's diagonal construction supplies a new recursive
real number which is apart from all of them. Thus the recursive real
numbers are countable, but recursively uncountable! In practice this
last fact is more important and can be restated as `the real numbers
are uncountable' in the internal logic of CRM. We will introduce the
internal logic in section~\ref{sec:realizability}.
In CRM all (recursive) functions between real numbers are
continuous~\cite{KLS1,KLS2, Tsejtin1, Tsejtin2}.
\begin{theorem}[Kreisel, Lacombe, Schoenfield, Tsejtin]
In CRM: Every (total) mapping of a complete separable metric space into a
metric
space is continuous.
\end{theorem}
The theorem is stated in the internal logic, meaning that all the
objects should be presented effectively. We will discuss the internal
logic more thoroughly in Section~\ref{sec:realizability}. This theorem
may seem counterintuitive at first, when applied to the real function which
is 0 for $x<0$ and 1 for $x\geq0$? This is not a total function in
CRM --- that is, we cannot recursively decide whether $x<0$ or
$x\geq0$ for each real number $x$. Having such a test would solve the
halting problem.
For concrete functions on the real numbers this model behaves as
expected. However, when quantifying over compact spaces there are
some surprises. For instance, one can define an unbounded continuous
function on the unit interval. Such a function cannot be uniformly
continuous. This function is defined in the internal logic, however, externally
one sees that such a function is defined on all the recursive points,
but not on \emph{all} points.
To avoid such problems Brouwer introduced his choice
sequences~\cite{Brouwer, Heyting:1956, Troelstra:choice}. Kleene and Vesley
captured much of his theory using a realisability interpretation. This
interpretation was rediscovered by Weihrauch in his TTE, a theory that
we will now discuss.
% In the semantics of programming languages one distinguishes sequential and
% parallel computations. When one is able to access the code of a given
% function,
% one can parallellize it. This may seen problematic at times. However,
% all the functions on real numbers should respect the usual
% equality. This means that in practice working in CRM has the flavour of
% working
% in constructive mathematics. We'll be more precise in the section on
% realisability.
\subsection{\TTE} \label{subsec:tte}
% Milad?
% Already has done this.
Among the many schools of computable analysis, most of which are known
to be equivalent, \TTE (Type Two Effectivity) is a theory of
computability based on Turing machines with infinite input and
infinite output~\cite{weihrauch.97,weihrauch.00}. The \TTE notion of
computability is nothing but computability of algorithms on infinite
sequences. This is because both the input and output tape can be
thought of as a stream (lazy infinite sequence), and hence the \TTE
model will be very similar to the actual computation on the higher
order data structure of streams or functional representations for real
numbers (see Section~\ref{subsec:representations}). Therefore we
consider computability for algorithms in exact arithmetic
with respect to \TTE. This is in contrast to other approaches to
computability which usually involve heavy encoding of streams and
finite lists. The intuitive model that \TTE uses not only makes the
programmer's understanding of the complexity of the algorithms
relatively easy but also provides a notion of computability that
directly works on the representations of real numbers. In fact the
notion of representation plays a central \role in \TTE, providing a
good framework to compare the relative theoretical strength of various
representations.
In this sense one can use \TTE to give a formal explanation of the
shortcomings of the standard decimal representation which was mentioned
in the example in Section~\ref{subsec:representations}. There we showed
that --- informally--- the standard decimal representation is not
`computationally suitable'. In \TTE there is a notion of
admissibility of representations, which rigorously defines whether a
representation of real numbers is computationally suitable. In
section~\ref{subsec:real:TTE} we show how the definition of admissible
representation was already hidden in Brouwer's example.
\begin{definition}[Admissible Representation] \label{def:admissible}
Let \Iclosed be a closed subinterval of the compactification of real
numbers $[-\infty,+\infty]$. Let \DIG be a set (finite or infinite)
of digits and \DIGinf be the set of streams of elements of \DIG. A
map $\map p {\DIGinf} \Iclosed$ is an \emph{admissible
representation} of \Iclosed if the following conditions hold.
\begin{enumerate}%[i)]
\item\ $p$ is continuous with respect to the product topology of the discrete
topology on \DIG;
\item\ $p$ is surjective;
\item\ $p$ is \emph{maximal}, i.e., for every (partial) continuous $\map r
{\DIGinf}{\Iclosed}$, there is a continuous $\map f {\DIGinf}{\DIGinf}$ such
that $r = p\circ f$.
\end{enumerate}
\end{definition}
The notion of admissibility relates the notion of computability on
streams with continuity on real numbers. Intuitively an admissible
representation gives rise to functions which are computable with type
two Turing machines. Obviously, the standard decimal representation
turns out to be not admissible. In fact following the example in
Section~\ref{subsec:representations} one can show that the
multiplication by $3$ is not a \TTE-computable function when using the
standard decimal representation~\cite{weihrauch.97}.
An important property of admissible representations is that they
provide a redundant representation for real numbers. This means that
every real number has several representations.
% All the representations discussed before are admissible.
Examples of admissible representations include the redundant $b$-ary
representation for $[0,+\infty]$ used in~\cite{edalat.97}, the ternary
Stern--Brocot representation for $[0,+\infty]$~\cite{milad.06b} and
the binary Golden ratio notation for $[0,1]$~\cite{digianantonio.96}.
As stated before, the \TTE notion of computability that is based on
the admissible representations is equivalent to most other models of
computability~\cite[\S~9]{weihrauch.00}.
\subsection{Coalgebras} \label{subsec:coalgebras}
% Milad?
% MN: done
Coalgebras (also called `systems' in~\cite{rutten.00,barwise.96})
provide a semantics for structures that can be considered as an
infinite process, of which only partial observations are available.
Examples of such structures are real numbers, labelled transition
systems, object oriented modularity and dynamical systems. A modern
survey of coalgebras and their applications can be found
in~\cite{jacobs.05}.
In the category theoretic semantics for computer science, to any
functor corresponds a category of coalgebras of that functor. For
certain functors, this category happens to have a final object. Those
functors, or to be more precise, the final coalgebra of those
functors, are used to model infinite processes.
Let \category be a category, and \functor be an endofunctor on
\category. A \emph{\functor-coalgebra} is a pair $\pair {Y} {y}$ in
which $Y$ is an object of \category and $\map{y}{Y}{\functor(Y)}$ is a
morphism in \category. We call the first element of the pair the
\emph{carrier} of the coalgebra, and the second element of the pair
the \emph{structure map} of the coalgebra.
Let $\pair {U} {u}$ and $\pair {V} {v}$ be \functor-coalgebras. Then a
\emph{coalgebra map} from $\pair {U} {u}$ to $\pair {V} {v}$ is a map
$\map {f} {U} {V}$ such that $v\circ f=\functor(f)\circ u$; i.e., the
following diagram commutes.
\begin{displaymath}
\xymatrix@C=3pc{
U \xyto[d]_{u}\xyto[r]^{f} & V\xyto[d]^{v}\\
\functor(U) \xyto[r]_{\functor(f)} & \functor(V) \\
}
\end{displaymath}
It can be checked that the identity morphism is a coalgebra map, and
that the composite of two coalgebra maps is again a coalgebra maps.
Hence the \functor-coalgebras form a category. We are particularly
interested whether this category has a final object. If such a final
object exists, we assign some fixed notation to it.
\begin{definition}[Final Coalgebra, Coiterator]
\label{def:final_coalgebra} A \emph{final \functor-coalgebra} is a
coalgebra\linebreak $\pair {\fincoalg{\functor}}{\destructor} $ such that for
every coalgebra $\pair {U}{u}$ there exists a unique coalgebra map
$\coiterator[u]$ from $\pair {U} {u}$ to $\pair
{\fincoalg{\functor}} {\destructor}$. We shall call
$\coiterator[u]$ the \emph{coiterator} of $u$.
\end{definition}
Thus a coalgebra $\pair {\fincoalg{\functor}}{\destructor}$ is final
if and only if
\begin{displaymath}
\forall U\colon\category,\;\forall u\colon
U\arr\functor(U),\;\uniquexists\coiterator[u]\colon U\arr
{\fincoalg{\functor}},\;\;\destructor\circ\coiterator[u] =
\functor(\coiterator[u])\circ u\enspace;
\end{displaymath}
equivalently if the following diagram commutes.
\begin{displaymath}
\xymatrix@C=3pc{
U \xyto[d]_{u}\xywant[r]^{!\coiterator[u]} &
{\fincoalg{\functor}}\xyto[d]^{\destructor}\\
\functor(U) \xyto[r]_{\functor(\coiterator[u])} &
\functor({\fincoalg{\functor}}) \\
}
\end{displaymath}
In an arbitrary category, the question whether a final coalgebra for a
given functor exists is not always easy to answer. However, if a
final \functor-coalgebra exists then it is unique up to isomorphism.
Moreover the structure map of a final coalgebra is an isomorphism;
hence, a final coalgebra is a fixed point for its
functor~\cite{jacobs.97}. This fixed point property is the origin of our
interest in a final coalgebra, because it makes it possible to use the
theory of final coalgebras as a semantics for data types of infinite
objects.
Finality of coalgebras provides us with coinductive proof principles,
which can be used to reason about objects residing in a final
coalgebra. A well-known example of a coinductive proof principle is
the notion of bisimulation. This can roughly be stated as: two
infinite processes are equal if they are bisimilar, i.e.\ if the
observable parts are equal and the continuation of the two processes
(or the subprocesses) are again bisimilar.
As an example we consider the set of \emph{streams}. In lazy exact
arithmetic, real numbers are represented by means of lazy infinite
sequences of elements of a set, so called streams. The collection of
streams of the elements of the set \DIG is the final coalgebra of a
simple polynomial functor, namely $\functor(X)=\DIG\times X$ in the
category \Sets. Taking as structure map the map
$\map{\pair{\hd}{\tl}} {\DIGinf} {\DIG\times\DIGinf}$ one can show
that the set of streams is indeed a final coalgebra for
\functor~\cite{rutten.00}. The constructor of the final coalgebras of
streams is $\map {\textcons} {\DIGinf} {\DIGinf}$ which prepends an
element to the beginning of a stream.
The standard decimal representation for real numbers is a stream
representation: each real number is denoted by a stream over the
$10$-element set of digits. So are the various admissible
representations that are used in exact real arithmetic. In this way
one views the real numbers as a final coalgebra. In this setting the
functions on real numbers become maps in the category of coalgebras,
the so called coalgebra maps. This does not capture all functions on
real numbers, but only those for which we have suitable partial
observations, i.e.\ computable finite approximations. Hence, in order
to present a theory of computable coalgebra maps one has to adhere to
domain-theoretic (or equivalent \TTE) approaches for a suitable
definition of computability~\cite{pattinson.03}. A more structural
solution would be to interpret coinductive types in a realisability
model, as we will present in the next section.
\subsection{Realisability}\label{sec:realizability}
%Bas?
In this subsection we will describe realisability. For general
overviews we refer to~ \cite{vanOosten,Troelstra:handbook}. After a
general introduction to realisability we will shortly describe three
realisability interpretations: for recursive analysis, for \TTE and
for domain theory. In this way we obtain a nice uniform treatment of
the three models previously discussed. Our presentation of
realisability models in this section mostly follows the presentations by
Birkedal~\cite{Birkedal:real} and Bauer~\cite{Bauer:thesis,
Bauer:book} and we refer to these works and~\cite{vanOosten} for
historical background.
In order to represent data on a computer we need to find a code, a
realization, for it. This suggests a \emph{realisability} relation,
that is a relation $\Vdash$ between a set of codes $R$ and a set $X$
such that each code represents at most one element. Functions are
realized via the following commutative diagram.
\[
\begin{diagram}
\node{R} \arrow{s,l}{\Vdash} \arrow{e,t}{f'} \node{R} \arrow{s,r}{\Vdash}\\
\node{X} \arrow{e,b}{f} \node{Y}
\end{diagram}
\]
It is then said that $f'$ tracks $f$. These representation are
connected to the representations discussed before. There is a one-one
correspondence between a realisability relation $\Vdash $ and a
partial function $\delta$ defined by \[ \delta a =x\qquad \text{ iff
}\qquad a\Vdash x.
\]
Yet another equivalent presentation is given by partial equivalence
relations. The relation `to be a code for the same element' is a
partial equivalence relation.
In order to be able to represent functions by our codes we should be
able to interpret some applicative structure. It turns out to be convenient to
require the realizers to have the structure of a partial combinatory
algebra (PCA). A PCA is a structure $(X,\bullet,
\mathbf{k},\mathbf{s})$ which has all the relevant properties of the
combinator presentation of recursion theory. In Kleene's original
realisability interpretation the realizers are the natural number
encoding of the partial recursive functions. This prime example of a
PCA is called the first Kleene algebra and is simply denoted $\IN$. In
fact, in this way we obtain the computational model of Markov's
recursive mathematics. However, how do we include a data-type for
all, not only the recursive real numbers? To phrase it differently,
how does one make a realisability model corresponding to, say, TTE? To
do this one needs a slightly different picture. The structures are
realized by \emph{all} streams --- that is, elements of \emph{Baire
space}\footnote{We have denoted Baire space before as \DIGinf when
we choose the alphabet \DIG to be the natural numbers.} --- but we
only allow \emph{recursive} functions. To solve this one uses the
notion of \emph{relative realisability}, where one takes the data from
$A$, but restricts the functions to a sub-PCA $A_\sharp$. In fact,
this $\sharp$ may be seen as a modal operator on types, assigning to
each type its subtype of `computable' elements,
see~\cite{Birkedal:real}. A simplified version that
suffices for the present context may be found in~\cite{Bauer:thesis}.
The data types are captured by the notion of a \emph{modest set} over
a PCA $A$ --- that is, a set with a realisability relation $\Vdash$.
The category of modest sets over $A$ with functions over $A_\sharp$ is
denoted by $\Mod(A,A_\sharp)$. When both the sets and the functions
are represented by the same PCA $A$ one simply writes $\Mod(A)$. Now
given a definition of computability on real numbers, one may ask how
to define computability of lists of real numbers, trees of reals
numbers, streams of real numbers, the positive real numbers\ldots
Although one can give concrete answers for each particular model it
would be better to have a structural solution that works for all these
models at once. In advanced programming languages these issues are
solved by the presence of a strong type system which is closed under
certain type forming constructions, see Section~\ref{type}.
Categorical logic and type theory~\cite{LambekScott, Jacobs:cattype}
allow us to define a very strict and structural connection between
logic and semantics by the use of the \emph{internal} logic of a
(categorical) model. It is customary to speak about the internal
logic when one is really talking about the internal logic \emph{and}
type theory. We will stick to this custom. It should be noted that
the principles valid in the internal logic in general depend on the
principles assumed to hold in the meta-logic. The internal logic for
realisability using modest sets is intuitionistic logic, the logic of
constructive mathematics. The internal type theory supports
dependent, inductive and coinductive types. By providing
realisability interpretations for all the models discussed before we
show that we can use this type theory in all these models. This means that we
have a notion of computability on, say, streams of positive real numbers in all
these models.
To give a flavour of how one realizes logic, we present the abstract
realisability interpretation. Here $x$ and $y$ are elements of the PCA.
\newcommand{\real}{\underline{\mathbf{r}}}
\newcommand{\app}{\bullet}
The symbols `$x\real P$' may be pronounced as $x$ realizes $P$.
\begin{eqnarray}
x\real P &:=& P \wedge x\downarrow \quad\text{for $P$
atomic};\\
x\real (A\wedge B) &:=& (\mathbf{p_0}x\real A)\wedge (\mathbf{p_1}x\real
B);\\
x\real (A\to B) &:=& \forall y(y\real A \to x\app y\real B)
\wedge x\downarrow;\\
x\real \forall y A &:=& \forall y (x\app y \real A);\\
x\real \exists y A &:=& \mathbf{p_1}x\real A[y/\mathbf{p_0}x].
\end{eqnarray}
Here $\mathbf{p_i}$ denotes the $i$th projection of a pair. The symbol
$\downarrow$ may be read as `is defined'. We say that $A$ is true in
the model when $A$ is realized, that is the set of realizers is
inhabited. A similar definition for the realization of types can be
given analogous to the
%\todo{so is it Curry--Howard--de Bruijn or just Curry--Howard?}
Curry--Howard isomorphism which we will
discuss in section~\ref{type}.
We will continue to give the realisability interpretation for the
three models presented before.
\subsubsection{Markov's recursive mathematics}\label{real:CRM}
Markov's recursive mathematics can be modelled by $\Mod(\IN)$ the
modest sets over the first Kleene algebra $\IN$, that is the realizers
in the PCA $\IN$ are the ordinary (partial) recursive functions coded
by natural numbers.
The internal logic of CRM satisfies not only the usual axioms of
intuitionistic logic, but also Church's thesis and Markov's principle.
Church's thesis states that we only work on recursive sequences. That
is, our programming language allows us to access the codes of the real
numbers.\[ \forall n\exists m A(n,m) \to \exists k\forall n \exists m
[A(n,Um)\wedge Tknm]\enspace.
\]
Here $T$ denotes Kleene's $T$-predicate and $U$ is the function that
returns the result $Um$ of the computation $m$. This variant of
Church's thesis may be read as: if for each $n$ we can find an $m$
such that $A(n,m)$, then we can find a recursive function that finds
such $m$ for us. Markov's principle allows us unbounded search: if we
know that an element with a decidable property can not fail to exist,
we can just start searching until we find it. Formally, let $P$ be a decidable
predicate. Then
\[
\neg\forall n. \neg P(n) \to \exists n. P(n)\enspace.
\]
% It should be noted that such a programming construct is absent from
% certain\todo{what is `certain' referring to? it sounds very cynical.}
% programming languages.
As stated before this model behaves somewhat unexpectedly when
quantifying over compact spaces. For instance, a point-wise continuous
function on a compact interval may be unbounded. This is due to the
failure of the fan-theorem, the constructive variant of K\"onig's
lemma. In order to remedy this, one introduces choice sequences, a
concept which is captured by Kleene--Vesley's realisability model
which we will discuss now.
\subsubsection{\label{subsec:real:TTE} TTE}
TTE is the model $\Mod(\mathbb{B},\mathbb{B}_\sharp)$, the second
Kleene algebra which was extensively studied by Kleene and Vesley.
Thus TTE may be seen as the
Kleene--Vesley realisability
interpretation~\cite{Kleene/Vesley:1965}. Troelstra~\cite{Troelstra:92} seems to
have been the first to observe the possibility to use realisability to
obtain results in TTE.
It may seem surprising that the notion of admissible representation
which is so important in TTE seems absent in realisability. To
understand this let us consider a representation of the real numbers.
First of all, there is no absolute pre-given notion of real number,
thus it seems impossible to state what an admissible representation of
`the' real numbers is.
% \todo{should `the' be in quotes?}. Yes I think so.
However, one
can axiomatically define the real numbers up to isomorphism. Now we
fix any representation, $c:\Baire\to \IR$, of the real numbers. One defines a
representation $r:\Baire\to \IR$ of the real numbers as
a surjective map from Baire space to the real numbers. Of
course, surjective should be interpreted in the internal logic. Thus
surjectivity means\[ \forall \beta \in \Baire \exists \alpha \in
\Baire [r(\alpha)=c(\beta)]\enspace.
\]
By applying the axiom of choice for variables over Baire space---
denoted C-C in ~\cite{TvD} --- we obtain:\[ \exists f:\Baire\to \Baire
\forall \beta \in\Baire [ r(f(\beta))=c(\beta) ]\enspace.
\]
This is precisely the maximality condition in the notion of admissible
representation defined above. Thus one may view admissibility as the
\emph{external} way of stating the surjectivity of a representation.
We should mention that the axiom C-C which we used above holds in the
internal logic.\footnote{We have been unable to find this simple
remark in the literature.}
It has been crucial in the development of constructive mathematics
that complete separable metric spaces can be represented by a
continuous surjective image of Baire space. This same fact is also
heavily used in the context of TTE. This allows us to directly
transfer constructive theorems about such spaces to TTE,
see~\cite{Lietz} for details. Similarly, compact metric spaces can be
represented by Cantor space and a similar transfer principle exists.
To sum up, one can now view TTE as the assembly language for exact real
computation. Using categorical logic and realisability, one can
compile a dependently typed functional language with (co)inductive
types into this Turing machine model. Thus the relation between
constructive mathematics and TTE is much like the relation between an
advanced programming language and an assembly
language. The former provides more structure, the latter gives finer
control over the computation and in particular over the complexity of
such computations.
\subsubsection{Domains}
The theory of effectively presented continuous domains as used by
Edalat and co-workers~\cite{Edalat:domforcomp} fits into the
model
$\Mod(\mathbb{P},\mathbb{P}_\sharp)$. Here $\mathbb{P}$ denotes Scott's
graph model~\cite{Scott:graph}, which may be seen as the `universal'
countably based $T_0$ topological space; see~\cite{Bauer:thesis} for details.
\subsubsection{Coinductive types}
As mentioned before the realisability models support coinductive
types. One way of seeing this is to observe that such models can be
extended to a topos --- a generalised, or local, set theory. This
construction is due to Hyland~\cite{Hyland} and is called the
effective topos. Thus when we define our data-types coinductively we
can directly interpret them in all the realisability models we have
described.
As an example consider the coinductive streams of natural numbers. It
is straightforward to prove constructively that this final coalgebra
is the function space $\IN\to \IN$. Thus one can directly interpret
these streams in all the models above. For instance, in CRM all these
functions would be recursive.
\section{Proofs}\label{sec:proofs}
\subsection{Type theory}\label{type}
%Herman?
Type theory provides a syntactic analysis of the notion of
computability. In this section we describe some basic concepts of type
theory that are relevant for understanding the connections with
constructive analysis and computation with the reals. For more details
on type theory we refer to
~\cite{martinlof.84,nordstrom.90,luo.94,barendregt.92,barendregt.01}
(We will not deal with programming language aspects of type theory,
for which we refer to~\cite{pierce.02}, nor shall we discuss logical
frameworks, for which we refer to~\cite{pfenning.01}.) The basic
notion of type theory is obviously that of a type, which describes a
collection of terms (the terms of that type) in a {\em syntactic\/}
way: there are rules for constructing terms of a type of a specific
form (so called {\em introduction rules}) and there are rules for
using terms of a type of a specific form (so called {\em elimination
rules}). The crucial point is that it is {\em decidable\/} whether a
term is of a given type, because the type of a term can be computed on
the basis of its syntactic shape. (There are some exceptions, but
almost all type theories adhere to this principle.) This distinguishes
type theory from set theory: $X:= \{ n \in \IN \mid \forall
x,y,z,\;x^n +y^n \neq z^n\}$ is a
typical example of a set and not a type (whether $n\in X$ is not a
matter of syntactic analysis of $n$).
Simple examples of types are $\bool$ and $\nat$. The type $\bool$
contains just $\true$ and $\false$ and $\nat$ contains $0$ and, if
$x:\nat$, then $S\,x : \nat$ as well. Apart from construction
principles for terms, there are construction principles for types as
well, for example, given the types $\sigma$ and $\tau$, we have
$\sigma\times\tau$ and $\sigma \rightarrow \tau$ as types, with the
associated construction principles of `pairing' and
$\lambda$-abstraction. This gives rise to the system $\larrx$ of
simple type theory with products. Exactly how much information one
puts in the terms (and in what form) is a matter of choice. For
programming purposes, one usually would want to put as little
information as possible (because the program is what the user writes)
and let the computer (compiler) compute a type (or a set of types) for
us. So, for $\larrx$, one can have as construction rule that
$\pair{M}{N} :\sigma\times\tau$ if $M:\sigma$ and $N:\tau$ and that
$\lambda x.M :\sigma \rightarrow \tau$ if $M:\tau$ under the
assumption that $x{:}\sigma$.
The construction and elimination principles of type theory give it a
strongly constructive flavour, which was first made explicit by
\martinlof: we describe a collection by telling how we can construct
objects of that collection. Due to the fact that we know the
construction principles, we can define a function {\em from\/} the
collection by distinguishing cases according to the construction rules
(and doing a recursive call if needed).
There are many different type theories, depending on the types one
allows and the functions one allows to define over them. Examples of
additional type constructions are: polymorphic types, higher order
polymorphic types, dependent types, inductive types and recursive
types. An important aspect of the definable functions in type theory
is that they are executable, due to the computational model of the
$\lambda$-calculus that is part of the system.
\subsubsection{Curry--Howard Isomorphism}
%Herman?
Apart from a computational model, type theory also incorporates a
`logical model'. This is due to the Curry--Howard--de Bruijn
isomorphism, that interprets formulas as types and proofs (logical
deductions) as terms. The isomorphism was first noticed by Curry for
minimal propositional logic and simple type theory, and later extended
to the first order case by \cite{howard.80} (but the original paper
dates back to 1968). Howard who also treats the case of proofs by
induction over the natural numbers and he has also coined the name
`formulas-as-types'. Independently of Howard, De Bruijn noticed the
formulas as types analogy in the late 60's in the context of his
logical framework Automath~\cite{Bruijn.80}. In the analogy of De
Bruijn, the logic is encoded in type theory, so his formulas-as-types
analogy is slightly different from what we discuss here. (As a matter
of fact, various encodings of logic in type theory were studied, and
some of the later ones are quite close to what we treat here.) The
isomorphism can also be seen as an operationalisation of the so called
BHK (Brouwer--Heyting--Kolmogorov) interpretation of proofs, where
e.g.\ a proof of $A\rightarrow B$ is interpreted as a {\em method\/}
for producing a proof of $B$ out of a proof of $A$; see
Section~\ref{subsec:cm} for details. This was also the interest of
\martinlof in the formulas-as-types isomorphism, who took it as the
starting point of his intuitionistic theory of types
\cite{martinlof.84} and extended the isomorphism to the existential
quantifier and inductive types.
Combining the computational and the logical interpretation of type
theory, we find that the basic judgement
$$\Gamma \vdash M :A$$
can have two `readings':
\begin{enumerate}
\item $M$ is a piece of data (or algorithm) of data type $A$,
\item $M$ is a proof (deduction) of formula $A$.
\end{enumerate}
To make a (syntactic) distinction between data types and formulas,
most type theories have (at least) two `universa' or `sorts': \Set\
and \Prop, where $A:\Set$ means that $A$ is a data type and $A:\Prop$
means that $A$ is a formula. The context $\Gamma$ consists of
variable declarations $x:B$ and definition $c:=t:B$. Variable
declarations are read as assumptions (assuming a hypothetical proof of
$B$) in case $B:\Prop$. A definition is read as a reference to a
proved lemma (with proof $t$) in case $B:\Prop$.
The correspondence between logic and type theory is so strong that
there is an isomorphism between logic (e.g.\ the
${\wedge}{\rightarrow}$-fragment of propositional logic) and type
theory (the system $\larrx$). The isomorphism maps formulas to types
and proofs in natural deductions to terms. In this isomorphism, the
logical introduction rules correspond to the construction principles
of the type theory and the logical elimination rules correspond to the
elimination principles. The isomorphism also maps computations in
logic (via cut-elimination) to computations in type theory (e.g.\
$\beta$-reduction in $\larrx$).
To extend the Curry--Howard isomorphism to predicate logic, we need
`formula types' (types of type $\Prop$) that depend on objects of a
`data type' (a type of type $\Set$). A predicate over the type $\nat$
should be a function from $\nat$ to $\Prop$ and similarly, a binary
relation (like $\leq$) should be of type $\nat
\rightarrow\nat\rightarrow \Prop$. This phenomenon is called {\em type
dependency}: the possibility to form type expressions that contain
term expressions as subterms. Type dependency also implies the
formation of the {\em dependently typed function space}, usually written
as $\Pi x\oftype A. B(x)$, denoting the type of functions that takes
an $a:A$ and produces a term of type $B(a)$. These dependent function
types are typically used for formalising the $\forall$ quantifier: a
proof of $\forall x\oftype A. B(x)$ is a method that, given an $a:A$
produces a proof of $B(a)$. Similarly one can also introduce a type
dependent product type $\Sigma x\oftype A. B(x)$. This type consists
of pairs $\langle a, b\rangle$ where $a:A$ and $b:B(a)$. There are
various choices for the elimination rule for $\Sigma$-types, the
simplest being: if $p : \Sigma x\oftype A.B(x)$, then $\pi_1\,p : A$
and $\pi_2\, p : B(\pi_1\, p)$. So $\pi_1 : \Sigma x\oftype A.B(x)
\rightarrow A$ and $\pi_2 : \Pi y \oftype(\Sigma x\oftype A.B(x)).
B(\pi_1\, y)$ and there is the usual computation rule for the
projections ($\pi_1$ and $\pi_2$) and pairing ($ \langle
\_,\_\rangle$).
\subsubsection{Inductive Types}
%Herman?
% MN: please also mention that inductive types are equivalent to
% general trees (a la Petersson-Synek)
Taking the idea of sets defined via construction principles as basis,
a general pattern for defining types by induction emerges. This idea
originates from Scott~\cite{scott.70} and
\martinlof~\cite{martinlof.84}; the syntax we present below is loosely
based on~\cite{coquand.90,paulin.93} and the formalisation of
inductive types in the proof assistant \coq~\cite{coq:04}. Basically,
an inductive type $X$ is completely captured by giving its {\em
constructors}, constant terms that have a type of the form $$ A_1
\rightarrow A_2 \rightarrow \ldots A_n \rightarrow X$$ where the type
expressions $A_i$ can only contain $X$ in a {\em strictly positive\/}
position (i.e.\ $A_i$ does not contain $X$ or is of the form
$B_1\rightarrow B_2 \rightarrow \ldots B_m \rightarrow X$ with $X$ not
in $B_j$). In some applications, the condition of strict positivity
may be relaxed to positivity, but in type theories with dependent
types one cannot in general.
The constructors are seen as the (only) ways of constructing terms of
the type, so one is actually describing the free algebra over the
terms generated from these constructors, stated otherwise $X$ is a
solution to the domain equation $X = \sigma_1 + \ldots + \sigma_k$, if
the $\sigma_i$'s correspond to the types of the constructors in the
following way: if $A_1 \rightarrow A_2 \rightarrow \ldots A_n
\rightarrow X$ is the type of the first constructor, then $\sigma_1 =
A_1 \times A_2 \times \ldots A_n$ . Such a free algebra amounts to two
properties:
\begin{itemize}
\item
Coverage: if $t:X$, then $t = c(s_1, \ldots, s_n)$ for some constructor $c$.
\item No confusion (for terms of type $X$): $c(t_1, \ldots, t_n) =
c'(s_1, \ldots, s_m)$ if and only if $c = c'$, $n=m$ and $t_i = s_i$ for all
$i$.
\end{itemize}
In type theory with inductive types, these properties for $X$ are
automatically generated from the declaration of the constructors for
$X$, and they are automatically enforced. These properties have both a
logical and a computational aspect. `Coverage' is enforced logically
by the induction principle and computationally by the principle of
well-founded recursion. `No confusion' is enforced logically by the
fact that we can prove a property of elements of $X$ by an
(exhaustive) case distinction. It is enforced computationally by the
fact that we can define a function over $X$ by cases.
The terms of an inductive type can be seen as trees, with nodes
labelled with constructors. If $c :A_1 \rightarrow A_2 \rightarrow
\ldots A_n \rightarrow X$, then a node labelled with $c$ has $n$
subtrees that are either expressions of type $A_i$ (if $X$ does not
occur in $A_i$) or a $B_1\times B_2 \times \ldots \times B_m$-indexed
family of trees (if $A_i$ is of the form $B_1\rightarrow B_2
\rightarrow \ldots \rightarrow B_m \rightarrow X$).
We treat some examples to make these rather abstract ideas concrete.
The type of trees with labels in $A$ and nodes in $B$ is given by two
constructors.
\begin{eqnarray*}
\leaf &:& A\rightarrow \Tree\\
\join &:& B\rightarrow \Tree\rightarrow \Tree\rightarrow \Tree
\end{eqnarray*}
The intention is that this defines the free algebra of trees over
leaf-type $A$ and node type $B$. So, we want $\leaf\;x \neq \join\;
y\; t_1\; t_2$ for all $x,y,t_1,t_2$, and we want to be able to define
function over $\Tree$ by case distinction and recursing over `smaller
trees'. Finally we want to be able to prove properties of elements of
$\Tree$ by tree-induction. In type theory with inductive types, this
is made possible by allowing to define terms in the following way.
$$\begin{array}{l}
\Fixp\ \NCnt (x : \Tree) : \nat :=\\
\match\ x\ \with\\
\mid \leaf\ a \Rightarrow 1\\
\mid \join\ x\ t_1\ t_2 \Rightarrow (\NCnt\ t1)+(\NCnt\ t2)\\
\send.\\
\end{array}$$
Here we borrow the syntax from \coq; the above can be read as definition of
a recursive function $\NCnt\colon \Tree \rightarrow \nat$ where \Fixp
denotes that we are using recursion. This function counts the numbers of
leaves in a tree. In fact, the function $\NCnt$ is
defined by {\em structural recursion\/} over the tree type, meaning
that in the body of the function definition, $\NCnt$ is only called on
arguments that are smaller according to the structure of the inductive
type. All functions defined by structural recursion are terminating,
but it should be noted that structural recursion is a {\em syntactic}
-- and thus decidable -- criterion for a function to be terminating.
The pattern for function-definition by structural recursion can be
generated directly from the definition of the inductive type, which
makes it possible for computer systems to support this. See
~\cite{paulin.93} for how this is done in the proof assistant \coq.
Structural recursion is quite powerful, but for some functions there
is quite some work to be done to define them. E.g.\ the gcd function
defined in the following way is not structural recursive.
$$\begin{array}{l}
\Fixp\ \Gcd (n\;\; m:\nat):\nat :=\\
\textbf{if } n < m \textbf{ then } \Gcd(n,m-n)\\
\qquad\textbf{else if } n = m \textbf{ then } n\\
\qquad\qquad\textbf{else } \Gcd(n-m,m)
\end{array}$$
The type $\nat$ is defined as an inductive type with constructors
$0:\nat$ and $S:\nat\arr\nat$, so a call of $\Gcd$ on $m-n$ is not
structurally recursive. To establish termination, we would first
have to {\em prove\/} that the recursive calls of $\Gcd$ are only
done on {\em smaller\/} arguments, according to some well-founded
order, and then the function would be defined by recursion over this
well-founded order. Note also that the function $\Gcd$ is not
terminating at all, because on $n=0$ or $m=0$ the recursively called
argument isn't smaller, so really this function would be partial, of
type $\Pi n,m:\nat. (n\neq 0)\rightarrow(m\neq 0) \rightarrow \nat$.
For a solution to the problem of the restrictiveness of structural
recursion, see e.g.\/ ~\cite{bove.05}.
%\todo{ref to Coquand-Paulin, Werner, Bertot, Capretta, Capretta-Bove}
The induction principle for an inductive type can also be generated
from the definition of the inductive type. As a matter of fact, the
induction principle and the recursion principle can be seen as
instances of the same syntactic schema, but we won't go into this
here: see~\cite{paulin.93} for details. The induction principle for
$\Tree$ is the term $\Tree\_\ind$ with the following type.
$$\begin{array}{l}
\Tree\_\ind : \forall P \oftype \Tree \arr \Prop.\\
(\forall a \oftype A. P (\leaf\ a)) \rightarrow (\forall b \oftype B. \forall
t_1 \oftype \Tree.
P\ t_1 \rightarrow \forall t_2 \oftype \Tree. P\ t_2 \rightarrow P (\join\ b\
t_1\ t_2))\\
\rightarrow
\forall t \oftype \Tree. P\ t
\end{array}$$
The power of inductive types lies to a large extent in the fact that
many mathematical `objects' can be defined in an inductive (or
recursive) way. Defining them in inductive type theory then gives the
added value that the recursion scheme and the induction principle come
`for free'. Examples of mathematical `objects' that can be defined as
inductive types are the following. (1) Logical connectives, like
disjunction which has -- given two parameters $A,B:\Prop$ -- two
constructors $\sleft : A \rightarrow A\vee B$ and $\sright : B
\rightarrow A\vee B$, or the existential quantifier which has -- given
two parameters $A:\Set$ and $P:A\arr \Prop$ -- one constructor $\spair
: \Pi x\oftype A. (P\, x) \rightarrow \exists\,A\,P$; here we see the
use of a {\em dependently typed\/} constructor in the definition of an
inductive type. (2) Inductively defined relations, like the `less or equal' on
natural numbers $\le$, which has as constructors
%Inductive le (n : nat) : nat -> Prop :=
% le_n : n <= n | le_S : forall m : nat, n <= m -> n <= S m
\begin{eqnarray*}
\sle\_n &:& \forall n:\nat.\sle\ n\ n\\
\sle\_S &:& \forall m,n : \nat. \sle\ n\ m \rightarrow \sle\ n\ (S m)
\end{eqnarray*}
In the last examples, we see the use of {\em dependently typed
constructors}. This changes the scheme of the type of a constructor
that we described in the beginning of this section. Constructors now
have a type $\Pi x_1\oftype A_1. \ldots \Pi x_n \oftype A_n . X\,t_1
\cdots t_m$, where $X$ may occur in the $A_i$ only in a strictly
positive position (i.e.\ at the end).
Apart from the scheme for inductive types that we describe here, there
is also the possibility to introduce one `generic' well-ordering type,
the so called $W$-type and to define inductive types as instances of
this type. The $W$-type defines a general type of well-founded trees
that can be instantiated to specific sets of trees by choosing the
branching types in a specific way~\cite{nordstrom.90}. Dybjer
~\cite{dybjer.97} shows that the inductive types we describe above can
indeed be represented in this way, but then one has to use an {\em
extensional\/} type theory, i.e.\ where functions are equal if they
have the same graph, which leads to an undecidable typing relation.
\subsubsection{Coinductive Types}
% Milad?
% MN: done
Coinductive types were added to the type theory in order to make it
capable of dealing with infinite
objects~\cite{mendler.91,geuvers.92,mendler.86,hallnas.90,gimenez.96}.
This extension was done by Hagino~\cite{hagino.87} using the
categorical semantics. The idea behind using the categorical semantics
is to consider an ambient category for the type theory, and interpret
the \emph{weakly} final coalgebras (i.e.\ final Coalgebras with
uniqueness property dropped) of this category as coinductive types.
Alternatively Lindstr\"{o}m~\cite{lindstrom.89} extended \martinlof
type theory by coinductive types using the non-well-founded set
theoretic semantics; while Mendler et al.~\cite{mendler.86}
\martinlof~\cite{martin-lof.90} and Halln\"{a}s~\cite{hallnas.90}
tried to directly extend \martinlof's constructive type theory by
adding extra typing rules for infinite objects.
Mendler~\cite{mendler.91} and Geuvers~\cite{geuvers.92} presented a
way to encode coinductive types in type theories altogether simpler
than \martinlof's type theory. Later, Gimenez~\cite{gimenez.96}
extended the calculus of inductive construction by a \emph{cofixpoint
scheme} that allows for introducing infinite objects.
Coinductive type theories provide a programming framework for
algorithms that deal with infinite objects, and therefore are suitable
for exact real arithmetic. Especially, since type theories provide a
basis for formal verification tools, formalising an algorithm in type
theory paves the way for verification of that algorithm by means of a
theorem prover. Therefore a rigorous analysis of correctness of the
algorithms becomes possible by stating these algorithms in the
language of coinductive type theory. This is made easier if one can
devise a coinductive type theory that is specifically suited for
working with real numbers. That is to say, for verification of the
algorithms of exact arithmetic, one does not necessarily need a
general theory of coinductive types and the full power of type theory.
In terms of categorical semantics this means that having the (weakly)
final coalgebras of polynomial functors should suffice. However one
needs the underlying type theory to be strong enough to formalise all
the computable real functions.
This brings up the notion of productivity of infinite objects in type
theory and functional programming which is similar (in fact dual) to
the notion of termination for finite
objects~\cite{dijkstra.80,sijtsma.89,coquand.94}. A function on
streams is productive if it can produce arbitrarily large finite
approximations in finite time. The example of multiplication by $3$ in
Section~\ref{subsec:representations} is not a productive function. In
fact productivity is very similar to the notion of computability (and
continuity, and laziness). In \TTE it can be related to the
\emph{finiteness property} of type two Turing
machines~\cite[\S~2.2]{weihrauch.00}. A domain theoretic treatment of
productivity for streams can be found in~\cite{sijtsma.89} which is
expanded and used in the coinductive treatment of lazy exact
arithmetic in~\cite{milad.04b}. For tackling productivity inside the
type theory the notion of \emph{guardedness} is studied by type
theorists~\cite{coquand.94,telford.00} and is implemented as the basis
for the treatment of coinductive types in \coq proof
assistant~\cite{gimenez.96}.
Guardedness condition is a syntactic criterion that can be used to
ensure the productivity much in the same way (in fact in the dual way)
as it can be used to ensure the termination of structurally recursive
functions: a recursive function with as recursive argument a term with
an inductive type is terminating if the argument of the recursive
calls is structurally smaller than than the original argument of the
function. This structural order is an inherent order that is inherited
from the definition of the inductive type of the recursive argument.
According to this order applying constructors of the inductive type
generates the successors of a term (recall that inductive types are
equivalent to the type of general trees). Dually an infinite object
(i.e.\ a term that has a coinductive type) is productive if the calls
to itself inside the body of its definition are immediate arguments of
constructors of its coinductive type. The above checks are purely
syntactical and hence can be automatised; this is exactly what is done
in the guardedness checker of \coq proof assistant.
As an example the following definition is a guarded definition for a stream
of natural numbers starting from $n$.
\begin{displaymath}
\textsf{nats}\;\;\; n := \textcons\;\;\; n\;\;\; (\textsf{nats}\;\;\; n+1)
\end{displaymath}
This is because the sole occurrence of \textsf{nats} in the right hand
side is the immediate argument of (i.e.\ guarded by) \textcons, the
constructor of the coinductive type of streams.
But the same way as the structural recursion is not powerful enough to
capture all valid terminating recursive definitions, the
guarded-by-constructor approach does not capture the whole class of
productive infinite objects. This is because productivity of streams
can in general be reduced to the question whether a subset of \IN is
infinite, which is an undecidable question~\cite[\S~4.7]{milad.04b}.
Example of the infinite objects that are not guarded and whose
productivity is not syntactically detectable are the filter-like
functions that are used in functional programming. In order to
formalise such infinite objects one option would be to adhere to
semantic approaches, e.g. domain-theoretic methods~\cite{milad.05} or
topological methods~\cite{digianantonio.03}. The other option would be
to use a very extended setting of coinductive types that includes
polymorphic and dependent coinductive types and adapt advanced
type-theoretic methods that are used for tackling general
(non-structural) recursion. This is the approach taken by
Bertot~\cite{bertot.05} and is implementable in \coq as all the
machinery that is necessary (polymorphic and dependent coinductive
types) already exist in \coq.
\subsection{Program Extraction}
Correctness is an important issue in the implementation of computable
analysis (which in practice currently mostly amounts to the
implementation of exact real arithmetic.)
There are two directions in which one can develop correct programs.
In one direction one starts by writing the concrete program and then
afterwards tries to establish its correctness, either using informal
reasoning, or by annotating the program with invariants and then
proving correctness conditions generated from that, or by refining the
types used in the program to be more informative, using a programming
language that supports dependent typing. In the other direction, one
starts very abstractly and then works towards a concrete program. The
methods of program refinement and program extraction both fall in this
second category.
With program extraction one starts from a \emph{formalisation} of some
mathematical theory, a representation of this theory in the computer
that has sufficient detail to allow the computer to establish the
correctness by {proof checking}. This formalisation then is
automatically transformed into a computer program which implements the
constructive content of this formalised theory. This is a direct
application of the realisability implementation of constructive logic.
Therefore to be able to extract a program from a formalised theory, in
general one needs to formalise the theory using constructive logic.
However, there also has been some work on extracting programs from
classical proofs~\cite{ber:buc:sch:01}. Program extraction has been
implemented in many systems, like PX~\cite{hay:nak:87}, Nuprl
~\cite{con:all:bro:cle:cre:har:how:kno:men:pan:sas:smi:86}, \coq
~\cite{coq:04,let:04}, Minlog~\cite{ben:ber:sch:sei:zub:98} and
Isabelle~\cite{nip:pau:wen:02}.
Note that to be able to do program extraction in a proof assistant,
the logic of the assistant does not need to be constructive:
Isabelle/HOL is based on a classical logic, but it supports program
extraction~\cite{ber:03}.
Because the Curry--Howard--de Bruijn isomorphism corresponds in a
natural way to a realisability interpretation, program extraction is
popular with proof assistants that implement type theory. In type
theory the proofs of a theorem already are lambda terms, which can be
seen as functional programs in a simple programming language.
Therefore in type theory program extraction is hardly more than
transforming one functional language into another functional language.
However, because not all computations in these lambda terms are
relevant for the final result of the program, a distinction is made
between informative and non-informative data-types.Then, when
extracting the program, all parts corresponding to non-informative
data-types will be removed.
Program extraction is a popular method for establishing the
correctness of implementations of computable analysis. Most proof
assistants have a formalisation of the theory of real numbers, and an
implementation of exact real arithmetic and computable analysis is
seen as an easy side product of this.
Program extraction is an attractive method, but it is unclear whether
extracted programs will have a competitive performance. For instance
the root finding program extracted from a \coq formalisation of the
intermediate value theorem turned out to be unusable in practice
~\cite{cru:geu:wie:04,cru:spi:03,cru:let:05}. Apparently if one wants
to extract a reasonable program, then one already needs to be aware of
the extraction process when writing the formalisation.
% For this
% reason it might be argued that it probably is easier to first write
% the program and then prove it correct, than to first formalise a proof
% and then extract a program from it.
\subsection{Constructive Analysis}\label{subsec:cm}
%Bas?
Constructive Analysis has had a major impact on various topics
described in this paper. It has its roots in the intuitionistic
mathematics of Brouwer~\cite{Brouwer,Heyting:1956}, which already
showed the strong connections between computability and topology even
before these fields were properly developed. Heyting then defined
formal rules for Brouwer's logic. The interpretation of
intuitionistic logic now goes by the name BHK, after Brouwer, Heyting
and Kolmogorov.
\begin{center}
\begin{table}[h]
\begin{tabular}{l|l}
To prove&one needs to\\ \hline
$A\wedge B$&prove $A$ and prove $B$\\
$A\vee B$& choose one and prove it\\
$A\to B$ &provide a method transforming a proof of $A$ into a proof of $B$\\
$\forall x A$&provide a construction $f$ such that $f(x)$ is a proof of
$A(x)$\\
$\exists x A$ & construct $t$ and prove $A(t)$\\
\end{tabular}
\caption{\textbf{BHK interpretation}}
\end{table}
\end{center}
When a precise theory of computations became available Kleene
developed his realisability interpretation to give a formal model for
intuitionistic logic. See Section~\ref{sec:realizability}. Kleene's
first interpretation did capture Brouwer's logic nicely, as explained
in section~\ref{real:CRM}, but did not capture Brouwer's theory of
choice sequences. This was solved by Kleene and
Vesley~\cite{Kleene/Vesley:1965}, using functions on Baire
space. As we
have seen this is the interpretation that also captures TTE.
As is well-known Brouwer contended that all total real functions are
continuous~\cite{Brouwer:domain}. A statement we can now see is
provable in many concrete computational interpretations. In 1967
Bishop~\cite{bishop.67} showed that although Brouwer's continuity
principle is an important guideline, one can do without this
assumption by just studying the continuous functions and ignoring any
other ones, whether they exist or not. In this way Bishop developed
major parts of modern analysis. It turns out that Bishop's mathematics is a
convenient generalisation
of both recursive and intuitionistic mathematics\footnote{and of
classical mathematics, but that's not the issue here.}. It can be
interpreted in both these computational models described above,
see~\cite{TvD,BridgesRichman}.
Bishop's model of computation is deliberately vague about the precise
notion of computation. It builds on a primitive notion of `operation'.
Martin-L\"of theory of types~\cite{martinlof.84} can be used as a
satisfactory theory of such operations. In fact, the usual way to
treat sets in type theory --- that is, using types modulo an
equivalence relation, called setoids~\cite{Hofmann:thesis} --- was
motivated by Bishop's work.
Finally we would like to refer to two recent monographs for more information
about
constructive
mathematics~\cite{CrosillaSchuster,BridgesVita}.
Moreover, this story is not complete without mentioning formal
topology~\cite{Sambin:1987,FourmanGrayson}. A proper description would
take us to far from exact arithmetic. However, let us just mention
that formal topology may be seen as a way to develop topology or
domain theory inside type theory~\cite{Sambin:domain,SambinVV}.
\section{Conclusion}
%?All
% We should have some kind of conclusion
% Should be heavily rewritten at the end.
We have presented some of the problems and solutions of exact real
arithmetic varying from concrete implementations, representation and
algorithms to various models for real computation. We then put these
models in a uniform framework using realisability, opening the door
for the use of type theoretic and coalgebraic constructions both in
computing and reasoning about these computations. We have indicated
that it is often natural to use constructive logic to reason about
these computations. \bibliography{typesreal}\bibliographystyle{plainnat}
\end{document}
% LocalWords: surjective intuitionistic surjectivity Effectivity
% LocalWords: compactification codomain Kleene Brouwer Boehm Boehm's
% LocalWords: homographic Edalat Brocot Kreckel Lambov reimplementation
% LocalWords: dcpo dcpos definedness poset Kreisel Lacombe Schoenfield Tsejtin
% LocalWords: Vesley Weihrauch bisimilar Birkedal combinatory Baire Bruijn
% LocalWords: Vesley's Troelstra maximality topos Hyland coinductively Heyting
% LocalWords: Automath Kolmogorov universa positivity Dybjer Hagino Mendler
% LocalWords: Geuvers Gimenez automatised Brouwer's