diff options
author | Mark Shinwell <mshinwell@janestreet.com> | 2015-11-30 09:29:04 +0000 |
---|---|---|
committer | Mark Shinwell <mshinwell@janestreet.com> | 2015-11-30 09:29:04 +0000 |
commit | 404883c1b97fcef1e1930d56937986119d1bff3d (patch) | |
tree | 330b39eb116f772ad14a881153d877381ba509f2 | |
parent | e5e21c7de4a7be1a435dd2b89db92f498bb1a8fe (diff) | |
parent | 6c90da49f1320de60e4ee8e936961928ec87a3b2 (diff) | |
download | ocaml-404883c1b97fcef1e1930d56937986119d1bff3d.tar.gz |
merge
190 files changed, 24660 insertions, 184 deletions
diff --git a/.travis-ci.sh b/.travis-ci.sh index aa37577ba9..1ecd8af3c2 100644 --- a/.travis-ci.sh +++ b/.travis-ci.sh @@ -10,26 +10,100 @@ # # ######################################################################### -case $XARCH in -i386) - ./configure - make world.opt - sudo make install - (cd testsuite && make all) - mkdir external-packages - cd external-packages - git clone git://github.com/ocaml/camlp4 - (cd camlp4 && ./configure && make && sudo make install) - git clone git://github.com/ocaml/opam - (cd opam && ./configure && make lib-ext && make && sudo make install) - git config --global user.email "some@name.com" - git config --global user.name "Some Name" - opam init -y -a git://github.com/ocaml/opam-repository - opam install -y oasis - # opam pin add -y utop git://github.com/diml/utop - ;; -*) - echo unknown arch - exit 1 - ;; +PREFIX=~/local + +BuildAndTest () { + case $XARCH in + i386) + echo<<EOF +------------------------------------------------------------------------ +This test builds the OCaml compiler distribution with your pull request, +runs its testsuite, and then tries to install some important OCaml softare +(currently camlp4, opam and oasis) on top of it. + +Failing to build the compiler distribution, or testsuite failures are +critical errors that must be understood and fixed before your pull +request can be merged. The later installation attempts try to run +bleeding-edge software, and failures can sometimes be out of your +control. 
+------------------------------------------------------------------------ +EOF + mkdir -p $PREFIX + ./configure --prefix $PREFIX + export PATH=$PREFIX/bin:$PATH + make world.opt + make install + (cd testsuite && make all) + mkdir external-packages + cd external-packages + git clone git://github.com/ocaml/camlp4 + (cd camlp4 && + ./configure --bindir=$PREFIX/bin --libdir=$PREFIX/lib/ocaml \ + --pkgdir=$PREFIX/lib/ocaml && \ + make && make install) + git clone git://github.com/ocaml/opam + (cd opam && ./configure --prefix $PREFIX &&\ + make lib-ext && make && make install) + git config --global user.email "some@name.com" + git config --global user.name "Some Name" + opam init -y -a git://github.com/ocaml/opam-repository + opam install -y oasis + # opam pin add -y utop git://github.com/diml/utop + ;; + *) + echo unknown arch + exit 1 + ;; + esac +} + +CheckChangesModified () { + echo<<EOF +------------------------------------------------------------------------ +This test checks that the Changes file has been modified by the pull +request. Most contributions should come with a message in the Changes +file, as described in our contributor documentation: + + https://github.com/ocaml/ocaml/blob/trunk/CONTRIBUTING.md#changelog + +Some very minor changes (typo fixes for example) may not need +a Changes entry, in which case it is acceptable for this test to fail. +------------------------------------------------------------------------ +EOF + # check that Changes has been modified + git diff $TRAVIS_COMMIT_RANGE --name-only --exit-code Changes > /dev/null \ + && exit 1 || echo pass +} + +CheckTestsuiteModified () { + echo<<EOF +------------------------------------------------------------------------ +This test checks that the OCaml testsuite has been modified by the +pull request. 
Any new feature should come with tests, bugs should come +with regression tests, and generally any change in behavior that can +be exercized by a test should come with a test or modify and existing +test. See our contributor documentation: + + https://github.com/ocaml/ocaml/blob/trunk/CONTRIBUTING.md#test-you-must + +Modifications that result in no change in observable behavior +(documentation contributions for example) can hardly be tested, in +which case it is acceptable for this test to fail. + +Note: the heuristic used by this test is extremely fragile; passing it +does *not* imply that your change is appropriately tested. +------------------------------------------------------------------------ +EOF + # check that at least a file in testsuite/ has been modified + git diff $TRAVIS_COMMIT_RANGE --name-only --exit-code testsuite > /dev/null \ + && exit 1 || echo pass +} + +case $CI_KIND in +build) BuildAndTest;; +changes) CheckChangesModified;; +tests) CheckTestsuiteModified;; +*) echo unknown CI kind + exit 1 + ;; esac diff --git a/.travis.yml b/.travis.yml index 93c47efa69..b0a323af8b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,14 @@ # # ######################################################################### +sudo: false language: c script: bash -ex .travis-ci.sh -env: - - XARCH=i386 +matrix: + include: + - env: CI_KIND=build XARCH=i386 + - env: CI_KIND=changes + - env: CI_KIND=tests + allow_failures: + - env: CI_KIND=changes + - env: CI_KIND=tests @@ -60,6 +60,9 @@ Language features: - GPR#282: change short-paths penalty heuristic to assign the same cost to idents containing double underscores as to idents starting with an underscore (Thomas Refis, Leo White) +- GPR#273: allow to get the extension slot of an extension constructor + by writing [%extension_constructor <path>] + (Jérémie Dimino) Compilers: - PR#4800: better compilation of tuple assignment (Gabriel Scherer and @@ -119,7 +122,8 @@ Runtime system: - PR#6760: closures evaluated in the 
toplevel can now be marshalled (whitequark, review by Jacques-Henri Jourdan) - PR#6902, GPR#210: runtime emits a warning when finalizing an I/O channel - which is still open (Alain Frisch, review by Damien Doligez) + which is still open (Alain Frisch, review by Damien Doligez); + this is controlled by OCAMLRUNPARAM=W=1 or with Sys.enable_runtime_warnings. - Signal handling: for read-and-clear, use GCC/Clang atomic builtins if available. (Xavier Leroy) - PR#6910, GPR#224: marshaling (output_value, input_value, et al) @@ -170,8 +174,8 @@ Standard library: (report and fix by Jeremy Yallop) - GPR#265: new implementation of Queue avoiding Obj.magic (Jérémie Dimino) -- GPR#268: '%h' and '%H' modifiers for printf and scanf to support - floating-point numbers in hexadecimal notation +- GPR#268, GPR#303: '%h' and '%H' modifiers for printf and scanf to + support floating-point numbers in hexadecimal notation (Xavier Leroy, Benoît Vaugon) - GPR#272: Switch classify_float to [@@unboxed] (Alain Frisch) - Improve speed of classify_float by not going through fpclassify() @@ -225,6 +229,14 @@ OCamlbuild: - Changed OCamlbuild's license to LGPLv2 with static linking exception. - GPR#219: speedup target-already-built builds (ygrek) +- PR#6605, GPR#117: use ocamlfind, if available, to discover camlp4 path + (Vincent Laporte) + +Manual: +- GPR#302: The OCaml reference manual is now included in the manual/ + subdirectory of the main OCaml source repository. Contributions to + the manual are warmly welcome. 
+ (François Bobot, review by Florian Angeletti) Bug fixes: - PR#3612: memory leak in bigarray read from file @@ -281,9 +293,12 @@ Bug fixes: - PR#6945 and GPR#227: protect Sys and Unix functions against string arguments containing the null character '\000' (c-cube and Xavier Leroy, report by Daniel Bünzli) +- PR#6946: Uncaught exception with wrong type for "%ignore" - PR#6954: Infinite loop in type checker with module aliases - PR#6972, GPR#276: 4.02.3 regression on documentation comments in .cmt files (Leo White, report by Olivier Andrieu) +- PR#6980: Assert failure from polymorphic variants and existentials +- PR#6981: Ctype.Unify(_) with associated functor arg refering to previous one - PR#6982: unexpected type error when packing a module alias - PR#6985: `module type of struct include Bar end exposes %s#row when Bar contains private row types @@ -325,6 +340,8 @@ Bug fixes: Mark Shinwell) - GPR#283: Fix memory leaks in intern.c when OOM is raised (Marc Lasson, review by Alain Frisch) +- GPR#313: Prevent quadratic cases in CSE + (Pierre Chambart, review by Xavier Leroy) Features wishes: - PR#4518, GPR#29: change location format for reporting errors in ocamldoc diff --git a/bytecomp/translcore.ml b/bytecomp/translcore.ml index 36a5b7851d..fd633e1191 100644 --- a/bytecomp/translcore.ml +++ b/bytecomp/translcore.ml @@ -828,6 +828,8 @@ and transl_exp0 e = Lprim(Pmakeblock(0, Immutable), transl_path e.exp_env path :: ll) end + | Texp_extension_constructor (_, path) -> + transl_path e.exp_env path | Texp_variant(l, arg) -> let tag = Btype.hash_variant l in begin match arg with diff --git a/byterun/io.c b/byterun/io.c index 17995267be..eff504d0e2 100644 --- a/byterun/io.c +++ b/byterun/io.c @@ -439,9 +439,29 @@ CAMLexport void caml_finalize_channel(value vchan) chan->name ); - unlink_channel(chan); - caml_stat_free(chan->name); - caml_stat_free(chan); + if (chan->max == NULL && chan->curr != chan->buff){ + /* + This is an unclosed out channel (chan->max == NULL) with a 
+ non-empty buffer: keep it around so the OCaml [at_exit] function + gets a chance to flush it. We would want to simply flush the + channel now, but (i) flushing can raise exceptions, and (ii) it + is potentially a blocking operation. Both are forbidden in a + finalization function. + + Refs: + http://caml.inria.fr/mantis/view.php?id=6902 + https://github.com/ocaml/ocaml/pull/210 + */ + if (chan->name && caml_runtime_warnings_active()) + fprintf(stderr, + "[ocaml] (moreover, it has unflushed data)\n", + chan->name + ); + } else { + unlink_channel(chan); + caml_stat_free(chan->name); + caml_stat_free(chan); + } } static int compare_channel(value vchan1, value vchan2) diff --git a/byterun/misc.c b/byterun/misc.c index f4738bbeea..8191b83e70 100644 --- a/byterun/misc.c +++ b/byterun/misc.c @@ -182,7 +182,7 @@ CAMLexport char * caml_strconcat(int n, ...) /* Runtime warnings */ -uintnat caml_runtime_warnings = 1; +uintnat caml_runtime_warnings = 0; static int caml_runtime_warnings_first = 1; int caml_runtime_warnings_active(void) diff --git a/manual/LICENSE b/manual/LICENSE new file mode 100644 index 0000000000..21075822f3 --- /dev/null +++ b/manual/LICENSE @@ -0,0 +1,9 @@ +The present documentation is copyright Institut National de Recherche en Informatique et en Automatique (INRIA). + +The OCaml documentation and user's manual may be reproduced and distributed in whole or in part, subject to the following conditions: + +- The copyright notice above and this permission notice must be preserved complete on all complete or partial copies. +- Any translation or derivative work of the OCaml documentation and user's manual must be approved by the authors in writing before distribution. +- If you distribute the OCaml documentation and user's manual in part, instructions for obtaining the complete version of this manual must be included, and a means for obtaining a complete version provided. 
+- Small portions may be reproduced as illustrations for reviews or quotes in other works without this permission notice if proper citation is given. + diff --git a/manual/Makefile b/manual/Makefile new file mode 100644 index 0000000000..6ff0ca0f69 --- /dev/null +++ b/manual/Makefile @@ -0,0 +1,16 @@ +all: tools + cd manual; ${MAKE} all +# cd fpcl; ${MAKE} all + +clean: + cd manual; ${MAKE} clean + cd tools; ${MAKE} clean +# cd fpcl; ${MAKE} clean + +release: + cd manual; ${MAKE} release +# cd fpcl; ${MAKE} release + +.PHONY: tools +tools: + cd tools; ${MAKE} clean; ${MAKE} all diff --git a/manual/README.md b/manual/README.md new file mode 100644 index 0000000000..a0f8d6d55f --- /dev/null +++ b/manual/README.md @@ -0,0 +1,45 @@ +OCAML DOCUMENTATION +=================== + +Prerequisites +------------- + +- Any prerequisites required to build OCaml from sources. + +- The Unix editor 'ed', no longer installed by default on some systems. + +- A LaTeX installation. + +- The HeVeA LaTeX-to-HTML convertor (available in OPAM): + <http://hevea.inria.fr/> + +Note that you must make sure `hevea.sty` is installed into TeX properly. Your +package manager may not do this for you. Run `kpsewhich hevea.sty` to check. + + +Building +-------- + +0. Install the OCaml distribution. + +1. Run `make` in the manual. + +NB: If you already set `LD_LIBRARY_PATH` (OS X: `DYLD_LIBRARY_PATH`) + in your environment, don't forget to add + `otherlibs/unix:otherlibs/str` to it in an absolute way. + +Outputs +------- + +In the manual: + +- The HTML Manual is in directory `htmlman`. The main file is `index.html`. + +- The plain text manual is in directory `textman` as file `manual.txt`. + +- The Info manual is in directory `infoman`. + +- The DVI manual is in directory `texstuff` as file `manual.dvi`. + +- The PDF manual is in directory `texstuff` as file `pdfmanual.pdf`. 
+ diff --git a/manual/manual/.cvsignore b/manual/manual/.cvsignore new file mode 100644 index 0000000000..5b687cf734 --- /dev/null +++ b/manual/manual/.cvsignore @@ -0,0 +1,7 @@ +allfiles.tex +biblio.tex +foreword.tex +version.tex +warnings-help.etex +foreword.htex +manual.html diff --git a/manual/manual/.gitignore b/manual/manual/.gitignore new file mode 100644 index 0000000000..71605a704d --- /dev/null +++ b/manual/manual/.gitignore @@ -0,0 +1,8 @@ +allfiles.tex +biblio.tex +foreword.tex +version.tex +warnings.etex +warnings.tex +foreword.htex +manual.html diff --git a/manual/manual/Makefile b/manual/manual/Makefile new file mode 100644 index 0000000000..0a955d3072 --- /dev/null +++ b/manual/manual/Makefile @@ -0,0 +1,143 @@ +# $Id$ + +FILES=allfiles.tex biblio.tex foreword.tex version.tex warnings-help.etex +TEXINPUTS=.:..:../refman:../library:../cmds:../tutorials:../../styles: +TEXFONTS=../../styles: +RELEASE=$$HOME/release/$${RELEASENAME} +HEVEA=hevea +HACHA=hacha +INFO=-fix -exec xxdate.exe -info -w 79 +HTML=-fix -exec xxdate.exe -O +TEXT=-fix -exec xxdate.exe -text -w 79 +SRC = $(abspath ../../) + +export LD_LIBRARY_PATH ?= $(SRC)/otherlibs/unix/:$(SRC)/otherlibs/str/ +export DYLD_LIBRARY_PATH ?= $(SRC)/otherlibs/unix/:$(SRC)/otherlibs/str/ + +OCAMLDOC=$(SRC)/byterun/ocamlrun $(SRC)/ocamldoc/ocamldoc -hide Pervasives +MLIS=$(SRC)/stdlib/*.mli \ + $(SRC)/utils/*.mli \ + $(SRC)/parsing/*.mli \ + $(SRC)/otherlibs/bigarray/bigarray.mli \ + $(SRC)/otherlibs/dynlink/dynlink.mli \ + $(SRC)/otherlibs/graph/graphics.mli \ + $(SRC)/otherlibs/graph/graphicsX11.mli \ + $(SRC)/otherlibs/num/num.mli \ + $(SRC)/otherlibs/num/arith_status.mli \ + $(SRC)/otherlibs/num/big_int.mli \ + $(SRC)/otherlibs/num/ratio.mli \ + $(SRC)/otherlibs/str/*.mli \ + $(SRC)/otherlibs/systhreads/*.mli \ + $(SRC)/otherlibs/unix/*.mli + +manual: files + cd texstuff; \ + TEXINPUTS=$(TEXINPUTS) latex manual.tex + +labltk: cmds/browser.tex library/liblabltk.tex library/tk.mli + cd library; $(MAKE) 
Tk.tex RELEASEDIR=$(SRC) + cd texstuff; \ + TEXINPUTS=$(TEXINPUTS) latex labltk.tex + +index:: + cd texstuff && \ + ../../tools/fix_index.sh manual.idx && \ + makeindex manual.idx + cd texstuff; makeindex manual.kwd.idx + +pdfmanual: files + cd texstuff; \ + TEXINPUTS=$(TEXINPUTS) pdflatex pdfmanual.tex + +index:: + cd texstuff && \ + ../../tools/fix_index.sh pdfmanual.idx && \ + makeindex pdfmanual.idx + cd texstuff; makeindex pdfmanual.kwd.idx + +html: files + cd htmlman; \ + mkdir -p libref ; \ + $(OCAMLDOC) -colorize-code -sort -html \ + -d libref \ + -I $(SRC)/stdlib \ + -I $(SRC)/utils \ + -I $(SRC)/parsing \ + -I $(SRC)/otherlibs/bigarray \ + -I $(SRC)/otherlibs/dynlink \ + -I $(SRC)/otherlibs/graph \ + -I $(SRC)/otherlibs/num \ + -I $(SRC)/otherlibs/str \ + -I $(SRC)/otherlibs/systhreads \ + -I $(SRC)/otherlibs/unix \ + $(MLIS) ; \ + cp -f ../style.css libref ; \ + ${HEVEA} ${HTML} -I .. -I ../refman -I ../library -I ../cmds \ + -I ../tutorials -I ../../styles -I ../texstuff manual.hva \ + -e macros.tex ../manual.tex ; \ + ${HACHA} -tocter manual.html ; \ + + +info: files + cd infoman; rm -f ocaml.info*; \ + ${HEVEA} ${INFO} -o ocaml.info.body -I .. -I ../refman -I ../library \ + -I ../cmds -I ../tutorials -I ../../styles -I ../texstuff \ + ../manual.inf -e macros.tex ../manual.tex + cat manual.info.header infoman/ocaml.info.body > infoman/ocaml.info + cd infoman; rm -f ocaml.info.tmp; gzip -9 ocaml.info* + +text: files + cd textman; \ + ${HEVEA} ${TEXT} -I .. 
-I ../refman -I ../library -I ../cmds \ + -I ../tutorials -I ../../styles -I ../texstuff \ + ../manual.inf -e macros.tex ../manual.tex + +files: $(FILES) + cd refman; $(MAKE) all RELEASEDIR=$(SRC) + cd library; $(MAKE) all RELEASEDIR=$(SRC) + cd cmds; $(MAKE) all RELEASEDIR=$(SRC) + cd tutorials; $(MAKE) all RELEASEDIR=$(SRC) + +all: + $(MAKE) manual pdfmanual RELEASEDIR=$(SRC) + $(MAKE) manual pdfmanual RELEASEDIR=$(SRC) + $(MAKE) index RELEASEDIR=$(SRC) + $(MAKE) manual pdfmanual RELEASEDIR=$(SRC) + $(MAKE) html text info RELEASEDIR=$(SRC) + +clean: + rm -f $(FILES) + cd refman; $(MAKE) clean + cd library; $(MAKE) clean + cd cmds; $(MAKE) clean + cd tutorials; $(MAKE) clean + -rm -f texstuff/* + cd htmlman; rm -rf libref index.html manual*.html *.haux *.hind + cd textman; rm -f manual.txt *.haux *.hind + cd infoman; rm -f ocaml.info ocaml.info-* *.haux *.hind + rm -f warnings-help.etex + +release: + gzip < texstuff/manual.dvi > $(RELEASE)refman.dvi.gz + dvips -o '!gzip > $(RELEASE)refman.ps.gz' texstuff/manual.dvi + cp htmlman/manual.html $(RELEASE)refman.html + rm -f htmlman/manual.{html,haux,hmanual*,htoc} + tar zcf $(RELEASE)refman-html.tar.gz htmlman/*.* htmlman/libref + zip -8 $(RELEASE)refman-html.zip htmlman/*.* htmlman/libref/*.* + cp texstuff/pdfmanual.pdf $(RELEASE)refman.pdf + cp textman/manual.txt $(RELEASE)refman.txt + tar cf - infoman/ocaml.info* | gzip > $(RELEASE)refman.info.tar.gz + +.SUFFIXES: +.SUFFIXES: .tex .etex .htex + +.etex.tex: + ../tools/texquote2 < $*.etex > $*.tex + +version.tex: $(SRC)/VERSION + sed -n -e '1s/^\([0-9]*\.[0-9]*\).*$$/\\def\\ocamlversion{\1}/p' \ + $(SRC)/VERSION > version.tex + +warnings-help.etex: $(SRC)/utils/warnings.ml $(SRC)/ocamlc + $(SRC)/boot/ocamlrun $(SRC)/ocamlc -warn-help \ + | sed -e 's/^ *\([0-9A-Z][0-9]*\)\(.*\)/\\item[\1] \2/' >$@ diff --git a/manual/manual/allfiles.etex b/manual/manual/allfiles.etex new file mode 100644 index 0000000000..2cb4f1d09c --- /dev/null +++ b/manual/manual/allfiles.etex @@ 
-0,0 +1,105 @@ +\makeindex{\jobname} +\makeindex{\jobname.kwd} + +\setlength{\emergencystretch}{50pt} % pour que TeX resolve les overfull hbox lui-meme + +\begin{document} + +\thispagestyle{empty} +\begin{center} +~\vfill +\Huge The OCaml system \\ + release \ocamlversion \\[1cm] +\Large Documentation and user's manual \\[1cm] +\large Xavier Leroy, \\ + Damien Doligez, Alain Frisch, Jacques Garrigue, Didier Rémy and Jérôme Vouillon \\[1cm] + \today \\ + ~ +\vfill +\normalsize Copyright \copyright\ \number\year\ Institut National de + Recherche en Informatique et en Automatique +\end{center} +\cleardoublepage +\setcounter{page}{1} + + +\begin{htmlonly} +\begin{quote} +\rule{}{} +This manual is also available in +\ahref{http://caml.inria.fr/distrib/ocaml-\ocamlversion/ocaml-\ocamlversion-refman.pdf}{PDF}, +\ahref{http://caml.inria.fr/distrib/ocaml-\ocamlversion/ocaml-\ocamlversion-refman.ps.gz}{Postscript}, +\ahref{http://caml.inria.fr/distrib/ocaml-\ocamlversion/ocaml-\ocamlversion-refman.dvi.gz}{DVI}, +\ahref{http://caml.inria.fr/distrib/ocaml-\ocamlversion/ocaml-\ocamlversion-refman.txt}{plain text}, +as a +\ahref{http://caml.inria.fr/distrib/ocaml-\ocamlversion/ocaml-\ocamlversion-refman-html.tar.gz}{bundle of HTML files}, +and as a +\ahref{http://caml.inria.fr/distrib/ocaml-\ocamlversion/ocaml-\ocamlversion-refman.info.tar.gz}{bundle of Emacs Info files}. 
+\rule{}{} +\end{quote} +\end{htmlonly} + +\tableofcontents + +\input{foreword.tex} + +\part{An introduction to OCaml} +\label{p:tutorials} +\input{coreexamples.tex} +\input{moduleexamples.tex} +\input{objectexamples.tex} +\input{lablexamples.tex} +\input{advexamples.tex} + +\part{The OCaml language} +\label{p:refman} +\input{refman.tex} +\input{exten.tex} + +\part{The OCaml tools} +\label{p:commands} + +\input{comp.tex} +\input{top.tex} +\input{runtime.tex} +\input{native.tex} +\input{lexyacc.tex} +\input{depend.tex} +\input{browser.tex} +\input{ocamldoc.tex} +\input{debugger.tex} +\input{profil.tex} +\input{ocamlbuild.tex} +% \input emacs.tex +\input{intf-c.tex} + +\part{The OCaml library} +\label{p:library} +\input{core.tex} +\input{stdlib.tex} +\input{compilerlibs.tex} +\input{libunix.tex} +\input{libnum.tex} +\input{libstr.tex} +\input{libthreads.tex} +\input{libgraph.tex} +\input{libdynlink.tex} +\input{libbigarray.tex} + +\part{Appendix} +\label{p:appendix} + +\ifouthtml +\begin{links} +\item \ahref{libref/index_modules.html}{Index of modules} +\item \ahref{libref/index_module_types.html}{Index of module types} +\item \ahref{libref/index_types.html}{Index of types} +\item \ahref{libref/index_exceptions.html}{Index of exceptions} +\item \ahref{libref/index_values.html}{Index of values} +\end{links} +\else +\printindex{\jobname}{Index to the library} +\fi +\printindex{\jobname.kwd}{Index of keywords} + +\end{document} diff --git a/manual/manual/biblio.etex b/manual/manual/biblio.etex new file mode 100644 index 0000000000..dd4c26dd92 --- /dev/null +++ b/manual/manual/biblio.etex @@ -0,0 +1,240 @@ +\chapter{Further reading} + +For the interested reader, we list below some references to books and +reports related (sometimes loosely) to Caml Light. + +\section{Programming in ML} + +The books below are programming courses taught in ML. 
Their main goal +is to teach programming, not to describe ML in full details --- though +most contain fairly good introductions to the ML language. Some of +those books use the Standard ML dialect instead of the Caml dialect, +so you will have to keep in mind the differences in syntax and in +semantics. + +\begin{itemize} + +\item Pierre Weis and Xavier Leroy. {\it Le langage Caml.} +InterÉditions, 1993. + +The natural companion to this manual, provided you read French. This +book is a step-by-step introduction to programming in Caml, and +presents many realistic examples of Caml programs. + +\item Guy Cousineau and Michel Mauny. {\it Approche fonctionnelle de +la programmation}. Ediscience, 1995. + +Another Caml programming course written in French, with many original +examples. + +\item Lawrence C.\ Paulson. {\it ML for the working programmer.} +Cambridge University Press, 1991. + +A good introduction to programming in Standard ML. Develops a +theorem prover as a complete example. Contains a presentation of +the module system of Standard ML. + +\item Jeffrey D.\ Ullman. {\it Elements of ML programming.} +Prentice Hall, 1993. + +Another good introduction to programming in Standard ML. No realistic +examples, but a very detailed presentation of the language constructs. + +\item Ryan Stansifer. {\em ML primer.} Prentice-Hall, 1992. + +A short, but nice introduction to programming in Standard ML. + +\item Thérèse Accart Hardin and Véronique Donzeau-Gouge Viguié. {\em +Concepts et outils de la programmation. Du fonctionnel à +l'impératif avec Caml et Ada.} InterÉditions, 1992. + +A first course in programming, that first introduces the main programming +notions in Caml, then shows them underlying Ada. Intended for +beginners; slow-paced for the others. + +\item Rachel Harrison. {\em Abstract Data Types in Standard ML}. +John Wiley \& Sons, 1993. + +A presentation of Standard ML from the standpoint of abstract data +types. 
Uses intensively the Standard ML module system. + +\item Harold Abelson and Gerald Jay Sussman. +{\em Structure and Interpretation of Computer Programs.} The MIT +press, 1985. (French translation: {\em Structure et interprétation +des programmes informatiques}, InterÉditions, 1989.) + +An outstanding course on programming, taught in Scheme, the modern +dialect of Lisp. Well worth reading, even if you are more interested +in ML than in Lisp. + +\end{itemize} + +\section{Descriptions of ML dialects} + +The books and reports below are descriptions of various programming +languages from the ML family. They assume some familiarity with ML. + +\begin{itemize} + +\item Xavier Leroy and Pierre Weis. {\em Manuel de référence du +langage Caml.} InterÉditions, 1993. + +The French edition of the present reference manual and user's manual. + +\item Robert Harper. {\em Introduction to Standard ML.} Technical +report ECS-LFCS-86-14, University of Edinburgh, 1986. + +An overview of Standard ML, including the module system. Terse, but +still readable. + +\item Robin Milner, Mads Tofte and Robert Harper. {\em The definition +of Standard ML.} The MIT press, 1990. + +A complete formal definition of Standard ML, in the framework of +structured operational semantics. This book is probably the most +mathematically precise definition of a programming language ever +written. It is heavy on formalism and extremely terse, so +even readers who are thoroughly familiar with ML will have +major difficulties with it. + +\item Robin Milner and Mads Tofte. {\em Commentary on Standard ML.} +The MIT Press, 1991. + +A commentary on the book above, that attempts to explain the most +delicate parts and motivate the design choices. Easier to read than the +Definition, but still rather involving. + +\item Guy Cousineau and Gérard Huet. {\em The CAML primer.} Technical +report~122, INRIA, 1990. + +A short description of the original Caml system, from which Caml Light +has evolved. 
Some familiarity with Lisp is assumed. + +\item Pierre Weis et al. {\em The CAML reference manual, version +2.6.1.} Technical report~121, INRIA, 1990. + +The manual for the original Caml system, from which Caml Light +has evolved. + +\item Michael J.\ Gordon, Arthur J.\ Milner and Christopher P.\ Wadsworth. +{\em Edinburgh LCF.} Lecture Notes in Computer Science +volume~78, Springer-Verlag, 1979. + +This is the first published description of the ML language, at the +time when it was nothing more than the control language for the LCF +system, a theorem prover. This book is now obsolete, since the ML +language has much evolved since then; but it is still of historical +interest. + +\item Paul Hudak, Simon Peyton-Jones and Philip Wadler. {\em +Report on the programming language Haskell, version 1.1.} Technical +report, Yale University, 1991. + +Haskell is a purely functional language with lazy semantics that +shares many important points with ML (full functionality, polymorphic +typing), but has interesting features of its own (dynamic overloading, +also called type classes). + +\end{itemize} + +\section{Implementing functional programming languages} + +The references below are intended for those who are curious to learn +how a language like Caml Light is compiled and implemented. + +\begin{itemize} + +\item Xavier Leroy. {\em The ZINC experiment: an economical +implementation of the ML language.} Technical report~117, INRIA, 1990. +(Available by anonymous FTP on "ftp.inria.fr".) + +A description of the ZINC implementation, the prototype ML +implementation that has evolved into Caml Light. Large parts of this +report still apply to the current Caml Light system, in particular the +description of the execution model and abstract machine. Other parts +are now obsolete. Yet this report still gives a complete overview of the +implementation techniques used in Caml Light. + +\item Simon Peyton-Jones. 
{\em The implementation of functional +programming languages.} Prentice-Hall, 1987. (French translation: +{\em Mise en \oe uvre des langages fonctionnels de programmation}, +Masson, 1990.) + +An excellent description of the implementation of purely functional +languages with lazy semantics, using the technique known as graph +reduction. The part of the book that deals with the transformation +from ML to enriched lambda-calculus directly applies to Caml Light. +You will find a good description of how pattern-matching is compiled +and how types are inferred. The remainder of the book does not apply +directly to Caml Light, since Caml Light is not purely functional (it +has side-effects), has strict semantics, and does not use graph +reduction at all. + +\item Andrew W.\ Appel. {\em Compiling with continuations.} Cambridge +University Press, 1992. + +A complete description of an optimizing compiler for Standard ML, +based on an intermediate representation called continuation-passing +style. Shows how many advanced program optimizations can be applied to +ML. Not directly relevant to the Caml Light system, since Caml Light +does not use continuation-passing style at all, and makes little +attempts at optimizing programs. + +\end{itemize} + +\section{Applications of ML} + +The following reports show ML at work in various, sometimes +unexpected, areas. + +\begin{itemize} + +\item Emmanuel Chailloux and Guy Cousineau. {\em The MLgraph primer.} +Technical report 92-15, École Normale Supérieure, 1992. (Available by +anonymous FTP on "ftp.ens.fr".) +%, répertoire "biblio", fichier +% "liens-92-15.A4.300dpi.ps.Z".) + +Describes a Caml Light library that produces Postscript pictures +through high-level drawing functions. + +\item Xavier Leroy. {\em Programmation du système Unix en Caml Light.} +Technical report~147, INRIA, 1992. (Available by anonymous FTP on +"ftp.inria.fr".) +%, répertoire "INRIA/publication", fichier "RT-0147.ps.Z".) 
+ +A Unix systems programming course, demonstrating the use of the Caml +Light library that gives access to Unix system calls. + +\item John H.\ Reppy. {\em Concurrent programming with events --- The +concurrent ML manual.} Cornell University, 1990. +(Available by anonymous FTP on "research.att.com".) +%, répertoire "dist/ml", fichier "CML-0.9.8.tar.Z".) + +Concurrent ML extends Standard ML of New Jersey with concurrent +processes that communicate through channels and events. + +\item Jeannette M.\ Wing, Manuel Faehndrich, J.\ Gregory Morrisett and +Scott Nettles. {\em Extensions to Standard ML to support +transactions.} Technical report CMU-CS-92-132, Carnegie-Mellon +University, 1992. (Available by anonymous FTP on +"reports.adm.cs.cmu.edu".) +% , répertoire "1992", fichier "CMU-CS-92-132.ps".) + +How to integrate the basic database operations into Standard ML. + +\item Emden R.\ Gansner and John H.\ Reppy. {\em eXene.} Bell Labs, +1991. (Available by anonymous FTP on "research.att.com".) +%, répertoire "dist/ml", fichier "eXene-0.4.tar.Z".) + +An interface between Standard ML of New Jersey and the X Windows +windowing system. + +%% \item Daniel de Rauglaudre. {\em X toolkit in Caml Light.} INRIA, +%% 1992. (Included in the Caml Light distribution.) +%% % Disponible par FTP anonyme sur +%% % "ftp.inria.fr", répertoire "lang/caml-light", fichier "rt5.tar.Z".) +%% +%% An interface between Caml Light and the X Windows windowing system. 
+ +\end{itemize} diff --git a/manual/manual/cmds/.cvsignore b/manual/manual/cmds/.cvsignore new file mode 100644 index 0000000000..81ccbe7105 --- /dev/null +++ b/manual/manual/cmds/.cvsignore @@ -0,0 +1,2 @@ +*.tex +*.htex diff --git a/manual/manual/cmds/.gitignore b/manual/manual/cmds/.gitignore new file mode 100644 index 0000000000..0d45900b3a --- /dev/null +++ b/manual/manual/cmds/.gitignore @@ -0,0 +1,3 @@ +*.tex +*.htex +warnings.etex diff --git a/manual/manual/cmds/Makefile b/manual/manual/cmds/Makefile new file mode 100644 index 0000000000..70136b1da4 --- /dev/null +++ b/manual/manual/cmds/Makefile @@ -0,0 +1,40 @@ +FILES=comp.tex top.tex runtime.tex native.tex lexyacc.tex intf-c.tex \ + depend.tex profil.tex debugger.tex browser.tex ocamldoc.tex \ + warnings-help.tex ocamlbuild.tex + +TRANSF=../../tools/transf +TEXQUOTE=../../tools/texquote2 +FORMAT=../../tools/format-intf + +all: $(FILES) + +clean:: + rm -f $(FILES) + rm -f *~ #*# + +.SUFFIXES: +.SUFFIXES: .tex .etex + +.etex.tex: + $(TEXQUOTE) < $*.etex > $*.tex + +ocamldoc.tex: ocamldoc.etex $(TRANSF) + $(TRANSF) < ocamldoc.etex | $(TEXQUOTE) > ocamldoc.tex + +top.tex: top.etex $(TRANSF) + $(TRANSF) < top.etex | $(TEXQUOTE) > top.tex + +intf-c.tex: intf-c.etex $(TRANSF) + $(TRANSF) < intf-c.etex | $(TEXQUOTE) > intf-c.tex + +lexyacc.tex: lexyacc.etex $(TRANSF) + $(TRANSF) < lexyacc.etex | $(TEXQUOTE) > lexyacc.tex + +debugger.tex: debugger.etex $(TRANSF) + $(TRANSF) < debugger.etex | $(TEXQUOTE) > debugger.tex + +warnings-help.etex: ../warnings-help.etex + cp ../warnings-help.etex . 
+ +clean:: + rm -f warnings-help.etex diff --git a/manual/manual/cmds/browser.etex b/manual/manual/cmds/browser.etex new file mode 100644 index 0000000000..07bb802dda --- /dev/null +++ b/manual/manual/cmds/browser.etex @@ -0,0 +1,183 @@ +\chapter{The browser/editor (ocamlbrowser)} \label{c:browser} +\pdfchapter{The browser/editor (ocamlbrowser)} +%HEVEA\cutname{browser.html} + +This chapter describes OCamlBrowser, a source and compiled interface +browser, written using LablTk. This is a useful companion to the +programmer. + +Its functions are: +\begin{itemize} +\item navigation through OCaml's modules (using compiled interfaces). +\item source editing, type-checking, and browsing. +\item integrated OCaml shell, running as a subprocess. +\end{itemize} + +\section{Invocation} \label{s:browser-options} + +The browser is started by the command "ocamlbrowser", as follows: +\begin{alltt} + ocamlbrowser \var{options} +\end{alltt} + +The following command-line options are recognized by "ocamlbrowser". + +\begin{options} + +\item["-I" \var{directory}] +Add the given directory to the list of directories searched for +source and compiled files. By default, only the standard library +directory is searched. The standard library can also be changed by +setting the "OCAMLLIB" environment variable. + +\item["-nolabels"] +Ignore non-optional labels in types. Labels cannot be used in +applications, and parameter order becomes strict. + +\item["-oldui"] +Old multi-window interface. The default is now more like Smalltalk's +class browser. + +\item["-rectypes"] +Allow arbitrary recursive types during type-checking. By default, +only recursive types where the recursion goes through an object type +are supported. + +\item["-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. + +\item["-w" \var{warning-list}] +Enable or disable warnings according to the argument \var{warning-list}. 
+ +\end{options} + +Most options can also be modified inside the application by the {\bf + Modules - Path editor} and {\bf Compiler - Preferences} commands. +They are inherited when you start a toplevel shell. + +\section{Viewer} +This is the first window you get when you start OCamlBrowser. +It displays a search window, and the list of modules in the load path. +At the top is a row of menus. + +\begin{itemize} +\item {\bf File - Open} and {\bf File - Editor} give access to the + editor. + +\item {\bf File - Shell} creates an OCaml subprocess in a shell. + +\item {\bf View - Show all defs} displays the signature of the currently + selected module. + +\item {\bf View - Search entry} shows/hides the search entry just + below the menu bar. + +\item {\bf Modules - Path editor} changes the load path. {\bf Modules + - Reset cache} rescans the load path and resets the module cache. + Do it if you recompile some interface, or get confused about what is + in the cache. + +\item {\bf Modules - Search symbol} allows searching a symbol either + by its name, like the bottom line of the viewer, or more + interestingly, by its type. {\bf Exact type} searches for a type + with exactly the same information as the pattern (variables match + only variables). {\bf Included type} allows giving only partial + information: the actual type may take more arguments and return more + results, and variables in the pattern match anything. In both cases, + argument and tuple order is irrelevant\footnote{To avoid + combinatorial explosion of the search space, optional arguments in + the actual type are ignored if (1) there are too many + of them, and (2) they do not appear explicitly in the pattern.}, + and unlabeled arguments in the pattern match any label. + +\item The {\bf Search entry} just below the menu bar allows one to + search for an identifier in all modules (wildcards ``?'' and ``*'' + allowed). 
If you choose the "type" option, the search is done by type + inclusion ({\em cf.} Search Symbol - Included type). + +\item The {\bf Close all} button is there to dismiss the windows + created by the Detach button. + By double-clicking on it you will quit the browser. + +\end{itemize} + +\section{Module browsing} + +You select a module in the leftmost box by either clicking on it or +pressing return when it is selected. Fast access is available in all +boxes by pressing the first few letters of the desired name. +Double-clicking / double-return displays the whole signature for the +module. + +Defined identifiers inside the module are displayed in a box to the +right of the previous one. If you click on one, this will either +display its contents in another box (if this is a sub-module) or +display the signature for this identifier below. + +Signatures are clickable. Double clicking with the left mouse +button on an identifier in a signature brings you to its signature. +A single click on the right button pops up a menu displaying the +type declaration for the selected identifier. Its title, when +selectable, also brings you to its signature. + +At the bottom, a series of buttons, depending on the context. +\begin{itemize} +\item {\bf Detach} copies the currently displayed signature in a new window, + to keep it. +\item {\bf Impl} and {\bf Intf} bring you to the implementation or + interface of the currently displayed signature, if it is available. +\end{itemize} + +Control-S lets you search for a string in the signature. + +\section{File editor} +You can edit files with it, if you're not yet used to emacs. Otherwise +you can use it as a browser, making occasional corrections. + +The {\bf Edit} menu contains commands for jump (C-g), search (C-s), +and sending the current phrase (or selection if some text is selected) +to a sub-shell (M-x). For this last option, you may choose the shell +via a dialog. + +Essential functions are in the {\bf Compiler} menu. 
+ +\begin{itemize} +\item {\bf Preferences} opens a dialog to set internals of the editor + and type-checker. + +\item {\bf Lex} adds colors according to lexical categories. + +\item {\bf Typecheck} verifies typing, and memorizes to let one see an + expression's type by double-clicking on it. This is also valid for + interfaces. If an error occurs, the part of the interface preceding + the error is computed. + + After typechecking, pressing the right button pops up a menu that gives + the type of the pointed expression and, where applicable, provides + some links that can be followed. + +\item {\bf Clear errors} dismisses type-checker error messages and warnings. + +\item {\bf Signature} shows the signature of the current file + (after type checking). +\end{itemize} + +\section{Shell} +When you create a shell, a dialog is presented to you, letting you +choose which command you want to run, and the title of the shell (to +choose it in the Editor). + +%You may change the default command by setting the "OCAML" +%environment variable. + +The executed subshell is given the current load path. + +\begin{itemize} +\item {\bf File} use a source file or load a bytecode file. You may + also import the browser's path into the subprocess. +\item {\bf History} M-p and M-n browse up and down. +\item {\bf Signal} C-c interrupts, and you can also kill the subprocess. +\end{itemize} diff --git a/manual/manual/cmds/comp.etex b/manual/manual/cmds/comp.etex new file mode 100644 index 0000000000..b210892182 --- /dev/null +++ b/manual/manual/cmds/comp.etex @@ -0,0 +1,720 @@ +\chapter{Batch compilation (ocamlc)} \label{c:camlc} +\pdfchapter{Batch compilation (ocamlc)} +%HEVEA\cutname{comp.html} + +This chapter describes the OCaml batch compiler "ocamlc", +which compiles OCaml source files to bytecode object files and links +these object files to produce standalone bytecode executable files. +These executable files are then run by the bytecode interpreter +"ocamlrun". 
+ +\section{Overview of the compiler} + +The "ocamlc" command has a command-line interface similar to the one of +most C compilers. It accepts several types of arguments and processes them +sequentially: + +\begin{itemize} +\item +Arguments ending in ".mli" are taken to be source files for +compilation unit interfaces. Interfaces specify the names exported by +compilation units: they declare value names with their types, define +public data types, declare abstract data types, and so on. From the +file \var{x}".mli", the "ocamlc" compiler produces a compiled interface +in the file \var{x}".cmi". + +\item +Arguments ending in ".ml" are taken to be source files for compilation +unit implementations. Implementations provide definitions for the +names exported by the unit, and also contain expressions to be +evaluated for their side-effects. From the file \var{x}".ml", the "ocamlc" +compiler produces compiled object bytecode in the file \var{x}".cmo". + +If the interface file \var{x}".mli" exists, the implementation +\var{x}".ml" is checked against the corresponding compiled interface +\var{x}".cmi", which is assumed to exist. If no interface +\var{x}".mli" is provided, the compilation of \var{x}".ml" produces a +compiled interface file \var{x}".cmi" in addition to the compiled +object code file \var{x}".cmo". The file \var{x}".cmi" produced +corresponds to an interface that exports everything that is defined in +the implementation \var{x}".ml". + +\item +Arguments ending in ".cmo" are taken to be compiled object bytecode. These +files are linked together, along with the object files obtained +by compiling ".ml" arguments (if any), and the OCaml standard +library, to produce a standalone executable program. The order in +which ".cmo" and ".ml" arguments are presented on the command line is +relevant: compilation units are initialized in that order at +run-time, and it is a link-time error to use a component of a unit +before having initialized it. 
Hence, a given \var{x}".cmo" file must come +before all ".cmo" files that refer to the unit \var{x}. + +\item +Arguments ending in ".cma" are taken to be libraries of object bytecode. +A library of object bytecode packs in a single file a set of object +bytecode files (".cmo" files). Libraries are built with "ocamlc -a" +(see the description of the "-a" option below). The object files +contained in the library are linked as regular ".cmo" files (see +above), in the order specified when the ".cma" file was built. The +only difference is that if an object file contained in a library is +not referenced anywhere in the program, then it is not linked in. + +\item +Arguments ending in ".c" are passed to the C compiler, which generates +a ".o" object file (".obj" under Windows). This object file is linked +with the program if the "-custom" flag is set (see the description of +"-custom" below). + +\item +Arguments ending in ".o" or ".a" (".obj" or ".lib" under Windows) +are assumed to be C object files and libraries. They are passed to the +C linker when linking in "-custom" mode (see the description of +"-custom" below). + +\item +Arguments ending in ".so" (".dll" under Windows) +are assumed to be C shared libraries (DLLs). During linking, they are +searched for external C functions referenced from the OCaml code, +and their names are written in the generated bytecode executable. +The run-time system "ocamlrun" then loads them dynamically at program +start-up time. + +\end{itemize} + +The output of the linking phase is a file containing compiled bytecode +that can be executed by the OCaml bytecode interpreter: +the command named "ocamlrun". If "a.out" is the name of the file +produced by the linking phase, the command +\begin{alltt} + ocamlrun a.out \nth{arg}{1} \nth{arg}{2} \ldots \nth{arg}{n} +\end{alltt} +executes the compiled code contained in "a.out", passing it as +arguments the character strings \nth{arg}{1} to \nth{arg}{n}. 
+(See chapter~\ref{c:runtime} for more details.) + +On most systems, the file produced by the linking +phase can be run directly, as in: +\begin{alltt} + ./a.out \nth{arg}{1} \nth{arg}{2} \ldots \nth{arg}{n} +\end{alltt} +The produced file has the executable bit set, and it manages to launch +the bytecode interpreter by itself. + +\section{Options}\label{s:comp-options} + +The following command-line options are recognized by "ocamlc". +The options "-pack", "-a", "-c" and "-output-obj" are mutually exclusive. + + +\begin{options} + +\item["-a"] +Build a library (".cma" file) with the object files (".cmo" files) +given on the command line, instead of linking them into an executable +file. The name of the library must be set with the "-o" option. + +If "-custom", "-cclib" or "-ccopt" options are passed on the command +line, these options are stored in the resulting ".cma" library. Then, +linking with this library automatically adds back the "-custom", +"-cclib" and "-ccopt" options as if they had been provided on the +command line, unless the "-noautolink" option is given. + +\item["-absname"] +Force error messages to show absolute paths for file names. + +\item["-annot"] +Dump detailed information about the compilation (types, bindings, +tail-calls, etc). The information for file \var{src}".ml" +is put into file \var{src}".annot". In case of a type error, dump +all the information inferred by the type-checker before the error. +The \var{src}".annot" file can be used with the emacs commands given in +"emacs/caml-types.el" to display types and other annotations +interactively. + +\item["-bin-annot"] +Dump detailed information about the compilation (types, bindings, +tail-calls, etc) in binary format. The information for file \var{src}".ml" +is put into file \var{src}".cmt". In case of a type error, dump +all the information inferred by the type-checker before the error. 
+The "*.cmt" files produced by "-bin-annot" contain more information +and are much more compact than the files produced by "-annot". + +\item["-c"] +Compile only. Suppress the linking phase of the +compilation. Source code files are turned into compiled files, but no +executable file is produced. This option is useful to +compile modules separately. + +\item["-cc" \var{ccomp}] +Use \var{ccomp} as the C linker when linking in ``custom runtime'' +mode (see the "-custom" option) +and as the C compiler for compiling ".c" source files. + +\item["-cclib" "-l"\var{libname}] +Pass the "-l"\var{libname} option to the C linker when linking in +``custom runtime'' mode (see the "-custom" option). This causes the +given C library to be linked with the program. + +\item["-ccopt" \var{option}] +Pass the given option to the C compiler and linker. When linking in +``custom runtime'' mode, for instance, +"-ccopt -L"\var{dir} causes the C linker to search for C libraries in +directory \var{dir}. (See the "-custom" option.) + +\item["-compat-32"] +Check that the generated bytecode executable can run on 32-bit +platforms and signal an error if it cannot. This is useful when +compiling bytecode on a 64-bit machine. + +\item["-config"] +Print the version number of "ocamlc" and a detailed summary of its +configuration, then exit. + +\item["-custom"] +Link in ``custom runtime'' mode. In the default linking mode, the +linker produces bytecode that is intended to be executed with the +shared runtime system, "ocamlrun". In the custom runtime mode, the +linker produces an output file that contains both the runtime system +and the bytecode for the program. The resulting file is larger, but it +can be executed directly, even if the "ocamlrun" command is not +installed. Moreover, the ``custom runtime'' mode enables static +linking of OCaml code with user-defined C functions, as described in +chapter~\ref{c:intf-c}. 
+\begin{unix} +Never use the "strip" command on executables produced by "ocamlc -custom", +this would remove the bytecode part of the executable. +\end{unix} + +\item["-dllib" "-l"\var{libname}] +Arrange for the C shared library "dll"\var{libname}".so" +("dll"\var{libname}".dll" under Windows) to be loaded dynamically +by the run-time system "ocamlrun" at program start-up time. + +\item["-dllpath" \var{dir}] +Adds the directory \var{dir} to the run-time search path for shared +C libraries. At link-time, shared libraries are searched in the +standard search path (the one corresponding to the "-I" option). +The "-dllpath" option simply stores \var{dir} in the produced +executable file, where "ocamlrun" can find it and use it as +described in section~\ref{s-ocamlrun-dllpath}. + +\item["-for-pack" \var{ident}] +This option is accepted for compatibility with "ocamlopt"; it does +nothing. + +\item["-g"] +Add debugging information while compiling and linking. This option is +required in order to be able to debug the program with "ocamldebug" +(see chapter~\ref{c:debugger}), and to produce stack backtraces when +the program terminates on an uncaught exception (see +section~\ref{ocamlrun-options}). + +\item["-i"] +Cause the compiler to print all defined names (with their inferred +types or their definitions) when compiling an implementation (".ml" +file). No compiled files (".cmo" and ".cmi" files) are produced. +This can be useful to check the types inferred by the +compiler. Also, since the output follows the syntax of interfaces, it +can help in writing an explicit interface (".mli" file) for a file: +just redirect the standard output of the compiler to a ".mli" file, +and edit that file to remove all declarations of unexported names. + +\item["-I" \var{directory}] +Add the given directory to the list of directories searched for +compiled interface files (".cmi"), compiled object code files +(".cmo"), libraries (".cma"), and C libraries specified with +"-cclib -lxxx". 
By default, the current directory is +searched first, then the standard library directory. Directories added +with "-I" are searched after the current directory, in the order in +which they were given on the command line, but before the standard +library directory. See also option "-nostdlib". + +If the given directory starts with "+", it is taken relative to the +standard library directory. For instance, "-I +labltk" adds the +subdirectory "labltk" of the standard library to the search path. + +\item["-impl" \var{filename}] +Compile the file \var{filename} as an implementation file, even if its +extension is not ".ml". + +\item["-intf" \var{filename}] +Compile the file \var{filename} as an interface file, even if its +extension is not ".mli". + +\item["-intf-suffix" \var{string}] +Recognize file names ending with \var{string} as interface files +(instead of the default ".mli"). + +\item["-labels"] +Labels are not ignored in types, labels may be used in applications, +and labelled parameters can be given in any order. This is the default. + +\item["-linkall"] +Force all modules contained in libraries to be linked in. If this +flag is not given, unreferenced modules are not linked in. When +building a library (option "-a"), setting the "-linkall" option forces all +subsequent links of programs involving that library to link all the +modules contained in the library. + +\item["-make-runtime"] +Build a custom runtime system (in the file specified by option "-o") +incorporating the C object files and libraries given on the command +line. This custom runtime system can be used later to execute +bytecode executables produced with the +"ocamlc -use-runtime" \var{runtime-name} option. +See section~\ref{s:custom-runtime} for more information. + +\item["-no-alias-deps"] +Do not record dependencies for module aliases. See +section~\ref{s:module-alias} for more information. + +\item["-no-app-funct"] +Deactivates the applicative behaviour of functors. 
With this option, +each functor application generates new types in its result and +applying the same functor twice to the same argument yields two +incompatible structures. + +\item["-noassert"] +Do not compile assertion checks. Note that the special form +"assert false" is always compiled because it is typed specially. +This flag has no effect when linking already-compiled files. + +\item["-noautolink"] +When linking ".cma" libraries, ignore "-custom", "-cclib" and "-ccopt" +options potentially contained in the libraries (if these options were +given when building the libraries). This can be useful if a library +contains incorrect specifications of C libraries or C options; in this +case, during linking, set "-noautolink" and pass the correct C +libraries and options on the command line. + +\item["-nolabels"] +Ignore non-optional labels in types. Labels cannot be used in +applications, and parameter order becomes strict. + +\item["-nostdlib"] +Do not include the standard library directory in the list of +directories searched for +compiled interface files (".cmi"), compiled object code files +(".cmo"), libraries (".cma"), and C libraries specified with +"-cclib -lxxx". See also option "-I". + +\item["-o" \var{exec-file}] +Specify the name of the output file produced by the compiler. The +default output name is "a.out" under Unix and "camlprog.exe" under +Windows. If the "-a" option is given, specify the name of the library +produced. If the "-pack" option is given, specify the name of the +packed object file produced. If the "-output-obj" option is given, +specify the name of the output file produced. If the "-c" option is +given, specify the name of the object file produced for the {\em next} +source file that appears on the command line. + +\item["-open" \var{Module}] +Opens the given module before processing the interface or +implementation files. If several "-open" options are given, +they are processed in order, just as if +the statements "open!" 
\var{Module1}";;" "..." "open!" \var{ModuleN}";;" +were added at the top of each file. + +\item["-output-obj"] +Cause the linker to produce a C object file instead of a bytecode +executable file. This is useful to wrap OCaml code as a C library, +callable from any C program. See chapter~\ref{c:intf-c}, +section~\ref{s:embedded-code}. The name of the output object file +must be set with the "-o" option. This +option can also be used to produce a C source file (".c" extension) or +a compiled shared/dynamic library (".so" extension, ".dll" under Windows). + +\item["-pack"] +Build a bytecode object file (".cmo" file) and its associated compiled +interface (".cmi") that combines the object +files given on the command line, making them appear as sub-modules of +the output ".cmo" file. The name of the output ".cmo" file must be +given with the "-o" option. For instance, +\begin{verbatim} + ocamlc -pack -o p.cmo a.cmo b.cmo c.cmo +\end{verbatim} +generates compiled files "p.cmo" and "p.cmi" describing a compilation +unit having three sub-modules "A", "B" and "C", corresponding to the +contents of the object files "a.cmo", "b.cmo" and "c.cmo". These +contents can be referenced as "P.A", "P.B" and "P.C" in the remainder +of the program. + +\item["-pp" \var{command}] +Cause the compiler to call the given \var{command} as a preprocessor +for each source file. The output of \var{command} is redirected to +an intermediate file, which is compiled. If there are no compilation +errors, the intermediate file is deleted afterwards. + +\item["-ppx" \var{command}] +After parsing, pipe the abstract syntax tree through the preprocessor +\var{command}. The module "Ast_mapper", described in +chapter~\ref{Ast-underscoremapper}, implements the external interface +of a preprocessor. + +\item["-principal"] +Check information path during type-checking, to make sure that all +types are derived in a principal way. 
When using labelled arguments +and/or polymorphic methods, this flag is required to ensure future +versions of the compiler will be able to infer types correctly, even +if internal algorithms change. +All programs accepted in "-principal" mode are also accepted in the +default mode with equivalent types, but different binary signatures, +and this may slow down type checking; yet it is a good idea to +use it once before publishing source code. + +\item["-rectypes"] +Allow arbitrary recursive types during type-checking. By default, +only recursive types where the recursion goes through an object type +are supported. Note that once you have created an interface using this +flag, you must use it again for all dependencies. + +\item["-runtime-variant" \var{suffix}] +Add the \var{suffix} string to the name of the runtime library used by +the program. Currently, only one such suffix is supported: "d", and +only if the OCaml compiler was configured with option +"-with-debug-runtime". This suffix gives the debug version of the +runtime, which is useful for debugging pointer problems in low-level +code such as C stubs. + +\item["-safe-string"] +Enforce the separation between types "string" and "bytes", +thereby making strings read-only. This will become the default in +a future version of OCaml. + +\item["-short-paths"] +When a type is visible under several module-paths, use the shortest +one when printing the type's name in inferred interfaces and error and +warning messages. + +\item["-strict-sequence"] +Force the left-hand part of each sequence to have type unit. + +\item["-strict-formats"] +Reject invalid formats that were accepted in legacy format +implementations. You should use this flag to detect and fix such +invalid formats, as they will be rejected by future OCaml versions. + +\item["-thread"] +Compile or link multithreaded programs, in combination with the +system "threads" library described in chapter~\ref{c:threads}. 
+ +\item["-unsafe"] +Turn bound checking off for array and string accesses (the "v.(i)" and +"s.[i]" constructs). Programs compiled with "-unsafe" are therefore +slightly faster, but unsafe: anything can happen if the program +accesses an array or string outside of its bounds. + +\item["-unsafe-string"] +Identify the types "string" and "bytes", +thereby making strings writable. For reasons of backward compatibility, +this is the default setting for the moment, but this will change in a future +version of OCaml. + +\item["-use-runtime" \var{runtime-name}] +Generate a bytecode executable file that can be executed on the custom +runtime system \var{runtime-name}, built earlier with +"ocamlc -make-runtime" \var{runtime-name}. +See section~\ref{s:custom-runtime} for more information. + +\item["-v"] +Print the version number of the compiler and the location of the +standard library directory, then exit. + +\item["-verbose"] +Print all external commands before they are executed, in particular +invocations of the C compiler and linker in "-custom" mode. Useful to +debug C library problems. + +\item["-vmthread"] +Compile or link multithreaded programs, in combination with the +VM-level "threads" library described in chapter~\ref{c:threads}. + +\item["-version" or "-vnum"] +Print the version number of the compiler in short form (e.g. "3.11.0"), +then exit. + +\item["-w" \var{warning-list}] +Enable, disable, or mark as fatal the warnings specified by the argument +\var{warning-list}. +Each warning can be {\em enabled} or {\em disabled}, and each warning +can be {\em fatal} or {\em non-fatal}. +If a warning is disabled, it isn't displayed and doesn't affect +compilation in any way (even if it is fatal). If a warning is +enabled, it is displayed normally by the compiler whenever the source +code triggers it. If it is enabled and fatal, the compiler will also +stop with an error after displaying it. 
+ +The \var{warning-list} argument is a sequence of warning specifiers, +with no separators between them. A warning specifier is one of the +following: + +\begin{options} +\item["+"\var{num}] Enable warning number \var{num}. +\item["-"\var{num}] Disable warning number \var{num}. +\item["@"\var{num}] Enable and mark as fatal warning number \var{num}. +\item["+"\var{num1}..\var{num2}] Enable warnings in the given range. +\item["-"\var{num1}..\var{num2}] Disable warnings in the given range. +\item["@"\var{num1}..\var{num2}] Enable and mark as fatal warnings in +the given range. +\item["+"\var{letter}] Enable the set of warnings corresponding to +\var{letter}. The letter may be uppercase or lowercase. +\item["-"\var{letter}] Disable the set of warnings corresponding to +\var{letter}. The letter may be uppercase or lowercase. +\item["@"\var{letter}] Enable and mark as fatal the set of warnings +corresponding to \var{letter}. The letter may be uppercase or +lowercase. +\item[\var{uppercase-letter}] Enable the set of warnings corresponding +to \var{uppercase-letter}. +\item[\var{lowercase-letter}] Disable the set of warnings corresponding +to \var{lowercase-letter}. +\end{options} + +Warning numbers and letters which are out of the range of warnings +that are currently defined are ignored. The warnings are as follows. +\begin{options} +\input{warnings-help.tex} +\end{options} + +The default setting is "-w +a-4-6-7-9-27-29-32..39-41..42-44-45". +It is displayed by "ocamlc -help". +Note that warnings 5 and 10 are not always triggered, depending on +the internals of the type checker. + +\item["-warn-error" \var{warning-list}] +Mark as fatal the warnings specified in the argument \var{warning-list}. +The compiler will stop with an error when one of these warnings is +emitted. 
The \var{warning-list} has the same meaning as for +the "-w" option: a "+" sign (or an uppercase letter) marks the +corresponding warnings as fatal, a "-" +sign (or a lowercase letter) turns them back into non-fatal warnings, and a +"@" sign both enables and marks as fatal the corresponding warnings. + +Note: it is not recommended to use warning sets (i.e. letters) as +arguments to "-warn-error" +in production code, because this can break your build when future versions +of OCaml add some new warnings. + +The default setting is "-warn-error -a" (all warnings are non-fatal). + +\item["-warn-help"] +Show the description of all available warning numbers. + +\item["-where"] +Print the location of the standard library, then exit. + +\item["-" \var{file}] +Process \var{file} as a file name, even if it starts with a dash ("-") +character. + +\item["-help" or "--help"] +Display a short usage summary and exit. +% +\end{options} + +\section{Modules and the file system} + +This short section is intended to clarify the relationship between the +names of the modules corresponding to compilation units and the names +of the files that contain their compiled interface and compiled +implementation. + +The compiler always derives the module name by taking the capitalized +base name of the source file (".ml" or ".mli" file). That is, it +strips the leading directory name, if any, as well as the ".ml" or +".mli" suffix; then, it sets the first letter to uppercase, in order to +comply with the requirement that module names must be capitalized. +For instance, compiling the file "mylib/misc.ml" provides an +implementation for the module named "Misc". Other compilation units +may refer to components defined in "mylib/misc.ml" under the names +"Misc."\var{name}; they can also do "open Misc", then use unqualified +names \var{name}. + +The ".cmi" and ".cmo" files produced by the compiler have the same +base name as the source file. 
Hence, the compiled files always have +their base name equal (modulo capitalization of the first letter) to +the name of the module they describe (for ".cmi" files) or implement +(for ".cmo" files). + +When the compiler encounters a reference to a free module identifier +"Mod", it looks in the search path for a file named "Mod.cmi" or "mod.cmi" +and loads the compiled interface +contained in that file. As a consequence, renaming ".cmi" files is not +advised: the name of a ".cmi" file must always correspond to the name +of the compilation unit it implements. It is admissible to move them +to another directory, if their base name is preserved, and the correct +"-I" options are given to the compiler. The compiler will flag an +error if it loads a ".cmi" file that has been renamed. + +Compiled bytecode files (".cmo" files), on the other hand, can be +freely renamed once created. That's because the linker never attempts +to find by itself the ".cmo" file that implements a module with a +given name: it relies instead on the user providing the list of ".cmo" +files by hand. + +\section{Common errors} \label{s:comp-errors} + +This section describes and explains the most frequently encountered +error messages. + +\begin{options} + +\item[Cannot find file \var{filename}] +The named file could not be found in the current directory, nor in the +directories of the search path. The \var{filename} is either a +compiled interface file (".cmi" file), or a compiled bytecode file +(".cmo" file). If \var{filename} has the format \var{mod}".cmi", this +means you are trying to compile a file that references identifiers +from module \var{mod}, but you have not yet compiled an interface for +module \var{mod}. Fix: compile \var{mod}".mli" or \var{mod}".ml" +first, to create the compiled interface \var{mod}".cmi". + +If \var{filename} has the format \var{mod}".cmo", this +means you are trying to link a bytecode object file that does not +exist yet. Fix: compile \var{mod}".ml" first. 
+ +If your program spans several directories, this error can also appear +because you haven't specified the directories to look into. Fix: add +the correct "-I" options to the command line. + +\item[Corrupted compiled interface \var{filename}] +The compiler produces this error when it tries to read a compiled +interface file (".cmi" file) that has the wrong structure. This means +something went wrong when this ".cmi" file was written: the disk was +full, the compiler was interrupted in the middle of the file creation, +and so on. This error can also appear if a ".cmi" file is modified after +its creation by the compiler. Fix: remove the corrupted ".cmi" file, +and rebuild it. + +\item[This expression has type \nth{t}{1}, but is used with type \nth{t}{2}] +This is by far the most common type error in programs. Type \nth{t}{1} is +the type inferred for the expression (the part of the program that is +displayed in the error message), by looking at the expression itself. +Type \nth{t}{2} is the type expected by the context of the expression; it +is deduced by looking at how the value of this expression is used in +the rest of the program. If the two types \nth{t}{1} and \nth{t}{2} are not +compatible, then the error above is produced. + +In some cases, it is hard to understand why the two types \nth{t}{1} and +\nth{t}{2} are incompatible. For instance, the compiler can report that +``expression of type "foo" cannot be used with type "foo"'', and it +really seems that the two types "foo" are compatible. This is not +always true. Two type constructors can have the same name, but +actually represent different types. This can happen if a type +constructor is redefined. Example: +\begin{verbatim} + type foo = A | B + let f = function A -> 0 | B -> 1 + type foo = C | D + f C +\end{verbatim} +This results in the error message ``expression "C" of type "foo" cannot +be used with type "foo"''. 
+ +\item[The type of this expression, \var{t}, contains type variables + that cannot be generalized] +Type variables ("'a", "'b", \ldots) in a type \var{t} can be in either +of two states: generalized (which means that the type \var{t} is valid +for all possible instantiations of the variables) and not generalized +(which means that the type \var{t} is valid only for one instantiation +of the variables). In a "let" binding "let "\var{name}" = "\var{expr}, +the type-checker normally generalizes as many type variables as +possible in the type of \var{expr}. However, this leads to unsoundness +(a well-typed program can crash) in conjunction with polymorphic +mutable data structures. To avoid this, generalization is performed at +"let" bindings only if the bound expression \var{expr} belongs to the +class of ``syntactic values'', which includes constants, identifiers, +functions, tuples of syntactic values, etc. In all other cases (for +instance, \var{expr} is a function application), a polymorphic mutable +could have been created and generalization is therefore turned off for +all variables occurring in contravariant or non-variant branches of the +type. For instance, if the type of a non-value is "'a list" the +variable is generalizable ("list" is a covariant type constructor), +but not in "'a list -> 'a list" (the left branch of "->" is +contravariant) or "'a ref" ("ref" is non-variant). + +Non-generalized type variables in a type cause no difficulties inside +a given structure or compilation unit (the contents of a ".ml" file, +or an interactive session), but they cannot be allowed inside +signatures nor in compiled interfaces (".cmi" file), because they +could be used inconsistently later. Therefore, the compiler +flags an error when a structure or compilation unit defines a value +\var{name} whose type contains non-generalized type variables. 
There +are two ways to fix this error: +\begin{itemize} +\item Add a type constraint or a ".mli" file to give a monomorphic +type (without type variables) to \var{name}. For instance, instead of +writing +\begin{verbatim} + let sort_int_list = Sort.list (<) + (* inferred type 'a list -> 'a list, with 'a not generalized *) +\end{verbatim} +write +\begin{verbatim} + let sort_int_list = (Sort.list (<) : int list -> int list);; +\end{verbatim} +\item If you really need \var{name} to have a polymorphic type, turn +its defining expression into a function by adding an extra parameter. +For instance, instead of writing +\begin{verbatim} + let map_length = List.map Array.length + (* inferred type 'a array list -> int list, with 'a not generalized *) +\end{verbatim} +write +\begin{verbatim} + let map_length lv = List.map Array.length lv +\end{verbatim} +\end{itemize} + +\item[Reference to undefined global \var{mod}] +This error appears when trying to link an incomplete or incorrectly +ordered set of files. Either you have forgotten to provide an +implementation for the compilation unit named \var{mod} on the command line +(typically, the file named \var{mod}".cmo", or a library containing +that file). Fix: add the missing ".ml" or ".cmo" file to the command +line. Or, you have provided an implementation for the module named +\var{mod}, but it comes too late on the command line: the +implementation of \var{mod} must come before all bytecode object files +that reference \var{mod}. Fix: change the order of ".ml" and ".cmo" +files on the command line. + +Of course, you will always encounter this error if you have mutually +recursive functions across modules. That is, function "Mod1.f" calls +function "Mod2.g", and function "Mod2.g" calls function "Mod1.f". +In this case, no matter what permutations you perform on the command +line, the program will be rejected at link-time. Fixes: +\begin{itemize} +\item Put "f" and "g" in the same module. 
+\item Parameterize one function by the other. +That is, instead of having +\begin{verbatim} +mod1.ml: let f x = ... Mod2.g ... +mod2.ml: let g y = ... Mod1.f ... +\end{verbatim} +define +\begin{verbatim} +mod1.ml: let f g x = ... g ... +mod2.ml: let rec g y = ... Mod1.f g ... +\end{verbatim} +and link "mod1.cmo" before "mod2.cmo". +\item Use a reference to hold one of the two functions, as in : +\begin{verbatim} +mod1.ml: let forward_g = + ref((fun x -> failwith "forward_g") : <type>) + let f x = ... !forward_g ... +mod2.ml: let g y = ... Mod1.f ... + let _ = Mod1.forward_g := g +\end{verbatim} +\end{itemize} + +\item[The external function \var{f} is not available] +This error appears when trying to link code that calls external +functions written in C. As explained in +chapter~\ref{c:intf-c}, such code must be linked with C libraries that +implement the required \var{f} C function. If the C libraries in +question are not shared libraries (DLLs), the code must be linked in +``custom runtime'' mode. Fix: add the required C libraries to the +command line, and possibly the "-custom" option. + +\end{options} + diff --git a/manual/manual/cmds/debugger.etex b/manual/manual/cmds/debugger.etex new file mode 100644 index 0000000000..bc3f6b0db7 --- /dev/null +++ b/manual/manual/cmds/debugger.etex @@ -0,0 +1,667 @@ +\chapter{The debugger (ocamldebug)} \label{c:debugger} +\pdfchapter{The debugger (ocamldebug)} +%HEVEA\cutname{debugger.html} + +This chapter describes the OCaml source-level replay debugger +"ocamldebug". + +\begin{unix} The debugger is available on Unix systems that provide +BSD sockets. +\end{unix} + +\begin{windows} The debugger is available under the Cygwin port of +OCaml, but not under the native Win32 ports. 
+\end{windows} + +\section{Compiling for debugging} + +Before the debugger can be used, the program must be compiled and +linked with the "-g" option: all ".cmo" and ".cma" files that are part +of the program should have been created with "ocamlc -g", and they +must be linked together with "ocamlc -g". + +Compiling with "-g" entails no penalty on the running time of +programs: object files and bytecode executable files are bigger and +take longer to produce, but the executable files run at +exactly the same speed as if they had been compiled without "-g". + +\section{Invocation} + +\subsection{Starting the debugger} + +The OCaml debugger is invoked by running the program +"ocamldebug" with the name of the bytecode executable file as first +argument: +\begin{alltt} + ocamldebug \optvar{options} \var{program} \optvar{arguments} +\end{alltt} +The arguments following \var{program} are optional, and are passed as +command-line arguments to the program being debugged. (See also the +"set arguments" command.) + +The following command-line options are recognized: +\begin{options} +\item["-c " \var{count}] +Set the maximum number of simultaneously live checkpoints to \var{count}. + +\item["-cd " \var{dir}] +Run the debugger program from the working directory \var{dir}, +instead of the current directory. (See also the "cd" command.) + +\item["-emacs"] +Tell the debugger it is executed under Emacs. (See +section~\ref{s:inf-debugger} for information on how to run the +debugger under Emacs.) + +\item["-I "\var{directory}] +Add \var{directory} to the list of directories searched for source +files and compiled files. (See also the "directory" command.) + +\item["-s "\var{socket}] +Use \var{socket} for communicating with the debugged program. See the +description of the command "set socket" (section~\ref{s:communication}) +for the format of \var{socket}. + +\item["-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. 
+ +\item["-help" or "--help"] +Display a short usage summary and exit. +% +\end{options} + +\subsection{Exiting the debugger} + +The command "quit" exits the debugger. You can also exit the debugger +by typing an end-of-file character (usually "ctrl-D"). + +Typing an interrupt character (usually "ctrl-C") will not exit the +debugger, but will terminate the action of any debugger command that is in +progress and return to the debugger command level. + +\section{Commands} \label{s:debugger-commands} + +A debugger command is a single line of input. It starts with a command +name, which is followed by arguments depending on this name. Examples: +\begin{verbatim} + run + goto 1000 + set arguments arg1 arg2 +\end{verbatim} + +A command name can be truncated as long as there is no ambiguity. For +instance, "go 1000" is understood as "goto 1000", since there are no +other commands whose name starts with "go". For the most frequently +used commands, ambiguous abbreviations are allowed. For instance, "r" +stands for "run" even though there are other commands starting with +"r". You can test the validity of an abbreviation using the "help" command. + +If the previous command has been successful, a blank line (typing just +"RET") will repeat it. + +\subsection{Getting help} + +The OCaml debugger has a simple on-line help system, which gives +a brief description of each command and variable. + +\begin{options} +\item["help"] +Print the list of commands. + +\item["help "\var{command}] +Give help about the command \var{command}. + +\item["help set "\var{variable}, "help show "\var{variable}] +Give help about the variable \var{variable}. The list of all debugger +variables can be obtained with "help set". + +\item["help info "\var{topic}] +Give help about \var{topic}. Use "help info" to get a list of known topics. 
+\end{options} + +\subsection{Accessing the debugger state} + +\begin{options} +\item["set "\var{variable} \var{value}] +Set the debugger variable \var{variable} to the value \var{value}. + +\item["show "\var{variable}] +Print the value of the debugger variable \var{variable}. + +\item["info "\var{subject}] +Give information about the given subject. +For instance, "info breakpoints" will print the list of all breakpoints. +\end{options} + +\section{Executing a program} + +\subsection{Events} + +Events are ``interesting'' locations in the source code, corresponding +to the beginning or end of evaluation of ``interesting'' +sub-expressions. Events are the unit of single-stepping (stepping goes +to the next or previous event encountered in the program execution). +Also, breakpoints can only be set at events. Thus, events play the +role of line numbers in debuggers for conventional languages. + +During program execution, a counter is incremented at each event +encountered. The value of this counter is referred to as the {\em current +time}. Thanks to reverse execution, it is possible to jump back and +forth to any time of the execution. + +Here is where the debugger events (written \event) are located in +the source code: +\begin{itemize} +\item Following a function application: +\begin{alltt} +(f arg)\event +\end{alltt} +\item On entrance to a function: +\begin{alltt} +fun x y z -> \event ... +\end{alltt} +\item On each case of a pattern-matching definition (function, +"match"\ldots"with" construct, "try"\ldots"with" construct): +\begin{alltt} +function pat1 -> \event expr1 + | ... 
+ | patN -> \event exprN +\end{alltt} +\item Between subexpressions of a sequence: +\begin{alltt} +expr1; \event expr2; \event ...; \event exprN +\end{alltt} +\item In the two branches of a conditional expression: +\begin{alltt} +if cond then \event expr1 else \event expr2 +\end{alltt} +\item At the beginning of each iteration of a loop: +\begin{alltt} +while cond do \event body done +for i = a to b do \event body done +\end{alltt} +\end{itemize} +Exceptions: A function application followed by a function return is replaced +by the compiler by a jump (tail-call optimization). In this case, no +event is put after the function application. +% Also, no event is put after a function application when the function +% is external (written in C). + +\subsection{Starting the debugged program} + +The debugger starts executing the debugged program only when needed. +This allows setting breakpoints or assigning debugger variables before +execution starts. There are several ways to start execution: +\begin{options} +\item["run"] Run the program until a breakpoint is hit, or the program +terminates. +\item["goto 0"] Load the program and stop on the first event. +\item["goto "\var{time}] Load the program and execute it until the +given time. Useful when you already know approximately at what time +the problem appears. Also useful to set breakpoints on function values +that have not been computed at time 0 (see section~\ref{s:breakpoints}). +\end{options} + +The execution of a program is affected by certain information it +receives when the debugger starts it, such as the command-line +arguments to the program and its working directory. The debugger +provides commands to specify this information ("set arguments" and "cd"). +These commands must be used before program execution starts. If you try +to change the arguments or the working directory after starting your +program, the debugger will kill the program (after asking for confirmation). 
+ +\subsection{Running the program} + +The following commands execute the program forward or backward, +starting at the current time. The execution will stop either when +specified by the command or when a breakpoint is encountered. + +\begin{options} +\item["run"] Execute the program forward from current time. Stops at +next breakpoint or when the program terminates. +\item["reverse"] Execute the program backward from current time. +Mostly useful to go to the last breakpoint encountered before the +current time. +\item["step "\optvar{count}] Run the program and stop at the next +event. With an argument, do it \var{count} times. If \var{count} is 0, +run until the program terminates or a breakpoint is hit. +\item["backstep "\optvar{count}] Run the program backward and stop at +the previous event. With an argument, do it \var{count} times. +\item["next "\optvar{count}] Run the program and stop at the next +event, skipping over function calls. With an argument, do it +\var{count} times. +\item["previous "\optvar{count}] Run the program backward and stop at +the previous event, skipping over function calls. With an argument, do +it \var{count} times. +\item["finish"] Run the program until the current function returns. +\item["start"] Run the program backward and stop at the first event +before the current function invocation. +\end{options} + +\subsection{Time travel} + +You can jump directly to a given time, without stopping on +breakpoints, using the "goto" command. + +As you move through the program, the debugger maintains a history of +the successive times you stop at. The "last" command can be used to +revisit these times: each "last" command moves one step back through +the history. That is useful mainly to undo commands such as "step" +and "next". + +\begin{options} +\item["goto "\var{time}] +Jump to the given time. +\item["last "\optvar{count}] +Go back to the latest time recorded in the execution history. With an +argument, do it \var{count} times. 
+\item["set history "\var{size}] +Set the size of the execution history. +\end{options} + +\subsection{Killing the program} + +\begin{options} +\item["kill"] Kill the program being executed. This command is mainly +useful if you wish to recompile the program without leaving the debugger. +\end{options} + +\section{Breakpoints} \label{s:breakpoints} + +A breakpoint causes the program to stop whenever a certain point in +the program is reached. It can be set in several ways using the +"break" command. Breakpoints are assigned numbers when set, for +further reference. The most comfortable way to set breakpoints is +through the Emacs interface (see section~\ref{s:inf-debugger}). + +\begin{options} +\item["break"] +Set a breakpoint at the current position in the program execution. The +current position must be on an event (i.e., neither at the beginning, +nor at the end of the program). + +\item["break "\var{function}] +Set a breakpoint at the beginning of \var{function}. This works only +when the functional value of the identifier \var{function} has been +computed and assigned to the identifier. Hence this command cannot be +used at the very beginning of the program execution, when all +identifiers are still undefined; use "goto" \var{time} to advance +execution until the functional value is available. + +\item["break \@" \optvar{module} \var{line}] +Set a breakpoint in module \var{module} (or in the current module if +\var{module} is not given), at the first event of line \var{line}. + +\item["break \@" \optvar{module} \var{line} \var{column}] +Set a breakpoint in module \var{module} (or in the current module if +\var{module} is not given), at the event closest to line \var{line}, +column \var{column}. + +\item["break \@" \optvar{module} "#" \var{character}] +Set a breakpoint in module \var{module} at the event closest to +character number \var{character}. + +\item["break "\var{address}] +Set a breakpoint at the code address \var{address}. 
+ +\item["delete "\optvar{breakpoint-numbers}] +Delete the specified breakpoints. Without argument, all breakpoints +are deleted (after asking for confirmation). + +\item["info breakpoints"] Print the list of all breakpoints. +\end{options} + +\section{The call stack} + +Each time the program performs a function application, it saves the +location of the application (the return address) in a block of data +called a stack frame. The frame also contains the local variables of +the caller function. All the frames are allocated in a region of +memory called the call stack. The command "backtrace" (or "bt") +displays parts of the call stack. + +At any time, one of the stack frames is ``selected'' by the debugger; several +debugger commands refer implicitly to the selected frame. In particular, +whenever you ask the debugger for the value of a local variable, the +value is found in the selected frame. The commands "frame", "up" and "down" +select whichever frame you are interested in. + +When the program stops, the debugger automatically selects the +currently executing frame and describes it briefly as the "frame" +command does. + +\begin{options} +\item["frame"] +Describe the currently selected stack frame. + +\item["frame" \var{frame-number}] +Select a stack frame by number and describe it. The frame currently +executing when the program stopped has number 0; its caller has number +1; and so on up the call stack. + +\item["backtrace "\optvar{count}, "bt "\optvar{count}] +Print the call stack. This is useful to see which sequence of function +calls led to the currently executing frame. With a positive argument, +print only the innermost \var{count} frames. +With a negative argument, print only the outermost -\var{count} frames. + +\item["up" \optvar{count}] +Select and display the stack frame just ``above'' the selected frame, +that is, the frame that called the selected frame. An argument says how +many frames to go up. 
+ +\item["down "\optvar{count}] +Select and display the stack frame just ``below'' the selected frame, +that is, the frame that was called by the selected frame. An argument +says how many frames to go down. +\end{options} + +\section{Examining variable values} + +The debugger can print the current value of simple expressions. The +expressions can involve program variables: all the identifiers that +are in scope at the selected program point can be accessed. + +Expressions that can be printed are a subset of OCaml +expressions, as described by the following grammar: +\begin{syntax} +simple-expr: + lowercase-ident + | { capitalized-ident '.' } lowercase-ident + | '*' + | '$' integer + | simple-expr '.' lowercase-ident + | simple-expr '.(' integer ')' + | simple-expr '.[' integer ']' + | '!' simple-expr + | '(' simple-expr ')' +\end{syntax} +The first two cases refer to a value identifier, either unqualified or +qualified by the path to the structure that defines it. +"*" refers to the result just computed (typically, the value of a +function application), and is valid only if the selected event is an +``after'' event (typically, a function application). +@'$' integer@ refers to a previously printed value. The remaining four +forms select part of an expression: respectively, a record field, an +array element, a string element, and the current contents of a +reference. + +\begin{options} +\item["print "\var{variables}] +Print the values of the given variables. "print" can be abbreviated as +"p". +\item["display "\var{variables}] +Same as "print", but limit the depth of printing to 1. Useful to +browse large data structures without printing them in full. +"display" can be abbreviated as "d". +\end{options} + +When printing a complex expression, a name of the form "$"\var{integer} +is automatically assigned to its value. Such names are also assigned +to parts of the value that cannot be printed because the maximal +printing depth is exceeded. 
Named values can be printed later on +with the commands "p $"\var{integer} or "d $"\var{integer}. +Named values are valid only as long as the program is stopped. They +are forgotten as soon as the program resumes execution. + +\begin{options} +\item["set print_depth" \var{d}] +Limit the printing of values to a maximal depth of \var{d}. +\item["set print_length" \var{l}] +Limit the printing of values to at most \var{l} nodes printed. +\end{options} + +\section{Controlling the debugger} + +\subsection{Setting the program name and arguments} + +\begin{options} +\item["set program" \var{file}] +Set the program name to \var{file}. +\item["set arguments" \var{arguments}] +Give \var{arguments} as command-line arguments for the program. +\end{options} + +A shell is used to pass the arguments to the debugged program. You can +therefore use wildcards, shell variables, and file redirections inside +the arguments. To debug programs that read from standard input, it is +recommended to redirect their input from a file (using +"set arguments < input-file"), otherwise input to the program and +input to the debugger are not properly separated, and inputs are not +properly replayed when running the program backwards. + +\subsection{How programs are loaded} + +The "loadingmode" variable controls how the program is executed. + +\begin{options} +\item["set loadingmode direct"] +The program is run directly by the debugger. This is the default mode. +\item["set loadingmode runtime"] +The debugger executes the OCaml runtime "ocamlrun" on the program. +Rarely useful; moreover it prevents the debugging of programs compiled +in ``custom runtime'' mode. +\item["set loadingmode manual"] +The user starts the program manually, when asked by the debugger. +Allows remote debugging (see section~\ref{s:communication}). +\end{options} + +\subsection{Search path for files} + +The debugger searches for source files and compiled interface files in +a list of directories, the search path. 
The search path initially +contains the current directory "." and the standard library directory. +The "directory" command adds directories to the path. + +Whenever the search path is modified, the debugger will clear any +information it may have cached about the files. + +\begin{options} +\item["directory" \var{directorynames}] +Add the given directories to the search path. These directories are +added at the front, and will therefore be searched first. + +\item["directory" \var{directorynames} "for" \var{modulename}] +Same as "directory" \var{directorynames}, but the given directories will be +searched only when looking for the source file of a module that has +been packed into \var{modulename}. + +\item["directory"] +Reset the search path. This requires confirmation. +\end{options} + +\subsection{Working directory} + +Each time a program is started in the debugger, it inherits its working +directory from the current working directory of the debugger. This +working directory is initially whatever it inherited from its parent +process (typically the shell), but you can specify a new working +directory in the debugger with the "cd" command or the "-cd" +command-line option. + +\begin{options} +\item["cd" \var{directory}] +Set the working directory for "ocamldebug" to \var{directory}. + +\item["pwd"] +Print the working directory for "ocamldebug". +\end{options} + +\subsection{Turning reverse execution on and off} + +In some cases, you may want to turn reverse execution off. This speeds +up the program execution, and is also sometimes useful for interactive +programs. + +Normally, the debugger takes checkpoints of the program state from +time to time. That is, it makes a copy of the current state of the +program (using the Unix system call "fork"). If the variable +\var{checkpoints} is set to "off", the debugger will not take any +checkpoints. + +\begin{options} +\item["set checkpoints" \var{on/off}] +Select whether the debugger makes checkpoints or not. 
+\end{options} + +\subsection{Communication between the debugger and the program} +\label{s:communication} + +The debugger communicates with the program being debugged through a +Unix socket. You may need to change the socket name, for example if +you need to run the debugger on a machine and your program on another. + +\begin{options} +\item["set socket" \var{socket}] +Use \var{socket} for communication with the program. \var{socket} can be +either a file name, or an Internet port specification +\var{host}:\var{port}, where \var{host} is a host name or an Internet +address in dot notation, and \var{port} is a port number on the host. +\end{options} + +On the debugged program side, the socket name is passed through the +"CAML_DEBUG_SOCKET" environment variable. + +\subsection{Fine-tuning the debugger} \label{s:fine-tuning} + +Several variables allow fine-tuning of the debugger. Reasonable +defaults are provided, and you should normally not have to change them. + +\begin{options} +\item["set processcount" \var{count}] +Set the maximum number of checkpoints to \var{count}. More checkpoints +facilitate going far back in time, but use more memory and create more +Unix processes. +\end{options} + +As checkpointing is quite expensive, it must not be done too often. On +the other hand, backward execution is faster when checkpoints are +taken more often. In particular, backward single-stepping is more +responsive when many checkpoints have been taken just before the +current time. To fine-tune the checkpointing strategy, the debugger +does not take checkpoints at the same frequency for long displacements +(e.g. "run") and small ones (e.g. "step"). The two variables "bigstep" +and "smallstep" contain the number of events between two checkpoints +in each case. + +\begin{options} +\item["set bigstep" \var{count}] +Set the number of events between two checkpoints for long displacements. 
+\item["set smallstep" \var{count}] +Set the number of events between two checkpoints for small +displacements. +\end{options} + +The following commands display information on checkpoints and events: + +\begin{options} +\item["info checkpoints"] +Print a list of checkpoints. +\item["info events" \optvar{module}] +Print the list of events in the given module (the current module, by default). +\end{options} + +\subsection{User-defined printers} + +Just as in the toplevel system (section~\ref{s:toplevel-directives}), +the user can register functions for printing values of certain types. +For technical reasons, the debugger cannot call printing functions +that reside in the program being debugged. The code for the printing +functions must therefore be loaded explicitly in the debugger. + +\begin{options} +\item["load_printer \""\var{file-name}"\""] +Load in the debugger the indicated ".cmo" or ".cma" object file. The +file is loaded in an environment consisting only of the OCaml +standard library plus the definitions provided by object files +previously loaded using "load_printer". If this file depends on other +object files not yet loaded, the debugger automatically loads them if +it is able to find them in the search path. The loaded file does not +have direct access to the modules of the program being debugged. + +\item["install_printer "\var{printer-name}] +Register the function named \var{printer-name} (a +value path) as a printer for objects whose types match the argument +type of the function. That is, the debugger will call +\var{printer-name} when it has such an object to print. +The printing function \var{printer-name} must use the "Format" library +module to produce its output, otherwise its output will not be +correctly located in the values printed by the toplevel loop. + +The value path \var{printer-name} must refer to one of the functions +defined by the object files loaded using "load_printer". 
It cannot +reference the functions of the program being debugged. + +\item["remove_printer "\var{printer-name}] +Remove the named function from the table of value printers. +\end{options} + +\section{Miscellaneous commands} + +\begin{options} +\item["list" \optvar{module} \optvar{beginning} \optvar{end}] +List the source of module \var{module}, from line number +\var{beginning} to line number \var{end}. By default, 20 lines of the +current module are displayed, starting 10 lines before the current +position. +\item["source" \var{filename}] +Read debugger commands from the script \var{filename}. +\end{options} + +\section{Running the debugger under Emacs} \label{s:inf-debugger} + +The most user-friendly way to use the debugger is to run it under Emacs. +See the file "emacs/README" in the distribution for information on how +to load the Emacs Lisp files for OCaml support. + +The OCaml debugger is started under Emacs by the command "M-x +camldebug", with argument the name of the executable file +\var{progname} to debug. Communication with the debugger takes place +in an Emacs buffer named "*camldebug-"\var{progname}"*". The editing +and history facilities of Shell mode are available for interacting +with the debugger. + +In addition, Emacs displays the source files containing the current +event (the current position in the program execution) and highlights +the location of the event. This display is updated synchronously with +the debugger action. + +The following bindings for the most common debugger commands are +available in the "*camldebug-"\var{progname}"*" buffer: + +\begin{options} +\item["C-c C-s"] (command "step"): execute the program one step forward. +\item["C-c C-k"] (command "backstep"): execute the program one step backward. +\item["C-c C-n"] (command "next"): execute the program one step +forward, skipping over function calls. +\item[Middle mouse button] (command "display"): display named value 
+"$"\var{n} under mouse cursor (supports incremental browsing of large +data structures). +\item["C-c C-p"] (command "print"): print value of identifier at point. +\item["C-c C-d"] (command "display"): display value of identifier at point. +\item["C-c C-r"] (command "run"): execute the program forward to next +breakpoint. +\item["C-c C-v"] (command "reverse"): execute the program backward to +latest breakpoint. +\item["C-c C-l"] (command "last"): go back one step in the execution history. +\item["C-c C-t"] (command "backtrace"): display backtrace of function calls. +\item["C-c C-f"] (command "finish"): run forward till the current +function returns. +\item["C-c <"] (command "up"): select the stack frame above the +current frame (its caller). +\item["C-c >"] (command "down"): select the stack frame below the +current frame (the frame it called). +\end{options} + +In all buffers in OCaml editing mode, the following debugger commands +are also available: + +\begin{options} +\item["C-x C-a C-b"] (command "break"): set a breakpoint at event closest +to point +\item["C-x C-a C-p"] (command "print"): print value of identifier at point +\item["C-x C-a C-d"] (command "display"): display value of identifier at point +\end{options} diff --git a/manual/manual/cmds/depend.etex b/manual/manual/cmds/depend.etex new file mode 100644 index 0000000000..28d780b9f4 --- /dev/null +++ b/manual/manual/cmds/depend.etex @@ -0,0 +1,144 @@ +\chapter{Dependency generator (ocamldep)} \label{c:camldep} +\pdfchapter{Dependency generator (ocamldep)} +%HEVEA\cutname{depend.html} + +The "ocamldep" command scans a set of OCaml source files +(".ml" and ".mli" files) for references to external compilation units, +and outputs dependency lines in a format suitable for the "make" +utility. This ensures that "make" will compile the source files in the +correct order, and recompile those files that need to when a source +file is modified. 
+ +The typical usage is: +\begin{alltt} + ocamldep \var{options} *.mli *.ml > .depend +\end{alltt} +where "*.mli *.ml" expands to all source files in the current +directory and ".depend" is the file that should contain the +dependencies. (See below for a typical "Makefile".) + +Dependencies are generated both for compiling with the bytecode +compiler "ocamlc" and with the native-code compiler "ocamlopt". + +The "ocamlbuild" compilation manager (see chapter~\ref{c:ocamlbuild}) +provides a higher-level, more automated alternative to the combination +of "make" and "ocamldep". + +\section{Options} + +The following command-line options are recognized by "ocamldep". + +\begin{options} + +\item["-I" \var{directory}] +Add the given directory to the list of directories searched for +source files. If a source file "foo.ml" mentions an external +compilation unit "Bar", a dependency on that unit's interface +"bar.cmi" is generated only if the source for "bar" is found in the +current directory or in one of the directories specified with "-I". +Otherwise, "Bar" is assumed to be a module from the standard library, +and no dependencies are generated. For programs that span multiple +directories, it is recommended to pass "ocamldep" the same "-I" options +that are passed to the compiler. + +\item["-ml-synonym" \var{.ext}] +Consider the given extension (with leading dot) to be a synonym for .ml. + +\item["-mli-synonym" \var{.ext}] +Consider the given extension (with leading dot) to be a synonym for .mli. + +\item["-modules"] +Output raw dependencies of the form +\begin{verbatim} + filename: Module1 Module2 ... ModuleN +\end{verbatim} +where "Module1", \ldots, "ModuleN" are the names of the compilation +units referenced within the file "filename", but these names are not +resolved to source file names. Such raw dependencies cannot be used +by "make", but can be post-processed by other tools such as "Omake".
+ +\item["-native"] +Generate dependencies for a pure native-code program (no bytecode +version). When an implementation file (".ml" file) has no explicit +interface file (".mli" file), "ocamldep" generates dependencies on the +bytecode compiled file (".cmo" file) to reflect interface changes. +This can cause unnecessary bytecode recompilations for programs that +are compiled to native-code only. The flag "-native" causes +dependencies on native compiled files (".cmx") to be generated instead +of on ".cmo" files. (This flag makes no difference if all source files +have explicit ".mli" interface files.) + +\item["-pp" \var{command}] +Cause "ocamldep" to call the given \var{command} as a preprocessor +for each source file. + +\item["-slash"] +Under Windows, use a forward slash (/) as the path separator instead +of the usual backward slash ($\backslash$). Under Unix, this option does +nothing. + +\item["-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. + +\item["-help" or "--help"] +Display a short usage summary and exit. +% +\end{options} + +\section{A typical Makefile} + +Here is a template "Makefile" for an OCaml program. + +\begin{verbatim} +OCAMLC=ocamlc +OCAMLOPT=ocamlopt +OCAMLDEP=ocamldep +INCLUDES= # all relevant -I options here +OCAMLFLAGS=$(INCLUDES) # add other options for ocamlc here +OCAMLOPTFLAGS=$(INCLUDES) # add other options for ocamlopt here + +# prog1 should be compiled to bytecode, and is composed of three +# units: mod1, mod2 and mod3. + +# The list of object files for prog1 +PROG1_OBJS=mod1.cmo mod2.cmo mod3.cmo + +prog1: $(PROG1_OBJS) + $(OCAMLC) -o prog1 $(OCAMLFLAGS) $(PROG1_OBJS) + +# prog2 should be compiled to native-code, and is composed of two +# units: mod4 and mod5.
+ +# The list of object files for prog2 +PROG2_OBJS=mod4.cmx mod5.cmx + +prog2: $(PROG2_OBJS) + $(OCAMLOPT) -o prog2 $(OCAMLFLAGS) $(PROG2_OBJS) + +# Common rules +.SUFFIXES: .ml .mli .cmo .cmi .cmx + +.ml.cmo: + $(OCAMLC) $(OCAMLFLAGS) -c $< + +.mli.cmi: + $(OCAMLC) $(OCAMLFLAGS) -c $< + +.ml.cmx: + $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $< + +# Clean up +clean: + rm -f prog1 prog2 + rm -f *.cm[iox] + +# Dependencies +depend: + $(OCAMLDEP) $(INCLUDES) *.mli *.ml > .depend + +include .depend +\end{verbatim} + diff --git a/manual/manual/cmds/intf-c.etex b/manual/manual/cmds/intf-c.etex new file mode 100644 index 0000000000..6c9a81b508 --- /dev/null +++ b/manual/manual/cmds/intf-c.etex @@ -0,0 +1,2284 @@ +\chapter{Interfacing\label{c:intf-c} C with OCaml} +\pdfchapterfold{-9}{Interfacing C with OCaml} +%HEVEA\cutname{intfc.html} + +This chapter describes how user-defined primitives, written in C, can +be linked with OCaml code and called from OCaml functions, and how +these C functions can call back to OCaml code. + +\section{Overview and compilation information} +\pdfsection{Overview and compilation information} + +\subsection{Declaring primitives} + +\begin{syntax} +definition: ... + | 'external' value-name ':' typexpr '=' external-declaration +; +external-declaration: string-literal [ string-literal [ string-literal ] ] +\end{syntax} + +User primitives are declared in an implementation file or +@"struct"\ldots"end"@ module expression using the @"external"@ keyword: +\begin{alltt} + external \var{name} : \var{type} = \var{C-function-name} +\end{alltt} +This defines the value name \var{name} as a function with type +\var{type} that executes by calling the given C function. +For instance, here is how the "input" primitive is declared in the +standard library module "Pervasives": +\begin{verbatim} + external input : in_channel -> bytes -> int -> int -> int + = "input" +\end{verbatim} +Primitives with several arguments are always curried. 
The C function +does not necessarily have the same name as the ML function. + +External functions thus defined can be specified in interface files or +@"sig"\ldots"end"@ signatures either as regular values +\begin{alltt} + val \var{name} : \var{type} +\end{alltt} +thus hiding their implementation as C functions, or explicitly as +``manifest'' external functions +\begin{alltt} + external \var{name} : \var{type} = \var{C-function-name} +\end{alltt} +The latter is slightly more efficient, as it allows clients of the +module to call directly the C function instead of going through the +corresponding OCaml function. On the other hand, it should not be used +in library modules if they have side-effects at toplevel, as this +direct call interferes with the linker's algorithm for removing unused +modules from libraries at link-time. + +The arity (number of arguments) of a primitive is automatically +determined from its OCaml type in the "external" declaration, by +counting the number of function arrows in the type. For instance, +"input" above has arity 4, and the "input" C function is called with +four arguments. Similarly, +\begin{verbatim} + external input2 : in_channel * bytes * int * int -> int = "input2" +\end{verbatim} +has arity 1, and the "input2" C function receives one argument (which +is a quadruple of OCaml values). + +Type abbreviations are not expanded when determining the arity of a +primitive. For instance, +\begin{verbatim} + type int_endo = int -> int + external f : int_endo -> int_endo = "f" + external g : (int -> int) -> (int -> int) = "f" +\end{verbatim} +"f" has arity 1, but "g" has arity 2. This allows a primitive to +return a functional value (as in the "f" example above): just remember +to name the functional return type in a type abbreviation. + +The language accepts external declarations with one or two +flag strings in addition to the C function's name. These flags are +reserved for the implementation of the standard library. 
+ +\subsection{Implementing primitives} + +User primitives with arity $n \leq 5$ are implemented by C functions +that take $n$ arguments of type "value", and return a result of type +"value". The type "value" is the type of the representations for OCaml +values. It encodes objects of several base types (integers, +floating-point numbers, strings,~\ldots) as well as OCaml data +structures. The type "value" and the associated conversion +functions and macros are described in detail below. For instance, +here is the declaration for the C function implementing the "input" +primitive: +\begin{verbatim} +CAMLprim value input(value channel, value buffer, value offset, value length) +{ + ... +} +\end{verbatim} +When the primitive function is applied in an OCaml program, the C +function is called with the values of the expressions to which the +primitive is applied as arguments. The value returned by the function is +passed back to the OCaml program as the result of the function +application. + +User primitives with arity greater than 5 should be implemented by two +C functions. The first function, to be used in conjunction with the +bytecode compiler "ocamlc", receives two arguments: a pointer to an +array of OCaml values (the values for the arguments), and an +integer which is the number of arguments provided. The other function, +to be used in conjunction with the native-code compiler "ocamlopt", +takes its arguments directly. For instance, here are the two C +functions for the 7-argument primitive "Nat.add_nat": +\begin{verbatim} +CAMLprim value add_nat_native(value nat1, value ofs1, value len1, + value nat2, value ofs2, value len2, + value carry_in) +{ + ... 
+} +CAMLprim value add_nat_bytecode(value * argv, int argn) +{ + return add_nat_native(argv[0], argv[1], argv[2], argv[3], + argv[4], argv[5], argv[6]); +} +\end{verbatim} +The names of the two C functions must be given in the primitive +declaration, as follows: +\begin{alltt} + external \var{name} : \var{type} = + \var{bytecode-C-function-name} \var{native-code-C-function-name} +\end{alltt} +For instance, in the case of "add_nat", the declaration is: +\begin{verbatim} + external add_nat: nat -> int -> int -> nat -> int -> int -> int -> int + = "add_nat_bytecode" "add_nat_native" +\end{verbatim} + +Implementing a user primitive is actually two separate tasks: on the +one hand, decoding the arguments to extract C values from the given +OCaml values, and encoding the return value as an OCaml +value; on the other hand, actually computing the result from the arguments. +Except for very simple primitives, it is often preferable to have two +distinct C functions to implement these two tasks. The first function +actually implements the primitive, taking native C values as +arguments and returning a native C value. The second function, +often called the ``stub code'', is a simple wrapper around the first +function that converts its arguments from OCaml values to C values, +calls the first function, and converts the returned C value to an OCaml +value. For instance, here is the stub code for the "input" +primitive: +\begin{verbatim} +CAMLprim value input(value channel, value buffer, value offset, value length) +{ + return Val_long(getblock((struct channel *) channel, + &Byte(buffer, Long_val(offset)), + Long_val(length))); +} +\end{verbatim} +(Here, "Val_long", "Long_val" and so on are conversion macros for the +type "value", that will be described later. The "CAMLprim" macro +expands to the required compiler directives to ensure that the +function is exported and accessible from OCaml.)
+The hard work is performed by the function "getblock", which is +declared as: +\begin{verbatim} +long getblock(struct channel * channel, char * p, long n) +{ + ... +} +\end{verbatim} + +To write C code that operates on OCaml values, the following +include files are provided: +\begin{tableau}{|l|p{12cm}|}{Include file}{Provides} +\entree{"caml/mlvalues.h"}{definition of the "value" type, and conversion +macros} +\entree{"caml/alloc.h"}{allocation functions (to create structured OCaml +objects)} +\entree{"caml/memory.h"}{miscellaneous memory-related functions +and macros (for GC interface, in-place modification of structures, etc).} +\entree{"caml/fail.h"}{functions for raising exceptions +(see section~\ref{s:c-exceptions})} +\entree{"caml/callback.h"}{callback from C to OCaml (see +section~\ref{s:callback}).} +\entree{"caml/custom.h"}{operations on custom blocks (see +section~\ref{s:custom}).} +\entree{"caml/intext.h"}{operations for writing user-defined +serialization and deserialization functions for custom blocks +(see section~\ref{s:custom}).} +\entree{"caml/threads.h"}{operations for interfacing in the presence + of multiple threads (see section~\ref{s:C-multithreading}).} +\end{tableau} +These files reside in the "caml/" subdirectory of the OCaml +standard library directory, which is returned by the command +"ocamlc -where" (usually "/usr/local/lib/ocaml" or "/usr/lib/ocaml"). + +{\bf Note:} It is recommended to define the macro "CAML_NAME_SPACE" +before including these header files. If you do not define it, the +header files will also define short names (without the "caml_" prefix) +for most functions, which usually produce clashes with names defined +by other C libraries that you might use. Including the header files +without "CAML_NAME_SPACE" is only supported for backward +compatibility. 
+ +\subsection{Statically linking C code with OCaml code} +\label{staticlink-c-code} + +The OCaml runtime system comprises three main parts: the bytecode +interpreter, the memory manager, and a set of C functions that +implement the primitive operations. Some bytecode instructions are +provided to call these C functions, designated by their offset in a +table of functions (the table of primitives). + +In the default mode, the OCaml linker produces bytecode for the +standard runtime system, with a standard set of primitives. References +to primitives that are not in this standard set result in the +``unavailable C primitive'' error. (Unless dynamic loading of C +libraries is supported -- see section~\ref{dynlink-c-code} below.) + +In the ``custom runtime'' mode, the OCaml linker scans the +object files and determines the set of required primitives. Then, it +builds a suitable runtime system, by calling the native code linker with: +\begin{itemize} +\item the table of the required primitives; +\item a library that provides the bytecode interpreter, the +memory manager, and the standard primitives; +\item libraries and object code files (".o" files) mentioned on the +command line for the OCaml linker, that provide implementations +for the user's primitives. +\end{itemize} +This builds a runtime system with the required primitives. The OCaml +linker generates bytecode for this custom runtime system. The +bytecode is appended to the end of the custom runtime system, so that +it will be automatically executed when the output file (custom +runtime + bytecode) is launched. + +To link in ``custom runtime'' mode, execute the "ocamlc" command with: +\begin{itemize} +\item the "-custom" option; +\item the names of the desired OCaml object files (".cmo" and ".cma" files) ; +\item the names of the C object files and libraries (".o" and ".a" +files) that implement the required primitives. 
Under Unix and Windows, +a library named "lib"\var{name}".a" (respectively, ".lib") residing in one of +the standard library directories can also be specified as "-cclib -l"\var{name}. +\end{itemize} + +If you are using the native-code compiler "ocamlopt", the "-custom" +flag is not needed, as the final linking phase of "ocamlopt" always +builds a standalone executable. To build a mixed OCaml/C executable, +execute the "ocamlopt" command with: +\begin{itemize} +\item the names of the desired OCaml native object files (".cmx" and +".cmxa" files); +\item the names of the C object files and libraries (".o", ".a", +".so" or ".dll" files) that implement the required primitives. +\end{itemize} + +Starting with Objective Caml 3.00, it is possible to record the +"-custom" option as well as the names of C libraries in an OCaml +library file ".cma" or ".cmxa". For instance, consider an OCaml library +"mylib.cma", built from the OCaml object files "a.cmo" and "b.cmo", +which reference C code in "libmylib.a". If the library is +built as follows: +\begin{alltt} + ocamlc -a -o mylib.cma -custom a.cmo b.cmo -cclib -lmylib +\end{alltt} +users of the library can simply link with "mylib.cma": +\begin{alltt} + ocamlc -o myprog mylib.cma ... +\end{alltt} +and the system will automatically add the "-custom" and "-cclib +-lmylib" options, achieving the same effect as +\begin{alltt} + ocamlc -o myprog -custom a.cmo b.cmo ... -cclib -lmylib +\end{alltt} +The alternative is of course to build the library without extra +options: +\begin{alltt} + ocamlc -a -o mylib.cma a.cmo b.cmo +\end{alltt} +and then ask users to provide the "-custom" and "-cclib -lmylib" +options themselves at link-time: +\begin{alltt} + ocamlc -o myprog -custom mylib.cma ... -cclib -lmylib +\end{alltt} +The former alternative is more convenient for the final users of the +library, however. 
+ +\subsection{Dynamically linking C code with OCaml code} +\label{dynlink-c-code} + +Starting with Objective Caml 3.03, an alternative to static linking of C code +using the "-custom" option is provided. In this mode, the OCaml linker +generates a pure bytecode executable (no embedded custom runtime +system) that simply records the names of dynamically-loaded libraries +containing the C code. The standard OCaml runtime system "ocamlrun" +then dynamically loads these libraries, and resolves references to the +required primitives, before executing the bytecode. + +This facility is currently supported and known to work well under +Linux, MacOS~X, and Windows. It is supported, but not +fully tested yet, under FreeBSD, Tru64, Solaris and Irix. It is not +supported yet under other Unixes. + +To dynamically link C code with OCaml code, the C code must first be +compiled into a shared library (under Unix) or DLL (under Windows). +This involves 1- compiling the C files with appropriate C compiler +flags for producing position-independent code (when required by the +operating system), and 2- building a +shared library from the resulting object files. The resulting shared +library or DLL file must be installed in a place where "ocamlrun" can +find it later at program start-up time (see +section~\ref{s-ocamlrun-dllpath}). +Finally (step 3), execute the "ocamlc" command with +\begin{itemize} +\item the names of the desired OCaml object files (".cmo" and ".cma" files) ; +\item the names of the C shared libraries (".so" or ".dll" files) that +implement the required primitives. Under Unix and Windows, +a library named "dll"\var{name}".so" (respectively, ".dll") residing +in one of the standard library directories can also be specified as +"-dllib -l"\var{name}. +\end{itemize} +Do {\em not} set the "-custom" flag, otherwise you're back to static linking +as described in section~\ref{staticlink-c-code}. +The "ocamlmklib" tool (see section~\ref{s-ocamlmklib}) +automates steps 2 and 3.
+ +As in the case of static linking, it is possible (and recommended) to +record the names of C libraries in an OCaml ".cma" library archive. +Consider again an OCaml library +"mylib.cma", built from the OCaml object files "a.cmo" and "b.cmo", +which reference C code in "dllmylib.so". If the library is +built as follows: +\begin{alltt} + ocamlc -a -o mylib.cma a.cmo b.cmo -dllib -lmylib +\end{alltt} +users of the library can simply link with "mylib.cma": +\begin{alltt} + ocamlc -o myprog mylib.cma ... +\end{alltt} +and the system will automatically add the "-dllib -lmylib" option, +achieving the same effect as +\begin{alltt} + ocamlc -o myprog a.cmo b.cmo ... -dllib -lmylib +\end{alltt} +Using this mechanism, users of the library "mylib.cma" do not need to +know that it references C code, nor whether this C code must be +statically linked (using "-custom") or dynamically linked. + +\subsection{Choosing between static linking and dynamic linking} + +After having described two different ways of linking C code with OCaml +code, we now review the pros and cons of each, to help developers of +mixed OCaml/C libraries decide. + +The main advantage of dynamic linking is that it preserves the +platform-independence of bytecode executables. That is, the bytecode +executable contains no machine code, and can therefore be compiled on +platform $A$ and executed on other platforms $B$, $C$, \ldots, as long +as the required shared libraries are available on all these +platforms. In contrast, executables generated by "ocamlc -custom" run +only on the platform on which they were created, because they embed a +custom-tailored runtime system specific to that platform. In +addition, dynamic linking results in smaller executables. + +Another advantage of dynamic linking is that the final users of the +library do not need to have a C compiler, C linker, and C runtime +libraries installed on their machines.
This is no big deal under +Unix and Cygwin, but many Windows users are reluctant to install +Microsoft Visual C just to be able to do "ocamlc -custom". + +There are two drawbacks to dynamic linking. The first is that the +resulting executable is not stand-alone: it requires the shared +libraries, as well as "ocamlrun", to be installed on the machine +executing the code. If you wish to distribute a stand-alone +executable, it is better to link it statically, using "ocamlc -custom +-ccopt -static" or "ocamlopt -ccopt -static". Dynamic linking also +raises the ``DLL hell'' problem: some care must be taken to ensure +that the right versions of the shared libraries are found at start-up +time. + +The second drawback of dynamic linking is that it complicates the +construction of the library. The C compiler and linker flags to +compile to position-independent code and build a shared library vary +wildly between different Unix systems. Also, dynamic linking is not +supported on all Unix systems, requiring a fall-back case to static +linking in the Makefile for the library. The "ocamlmklib" command +(see section~\ref{s-ocamlmklib}) tries to hide some of these system +dependencies. + +In conclusion: dynamic linking is highly recommended under the native +Windows port, because there are no portability problems and it is much +more convenient for the end users. Under Unix, dynamic linking should +be considered for mature, frequently used libraries because it +enhances platform-independence of bytecode executables. For new or +rarely-used libraries, static linking is much simpler to set up in a +portable way. + +\subsection{Building standalone custom runtime systems} +\label{s:custom-runtime} + +It is sometimes inconvenient to build a custom runtime system each +time OCaml code is linked with C libraries, like "ocamlc -custom" does. 
+For one thing, the building of the runtime system is slow on some +systems (that have bad linkers or slow remote file systems); for +another thing, the platform-independence of bytecode files is lost, +forcing to perform one "ocamlc -custom" link per platform of interest. + +An alternative to "ocamlc -custom" is to build separately a custom +runtime system integrating the desired C libraries, then generate +``pure'' bytecode executables (not containing their own runtime +system) that can run on this custom runtime. This is achieved by the +"-make-runtime" and "-use-runtime" flags to "ocamlc". For example, +to build a custom runtime system integrating the C parts of the +``Unix'' and ``Threads'' libraries, do: +\begin{verbatim} + ocamlc -make-runtime -o /home/me/ocamlunixrun unix.cma threads.cma +\end{verbatim} +To generate a bytecode executable that runs on this runtime system, +do: +\begin{alltt} + ocamlc -use-runtime /home/me/ocamlunixrun -o myprog \char92 + unix.cma threads.cma {\it{your .cmo and .cma files}} +\end{alltt} +The bytecode executable "myprog" can then be launched as usual: +"myprog" \var{args} or "/home/me/ocamlunixrun myprog" \var{args}. + +Notice that the bytecode libraries "unix.cma" and "threads.cma" must +be given twice: when building the runtime system (so that "ocamlc" +knows which C primitives are required) and also when building the +bytecode executable (so that the bytecode from "unix.cma" and +"threads.cma" is actually linked in). + +\section{The \texttt{value} type} +\pdfsection{The value type} + +All OCaml objects are represented by the C type "value", +defined in the include file "caml/mlvalues.h", along with macros to +manipulate values of that type. 
An object of type "value" is either: +\begin{itemize} +\item an unboxed integer; +\item a pointer to a block inside the heap (such as the blocks +allocated through one of the \verb"caml_alloc_*" functions below); +\item a pointer to an object outside the heap (e.g., a pointer to a block +allocated by "malloc", or to a C variable). + %%% FIXME will change in 4.02.0 (?) +\end{itemize} + +\subsection{Integer values} + +Integer values encode 63-bit signed integers (31-bit on 32-bit +architectures). They are unboxed (unallocated). + +\subsection{Blocks} + +Blocks in the heap are garbage-collected, and therefore have strict +structure constraints. Each block includes a header containing the +size of the block (in words), and the tag of the block. +The tag governs how the contents of the blocks are structured. A tag +lower than "No_scan_tag" indicates a structured block, containing +well-formed values, which is recursively traversed by the garbage +collector. A tag greater than or equal to "No_scan_tag" indicates a +raw block, whose contents are not scanned by the garbage collector. +For the benefit of ad-hoc polymorphic primitives such as equality and +structured input-output, structured and raw blocks are further +classified according to their tags as follows: +\begin{tableau}{|l|p{10cm}|}{Tag}{Contents of the block} +\entree{0 to $\hbox{"No_scan_tag"}-1$}{A structured block (an array of +OCaml objects). Each field is a "value".} +\entree{"Closure_tag"}{A closure representing a functional value. 
The first +word is a pointer to a piece of code, the remaining words are +"value"s containing the environment.} +\entree{"String_tag"}{A character string or a byte sequence.} +\entree{"Double_tag"}{A double-precision floating-point number.} +\entree{"Double_array_tag"}{An array or record of double-precision +floating-point numbers.} +\entree{"Abstract_tag"}{A block representing an abstract datatype.} +\entree{"Custom_tag"}{A block representing an abstract datatype + with user-defined finalization, comparison, hashing, + serialization and deserialization functions attached.} +\end{tableau} + +\subsection{Pointers outside the heap} + +Any word-aligned pointer to an address outside the heap can be safely +cast to and from the type "value". This includes pointers returned by +"malloc", and pointers to C variables (of size at least one word) +obtained with the \verb'&' operator. + %%% FIXME will change in 4.02.0 (?) + +Caution: if a pointer returned by "malloc" is cast to the type "value" +and returned to OCaml, explicit deallocation of the pointer using +"free" is potentially dangerous, because the pointer may still be +accessible from the OCaml world. Worse, the memory space deallocated +by "free" can later be reallocated as part of the OCaml heap; the +pointer, formerly pointing outside the OCaml heap, now points inside +the OCaml heap, and this can crash the garbage collector. To avoid +these problems, it is preferable to wrap the pointer in an OCaml block +with tag "Abstract_tag" or "Custom_tag". + +\section{Representation of OCaml data types} +\pdfsection{Representation of OCaml data types} + +This section describes how OCaml data types are encoded in the +"value" type.
+ +\subsection{Atomic types} + +\begin{tableau}{|l|l|}{OCaml type}{Encoding} +\entree{"int"}{Unboxed integer values.} +\entree{"char"}{Unboxed integer values (ASCII code).} +\entree{"float"}{Blocks with tag "Double_tag".} +\entree{"bytes"}{Blocks with tag "String_tag".} +\entree{"string"}{Blocks with tag "String_tag".} +\entree{"int32"}{Blocks with tag "Custom_tag".} +\entree{"int64"}{Blocks with tag "Custom_tag".} +\entree{"nativeint"}{Blocks with tag "Custom_tag".} +\end{tableau} + +\subsection{Tuples and records} + +Tuples are represented by pointers to blocks, with tag~0. + +Records are also represented by zero-tagged blocks. The ordering of +labels in the record type declaration determines the layout of +the record fields: the value associated to the label +declared first is stored in field~0 of the block, the value associated +to the second label goes in field~1, and so on. + +As an optimization, records whose fields all have static type "float" +are represented as arrays of floating-point numbers, with tag +"Double_array_tag". (See the section below on arrays.) + +\subsection{Arrays} + +Arrays of integers and pointers are represented like tuples, +that is, as pointers to blocks tagged~0. They are accessed with the +"Field" macro for reading and the "caml_modify" function for writing. + +Arrays of floating-point numbers (type "float array") +have a special, unboxed, more efficient representation. +These arrays are represented by pointers to blocks with tag +"Double_array_tag". They should be accessed with the "Double_field" +and "Store_double_field" macros. + +\subsection{Concrete data types} + +Constructed terms are represented either by unboxed integers (for +constant constructors) or by blocks whose tag encodes the constructor +(for non-constant constructors). The constant constructors and the +non-constant constructors for a given concrete type are numbered +separately, starting from 0, in the order in which they appear in the +concrete type declaration.
A constant constructor is represented by +the unboxed integer equal to its constructor number. A non-constant +constructor declared with $n$ arguments is represented by +a block of size $n$, tagged with the constructor number; the $n$ +fields contain its arguments. Example: + +\begin{tableau}{|l|p{8cm}|}{Constructed term}{Representation} +\entree{"()"}{"Val_int(0)"} +\entree{"false"}{"Val_int(0)"} +\entree{"true"}{"Val_int(1)"} +\entree{"[]"}{"Val_int(0)"} +\entree{"h::t"}{Block with size = 2 and tag = 0; first field +contains "h", second field "t".} +\end{tableau} + +As a convenience, "caml/mlvalues.h" defines the macros "Val_unit", +"Val_false" and "Val_true" to refer to "()", "false" and "true". + +The following example illustrates the assignment of +integers and block tags to constructors: +\begin{verbatim} +type t = + | A (* First constant constructor -> integer "Val_int(0)" *) + | B of string (* First non-constant constructor -> block with tag 0 *) + | C (* Second constant constructor -> integer "Val_int(1)" *) + | D of bool (* Second non-constant constructor -> block with tag 1 *) + | E of t * t (* Third non-constant constructor -> block with tag 2 *) +\end{verbatim} + +\subsection{Objects} + +Objects are represented as blocks with tag "Object_tag". The first +field of the block refers to the object's class and associated method +suite, in a format that cannot easily be exploited from C. The second +field contains a unique object ID, used for comparisons. The remaining +fields of the object contain the values of the instance variables of +the object. It is unsafe to access directly instance variables, as the +type system provides no guarantee about the instance variables +contained by an object. +% Instance variables are stored in the order in which they +% appear in the class definition (taking inherited classes into +% account). 
+ +One may extract a public method from an object using the C function +"caml_get_public_method" (declared in "<caml/mlvalues.h>"). +Since public method tags are hashed in the same way as variant tags, +and methods are functions taking self as first argument, if you want +to do the method call "foo#bar" from the C side, you should call: +\begin{verbatim} + caml_callback(caml_get_public_method(foo, caml_hash_variant("bar")), foo); +\end{verbatim} + +\subsection{Polymorphic variants} + +Like constructed terms, polymorphic variant values are represented either +as integers (for polymorphic variants without argument), or as blocks +(for polymorphic variants with an argument). Unlike constructed +terms, variant constructors are not numbered starting from 0, but +identified by a hash value (an OCaml integer), as computed by the C function +"caml_hash_variant" (declared in "<caml/mlvalues.h>"): +the hash value for a variant constructor named, say, "VConstr" +is "caml_hash_variant(\"VConstr\")". + +The variant value "`VConstr" is represented by +"caml_hash_variant(\"VConstr\")". The variant value "`VConstr("\var{v}")" is +represented by a block of size 2 and tag 0, with field number 0 +containing "caml_hash_variant(\"VConstr\")" and field number 1 containing +\var{v}. + +Unlike constructed values, polymorphic variant values taking several +arguments are not flattened. +That is, "`VConstr("\var{v}", "\var{w}")" is represented by a block +of size 2, whose field number 1 contains the representation of the +pair "("\var{v}", "\var{w}")", rather than a block of size 3 +containing \var{v} and \var{w} in fields 1 and 2. + +\section{Operations on values} +\pdfsection{Operations on values} + +\subsection{Kind tests} + +\begin{itemize} +\item "Is_long("\var{v}")" is true if value \var{v} is an immediate integer, +false otherwise +\item "Is_block("\var{v}")" is true if value \var{v} is a pointer to a block, +and false if it is an immediate integer.
+\end{itemize} + +\subsection{Operations on integers} + +\begin{itemize} +\item "Val_long("\var{l}")" returns the value encoding the "long int" \var{l}. +\item "Long_val("\var{v}")" returns the "long int" encoded in value \var{v}. +\item "Val_int("\var{i}")" returns the value encoding the "int" \var{i}. +\item "Int_val("\var{v}")" returns the "int" encoded in value \var{v}. +\item "Val_bool("\var{x}")" returns the OCaml boolean representing the +truth value of the C integer \var{x}. +\item "Bool_val("\var{v}")" returns 0 if \var{v} is the OCaml boolean +"false", 1 if \var{v} is "true". +\item "Val_true", "Val_false" represent the OCaml booleans "true" and "false". +\end{itemize} + +\subsection{Accessing blocks} + +\begin{itemize} +\item "Wosize_val("\var{v}")" returns the size of the block \var{v}, in words, +excluding the header. +\item "Tag_val("\var{v}")" returns the tag of the block \var{v}. +\item "Field("\var{v}", "\var{n}")" returns the value contained in the +$n\th$ field of the structured block \var{v}. Fields are numbered from 0 to +$\hbox{"Wosize_val"}(v)-1$. +\item "Store_field("\var{b}", "\var{n}", "\var{v}")" stores the value +\var{v} in the field number \var{n} of value \var{b}, which must be a +structured block. +\item "Code_val("\var{v}")" returns the code part of the closure \var{v}. +\item "caml_string_length("\var{v}")" returns the length (number of bytes) +of the string or byte sequence \var{v}. +\item "Byte("\var{v}", "\var{n}")" returns the $n\th$ byte of the string +or byte sequence \var{v}, with type "char". Bytes are numbered from 0 to +$\hbox{"caml_string_length"}(v)-1$. +\item "Byte_u("\var{v}", "\var{n}")" returns the $n\th$ byte of the string +or byte sequence \var{v}, with type "unsigned char". Bytes are +numbered from 0 to $\hbox{"caml_string_length"}(v)-1$. +\item "String_val("\var{v}")" returns a pointer to the first byte of the string +or byte sequence \var{v}, with type "char *".
This pointer is a valid C +string: there is a +null byte after the last byte in the string. However, OCaml +strings and byte sequences can contain embedded null bytes, which will confuse +the usual C functions over strings. +\item "Double_val("\var{v}")" returns the floating-point number contained in +value \var{v}, with type "double". +\item "Double_field("\var{v}", "\var{n}")" returns +the $n\th$ element of the array of floating-point numbers \var{v} (a +block tagged "Double_array_tag"). +\item "Store_double_field("\var{v}", "\var{n}", +"\var{d}")" stores the double precision floating-point number \var{d} +in the $n\th$ element of the array of floating-point numbers \var{v}. +\item "Data_custom_val("\var{v}")" returns a pointer to the data part +of the custom block \var{v}. This pointer has type "void *" and must +be cast to the type of the data contained in the custom block. +\item "Int32_val("\var{v}")" returns the 32-bit integer contained +in the "int32" \var{v}. +\item "Int64_val("\var{v}")" returns the 64-bit integer contained +in the "int64" \var{v}. +\item "Nativeint_val("\var{v}")" returns the long integer contained +in the "nativeint" \var{v}. +\end{itemize} +The expressions "Field("\var{v}", "\var{n}")", +"Byte("\var{v}", "\var{n}")" and +"Byte_u("\var{v}", "\var{n}")" +are valid l-values. Hence, they can be assigned to, resulting in an +in-place modification of value \var{v}. +Assigning directly to "Field("\var{v}", "\var{n}")" must +be done with care to avoid confusing the garbage collector (see +below). + +\subsection{Allocating blocks} + +\subsubsection{Simple interface} + +\begin{itemize} +\item +"Atom("\var{t}")" returns an ``atom'' (zero-sized block) with tag \var{t}. +Zero-sized blocks are preallocated outside of the heap. It is +incorrect to try and allocate a zero-sized block using the functions below. +For instance, "Atom(0)" represents the empty array. 
+\item +"caml_alloc("\var{n}", "\var{t}")" returns a fresh block of size \var{n} +with tag \var{t}. If \var{t} is less than "No_scan_tag", then the +fields of the block are initialized with a valid value in order to +satisfy the GC constraints. +\item +"caml_alloc_tuple("\var{n}")" returns a fresh block of size +\var{n} words, with tag 0. +\item +"caml_alloc_string("\var{n}")" returns a byte sequence (or string) value of +length \var{n} bytes. The sequence initially contains uninitialized bytes. +\item +"caml_copy_string("\var{s}")" returns a string or byte sequence value +containing a copy of the null-terminated C string \var{s} (a "char *"). +\item +"caml_copy_double("\var{d}")" returns a floating-point value initialized +with the "double" \var{d}. +\item +"caml_copy_int32("\var{i}")", "caml_copy_int64("\var{i}")" and +"caml_copy_nativeint("\var{i}")" return a value of OCaml type "int32", +"int64" and "nativeint", respectively, initialized with the integer +\var{i}. +\item +"caml_alloc_array("\var{f}", "\var{a}")" allocates an array of values, calling +function \var{f} over each element of the input array \var{a} to transform it +into a value. The array \var{a} is an array of pointers terminated by the +null pointer. The function \var{f} receives each pointer as argument, and +returns a value. The zero-tagged block returned by +"caml_alloc_array("\var{f}", "\var{a}")" is filled with the values returned by the +successive calls to \var{f}. (This function must not be used to build +an array of floating-point numbers.) +\item +"caml_copy_string_array("\var{p}")" allocates an array of strings or byte +sequences, copied from the pointer to a string array \var{p} +(a \verb"char **"). \var{p} must be NULL-terminated. +\end{itemize} + +\subsubsection{Low-level interface} + +The following functions are slightly more efficient than "caml_alloc", but +also much more difficult to use.
+ +From the standpoint of the allocation functions, blocks are divided +according to their size as zero-sized blocks, small blocks (with size +less than or equal to \verb"Max_young_wosize"), and large blocks (with +size greater than \verb"Max_young_wosize"). The constant +\verb"Max_young_wosize" is declared in the include file "mlvalues.h". It +is guaranteed to be at least 64 (words), so that any block with +constant size less than or equal to 64 can be assumed to be small. For +blocks whose size is computed at run-time, the size must be compared +against \verb"Max_young_wosize" to determine the correct allocation procedure. + +\begin{itemize} +\item +"caml_alloc_small("\var{n}", "\var{t}")" returns a fresh small block of size +$n \leq \hbox{"Max_young_wosize"}$ words, with tag \var{t}. +If this block is a structured block (i.e. if $t < \hbox{"No_scan_tag"}$), then +the fields of the block (initially containing garbage) must be initialized +with legal values (using direct assignment to the fields of the block) +before the next allocation. +\item +"caml_alloc_shr("\var{n}", "\var{t}")" returns a fresh block of size +\var{n}, with tag \var{t}. +The size of the block can be greater than \verb"Max_young_wosize". (It +can also be smaller, but in this case it is more efficient to call +"caml_alloc_small" instead of "caml_alloc_shr".) +If this block is a structured block (i.e. if $t < \hbox{"No_scan_tag"}$), then +the fields of the block (initially containing garbage) must be initialized +with legal values (using the "caml_initialize" function described below) +before the next allocation. +\end{itemize} + +\subsection{Raising exceptions} \label{s:c-exceptions} + +Two functions are provided to raise two standard exceptions: +\begin{itemize} +\item "caml_failwith("\var{s}")", where \var{s} is a null-terminated C string (with +type \verb"char *"), raises exception "Failure" with argument \var{s}. 
+\item "caml_invalid_argument("\var{s}")", where \var{s} is a null-terminated C +string (with type \verb"char *"), raises exception "Invalid_argument" +with argument \var{s}. +\end{itemize} + +Raising arbitrary exceptions from C is more delicate: the +exception identifier is dynamically allocated by the OCaml program, and +therefore must be communicated to the C function using the +registration facility described below in section~\ref{s:register-exn}. +Once the exception identifier is recovered in C, the following +functions actually raise the exception: +\begin{itemize} +\item "caml_raise_constant("\var{id}")" raises the exception \var{id} with +no argument; +\item "caml_raise_with_arg("\var{id}", "\var{v}")" raises the exception +\var{id} with the OCaml value \var{v} as argument; +\item "caml_raise_with_args("\var{id}", "\var{n}", "\var{v}")" +raises the exception \var{id} with the OCaml values +\var{v}"[0]", \ldots, \var{v}"["\var{n}"-1]" as arguments; +\item "caml_raise_with_string("\var{id}", "\var{s}")", where \var{s} is a +null-terminated C string, raises the exception \var{id} with a copy of +the C string \var{s} as argument. +\end{itemize} + +\section{Living in harmony with the garbage collector} +\pdfsection{Living in harmony with the garbage collector} + +Unused blocks in the heap are automatically reclaimed by the garbage +collector. This requires some cooperation from C code that +manipulates heap-allocated blocks. + +\subsection{Simple interface} + +All the macros described in this section are declared in the +"memory.h" header file. + +\begin{gcrule} +A function that has parameters or local variables of type "value" must +begin with a call to one of the "CAMLparam" macros and return with +"CAMLreturn", "CAMLreturn0", or "CAMLreturnT". +\end{gcrule} + +There are six "CAMLparam" macros: "CAMLparam0" to "CAMLparam5", which +take zero to five arguments respectively. 
If your function has no more +than 5 parameters of type "value", use the corresponding macros +with these parameters as arguments. If your function has more than 5 +parameters of type "value", use "CAMLparam5" with five of these +parameters, and use one or more calls to the "CAMLxparam" macros for +the remaining parameters ("CAMLxparam1" to "CAMLxparam5"). + +The macros "CAMLreturn", "CAMLreturn0", and "CAMLreturnT" are used to +replace the C +keyword "return". Every occurrence of "return x" must be replaced by +"CAMLreturn (x)" if "x" has type "value", or "CAMLreturnT (t, x)" +(where "t" is the type of "x"); every occurrence of "return" without +argument must be +replaced by "CAMLreturn0". If your C function is a procedure (i.e. if +it returns void), you must insert "CAMLreturn0" at the end (to replace +C's implicit "return"). + +\paragraph{Note:} some C compilers give bogus warnings about unused +variables "caml__dummy_xxx" at each use of "CAMLparam" and +"CAMLlocal". You should ignore them. + +\goodbreak + +Example: +\begin{verbatim} +void foo (value v1, value v2, value v3) +{ + CAMLparam3 (v1, v2, v3); + ... + CAMLreturn0; +} +\end{verbatim} + +\paragraph{Note:} if your function is a primitive with more than 5 arguments +for use with the byte-code runtime, its arguments are not "value"s and +must not be declared (they have types "value *" and "int"). + +\begin{gcrule} +Local variables of type "value" must be declared with one of the +"CAMLlocal" macros. Arrays of "value"s are declared with +"CAMLlocalN". These macros must be used at the beginning of the +function, not in a nested block. +\end{gcrule} + +The macros "CAMLlocal1" to "CAMLlocal5" declare and initialize one to +five local variables of type "value". The variable names are given as +arguments to the macros. "CAMLlocalN("\var{x}", "\var{n}")" declares +and initializes a local variable of type "value ["\var{n}"]". You can +use several calls to these macros if you have more than 5 local +variables. 
+ +Example: +\begin{verbatim} +value bar (value v1, value v2, value v3) +{ + CAMLparam3 (v1, v2, v3); + CAMLlocal1 (result); + result = caml_alloc (3, 0); + ... + CAMLreturn (result); +} +\end{verbatim} + +\begin{gcrule} +Assignments to the fields of structured blocks must be done with the +"Store_field" macro (for normal blocks) or "Store_double_field" macro +(for arrays and records of floating-point numbers). Other assignments +must not use "Store_field" nor "Store_double_field". +\end{gcrule} + +"Store_field ("\var{b}", "\var{n}", "\var{v}")" stores the value +\var{v} in the field number \var{n} of value \var{b}, which must be a +block (i.e. "Is_block("\var{b}")" must be true). + +Example: +\begin{verbatim} +value bar (value v1, value v2, value v3) +{ + CAMLparam3 (v1, v2, v3); + CAMLlocal1 (result); + result = caml_alloc (3, 0); + Store_field (result, 0, v1); + Store_field (result, 1, v2); + Store_field (result, 2, v3); + CAMLreturn (result); +} +\end{verbatim} + +\paragraph{Warning:} The first argument of "Store_field" and +"Store_double_field" must be a parameter declared by "CAMLparam*" or +a variable declared by "CAMLlocal*" to ensure that a garbage +collection triggered by the evaluation of the other arguments will not +invalidate the first argument after it is computed. + +\begin{gcrule} Global variables containing values must be registered +with the garbage collector using the "caml_register_global_root" function. +\end{gcrule} + +Registration of a global variable "v" is achieved by calling +"caml_register_global_root(&v)" just before or just after a valid +value is stored in "v" for the first time. You must not call any +of the OCaml runtime functions or macros between registering and +storing the value. + +A registered global variable "v" can be un-registered by calling +"caml_remove_global_root(&v)".
+ +If the contents of the global variable "v" are seldom modified after +registration, better performance can be achieved by calling +"caml_register_generational_global_root(&v)" to register "v" (after +its initialization with a valid "value", but before any allocation or +call to the GC functions), +and "caml_remove_generational_global_root(&v)" to un-register it. In +this case, you must not modify the value of "v" directly, but you must +use "caml_modify_generational_global_root(&v,x)" to set it to "x". +The garbage collector takes advantage of the guarantee that "v" is not +modified between calls to "caml_modify_generational_global_root" to scan it +less often. This improves performance if the +modifications of "v" happen less often than minor collections. + +\paragraph{Note:} The "CAML" macros use identifiers (local variables, type +identifiers, structure tags) that start with "caml__". Do not use any +identifier starting with "caml__" in your programs. + +\subsection{Low-level interface} + +% Il faudrait simplifier violemment ce qui suit. +% En gros, dire quand on n'a pas besoin de declarer les variables +% et dans quels cas on peut se passer de "Store_field". + +We now give the GC rules corresponding to the low-level allocation +functions "caml_alloc_small" and "caml_alloc_shr". You can ignore those rules +if you stick to the simplified allocation function "caml_alloc". + +\begin{gcrule} After a structured block (a block with tag less than +"No_scan_tag") is allocated with the low-level functions, all fields +of this block must be filled with well-formed values before the next +allocation operation. 
If the block has been allocated with +"caml_alloc_small", filling is performed by direct assignment to the fields +of the block: +\begin{alltt} + Field(\var{v}, \var{n}) = \nth{v}{n}; +\end{alltt} +If the block has been allocated with "caml_alloc_shr", filling is performed +through the "caml_initialize" function: +\begin{alltt} + caml_initialize(&Field(\var{v}, \var{n}), \nth{v}{n}); +\end{alltt} +\end{gcrule} + +The next allocation can trigger a garbage collection. The garbage +collector assumes that all structured blocks contain well-formed +values. Newly created blocks contain random data, which generally do +not represent well-formed values. + +If you really need to allocate before the fields can receive their +final value, first initialize with a constant value (e.g. +"Val_unit"), then allocate, then modify the fields with the correct +value (see rule~6). + +%% \begin{gcrule} Local variables and function parameters containing +%% values must be registered with the garbage collector (using the +%% "Begin_roots" and "End_roots" macros), if they are to survive a call +%% to an allocation function. +%% \end{gcrule} +%% +%% Registration is performed with the "Begin_roots" set of macros. +%% "Begin_roots1("\var{v}")" registers variable \var{v} with the garbage +%% collector. Generally, \var{v} will be a local variable or a +%% parameter of your function. It must be initialized to a valid value +%% (e.g. "Val_unit") before the first allocation. Likewise, +%% "Begin_roots2", \ldots, "Begin_roots5" +%% let you register up to 5 variables at the same time. "Begin_root" is +%% the same as "Begin_roots1". "Begin_roots_block("\var{ptr}","\var{size}")" +%% allows you to register an array of roots. \var{ptr} is a pointer to +%% the first element, and \var{size} is the number of elements in the +%% array. 
+%% +%% Once registered, each of your variables (or array element) has the +%% following properties: if it points to a heap-allocated block, this +%% block (and its contents) will not be reclaimed; moreover, if this +%% block is relocated by the garbage collector, the variable is updated +%% to point to the new location for the block. +%% +%% Each of the "Begin_roots" macros open a C block that must be closed +%% with a matching "End_roots" at the same nesting level. The block must +%% be exited normally (i.e. not with "return" or "goto"). However, the +%% roots are automatically un-registered if an OCaml exception is raised, +%% so you can exit the block with "failwith", "invalid_argument", or one +%% of the "raise" functions. +%% +%% {\bf Note:} The "Begin_roots" macros use a local variable and a +%% structure tag named "caml__roots_block". Do not use this identifier +%% in your programs. + +\begin{gcrule} Direct assignment to a field of a block, as in +\begin{alltt} + Field(\var{v}, \var{n}) = \var{w}; +\end{alltt} +is safe only if \var{v} is a block newly allocated by "caml_alloc_small"; +that is, if no allocation took place between the +allocation of \var{v} and the assignment to the field. In all other cases, +never assign directly. If the block has just been allocated by "caml_alloc_shr", +use "caml_initialize" to assign a value to a field for the first time: +\begin{alltt} + caml_initialize(&Field(\var{v}, \var{n}), \var{w}); +\end{alltt} +Otherwise, you are updating a field that previously contained a +well-formed value; then, call the "caml_modify" function: +\begin{alltt} + caml_modify(&Field(\var{v}, \var{n}), \var{w}); +\end{alltt} +\end{gcrule} + +To illustrate the rules above, here is a C function that builds and +returns a list containing the two integers given as parameters. 
+First, we write it using the simplified allocation functions: +\begin{verbatim} +value alloc_list_int(int i1, int i2) +{ + CAMLparam0 (); + CAMLlocal2 (result, r); + + r = caml_alloc(2, 0); /* Allocate a cons cell */ + Store_field(r, 0, Val_int(i2)); /* car = the integer i2 */ + Store_field(r, 1, Val_int(0)); /* cdr = the empty list [] */ + result = caml_alloc(2, 0); /* Allocate the other cons cell */ + Store_field(result, 0, Val_int(i1)); /* car = the integer i1 */ + Store_field(result, 1, r); /* cdr = the first cons cell */ + CAMLreturn (result); +} +\end{verbatim} +Here, the registering of "result" is not strictly needed, because no +allocation takes place after it gets its value, but it's easier and +safer to simply register all the local variables that have type "value". + +Here is the same function written using the low-level allocation +functions. We notice that the cons cells are small blocks and can be +allocated with "caml_alloc_small", and filled by direct assignments on +their fields. +\begin{verbatim} +value alloc_list_int(int i1, int i2) +{ + CAMLparam0 (); + CAMLlocal2 (result, r); + + r = caml_alloc_small(2, 0); /* Allocate a cons cell */ + Field(r, 0) = Val_int(i2); /* car = the integer i2 */ + Field(r, 1) = Val_int(0); /* cdr = the empty list [] */ + result = caml_alloc_small(2, 0); /* Allocate the other cons cell */ + Field(result, 0) = Val_int(i1); /* car = the integer i1 */ + Field(result, 1) = r; /* cdr = the first cons cell */ + CAMLreturn (result); +} +\end{verbatim} +In the two examples above, the list is built bottom-up. Here is an +alternate way, that proceeds top-down. It is less efficient, but +illustrates the use of "caml_modify". 
+\begin{verbatim} +value alloc_list_int(int i1, int i2) +{ + CAMLparam0 (); + CAMLlocal2 (tail, r); + + r = caml_alloc_small(2, 0); /* Allocate a cons cell */ + Field(r, 0) = Val_int(i1); /* car = the integer i1 */ + Field(r, 1) = Val_int(0); /* A dummy value */ + tail = caml_alloc_small(2, 0); /* Allocate the other cons cell */ + Field(tail, 0) = Val_int(i2); /* car = the integer i2 */ + Field(tail, 1) = Val_int(0); /* cdr = the empty list [] */ + caml_modify(&Field(r, 1), tail); /* cdr of the result = tail */ + CAMLreturn (r); +} +\end{verbatim} +It would be incorrect to perform +"Field(r, 1) = tail" directly, because the allocation of "tail" +has taken place since "r" was allocated. + + +\section{A complete example} +\pdfsection{A complete example} + +This section outlines how the functions from the Unix "curses" library +can be made available to OCaml programs. First of all, here is +the interface "curses.mli" that declares the "curses" primitives and +data types: +\begin{verbatim} +(* File curses.mli -- declaration of primitives and data types *) +type window (* The type "window" remains abstract *) +external initscr: unit -> window = "caml_curses_initscr" +external endwin: unit -> unit = "caml_curses_endwin" +external refresh: unit -> unit = "caml_curses_refresh" +external wrefresh : window -> unit = "caml_curses_wrefresh" +external newwin: int -> int -> int -> int -> window = "caml_curses_newwin" +external addch: char -> unit = "caml_curses_addch" +external mvwaddch: window -> int -> int -> char -> unit = "caml_curses_mvwaddch" +external addstr: string -> unit = "caml_curses_addstr" +external mvwaddstr: window -> int -> int -> string -> unit + = "caml_curses_mvwaddstr" +(* lots more omitted *) +\end{verbatim} +To compile this interface: +\begin{verbatim} + ocamlc -c curses.mli +\end{verbatim} + +To implement these functions, we just have to provide the stub code; +the core functions are already implemented in the "curses" library.
+The stub code file, "curses_stubs.c", looks like this: +\begin{verbatim} +/* File curses_stubs.c -- stub code for curses */ +#include <curses.h> +#include <caml/mlvalues.h> +#include <caml/memory.h> +#include <caml/alloc.h> +#include <caml/custom.h> + +/* Encapsulation of opaque window handles (of type WINDOW *) + as OCaml custom blocks. */ + +static struct custom_operations curses_window_ops = { + "fr.inria.caml.curses_windows", + custom_finalize_default, + custom_compare_default, + custom_hash_default, + custom_serialize_default, + custom_deserialize_default +}; + +/* Accessing the WINDOW * part of an OCaml custom block */ +#define Window_val(v) (*((WINDOW **) Data_custom_val(v))) + +/* Allocating an OCaml custom block to hold the given WINDOW * */ +static value alloc_window(WINDOW * w) +{ + value v = alloc_custom(&curses_window_ops, sizeof(WINDOW *), 0, 1); + Window_val(v) = w; + return v; +} + +value caml_curses_initscr(value unit) +{ + CAMLparam1 (unit); + CAMLreturn (alloc_window(initscr())); +} + +value caml_curses_endwin(value unit) +{ + CAMLparam1 (unit); + endwin(); + CAMLreturn (Val_unit); +} + +value caml_curses_refresh(value unit) +{ + CAMLparam1 (unit); + refresh(); + CAMLreturn (Val_unit); +} + +value caml_curses_wrefresh(value win) +{ + CAMLparam1 (win); + wrefresh(Window_val(win)); + CAMLreturn (Val_unit); +} + +value caml_curses_newwin(value nlines, value ncols, value x0, value y0) +{ + CAMLparam4 (nlines, ncols, x0, y0); + CAMLreturn (alloc_window(newwin(Int_val(nlines), Int_val(ncols), + Int_val(x0), Int_val(y0)))); +} + +value caml_curses_addch(value c) +{ + CAMLparam1 (c); + addch(Int_val(c)); /* Characters are encoded like integers */ + CAMLreturn (Val_unit); +} + +value caml_curses_mvwaddch(value win, value x, value y, value c) +{ + CAMLparam4 (win, x, y, c); + mvwaddch(Window_val(win), Int_val(x), Int_val(y), Int_val(c)); + CAMLreturn (Val_unit); +} + +value caml_curses_addstr(value s) +{ + CAMLparam1 (s); + addstr(String_val(s)); + 
CAMLreturn (Val_unit); +} + +value caml_curses_mvwaddstr(value win, value x, value y, value s) +{ + CAMLparam4 (win, x, y, s); + mvwaddstr(Window_val(win), Int_val(x), Int_val(y), String_val(s)); + CAMLreturn (Val_unit); +} + +/* This goes on for pages. */ +\end{verbatim} + +The file "curses_stubs.c" can be compiled with: +\begin{verbatim} + cc -c -I`ocamlc -where` curses_stubs.c +\end{verbatim} +or, even simpler, +\begin{verbatim} + ocamlc -c curses_stubs.c +\end{verbatim} +(When passed a ".c" file, the "ocamlc" command simply calls the C +compiler on that file, with the right "-I" option.) + +Now, here is a sample OCaml program "prog.ml" that uses the "curses" +module: +\begin{verbatim} +(* File prog.ml -- main program using curses *) +open Curses;; +let main_window = initscr () in +let small_window = newwin 10 5 20 10 in + mvwaddstr main_window 10 2 "Hello"; + mvwaddstr small_window 4 3 "world"; + refresh(); + Unix.sleep 5; + endwin() +\end{verbatim} +To compile and link this program, run: +\begin{verbatim} + ocamlc -custom -o prog unix.cma prog.ml curses_stubs.o -cclib -lcurses +\end{verbatim} +(On some machines, you may need to put +"-cclib -lcurses -cclib -ltermcap" or "-cclib -ltermcap" +instead of "-cclib -lcurses".) + +%% Note by Damien: when I launch the program, it only displays "Hello" +%% and not "world". Why? + +\section{Advanced topic: callbacks from C to OCaml} \label{s:callback} +\pdfsection{Advanced topic: callbacks from C to OCaml} + +So far, we have described how to call C functions from OCaml. In this +section, we show how C functions can call OCaml functions, either as +callbacks (OCaml calls C which calls OCaml), or with the main program +written in C. + +\subsection{Applying OCaml closures from C} \label{s:callbacks} + +C functions can apply OCaml function values (closures) to OCaml values. 
+The following functions are provided to perform the applications: +\begin{itemize} +\item "caml_callback("\var{f, a}")" applies the functional value \var{f} to +the value \var{a} and returns the value returned by~\var{f}. +\item "caml_callback2("\var{f, a, b}")" applies the functional value \var{f} +(which is assumed to be a curried OCaml function with two arguments) to +\var{a} and \var{b}. +\item "caml_callback3("\var{f, a, b, c}")" applies the functional value \var{f} +(a curried OCaml function with three arguments) to \var{a}, \var{b} and \var{c}. +\item "caml_callbackN("\var{f, n, args}")" applies the functional value \var{f} +to the \var{n} arguments contained in the array of values \var{args}. +\end{itemize} +If the function \var{f} does not return, but raises an exception that +escapes the scope of the application, then this exception is +propagated to the next enclosing OCaml code, skipping over the C +code. That is, if an OCaml function \var{f} calls a C function \var{g} that +calls back an OCaml function \var{h} that raises a stray exception, then the +execution of \var{g} is interrupted and the exception is propagated back +into \var{f}. + +If the C code wishes to catch exceptions escaping the OCaml function, +it can use the functions "caml_callback_exn", "caml_callback2_exn", +"caml_callback3_exn", "caml_callbackN_exn". These functions take the same +arguments as their non-"_exn" counterparts, but catch escaping +exceptions and return them to the C code. The return value \var{v} of the +"caml_callback*_exn" functions must be tested with the macro +"Is_exception_result("\var{v}")". If the macro returns ``false'', no +exception occurred, and \var{v} is the value returned by the OCaml +function. If "Is_exception_result("\var{v}")" returns ``true'', +an exception escaped, and its value (the exception descriptor) can be +recovered using "Extract_exception("\var{v}")".
+ +\paragraph{Warning:} If the OCaml function returned with an exception, +"Extract_exception" should be applied to the exception result prior +to calling a function that may trigger garbage collection. +Otherwise, if \var{v} is reachable during garbage collection, the runtime +can crash since \var{v} does not contain a valid value. + +Example: +\begin{verbatim} + value call_caml_f_ex(value closure, value arg) + { + CAMLparam2(closure, arg); + CAMLlocal2(res, tmp); + res = caml_callback_exn(closure, arg); + if(Is_exception_result(res)) { + res = Extract_exception(res); + tmp = caml_alloc(3, 0); /* Safe to allocate: res contains valid value. */ + ... + } + CAMLreturn (res); + } +\end{verbatim} + +\subsection{Obtaining or registering OCaml closures for use in C functions} + +There are two ways to obtain OCaml function values (closures) to +be passed to the "callback" functions described above. One way is to +pass the OCaml function as an argument to a primitive function. For +example, if the OCaml code contains the declaration +\begin{verbatim} + external apply : ('a -> 'b) -> 'a -> 'b = "caml_apply" +\end{verbatim} +the corresponding C stub can be written as follows: +\begin{verbatim} + CAMLprim value caml_apply(value vf, value vx) + { + CAMLparam2(vf, vx); + CAMLlocal1(vy); + vy = caml_callback(vf, vx); + CAMLreturn(vy); + } +\end{verbatim} + +Another possibility is to use the registration mechanism provided by +OCaml. This registration mechanism enables OCaml code to register +OCaml functions under some global name, and C code to retrieve the +corresponding closure by this global name. + +On the OCaml side, registration is performed by evaluating +"Callback.register" \var{n} \var{v}. Here, \var{n} is the global name +(an arbitrary string) and \var{v} the OCaml value. 
For instance: +\begin{verbatim} + let f x = print_string "f is applied to "; print_int x; print_newline() + let _ = Callback.register "test function" f +\end{verbatim} + +On the C side, a pointer to the value registered under name \var{n} is +obtained by calling "caml_named_value("\var{n}")". The returned +pointer must then be dereferenced to recover the actual OCaml value. +If no value is registered under the name \var{n}, the null pointer is +returned. For example, here is a C wrapper that calls the OCaml function "f" +above: +\begin{verbatim} + void call_caml_f(int arg) + { + caml_callback(*caml_named_value("test function"), Val_int(arg)); + } +\end{verbatim} + +The pointer returned by "caml_named_value" is constant and can safely +be cached in a C variable to avoid repeated name lookups. On the other +hand, the value pointed to can change during garbage collection and +must always be recomputed at the point of use. Here is a more +efficient variant of "call_caml_f" above that calls "caml_named_value" +only once: +\begin{verbatim} + void call_caml_f(int arg) + { + static value * closure_f = NULL; + if (closure_f == NULL) { + /* First time around, look up by name */ + closure_f = caml_named_value("test function"); + } + caml_callback(*closure_f, Val_int(arg)); + } +\end{verbatim} + +\subsection{Registering OCaml exceptions for use in C functions} \label{s:register-exn} + +The registration mechanism described above can also be used to +communicate exception identifiers from OCaml to C. The OCaml code +registers the exception by evaluating +"Callback.register_exception" \var{n} \var{exn}, where \var{n} is an +arbitrary name and \var{exn} is an exception value of the +exception to register. 
For example: +\begin{verbatim} + exception Error of string + let _ = Callback.register_exception "test exception" (Error "any string") +\end{verbatim} +The C code can then recover the exception identifier using +"caml_named_value" and pass it as first argument to the functions +"caml_raise_constant", "caml_raise_with_arg", and "caml_raise_with_string" (described +in section~\ref{s:c-exceptions}) to actually raise the exception. For +example, here is a C function that raises the "Error" exception with +the given argument: +\begin{verbatim} + void raise_error(char * msg) + { + caml_raise_with_string(*caml_named_value("test exception"), msg); + } +\end{verbatim} + +\subsection{Main program in C} \label{s:main-c} + +In normal operation, a mixed OCaml/C program starts by executing the +OCaml initialization code, which then may proceed to call C +functions. We say that the main program is the OCaml code. In some +applications, it is desirable that the C code plays the role of the +main program, calling OCaml functions when needed. This can be achieved as +follows: +\begin{itemize} +\item The C part of the program must provide a "main" function, +which will override the default "main" function provided by the OCaml +runtime system. Execution will start in the user-defined "main" function +just like for a regular C program. + +\item At some point, the C code must call "caml_main(argv)" to +initialize the OCaml code. The "argv" argument is a C array of strings +(type "char **"), terminated with a "NULL" pointer, +which represents the command-line arguments, as +passed as second argument to "main". The OCaml array "Sys.argv" will +be initialized from this parameter. For the bytecode compiler, +"argv[0]" and "argv[1]" are also consulted to find the file containing +the bytecode. + +\item The call to "caml_main" initializes the OCaml runtime system, +loads the bytecode (in the case of the bytecode compiler), and +executes the initialization code of the OCaml program. 
Typically, this +initialization code registers callback functions using "Callback.register". +Once the OCaml initialization code is complete, control returns to the +C code that called "caml_main". + +\item The C code can then invoke OCaml functions using the callback +mechanism (see section~\ref{s:callbacks}). +\end{itemize} + +\subsection{Embedding the OCaml code in the C code} \label{s:embedded-code} + +The bytecode compiler in custom runtime mode ("ocamlc -custom") +normally appends the bytecode to the executable file containing the +custom runtime. This has two consequences. First, the final linking +step must be performed by "ocamlc". Second, the OCaml runtime library +must be able to find the name of the executable file from the +command-line arguments. When using "caml_main(argv)" as in +section~\ref{s:main-c}, this means that "argv[0]" or "argv[1]" must +contain the executable file name. + +An alternative is to embed the bytecode in the C code. The +"-output-obj" option to "ocamlc" is provided for this purpose. It +causes the "ocamlc" compiler to output a C object file (".o" file, +".obj" under Windows) containing the bytecode for the OCaml part of the +program, as well as a "caml_startup" function. The C object file +produced by "ocamlc -output-obj" can then be linked with C code using +the standard C compiler, or stored in a C library. + +The "caml_startup" function must be called from the main C program in +order to initialize the OCaml runtime and execute the OCaml +initialization code. Just like "caml_main", it takes one "argv" +parameter containing the command-line parameters. Unlike "caml_main", +this "argv" parameter is used only to initialize "Sys.argv", but not +for finding the name of the executable file. + +The "-output-obj" option can also be used to obtain the C source file. 
+More interestingly, the same option can also produce directly a shared +library (".so" file, ".dll" under Windows) that contains the OCaml +code, the OCaml runtime system and any other static C code given to +"ocamlc" (".o", ".a", respectively, ".obj", ".lib"). This use of +"-output-obj" is very similar to a normal linking step, but instead of +producing a main program that automatically runs the OCaml code, it +produces a shared library that can run the OCaml code on demand. The +three possible behaviors of "-output-obj" are selected according +to the extension of the resulting file (given with "-o"). + +The native-code compiler "ocamlopt" also supports the "-output-obj" +option, causing it to output a C object file or a shared library +containing the native code for all OCaml modules on the command-line, +as well as the OCaml startup code. Initialization is performed by +calling "caml_startup" as in the case of the bytecode compiler. + +For the final linking phase, in addition to the object file produced +by "-output-obj", you will have to provide the OCaml runtime +library ("libcamlrun.a" for bytecode, "libasmrun.a" for native-code), +as well as all C libraries that are required by the OCaml libraries +used. For instance, assume the OCaml part of your program uses the +Unix library. 
With "ocamlc", you should do: +\begin{alltt} + ocamlc -output-obj -o camlcode.o unix.cma {\it{other}} .cmo {\it{and}} .cma {\it{files}} + cc -o myprog {\it{C objects and libraries}} \char92 + camlcode.o -L`ocamlc -where` -lunix -lcamlrun +\end{alltt} +With "ocamlopt", you should do: +\begin{alltt} + ocamlopt -output-obj -o camlcode.o unix.cmxa {\it{other}} .cmx {\it{and}} .cmxa {\it{files}} + cc -o myprog {\it{C objects and libraries}} \char92 + camlcode.o -L`ocamlc -where` -lunix -lasmrun +\end{alltt} + +% -- This seems completely wrong -- Damien +% The shared libraries produced by "ocamlc -output-obj" or by "ocamlopt +% -output-obj" already contains the OCaml runtime library as +% well as all the needed C libraries. + +\paragraph{Warning:} On some ports, special options are required on the final +linking phase that links together the object file produced by the +"-output-obj" option and the remainder of the program. Those options +are shown in the configuration file "config/Makefile" generated during +compilation of OCaml, as the variables "BYTECCLINKOPTS" +(for object files produced by "ocamlc -output-obj") and +"NATIVECCLINKOPTS" (for object files produced by "ocamlopt +-output-obj"). +\begin{itemize} +\item Windows with the MSVC compiler: the object file produced by +OCaml has been compiled with the "/MD" flag, and therefore +all other object files linked with it should also be compiled with +"/MD". +\item other systems: you may have to add one or more of "-lcurses", +"-lm", "-ldl", depending on your OS and C compiler. +\end{itemize} + +\paragraph{Stack backtraces.} When OCaml bytecode produced by +"ocamlc -g" is embedded in a C program, no debugging information is +included, and therefore it is impossible to print stack backtraces on +uncaught exceptions. This is not the case when native code produced +by "ocamlopt -g" is embedded in a C program: stack backtrace +information is available, but the backtrace mechanism needs to be +turned on programmatically. 
This can be achieved from the OCaml side +by calling "Printexc.record_backtrace true" in the initialization of +one of the OCaml modules. This can also be achieved from the C side +by calling "caml_record_backtrace(Val_int(1));" in the OCaml-C glue code. + +\section{Advanced example with callbacks} +\pdfsection{Advanced example with callbacks} + +This section illustrates the callback facilities described in +section~\ref{s:callback}. We are going to package some OCaml functions +in such a way that they can be linked with C code and called from C +just like any C functions. The OCaml functions are defined in the +following "mod.ml" OCaml source: + +\begin{verbatim} +(* File mod.ml -- some "useful" OCaml functions *) + +let rec fib n = if n < 2 then 1 else fib(n-1) + fib(n-2) + +let format_result n = Printf.sprintf "Result is: %d\n" n + +(* Export those two functions to C *) + +let _ = Callback.register "fib" fib +let _ = Callback.register "format_result" format_result +\end{verbatim} + +Here is the C stub code for calling these functions from C: + +\begin{verbatim} +/* File modwrap.c -- wrappers around the OCaml functions */ + +#include <stdio.h> +#include <string.h> +#include <caml/mlvalues.h> +#include <caml/callback.h> + +int fib(int n) +{ + static value * fib_closure = NULL; + if (fib_closure == NULL) fib_closure = caml_named_value("fib"); + return Int_val(caml_callback(*fib_closure, Val_int(n))); +} + +char * format_result(int n) +{ + static value * format_result_closure = NULL; + if (format_result_closure == NULL) + format_result_closure = caml_named_value("format_result"); + return strdup(String_val(caml_callback(*format_result_closure, Val_int(n)))); + /* We copy the C string returned by String_val to the C heap + so that it remains valid after garbage collection. 
*/ +} +\end{verbatim} + +We now compile the OCaml code to a C object file and put it in a C +library along with the stub code in "modwrap.c" and the OCaml runtime system: +\begin{verbatim} + ocamlc -custom -output-obj -o modcaml.o mod.ml + ocamlc -c modwrap.c + cp `ocamlc -where`/libcamlrun.a mod.a && chmod +w mod.a + ar r mod.a modcaml.o modwrap.o +\end{verbatim} +(One can also use "ocamlopt -output-obj" instead of "ocamlc -custom +-output-obj". In this case, replace "libcamlrun.a" (the bytecode +runtime library) by "libasmrun.a" (the native-code runtime library).) + +Now, we can use the two functions "fib" and "format_result" in any C +program, just like regular C functions. Just remember to call +"caml_startup" once before. + +\begin{verbatim} +/* File main.c -- a sample client for the OCaml functions */ + +#include <stdio.h> +#include <caml/callback.h> + +extern int fib(int n); +extern char * format_result(int n); + +int main(int argc, char ** argv) +{ + int result; + + /* Initialize OCaml code */ + caml_startup(argv); + /* Do some computation */ + result = fib(10); + printf("fib(10) = %s\n", format_result(result)); + return 0; +} +\end{verbatim} + +To build the whole program, just invoke the C compiler as follows: +\begin{verbatim} + cc -o prog -I `ocamlc -where` main.c mod.a -lcurses +\end{verbatim} +(On some machines, you may need to put "-ltermcap" or +"-lcurses -ltermcap" instead of "-lcurses".) + +\section{Advanced topic: custom blocks} \label{s:custom} +\pdfsection{Advanced topic: custom blocks} + +Blocks with tag "Custom_tag" contain both arbitrary user data and a +pointer to a C struct, with type "struct custom_operations", that +associates user-provided finalization, comparison, hashing, +serialization and deserialization functions to this block. 
+ +\subsection{The "struct custom_operations"} + +The "struct custom_operations" is defined in "<caml/custom.h>" and +contains the following fields: +\begin{itemize} +\item "char *identifier" \\ +A zero-terminated character string serving as an identifier for +serialization and deserialization operations. + +\item "void (*finalize)(value v)" \\ +The "finalize" field contains a pointer to a C function that is called +when the block becomes unreachable and is about to be reclaimed. +The block is passed as first argument to the function. +The "finalize" field can also be "custom_finalize_default" to indicate that no +finalization function is associated with the block. + +\item "int (*compare)(value v1, value v2)" \\ +The "compare" field contains a pointer to a C function that is +called whenever two custom blocks are compared using OCaml's generic +comparison operators ("=", "<>", "<=", ">=", "<", ">" and +"compare"). The C function should return 0 if the data contained in +the two blocks are structurally equal, a negative integer if the data +from the first block is less than the data from the second block, and +a positive integer if the data from the first block is greater than +the data from the second block. + +The "compare" field can be set to "custom_compare_default"; this +default comparison function simply raises "Failure". + +\item "int (*compare_ext)(value v1, value v2)" \\ +(Since 3.12.1) +The "compare_ext" field contains a pointer to a C function that is +called whenever one custom block and one unboxed integer are compared using OCaml's generic +comparison operators ("=", "<>", "<=", ">=", "<", ">" and +"compare"). As in the case of the "compare" field, the C function +should return 0 if the two arguments are structurally equal, a +negative integer if the first argument compares less than the second +argument, and a positive integer if the first argument compares +greater than the second argument. 
+ +The "compare_ext" field can be set to "custom_compare_ext_default"; this +default comparison function simply raises "Failure". + +\item "intnat (*hash)(value v)" \\ +The "hash" field contains a pointer to a C function that is called +whenever OCaml's generic hash operator (see module "Hashtbl") is +applied to a custom block. The C function can return an arbitrary +integer representing the hash value of the data contained in the +given custom block. The hash value must be compatible with the +"compare" function, in the sense that two structurally equal data +(that is, two custom blocks for which "compare" returns 0) must have +the same hash value. + +The "hash" field can be set to "custom_hash_default", in which case +the custom block is ignored during hash computation. + +\item "void (*serialize)(value v, uintnat * wsize_32, uintnat * wsize_64)" \\ +The "serialize" field contains a pointer to a C function that is +called whenever the custom block needs to be serialized (marshaled) +using the OCaml functions "output_value" or "Marshal.to_...". +For a custom block, those functions first write the identifier of the +block (as given by the "identifier" field) to the output stream, +then call the user-provided "serialize" function. That function is +responsible for writing the data contained in the custom block, using +the "serialize_..." functions defined in "<caml/intext.h>" and listed +below. The user-provided "serialize" function must then store in its +"wsize_32" and "wsize_64" parameters the sizes in bytes of the data +part of the custom block on a 32-bit architecture and on a 64-bit +architecture, respectively. + +The "serialize" field can be set to "custom_serialize_default", +in which case the "Failure" exception is raised when attempting to +serialize the custom block. 
+ +\item "uintnat (*deserialize)(void * dst)" \\ +The "deserialize" field contains a pointer to a C function that is +called whenever a custom block with identifier "identifier" needs to +be deserialized (un-marshaled) using the OCaml functions "input_value" +or "Marshal.from_...". This user-provided function is responsible for +reading back the data written by the "serialize" operation, using the +"deserialize_..." functions defined in "<caml/intext.h>" and listed +below. It must then rebuild the data part of the custom block +and store it at the pointer given as the "dst" argument. Finally, it +returns the size in bytes of the data part of the custom block. +This size must be identical to the "wsize_32" result of +the "serialize" operation if the architecture is 32 bits, or +"wsize_64" if the architecture is 64 bits. + +The "deserialize" field can be set to "custom_deserialize_default" +to indicate that deserialization is not supported. In this case, +do not register the "struct custom_operations" with the deserializer +using "register_custom_operations" (see below). +\end{itemize} + +Note: the "finalize", "compare", "hash", "serialize" and "deserialize" +functions attached to custom block descriptors must never trigger a +garbage collection. Within these functions, do not call any of the +OCaml allocation functions, and do not perform a callback into OCaml +code. Do not use "CAMLparam" to register the parameters to these +functions, and do not use "CAMLreturn" to return the result. + +\subsection{Allocating custom blocks} + +Custom blocks must be allocated via the "caml_alloc_custom" function: +\begin{center} +"caml_alloc_custom("\var{ops}", "\var{size}", "\var{used}", "\var{max}")" +\end{center} +returns a fresh custom block, with room for \var{size} bytes of user +data, and whose associated operations are given by \var{ops} (a +pointer to a "struct custom_operations", usually statically allocated +as a C global variable). 
+ +The two parameters \var{used} and \var{max} are used to control the +speed of garbage collection when the finalized object contains +pointers to out-of-heap resources. Generally speaking, the +OCaml incremental major collector adjusts its speed relative to the +allocation rate of the program. The faster the program allocates, the +harder the GC works in order to quickly reclaim unreachable blocks +and avoid having a large amount of ``floating garbage'' (unreferenced +objects that the GC has not yet collected). + +Normally, the allocation rate is measured by counting the in-heap size +of allocated blocks. However, it often happens that finalized +objects contain pointers to out-of-heap memory blocks and other resources +(such as file descriptors, X Windows bitmaps, etc.). For those +blocks, the in-heap size of blocks is not a good measure of the +quantity of resources allocated by the program. + +The two arguments \var{used} and \var{max} give the GC an idea of how +much out-of-heap resources are consumed by the finalized block +being allocated: you give the amount of resources allocated to this +object as parameter \var{used}, and the maximum amount that you want +to see in floating garbage as parameter \var{max}. The units are +arbitrary: the GC cares only about the ratio $\var{used} / \var{max}$. + +For instance, if you are allocating a finalized block holding an X +Windows bitmap of \var{w} by \var{h} pixels, and you'd rather not +have more than 1 mega-pixel of unreclaimed bitmaps, specify +$\var{used} = \var{w} * \var{h}$ and $\var{max} = 1000000$. + +Another way to describe the effect of the \var{used} and \var{max} +parameters is in terms of full GC cycles. If you allocate many custom +blocks with $\var{used} / \var{max} = 1 / \var{N}$, the GC will then do one +full cycle (examining every object in the heap and calling +finalization functions on those that are unreachable) every \var{N} +allocations. 
For instance, if $\var{used} = 1$ and $\var{max} = 1000$, +the GC will do one full cycle at least every 1000 allocations of +custom blocks. + +If your finalized blocks contain no pointers to out-of-heap resources, +or if the previous discussion made little sense to you, just take +$\var{used} = 0$ and $\var{max} = 1$. But if you later find that the +finalization functions are not called ``often enough'', consider +increasing the $\var{used} / \var{max}$ ratio. + +\subsection{Accessing custom blocks} + +The data part of a custom block \var{v} can be +accessed via the pointer "Data_custom_val("\var{v}")". This pointer +has type "void *" and should be cast to the actual type of the data +stored in the custom block. + +The contents of custom blocks are not scanned by the garbage +collector, and must therefore not contain any pointer inside the OCaml +heap. In other terms, never store an OCaml "value" in a custom block, +and do not use "Field", "Store_field" nor "caml_modify" to access the data +part of a custom block. Conversely, any C data structure (not +containing heap pointers) can be stored in a custom block. + +\subsection{Writing custom serialization and deserialization functions} + +The following functions, defined in "<caml/intext.h>", are provided to +write and read back the contents of custom blocks in a portable way. +Those functions handle endianness conversions when e.g. data is +written on a little-endian machine and read back on a big-endian machine. 
+ +\begin{tableau}{|l|p{10cm}|}{Function}{Action} +\entree{"caml_serialize_int_1"}{Write a 1-byte integer} +\entree{"caml_serialize_int_2"}{Write a 2-byte integer} +\entree{"caml_serialize_int_4"}{Write a 4-byte integer} +\entree{"caml_serialize_int_8"}{Write an 8-byte integer} +\entree{"caml_serialize_float_4"}{Write a 4-byte float} +\entree{"caml_serialize_float_8"}{Write an 8-byte float} +\entree{"caml_serialize_block_1"}{Write an array of 1-byte quantities} +\entree{"caml_serialize_block_2"}{Write an array of 2-byte quantities} +\entree{"caml_serialize_block_4"}{Write an array of 4-byte quantities} +\entree{"caml_serialize_block_8"}{Write an array of 8-byte quantities} +\entree{"caml_deserialize_uint_1"}{Read an unsigned 1-byte integer} +\entree{"caml_deserialize_sint_1"}{Read a signed 1-byte integer} +\entree{"caml_deserialize_uint_2"}{Read an unsigned 2-byte integer} +\entree{"caml_deserialize_sint_2"}{Read a signed 2-byte integer} +\entree{"caml_deserialize_uint_4"}{Read an unsigned 4-byte integer} +\entree{"caml_deserialize_sint_4"}{Read a signed 4-byte integer} +\entree{"caml_deserialize_uint_8"}{Read an unsigned 8-byte integer} +\entree{"caml_deserialize_sint_8"}{Read a signed 8-byte integer} +\entree{"caml_deserialize_float_4"}{Read a 4-byte float} +\entree{"caml_deserialize_float_8"}{Read an 8-byte float} +\entree{"caml_deserialize_block_1"}{Read an array of 1-byte quantities} +\entree{"caml_deserialize_block_2"}{Read an array of 2-byte quantities} +\entree{"caml_deserialize_block_4"}{Read an array of 4-byte quantities} +\entree{"caml_deserialize_block_8"}{Read an array of 8-byte quantities} +\entree{"caml_deserialize_error"}{Signal an error during deserialization; +"input_value" or "Marshal.from_..." raise a "Failure" exception after +cleaning up their internal data structures} +\end{tableau} + +Serialization functions are attached to the custom blocks to which +they apply. 
Obviously, deserialization functions cannot be attached +this way, since the custom block does not exist yet when +deserialization begins! Thus, the "struct custom_operations" that +contain deserialization functions must be registered with the +deserializer in advance, using the "register_custom_operations" +function declared in "<caml/custom.h>". Deserialization proceeds by +reading the identifier off the input stream, allocating a custom block +of the size specified in the input stream, searching the registered +"struct custom_operations" blocks for one with the same identifier, and +calling its "deserialize" function to fill the data part of the custom block. + +\subsection{Choosing identifiers} + +Identifiers in "struct custom_operations" must be chosen carefully, +since they must identify uniquely the data structure for serialization +and deserialization operations. In particular, consider including a +version number in the identifier; this way, the format of the data can +be changed later, yet backward-compatible deserialization functions +can be provided. + +Identifiers starting with "_" (an underscore character) are reserved +for the OCaml runtime system; do not use them for your custom +data. We recommend using a URL +("http://mymachine.mydomain.com/mylibrary/version-number") +or a Java-style package name +("com.mydomain.mymachine.mylibrary.version-number") +as identifiers, to minimize the risk of identifier collision. + +\subsection{Finalized blocks} + +Custom blocks generalize the finalized blocks that were present in +OCaml prior to version 3.00. For backward compatibility, the +format of custom blocks is compatible with that of finalized blocks, +and the "caml_alloc_final" function is still available to allocate a custom +block with a given finalization function, but default comparison, +hashing and serialization functions. 
"caml_alloc_final("\var{n}", +"\var{f}", "\var{used}", "\var{max}")" returns a fresh custom block of +size \var{n} words, with finalization function \var{f}. The first +word is reserved for storing the custom operations; the other +\var{n}-1 words are available for your data. The two parameters +\var{used} and \var{max} are used to control the speed of garbage +collection, as described for "caml_alloc_custom". + +\section{Advanced topic: cheaper C call} +\label{s:C-cheaper-call} + +This section describes how to make calling C functions cheaper. + +{\bf Note:} this only applies to the native compiler. So whenever you +use any of these methods, you have to provide an alternative byte-code +stub that ignores all the special annotations. + +\subsection{Passing unboxed values} + +We said earlier that all OCaml objects are represented by the C type +"value", and one has to use macros such as "Int_val" to decode data +from the "value" type. It is however possible to tell OCaml to do this +for us and pass arguments unboxed to the C function. Similarly it is +possible to tell OCaml to expect the result unboxed and box it for us. + +The motivation is that, by letting the OCaml compiler deal with +boxing, it can often decide to suppress it entirely. + +For instance let's consider this example: + +\begin{verbatim} +external foo : float -> float -> float = "foo" + +let f a b = + let len = Array.length a in + assert (Array.length b = len); + let res = Array.make len 0. in + for i = 0 to len - 1 do + res.(i) <- foo a.(i) b.(i) + done +\end{verbatim} + +Float arrays are unboxed in OCaml, however the C function "foo" expects +its arguments as boxed floats and returns a boxed float. Hence the +OCaml compiler has no choice but to box "a.(i)" and "b.(i)" and unbox +the result of "foo". This results in the allocation of "3 * len" +temporary float values. 
+ +Now if we annotate the arguments and result with "[@unboxed]", the +compiler will be able to avoid all these allocations: + +\begin{verbatim} +external foo + : (float [@unboxed]) + -> (float [@unboxed]) + -> (float [@unboxed]) + = "foo_byte" "foo" +\end{verbatim} + +In this case the C functions must look like: + +\begin{verbatim} +CAMLprim double foo(double a, double b) +{ + ... +} + +CAMLprim value foo_byte(value a, value b) +{ + return caml_copy_double(foo(Double_val(a), Double_val(b))); +} +\end{verbatim} + +For convenience, when all arguments and the result are annotated with +"[@unboxed]", it is possible to put the attribute only once on the +declaration itself. So we can also write instead: + +\begin{verbatim} +external foo : float -> float -> float = "foo_byte" "foo" [@@unboxed] +\end{verbatim} + +The following table summarizes what OCaml types can be unboxed, and +what C types should be used in correspondence: + +\begin{tableau}{|l|l|}{OCaml type}{C type} +\entree{"float"}{"double"} +\entree{"int32"}{"int32_t"} +\entree{"int64"}{"int64_t"} +\entree{"nativeint"}{"intnat"} +\end{tableau} + +Similarly, it is possible to pass untagged OCaml integers between +OCaml and C. This is done by annotating the arguments and/or result +with "[@untagged]": + +\begin{verbatim} +external f : string -> (int [@untagged]) = "f_byte" "f" +\end{verbatim} + +The corresponding C type must be "intnat". + +{\bf Note:} do not use the C "int" type in correspondence with "(int +[@untagged])". This is because they often differ in size. + +\subsection{Direct C call} + +In order to be able to run the garbage collector in the middle of a C +function, the OCaml compiler generates some bookkeeping code around C +calls. Technically it wraps every C call with the C function +"caml_c_call" which is part of the OCaml runtime. + +For small functions that are called repeatedly, this indirection can +have a big impact on performance. 
However this is not needed if we +know that the C function doesn't allocate and doesn't raise +exceptions. We can inform the OCaml compiler of this fact by +annotating the external declaration with the attribute "[@@noalloc]": + +\begin{verbatim} +external bar : int -> int -> int = "foo" [@@noalloc] +\end{verbatim} + +In this case calling "bar" from OCaml is as cheap as calling any other +OCaml function, except for the fact that the OCaml compiler can't +inline C functions... + +\subsection{Example: calling C library functions without indirection} + +Using these attributes, it is possible to call C library functions +with no indirection. For instance many math functions are defined this +way in the OCaml standard library: + +\begin{verbatim} +external sqrt : float -> float = "caml_sqrt_float" "sqrt" + [@@unboxed] [@@noalloc] +(** Square root. *) + +external exp : float -> float = "caml_exp_float" "exp" [@@unboxed] [@@noalloc] +(** Exponential. *) + +external log : float -> float = "caml_log_float" "log" [@@unboxed] [@@noalloc] +(** Natural logarithm. *) +\end{verbatim} + +\section{Advanced topic: multithreading} +\label{s:C-multithreading} + +Using multiple threads (shared-memory concurrency) in a mixed OCaml/C +application requires special precautions, which are described in this +section. + +\subsection{Registering threads created from C} + +Callbacks from C to OCaml are possible only if the calling thread is +known to the OCaml run-time system. Threads created from OCaml (through +the "Thread.create" function of the system threads library) are +automatically known to the run-time system. If the application +creates additional threads from C and wishes to call back into OCaml +code from these threads, it must first register them with the run-time +system. The following functions are declared in the include file +"<caml/threads.h>". + +\begin{itemize} +\item +"caml_c_thread_register()" registers the calling thread with the OCaml +run-time system. 
Returns 1 on success, 0 on error. Registering an +already-registered thread does nothing and returns 0. +\item +"caml_c_thread_unregister()" must be called before the thread + terminates, to unregister it from the OCaml run-time system. +Returns 1 on success, 0 on error. If the calling thread was not +previously registered, does nothing and returns 0. +\end{itemize} + +\subsection{Parallel execution of long-running C code} + +The OCaml run-time system is not reentrant: at any time, at most one +thread can be executing OCaml code or C code that uses the OCaml +run-time system. Technically, this is enforced by a ``master lock'' +that any thread must hold while executing such code. + +When OCaml calls the C code implementing a primitive, the master lock +is held, therefore the C code has full access to the facilities of the +run-time system. However, no other thread can execute OCaml code +concurrently with the C code of the primitive. + +If a C primitive runs for a long time or performs potentially blocking +input-output operations, it can explicitly release the master lock, +enabling other OCaml threads to run concurrently with its operations. +The C code must re-acquire the master lock before returning to OCaml. +This is achieved with the following functions, declared in +the include file "<caml/threads.h>". + +\begin{itemize} +\item +"caml_release_runtime_system()" +The calling thread releases the master lock and other OCaml resources, +enabling other threads to run OCaml code in parallel with the execution +of the calling thread. +\item +"caml_acquire_runtime_system()" +The calling thread re-acquires the master lock and other OCaml +resources. It may block until no other thread uses the OCaml run-time +system. +\end{itemize} + +After "caml_release_runtime_system()" was called and until +"caml_acquire_runtime_system()" is called, the C code must not access +any OCaml data, nor call any function of the run-time system, nor call +back into OCaml code. 
Consequently, arguments provided by OCaml to the +C primitive must be copied into C data structures before calling +"caml_release_runtime_system()", and results to be returned to OCaml +must be encoded as OCaml values after "caml_acquire_runtime_system()" +returns. + +Example: the following C primitive invokes "gethostbyname" to find the +IP address of a host name. The "gethostbyname" function can block for +a long time, so we choose to release the OCaml run-time system while it +is running. +\begin{verbatim} +CAMLprim value stub_gethostbyname(value vname) +{ + CAMLparam1 (vname); + CAMLlocal1 (vres); + struct hostent * h; + char * name; + + /* Copy the string argument to a C string, allocated outside the + OCaml heap. */ + name = caml_stat_alloc(caml_string_length(vname) + 1); + strcpy(name, String_val(vname)); + /* Release the OCaml run-time system */ + caml_release_runtime_system(); + /* Resolve the name */ + h = gethostbyname(name); + /* Re-acquire the OCaml run-time system */ + caml_acquire_runtime_system(); + /* Encode the relevant fields of h as the OCaml value vres */ + ... /* Omitted */ + /* Return to OCaml */ + CAMLreturn (vres); +} +\end{verbatim} + +Callbacks from C to OCaml must be performed while holding the master +lock to the OCaml run-time system. This is naturally the case if the +callback is performed by a C primitive that did not release the +run-time system. If the C primitive released the run-time system +previously, or the callback is performed from other C code that was +not invoked from OCaml (e.g. 
an event loop in a GUI application), the +run-time system must be acquired before the callback and released +after: +\begin{verbatim} + caml_acquire_runtime_system(); + /* Resolve OCaml function vfun to be invoked */ + /* Build OCaml argument varg to the callback */ + vres = callback(vfun, varg); + /* Copy relevant parts of result vres to C data structures */ + caml_release_runtime_system(); +\end{verbatim} + +Note: the "acquire" and "release" functions described above were +introduced in OCaml 3.12. Older code uses the following historical +names, declared in "<caml/signals.h>": +\begin{itemize} +\item "caml_enter_blocking_section" as an alias for + "caml_release_runtime_system" +\item "caml_leave_blocking_section" as an alias for + "caml_acquire_runtime_system" +\end{itemize} +Intuition: a ``blocking section'' is a piece of C code that does not +use the OCaml run-time system, typically a blocking input/output operation. + +\section{Building mixed C/OCaml libraries: \texttt{ocamlmklib}} +\label{s-ocamlmklib} + +The "ocamlmklib" command facilitates the construction of libraries +containing both OCaml code and C code, and usable both in static +linking and dynamic linking modes. This command is available under +Windows since Objective Caml 3.11 and under other operating systems since +Objective Caml 3.03. + +The "ocamlmklib" command takes three kinds of arguments: +\begin{itemize} +\item OCaml source files and object files (".cmo", ".cmx", ".ml") +comprising the OCaml part of the library; +\item C object files (".o", ".a", respectively, ".obj", ".lib") + comprising the C part of the library; +\item Support libraries for the C part ("-l"\var{lib}). +\end{itemize} +It generates the following outputs: +\begin{itemize} +\item An OCaml bytecode library ".cma" incorporating the ".cmo" and +".ml" OCaml files given as arguments, and automatically referencing the +C library generated with the C object files. 
+\item An OCaml native-code library ".cmxa" incorporating the ".cmx" and +".ml" OCaml files given as arguments, and automatically referencing the +C library generated with the C object files. +\item If dynamic linking is supported on the target platform, a +".so" (respectively, ".dll") shared library built from the C object files given as arguments, +and automatically referencing the support libraries. +\item A C static library ".a"(respectively, ".lib") built from the C object files. +\end{itemize} +In addition, the following options are recognized: +\begin{options} +\item["-cclib", "-ccopt", "-I", "-linkall"] +These options are passed as is to "ocamlc" or "ocamlopt". +See the documentation of these commands. +\item["-rpath", "-R", "-Wl,-rpath", "-Wl,-R"] +These options are passed as is to the C compiler. Refer to the +documentation of the C compiler. +\item["-custom"] Force the construction of a statically linked library +only, even if dynamic linking is supported. +\item["-failsafe"] Fall back to building a statically linked library +if a problem occurs while building the shared library (e.g. some of +the support libraries are not available as shared libraries). +\item["-L"\var{dir}] Add \var{dir} to the search path for support +libraries ("-l"\var{lib}). +\item["-ocamlc" \var{cmd}] Use \var{cmd} instead of "ocamlc" to call +the bytecode compiler. +\item["-ocamlopt" \var{cmd}] Use \var{cmd} instead of "ocamlopt" to call +the native-code compiler. +\item["-o" \var{output}] Set the name of the generated OCaml library. +"ocamlmklib" will generate \var{output}".cma" and/or \var{output}".cmxa". +If not specified, defaults to "a". +\item["-oc" \var{outputc}] Set the name of the generated C library. +"ocamlmklib" will generate "lib"\var{outputc}".so" (if shared +libraries are supported) and "lib"\var{outputc}".a". +If not specified, defaults to the output name given with "-o". 
+\end{options} + +\paragraph{Example} Consider an OCaml interface to the standard "libz" +C library for reading and writing compressed files. Assume this +library resides in "/usr/local/zlib". This interface is +composed of an OCaml part "zip.cmo"/"zip.cmx" and a C part "zipstubs.o" +containing the stub code around the "libz" entry points. The +following command builds the OCaml libraries "zip.cma" and "zip.cmxa", +as well as the companion C libraries "dllzip.so" and "libzip.a": +\begin{verbatim} +ocamlmklib -o zip zip.cmo zip.cmx zipstubs.o -lz -L/usr/local/zlib +\end{verbatim} +If shared libraries are supported, this performs the following +commands: +\begin{verbatim} +ocamlc -a -o zip.cma zip.cmo -dllib -lzip \ + -cclib -lzip -cclib -lz -ccopt -L/usr/local/zlib +ocamlopt -a -o zip.cmxa zip.cmx -cclib -lzip \ + -cclib -lzip -cclib -lz -ccopt -L/usr/local/zlib +gcc -shared -o dllzip.so zipstubs.o -lz -L/usr/local/zlib +ar rc libzip.a zipstubs.o +\end{verbatim} +Note: This example is on a Unix system. The exact command lines +may be different on other systems. + +If shared libraries are not supported, the following commands are +performed instead: +\begin{verbatim} +ocamlc -a -custom -o zip.cma zip.cmo -cclib -lzip \ + -cclib -lz -ccopt -L/usr/local/zlib +ocamlopt -a -o zip.cmxa zip.cmx -lzip \ + -cclib -lz -ccopt -L/usr/local/zlib +ar rc libzip.a zipstubs.o +\end{verbatim} +Instead of building simultaneously the bytecode library, the +native-code library and the C libraries, "ocamlmklib" can be called +three times to build each separately. Thus, +\begin{verbatim} +ocamlmklib -o zip zip.cmo -lz -L/usr/local/zlib +\end{verbatim} +builds the bytecode library "zip.cma", and +\begin{verbatim} +ocamlmklib -o zip zip.cmx -lz -L/usr/local/zlib +\end{verbatim} +builds the native-code library "zip.cmxa", and +\begin{verbatim} +ocamlmklib -o zip zipstubs.o -lz -L/usr/local/zlib +\end{verbatim} +builds the C libraries "dllzip.so" and "libzip.a". 
Notice that the +support libraries ("-lz") and the corresponding options +("-L/usr/local/zlib") must be given on all three invocations of "ocamlmklib", +because they are needed at different times depending on whether shared +libraries are supported. diff --git a/manual/manual/cmds/lexyacc.etex b/manual/manual/cmds/lexyacc.etex new file mode 100644 index 0000000000..294767e418 --- /dev/null +++ b/manual/manual/cmds/lexyacc.etex @@ -0,0 +1,729 @@ +\chapter{Lexer and parser generators (ocamllex, ocamlyacc)} +\label{c:ocamlyacc} +\pdfchapter{Lexer and parser generators (ocamllex, ocamlyacc)} +%HEVEA\cutname{lexyacc.html} + +This chapter describes two program generators: "ocamllex", that +produces a lexical analyzer from a set of regular expressions with +associated semantic actions, and "ocamlyacc", that produces a parser +from a grammar with associated semantic actions. + +These program generators are very close to the well-known "lex" and +"yacc" commands that can be found in most C programming environments. +This chapter assumes a working knowledge of "lex" and "yacc": while +it describes the input syntax for "ocamllex" and "ocamlyacc" and the +main differences with "lex" and "yacc", it does not explain the basics +of writing a lexer or parser description in "lex" and "yacc". Readers +unfamiliar with "lex" and "yacc" are referred to ``Compilers: +principles, techniques, and tools'' by Aho, Sethi and Ullman +(Addison-Wesley, 1986), or ``Lex $\&$ Yacc'', by Levine, Mason and +Brown (O'Reilly, 1992). + +\section{Overview of \texttt{ocamllex}} + +The "ocamllex" command produces a lexical analyzer from a set of regular +expressions with attached semantic actions, in the style of +"lex". Assuming the input file is \var{lexer}".mll", executing +\begin{alltt} + ocamllex \var{lexer}.mll +\end{alltt} +produces OCaml code for a lexical analyzer in file \var{lexer}".ml". +This file defines one lexing function per entry point in the lexer +definition. 
These functions have the same names as the entry +points. Lexing functions take as argument a lexer buffer, and return +the semantic attribute of the corresponding entry point. + +Lexer buffers are an abstract data type implemented in the standard +library module "Lexing". The functions "Lexing.from_channel", +"Lexing.from_string" and "Lexing.from_function" create +lexer buffers that read from an input channel, a character string, or +any reading function, respectively. (See the description of module +"Lexing" in chapter~\ref{c:stdlib}.) + +When used in conjunction with a parser generated by "ocamlyacc", the +semantic actions compute a value belonging to the type "token" defined +by the generated parsing module. (See the description of "ocamlyacc" +below.) + +\subsection{Options} +The following command-line options are recognized by "ocamllex". + +\begin{options} + +\item["-ml"] +Output code that does not use OCaml's built-in automata +interpreter. Instead, the automaton is encoded by OCaml functions. +This option mainly is useful for debugging "ocamllex", using it for +production lexers is not recommended. + +\item["-o" \var{output-file}] +Specify the name of the output file produced by "ocamllex". +The default is the input file name with its extension replaced by ".ml". + +\item["-q"] +Quiet mode. "ocamllex" normally outputs informational messages +to standard output. They are suppressed if option "-q" is used. + +\item["-v" or "-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. + +\item["-help" or "--help"] +Display a short usage summary and exit. 
+% +\end{options} + +\section{Syntax of lexer definitions} + +The format of lexer definitions is as follows: +\begin{alltt} +\{ \var{header} \} +let \var{ident} = \var{regexp} \ldots +[refill \{ \var{refill-handler} \}] +rule \var{entrypoint} [\nth{arg}{1}\ldots{} \nth{arg}{n}] = + parse \var{regexp} \{ \var{action} \} + | \ldots + | \var{regexp} \{ \var{action} \} +and \var{entrypoint} [\nth{arg}{1}\ldots{} \nth{arg}{n}] = + parse \ldots +and \ldots +\{ \var{trailer} \} +\end{alltt} +Comments are delimited by "(*" and "*)", as in OCaml. +The "parse" keyword can be replaced by the "shortest" keyword, with +the semantic consequences explained below. + +Refill handlers are a recent (optional) feature introduced in 4.02, +documented below in subsection~\ref{ss:refill-handlers}. + +\subsection{Header and trailer} +The {\it header} and {\it trailer} sections are arbitrary OCaml +text enclosed in curly braces. Either or both can be omitted. If +present, the header text is copied as is at the beginning of the +output file and the trailer text at the end. Typically, the +header section contains the "open" directives required +by the actions, and possibly some auxiliary functions used in the +actions. + +\subsection{Naming regular expressions} + +Between the header and the entry points, one can give names to +frequently-occurring regular expressions. This is written +@"let" ident "=" regexp@. +In regular expressions that follow this declaration, the identifier +\var{ident} can be used as shorthand for \var{regexp}. + +\subsection{Entry points} + +The names of the entry points must be valid identifiers for OCaml +values (starting with a lowercase letter). +Similarly, the arguments \texttt{\var{arg$_1$}\ldots{} +\var{arg$_n$}} must be valid identifiers for OCaml. +Each entry point becomes an +OCaml function that takes $n+1$ arguments, +the extra implicit last argument being of type "Lexing.lexbuf". 
+Characters are read from the "Lexing.lexbuf" argument and matched +against the regular expressions provided in the rule, until a prefix +of the input matches one of them. The corresponding action is +then evaluated and returned as the result of the function. + + +If several regular expressions match a prefix of the input, the +``longest match'' rule applies: the regular expression that matches +the longest prefix of the input is selected. In case of tie, the +regular expression that occurs earlier in the rule is selected. + +However, if lexer rules are introduced with the "shortest" keyword in +place of the "parse" keyword, then the ``shortest match'' rule applies: +the shortest prefix of the input is selected. In case of tie, the +regular expression that occurs earlier in the rule is still selected. +This feature is not intended for use in ordinary lexical analyzers; it +may facilitate the use of "ocamllex" as a simple text processing tool. + + + +\subsection{Regular expressions} + +The regular expressions are in the style of "lex", with a more +OCaml-like syntax. +\begin{syntax} +regexp: + \ldots +\end{syntax} +\begin{options} + +\item[@"'" regular-char || escape-sequence "'"@] +A character constant, with the same syntax as OCaml character +constants. Match the denoted character. + +\item["_"] +(underscore) Match any character. + +\item[@"eof"@] +Match the end of the lexer input.\\ +{\bf Note:} On some systems, with interactive input, an end-of-file +may be followed by more characters. However, "ocamllex" will not +correctly handle regular expressions that contain "eof" followed by +something else. + +\item[@'"' { string-character } '"'@] +A string constant, with the same syntax as OCaml string +constants. Match the corresponding sequence of characters. + +\item[@'[' character-set ']'@] +Match any single character belonging to the given +character set. 
Valid character sets are: single +character constants @"'" @c@ "'"@; ranges of characters +@"'" @c@_1 "'" "-" "'" @c@_2 "'"@ (all characters between $c_1$ and $c_2$, +inclusive); and the union of two or more character sets, denoted by +concatenation. + +\item[@'[' '^' character-set ']'@] +Match any single character not belonging to the given character set. + + +\item[@regexp_1 '#' regexp_2@] +(difference of character sets) +Regular expressions @regexp_1@ and @regexp_2@ must be character sets +defined with @'['\ldots ']'@ (or a single character expression or +underscore "_"). +Match the difference of the two specified character sets. + + +\item[@regexp '*'@] +(repetition) Match the concatenation of zero or more +strings that match @regexp@. + +\item[@regexp '+'@] +(strict repetition) Match the concatenation of one or more +strings that match @regexp@. + +\item[@regexp '?'@] +(option) Match the empty string, or a string matching @regexp@. + +\item[@regexp_1 '|' regexp_2@] +(alternative) Match any string that matches @regexp_1@ or @regexp_2@. + +\item[@regexp_1 regexp_2@] +(concatenation) Match the concatenation of two strings, the first +matching @regexp_1@, the second matching @regexp_2@. + +\item[@'(' regexp ')'@] +Match the same strings as @regexp@. + +\item[@ident@] +Reference the regular expression bound to @ident@ by an earlier +@"let" ident "=" regexp@ definition. + +\item[@regexp 'as' ident@] +Bind the substring matched by @regexp@ to identifier @ident@. +\end{options} + +Concerning the precedences of operators, "#" has the highest precedence, +followed by "*", "+" and "?", +then concatenation, then "|" (alternation), then "as". + +\subsection{Actions} + +The actions are arbitrary OCaml expressions. They are evaluated in +a context where the identifiers defined by using the "as" construct +are bound to subparts of the matched string. +Additionally, "lexbuf" is bound to the current lexer +buffer. 
Some typical uses for "lexbuf", in conjunction with the +operations on lexer buffers provided by the "Lexing" standard library +module, are listed below. + +\begin{options} +\item["Lexing.lexeme lexbuf"] +Return the matched string. + +\item["Lexing.lexeme_char lexbuf "$n$] +Return the $n\th$ +character in the matched string. The first character corresponds to $n = 0$. + +\item["Lexing.lexeme_start lexbuf"] +Return the absolute position in the input text of the beginning of the +matched string (i.e. the offset of the first character of the matched +string). The first character read from the input text has offset 0. + +\item["Lexing.lexeme_end lexbuf"] +Return the absolute position in the input text of the end of the +matched string (i.e. the offset of the first character after the +matched string). The first character read from the input text has +offset 0. + +\newcommand{\sub}[1]{$_{#1}$}% +\item[\var{entrypoint} {[\var{exp\sub{1}}\ldots{} \var{exp\sub{n}}]} "lexbuf"] +(Where \var{entrypoint} is the name of another entry point in the same +lexer definition.) Recursively call the lexer on the given entry point. +Notice that "lexbuf" is the last argument. +Useful for lexing nested comments, for example. + +\end{options} + +\subsection{Variables in regular expressions} +The "as" construct is similar to ``\emph{groups}'' as provided by +numerous regular expression packages. +The type of these variables can be "string", "char", "string option" +or "char option". + +We first consider the case of linear patterns, that is the case when +all "as" bound variables are distinct. +In @regexp 'as' ident@, the type of @ident@ normally is "string" (or +"string option") except +when @regexp@ is a character constant, an underscore, a string +constant of length one, a character set specification, or an +alternation of those. Then, the type of @ident@ is "char" (or "char +option"). 
+Option types are introduced when overall rule matching does not +imply matching of the bound sub-pattern. This is in particular the +case of @'(' regexp 'as' ident ')' '?'@ and of +@regexp_1 '|' '(' regexp_2 'as' ident ')'@. + +There is no linearity restriction over "as" bound variables. +When a variable is bound more than once, the previous rules are to be +extended as follows: +\begin{itemize} +\item A variable is a "char" variable when all its occurrences bind +"char" occurrences in the previous sense. +\item A variable is an "option" variable when the overall expression +can be matched without binding this variable. +\end{itemize} +For instance, in +"('a' as x) | ( 'a' (_ as x) )" the variable "x" is of type +"char", whereas in +"(\"ab\" as x) | ( 'a' (_ as x) ? )" the variable "x" is of type +"string option". + + +In some cases, a successful match may not yield a unique set of bindings. +For instance the matching of \verb+aba+ by the regular expression +"(('a'|\"ab\") as x) ((\"ba\"|'a') as y)" may result in binding +either +\verb+x+ to \verb+"ab"+ and \verb+y+ to \verb+"a"+, or +\verb+x+ to \verb+"a"+ and \verb+y+ to \verb+"ba"+. +The automata produced by "ocamllex" on such ambiguous regular +expressions will select one of the possible resulting sets of +bindings. +The selected set of bindings is purposely left unspecified. + +\subsection{Refill handlers} +\label{ss:refill-handlers} + +By default, when ocamllex reaches the end of its lexing buffer, it +will silently call the "refill_buff" function of the "lexbuf" structure +and continue lexing. It is sometimes useful to be able to take control +of the refilling action; typically, if you use a library for asynchronous +computation, you may want to wrap the refilling action in a delaying +function to avoid blocking synchronous operations. + +Since OCaml 4.02, it is possible to specify a \var{refill-handler}, +a function that will be called when refill happens. 
It is passed the +continuation of the lexing, on which it has total control. The OCaml +expression used as refill action should have a type that is an +instance of +\begin{verbatim} + (Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> 'a +\end{verbatim} +where the first argument is the continuation which captures the +processing ocamllex would usually perform (refilling the buffer, then +calling the lexing function again), and the result type that +instantiates ['a] should unify with the result type of all lexing +rules. + +As an example, consider the following lexer that is parametrized over +an arbitrary monad: +\begin{verbatim} +{ +type token = EOL | INT of int | PLUS + +module Make (M : sig + type 'a t + val return: 'a -> 'a t + val bind: 'a t -> ('a -> 'b t) -> 'b t + val fail : string -> 'a t + + (* Set up lexbuf *) + val on_refill : Lexing.lexbuf -> unit t + end) += struct + +let refill_handler k lexbuf = + M.bind (M.on_refill lexbuf) (fun () -> k lexbuf) + +} + +refill {refill_handler} + +rule token = parse +| [' ' '\t'] + { token lexbuf } +| '\n' + { M.return EOL } +| ['0'-'9']+ as i + { M.return (INT (int_of_string i)) } +| '+' + { M.return PLUS } +| _ + { M.fail "unexpected character" } +{ +end +} +\end{verbatim} + +\subsection{Reserved identifiers} + +All identifiers starting with "__ocaml_lex" are reserved for use by +"ocamllex"; do not use any such identifier in your programs. + + +\section{Overview of \texttt{ocamlyacc}} + +The "ocamlyacc" command produces a parser from a context-free grammar +specification with attached semantic actions, in the style of "yacc". +Assuming the input file is \var{grammar}".mly", executing +\begin{alltt} + ocamlyacc \var{options} \var{grammar}.mly +\end{alltt} +produces OCaml code for a parser in the file \var{grammar}".ml", +and its interface in file \var{grammar}".mli". + +The generated module defines one parsing function per entry point in +the grammar. These functions have the same names as the entry points. 
+Parsing functions take as arguments a lexical analyzer (a function +from lexer buffers to tokens) and a lexer buffer, and return the +semantic attribute of the corresponding entry point. Lexical analyzer +functions are usually generated from a lexer specification by the +"ocamllex" program. Lexer buffers are an abstract data type +implemented in the standard library module "Lexing". Tokens are values from +the concrete type "token", defined in the interface file +\var{grammar}".mli" produced by "ocamlyacc". + +\section{Syntax of grammar definitions} + +Grammar definitions have the following format: +\begin{alltt} +\%\{ + \var{header} +\%\} + \var{declarations} +\%\% + \var{rules} +\%\% + \var{trailer} +\end{alltt} + +Comments are enclosed between \verb|/*| and \verb|*/| (as in C) in the +``declarations'' and ``rules'' sections, and between \verb|(*| and +\verb|*)| (as in OCaml) in the ``header'' and ``trailer'' sections. + +\subsection{Header and trailer} + +The header and the trailer sections are OCaml code that is copied +as is into file \var{grammar}".ml". Both sections are optional. The header +goes at the beginning of the output file; it usually contains +"open" directives and auxiliary functions required by the semantic +actions of the rules. The trailer goes at the end of the output file. + +\subsection{Declarations} + +Declarations are given one per line. They all start with a \verb"%" sign. + +\begin{options} + +\item[@"%token" constr \ldots constr@] +Declare the given symbols @constr \ldots constr@ +as tokens (terminal symbols). These symbols +are added as constant constructors for the "token" concrete type. + +\item[@"%token" "<" typexpr ">" constr \ldots constr@] +Declare the given symbols @constr \ldots constr@ as tokens with an +attached attribute of the +given type. These symbols are added as constructors with arguments of +the given type for the "token" concrete type. 
The @typexpr@ part is +an arbitrary OCaml type expression, except that all type +constructor names must be fully qualified (e.g. "Modname.typename") +for all types except standard built-in types, even if the proper +\verb|open| directives (e.g. \verb|open Modname|) were given in the +header section. That's because the header is copied only to the ".ml" +output file, but not to the ".mli" output file, while the @typexpr@ part +of a \verb"%token" declaration is copied to both. + +\item[@"%start" symbol \ldots symbol@] +Declare the given symbols as entry points for the grammar. For each +entry point, a parsing function with the same name is defined in the +output module. Non-terminals that are not declared as entry points +have no such parsing function. Start symbols must be given a type with +the \verb|%type| directive below. + +\item[@"%type" "<" typexpr ">" symbol \ldots symbol@] +Specify the type of the semantic attributes for the given symbols. +This is mandatory for start symbols only. Other nonterminal symbols +need not be given types by hand: these types will be inferred when +running the output files through the OCaml compiler (unless the +\verb"-s" option is in effect). The @typexpr@ part is an arbitrary OCaml +type expression, except that all type constructor names must be +fully qualified, as explained above for "%token". + +\item[@"%left" symbol \ldots symbol@] +\item[@"%right" symbol \ldots symbol@] +\item[@"%nonassoc" symbol \ldots symbol@] + +Associate precedences and associativities to the given symbols. All +symbols on the same line are given the same precedence. They have +higher precedence than symbols declared before in a \verb"%left", +\verb"%right" or \verb"%nonassoc" line. They have lower precedence +than symbols declared after in a \verb"%left", \verb"%right" or +\verb"%nonassoc" line. The symbols are declared to associate to the +left (\verb"%left"), to the right (\verb"%right"), or to be +non-associative (\verb"%nonassoc"). 
The symbols are usually tokens. +They can also be dummy nonterminals, for use with the \verb"%prec" +directive inside the rules. + +The precedence declarations are used in the following way to +resolve reduce/reduce and shift/reduce conflicts: +\begin{itemize} +\item Tokens and rules have precedences. By default, the precedence + of a rule is the precedence of its rightmost terminal. You + can override this default by using the @"%prec"@ directive in the rule. +\item A reduce/reduce conflict + is resolved in favor of the first rule (in the order given by the + source file), and "ocamlyacc" outputs a warning. +\item A shift/reduce conflict + is resolved by comparing the precedence of the rule to be + reduced with the precedence of the token to be shifted. If the + precedence of the rule is higher, then the rule will be reduced; + if the precedence of the token is higher, then the token will + be shifted. +\item A shift/reduce conflict between a rule and a token with the + same precedence will be resolved using the associativity: if the + token is left-associative, then the parser will reduce; if the + token is right-associative, then the parser will shift. If the + token is non-associative, then the parser will declare a syntax + error. +\item When a shift/reduce conflict cannot be resolved using the above + method, then "ocamlyacc" will output a warning and the parser will + always shift. +\end{itemize} + +\end{options} + +\subsection{Rules} + +The syntax for rules is as usual: +\begin{alltt} +\var{nonterminal} : + \var{symbol} \ldots \var{symbol} \{ \var{semantic-action} \} + | \ldots + | \var{symbol} \ldots \var{symbol} \{ \var{semantic-action} \} +; +\end{alltt} +% +Rules can also contain the \verb"%prec "{\it symbol} directive in the +right-hand side part, to override the default precedence and +associativity of the rule with the precedence and associativity of the +given symbol. 
+ +Semantic actions are arbitrary OCaml expressions, that +are evaluated to produce the semantic attribute attached to +the defined nonterminal. The semantic actions can access the +semantic attributes of the symbols in the right-hand side of +the rule with the \verb"$" notation: \verb"$1" is the attribute for the +first (leftmost) symbol, \verb"$2" is the attribute for the second +symbol, etc. + +The rules may contain the special symbol "error" to indicate +resynchronization points, as in "yacc". + +Actions occurring in the middle of rules are not supported. + +Nonterminal symbols are like regular OCaml symbols, except that they +cannot end with "'" (single quote). + +\subsection{Error handling} + +Error recovery is supported as follows: when the parser reaches an +error state (no grammar rules can apply), it calls a function named +"parse_error" with the string "\"syntax error\"" as argument. The default +"parse_error" function does nothing and returns, thus initiating error +recovery (see below). The user can define a customized "parse_error" +function in the header section of the grammar file. + +The parser also enters error recovery mode if one of the grammar +actions raises the "Parsing.Parse_error" exception. + +In error recovery mode, the parser discards states from the +stack until it reaches a place where the error token can be shifted. +It then discards tokens from the input until it finds three successive +tokens that can be accepted, and starts processing with the first of +these. If no state can be uncovered where the error token can be +shifted, then the parser aborts by raising the "Parsing.Parse_error" +exception. + +Refer to documentation on "yacc" for more details and guidance in how +to use error recovery. 
+ +\section{Options} + +The "ocamlyacc" command recognizes the following options: + +\begin{options} + +\item["-b"{\it prefix}] +Name the output files {\it prefix}".ml", {\it prefix}".mli", +{\it prefix}".output", instead of the default naming convention. + +\item["-q"] +This option has no effect. + +\item["-v"] +Generate a description of the parsing tables and a report on conflicts +resulting from ambiguities in the grammar. The description is put in +file \var{grammar}".output". + +\item["-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. + +\item["-"] +Read the grammar specification from standard input. The default +output file names are "stdin.ml" and "stdin.mli". + +\item["--" \var{file}] +Process \var{file} as the grammar specification, even if its name +starts with a dash (-) character. This option must be the last on the +command line. + +\end{options} + +At run-time, the "ocamlyacc"-generated parser can be debugged by +setting the "p" option in the "OCAMLRUNPARAM" environment variable +(see section~\ref{ocamlrun-options}). This causes the pushdown +automaton executing the parser to print a trace of its action (tokens +shifted, rules reduced, etc). The trace mentions rule numbers and +state numbers that can be interpreted by looking at the file +\var{grammar}".output" generated by "ocamlyacc -v". + +\section{A complete example} + +The all-time favorite: a desk calculator. This program reads +arithmetic expressions on standard input, one per line, and prints +their values. 
Here is the grammar definition: +\begin{verbatim} + /* File parser.mly */ + %token <int> INT + %token PLUS MINUS TIMES DIV + %token LPAREN RPAREN + %token EOL + %left PLUS MINUS /* lowest precedence */ + %left TIMES DIV /* medium precedence */ + %nonassoc UMINUS /* highest precedence */ + %start main /* the entry point */ + %type <int> main + %% + main: + expr EOL { $1 } + ; + expr: + INT { $1 } + | LPAREN expr RPAREN { $2 } + | expr PLUS expr { $1 + $3 } + | expr MINUS expr { $1 - $3 } + | expr TIMES expr { $1 * $3 } + | expr DIV expr { $1 / $3 } + | MINUS expr %prec UMINUS { - $2 } + ; +\end{verbatim} +Here is the definition for the corresponding lexer: +\begin{verbatim} + (* File lexer.mll *) + { + open Parser (* The type token is defined in parser.mli *) + exception Eof + } + rule token = parse + [' ' '\t'] { token lexbuf } (* skip blanks *) + | ['\n' ] { EOL } + | ['0'-'9']+ as lxm { INT(int_of_string lxm) } + | '+' { PLUS } + | '-' { MINUS } + | '*' { TIMES } + | '/' { DIV } + | '(' { LPAREN } + | ')' { RPAREN } + | eof { raise Eof } +\end{verbatim} +Here is the main program, that combines the parser with the lexer: +\begin{verbatim} + (* File calc.ml *) + let _ = + try + let lexbuf = Lexing.from_channel stdin in + while true do + let result = Parser.main Lexer.token lexbuf in + print_int result; print_newline(); flush stdout + done + with Lexer.Eof -> + exit 0 +\end{verbatim} +To compile everything, execute: +\begin{verbatim} + ocamllex lexer.mll # generates lexer.ml + ocamlyacc parser.mly # generates parser.ml and parser.mli + ocamlc -c parser.mli + ocamlc -c lexer.ml + ocamlc -c parser.ml + ocamlc -c calc.ml + ocamlc -o calc lexer.cmo parser.cmo calc.cmo +\end{verbatim} + +\section{Common errors} + +\begin{options} + +\item[ocamllex: transition table overflow, automaton is too big] + +The deterministic automata generated by "ocamllex" are limited to at +most 32767 transitions. 
The message above indicates that your lexer +definition is too complex and overflows this limit. This is commonly +caused by lexer definitions that have separate rules for each of the +alphabetic keywords of the language, as in the following example. +\begin{verbatim} +rule token = parse + "keyword1" { KWD1 } +| "keyword2" { KWD2 } +| ... +| "keyword100" { KWD100 } +| ['A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '0'-'9' '_'] * as id + { IDENT id} +\end{verbatim} +To keep the generated automata small, rewrite those definitions with +only one general ``identifier'' rule, followed by a hashtable lookup +to separate keywords from identifiers: +\begin{verbatim} +{ let keyword_table = Hashtbl.create 53 + let _ = + List.iter (fun (kwd, tok) -> Hashtbl.add keyword_table kwd tok) + [ "keyword1", KWD1; + "keyword2", KWD2; ... + "keyword100", KWD100 ] +} +rule token = parse + ['A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '0'-'9' '_'] * as id + { try + Hashtbl.find keyword_table id + with Not_found -> + IDENT id } +\end{verbatim} + +\item[ocamllex: Position memory overflow, too many bindings] +The deterministic automata generated by "ocamllex" maintain a table of +positions inside the scanned lexer buffer. The size of this table is +limited to at most 255 cells. This error should not show up in normal +situations. + +\end{options} diff --git a/manual/manual/cmds/native.etex b/manual/manual/cmds/native.etex new file mode 100644 index 0000000000..2de5539716 --- /dev/null +++ b/manual/manual/cmds/native.etex @@ -0,0 +1,600 @@ +\chapter{Native-code compilation (ocamlopt)} \label{c:nativecomp} +\pdfchapter{Native-code compilation (ocamlopt)} +%HEVEA\cutname{native.html} + +This chapter describes the OCaml high-performance +native-code compiler "ocamlopt", which compiles OCaml source files to +native code object files and links these object files to produce +standalone executables. + +The native-code compiler is only available on certain platforms. 
+It produces code that runs faster than the bytecode produced by +"ocamlc", at the cost of increased compilation time and executable code +size. Compatibility with the bytecode compiler is extremely high: the +same source code should run identically when compiled with "ocamlc" and +"ocamlopt". + +It is not possible to mix native-code object files produced by "ocamlopt" +with bytecode object files produced by "ocamlc": a program must be +compiled entirely with "ocamlopt" or entirely with "ocamlc". Native-code +object files produced by "ocamlopt" cannot be loaded in the toplevel +system "ocaml". + +\section{Overview of the compiler} + +The "ocamlopt" command has a command-line interface very close to that +of "ocamlc". It accepts the same types of arguments, and processes them +sequentially: + +\begin{itemize} +\item +Arguments ending in ".mli" are taken to be source files for +compilation unit interfaces. Interfaces specify the names exported by +compilation units: they declare value names with their types, define +public data types, declare abstract data types, and so on. From the +file \var{x}".mli", the "ocamlopt" compiler produces a compiled interface +in the file \var{x}".cmi". The interface produced is identical to that +produced by the bytecode compiler "ocamlc". + +\item +Arguments ending in ".ml" are taken to be source files for compilation +unit implementations. Implementations provide definitions for the +names exported by the unit, and also contain expressions to be +evaluated for their side-effects. From the file \var{x}".ml", the "ocamlopt" +compiler produces two files: \var{x}".o", containing native object code, +and \var{x}".cmx", containing extra information for linking and +optimization of the clients of the unit. The compiled implementation +should always be referred to under the name \var{x}".cmx" (when given +a ".o" or ".obj" file, "ocamlopt" assumes that it contains code compiled from C, +not from OCaml). 
+ +The implementation is checked against the interface file \var{x}".mli" +(if it exists) as described in the manual for "ocamlc" +(chapter~\ref{c:camlc}). + +\item +Arguments ending in ".cmx" are taken to be compiled object code. These +files are linked together, along with the object files obtained +by compiling ".ml" arguments (if any), and the OCaml standard +library, to produce a native-code executable program. The order in +which ".cmx" and ".ml" arguments are presented on the command line is +relevant: compilation units are initialized in that order at +run-time, and it is a link-time error to use a component of a unit +before having initialized it. Hence, a given \var{x}".cmx" file must come +before all ".cmx" files that refer to the unit \var{x}. + +\item +Arguments ending in ".cmxa" are taken to be libraries of object code. +Such a library packs in two files (\var{lib}".cmxa" and \var{lib}".a"/".lib") +a set of object files (".cmx" and ".o"/".obj" files). Libraries are built with +"ocamlopt -a" (see the description of the "-a" option below). The object +files contained in the library are linked as regular ".cmx" files (see +above), in the order specified when the library was built. The only +difference is that if an object file contained in a library is not +referenced anywhere in the program, then it is not linked in. + +\item +Arguments ending in ".c" are passed to the C compiler, which generates +a ".o"/".obj" object file. This object file is linked with the program. + +\item +Arguments ending in ".o", ".a" or ".so" (".obj", ".lib" and ".dll" +under Windows) are assumed to be C object files and +libraries. They are linked with the program. + +\end{itemize} + +The output of the linking phase is a regular Unix or Windows +executable file. It does not need "ocamlrun" to run. + +\section{Options} + +The following command-line options are recognized by "ocamlopt". +The options "-pack", "-a", "-shared", "-c" and "-output-obj" are mutually +exclusive. 
+ +\begin{options} + +\item["-a"] +Build a library (".cmxa" and ".a"/".lib" files) with the object files +(".cmx" and ".o"/".obj" files) given on the command line, instead of +linking them into an executable file. The name of the library must be +set with the "-o" option. + +If "-cclib" or "-ccopt" options are passed on the command +line, these options are stored in the resulting ".cmxa" library. Then, +linking with this library automatically adds back the +"-cclib" and "-ccopt" options as if they had been provided on the +command line, unless the "-noautolink" option is given. + +\item["-absname"] +Force error messages to show absolute paths for file names. + +\item["-annot"] +Dump detailed information about the compilation (types, bindings, +tail-calls, etc). The information for file \var{src}".ml" +is put into file \var{src}".annot". In case of a type error, dump +all the information inferred by the type-checker before the error. +The \var{src}".annot" file can be used with the emacs commands given in +"emacs/caml-types.el" to display types and other annotations +interactively. + +\item["-bin-annot"] +Dump detailed information about the compilation (types, bindings, +tail-calls, etc) in binary format. The information for file \var{src}".ml" +is put into file \var{src}".cmt". In case of a type error, dump +all the information inferred by the type-checker before the error. +The "*.cmt" files produced by "-bin-annot" contain more information +and are much more compact than the files produced by "-annot". + +\item["-c"] +Compile only. Suppress the linking phase of the +compilation. Source code files are turned into compiled files, but no +executable file is produced. This option is useful to +compile modules separately. + +\item["-cc" \var{ccomp}] +Use \var{ccomp} as the C linker called to build the final executable +and as the C compiler for compiling ".c" source files. + +\item["-cclib" "-l"\var{libname}] +Pass the "-l"\var{libname} option to the linker. 
This causes the given +C library to be linked with the program. + +\item["-ccopt" \var{option}] +Pass the given option to the C compiler and linker. For instance, +"-ccopt -L"\var{dir} causes the C linker to search for C libraries in +directory \var{dir}. + +\item["-compact"] +Optimize the produced code for space rather than for time. This +results in slightly smaller but slightly slower programs. The default is to +optimize for speed. + +\item["-config"] +Print the version number of "ocamlopt" and a detailed summary of its +configuration, then exit. + +\item["-for-pack" \var{module-path}] +Generate an object file (".cmx" and ".o"/".obj" files) that can later be +included +as a sub-module (with the given access path) of a compilation unit +constructed with "-pack". For instance, "ocamlopt -for-pack P -c A.ml" +will generate "a.cmx" and "a.o" files that can later be used with +"ocamlopt -pack -o P.cmx a.cmx". + +\item["-g"] +Add debugging information while compiling and linking. This option is +required in order to produce stack backtraces when +the program terminates on an uncaught exception (see +section~\ref{ocamlrun-options}). + +\item["-i"] +Cause the compiler to print all defined names (with their inferred +types or their definitions) when compiling an implementation (".ml" +file). No compiled files (".cmo" and ".cmi" files) are produced. +This can be useful to check the types inferred by the +compiler. Also, since the output follows the syntax of interfaces, it +can help in writing an explicit interface (".mli" file) for a file: +just redirect the standard output of the compiler to a ".mli" file, +and edit that file to remove all declarations of unexported names. + +\item["-I" \var{directory}] +Add the given directory to the list of directories searched for +compiled interface files (".cmi"), compiled object code files +(".cmx"), and libraries (".cmxa"). By default, the current directory +is searched first, then the standard library directory. 
Directories +added with "-I" are searched after the current directory, in the order +in which they were given on the command line, but before the standard +library directory. See also option "-nostdlib". + +If the given directory starts with "+", it is taken relative to the +standard library directory. For instance, "-I +labltk" adds the +subdirectory "labltk" of the standard library to the search path. + +\item["-impl" \var{filename}] +Compile the file \var{filename} as an implementation file, even if its +extension is not ".ml". + +\item["-inline" \var{n}] +Set aggressiveness of inlining to \var{n}, where \var{n} is a positive +integer. Specifying "-inline 0" prevents all functions from being +inlined, except those whose body is smaller than the call site. Thus, +inlining causes no expansion in code size. The default aggressiveness, +"-inline 1", allows slightly larger functions to be inlined, resulting +in a slight expansion in code size. Higher values for the "-inline" +option cause larger and larger functions to become candidates for +inlining, but can result in a serious increase in code size. + +\item["-intf" \var{filename}] +Compile the file \var{filename} as an interface file, even if its +extension is not ".mli". + +\item["-intf-suffix" \var{string}] +Recognize file names ending with \var{string} as interface files +(instead of the default ".mli"). + +\item["-labels"] +Labels are not ignored in types, labels may be used in applications, +and labelled parameters can be given in any order. This is the default. + +\item["-linkall"] +Force all modules contained in libraries to be linked in. If this +flag is not given, unreferenced modules are not linked in. When +building a library ("-a" flag), setting the "-linkall" flag forces all +subsequent links of programs involving that library to link all the +modules contained in the library. + +\item["-no-app-funct"] +Deactivates the applicative behaviour of functors. 
With this option, +each functor application generates new types in its result and +applying the same functor twice to the same argument yields two +incompatible structures. + +\item["-noassert"] +Do not compile assertion checks. Note that the special form +"assert false" is always compiled because it is typed specially. +This flag has no effect when linking already-compiled files. + +\item["-noautolink"] +When linking ".cmxa" libraries, ignore "-cclib" and "-ccopt" +options potentially contained in the libraries (if these options were +given when building the libraries). This can be useful if a library +contains incorrect specifications of C libraries or C options; in this +case, during linking, set "-noautolink" and pass the correct C +libraries and options on the command line. + +\item["-nodynlink"] +Allow the compiler to use some optimizations that are valid only for code +that is never dynlinked. + +\item["-nolabels"] +Ignore non-optional labels in types. Labels cannot be used in +applications, and parameter order becomes strict. + +\item["-nostdlib"] +Do not automatically add the standard library directory to the list of +directories searched for compiled interface files (".cmi"), compiled +object code files (".cmx"), and libraries (".cmxa"). See also option +"-I". + +\item["-o" \var{exec-file}] +Specify the name of the output file produced by the linker. The +default output name is "a.out" under Unix and "camlprog.exe" under +Windows. If the "-a" option is given, specify the name of the library +produced. If the "-pack" option is given, specify the name of the +packed object file produced. If the "-output-obj" option is given, +specify the name of the output file produced. If the "-shared" option +is given, specify the name of the plugin file produced. + +\item["-output-obj"] +Cause the linker to produce a C object file instead of an executable +file. This is useful to wrap OCaml code as a C library, +callable from any C program. 
See chapter~\ref{c:intf-c}, +section~\ref{s:embedded-code}. The name of the output object file +must be set with the "-o" option. +This option can also be used to produce a compiled shared/dynamic +library (".so" extension, ".dll" under Windows). + +\item["-p"] +Generate extra code to write profile information when the program is +executed. The profile information can then be examined with the +analysis program "gprof". (See chapter~\ref{c:profiler} for more +information on profiling.) The "-p" option must be given both at +compile-time and at link-time. Linking object files not compiled with +"-p" is possible, but results in less precise profiling. + +\begin{unix} See the Unix manual page for "gprof(1)" for more +information about the profiles. + +Full support for "gprof" is only available for certain platforms +(currently: Intel x86 32 and 64 bits under Linux, BSD and MacOS X). +On other platforms, the "-p" option will result in a less precise +profile (no call graph information, only a time profile). +\end{unix} + +\begin{windows} +The "-p" option does not work under Windows. +\end{windows} + +\item["-pack"] +Build an object file (".cmx" and ".o"/".obj" files) and its associated compiled +interface (".cmi") that combines the ".cmx" object +files given on the command line, making them appear as sub-modules of +the output ".cmx" file. The name of the output ".cmx" file must be +given with the "-o" option. For instance, +\begin{verbatim} + ocamlopt -pack -o P.cmx A.cmx B.cmx C.cmx +\end{verbatim} +generates compiled files "P.cmx", "P.o" and "P.cmi" describing a +compilation unit having three sub-modules "A", "B" and "C", +corresponding to the contents of the object files "A.cmx", "B.cmx" and +"C.cmx". These contents can be referenced as "P.A", "P.B" and "P.C" +in the remainder of the program. + +The ".cmx" object files being combined must have been compiled with +the appropriate "-for-pack" option. 
In the example above, +"A.cmx", "B.cmx" and "C.cmx" must have been compiled with +"ocamlopt -for-pack P". + +Multiple levels of packing can be achieved by combining "-pack" with +"-for-pack". Consider the following example: +\begin{verbatim} + ocamlopt -for-pack P.Q -c A.ml + ocamlopt -pack -o Q.cmx -for-pack P A.cmx + ocamlopt -for-pack P -c B.ml + ocamlopt -pack -o P.cmx Q.cmx B.cmx +\end{verbatim} +The resulting "P.cmx" object file has sub-modules "P.Q", "P.Q.A" +and "P.B". + +\item["-pp" \var{command}] +Cause the compiler to call the given \var{command} as a preprocessor +for each source file. The output of \var{command} is redirected to +an intermediate file, which is compiled. If there are no compilation +errors, the intermediate file is deleted afterwards. + +\item["-ppx" \var{command}] +After parsing, pipe the abstract syntax tree through the preprocessor +\var{command}. The module "Ast_mapper", described in +chapter~\ref{Ast-underscoremapper}, implements the external interface +of a preprocessor. + +\item["-principal"] +Check information path during type-checking, to make sure that all +types are derived in a principal way. All programs accepted in +"-principal" mode are also accepted in default mode with equivalent +types, but different binary signatures. + +\item["-rectypes"] +Allow arbitrary recursive types during type-checking. By default, +only recursive types where the recursion goes through an object type +are supported. Note that once you have created an interface using this +flag, you must use it again for all dependencies. + +\item["-runtime-variant" \var{suffix}] +Add the \var{suffix} string to the name of the runtime library used by +the program. Currently, only one such suffix is supported: "d", and +only if the OCaml compiler was configured with option +"-with-debug-runtime". This suffix gives the debug version of the +runtime, which is useful for debugging pointer problems in low-level +code such as C stubs. 
+ +\item["-S"] +Keep the assembly code produced during the compilation. The assembly +code for the source file \var{x}".ml" is saved in the file \var{x}".s". + +\item["-shared"] +Build a plugin (usually ".cmxs") that can be dynamically loaded with +the "Dynlink" module. The name of the plugin must be +set with the "-o" option. A plugin can include a number of OCaml +modules and libraries, and extra native objects (".o", ".obj", ".a", +".lib" files). Building native plugins is only supported for some +operating systems. Under some systems (currently, +only Linux AMD 64), all the OCaml code linked in a plugin must have +been compiled without the "-nodynlink" flag. Some constraints might also +apply to the way the extra native objects have been compiled (under +Linux AMD 64, they must contain only position-independent code). + +\item["-safe-string"] +Enforce the separation between types "string" and "bytes", +thereby making strings read-only. This will become the default in +a future version of OCaml. + +\item["-short-paths"] +When a type is visible under several module-paths, use the shortest +one when printing the type's name in inferred interfaces and error and +warning messages. + +\item["-strict-sequence"] +Force the left-hand part of each sequence to have type unit. + +\item["-strict-formats"] +Reject invalid formats that were accepted in legacy format +implementations. You should use this flag to detect and fix such +invalid formats, as they will be rejected by future OCaml versions. + +\item["-thread"] +Compile or link multithreaded programs, in combination with the +system "threads" library described in chapter~\ref{c:threads}. + +\item["-unsafe"] +Turn bound checking off for array and string accesses (the "v.(i)" and +"s.[i]" constructs). Programs compiled with "-unsafe" are therefore +faster, but unsafe: anything can happen if the program accesses an +array or string outside of its bounds. 
Additionally, turn off the +check for zero divisor in integer division and modulus operations. +With "-unsafe", an integer division (or modulus) by zero can halt the +program or continue with an unspecified result instead of raising a +"Division_by_zero" exception. + +\item["-unsafe-string"] +Identify the types "string" and "bytes", +thereby making strings writable. For reasons of backward compatibility, +this is the default setting for the moment, but this will change in a future +version of OCaml. + +\item["-v"] +Print the version number of the compiler and the location of the +standard library directory, then exit. + +\item["-verbose"] +Print all external commands before they are executed, in particular +invocations of the assembler, C compiler, and linker. + +\item["-version" or "-vnum"] +Print the version number of the compiler in short form (e.g. "3.11.0"), +then exit. + +\item["-w" \var{warning-list}] +Enable, disable, or mark as fatal the warnings specified by the argument +\var{warning-list}. +Each warning can be {\em enabled} or {\em disabled}, and each warning +can be {\em fatal} or {\em non-fatal}. +If a warning is disabled, it isn't displayed and doesn't affect +compilation in any way (even if it is fatal). If a warning is +enabled, it is displayed normally by the compiler whenever the source +code triggers it. If it is enabled and fatal, the compiler will also +stop with an error after displaying it. + +The \var{warning-list} argument is a sequence of warning specifiers, +with no separators between them. A warning specifier is one of the +following: + +\begin{options} +\item["+"\var{num}] Enable warning number \var{num}. +\item["-"\var{num}] Disable warning number \var{num}. +\item["@"\var{num}] Enable and mark as fatal warning number \var{num}. +\item["+"\var{num1}..\var{num2}] Enable warnings in the given range. +\item["-"\var{num1}..\var{num2}] Disable warnings in the given range. 
+\item["@"\var{num1}..\var{num2}] Enable and mark as fatal warnings in +the given range. +\item["+"\var{letter}] Enable the set of warnings corresponding to +\var{letter}. The letter may be uppercase or lowercase. +\item["-"\var{letter}] Disable the set of warnings corresponding to +\var{letter}. The letter may be uppercase or lowercase. +\item["@"\var{letter}] Enable and mark as fatal the set of warnings +corresponding to \var{letter}. The letter may be uppercase or +lowercase. +\item[\var{uppercase-letter}] Enable the set of warnings corresponding +to \var{uppercase-letter}. +\item[\var{lowercase-letter}] Disable the set of warnings corresponding +to \var{lowercase-letter}. +\end{options} + +Warning numbers and letters which are out of the range of warnings +that are currently defined are ignored. The warnings are as follows. +\begin{options} +\input{warnings-help.tex} +\end{options} + +The default setting is "-w +a-4-6-7-9-27-29-32..39-41..42-44-45". +It is displayed by "ocamlopt -help". +Note that warnings 5 and 10 are not always triggered, depending on +the internals of the type checker. + +\item["-warn-error" \var{warning-list}] +Mark as fatal the warnings specified in the argument \var{warning-list}. +The compiler will stop with an error when one of these warnings is +emitted. The \var{warning-list} has the same meaning as for +the "-w" option: a "+" sign (or an uppercase letter) marks the +corresponding warnings as fatal, a "-" +sign (or a lowercase letter) turns them back into non-fatal warnings, +and a "@" sign both enables and marks as fatal the corresponding +warnings. + +Note: it is not recommended to use warning sets (i.e. letters) as +arguments to "-warn-error" +in production code, because this can break your build when future versions +of OCaml add some new warnings. + +The default setting is "-warn-error -a" (all warnings are non-fatal). + +\item["-warn-help"] +Show the description of all available warning numbers. 
+ +\item["-where"] +Print the location of the standard library, then exit. + +\item["-" \var{file}] +Process \var{file} as a file name, even if it starts with a dash (-) +character. + +\item["-help" or "--help"] +Display a short usage summary and exit. +% +\end{options} + +\paragraph{Options for the IA32 architecture} +The IA32 code generator (Intel Pentium, AMD Athlon) supports the +following additional option: + +\begin{options} +\item["-ffast-math"] Use the IA32 instructions to compute +trigonometric and exponential functions, instead of calling the +corresponding library routines. The functions affected are: +"atan", "atan2", "cos", "log", "log10", "sin", "sqrt" and "tan". +The resulting code runs faster, but the range of supported arguments +and the precision of the result can be reduced. In particular, +trigonometric operations "cos", "sin", "tan" have their range reduced to +$[-2^{64}, 2^{64}]$. +\end{options} + +\paragraph{Options for the AMD64 architecture} +The AMD64 code generator (64-bit versions of Intel Pentium and AMD +Athlon) supports the following additional options: + +\begin{options} +\item["-fPIC"] Generate position-independent machine code. This is +the default. +\item["-fno-PIC"] Generate position-dependent machine code. +\end{options} + +\paragraph{Options for the Sparc architecture} +The Sparc code generator supports the following additional options: +\begin{options} +\item["-march=v8"] Generate SPARC version 8 code. +\item["-march=v9"] Generate SPARC version 9 code. +\end{options} +The default is to generate code for SPARC version 7, which runs on all +SPARC processors. + +\section{Common errors} + +The error messages are almost identical to those of "ocamlc". +See section~\ref{s:comp-errors}. + +\section{Running executables produced by ocamlopt} + +Executables generated by "ocamlopt" are native, stand-alone executable +files that can be invoked directly. 
They do +not depend on the "ocamlrun" bytecode runtime system nor on +dynamically-loaded C/OCaml stub libraries. + +During execution of an "ocamlopt"-generated executable, +the following environment variables are also consulted: +\begin{options} +\item["OCAMLRUNPARAM"] Same usage as in "ocamlrun" + (see section~\ref{ocamlrun-options}), except that option "l" + is ignored (the operating system's stack size limit + is used instead). +\item["CAMLRUNPARAM"] If "OCAMLRUNPARAM" is not found in the + environment, then "CAMLRUNPARAM" will be used instead. If + "CAMLRUNPARAM" is not found, then the default values will be used. +\end{options} + +\section{Compatibility with the bytecode compiler} +\label{s:compat-native-bytecode} + +This section lists the known incompatibilities between the bytecode +compiler and the native-code compiler. Except on those points, the two +compilers should generate code that behaves identically. + +\begin{itemize} + +\item Signals are detected only when the program performs an +allocation in the heap. That is, if a signal is delivered while in a +piece of code that does not allocate, its handler will not be called +until the next heap allocation. + +\item Stack overflow, typically caused by excessively deep recursion, +is handled in one of the following ways, depending on the +platform used: +\begin{itemize} +\item By raising a "Stack_overflow" exception, like the bytecode + compiler does. (IA32/Linux, AMD64/Linux, PowerPC/MacOSX, MS Windows + 32-bit ports). +\item By aborting the program on a ``segmentation fault'' signal. +(All other Unix systems.) +\item By terminating the program silently. +(MS Windows 64 bits). +\end{itemize} + +\item On IA32 processors only (Intel and AMD x86 processors in 32-bit +mode), some intermediate results in floating-point computations are +kept in extended precision rather than being rounded to double +precision like the bytecode compiler always does. 
Floating-point +results can therefore differ slightly between bytecode and native code. + +\end{itemize} + diff --git a/manual/manual/cmds/ocamlbuild.etex b/manual/manual/cmds/ocamlbuild.etex new file mode 100644 index 0000000000..cf09fb3495 --- /dev/null +++ b/manual/manual/cmds/ocamlbuild.etex @@ -0,0 +1,1059 @@ +% -*- LaTeX -*- +%(***********************************************************************) +%(* ocamlbuild *) +%(* *) +%(* Nicolas Pouillard, Berke Durak, projet Gallium, INRIA Rocquencourt *) +%(* *) +%(* Copyright 2007 Institut National de Recherche en Informatique et *) +%(* en Automatique. All rights reserved. This file is distributed *) +%(* under the terms of the Q Public License version 1.0. *) +%(* *) +%(***********************************************************************) + +%(*** preamble +%Luc: definition moved in preamble, otherwise hevea does +%not see them while formating table of contents +%\newcommand{\ocb}{\texttt{ocamlbuild}\xspace} +%\newcommand{\tags}{\texttt{\_tags}\xspace} +%***) +%(*** title +\chapter{The ocamlbuild compilation manager} \label{c:ocamlbuild} +\pdfchapter{The ocamlbuild compilation manager} +%HEVEA\cutname{ocamlbuild.html} + +{\it (Chapter written by Berke Durak and Nicolas Pouillard)} + +\bigskip + +\ocb is a tool automating the compilation of most OCaml projects with minimal +user input. Its use is not restricted to projects having a simple structure -- +the extra effort needed to make it work with the more complex projects is in +reasonable proportion with their added complexity. In practice, one will use a +set of small text files, and, if needed, an OCaml compilation module that can +fine-tune the behaviour and define custom rules. 
+ +%***) +%(*** Features of ocamlbuild +\section{Features of \ocb} +{\em This section is intended to read like a sales brochure or a datasheet.} + +\begin{itemize} +\item Built-in compilation rules for OCaml projects handle all the nasty cases: +native and byte-code, missing \texttt{.mli} files, preprocessor rules, +libraries, package (-pack) debugging and profiling flags, C stubs. +\item Plugin mechanism for writing compilation rules and actions in a real programming language, +OCaml itself. +\item Automatic inference of dependencies. +\item Correct handling of dynamically discovered dependencies. +\item Object files and other temporary files are created in a specific directory, leaving your main directory uncluttered. +\item Sanity checks ensure that object files are where they are supposed to be: in the build directory. +\item Regular projects are built using a single command with no extra files. +\item Parallel compilation to speed up things on multi-core systems. +\item Sophisticated display mode to keep your screen free of boring and repetitive compilation message +while giving you important progress information in a glimpse, and correctly multiplexing the error messages. +\item Tags and flags provide a concise and convenient mechanism for automatic selection of compilation, preprocessing and +other options. +\item Extended shell-like glob patterns, that can be combined using boolean operators, +allow you to concisely define the tags that apply to a given file. +\item Mechanisms for defining the mutual visibility of subdirectories. +\item Cache mechanism avoiding unnecessary compilations where reasonably computable. +\end{itemize} +%***) +%(*** Limitations +\section{Limitations} +{\em Not perfect nor complete yet, but already pretty damn useful.} + +We were not expecting to write the ultimate compilation tool in a few man-months, however we believe we have +a tool that solves many compilation problems, especially our own, in a satisfactory way. 
Hence there are a +lot of missing features, incomplete options and hideous bugs lurking in \ocb, and we hope that the OCaml community +will find our first try at \ocb useful and hopefully help it grow into a tool that satisfies most needs of most users +by providing feedback, bug reports and patches. + +The plugin API may be somewhat lacking in maturity, as it has only been tested +by a few people. We believe a good API can only evolve under pressure from +many peers and the courage to rewrite things cleanly when time is ripe by the +developers. Most of the important functions a user will need are encapsulated +in the plugin API, which is the \texttt{Ocamlbuild\_plugin} module pack. We +intend to keep that API backwards compatible. It may happen that intricate +projects need features not available in that module -- you may then use +functions or values directly from the core \ocb modules. We ask you to report +such usage to the authors so that we may make the necessary changes to the API; +you may also want to isolate calls to the non-API parts of the \ocb library +from the rest of your plugin to be able to keep the latter when incompatible +changes arise. + +The way that \ocb handles the command-line options, the \tags file, +the target names, names of the tags, and so on, are not expected to change in +incompatible ways. We intend to keep a project that compiles without a plugin +compilable without modifications in the future. +%***) +%(*** Using ocamlbuild +\section{Using \ocb} +{\em Learn how to use \ocb with short, specific, straight-to-the-point examples.} + +The amount of time and effort spent on the compilation process of a project +should be proportionate to that spent on the project itself. It should be easy +to set up a small project, maybe a little harder for a medium-sized project, +and it may take some more time, but not too much, for a big project. Ideally +setting up a big project would be as easy as setting up a small project. 
However, +as projects grow, modularization techniques start to be used, and the probability +of using meta programming or multiple programming languages increases, thus making +the compilation process more delicate. + +\ocb is intended to be very easy to use for projects, large or small, with a simple +compilation process: typing +\texttt{ocamlbuild foo.native} should be enough to compile the native version +of a program whose top module is \texttt{foo.ml} and whose dependencies are in +the same directory. As your project gets more complex, you will gradually +start to use command-line options to specify libraries to link with, then +configuration files, ultimately culminating in a custom OCaml plugin for +complex projects with arbitrary dependencies and actions. + +%(*** Hygiene *) +\subsection{Hygiene \& where is my code ?} +Your code is in the \texttt{\_build} directory, but \ocb automatically creates +a symbolic link to the executables it produces in the current directory. +\ocb copies the source files and compiles them in a separate directory +which is \texttt{\_build} by default. + +For \ocb, any file that is not in the build directory is a source file. +It is not unreasonable to think that some users may have bought binary object files +they keep in their project directory. Usually binary files cluttering the project +directory are due to previous builds using other systems. \ocb has so-called +``hygiene'' rules that state that object files (\texttt{.cmo}, \texttt{.cmi}, +or \texttt{.o} files, for instance) must not appear outside of the build +directory. These rules are enforced at startup; any violations will be reported +and \ocb will exit. You must then remove these files by hand or run, with caution, +the script \texttt{sanitize.sh}, which is generated in your source directory. +This script will contain commands to remove them for you. + +To disable these checks, you can use the \texttt{-no-hygiene} flag. 
If you have +files that must elude the hygiene squad, just tag them with \texttt{precious} +or \texttt{not\_hygienic}. +%***) +%(*** Hello, world ! +\subsection{Hello, world !} +Assuming we are in a directory named \texttt{example1} containing one file \texttt{hello.ml} +whose contents are +\begin{verbatim} +let _ = + Printf.printf "Hello, %s ! My name is %s\n" + (if Array.length Sys.argv > 1 then Sys.argv.(1) else "stranger") + Sys.argv.(0) +;; +\end{verbatim} +we can compile and link it into a native executable by invoking \texttt{ocamlbuild hello.native}. +Here, \texttt{hello} is the basename of the top-level module and \texttt{native} is an extension used +by \ocb to denote native code executables. +\begin{verbatim} +% ls +hello.ml +% ocamlbuild hello.native +Finished, 4 targets (0 cached) in 00:00:00. +% ls -l +total 12 +drwxrwx--- 2 linus gallium 4096 2007-01-17 16:24 _build/ +-rw-rw---- 1 linus gallium 43 2007-01-17 16:23 hello.ml +lrwxrwxrwx 1 linus gallium 19 2007-01-17 16:24 hello.native -> _build/hello.native* +\end{verbatim} +What's this funny \texttt{\_build} directory ? Well that's where \ocb does its dirty work +of compiling. You usually won't have to look very often into this directory. Source files are copied +into \texttt{\_build} and this is where the compilers will be run. Various cache files are also stored +there. 
Its contents may look like this: +\begin{verbatim} +% ls -l _build +total 208 +-rw-rw---- 1 linus gallium 337 2007-01-17 16:24 _digests +-rw-rw---- 1 linus gallium 191 2007-01-17 16:24 hello.cmi +-rw-rw---- 1 linus gallium 262 2007-01-17 16:24 hello.cmo +-rw-rw---- 1 linus gallium 225 2007-01-17 16:24 hello.cmx +-rw-rw---- 1 linus gallium 43 2007-01-17 16:23 hello.ml +-rw-rw---- 1 linus gallium 17 2007-01-17 16:24 hello.ml.depends +-rwxrwx--- 1 linus gallium 173528 2007-01-17 16:24 hello.native* +-rw-rw---- 1 linus gallium 936 2007-01-17 16:24 hello.o +-rw-rw---- 1 linus gallium 22 2007-01-17 16:24 ocamlc.where +\end{verbatim} +%***) +%(*** Executing my code +\subsection{Executing my code} +You can execute your code the old-fashioned way (\texttt{./hello.native}). +You may also type +\begin{verbatim} +ocamlbuild hello.native -- Caesar +\end{verbatim} +and it will compile and then run \texttt{hello.native} with the arguments following \texttt{-{}-}, +which should display: +\begin{verbatim} +% ocamlbuild hello.native -- Caesar +Finished, 4 targets (0 cached) in 00:00:00. +Hello, Caesar ! My name is _build/hello.native +\end{verbatim} +%***) +%(*** The log file, verbosity and debugging +\subsection{The log file, verbosity and debugging} +By default, if you run \ocb on a terminal, it will use some ANSI escape sequences +to display a nice, one-line progress indicator. To see what commands \ocb has actually run, +you can check the contents of the \texttt{\_build/\_log} file. To change the name of the +log file or to disable logging, use the \texttt{-log <file>} or \texttt{-no-log} options. +Note that the log file is truncated at each execution of \ocb. + +The log file contains all the external commands that \ocb ran or intended to +run along with the target name and the computed tags. 
With the +\texttt{-verbose <level>} option, \ocb will also write more or less useful +debugging information; a verbosity level of $1$ (which can also be specified +using the \texttt{-verbose} switch) prints generally useful information; higher +levels produce much more output. +%***) +%(*** Cleaning +\subsection{Cleaning} +\ocb may leave a \texttt{\_build} directory and symbolic links to executables in +that directory (unless the \texttt{-no-links} option is used). All of these can be removed safely +by hand, or by invoking \ocb with the \texttt{-clean} flag. +%***) +%(*** Where and how to run \ocb +\subsection{Where and how to run \ocb ?} +An important point is that \ocb must be invoked from the root of the project, +even if this project has multiple, nested subdirectories. This is because \ocb +likes to store the object files in a single \texttt{\_build} directory. You +can change the name of that directory with the \texttt{-build-dir} option. + +\ocb can be invoked either manually from the UNIX or Windows shell, or +automatically from a build script or a Makefile. Unless run with the +\texttt{-no-hygiene} option, there is the possibility that \ocb will prompt the +user for a response. By default, on UNIX systems, if \ocb senses that the +standard output is a terminal, it will use a nice progress indicator using ANSI +codes, instrumenting the output of the processes it spawns to have a consistent +display. Under non-UNIX systems, or if the standard output is not a terminal, +it will run in classic mode where it will echo the executed commands on its +standard output. This selection can be overridden with the \texttt{-classic-display} option. +%***) +%(*** Dependencies +\subsection{Dependencies} +{\em Dependencies are automatically discovered.} + +Most of the value of \ocb lies in the fact that it often needs no extra +information to compile a project besides the name of the top-level module.
+\ocb calls \texttt{ocamldep} to automatically find the dependencies of any +modules it wants to compile. These dependencies are dynamically incorporated +in the dependency graph, something \texttt{make} cannot do. +For instance, let's add a module \texttt{Greet} that implements various ways of +greeting people. +\begin{verbatim} +% cat greet.ml +type how = Nicely | Badly;; + +let greet how who = + match how with Nicely -> Printf.printf "Hello, %s !\n" who + | Badly -> Printf.printf "Oh, here is that %s again.\n" who +;; +% cat hello.ml +open Greet + +let _ = + let name = + if Array.length Sys.argv > 1 then + Sys.argv.(1) + else + "stranger" + in + greet + (if name = "Caesar" then Nicely else Badly) + name; + Printf.printf "My name is %s\n" Sys.argv.(0) +;; +\end{verbatim} +Then the module \texttt{Hello} depends on the module \texttt{Greet} and \ocb can +figure this out for itself -- we still only have to invoke \texttt{\ocb +hello.native}. Needless to say, this works for any number of modules. +%***) +%(*** Native and byte code +\subsection{Native and byte-code} +If we want to compile byte-code instead of native, we just use a target name of +\texttt{hello.byte} instead of \texttt{hello.native}, i.e., we type +\texttt{\ocb hello.byte}. +%***) +%(*** Compile flags +\subsection{Compile flags} +To pass a flag to the compiler, such as the \texttt{-rectypes} option, +use the \texttt{-cflag} option as in: +\begin{verbatim} +ocamlbuild -cflag -rectypes hello.native +\end{verbatim} +You can put multiple \texttt{-cflag} options; they will be passed to the compiler +in the same order. You can also give them in a comma-separated list with the +\texttt{-cflags} option (notice the plural): +\begin{verbatim} +ocamlbuild -cflags -I,+lablgtk,-rectypes hello.native +\end{verbatim} +These flags apply when compiling, that is, when producing \texttt{.cmi}, +\texttt{.cmo}, \texttt{.cmx} and \texttt{.o} files from \texttt{.ml} or +\texttt{.mli} files.
+%***) +%(*** Link flags +\subsection{Link flags} +Link flags apply when the various object files are collected and linked into +one executable. These will typically be include directories for libraries. +They are given using the \texttt{-lflag} and \texttt{-lflags} options, which +work in the same way as the \texttt{-cflag} and \texttt{-cflags} options. +%***) +%(*** Linking with external libraries +\subsection{Linking with external libraries} +In our third example, we use one Unix system call and functions from the \texttt{num} +library: +\begin{verbatim} +% cat epoch.ml +let _ = + let s = Num.num_of_string (Printf.sprintf "%.0f" (Unix.gettimeofday ())) in + let ps = Num.mult_num (Num.num_of_string "1000000000000") s in + Printf.printf "%s picoseconds have passed since January 1st, 1970.\n" + (Num.string_of_num ps) +;; +\end{verbatim} +This requires linking with the \texttt{unix} and \texttt{nums} libraries, which is accomplished +by using the \texttt{-lib unix} and \texttt{-lib nums} flags, or, alternatively, \texttt{-libs unix,nums}: +\begin{verbatim} +% ocamlbuild -libs nums,unix epoch.native -- +Finished, 4 targets (4 cached) in 00:00:00. +1169051647000000000000 picoseconds have passed since January 1st, 1970. +\end{verbatim} +You may need to add options such as \texttt{-cflags -I,/usr/local/lib/ocaml/} +and \texttt{-lflags -I,/usr/local/lib/ocaml/} if the libraries you wish to +link with are not in OCaml's default search path. +%***) +%(*** The _tags files +\subsection{The \tags files} +Finer control over the compiler flags applied to each source file, such as +preprocessing, debugging, profiling and linking options, can be gained using +\ocb's tagging mechanism. + +Every source file has a set of tags which tells \ocb what kind of file it is +and what to do with it. A tag is simply a string, usually lowercase, for +example \texttt{ocaml} or \texttt{native}. The set of tags attached to a file +is computed by applying the tagging rules to the filename.
Tagging rules are +defined in \tags files in any parent directory of a file, up to the main +project directory. + +Each line in the \tags file is made of a glob pattern (see subsection +\ref{subsec:glob}) and a list of tags. More than one rule can apply to a file +and rules are applied in the order in which they appear in a file. +By preceding a tag with a minus sign, one may remove tags from one or more files. + +\subsubsection{Example: the built-in \tags file} +\begin{verbatim} + <**/*.ml> or <**/*.mli> or <**/*.mlpack> or <**/*.ml.depends>: ocaml + <**/*.byte>: ocaml, byte, program + <**/*.odoc>: ocaml, doc + <**/*.native>: ocaml, native, program + <**/*.cma>: ocaml, byte, library + <**/*.cmxa>: ocaml, native, library + <**/*.cmo>: ocaml, byte + <**/*.cmi>: ocaml, byte, native + <**/*.cmx>: ocaml, native +\end{verbatim} + +Two special tags made from the path name of the file relative to the toplevel +of the project are automatically defined for each file. For a file +\texttt{foo/bar.ml} those tags will be \texttt{file:foo/bar.ml}, and +\texttt{extension:ml}. + +If you do not have subdirectories, you can put \texttt{*.ml} instead of +\texttt{**/*.ml}. +%***) +%(*** Glob patterns and expressions +\subsection{Glob patterns and expressions} +\label{subsec:glob} +Glob patterns have a syntax similar to those used by UNIX shells to select path +names (like \texttt{foo\_*.ba?}). They are used in \ocb to define the files +and directories to which tags apply. Glob expressions are glob patterns +enclosed in brackets \texttt{<} and \texttt{>} combined using the standard +boolean operators \texttt{and}, \texttt{or}, \texttt{not}. This allows one to +describe sets of path names in more concise and more readable ways. + +Please note that file and directory names are supposed to be made of the +following characters: $\texttt{a}$, $\dots$, $\texttt{z}$, $\texttt{A}$, +$\dots$, $\texttt{Z}$, $\texttt{0}$, $\dots$, $\texttt{9}$, $\texttt{\_}$, +$\texttt{-}$ and $\texttt{.}$. 
This is called the pathname alphabet $P$. + +\begin{table} + \begin{center} + \small + \begin{tabular}{|p{3cm}|l|p{3cm}|p{3cm}|p{5cm}|} + \hline + {\em Formal syntax} & + {\em Example} & {\em Matches} & {\em Does not match} & + {\em Meaning (formal meaning)} \\ + \hline + \hline +%% + {$u$ \vspace*{0.5em} A string of pathname characters} & + \texttt{foo.ml} & + \texttt{foo.ml} & + \texttt{fo.ml}, \texttt{bar/foo.ml} & + The exact string $u$ + ($\{ u \}$, where $u \in P^*$) \\ + \hline +%% + {\texttt{*} \vspace*{0.5em} The wild-card star}& + \texttt{*}& + $\varepsilon$, \texttt{foo}, \texttt{bar} & + \texttt{foo/bar}, \texttt{/bar} & + Any string not containing a slash + ($P^*$) \\ + \hline +%% + {\texttt{?} \vspace*{0.5em} The joker}& + \texttt{?}& + \texttt{a}, \texttt{b}, \texttt{z} & + \texttt{/}, \texttt{bar} & + Any one-letter string, excluding the slash \\ + \hline +%% + {\texttt{**/} \vspace*{0.5em} The prefix inter-directory star}& + \texttt{**/foo.ml}& + \texttt{foo.ml}, \texttt{bar/foo.ml}, \texttt{bar/baz/foo.ml} & + \texttt{foo/bar}, \texttt{/bar} & + The empty string, or any string ending with a slash + ($\varepsilon \cup P^*\mathtt{/}$) \\ + \hline +%% + {\texttt{/**} \vspace*{0.5em} The suffix inter-directory star}& + \texttt{foo/**}& + \texttt{foo}, \texttt{foo/bar} & + \texttt{bar/foo} & + Any string starting with a slash, or the empty string + ($\varepsilon \cup \mathtt{/}P^*$) \\ + \hline +%% + {\texttt{/**/} \vspace*{0.5em} The infix inter-directory star}& + \texttt{bar/**/foo.ml}& + \texttt{bar/foo.ml}, \texttt{bar/baz/foo.ml} & + \texttt{foo.ml} & + Any string starting and ending with a slash + ($\varepsilon \cup \mathtt{/}P^*\mathtt{/}$) \\ + \hline +%% + {$\mathtt{[} r_1 r_2 \cdots r_k \mathtt{]}$ + where $r_i$ is either $c$ or $c_1-c_2$ $(1 \leq i \leq k)$ + \vspace*{0.5em} The positive character class}& + \texttt{[a-fA-F0-9\_.]}& + \texttt{3}, \texttt{F}, \texttt{.} & + \texttt{z}, \texttt{bar} & + Any one-letter string made of characters 
from one of the ranges + $r_i$ ($1 \leq i \leq k$). + ($\mathscr L(r_1) \cup \cdots \cup \mathscr L(r_k)$) \\ + \hline +%% + {\texttt{[\char`\^}$r_1 r_2 \cdots r_k \mathtt{]}$ + where $r_i$ is either $c$ or $c_1-c_2$ $(1 \leq i \leq k)$ + \vspace*{0.5em} The negative character class}& + \texttt{[\char`\^a-fA-F0-9\_.]}& + \texttt{z}, \texttt{bar} & + \texttt{3}, \texttt{F}, \texttt{.} & + Any one-letter string NOT made of characters from one of the ranges + $r_i$ ($1 \leq i \leq k$). + ($\Sigma^* \setminus \left(\mathscr L(r_1) \cup \cdots \cup \mathscr L(r_k)\right)$) \\ + \hline +%% + {$p_1 p_2$ \vspace*{0.5em} A concatenation of patterns}& + \texttt{foo*}& + \texttt{foo}, \texttt{foob}, \texttt{foobar} & + \texttt{fo}, \texttt{bar} & + Any string with a prefix matching $p_1$ and the corresponding suffix + matching $p_2$ + ($\{ uv \mid u \in \mathscr L(p_1), v \in \mathscr L(p_2) \}$) \\ + \hline +%% + {$\mathtt{\{} p_1 \mathtt{,} p_2 \mathtt{,} \cdots \mathtt{,} p_k \mathtt{\}}$ \vspace*{0.5em} A union of patterns}& + \texttt{toto.\{ml,mli\}}& + \texttt{toto.ml}, \texttt{toto.mli} & + \texttt{toto.} & + Any string matching one of the patterns $p_i$ for $1 \leq i \leq k$. + ($\mathscr L(p_1) \cup \cdots \cup \mathscr L(p_k)$) \\ + \hline +%% + \end{tabular} + \end{center} + \caption{ + Syntax and semantics of glob patterns.
+ } +\end{table} +\begin{table} + \begin{center} + \small + \begin{tabular}{|p{2cm}|l|p{7cm}|} + \hline + {\em Formal syntax} & + {\em Example} & + {\em Meaning (formal meaning)} \\ + \hline + \hline + {$\mathtt{<}p\mathtt{>}$} & + \texttt{<foo.ml>} & + Pathnames matching the pattern $p$ \\ + \hline + {$e_1 \; \mathtt{or} \; e_2$} & + \texttt{<*.ml> or <foo/bar.ml>} & + Pathnames matching at least one of the expressions $e_1$ and $e_2$ \\ + \hline + {$e_1 \; \mathtt{and} \; e_2$} & + \texttt{<*.ml> and <foo\_*>} & + Pathnames matching both expressions $e_1$ and $e_2$ \\ + \hline + {$\mathtt{not} \; e$} & + \texttt{not <*.mli>} & + Pathnames not matching the expression $e$ \\ + \hline + {$\mathtt{true}$} & + \texttt{true} & + All pathnames \\ + \hline + {$\mathtt{false}$} & + \texttt{false} & + No pathnames \\ + \hline + \end{tabular} + \end{center} + \caption{ + Syntax and semantics of glob expressions. + } +\end{table} +%***) +%(*** Subdirectories +\subsection{Subdirectories} +If the files of your project are held in one or more subdirectories, +\ocb must be made aware of that fact using the \texttt{-I} or \texttt{-Is} options +or by adding an \texttt{include} tag. For instance, assume your project is made +of three subdirectories, \texttt{foo}, \texttt{bar} and \texttt{baz} containing +various \texttt{.ml} files, the main file being \texttt{foo/main.ml}. Then you can +either type: +\begin{verbatim} +% ocamlbuild -Is foo,bar,baz foo/main.native +\end{verbatim} +or add the following line in the \tags file +\begin{verbatim} +<foo> or <bar> or <baz>: include +\end{verbatim} +and call +\begin{verbatim} +% ocamlbuild foo/main.native +\end{verbatim} + +There are then two cases. If no other modules named \texttt{Bar} or +\texttt{Baz} exist elsewhere in the project, then you are done. Just use +\texttt{Foo}, \texttt{Foo.Bar} and \texttt{Foo.Baz} in your code. 
+Otherwise, you will need to use the plugin mechanism and define the mutual +visibility of the subdirectories using the \texttt{Pathname.define\_context} +function. + +\subsubsection{Note on subdirectory traversal} +\ocb used to traverse by default any subdirectory not explicitly excluded. +This is no longer the case. Note that you can still have a fine grained +control using your \tags file and the \texttt{traverse} tag. + +There is no longer the \texttt{true: traverse} tag declaration by default. To +make \ocb recursive use one of these: +\begin{enumerate} +\item Give the \texttt{-r} flag to ocamlbuild. +\item Have a \tags or myocamlbuild.ml file in your top directory. +\end{enumerate} + +%***) +%(*** Grouping targets +\subsection{Grouping targets with \texttt{.itarget}} +You can create a file named \texttt{foo.itarget} containing +a list of targets, one per line, such as +\begin{verbatim} +main.native +main.byte +stuff.docdir/index.html +\end{verbatim} +Requesting the target \texttt{foo.otarget} will then build every target +listed in the file \texttt{foo.itarget}. Blank lines and lines starting +with a sharp (\texttt{\#}) are ignored. +%***) +%(*** Packing subdirectories into modules +\subsection{Packing subdirectories into modules} +OCaml's \texttt{-pack} option allows you to structure the contents of a +module in a subdirectory. For instance, assume you have a directory +\texttt{foo} containing two modules \texttt{bar.ml} and \texttt{baz.ml}. +You want from these to build a module \texttt{Foo} containing \texttt{Bar} +and \texttt{Baz} as submodules. 
In the case where no modules named +\texttt{Bar} or \texttt{Baz} exist outside of \texttt{Foo}, to do this you +must write a file \texttt{foo.mlpack}, preferably sitting in the same +directory as the directory \texttt{foo} and containing the list of modules +(one per line) it must contain: +\begin{verbatim} +Bar +Baz +\end{verbatim} +Then, when you request the target \texttt{foo.cmo}, the package will be +made from \texttt{bar.cmo} and \texttt{baz.cmo}. +%***) +%(*** Making an OCaml library +\subsection{Making an OCaml library} +In a similar way as for packaged modules, you can make a library by putting +its contents in a file (with the \texttt{mllib} extension). For instance, assume you +have two modules \texttt{bar.ml} and \texttt{baz.ml}. You want to build from these +a library \texttt{foo.cmx?a} containing the \texttt{Bar} and \texttt{Baz} +modules. To do this you must write a file \texttt{foo.mllib} containing the +list of modules (one per line) it must contain: +\begin{verbatim} +Bar +Baz +\end{verbatim} +Then, when you request the target \texttt{foo.cma}, the library will be +made from \texttt{bar.cmo} and \texttt{baz.cmo}. +%***) +%(*** Making an OCaml toplevel +\subsection{Making an OCaml toplevel} +Making a toplevel is almost the same as making a packaged module or a +library. Just write a file with the \texttt{mltop} extension (like +\texttt{foo.mltop}) and request the toplevel using the +\texttt{top} extension (\texttt{foo.top} in this example). +%***) +%(*** Preprocessor options +\subsection{Preprocessor options and tags} +You can specify preprocessor options with \texttt{-pp} followed by the +preprocessor string, for instance \texttt{ocamlbuild -pp "camlp4o.opt -unsafe"} +would run your sources through CamlP4 with the \texttt{-unsafe} option. +Another way is to use the tags file.
+\begin{center} + \begin{tabular}{|l|l|l|} + \hline + \textbf{Tag} & \textbf{Preprocessor command} & \textbf{Remark} \\ + \hline + \hline + \texttt{pp(cmd...)} & \texttt{cmd...} & Arbitrary + preprocessor command\footnote{The command must not contain newlines or parentheses.} \\ + \hline + \texttt{camlp4o} & \texttt{camlp4o} & Original OCaml syntax \\ + \hline + \texttt{camlp4r} & \texttt{camlp4r} & Revised OCaml syntax \\ + \hline + \texttt{camlp4of} & \texttt{camlp4of} & Original OCaml syntax with extensions \\ + \hline + \texttt{camlp4rf} & \texttt{camlp4rf} & Revised OCaml syntax with extensions \\ + \hline + \end{tabular} +\end{center} + +%%%%% \subsubsection{An example, dealing with some configuration variables} +%%%%% +%%%%% It's quite common to have in your sources some files that you want to access +%%%%% when your program is running. One often uses some variables that are setup by +%%%%% the end user. Now suppose that there is only two files that use these variables +%%%%% (mylib.ml and parseopt.ml). +%%%%% +%%%%% In the \tags file: +%%%%% \begin{verbatim} +%%%%% "mylib.ml" or "parseopt.ml": pp(sed -e "s,LIBDIR,/usr/local/lib/FOO,g") +%%%%% \end{verbatim} +%%%%% +%%%%% In fact that solution is not really acceptable, since the variable is hardcoded +%%%%% in the \tags file. Trying to workaround this issue by using some shell variable +%%%%% does not work either since the -pp argument will be escaped in simple quotes. +%%%%% Note also that using some script shell that will do that sed and use \verb'$LIBDIR' +%%%%% as a shell variable is not a good idea since \ocb don't know this dependency on that +%%%%% shell script. +%%%%% +%%%%% There is in fact at least two good solutions. The first is to tell that dependency +%%%%% using the \texttt{dep} function in your plugin. The second is simpler it just consist +%%%%% on generating some OCaml file at configure time. 
By naming this configuration file +%%%%% \texttt{myocamlbuild_config.ml} \ocb will make it also available to your plugin. +%%%%% +%%%%% In your \texttt{myocamlbuild_config.mli} interface: +%%%%% \begin{verbotim} +%%%%% val prefix : string +%%%%% val libdir : string +%%%%% \end{verbotim} +%%%%% +%%%%% And in your \texttt{configure} script +%%%%% \begin{verbatim} +%%%%% #!/bin/sh +%%%%% +%%%%% # Setting defaults values +%%%%% PREFIX=/usr/local +%%%%% LIBDIR=$PREFIX/lib/FOO +%%%%% CONF=myocamlbuild_config.ml +%%%%% +%%%%% # ... some shell to parse option and check configuration ... +%%%%% +%%%%% # Dumping the configuration as an OCaml file. +%%%%% rm -f $CONF +%%%%% echo "let prefix = \"$PREFIX\";;" >> $CONF +%%%%% echo "let libdir = \"$LIBDIR\";;" >> $CONF +%%%%% chmod -w $CONF +%%%%% \end{verbatim} + +%***) +%(*** Debugging and profiling +\subsection{Debugging byte code and profiling native code} +The preferred way of compiling code suitable for debugging with \texttt{ocamldebug} or +profiling native code with \texttt{ocamlprof} is to use the appropriate target +extensions, \texttt{.d.byte} for debugging or \texttt{.p.native} for profiling. + +Another way is to use the \texttt{debug} or \texttt{profile} tags. +Note that these tags must be applied at the compilation and linking stages. +Hence you must either use \texttt{-tag debug} or \texttt{-tag profile} +on the command line, or add a +\begin{verbatim} +true: debug +\end{verbatim} +line to your \tags file. +Please note that the byte-code profiler works in a wholly different way +and is not supported by \ocb. +%***) +%(*** Generating documentation using \texttt{ocamldoc} +\subsection{Generating documentation using \texttt{ocamldoc}} +Write the names of the modules whose interfaces will be documented in a file +whose extension is \texttt{.odocl}, for example \texttt{foo.odocl}, then invoke +\ocb on the target \texttt{foo.docdir/index.html}.
This will collect all the +documentation from the interfaces (which will be built, if necessary) using +\texttt{ocamldoc} and generate a set of HTML files under the directory +\texttt{foo.docdir/}, which is actually a link to \texttt{\_build/foo.docdir/}. +As for packing subdirectories into modules, the module names must be written +one per line, without extensions and correctly capitalized. Note that +generating documentation in formats other than HTML or from implementations is +not supported. +%***) +%(*** The display line +\subsection{The display line} +Provided \ocb runs in a terminal under a POSIX environment, it will +display a sophisticated progress-indicator line that gracefully interacts +with the output of subcommands. This line looks like this: +\begin{verbatim} +00:00:02 210 (180 ) main.cmx ONbp--il / +\end{verbatim} +Here, 00:00:02 is the elapsed time in hour:minute:second format since \ocb was +invoked; 210 is the number of external commands, typically calls to the +compiler or the like, that may or may not have been invoked; 180 is the number +of external commands that have not been invoked since their result is already +in the build directory; \texttt{main.cmx} is the name of the last target built; +\texttt{ONbp--il} is a short string that describes the tags that have been +encountered and the slash at the end is a frame from a rotating ticker. Hence, +the display line has the following structure: +\begin{verbatim} +HH:MM:SS JOBS (CACHED) PATHNAME TAGS TICKER +\end{verbatim} + +The tag string is made of 8 indicators which each monitor a tag. These tags +are \texttt{ocaml}, \texttt{native}, \texttt{byte}, \texttt{program}, +\texttt{pp}, \texttt{debug}, \texttt{interf} and \texttt{link}. Initially, +each indicator displays a dash \texttt{-}. If the current target has the +monitored tag, then the indicator displays the corresponding character +(see table \ref{tab:tag-chars}) in uppercase. Otherwise, once the tag has been +seen on an earlier target, it displays that character in lowercase.
This allows you to see the set of tags that have +been applied to files in your project during the current invocation of \ocb. + +Hence the tag string \texttt{ONbp--il} means that the current target +\texttt{main.cmx} has the tags \texttt{ocaml} and \texttt{native}, and that +the tags \texttt{ocaml}, \texttt{native}, \texttt{byte}, \texttt{program}, +\texttt{interf} and \texttt{link} have already been seen. + +\begin{table} + \begin{center} + \begin{tabular}{|l|c|} + \hline + \textbf{Tag} & \textbf{Display character} \\ + \hline + \hline + ocaml & O \\ + \hline + native & N \\ + \hline + byte & B \\ + \hline + program & P \\ + \hline + pp & R \\ + \hline + debug & D \\ + \hline + interf & I \\ + \hline + link & L \\ + \hline + \end{tabular} + \end{center} + \caption{\label{tab:tag-chars} Relation between the characters displayed in + the tag string and the tags.} +\end{table} +%***) +%(*** ocamllex, ocamlyacc and menhir +\subsection{\texttt{ocamllex}, \texttt{ocamlyacc} and \texttt{menhir}} +\ocb knows how to run the standard lexer and parser generator tools +\texttt{ocamllex} and \texttt{ocamlyacc} when your files have the +standard \texttt{.mll} and \texttt{.mly} extensions. If you want to +use \texttt{menhir} instead of \texttt{ocamlyacc}, you can either +launch \ocb with the \texttt{-use-menhir} option or add a +\begin{verbatim} +true: use_menhir +\end{verbatim} +line to your \tags file. Note that there is currently no way +of using \texttt{menhir} and \texttt{ocamlyacc} in the same execution +of \ocb. +%***) +%(*** Changing the compilers +\subsection{Changing the compilers or tools} +As \ocb is part of your OCaml distribution, it knows if it can call the +native compilers and tools (\texttt{ocamlc.opt}, \texttt{ocamlopt.opt}...) +or not. However you may want \ocb to use another \texttt{ocaml} compiler +for different reasons (such as cross-compiling or using a wrapper such as +\texttt{ocamlfind}). 
Here is the list of relevant options: +\begin{itemize} + \item \texttt{-ocamlc <command>} + \item \texttt{-ocamlopt <command>} + \item \texttt{-ocamldep <command>} + \item \texttt{-ocamlyacc <command>} + \item \texttt{-menhir <command>} + \item \texttt{-ocamllex <command>} + \item \texttt{-ocamlmktop <command>} + \item \texttt{-ocamlrun <command>} +\end{itemize} + +%***) +%\subsection{Writing a \texttt{myocamlbuild.ml} plugin} +%(*** Interaction with version control systems +\subsection{Interaction with version control systems} +Here are tips for configuring your version control system to ignore the files +and directories generated by \ocb. + +The directory \texttt{\_build} and any symbolic links +pointing into \texttt{\_build} should be ignored. +To do this, you must add the following ignore patterns to your version +control system's ignore set: +\begin{verbatim} +_build +*.native +*.byte +*.d.byte +*.p.native +\end{verbatim} + +For CVS, add the above lines to the \texttt{.cvsignore} file. +For Subversion (SVN), type \texttt{svn propedit svn:ignore .} and add the +above lines. +%***) +%(*** A shell script for driving it all? +\subsection{A shell script for driving it all?} +{\em To shell or to make ?} +Traditionally, makefiles have two major functions. The first one +is the dependency-ordering, rule-matching logic used for compiling. +The second one is as a dispatcher for various actions defined using +phony targets with shell script actions. These actions include cleaning, +cleaning really well, archiving, uploading and so on. Their characteristic +is that they rely little or not at all on the building process -- they either need +the building to have been completed, or they don't need anything. +As \texttt{/bin/sh} scripts have been here for three to four decades and are +not going anywhere, why not replace that functionality of makefiles with a +shell script ?
We have thought of three bad reasons: +\begin{itemize} + \item Typing \texttt{make} to compile is now an automatism, + \item We need to share variable definitions between rules and actions, + \item Escaping already way too special-character-sensitive shell code with + invisible tabs and backslashes is a dangerously fun game. +\end{itemize} +We also have bad reasons for not using an OCaml script to drive everything: +\begin{itemize} + \item \texttt{Sys.command} calls the \texttt{/bin/sh} anyway, + \item Shell scripts can execute partial commands or commands with badly formed arguments. + \item Shell scripts are more concise for expressing... shell scripts. +\end{itemize} +Anyway you are of course free to use a makefile or an OCaml script to call ocamlbuild. +Here is an example shell driver script: +\begin{verbatim} +#!/bin/sh + +set -e + +TARGET=epoch +FLAGS="-libs unix,nums" +OCAMLBUILD=ocamlbuild + +ocb() +{ + $OCAMLBUILD $FLAGS $* +} + +rule() { + case $1 in + clean) ocb -clean;; + native) ocb $TARGET.native;; + byte) ocb $TARGET.byte;; + all) ocb $TARGET.native $TARGET.byte;; + depend) echo "Not needed.";; + *) echo "Unknown action $1";; + esac; +} + +if [ $# -eq 0 ]; then + rule all +else + while [ $# -gt 0 ]; do + rule $1; + shift + done +fi +\end{verbatim} +%***) +%\subsection{Common errors} +%***) +%(*** Motivations +\section{Appendix: Motivations} +{\em This inflammatory appendix describes the frustration that led us to write \ocb.} + +Many people have painfully found that the utilities of the \texttt{make} +family, namely GNU Make, BSD Make, and their derivatives, fail to scale to +large projects, especially when using multi-stage compilation rules, such as +custom pre-processors, unless dependencies are hand-defined. But as your +project gets larger, more modular, and uses more diverse pre-processing tools, +it becomes increasingly difficult to correctly define dependencies by hand. 
+Hence people tend to use language-specific tools that attempt to extract +dependencies. However another problem then appears: \texttt{make} was designed +with the idea of a static dependency graph. Dependency extracting tools, +however, are typically run by a rule in \texttt{make} itself; this means that +make has to reload the dependency information. This is the origin of the +\texttt{make clean; make depend; make} mantra. This approach tends to work +quite well as long as all the files sit in a single directory and there is only +one stage of pre-processing. If there are two or more stages, then dependency +extracting tools must be run two or more times - and this means multiple +invocations of \texttt{make}. Also, if one distributes the modules of a large +project into multiple subdirectories, it becomes difficult to distribute the +makefiles themselves, because the language of \texttt{make} was not conceived +to be modular; the only two mechanisms permitted, inclusion of makefile +fragments, and invocation of other make instances, must be skillfully +coordinated with phony target names (\texttt{depend1, depend2...}) to insure +inclusion of generated dependencies with multi-stage programming; changes in +the structure of the project must be reflected by hand and the order of +variable definitions must be well-thought ahead to avoid long afternoons spent +combinatorially fiddling makefiles until it works but no one understands why. + +These problems become especially apparent with OCaml: to ensure type safety and +to allow a small amount of cross-unit optimization when compiling native code, +interface and object files include cryptographical digests of interfaces they +are to be linked with. 
This means that linking is safer, but that makefile sloppiness +leads to messages such as: +\begin{verbatim} +Files foo.cmo and bar.cmo +make inconsistent assumptions over interface Bar +\end{verbatim} + +The typical reaction is then to issue the mantra \texttt{make clean; make +depend; make} and everything compiles just fine... from the beginning. Hence +on medium projects, the programmer often has to wait for minutes instead of the +few seconds that would be taken if \texttt{make} could correctly guess the +small number of files that really had to be recompiled. + +It is not surprising that hacking a build tool such as \texttt{make} to include +a programming language while retaining the original syntax and semantics gives +an improvised and cumbersome macro language of dubious expressive power. For +example, using GNU make, suppose you have a list of \texttt{.ml}s that you want +to convert into a list including both \texttt{.cmo}s and \texttt{.cmi}s, that +is you want to transform \texttt{a.ml b.ml c.ml} into \texttt{a.cmi a.cmo b.cmi +b.cmo c.cmi c.cmo} while preserving the dependency order which must be hand +specified for linking \footnote{By the way, what's the point of having a +declarative language if \texttt{make} can't sort the dependencies in +topological order for giving them to \texttt{gcc} or whatever ?}. +Unfortunately \texttt{\$patsubst \%.ml, \%.cmi \%.cmo, a.ml b.ml c.ml} won't +work since the \%-sign in the right-hand of a \texttt{patsubst} gets +substituted only once. You then have to delve into something that is hardly +lambda calculus: an intricate network of \texttt{foreach}, \texttt{eval}, +\texttt{call} and \texttt{define}s may get you the job done, unless you chicken +out and opt for an external \texttt{awk}, \texttt{sed} or \texttt{perl} call. +People who at this point have not lost their temper or sanity usually resort to +metaprogramming by writing Makefile generators using a mixture of shell and m4. 
One such attempt gave something that is the nightmare of wannabe package
\%.p.o & \%.p.native \\ + \hline + native, ocaml, program & \%.cmx \%.o & \%.native \\ + \hline + byte, debug, library, ocaml & \%.mllib & \%.d.cma \\ + \hline + byte, library, ocaml & \%.mllib & \%.cma \\ + \hline + byte, debug, library, ocaml & \%.d.cmo & \%.d.cma \\ + \hline + byte, library, ocaml & \%.cmo & \%.cma \\ + \hline + & lib\%(libname).clib & lib\%(libname).a dll\%(libname).so \\ + \hline + & \%(path)/lib\%(libname).clib & \%(path)/lib\%(libname).a \%(path)/dll\%(libname).so \\ + \hline + library, native, ocaml, profile & \%.mllib & \%.p.cmxa \%.p.a \\ + \hline + library, native, ocaml & \%.mllib & \%.cmxa \%.a \\ + \hline + library, native, ocaml, profile & \%.p.cmx \%.p.o & \%.p.cmxa \%.p.a \\ + \hline + library, native, ocaml & \%.cmx \%.o & \%.cmxa \%.a \\ + \hline + & \%.ml & \%.ml.depends \\ + \hline + & \%.mli & \%.mli.depends \\ + \hline + ocaml & \%.mll & \%.ml \\ + \hline + doc, ocaml & \%.mli \%.mli.depends & \%.odoc \\ + \hline + & \%.odocl & \%.docdir/index.html \\ + \hline + ocaml & \%.mly & \%.ml \%.mli \\ + \hline + & \%.c & \%.o \\ + \hline + & \%.ml \%.ml.depends & \%.inferred.mli \\ + \hline +\end{tabular} +\end{center} +%***) + diff --git a/manual/manual/cmds/ocamldoc.etex b/manual/manual/cmds/ocamldoc.etex new file mode 100644 index 0000000000..1a74ec376f --- /dev/null +++ b/manual/manual/cmds/ocamldoc.etex @@ -0,0 +1,1134 @@ +\chapter{The documentation generator (ocamldoc)} \label{c:ocamldoc} +\pdfchapter{The documentation generator (ocamldoc)} +%HEVEA\cutname{ocamldoc.html} + +This chapter describes OCamldoc, a tool that generates documentation from +special comments embedded in source files. The comments used by OCamldoc +are of the form "(**"\ldots"*)" and follow the format described +in section \ref{s:ocamldoc-comments}. + +OCamldoc can produce documentation in various formats: HTML, \LaTeX , +TeXinfo, Unix man pages, and "dot" dependency graphs. 
Moreover, +users can add their own custom generators, as explained in +section \ref{s:ocamldoc-custom-generators}. + +In this chapter, we use the word {\em element} to refer to any of the +following parts of an OCaml source file: a type declaration, a value, +a module, an exception, a module type, a type constructor, a record +field, a class, a class type, a class method, a class value or a class +inheritance clause. + +\section{Usage} \label{s:ocamldoc-usage} + +\subsection{Invocation} + +OCamldoc is invoked via the command "ocamldoc", as follows: +\begin{alltt} + ocamldoc \var{options} \var{sourcefiles} +\end{alltt} + +\subsubsection*{Options for choosing the output format} + +The following options determine the format for the generated +documentation. + +\begin{options} +\item["-html"] +Generate documentation in HTML default format. The generated HTML pages +are stored in the current directory, or in the directory specified +with the {\bf\tt -d} option. You can customize the style of the +generated pages by editing the generated "style.css" file, or by providing +your own style sheet using option "-css-style". +The file "style.css" is not generated if it already exists or if -css-style is used. + +\item["-latex"] +Generate documentation in \LaTeX\ default format. The generated +\LaTeX\ document is saved in file "ocamldoc.out", or in the file +specified with the {\bf\tt -o} option. The document uses the style file +"ocamldoc.sty". This file is generated when using the "-latex" option, +if it does not already exist. +You can change this file to customize the style of your \LaTeX\ documentation. + +\item["-texi"] +Generate documentation in TeXinfo default format. The generated +\LaTeX\ document is saved in file "ocamldoc.out", or in the file +specified with the {\bf\tt -o} option. + +\item["-man"] +Generate documentation as a set of Unix "man" pages. 
The generated pages +are stored in the current directory, or in the directory specified +with the {\bf\tt -d} option. + +\item["-dot"] +Generate a dependency graph for the toplevel modules, in a format suitable +for displaying and processing by "dot". The "dot" tool is available from +\url{http://www.research.att.com/sw/tools/graphviz/}. +The textual representation of the graph is written to the file +"ocamldoc.out", or to the file specified with the {\bf\tt -o} option. +Use "dot ocamldoc.out" to display it. + +\item["-g" \var{file.cm[o,a,xs]}] +Dynamically load the given file, which defines a custom documentation +generator. See section \ref{s:ocamldoc-compilation-and-usage}. This +option is supported by the "ocamldoc" command (to load ".cmo" and ".cma" files) +and by its native-code version "ocamldoc.opt" (to load ".cmxs" files). +If the given file is a simple one and does not exist in +the current directory, then ocamldoc looks for it in the custom +generators default directory, and in the directories specified with +optional "-i" options. + +\item["-customdir"] +Display the custom generators default directory. + +\item["-i" \var{directory}] +Add the given directory to the path where to look for custom generators. + +\end{options} + +\subsubsection*{General options} + +\begin{options} + +\item["-d" \var{dir}] +Generate files in directory \var{dir}, rather than the current directory. + +\item["-dump" \var{file}] +Dump collected information into \var{file}. This information can be +read with the "-load" option in a subsequent invocation of "ocamldoc". + +\item["-hide" \var{modules}] +Hide the given complete module names in the generated documentation. +\var{modules} is a list of complete module names separated + by '","', without blanks. For instance: "Pervasives,M2.M3". + +\item["-inv-merge-ml-mli"] +Reverse the precedence of implementations and interfaces when merging. 
+All elements +in implementation files are kept, and the {\bf\tt -m} option +indicates which parts of the comments in interface files are merged +with the comments in implementation files. + +\item["-keep-code"] +Always keep the source code for values, methods and instance variables, +when available. +The source code is always kept when a ".ml" +file is given, but is by default discarded when a ".mli" is given. +This option keeps the source code in all cases. + +\item["-load" \var{file}] +Load information from \var{file}, which has been produced by +"ocamldoc -dump". Several "-load" options can be given. + +\item["-m" \var{flags}] +Specify merge options between interfaces and implementations. +(see section \ref{s:ocamldoc-merge} for details). +\var{flags} can be one or several of the following characters: +\begin{options} + \item["d"] merge description + \item["a"] merge "\@author" + \item["v"] merge "\@version" + \item["l"] merge "\@see" + \item["s"] merge "\@since" + \item["b"] merge "\@before" + \item["o"] merge "\@deprecated" + \item["p"] merge "\@param" + \item["e"] merge "\@raise" + \item["r"] merge "\@return" + \item["A"] merge everything +\end{options} + +\item["-no-custom-tags"] +Do not allow custom \@-tags (see section \ref{s:ocamldoc-tags}). + +\item["-no-stop"] +Keep elements placed after/between the "(**/**)" special comment(s) +(see section \ref{s:ocamldoc-comments}). + +\item["-o" \var{file}] +Output the generated documentation to \var{file} instead of "ocamldoc.out". +This option is meaningful only in conjunction with the +{\bf\tt -latex}, {\bf\tt -texi}, or {\bf\tt -dot} options. + +\item["-pp" \var{command}] +Pipe sources through preprocessor \var{command}. + +\item["-impl" \var{filename}] +Process the file \var{filename} as an implementation file, even if its +extension is not ".ml". + +\item["-intf" \var{filename}] +Process the file \var{filename} as an interface file, even if its +extension is not ".mli". 
+ +\item["-text" \var{filename}] +Process the file \var{filename} as a text file, even if its +extension is not ".txt". + +\item["-sort"] +Sort the list of top-level modules before generating the documentation. + +\item["-stars"] +Remove blank characters until the first asterisk ('"*"') in each +line of comments. + +\item["-t" \var{title}] +Use \var{title} as the title for the generated documentation. + +\item["-intro" \var{file}] +Use content of \var{file} as ocamldoc text to use as introduction (HTML, +\LaTeX{} and TeXinfo only). +For HTML, the file is used to create the whole "index.html" file. + +\item["-v"] +Verbose mode. Display progress information. + +\item["-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. + +\item["-warn-error"] +Treat Ocamldoc warnings as errors. + +\item["-hide-warnings"] +Do not print OCamldoc warnings. + +\item["-help" or "--help"] +Display a short usage summary and exit. +% +\end{options} + +\subsubsection*{Type-checking options} + +OCamldoc calls the OCaml type-checker to obtain type +information. The following options impact the type-checking phase. +They have the same meaning as for the "ocamlc" and "ocamlopt" commands. + +\begin{options} + +\item["-I" \var{directory}] +Add \var{directory} to the list of directories search for compiled +interface files (".cmi" files). + +\item["-nolabels"] +Ignore non-optional labels in types. + +\item["-rectypes"] +Allow arbitrary recursive types. (See the "-rectypes" option to "ocamlc".) + +\end{options} + +\subsubsection*{Options for generating HTML pages} + +The following options apply in conjunction with the "-html" option: + +\begin{options} +\item["-all-params"] +Display the complete list of parameters for functions and methods. + +\item["-charset" \var{charset}] +Add information about character encoding being \var{charset} +(default is iso-8859-1). 
+ +\item["-colorize-code"] +Colorize the OCaml code enclosed in "[ ]" and "{[ ]}", using colors +to emphasize keywords, etc. If the code fragments are not +syntactically correct, no color is added. + +\item["-css-style" \var{filename}] +Use \var{filename} as the Cascading Style Sheet file. + +\item["-index-only"] +Generate only index files. + +\item["-short-functors"] +Use a short form to display functors: +\begin{alltt} +module M : functor (A:Module) -> functor (B:Module2) -> sig .. end +\end{alltt} +is displayed as: +\begin{alltt} +module M (A:Module) (B:Module2) : sig .. end +\end{alltt} + +\end{options} + +\subsubsection*{Options for generating \LaTeX\ files} + +The following options apply in conjunction with the "-latex" option: + +\begin{options} +\item["-latex-value-prefix" \var{prefix}] +Give a prefix to use for the labels of the values in the generated +\LaTeX\ document. +The default prefix is the empty string. You can also use the options +{\tt -latex-type-prefix}, {\tt -latex-exception-prefix}, +{\tt -latex-module-prefix}, +{\tt -latex-module-type-prefix}, {\tt -latex-class-prefix}, +{\tt -latex-class-type-prefix}, +{\tt -latex-attribute-prefix} and {\tt -latex-method-prefix}. + +These options are useful when you have, for example, a type and a value with + the same name. If you do not specify prefixes, \LaTeX\ will complain about +multiply defined labels. + +\item["-latextitle" \var{n,style}] +Associate style number \var{n} to the given \LaTeX\ sectioning command +\var{style}, e.g. "section" or "subsection". (\LaTeX\ only.) This is +useful when including the generated document in another \LaTeX\ document, +at a given sectioning level. The default association is 1 for "section", +2 for "subsection", 3 for "subsubsection", 4 for "paragraph" and 5 for +"subparagraph". + +\item["-noheader"] +Suppress header in generated documentation. + +\item["-notoc"] +Do not generate a table of contents. 
+ +\item["-notrailer"] +Suppress trailer in generated documentation. + +\item["-sepfiles"] +Generate one ".tex" file per toplevel module, instead of the global +"ocamldoc.out" file. +\end{options} + +\subsubsection*{Options for generating TeXinfo files} + +The following options apply in conjunction with the "-texi" option: + +\begin{options} +\item["-esc8"] +Escape accented characters in Info files. + +\item["-info-entry"] +Specify Info directory entry. + +\item["-info-section"] +Specify section of Info directory. + +\item["-noheader"] +Suppress header in generated documentation. + +\item["-noindex"] +Do not build index for Info files. + +\item["-notrailer"] +Suppress trailer in generated documentation. +\end{options} + +\subsubsection*{Options for generating "dot" graphs} + +The following options apply in conjunction with the "-dot" option: + +\begin{options} +\item["-dot-colors" \var{colors}] +Specify the colors to use in the generated "dot" code. +When generating module dependencies, "ocamldoc" uses different colors +for modules, depending on the directories in which they reside. +When generating types dependencies, "ocamldoc" uses different colors +for types, depending on the modules in which they are defined. +\var{colors} is a list of color names separated by '","', as +in "Red,Blue,Green". The available colors are the ones supported by +the "dot" tool. + +\item["-dot-include-all"] +Include all modules in the "dot" output, not only modules given +on the command line or loaded with the {\bf\tt -load} option. + +\item["-dot-reduce"] +Perform a transitive reduction of the dependency graph before +outputting the "dot" code. This can be useful if there are +a lot of transitive dependencies that clutter the graph. + +\item["-dot-types"] +Output "dot" code describing the type dependency graph instead of +the module dependency graph. 
+\end{options} + +\subsubsection*{Options for generating man files} + +The following options apply in conjunction with the "-man" option: + +\begin{options} +\item["-man-mini"] +Generate man pages only for modules, module types, classes and class +types, instead of pages for all elements. + +\item["-man-suffix" \var{suffix}] +Set the suffix used for generated man filenames. Default is '"3o"', +as in "List.3o". + +\item["-man-section" \var{section}] +Set the section number used for generated man filenames. Default is '"3"'. + +\end{options} + +\subsection{Merging of module information} +\label{s:ocamldoc-merge} + +Information on a module can be extracted either from the ".mli" or ".ml" +file, or both, depending on the files given on the command line. +When both ".mli" and ".ml" files are given for the same module, +information extracted from these files is merged according to the +following rules: +\begin{itemize} +\item Only elements (values, types, classes, ...) declared in the ".mli" +file are kept. In other terms, definitions from the ".ml" file that are +not exported in the ".mli" file are not documented. +\item Descriptions of elements and descriptions in \@-tags are handled +as follows. If a description for the same element or in the same +\@-tag of the same element is present in both files, then the +description of the ".ml" file is concatenated to the one in the ".mli" file, +if the corresponding "-m" flag is given on the command line. +If a description is present in the ".ml" file and not in the +".mli" file, the ".ml" description is kept. +In either case, all the information given in the ".mli" file is kept. +\end{itemize} + +\subsection{Coding rules} +\label{s:ocamldoc-rules} +The following rules must be respected in order to avoid name clashes +resulting in cross-reference errors: +\begin{itemize} +\item In a module, there must not be two modules, two module types or + a module and a module type with the same name. 
+ In the default HTML generator, modules "ab" and "AB" will be printed + to the same file on case insensitive file systems. +\item In a module, there must not be two classes, two class types or + a class and a class type with the same name. +\item In a module, there must not be two values, two types, or two + exceptions with the same name. +\item Values defined in tuple, as in "let (x,y,z) = (1,2,3)" +are not kept by OCamldoc. +\item Avoid the following construction: +\begin{verbatim} +open Foo (* which has a module Bar with a value x *) +module Foo = + struct + module Bar = + struct + let x = 1 + end + end + let dummy = Bar.x +\end{verbatim} +In this case, OCamldoc will associate "Bar.x" to the "x" of module +"Foo" defined just above, instead of to the "Bar.x" defined in the +opened module "Foo". +\end{itemize} + +\section{Syntax of documentation comments} +\label{s:ocamldoc-comments} + +Comments containing documentation material are called {\em special +comments} and are written between "(**" and "*)". Special comments +must start exactly with "(**". Comments beginning with "(" and more +than two "*" are ignored. + +\subsection{Placement of documentation comments} +OCamldoc can associate comments to some elements of the language +encountered in the source files. The association is made according to +the locations of comments with respect to the language elements. The +locations of comments in ".mli" and ".ml" files are different. + +%%%%%%%%%%%%% +\subsubsection{Comments in ".mli" files} +A special comment is associated to an element if it is placed before or +after the element.\\ +A special comment before an element is associated to this element if~: +\begin{itemize} +\item There is no blank line or another special comment between the special +comment and the element. However, a regular comment can occur between +the special comment and the element. +\item The special comment is not already associated to the previous element. 
+\item The special comment is not the first one of a toplevel module. +\end{itemize} + +A special comment after an element is associated to this element if +there is no blank line or comment between the special comment and the +element. + +There are two exceptions: for constructors and record fields in +type definitions, the associated comment can only be placed after the +constructor or field definition, without blank lines or other comments +between them. The special comment for a constructor +with another constructor following must be placed before the '"|"' +character separating the two constructors. + +The following sample interface file "foo.mli" illustrates the +placement rules for comments in ".mli" files. + +\begin{verbatim} +(** The first special comment of the file is the comment associated + with the whole module.*) + + +(** Special comments can be placed between elements and are kept + by the OCamldoc tool, but are not associated to any element. + @-tags in these comments are ignored.*) + +(*******************************************************************) +(** Comments like the one above, with more than two asterisks, + are ignored. *) + +(** The comment for function f. *) +val f : int -> int -> int +(** The continuation of the comment for function f. 
*) + +(** Comment for exception My_exception, even with a simple comment + between the special comment and the exception.*) +(* Hello, I'm a simple comment :-) *) +exception My_exception of (int -> int) * int + +(** Comment for type weather *) +type weather = +| Rain of int (** The comment for constructor Rain *) +| Sun (** The comment for constructor Sun *) + +(** Comment for type weather2 *) +type weather2 = +| Rain of int (** The comment for constructor Rain *) +| Sun (** The comment for constructor Sun *) +(** I can continue the comment for type weather2 here + because there is already a comment associated to the last constructor.*) + +(** The comment for type my_record *) +type my_record = { + val foo : int ; (** Comment for field foo *) + val bar : string ; (** Comment for field bar *) + } + (** Continuation of comment for type my_record *) + +(** Comment for foo *) +val foo : string +(** This comment is associated to foo and not to bar. *) +val bar : string +(** This comment is associated to bar. *) + +(** The comment for class my_class *) +class my_class : + object + (** A comment to describe inheritance from cl *) + inherit cl + + (** The comment for attribute tutu *) + val mutable tutu : string + + (** The comment for attribute toto. *) + val toto : int + + (** This comment is not attached to titi since + there is a blank line before titi, but is kept + as a comment in the class. *) + + val titi : string + + (** Comment for method toto *) + method toto : string + + (** Comment for method m *) + method m : float -> int + end + +(** The comment for the class type my_class_type *) +class type my_class_type = + object + (** The comment for variable x. *) + val mutable x : int + + (** The commend for method m. 
*) + method m : int -> int +end + +(** The comment for module Foo *) +module Foo = + struct + (** The comment for x *) + val x : int + + (** A special comment that is kept but not associated to any element *) + end + +(** The comment for module type my_module_type. *) +module type my_module_type = + sig + (** The comment for value x. *) + val x : int + + (** The comment for module M. *) + module M = + struct + (** The comment for value y. *) + val y : int + + (* ... *) + end + + end + +\end{verbatim} + +%%%%%%%%%%%%% +\subsubsection{Comments in {\tt .ml} files} + +A special comment is associated to an element if it is placed before +the element and there is no blank line between the comment and the +element. Meanwhile, there can be a simple comment between the special +comment and the element. There are two exceptions, for +constructors and record fields in type definitions, whose associated +comment must be placed after the constructor or field definition, +without blank line between them. The special comment for a constructor +with another constructor following must be placed before the '"|"' +character separating the two constructors. + +The following example of file "toto.ml" shows where to place comments +in a ".ml" file. + +\begin{verbatim} +(** The first special comment of the file is the comment associated + to the whole module. *) + +(** The comment for function f *) +let f x y = x + y + +(** This comment is not attached to any element since there is another + special comment just before the next element. *) + +(** Comment for exception My_exception, even with a simple comment + between the special comment and the exception.*) +(* A simple comment. 
*) +exception My_exception of (int -> int) * int + +(** Comment for type weather *) +type weather = +| Rain of int (** The comment for constructor Rain *) +| Sun (** The comment for constructor Sun *) + +(** The comment for type my_record *) +type my_record = { + val foo : int ; (** Comment for field foo *) + val bar : string ; (** Comment for field bar *) + } + +(** The comment for class my_class *) +class my_class = + object + (** A comment to describe inheritance from cl *) + inherit cl + + (** The comment for the instance variable tutu *) + val mutable tutu = "tutu" + (** The comment for toto *) + val toto = 1 + val titi = "titi" + (** Comment for method toto *) + method toto = tutu ^ "!" + (** Comment for method m *) + method m (f : float) = 1 + end + +(** The comment for class type my_class_type *) +class type my_class_type = + object + (** The comment for the instance variable x. *) + val mutable x : int + (** The commend for method m. *) + method m : int -> int + end + +(** The comment for module Foo *) +module Foo = + struct + (** The comment for x *) + val x : int + (** A special comment in the class, but not associated to any element. *) + end + +(** The comment for module type my_module_type. *) +module type my_module_type = + sig + (* Comment for value x. *) + val x : int + (* ... *) + end +\end{verbatim} + +%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{The Stop special comment} +The special comment "(**/**)" tells OCamldoc to discard +elements placed after this comment, up to the end of the current +class, class type, module or module type, or up to the next stop comment. 
+For instance: +\begin{verbatim} +class type foo = + object + (** comment for method m *) + method m : string + + (**/**) + + (** This method won't appear in the documentation *) + method bar : int + end + +(** This value appears in the documentation, since the Stop special comment + in the class does not affect the parent module of the class.*) +val foo : string + +(**/**) +(** The value bar does not appear in the documentation.*) +val bar : string +(**/**) + +(** The type t appears since in the documentation since the previous stop comment +toggled off the "no documentation mode". *) +type t = string +\end{verbatim} + +The {\bf\tt -no-stop} option to "ocamldoc" causes the Stop special +comments to be ignored. + +%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Syntax of documentation comments} + +The inside of documentation comments "(**"\ldots"*)" consists of +free-form text with optional formatting annotations, followed by +optional {\em tags} giving more specific information about parameters, +version, authors, \ldots\ The tags are distinguished by a leading "\@" +character. Thus, a documentation comment has the following shape: +\begin{verbatim} +(** The comment begins with a description, which is text formatted + according to the rules described in the next section. + The description continues until the first non-escaped '@' character. + @author Mr Smith + @param x description for parameter x +*) +\end{verbatim} +Some elements support only a subset of all \@-tags. Tags that are not +relevant to the documented element are simply ignored. For instance, +all tags are ignored when documenting type constructors, record +fields, and class inheritance clauses. Similarly, a "\@param" tag on a +class instance variable is ignored. + +At last, "(**)" is the empty documentation comment. 
+ +%%%%%%%%%%%%% + +% enable section numbering for subsubsections (PR#6189, item 3) +\setcounter{secnumdepth}{3} + +\subsection{Text formatting} + +Here is the BNF grammar for the simple markup language used to format +text descriptions. + +\newpage + +\begin{syntax} +text: {{text-element}} +; +\end{syntax} + +\noindent +\begin{syntaxleft} +\nonterm{text-element}\is{} +\end{syntaxleft} + +\begin{tabular}{rlp{10cm}} +@||@&@ '{' {{ "0" \ldots "9" }} text '}' @ & format @text@ as a section header; + the integer following "{" indicates the sectioning level. \\ +@||@&@ '{' {{ "0" \ldots "9" }} ':' @ \nt{label} @ text '}' @ & + same, but also associate the name \nt{label} to the current point. + This point can be referenced by its fully-qualified label in a + "{!" command, just like any other element. \\ +@||@&@ '{b' text '}' @ & set @text@ in bold. \\ +@||@&@ '{i' text '}' @ & set @text@ in italic. \\ +@||@&@ '{e' text '}' @ & emphasize @text@. \\ +@||@&@ '{C' text '}' @ & center @text@. \\ +@||@&@ '{L' text '}' @ & left align @text@. \\ +@||@&@ '{R' text '}' @ & right align @text@. \\ +@||@&@ '{ul' list '}' @ & build a list. \\ +@||@&@ '{ol' list '}' @ & build an enumerated list. \\ +@||@&@ '{{:' string '}' text '}' @ & put a link to the given address +(given as @string@) on the given @text@. \\ +@||@&@ '[' string ']' @ & set the given @string@ in source code style. \\ +@||@&@ '{[' string ']}' @ & set the given @string@ in preformatted + source code style.\\ +@||@&@ '{v' string 'v}' @ & set the given @string@ in verbatim style. \\ +@||@&@ '{%' string '%}' @ & target-specific content + (\LaTeX\ code by default, see details + in \ref{sss:target-specific-syntax}) \\ +@||@&@ '{!' string '}' @ & insert a cross-reference to an element + (see section \ref{sss:crossref} for the syntax of cross-references).\\ +@||@&@ '{!modules:' string string ... '}' @ & insert an index table +for the given module names. 
Used in HTML only.\\ +@||@&@ '{!indexlist}' @ & insert a table of links to the various indexes +(types, values, modules, ...). Used in HTML only.\\ +@||@&@ '{^' text '}' @ & set text in superscript.\\ +@||@&@ '{_' text '}' @ & set text in subscript.\\ +@||@& \nt{escaped-string} & typeset the given string as is; +special characters ('"{"', '"}"', '"["', '"]"' and '"\@"') +must be escaped by a '"\\"'\\ +@||@& \nt{blank-line} & force a new line. +\end{tabular} \\ + +\subsubsection{List formatting} + +\begin{syntax} +list: +| {{ '{-' text '}' }} +| {{ '{li' text '}' }} +\end{syntax} + +A shortcut syntax exists for lists and enumerated lists: +\begin{verbatim} +(** Here is a {b list} +- item 1 +- item 2 +- item 3 + +The list is ended by the blank line.*) +\end{verbatim} +is equivalent to: +\begin{verbatim} +(** Here is a {b list} +{ul {- item 1} +{- item 2} +{- item 3}} +The list is ended by the blank line.*) +\end{verbatim} + +The same shortcut is available for enumerated lists, using '"+"' +instead of '"-"'. +Note that only one list can be defined by this shortcut in nested lists. + +\subsubsection{Cross-reference formatting} +\label{sss:crossref} + +Cross-references are fully qualified element names, as in the example +"{!Foo.Bar.t}". This is an ambiguous reference as it may designate +a type name, a value name, a class name, etc. It is possible to make +explicit the intended syntactic class, using "{!type:Foo.Bar.t}" to +designate a type, and "{!val:Foo.Bar.t}" a value of the same name. 
+ +The list of possible syntactic classes is as follows: +\begin{center} +\begin{tabular}{rl} +\multicolumn{1}{c}{"tag"} & \multicolumn{1}{c}{syntactic class}\\ \hline +"module:" & module \\ +"modtype:" & module type \\ +"class:" & class \\ +"classtype:" & class type \\ +"val:" & value \\ +"type:" & type \\ +"exception:" & exception \\ +"attribute:" & attribute \\ +"method:" & class method \\ +"section:" & ocamldoc section \\ +"const:" & variant constructor \\ +"recfield:" & record field +\end{tabular} +\end{center} + +In the case of variant constructors or record fields, the constructor +or field name should be preceded by the name of the corresponding type -- +to avoid the ambiguity of several types having the same constructor +names. For example, the constructor "Node" of the type "tree" will be +referenced as "{!tree.Node}" or "{!const:tree.Node}", or possibly +"{!Mod1.Mod2.tree.Node}" from outside the module. + +\subsubsection{First sentence} + +In the description of a value, type, exception, module, module type, class +or class type, the {\em first sentence} is sometimes used in indexes, or +when just a part of the description is needed. The first sentence +is composed of the first characters of the description, until +\begin{itemize} +\item the first dot followed by a blank, or +\item the first blank line +\end{itemize} +outside of the following text formatting: +@ '{ul' list '}' @, +@ '{ol' list '}' @, +@ '[' string ']' @, +@ '{[' string ']}' @, +@ '{v' string 'v}' @, +@ '{%' string '%}' @, +@ '{!' string '}' @, +@ '{^' text '}' @, +@ '{_' text '}' @. + +\subsubsection{Target-specific formatting} +\label{sss:target-specific-syntax} + +The content inside "{%foo: ... %}" is target-specific and will only be +interpreted by the backend "foo", and ignored by the others. The +backends of the distribution are "latex", "html", "texi" and "man". If +no target is specified (syntax "{% ... %}"), "latex" is chosen by +default.
Custom generators may support their own target prefix. + +\subsubsection{Recognized HTML tags} +The HTML tags "<b>..</b>", +"<code>..</code>", +"<i>..</i>", +"<ul>..</ul>", +"<ol>..</ol>", +"<li>..</li>", +"<center>..</center>" and +"<h[0-9]>..</h[0-9]>" can be used instead of, respectively, +@ '{b ..}' @, +@ '[..]' @, +@ '{i ..}' @, +@ '{ul ..}' @, +@ '{ol ..}' @, +@ '{li ..}' @, +@ '{C ..}' @ and +"{[0-9] ..}". + +%disable section numbering for subsubsections +\setcounter{secnumdepth}{2} + +%%%%%%%%%%%%% +\subsection{Documentation tags (\@-tags)} +\label{s:ocamldoc-tags} + +\subsubsection{Predefined tags} +The following table gives the list of predefined \@-tags, with their +syntax and meaning.\\ + +\begin{tabular}{|p{5cm}|p{10cm}|}\hline +@ "@author" string @ & The author of the element. One author per +"\@author" tag. +There may be several "\@author" tags for the same element. \\ \hline + +@ "@deprecated" text @ & The @text@ should describe when the element was +deprecated, what to use as a replacement, and possibly the reason +for deprecation. \\ \hline + +@ "@param" id text @ & Associate the given description (@text@) to the +given parameter name @id@. This tag is used for functions, +methods, classes and functors. \\ \hline + +@ "@raise" Exc text @ & Explain that the element may raise + the exception @Exc@. \\ \hline + +@ "@return" text @ & Describe the return value and + its possible values. This tag is used for functions + and methods. \\ \hline + +@ "@see" '<' URL '>' text @ & Add a reference to the @URL@ +with the given @text@ as comment. \\ \hline + +@ "@see" "'"@\nt{filename}@"'" text @ & Add a reference to the given file name +(written between single quotes), with the given @text@ as comment. \\ \hline + +@ "@see" '"'@\nt{document-name}@'"' text @ & Add a reference to the given +document name (written between double quotes), with the given @text@ +as comment. \\ \hline + +@ "@since" string @ & Indicate when the element was introduced. 
\\ \hline + +@ "@before" @ \nt{version} @ text @ & Associate the given description (@text@) +to the given \nt{version} in order to document compatibility issues. \\ \hline + +@ "@version" string @ & The version number for the element. \\ \hline +\end{tabular} + +\subsubsection{Custom tags} +\label{s:ocamldoc-custom-tags} +You can use custom tags in the documentation comments, but they will +have no effect if the generator used does not handle them. To use a +custom tag, for example "foo", just put "\@foo" with some text in your +comment, as in: +\begin{verbatim} +(** My comment to show you a custom tag. +@foo this is the text argument to the [foo] custom tag. +*) +\end{verbatim} + +To handle custom tags, you need to define a custom generator, +as explained in section \ref{s:ocamldoc-handling-custom-tags}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Custom generators} +\label{s:ocamldoc-custom-generators} + +OCamldoc operates in two steps: +\begin{enumerate} +\item analysis of the source files; +\item generation of documentation, through a documentation generator, + which is an object of class "Odoc_args.class_generator". +\end{enumerate} +Users can provide their own documentation generator to be used during +step 2 instead of the default generators. +All the information retrieved during the analysis step is available through +the "Odoc_info" module, which gives access to all the types and functions + representing the elements found in the given modules, with their associated +description. + +The files you can use to define custom generators are installed in the +"ocamldoc" sub-directory of the OCaml standard library. + +%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{The generator modules} +The type of a generator module depends on the kind of generated documentation. 
+Here is the list of generator module types, with the name of the generator +class in the module~: +\begin{itemize} +\item for HTML~: "Odoc_html.Html_generator" (class "html"), +\item for \LaTeX~: "Odoc_latex.Latex_generator" (class "latex"), +\item for TeXinfo~: "Odoc_texi.Texi_generator" (class "texi"), +\item for man pages~: "Odoc_man.Man_generator" (class "man"), +\item for graphviz (dot)~: "Odoc_dot.Dot_generator" (class "dot"), +\item for other kinds~: "Odoc_gen.Base" (class "generator"). +\end{itemize} +That is, to define a new generator, one must implement a module with +the expected signature, and with the given generator class, providing +the "generate" method as entry point to make the generator generate +documentation for a given list of modules~: + +\begin{verbatim} + method generate : Odoc_info.Module.t_module list -> unit +\end{verbatim} + +\noindent{}This method will be called with the list of analysed and possibly +merged "Odoc_info.Module.t_module" structures. + +It is recommended to inherit from the current generator of the same +kind as the one you want to define. Doing so, it is possible to +load various custom generators to combine improvements brought by each one. + +This is done using first class modules (see chapter \ref{s-first-class-modules}). + +The easiest way to define a custom generator is to follow this example, +here extending the current HTML generator. We don't have to know if this is +the original HTML generator defined in ocamldoc or if it has been extended +already by a previously loaded custom generator~: + +\begin{verbatim} +module Generator (G : Odoc_html.Html_generator) = +struct + class html = + object(self) + inherit G.html as html + (* ... *) + + method generate module_list = + (* ... *) + () + + (* ...
*) + end +end;; + +let _ = Odoc_args.extend_html_generator (module Generator : Odoc_gen.Html_functor);; +\end{verbatim} + +To know which methods to override and/or which methods are available, +have a look at the different base implementations, depending on the +kind of generator you are extending~: +\begin{itemize} +\item for HTML~: \href{http://caml.inria.fr/cgi-bin/viewvc.cgi/ocaml/version/\ocamlversion/ocamldoc/odoc_html.ml?view=markup}{"odoc_html.ml"}, +\item for \LaTeX~: \href{http://caml.inria.fr/cgi-bin/viewvc.cgi/ocaml/version/\ocamlversion/ocamldoc/odoc_latex.ml?view=markup}{"odoc_latex.ml"}, +\item for TeXinfo~: \href{http://caml.inria.fr/cgi-bin/viewvc.cgi/ocaml/version/\ocamlversion/ocamldoc/odoc_texi.ml?view=markup}{"odoc_texi.ml"}, +\item for man pages~: \href{http://caml.inria.fr/cgi-bin/viewvc.cgi/ocaml/version/\ocamlversion/ocamldoc/odoc_man.ml?view=markup}{"odoc_man.ml"}, +\item for graphviz (dot)~: \href{http://caml.inria.fr/cgi-bin/viewvc.cgi/ocaml/version/\ocamlversion/ocamldoc/odoc_dot.ml?view=markup}{"odoc_dot.ml"}. +\end{itemize} + +%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Handling custom tags} +\label{s:ocamldoc-handling-custom-tags} + +Making a custom generator handle custom tags (see +\ref{s:ocamldoc-custom-tags}) is very simple. + +\subsubsection*{For HTML} +Here is how to develop an HTML generator handling your custom tags. + +The class "Odoc_html.Generator.html" inherits +from the class "Odoc_html.info", containing a field "tag_functions" which is a +list of pairs composed of a custom tag (e.g. "\"foo\"") and a function taking +a "text" and returning HTML code (of type "string"). +To handle a new tag "bar", extend the current HTML generator + and complete the "tag_functions" field: +\begin{verbatim} +module Generator (G : Odoc_html.Html_generator) = +struct + class html = + object(self) + inherit G.html + + (** Return HTML code for the given text of a bar tag.
*) + method html_of_bar t = (* your code here *) + + initializer + tag_functions <- ("bar", self#html_of_bar) :: tag_functions + end +end +let _ = Odoc_args.extend_html_generator (module Generator : Odoc_gen.Html_functor);; +\end{verbatim} + +Another method of the class "Odoc_html.info" will look for the +function associated to a custom tag and apply it to the text given to +the tag. If no function is associated to a custom tag, then the method +prints a warning message on "stderr". + +\subsubsection{For other generators} +You can act the same way for other kinds of generators. + +%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Adding command line options} +The command line analysis is performed after loading the module containing the +documentation generator, thus allowing command line options to be added to the + list of existing ones. Adding an option can be done with the function +\begin{verbatim} + Odoc_args.add_option : string * Arg.spec * string -> unit +\end{verbatim} +\noindent{}Note: Existing command line options can be redefined using +this function. + +%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Compilation and usage} +\label{s:ocamldoc-compilation-and-usage} + +%%%%%%%%%%%%%% +\subsubsection{Defining a custom generator class in one file} +Let "custom.ml" be the file defining a new generator class. +Compilation of "custom.ml" can be performed by the following command~: +\begin{alltt} + ocamlc -I +ocamldoc -c custom.ml +\end{alltt} +\noindent{}The file "custom.cmo" is created and can be used this way~: +\begin{alltt} + ocamldoc -g custom.cmo \var{other-options} \var{source-files} +\end{alltt} +\noindent{}It is important not to give the "-html" or any other option +selecting a built-in generator to "ocamldoc", +which would result in using this generator instead of the one you just loaded. 
+ +%%%%%%%%%%%%%% +\subsubsection{Defining a custom generator class in several files} +It is possible to define a generator class in several modules, which +are defined in several files \var{\nth{file}{1}}".ml"["i"], +\var{\nth{file}{2}}".ml"["i"], ..., \var{\nth{file}{n}}".ml"["i"]. A ".cma" +library file must be created, including all these files. + +The following commands create the "custom.cma" file from files +\var{\nth{file}{1}}".ml"["i"], ..., \var{\nth{file}{n}}".ml"["i"]~: +\begin{alltt} +ocamlc -I +ocamldoc -c \var{\nth{file}{1}}.ml\textrm{[}i\textrm{]} +ocamlc -I +ocamldoc -c \var{\nth{file}{2}}.ml\textrm{[}i\textrm{]} +... +ocamlc -I +ocamldoc -c \var{\nth{file}{n}}.ml\textrm{[}i\textrm{]} +ocamlc -o custom.cma -a \var{\nth{file}{1}}.cmo \var{\nth{file}{2}}.cmo ... \var{\nth{file}{n}}.cmo +\end{alltt} +\noindent{}Then, the following command uses "custom.cma" as custom generator: +\begin{alltt} + ocamldoc -g custom.cma \var{other-options} \var{source-files} +\end{alltt} +\noindent{}Again, it is important not to give the "-html" or any other +option selecting a built-in generator to "ocamldoc", +which would result in using this generator instead of the one you just loaded. diff --git a/manual/manual/cmds/profil.etex b/manual/manual/cmds/profil.etex new file mode 100644 index 0000000000..0e7c7c2bd3 --- /dev/null +++ b/manual/manual/cmds/profil.etex @@ -0,0 +1,170 @@ +\chapter{Profiling (ocamlprof)} \label{c:profiler} +\pdfchapter{Profiling (ocamlprof)} +%HEVEA\cutname{profil.html} + +This chapter describes how the execution of OCaml +programs can be profiled, by recording how many times functions are +called, branches of conditionals are taken, \ldots + +\section{Compiling for profiling} + +Before profiling an execution, the program must be compiled in +profiling mode, using the "ocamlcp" front-end to the "ocamlc" compiler +(see chapter~\ref{c:camlc}) or the "ocamloptp" front-end to the +"ocamlopt" compiler (see chapter~\ref{c:nativecomp}). 
When compiling +modules separately, "ocamlcp" or "ocamloptp" must be used when +compiling the modules (production of ".cmo" or ".cmx" files), and can +also be used (though this is not strictly necessary) when linking them +together. + +\paragraph{Note} If a module (".ml" file) doesn't have a corresponding +interface (".mli" file), then compiling it with "ocamlcp" will produce +object files (".cmi" and ".cmo") that are not compatible with the ones +produced by "ocamlc", which may lead to problems (if the ".cmi" or +".cmo" is still around) when switching between profiling and +non-profiling compilations. To avoid this problem, you should always +have a ".mli" file for each ".ml" file. The same problem exists with +"ocamloptp". + +\paragraph{Note} To make sure your programs can be compiled in +profiling mode, avoid using any identifier that begins with +"__ocaml_prof". + +The amount of profiling information can be controlled through the "-P" +option to "ocamlcp" or "ocamloptp", followed by one or several letters +indicating which parts of the program should be profiled: + +%% description des options +\begin{options} +\item["a"] all options +\item["f"] function calls : a count point is set at the beginning of +each function body +\item["i"] {\bf if \ldots then \ldots else \ldots} : count points are set in +both {\bf then} branch and {\bf else} branch +\item["l"] {\bf while, for} loops: a count point is set at the beginning of +the loop body +\item["m"] {\bf match} branches: a count point is set at the beginning of the +body of each branch +\item["t"] {\bf try \ldots with \ldots} branches: a count point is set at the +beginning of the body of each branch +\end{options} + +For instance, compiling with "ocamlcp -P film" profiles function calls, +if\ldots then\ldots else\ldots, loops and pattern matching. + +Calling "ocamlcp" or "ocamloptp" without the "-P" option defaults to +"-P fm", meaning that only function calls and pattern matching are +profiled. 
+ +\paragraph{Note} For compatibility with previous releases, "ocamlcp" +also accepts the "-p" option, with the same arguments and behaviour as +"-P". + +The "ocamlcp" and "ocamloptp" commands also accept all the options of +the corresponding "ocamlc" or "ocamlopt" compiler, except the "-pp" +(preprocessing) option. + + +\section{Profiling an execution} + +Running an executable that has been compiled with "ocamlcp" or +"ocamloptp" records the execution counts for the specified parts of +the program and saves them in a file called "ocamlprof.dump" in the +current directory. + +If the environment variable "OCAMLPROF_DUMP" is set when the program +exits, its value is used as the file name instead of "ocamlprof.dump". + +The dump file is written only if the program terminates +normally (by calling "exit" or by falling through). It is not written +if the program terminates with an uncaught exception. + +If a compatible dump file already exists in the current directory, then the +profiling information is accumulated in this dump file. This allows, for +instance, the profiling of several executions of a program on +different inputs. Note that dump files produced by byte-code +executables (compiled with "ocamlcp") are compatible with the dump +files produced by native executables (compiled with "ocamloptp"). + +\section{Printing profiling information} + +The "ocamlprof" command produces a source listing of the program modules +where execution counts have been inserted as comments. For instance, +\begin{verbatim} + ocamlprof foo.ml +\end{verbatim} +prints the source code for the "foo" module, with comments indicating +how many times the functions in this module have been called. Naturally, +this information is accurate only if the source file has not been modified +after it was compiled. + +The following options are recognized by "ocamlprof": + +\begin{options} + +\item["-f" \var{dumpfile}] +Specifies an alternate dump file of profiling information to be read. 
+ +\item["-F" \var{string}] +Specifies an additional string to be output with profiling information. +By default, "ocamlprof" will annotate programs with comments of the form +{\tt (* \var{n} *)} where \var{n} is the counter value for a profiling +point. With option {\tt -F \var{s}}, the annotation will be +{\tt (* \var{s}\var{n} *)}. + +\item["-impl" \var{filename}] +Process the file \var{filename} as an implementation file, even if its +extension is not ".ml". + +\item["-intf" \var{filename}] +Process the file \var{filename} as an interface file, even if its +extension is not ".mli". + +\item["-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. + +\item["-help" or "--help"] +Display a short usage summary and exit. +% +\end{options} + +\section{Time profiling} + +Profiling with "ocamlprof" only records execution counts, not the actual +time spent within each function. There is currently no way to perform +time profiling on bytecode programs generated by "ocamlc". + +Native-code programs generated by "ocamlopt" can be profiled for time +and execution counts using the "-p" option and the standard Unix +profiler "gprof". Just add the "-p" option when compiling and linking +the program: +\begin{alltt} + ocamlopt -o myprog -p \var{other-options} \var{files} + ./myprog + gprof myprog +\end{alltt} +OCaml function names in the output of "gprof" have the following format: +\begin{alltt} + \var{Module-name}_\var{function-name}_\var{unique-number} +\end{alltt} +Other functions shown are either parts of the OCaml run-time system or +external C functions linked with the program. + +The output of "gprof" is described in the Unix manual page for +"gprof(1)". It generally consists of two parts: a ``flat'' profile +showing the time spent in each function and the number of invocations +of each function, and a ``hierarchical'' profile based on the call +graph.
Currently, only the Intel x86 ports of "ocamlopt" under +Linux, BSD and MacOS X support the two profiles. On other platforms, +"gprof" will report only the ``flat'' profile with just time +information. When reading the output of "gprof", keep in mind that +the accumulated times computed by "gprof" are based on heuristics and +may not be exact. + +\paragraph{Note} The "ocamloptp" command also accepts the "-p" +option. In that case, both kinds of profiling are performed by the +program, and you can display the results with the "gprof" and "ocamlprof" +commands, respectively. diff --git a/manual/manual/cmds/runtime.etex b/manual/manual/cmds/runtime.etex new file mode 100644 index 0000000000..9bec38d837 --- /dev/null +++ b/manual/manual/cmds/runtime.etex @@ -0,0 +1,270 @@ +\chapter{The runtime system (ocamlrun)} \label{c:runtime} +\pdfchapter{The runtime system (ocamlrun)} +%HEVEA\cutname{runtime.html} + +The "ocamlrun" command executes bytecode files produced by the +linking phase of the "ocamlc" command. + +\section{Overview} + +The "ocamlrun" command comprises three main parts: the bytecode +interpreter, that actually executes bytecode files; the memory +allocator and garbage collector; and a set of C functions that +implement primitive operations such as input/output. + +The usage for "ocamlrun" is: +\begin{alltt} + ocamlrun \var{options} \var{bytecode-executable} \nth{arg}{1} ... \nth{arg}{n} +\end{alltt} +The first non-option argument is taken to be the name of the file +containing the executable bytecode. (That file is searched in the +executable path as well as in the current directory.) The remaining +arguments are passed to the OCaml program, in the string array +"Sys.argv". Element 0 of this array is the name of the +bytecode executable file; elements 1 to \var{n} are the remaining +arguments \nth{arg}{1} to \nth{arg}{n}. 
+ +As mentioned in chapter~\ref{c:camlc}, the bytecode executable files +produced by the "ocamlc" command are self-executable, and manage to +launch the "ocamlrun" command on themselves automatically. That is, +assuming "a.out" is a bytecode executable file, +\begin{alltt} + a.out \nth{arg}{1} ... \nth{arg}{n} +\end{alltt} +works exactly as +\begin{alltt} + ocamlrun a.out \nth{arg}{1} ... \nth{arg}{n} +\end{alltt} +Notice that it is not possible to pass options to "ocamlrun" when +invoking "a.out" directly. + +\begin{windows} +Under several versions of Windows, bytecode executable files are +self-executable only if their name ends in ".exe". It is recommended +to always give ".exe" names to bytecode executables, e.g. compile +with "ocamlc -o myprog.exe ..." rather than "ocamlc -o myprog ...". +\end{windows} + +\section{Options} \label{ocamlrun-options} + +The following command-line options are recognized by "ocamlrun". + +\begin{options} + +\item["-b"] +When the program aborts due to an uncaught exception, print a detailed +``back trace'' of the execution, showing where the exception was +raised and which function calls were outstanding at this point. The +back trace is printed only if the bytecode executable contains +debugging information, i.e. was compiled and linked with the "-g" +option to "ocamlc" set. This is equivalent to setting the "b" flag +in the "OCAMLRUNPARAM" environment variable (see below). +\item["-I" \var{dir}] +Search the directory \var{dir} for dynamically-loaded libraries, +in addition to the standard search path (see +section~\ref{s-ocamlrun-dllpath}). +\item["-p"] +Print the names of the primitives known to this version of +"ocamlrun" and exit. +\item["-v"] +Direct the memory manager to print some progress messages on +standard error. This is equivalent to setting "v=63" in the +"OCAMLRUNPARAM" environment variable (see below). +\item["-version"] +Print version string and exit. +\item["-vnum"] +Print short version number and exit. 
+ +\end{options} + +\noindent +The following environment variables are also consulted: + +\begin{options} +\item["CAML_LD_LIBRARY_PATH"] Additional directories to search for + dynamically-loaded libraries (see section~\ref{s-ocamlrun-dllpath}). + +\item["OCAMLLIB"] The directory containing the OCaml standard + library. (If "OCAMLLIB" is not set, "CAMLLIB" will be used instead.) + Used to locate the "ld.conf" configuration file for + dynamic loading (see section~\ref{s-ocamlrun-dllpath}). If not set, + default to the library directory specified when compiling OCaml. + +\item["OCAMLRUNPARAM"] Set the runtime system options + and garbage collection parameters. + (If "OCAMLRUNPARAM" is not set, "CAMLRUNPARAM" will be used instead.) + This variable must be a sequence of parameter specifications. + A parameter specification is an option letter followed by an "=" + sign, a decimal number (or an hexadecimal number prefixed by "0x"), + and an optional multiplier. The options are documented below; + the last six correspond to the fields of the + "control" record documented in +\ifouthtml + \ahref{libref/Gc.html}{Module \texttt{Gc}}. +\else + section~\ref{Gc}. +\fi + \begin{options} + \item[b] (backtrace) Trigger the printing of a stack backtrace + when an uncaught exception aborts the program. + This option takes no argument. + \item[p] (parser trace) Turn on debugging support for + "ocamlyacc"-generated parsers. When this option is on, + the pushdown automaton that executes the parsers prints a + trace of its actions. This option takes no argument. + \item[R] (randomize) Turn on randomization of all hash tables by default + (see +\ifouthtml + \ahref{libref/Hashtbl.html}{Module \texttt{Hashtbl}}). +\else + section~\ref{Hashtbl}). +\fi + This option takes no argument. + \item[h] The initial size of the major heap (in words). + \item[a] ("allocation_policy") The policy used for allocating in the + OCaml heap. 
Possible values are 0 for the next-fit policy, and 1 + for the first-fit policy. Next-fit is usually faster, but first-fit + is better for avoiding fragmentation and the associated heap + compactions. + \item[s] ("minor_heap_size") Size of the minor heap. (in words) + \item[i] ("major_heap_increment") Default size increment for the + major heap. (in words) + \item[o] ("space_overhead") The major GC speed setting. + \item[O] ("max_overhead") The heap compaction trigger setting. + \item[l] ("stack_limit") The limit (in words) of the stack size. + \item[v] ("verbose") What GC messages to print to stderr. This + is a sum of values selected from the following: + \begin{options} + \item[1 (= 0x001)] Start of major GC cycle. + \item[2 (= 0x002)] Minor collection and major GC slice. + \item[4 (= 0x004)] Growing and shrinking of the heap. + \item[8 (= 0x008)] Resizing of stacks and memory manager tables. + \item[16 (= 0x010)] Heap compaction. + \item[32 (= 0x020)] Change of GC parameters. + \item[64 (= 0x040)] Computation of major GC slice size. + \item[128 (= 0x080)] Calling of finalization functions + \item[256 (= 0x100)] Startup messages (loading the bytecode + executable file, resolving shared libraries). + \item[512 (= 0x200)] Computation of compaction-triggering condition. + \end{options} + \end{options} + The multiplier is "k", "M", or "G", for multiplication by $2^{10}$, + $2^{20}$, and $2^{30}$ respectively. + For example, on a 32-bit machine, under "bash" the command +\begin{verbatim} + export OCAMLRUNPARAM='b,s=256k,v=0x015' +\end{verbatim} + tells a subsequent "ocamlrun" to print backtraces for uncaught exceptions, + set its initial minor heap size to 1~megabyte and + print a message at the start of each major GC cycle, when the heap + size changes, and when compaction is triggered. + +\item["CAMLRUNPARAM"] If "OCAMLRUNPARAM" is not found in the + environment, then "CAMLRUNPARAM" will be used instead. 
If + "CAMLRUNPARAM" is not found, then the default values will be used. + +\item["PATH"] List of directories searched to find the bytecode +executable file. +\end{options} + +\section{Dynamic loading of shared libraries} \label{s-ocamlrun-dllpath} + +On platforms that support dynamic loading, "ocamlrun" can link +dynamically with C shared libraries (DLLs) providing additional C primitives +beyond those provided by the standard runtime system. The names for +these libraries are provided at link time (as described in +section~\ref{dynlink-c-code}), and recorded in the bytecode executable +file; "ocamlrun", then, locates these libraries and resolves references +to their primitives when the bytecode executable program starts. + +The "ocamlrun" command searches for shared libraries in the following +directories, in the order indicated: +\begin{enumerate} +\item Directories specified on the "ocamlrun" command line with the +"-I" option. +\item Directories specified in the "CAML_LD_LIBRARY_PATH" environment +variable. +\item Directories specified at link-time via the "-dllpath" option to +"ocamlc". (These directories are recorded in the bytecode executable +file.) +\item Directories specified in the file "ld.conf". This file resides +in the OCaml standard library directory, and lists directory +names (one per line) to be searched. Typically, it contains only one +line naming the "stublibs" subdirectory of the OCaml standard +library directory. Users can add there the names of other directories +containing frequently-used shared libraries; however, for consistency +of installation, we recommend that shared libraries are installed +directly in the system "stublibs" directory, rather than adding lines +to the "ld.conf" file. +\item Default directories searched by the system dynamic loader. +Under Unix, these generally include "/lib" and "/usr/lib", plus the +directories listed in the file "/etc/ld.so.conf" and the environment +variable "LD_LIBRARY_PATH".
Under Windows, these include the Windows +system directories, plus the directories listed in the "PATH" +environment variable. +\end{enumerate} + +\section{Common errors} + +This section describes and explains the most frequently encountered +error messages. + +\begin{options} + +\item[{\it filename}": no such file or directory"] +If {\it filename} is the name of a self-executable bytecode file, this +means that either that file does not exist, or that it failed to run +the "ocamlrun" bytecode interpreter on itself. The second possibility +indicates that OCaml has not been properly installed on your +system. + +\item["Cannot exec ocamlrun"] +(When launching a self-executable bytecode file.) The "ocamlrun" + could not be found in the executable path. Check that OCaml + has been properly installed on your system. + +\item["Cannot find the bytecode file"] +The file that "ocamlrun" is trying to execute (e.g. the file given as +first non-option argument to "ocamlrun") either does not exist, or is +not a valid executable bytecode file. + +\item["Truncated bytecode file"] +The file that "ocamlrun" is trying to execute is not a valid executable +bytecode file. Probably it has been truncated or mangled since +created. Erase and rebuild it. + +\item["Uncaught exception"] +The program being executed contains a ``stray'' exception. That is, +it raises an exception at some point, and this exception is never +caught. This causes immediate termination of the program. The name of +the exception is printed, along with its string, byte sequence, and +integer arguments +(arguments of more complex types are not correctly printed). +To locate the context of the uncaught exception, compile the program +with the "-g" option and either run it again under the "ocamldebug" +debugger (see chapter~\ref{c:debugger}), or run it with "ocamlrun -b" +or with the "OCAMLRUNPARAM" environment variable set to "b=1". 
+ +\item["Out of memory"] +The program being executed requires more memory than available. Either +the program builds excessively large data structures; or the program +contains too many nested function calls, and the stack overflows. In +some cases, your program is perfectly correct, it just requires more +memory than your machine provides. In other cases, the ``out of +memory'' message reveals an error in your program: non-terminating +recursive function, allocation of an excessively large array, +string or byte sequence, attempts to build an infinite list or other +data structure, \ldots + +To help you diagnose this error, run your program with the "-v" option +to "ocamlrun", or with the "OCAMLRUNPARAM" environment variable set to +"v=63". If it displays lots of ``"Growing stack"\ldots'' +messages, this is probably a looping recursive function. If it +displays lots of ``"Growing heap"\ldots'' messages, with the heap size +growing slowly, this is probably an attempt to construct a data +structure with too many (infinitely many?) cells. If it displays few +``"Growing heap"\ldots'' messages, but with a huge increment in the +heap size, this is probably an attempt to build an excessively large +array, string or byte sequence. + +\end{options} diff --git a/manual/manual/cmds/top.etex b/manual/manual/cmds/top.etex new file mode 100644 index 0000000000..5e6681b853 --- /dev/null +++ b/manual/manual/cmds/top.etex @@ -0,0 +1,528 @@ +\chapter{The toplevel system (ocaml)} \label{c:camllight} +\pdfchapter{The toplevel system (ocaml)} +%HEVEA\cutname{toplevel.html} + +This chapter describes the toplevel system for OCaml, that permits +interactive use of the OCaml system +through a read-eval-print loop. In this mode, the system repeatedly +reads OCaml phrases from the input, then typechecks, compiles and +evaluates them, then prints the inferred type and result value, if +any. The system prints a "#" (sharp) prompt before reading each +phrase.
+ +Input to the toplevel can span several lines. It is terminated by @";;"@ (a +double-semicolon). The toplevel input consists in one or several +toplevel phrases, with the following syntax: + +\begin{syntax} +toplevel-input: + {{ definition }} ';;' + | expr ';;' + | '#' ident [ directive-argument ] ';;' +; +directive-argument: + string-literal + | integer-literal + | value-path + | 'true' || 'false' +\end{syntax} + +A phrase can consist of a definition, like those found in +implementations of compilation units or in @'struct' \ldots 'end'@ +module expressions. The definition can bind value names, type names, +an exception, a module name, or a module type name. The toplevel +system performs the bindings, then prints the types and values (if +any) for the names thus defined. + +A phrase may also consist in a value expression +(section~\ref{s:value-expr}). It is simply evaluated +without performing any bindings, and its value is +printed. + +Finally, a phrase can also consist in a toplevel directive, +starting with @"#"@ (the sharp sign). These directives control the +behavior of the toplevel; they are listed below in +section~\ref{s:toplevel-directives}. + +\begin{unix} +The toplevel system is started by the command "ocaml", as follows: +\begin{alltt} + ocaml \var{options} \var{objects} # interactive mode + ocaml \var{options} \var{objects} \var{scriptfile} # script mode +\end{alltt} +\var{options} are described below. +\var{objects} are filenames ending in ".cmo" or ".cma"; they are +loaded into the interpreter immediately after \var{options} are set. +\var{scriptfile} is any file name not ending in ".cmo" or ".cma". + +If no \var{scriptfile} is given on the command line, the toplevel system +enters interactive mode: phrases are read on standard input, results +are printed on standard output, errors on standard error. End-of-file +on standard input terminates "ocaml" (see also the "#quit" directive +in section~\ref{s:toplevel-directives}). 
+ +On start-up (before the first phrase is read), if the file +".ocamlinit" exists in the current directory, +its contents are read as a sequence of OCaml phrases +and executed as per the "#use" directive +described in section~\ref{s:toplevel-directives}. +The evaluation outcomes for each phrase are not displayed. +If the current directory does not contain an ".ocamlinit" file, but +the user's home directory (environment variable "HOME") does, the +latter is read and executed as described above. + +The toplevel system does not perform line editing, but it can +easily be used in conjunction with an external line editor such as +"ledit", "ocaml2" or "rlwrap" +\begin{latexonly} +(see the Caml Hump "http://caml.inria.fr/humps/index_framed_caml.html"). +\end{latexonly} +\begin{htmlonly} +(see the +\ahref{http://caml.inria.fr/humps/index\_framed\_caml.html}{Caml Hump}). +\end{htmlonly} +Another option is to use "ocaml" under Gnu Emacs, which gives the +full editing power of Emacs (command "run-caml" from library "inf-caml"). + +At any point, the parsing, compilation or evaluation of the current +phrase can be interrupted by pressing "ctrl-C" (or, more precisely, +by sending the "INTR" signal to the "ocaml" process). The toplevel +then immediately returns to the "#" prompt. + +If \var{scriptfile} is given on the command-line to "ocaml", the toplevel +system enters script mode: the contents of the file are read as a +sequence of OCaml phrases and executed, as per the "#use" +directive (section~\ref{s:toplevel-directives}). The outcome of the +evaluation is not printed. On reaching the end of file, the "ocaml" +command exits immediately. No commands are read from standard input. +"Sys.argv" is transformed, ignoring all OCaml parameters, and +starting with the script file name in "Sys.argv.(0)". + +In script mode, the first line of the script is ignored if it starts +with "#!".
Thus, it should be possible to make the script +itself executable and put as first line "#!/usr/local/bin/ocaml", +thus calling the toplevel system automatically when the script is +run. However, "ocaml" itself is a "#!" script on most installations +of OCaml, and Unix kernels usually do not handle nested "#!" +scripts. A better solution is to put the following as the first line +of the script: +\begin{verbatim} + #!/usr/local/bin/ocamlrun /usr/local/bin/ocaml +\end{verbatim} + +\end{unix} + +\begin{windows} +In addition to the text-only command "ocaml.exe", which works exactly +as under Unix (see above), a graphical user interface for the +toplevel is available under the name "ocamlwin.exe". It should be +launched from the Windows file manager or program manager. +This interface provides a text window in which commands can be entered +and edited, and the toplevel responses are printed. +\end{windows} + +\section{Options} \label{s:toplevel-options} + +The following command-line options are recognized by the "ocaml" command. + +\begin{options} + +\item["-absname"] +Force error messages to show absolute paths for file names. + +\item["-I" \var{directory}] +Add the given directory to the list of directories searched for +source and compiled files. By default, the current directory is +searched first, then the standard library directory. Directories added +with "-I" are searched after the current directory, in the order in +which they were given on the command line, but before the standard +library directory. + +If the given directory starts with "+", it is taken relative to the +standard library directory. For instance, "-I +labltk" adds the +subdirectory "labltk" of the standard library to the search path. + +Directories can also be added to the list once +the toplevel is running with the "#directory" directive +(section~\ref{s:toplevel-directives}). + +\item["-init" \var{file}] +Load the given file instead of the default initialization file. 
+The default file is ".ocamlinit" in the current directory if it +exists, otherwise ".ocamlinit" in the user's home directory. + +\item["-labels"] +Labels are not ignored in types, labels may be used in applications, +and labelled parameters can be given in any order. This is the default. + +\item["-no-app-funct"] +Deactivates the applicative behaviour of functors. With this option, +each functor application generates new types in its result and +applying the same functor twice to the same argument yields two +incompatible structures. + +\item["-noassert"] +Do not compile assertion checks. Note that the special form +"assert false" is always compiled because it is typed specially. + +\item["-nolabels"] +Ignore non-optional labels in types. Labels cannot be used in +applications, and parameter order becomes strict. + +\item["-noprompt"] +Do not display any prompt when waiting for input. + +\item["-nopromptcont"] +Do not display the secondary prompt when waiting for continuation +lines in multi-line inputs. This should be used e.g. when running +"ocaml" in an "emacs" window. + +\item["-nostdlib"] +Do not include the standard library directory in the list of +directories searched for source and compiled files. + +\item["-ppx" \var{command}] +After parsing, pipe the abstract syntax tree through the preprocessor +\var{command}. The module "Ast_mapper", described in +chapter~\ref{Ast-underscoremapper}, implements the external interface +of a preprocessor. + +\item["-principal"] +Check information paths during type-checking, to make sure that all +types are derived in a principal way. When using labelled arguments +and/or polymorphic methods, this flag is required to ensure future +versions of the compiler will be able to infer types correctly, even +if internal algorithms change. 
+All programs accepted in "-principal" mode are also accepted in the +default mode with equivalent types, but different binary signatures, +and this may slow down type checking; yet it is a good idea to +use it once before publishing source code. + +\item["-rectypes"] +Allow arbitrary recursive types during type-checking. By default, +only recursive types where the recursion goes through an object type +are supported. + +\item["-safe-string"] +Enforce the separation between types "string" and "bytes", +thereby making strings read-only. This will become the default in +a future version of OCaml. + +\item["-short-paths"] +When a type is visible under several module-paths, use the shortest +one when printing the type's name in inferred interfaces and error and +warning messages. + +\item["-stdin"] +Read the standard input as a script file rather than starting an +interactive session. + +\item["-strict-sequence"] +Force the left-hand part of each sequence to have type unit. + +\item["-strict-formats"] +Reject invalid formats that were accepted in legacy format +implementations. You should use this flag to detect and fix such +invalid formats, as they will be rejected by future OCaml versions. + +\item["-unsafe"] +See the corresponding option for "ocamlc", chapter~\ref{c:camlc}. +Turn bound checking off on array and string accesses (the "v.(i)" and +"s.[i]" constructs). Programs compiled with "-unsafe" are therefore +slightly faster, but unsafe: anything can happen if the program +accesses an array or string outside of its bounds. + +\item["-unsafe-string"] +Identify the types "string" and "bytes", +thereby making strings writable. For reasons of backward compatibility, +this is the default setting for the moment, but this will change in a future +version of OCaml. + +\item["-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. 
+ +\item["-w" \var{warning-list}] +Enable or disable warnings according to the argument \var{warning-list}. +See section~\ref{s:comp-options} for the syntax of the argument. + +\item["-warn-error" \var{warning-list}] +Mark as fatal the warnings enabled by the argument \var{warning-list}. +See section~\ref{s:comp-options} for the syntax of the argument. + +\item["-warn-help"] +Show the description of all available warning numbers. + +\item["-" \var{file}] +Use \var{file} as a script file name, even when it starts with a +hyphen (-). + +\item["-help" or "--help"] +Display a short usage summary and exit. +% +\end{options} + +\begin{unix} +The following environment variables are also consulted: +\begin{options} +\item["LC_CTYPE"] If set to "iso_8859_1", accented characters (from the +ISO Latin-1 character set) in string and character literals are +printed as is; otherwise, they are printed as decimal escape sequences +("\\"\var{ddd}). + +\item["TERM"] When printing error messages, the toplevel system +attempts to underline visually the location of the error. It +consults the "TERM" variable to determine the type of output terminal +and look up its capabilities in the terminal database. + +\item["HOME"] Directory where the ".ocamlinit" file is searched. +\end{options} +\end{unix} + +\section{Toplevel directives} +\label{s:toplevel-directives} + +The following directives control the toplevel behavior, load files in +memory, and trace program execution. + +{\bf Note:} all directives start with a "#" (sharp) symbol. This "#" +must be typed before the directive, and must not be confused with the +"#" prompt displayed by the interactive loop. For instance, +typing "#quit;;" will exit the toplevel loop, but typing "quit;;" +will result in an ``unbound value "quit"'' error. + +\begin{options} + +\item["#quit;;"] +Exit the toplevel loop and terminate the "ocaml" command.
+ +\item["#labels "\var{bool}";;"] +Ignore labels in function types if argument is "false", or switch back +to default behaviour (commuting style) if argument is "true". + +\item["#principal "\var{bool}";;"] +If the argument is "true", check information paths during +type-checking, to make sure that all types are derived in a principal +way. If the argument is "false", do not check information paths. + +\item["#rectypes;;"] +Allow arbitrary recursive types during type-checking. Note: once +enabled, this option cannot be disabled because that would lead to +unsoundness of the type system. + +\item["#warnings \""\var{warning-list}"\";;"] +Enable or disable warnings according to the argument. + +\item["#warn_error \""\var{warning-list}"\";;"] +Treat as errors the warnings enabled by the argument and as normal +warnings the warnings disabled by the argument. + +\item["#directory \""\var{dir-name}"\";;"] +Add the given directory to the list of directories searched for +source and compiled files. + +\item["#remove_directory \""\var{dir-name}"\";;"] +Remove the given directory from the list of directories searched for +source and compiled files. Do nothing if the list does not contain +the given directory. + +\item["#cd \""\var{dir-name}"\";;"] +Change the current working directory. + +\item["#load \""\var{file-name}"\";;"] +Load in memory a bytecode object file (".cmo" file) or library file +(".cma" file) produced by the batch compiler "ocamlc". + +\item["#load_rec \""\var{file-name}"\";;"] +Load in memory a bytecode object file (".cmo" file) or library file +(".cma" file) produced by the batch compiler "ocamlc". +When loading an object file that depends on other modules +which have not been loaded yet, the .cmo files for these modules +are searched and loaded as well, recursively. The loading order +is not specified. + +\item["#use \""\var{file-name}"\";;"] +Read, compile and execute source phrases from the given file. 
+This is textual inclusion: phrases are processed just as if +they were typed on standard input. The reading of the file stops at +the first error encountered. + +\item["#mod_use \""\var{file-name}"\";;"] +Similar to "#use" but also wrap the code into a top-level module of the +same name as capitalized file name without extensions, following +semantics of the compiler. + +\item["#install_printer "\var{printer-name}";;"] +This directive registers the function named \var{printer-name} (a +value path) as a printer for values whose types match the argument +type of the function. That is, the toplevel loop will call +\var{printer-name} when it has such a value to print. + +The printing function \var{printer-name} should have type +@"Format.formatter" "->" @t@ "->" "unit"@, where @@t@@ is the +type for the values to be printed, and should output its textual +representation for the value of type @@t@@ on the given formatter, +using the functions provided by the "Format" library. For backward +compatibility, \var{printer-name} can also have type +@@t@ "->" "unit"@ and should then output on the standard +formatter, but this usage is deprecated. + +\item["#remove_printer "\var{printer-name}";;"] +Remove the named function from the table of toplevel printers. + +\item["#trace "\var{function-name}";;"] +After executing this directive, all calls to the function named +\var{function-name} will be ``traced''. That is, the argument and the +result are displayed for each call, as well as the exceptions escaping +out of the function, raised either by the function itself or by +another function it calls. If the function is curried, each argument +is printed as it is passed to the function. + +\item["#untrace "\var{function-name}";;"] +Stop tracing the given function. + +\item["#untrace_all;;"] +Stop tracing all functions traced so far. + +\item["#print_depth "\var{n}";;"] +Limit the printing of values to a maximal depth of \var{n}. 
+The parts of values whose depth exceeds \var{n} are printed as "..." +(ellipsis). + +\item["#print_length "\var{n}";;"] +Limit the number of value nodes printed to at most \var{n}. +Remaining parts of values are printed as "..." (ellipsis). + +\item["#show_val "\var{value-path}";;"]\vspace{-4.7ex} +\item["#show_type "\var{typeconstr}";;"]\vspace{-4.7ex} +\item["#show_module "\var{module-path}";;"]\vspace{-4.7ex} +\item["#show_module_type "\var{modtype-path}";;"]\vspace{-4.7ex} +\item["#show_class "\var{class-path}";;"]\vspace{-4.7ex} +\item["#show_class_type "\var{class-path}";;"] +Print the signature of the corresponding component. + +\item["#show "\var{ident}";;"] +Print the signatures of components with name \var{ident} in all the +above categories. + +\end{options} + +\section{The toplevel and the module system} \label{s:toplevel-modules} + +Toplevel phrases can refer to identifiers defined in compilation units +with the same mechanisms as for separately compiled units: either by +using qualified names ("Modulename.localname"), or by using +the "open" construct and unqualified names (see section~\ref{s:names}). + +However, before referencing another compilation unit, an +implementation of that unit must be present in memory. +At start-up, the toplevel system contains implementations for all the +modules in the standard library. Implementations for user modules +can be entered with the "#load" directive described above. Referencing +a unit for which no implementation has been provided +results in the error "Reference to undefined global `...'". + +Note that entering "open "\var{Mod} merely accesses the compiled +interface (".cmi" file) for \var{Mod}, but does not load the +implementation of \var{Mod}, and does not cause any error if no +implementation of \var{Mod} has been loaded. The error +``reference to undefined global \var{Mod}'' will occur only when +executing a value or module definition that refers to \var{Mod}.
+ +\section{Common errors} + +This section describes and explains the most frequently encountered +error messages. + +\begin{options} + +\item[Cannot find file \var{filename}] +The named file could not be found in the current directory, nor in the +directories of the search path. + +If \var{filename} has the format \var{mod}".cmi", this +means you have referenced the compilation unit \var{mod}, but its +compiled interface could not be found. Fix: compile \var{mod}".mli" or +\var{mod}".ml" first, to create the compiled interface \var{mod}".cmi". + +If \var{filename} has the format \var{mod}".cmo", this +means you are trying to load with "#load" a bytecode object file that +does not exist yet. Fix: compile \var{mod}".ml" first. + +If your program spans several directories, this error can also appear +because you haven't specified the directories to look into. Fix: use +the "#directory" directive to add the correct directories to the +search path. + +\item[This expression has type \nth{t}{1}, but is used with type \nth{t}{2}] +See section~\ref{s:comp-errors}. + +\item[Reference to undefined global \var{mod}] +You have neglected to load in memory an implementation for a module +with "#load". See section~\ref{s:toplevel-modules} above. + +\end{options} + +\section{Building custom toplevel systems: \texttt{ocamlmktop}} + +The "ocamlmktop" command builds OCaml toplevels that +contain user code preloaded at start-up. + +The "ocamlmktop" command takes as argument a set of ".cmo" and ".cma" +files, and links them with the object files that implement the OCaml toplevel. +The typical use is: +\begin{verbatim} + ocamlmktop -o mytoplevel foo.cmo bar.cmo gee.cmo +\end{verbatim} +This creates the bytecode file "mytoplevel", containing the OCaml toplevel +system, plus the code from the three ".cmo" +files. 
This toplevel is directly executable and is started by: +\begin{verbatim} + ./mytoplevel +\end{verbatim} +This enters a regular toplevel loop, except that the code from +"foo.cmo", "bar.cmo" and "gee.cmo" is already loaded in memory, just as +if you had typed: +\begin{verbatim} + #load "foo.cmo";; + #load "bar.cmo";; + #load "gee.cmo";; +\end{verbatim} +on entrance to the toplevel. The modules "Foo", "Bar" and "Gee" are +not opened, though; you still have to do +\begin{verbatim} + open Foo;; +\end{verbatim} +yourself, if this is what you wish. + +\section{Options} + +The following command-line options are recognized by "ocamlmktop". + +\begin{options} + +\item["-cclib" \var{libname}] +Pass the "-l"\var{libname} option to the C linker when linking in +``custom runtime'' mode. See the corresponding option for +"ocamlc", in chapter~\ref{c:camlc}. + +\item["-ccopt" \var{option}] +Pass the given option to the C compiler and linker, when linking in +``custom runtime'' mode. See the corresponding option for +"ocamlc", in chapter~\ref{c:camlc}. + +\item["-custom"] +Link in ``custom runtime'' mode. See the corresponding option for +"ocamlc", in chapter~\ref{c:camlc}. + +\item["-I" \var{directory}] +Add the given directory to the list of directories searched for +compiled object code files (".cmo" and ".cma"). + +\item["-o" \var{exec-file}] +Specify the name of the toplevel file produced by the linker. +The default is "a.out". + +\end{options} diff --git a/manual/manual/foreword.etex b/manual/manual/foreword.etex new file mode 100644 index 0000000000..2529a86cb8 --- /dev/null +++ b/manual/manual/foreword.etex @@ -0,0 +1,81 @@ +\chapter*{Foreword} +\markboth{Foreword}{} +%HEVEA\cutname{foreword.html} + +This manual documents the release \ocamlversion\ of the OCaml +system. It is organized as follows. +\begin{itemize} +\item Part~\ref{p:tutorials}, ``An introduction to OCaml'', +gives an overview of the language. 
+\item Part~\ref{p:refman}, ``The OCaml language'', is the +reference description of the language. +\item Part~\ref{p:commands}, ``The OCaml tools'', documents +the compilers, toplevel system, and programming utilities. +\item Part~\ref{p:library}, ``The OCaml library'', describes the +modules provided in the standard library. +\begin{latexonly} +\item Part~\ref{p:appendix}, ``Appendix'', contains an +index of all identifiers defined in the standard library, and an +index of keywords. +\end{latexonly} +\end{itemize} + +\section*{Conventions} + +OCaml runs on several operating systems. The parts of +this manual that are specific to one operating system are presented as +shown below: + +\begin{unix} This is material specific to the Unix family of operating +systems, including Linux and \hbox{MacOS~X}. +\end{unix} + +\begin{windows} This is material specific to Microsoft Windows (2000, + XP, Vista, Seven). +\end{windows} + +\section*{License} + +The OCaml system is copyright \copyright\ 1996--\number\year\ +Institut National de Recherche en Informatique et en +Automatique (INRIA). +INRIA holds all ownership rights to the OCaml system. + +The OCaml system is open source and can be freely +redistributed. See the file "LICENSE" in the distribution for +licensing information. + +The present documentation is copyright \copyright\ \number\year\ +Institut National de Recherche en Informatique et en +Automatique (INRIA). The OCaml documentation and user's +manual may be reproduced and distributed in whole or +in part, subject to the following conditions: +\begin{itemize} +\item The copyright notice above and this permission notice must be +preserved complete on all complete or partial copies. +\item Any translation or derivative work of the OCaml +documentation and user's manual must be approved by the authors in +writing before distribution. 
+\item If you distribute the OCaml +documentation and user's manual in part, instructions for obtaining +the complete version of this manual must be included, and a +means for obtaining a complete version provided. +\item Small portions may be reproduced as illustrations for reviews or +quotes in other works without this permission notice if proper +citation is given. +\end{itemize} + +\section*{Availability} + +\begin{latexonly} +The complete OCaml distribution can be accessed via the Web +site \url{http://caml.inria.fr/}. This Web site contains a lot of +additional information on OCaml. +\end{latexonly} + +\begin{htmlonly} +The complete OCaml distribution can be accessed via the +\href{http://caml.inria.fr/}{Caml Web site}. +The \href{http://caml.inria.fr/}{Caml Web site} +contains a lot of additional information on OCaml. +\end{htmlonly} diff --git a/manual/manual/htmlman/.cvsignore b/manual/manual/htmlman/.cvsignore new file mode 100644 index 0000000000..3cecdc2c40 --- /dev/null +++ b/manual/manual/htmlman/.cvsignore @@ -0,0 +1,8 @@ +*.html +*.haux +*.hind +libref +manual.hmanual +manual.hmanual.kwd +manual.css +*.htoc diff --git a/manual/manual/htmlman/.gitignore b/manual/manual/htmlman/.gitignore new file mode 100644 index 0000000000..3cecdc2c40 --- /dev/null +++ b/manual/manual/htmlman/.gitignore @@ -0,0 +1,8 @@ +*.html +*.haux +*.hind +libref +manual.hmanual +manual.hmanual.kwd +manual.css +*.htoc diff --git a/manual/manual/htmlman/contents_motif.gif b/manual/manual/htmlman/contents_motif.gif Binary files differnew file mode 100644 index 0000000000..5d3d016702 --- /dev/null +++ b/manual/manual/htmlman/contents_motif.gif diff --git a/manual/manual/htmlman/libgraph.gif b/manual/manual/htmlman/libgraph.gif Binary files differnew file mode 100644 index 0000000000..b385985b13 --- /dev/null +++ b/manual/manual/htmlman/libgraph.gif diff --git a/manual/manual/htmlman/next_motif.gif b/manual/manual/htmlman/next_motif.gif Binary files differnew file mode 100644 index 
0000000000..3f84bacfb2 --- /dev/null +++ b/manual/manual/htmlman/next_motif.gif diff --git a/manual/manual/htmlman/previous_motif.gif b/manual/manual/htmlman/previous_motif.gif Binary files differnew file mode 100644 index 0000000000..8c8a3e6430 --- /dev/null +++ b/manual/manual/htmlman/previous_motif.gif diff --git a/manual/manual/index.tex b/manual/manual/index.tex new file mode 100644 index 0000000000..aff78b9fb4 --- /dev/null +++ b/manual/manual/index.tex @@ -0,0 +1,20 @@ +\ifouthtml +\begin{rawhtml} +<ul> +<li><a HREF=libref/index_modules.html>Index of modules</a></li> +<li><a HREF=libref/index_module_types.html>Index of module types</a></li> +<li><a HREF=libref/index_types.html>Index of types</a></li> +<li><a HREF=libref/index_exceptions.html>Index of exceptions</a></li> +<li><a HREF=libref/index_values.html>Index of values</a></li> +</ul> +\end{rawhtml} +\else +\chapter*{Index to the library} +\markright{Index to the library} +\addcontentsline{toc}{chapter}{Index to the library} +\myprintindex{\jobname.ind} +\fi +\chapter*{Index of keywords} +\markright{Index of keywords} +\addcontentsline{toc}{chapter}{Index of keywords} +\myprintindex{\jobname.kwd.ind} diff --git a/manual/manual/infoman/.cvsignore b/manual/manual/infoman/.cvsignore new file mode 100644 index 0000000000..35a4b3a96e --- /dev/null +++ b/manual/manual/infoman/.cvsignore @@ -0,0 +1,4 @@ +*.haux +*.hind +*.info*.gz +ocaml.hocaml.kwd diff --git a/manual/manual/infoman/.gitignore b/manual/manual/infoman/.gitignore new file mode 100644 index 0000000000..916af019f2 --- /dev/null +++ b/manual/manual/infoman/.gitignore @@ -0,0 +1,5 @@ +*.haux +*.hind +*.info*.gz +*.info.body* +ocaml.hocaml.kwd diff --git a/manual/manual/labltk.tex b/manual/manual/labltk.tex new file mode 100644 index 0000000000..944acb3daf --- /dev/null +++ b/manual/manual/labltk.tex @@ -0,0 +1,42 @@ +\documentclass[11pt]{book} +\usepackage[latin1]{inputenc} +%HEVEA\@def@charset{US-ASCII}% +\usepackage{alltt} +\usepackage{fullpage} 
+\usepackage{syntaxdef} +\usepackage{multind} +\usepackage{html} +\usepackage{textcomp} +\usepackage{caml-sl} +\usepackage{ocamldoc} +\usepackage{xspace} +\newif\ifplaintext +\plaintextfalse +%\newif\ifpdf +%\pdffalse +\input{macros.tex} + +\usepackage{hyperref} +%\makeatletter \def\@wrindex#1#2{\xdef \@indexfile{\csname #1@idxfile\endcsname}\@@wrindex#2||\\}\makeatother +\def\th{^{\hbox{\scriptsize th}}} + +\raggedbottom +\input{version.tex} + +\begin{document} +\thispagestyle{empty} +\begin{center} +~\vfill +\Huge The LablTk library + release 8.06.0 \\ + and \\ + The OCamlBrowser library explorer \\[1cm] +\large Jacques Garrigue, Jun Furuse \\ + \today \\ +\vfill +\end{center} +\setcounter{page}{1} + +\input{library/liblabltk.tex} +\input{cmds/browser.tex} +\end{document} diff --git a/manual/manual/library/.cvsignore b/manual/manual/library/.cvsignore new file mode 100644 index 0000000000..8955ee047a --- /dev/null +++ b/manual/manual/library/.cvsignore @@ -0,0 +1,5 @@ +*.tex +*.htex +arithstatus.mli +ocamldoc.out +ocamldoc.sty diff --git a/manual/manual/library/.gitignore b/manual/manual/library/.gitignore new file mode 100644 index 0000000000..8955ee047a --- /dev/null +++ b/manual/manual/library/.gitignore @@ -0,0 +1,5 @@ +*.tex +*.htex +arithstatus.mli +ocamldoc.out +ocamldoc.sty diff --git a/manual/manual/library/Makefile b/manual/manual/library/Makefile new file mode 100644 index 0000000000..de5b7ca753 --- /dev/null +++ b/manual/manual/library/Makefile @@ -0,0 +1,103 @@ +CORE_INTF=Pervasives.tex + +STDLIB_INTF=Arg.tex Array.tex ArrayLabels.tex Char.tex Complex.tex \ + Digest.tex Filename.tex Format.tex \ + Gc.tex Genlex.tex Hashtbl.tex Int32.tex Int64.tex \ + Lazy.tex Lexing.tex List.tex ListLabels.tex Map.tex Marshal.tex \ + MoreLabels.tex Nativeint.tex Obj.tex Oo.tex \ + Parsing.tex Printexc.tex Printf.tex Queue.tex Random.tex Scanf.tex \ + Set.tex Sort.tex Stack.tex Stream.tex String.tex StringLabels.tex Sys.tex \ + Weak.tex Callback.tex Buffer.tex 
StdLabels.tex \ + Bytes.tex BytesLabels.tex + +COMPILER_LIBS_INTF=Asthelper.tex Astmapper.tex Asttypes.tex \ + Lexer.tex Location.tex Longident.tex Parse.tex Pprintast.tex Printast.tex + +OTHERLIB_INTF=Unix.tex UnixLabels.tex Str.tex \ + Num.tex Arithstatus.tex Bigint.tex \ + Graphics.tex GraphicsX11.tex \ + Thread.tex Mutex.tex Condition.tex Event.tex ThreadUnix.tex \ + Dynlink.tex Bigarray.tex + + +INTF=$(CORE_INTF) $(STDLIB_INTF) $(COMPILER_LIBS_INTF) $(OTHERLIB_INTF) + +MLIS=$(CSLDIR)/stdlib/*.mli \ + $(CSLDIR)/utils/*.mli \ + $(CSLDIR)/parsing/*.mli \ + $(CSLDIR)/otherlibs/bigarray/bigarray.mli \ + $(CSLDIR)/otherlibs/dynlink/dynlink.mli \ + $(CSLDIR)/otherlibs/graph/graphics.mli \ + $(CSLDIR)/otherlibs/graph/graphicsX11.mli \ + $(CSLDIR)/otherlibs/num/num.mli \ + $(CSLDIR)/otherlibs/num/arith_status.mli \ + $(CSLDIR)/otherlibs/num/big_int.mli \ + $(CSLDIR)/otherlibs/str/*.mli \ + $(CSLDIR)/otherlibs/systhreads/*.mli \ + $(CSLDIR)/otherlibs/unix/*.mli + +BLURB=core.tex builtin.tex stdlib.tex compilerlibs.tex \ + libunix.tex libstr.tex libnum.tex libgraph.tex \ + libthreads.tex libdynlink.tex libbigarray.tex + +FILES=$(BLURB) $(INTF) + +FORMAT=../../tools/format-intf +TEXQUOTE=../../tools/texquote2 + +CSLDIR=$(RELEASEDIR) + +VPATH=.:$(CSLDIR)/stdlib:$(CSLDIR)/parsing:$(CSLDIR)/otherlibs/unix:$(CSLDIR)/otherlibs/str:$(CSLDIR)/otherlibs/num:$(CSLDIR)/otherlibs/graph:$(CSLDIR)/otherlibs/threads:$(CSLDIR)/otherlibs/dynlink:$(CSLDIR)/otherlibs/bigarray + +all: libs + +libs: $(FILES) + +$(INTF): $(MLIS) + $(CSLDIR)/byterun/ocamlrun $(CSLDIR)/ocamldoc/ocamldoc -latex \ + -I $(CSLDIR)/utils \ + -I $(CSLDIR)/stdlib \ + -I $(CSLDIR)/parsing \ + -I $(CSLDIR)/otherlibs/bigarray \ + -I $(CSLDIR)/otherlibs/dynlink \ + -I $(CSLDIR)/otherlibs/graph \ + -I $(CSLDIR)/otherlibs/num \ + -I $(CSLDIR)/otherlibs/str \ + -I $(CSLDIR)/otherlibs/systhreads \ + -I $(CSLDIR)/otherlibs/unix \ + $(MLIS) \ + -sepfiles \ + -latextitle "6,subsection*" \ + -latextitle "7,subsubsection*" \ + 
-latex-type-prefix "TYP" \ + -latex-module-prefix "" \ + -latex-module-type-prefix "" \ + -latex-value-prefix "" + mv -f Arith_status.tex Arithstatus.tex + mv -f Big_int.tex Bigint.tex + mv -f Ast_helper.tex Asthelper.tex + mv -f Ast_mapper.tex Astmapper.tex + +Tk.tex: tk.mli + $(CSLDIR)/byterun/ocamlrun $(CSLDIR)/ocamldoc/ocamldoc -latex \ + -I +labltk tk.mli \ + -sepfiles \ + -latextitle "6,subsection*" \ + -latextitle "7,subsubsection*" \ + -latex-type-prefix "TYP" \ + -latex-module-prefix "" \ + -latex-module-type-prefix "" \ + -latex-value-prefix "" + +clean: + rm -f $(FILES) + + +.SUFFIXES: +.SUFFIXES: .tex .etex .mli + +.etex.tex: $(TEXQUOTE) + $(TEXQUOTE) < $*.etex > $*.tex + +.mli.tex: $(FORMAT) + $(FORMAT) $< > $*.tex < $< diff --git a/manual/manual/library/builtin.etex b/manual/manual/library/builtin.etex new file mode 100644 index 0000000000..404f5608fb --- /dev/null +++ b/manual/manual/library/builtin.etex @@ -0,0 +1,281 @@ +\section{Built-in types and predefined exceptions} + +The following built-in types and predefined exceptions are always +defined in the +compilation environment, but are not part of any module. As a +consequence, they can only be referred by their short names. + +%\vspace{0.1cm} +\subsection*{Built-in types} +%\vspace{0.1cm} + +\begin{ocamldoccode} + type int +\end{ocamldoccode} +\index{int@\verb`int`} +\begin{ocamldocdescription} + The type of integer numbers. +\end{ocamldocdescription} + +\begin{ocamldoccode} + type char +\end{ocamldoccode} +\index{char@\verb`char`} +\begin{ocamldocdescription} + The type of characters. +\end{ocamldocdescription} + +\begin{ocamldoccode} + type bytes +\end{ocamldoccode} +\index{bytes@\verb`bytes`} +\begin{ocamldocdescription} + The type of (writable) byte sequences. +\end{ocamldocdescription} + +\begin{ocamldoccode} + type string +\end{ocamldoccode} +\index{string@\verb`string`} +\begin{ocamldocdescription} + The type of (read-only) character strings. 
+\end{ocamldocdescription} + +\begin{ocamldoccode} + type float +\end{ocamldoccode} +\index{float@\verb`float`} +\begin{ocamldocdescription} + The type of floating-point numbers. +\end{ocamldocdescription} + +\begin{ocamldoccode} + type bool = false | true +\end{ocamldoccode} +\index{bool@\verb`bool`} +\begin{ocamldocdescription} + The type of booleans (truth values). +\end{ocamldocdescription} + +\begin{ocamldoccode} + type unit = () +\end{ocamldoccode} +\index{unit@\verb`unit`} +\begin{ocamldocdescription} + The type of the unit value. +\end{ocamldocdescription} + +\begin{ocamldoccode} + type exn +\end{ocamldoccode} +\index{exn@\verb`exn`} +\begin{ocamldocdescription} + The type of exception values. +\end{ocamldocdescription} + +\begin{ocamldoccode} + type 'a array +\end{ocamldoccode} +\index{array@\verb`array`} +\begin{ocamldocdescription} + The type of arrays whose elements have type "'a". +\end{ocamldocdescription} + +\begin{ocamldoccode} + type 'a list = [] | :: of 'a * 'a list +\end{ocamldoccode} +\index{list@\verb`list`} +\begin{ocamldocdescription} + The type of lists whose elements have type "'a". +\end{ocamldocdescription} + +\begin{ocamldoccode} +type 'a option = None | Some of 'a +\end{ocamldoccode} +\index{option@\verb`option`} +\begin{ocamldocdescription} + The type of optional values of type "'a". +\end{ocamldocdescription} + +\begin{ocamldoccode} +type int32 +\end{ocamldoccode} +\index{int32@\verb`int32`} +\begin{ocamldocdescription} + The type of signed 32-bit integers. + See the "Int32"[\moduleref{Int32}] module. +\end{ocamldocdescription} + +\begin{ocamldoccode} +type int64 +\end{ocamldoccode} +\index{int64@\verb`int64`} +\begin{ocamldocdescription} + The type of signed 64-bit integers. + See the "Int64"[\moduleref{Int64}] module. 
+\end{ocamldocdescription} + +\begin{ocamldoccode} +type nativeint +\end{ocamldoccode} +\index{nativeint@\verb`nativeint`} +\begin{ocamldocdescription} + The type of signed, platform-native integers (32 bits on 32-bit + processors, 64 bits on 64-bit processors). + See the "Nativeint"[\moduleref{Nativeint}] module. +\end{ocamldocdescription} + +\begin{ocamldoccode} +type ('a, 'b, 'c, 'd, 'e, 'f) format6 +\end{ocamldoccode} +\index{format6@\verb`format6`} +\begin{ocamldocdescription} + The type of format strings. "'a" is the type of the parameters of + the format, "'f" is the result type for the "printf"-style + functions, "'b" is the type of the first argument given to "%a" and + "%t" printing functions (see module "Printf"[\moduleref{Printf}]), + "'c" is the result type of these functions, and also the type of the + argument transmitted to the first argument of "kprintf"-style + functions, "'d" is the result type for the "scanf"-style functions + (see module "Scanf"[\moduleref{Scanf}]), + and "'e" is the type of the receiver function for the "scanf"-style + functions. +\end{ocamldocdescription} + +\begin{ocamldoccode} +type 'a lazy_t +\end{ocamldoccode} +\index{lazyt@\verb`lazy_t`} +\begin{ocamldocdescription} + This type is used to implement the "Lazy"[\moduleref{Lazy}] module. + It should not be used directly. +\end{ocamldocdescription} + +%\vspace{0.1cm} +\subsection*{Predefined exceptions} +%\vspace{0.1cm} + +\begin{ocamldoccode} +exception Match_failure of (string * int * int) +\end{ocamldoccode} +\index{Matchfailure@\verb`Match_failure`} +\begin{ocamldocdescription} + Exception raised when none of the cases of a pattern-matching + apply. The arguments are the location of the "match" keyword + in the source code (file name, line number, column number).
+\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Assert_failure of (string * int * int) +\end{ocamldoccode} +\index{Assertfailure@\verb`Assert_failure`} +\begin{ocamldocdescription} + Exception raised when an assertion fails. The arguments are + the location of the "assert" keyword in the source code + (file name, line number, column number). +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Invalid_argument of string +\end{ocamldoccode} +\index{Invalidargument@\verb`Invalid_argument`} +\begin{ocamldocdescription} + Exception raised by library functions to signal that the given + arguments do not make sense. The string gives some information + to the programmer. As a general rule, this exception should not + be caught, it denotes a programming error and the code should be + modified not to trigger it. +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Failure of string +\end{ocamldoccode} +\index{Failure@\verb`Failure`} +\begin{ocamldocdescription} + Exception raised by library functions to signal that they are + undefined on the given arguments. The string is meant to give some + information to the programmer; you must \emph{not} pattern match on + the string literal because it may change in future versions (use + \verb`Failure _` instead). +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Not_found +\end{ocamldoccode} +\index{Notfound@\verb`Not_found`} +\begin{ocamldocdescription} + Exception raised by search functions when the desired object + could not be found. +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Out_of_memory +\end{ocamldoccode} +\index{Outofmemory@\verb`Out_of_memory`} +\begin{ocamldocdescription} + Exception raised by the garbage collector + when there is insufficient memory to complete the computation. 
+\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Stack_overflow +\end{ocamldoccode} +\index{Stackoverflow@\verb`Stack_overflow`} +\begin{ocamldocdescription} + Exception raised by the bytecode interpreter when the evaluation + stack reaches its maximal size. This often indicates infinite + or excessively deep recursion in the user's program. + (Not fully implemented by the native-code compiler; + see section~\ref{s:compat-native-bytecode}.) +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Sys_error of string +\end{ocamldoccode} +\index{Syserror@\verb`Sys_error`} +\begin{ocamldocdescription} + Exception raised by the input/output functions to report an + operating system error. The string is meant to give some + information to the programmer; you must \emph{not} pattern match on + the string literal because it may change in future versions (use + \verb`Sys_error _` instead). +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception End_of_file +\end{ocamldoccode} +\index{Endoffile@\verb`End_of_file`} +\begin{ocamldocdescription} + Exception raised by input functions to signal that the + end of file has been reached. +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Division_by_zero +\end{ocamldoccode} +\index{Divisionbyzero@\verb`Division_by_zero`} +\begin{ocamldocdescription} + Exception raised by integer division and remainder operations + when their second argument is zero. +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Sys_blocked_io +\end{ocamldoccode} +\index{Sysblockedio@\verb`Sys_blocked_io`} +\begin{ocamldocdescription} + A special case of "Sys_error" raised when no I/O is possible + on a non-blocking I/O channel. 
+\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Undefined_recursive_module of (string * int * int) +\end{ocamldoccode} +\index{Undefinedrecursivemodule@\verb`Undefined_recursive_module`} +\begin{ocamldocdescription} + Exception raised when an ill-founded recursive module definition + is evaluated. (See section~\ref{s-recursive-modules}.) + The arguments are the location of the definition in the source code + (file name, line number, column number). +\end{ocamldocdescription} + diff --git a/manual/manual/library/compilerlibs.etex b/manual/manual/library/compilerlibs.etex new file mode 100644 index 0000000000..0666f18dbc --- /dev/null +++ b/manual/manual/library/compilerlibs.etex @@ -0,0 +1,58 @@ +\chapter{The compiler front-end} \label{c:parsinglib}\cutname{parsing.html} +\pdfchapterfold{-1}{The compiler front-end} + +This chapter describes the OCaml front-end, which declares the abstract +syntax tree used by the compiler, provides a way to parse, print +and pretty-print OCaml code, and ultimately allows writing abstract +syntax tree preprocessors invoked via the {\tt -ppx} flag (see chapters~\ref{c:camlc} +and~\ref{c:nativecomp}). + +It is important to note that the exported front-end interface follows the evolution of the OCaml language and implementation, and thus does not provide {\bf any} backwards compatibility guarantees. + +The front-end is part of the "compiler-libs" library. +Programs that use the "compiler-libs" library should be built as follows: +\begin{alltt} + ocamlfind ocamlc \var{other options} -package compiler-libs.common \var{other files} + ocamlfind ocamlopt \var{other options} -package compiler-libs.common \var{other files} +\end{alltt} +Use of the {\tt ocamlfind} utility is recommended.
However, if this is not possible, an alternative method may be used: +\begin{alltt} + ocamlc \var{other options} -I +compiler-libs ocamlcommon.cma \var{other files} + ocamlopt \var{other options} -I +compiler-libs ocamlcommon.cmxa \var{other files} +\end{alltt} +For interactive use of the "compiler-libs" library, start "ocaml" and +type\\*"#load \"compiler-libs/ocamlcommon.cma\";;". + +% Some of the files below are commented out as the documentation is too poor +% or they are thought to be nonessential. + +\ifouthtml +\begin{links} +\item \ahref{libref/Ast\_helper.html}{Module \texttt{Ast_helper}: helper functions for AST construction} +\item \ahref{libref/Ast\_mapper.html}{Module \texttt{Ast_mapper}: -ppx rewriter interface} +\item \ahref{libref/Asttypes.html}{Module \texttt{Asttypes}: auxiliary types used by Parsetree} +% \item \ahref{libref/Lexer.html}{Module \texttt{Lexer}: OCaml syntax lexing} +\item \ahref{libref/Location.html}{Module \texttt{Location}: source code locations} +\item \ahref{libref/Longident.html}{Module \texttt{Longident}: long identifiers} +\item \ahref{libref/Parse.html}{Module \texttt{Parse}: OCaml syntax parsing} +\item \ahref{libref/Parsetree.html}{Module \texttt{Parsetree}: OCaml syntax tree} +\item \ahref{libref/Pprintast.html}{Module \texttt{Pprintast}: OCaml syntax printing} +% \item \ahref{libref/Printast.html}{Module \texttt{Printast}: AST printing} +\end{links} + +\else +% Ast_helper is excluded from the PDF and text manuals. +% It is over 20 pages long and does not have doc-comments. It is expected +% that Ast_helper will be only useful in the HTML manual (to look up signatures). 
+% \input{Asthelper.tex} +\input{Astmapper.tex} +\input{Asttypes.tex} +% \input{Lexer.tex} +\input{Location.tex} +\input{Longident.tex} +\input{Parse.tex} +\input{Parsetree.tex} +\input{Pprintast.tex} +% \input{Printast.tex} +\fi + diff --git a/manual/manual/library/core.etex b/manual/manual/library/core.etex new file mode 100644 index 0000000000..58297eab85 --- /dev/null +++ b/manual/manual/library/core.etex @@ -0,0 +1,36 @@ +\chapter{The core library} \label{c:corelib}\cutname{core.html} +\pdfchapterfold{-1}{The core library} + +This chapter describes the OCaml core library, which is + composed of declarations for built-in types and exceptions, plus +the module "Pervasives" that provides basic operations on these + built-in types. The "Pervasives" module is special in two +ways: +\begin{itemize} +\item It is automatically linked with the user's object code files by +the "ocamlc" command (chapter~\ref{c:camlc}). + +\item It is automatically ``opened'' when a compilation starts, or +when the toplevel system is launched. Hence, it is possible to use +unqualified identifiers to refer to the functions provided by the +"Pervasives" module, without adding a "open Pervasives" directive. +\end{itemize} + +\section*{Conventions} + +The declarations of the built-in types and the components of module +"Pervasives" are printed one by one in typewriter font, followed by a +short comment. All library modules and the components they provide are +indexed at the end of this report. 
+ +\input{builtin.tex} + +\ifouthtml +\section{Module {\tt Pervasives}: the initially opened module} +\begin{links} +\item \ahref{libref/Pervasives.html}{Module \texttt{Pervasives}: the initially opened module} +\end{links} +\else +\input{Pervasives.tex} +\fi + diff --git a/manual/manual/library/libbigarray.etex b/manual/manual/library/libbigarray.etex new file mode 100644 index 0000000000..b0a0568765 --- /dev/null +++ b/manual/manual/library/libbigarray.etex @@ -0,0 +1,168 @@ +\chapter{The bigarray library} +\pdfchapterfold{-1}{The bigarray library} +%HEVEA\cutname{libbigarray.html} + +The "bigarray" library implements large, multi-dimensional, numerical +arrays. These arrays are called ``big arrays'' to distinguish them +from the standard OCaml arrays described in +\ifouthtml +\ahref{libref/Array.html}{Module \texttt{Array}}. +\else +section~\ref{Array}. +\fi +The main differences between ``big arrays'' and standard OCaml arrays +are as follows: +\begin{itemize} +\item Big arrays are not limited in size, unlike OCaml arrays +("float array" are limited to 2097151 elements on a 32-bit platform, +other "array" types to 4194303 elements). +\item Big arrays are multi-dimensional. Any number of dimensions +between 1 and 16 is supported. In contrast, OCaml arrays are +mono-dimensional and require encoding multi-dimensional arrays as +arrays of arrays. +\item Big arrays can only contain integers and floating-point +numbers, while OCaml arrays can contain arbitrary OCaml data types. +However, big arrays provide more space-efficient storage of integer +and floating-point elements, in particular because they support +``small'' types such as single-precision floats and 8 and 16-bit +integers, in addition to the standard OCaml types of double-precision +floats and 32 and 64-bit integers. 
+\item The memory layout of big arrays is entirely compatible with that +of arrays in C and Fortran, allowing large arrays to be passed back +and forth between OCaml code and C / Fortran code with no data copying +at all. +\item Big arrays support interesting high-level operations that normal +arrays do not provide efficiently, such as extracting sub-arrays and +``slicing'' a multi-dimensional array along certain dimensions, all +without any copying. +\end{itemize} +% +Programs that use the "bigarray" library must be linked as follows: +\begin{alltt} + ocamlc \var{other options} bigarray.cma \var{other files} + ocamlopt \var{other options} bigarray.cmxa \var{other files} +\end{alltt} +For interactive use of the "bigarray" library, do: +\begin{alltt} + ocamlmktop -o mytop bigarray.cma + ./mytop +\end{alltt} +or (if dynamic linking of C libraries is supported on your platform), +start "ocaml" and type "#load \"bigarray.cma\";;". + +\ifouthtml +\section{Module {\tt Bigarray}: large, multi-dimensional, numerical arrays} +\begin{links} +\item \ahref{libref/Bigarray.html}{Module \texttt{Bigarray}} +\end{links} + +\else +\input{Bigarray.tex} +\fi + +\section{Big arrays in the OCaml-C interface} + +C stub code that interfaces C or Fortran code with OCaml code, as +described in chapter~\ref{c:intf-c}, can exploit big arrays as +follows. + +\subsection{Include file} + +The include file "<caml/bigarray.h>" must be included in the C stub +file. It declares the functions, constants and macros discussed +below. + +\subsection{Accessing an OCaml bigarray from C or Fortran} + +If \var{v} is an OCaml "value" representing a big array, the expression +"Caml_ba_data_val("\var{v}")" returns a pointer to the data part of the array. +This pointer is of type "void *" and can be cast to the appropriate C +type for the array (e.g. "double []", "char [][10]", etc).
+ +Various characteristics of the OCaml big array can be consulted from C +as follows: +\begin{tableau}{|l|l|}{C expression}{Returns} +\entree{"Caml_ba_array_val("\var{v}")->num_dims"}{number of dimensions} +\entree{"Caml_ba_array_val("\var{v}")->dim["\var{i}"]"}{\var{i}-th dimension} +\entree{"Caml_ba_array_val("\var{v}")->flags & CAML_BA_KIND_MASK"}{kind of array elements} +\end{tableau} +The kind of array elements is one of the following constants: +\begin{tableau}{|l|l|}{Constant}{Element kind} +\entree{"CAML_BA_FLOAT32"}{32-bit single-precision floats} +\entree{"CAML_BA_FLOAT64"}{64-bit double-precision floats} +\entree{"CAML_BA_SINT8"}{8-bit signed integers} +\entree{"CAML_BA_UINT8"}{8-bit unsigned integers} +\entree{"CAML_BA_SINT16"}{16-bit signed integers} +\entree{"CAML_BA_UINT16"}{16-bit unsigned integers} +\entree{"CAML_BA_INT32"}{32-bit signed integers} +\entree{"CAML_BA_INT64"}{64-bit signed integers} +\entree{"CAML_BA_CAML_INT"}{31- or 63-bit signed integers} +\entree{"CAML_BA_NATIVE_INT"}{32- or 64-bit (platform-native) integers} +\end{tableau} +% +The following example shows the passing of a two-dimensional big array +to a C function and a Fortran function. +\begin{verbatim} + extern void my_c_function(double * data, int dimx, int dimy); + extern void my_fortran_function_(double * data, int * dimx, int * dimy); + + value caml_stub(value bigarray) + { + int dimx = Caml_ba_array_val(bigarray)->dim[0]; + int dimy = Caml_ba_array_val(bigarray)->dim[1]; + /* C passes scalar parameters by value */ + my_c_function(Caml_ba_data_val(bigarray), dimx, dimy); + /* Fortran passes all parameters by reference */ + my_fortran_function_(Caml_ba_data_val(bigarray), &dimx, &dimy); + return Val_unit; + } +\end{verbatim} + +\subsection{Wrapping a C or Fortran array as an OCaml big array} + +A pointer \var{p} to an already-allocated C or Fortran array can be +wrapped and returned to OCaml as a big array using the "caml_ba_alloc" +or "caml_ba_alloc_dims" functions.
+\begin{itemize} +\item +"caml_ba_alloc("\var{kind} "|" \var{layout}, \var{numdims}, \var{p}, \var{dims}")" + +Return an OCaml big array wrapping the data pointed to by \var{p}. +\var{kind} is the kind of array elements (one of the "CAML_BA_" +kind constants above). \var{layout} is "CAML_BA_C_LAYOUT" for an +array with C layout and "CAML_BA_FORTRAN_LAYOUT" for an array with +Fortran layout. \var{numdims} is the number of dimensions in the +array. \var{dims} is an array of \var{numdims} long integers, giving +the sizes of the array in each dimension. + +\item +"caml_ba_alloc_dims("\var{kind} "|" \var{layout}, \var{numdims}, +\var{p}, "(long) "\nth{dim}{1}, "(long) "\nth{dim}{2}, \ldots, "(long) "\nth{dim}{numdims}")" + +Same as "caml_ba_alloc", but the sizes of the array in each dimension +are listed as extra arguments in the function call, rather than being +passed as an array. +\end{itemize} +% +The following example illustrates how statically-allocated C and +Fortran arrays can be made available to OCaml. 
+\begin{verbatim} + extern long my_c_array[100][200]; + extern float my_fortran_array_[300][400]; + + value caml_get_c_array(value unit) + { + long dims[2]; + dims[0] = 100; dims[1] = 200; + return caml_ba_alloc(CAML_BA_NATIVE_INT | CAML_BA_C_LAYOUT, + 2, my_c_array, dims); + } + + value caml_get_fortran_array(value unit) + { + return caml_ba_alloc_dims(CAML_BA_FLOAT32 | CAML_BA_FORTRAN_LAYOUT, + 2, my_fortran_array_, 300L, 400L); + } +\end{verbatim} + + diff --git a/manual/manual/library/libdynlink.etex b/manual/manual/library/libdynlink.etex new file mode 100644 index 0000000000..2d4d92d908 --- /dev/null +++ b/manual/manual/library/libdynlink.etex @@ -0,0 +1,29 @@ +\chapter{The dynlink library: dynamic loading and linking of object files} +\pdfchapterfold{-1}{The dynlink library: dynamic loading and linking of object files} +%HEVEA\cutname{libdynlink.html} + +The "dynlink" library supports type-safe dynamic loading and linking +of bytecode object files (".cmo" and ".cma" files) in a running +bytecode program, or of native plugins (usually ".cmxs" files) in a +running native program. Type safety is ensured by limiting the set of +modules from the running program that the loaded object file can +access, and checking that the running program and the loaded object +file have been compiled against the same interfaces for these modules. +In native code, there are also some compatibility checks on the +implementations (to avoid errors with cross-module optimizations); it +might be useful to hide ".cmx" files when building native plugins so +that they remain independent of the implementation of modules in the +main program. + +Programs that use the "dynlink" library simply need to link +"dynlink.cma" or "dynlink.cmxa" with their object files and other libraries. 
+ +\ifouthtml +\begin{links} +\item \ahref{libref/Dynlink.html}{Module \texttt{Dynlink}: dynamic loading of bytecode object files} +\end{links} + +\else +\input{Dynlink.tex} +\fi + diff --git a/manual/manual/library/libgraph.etex b/manual/manual/library/libgraph.etex new file mode 100644 index 0000000000..28759f5ba1 --- /dev/null +++ b/manual/manual/library/libgraph.etex @@ -0,0 +1,100 @@ +\chapter{The graphics library} +\pdfchapterfold{-1}{The graphics library} +%HEVEA\cutname{libgraph.html} + +The "graphics" library provides a set of portable drawing primitives. +Drawing takes place +in a separate window that is created when "Graphics.open_graph" is called. + +\begin{unix} +This library is implemented under the X11 windows system. +Programs that use the "graphics" library must be linked as follows: +\begin{alltt} + ocamlc \var{other options} graphics.cma \var{other files} +\end{alltt} +For interactive use of the "graphics" library, do: +\begin{alltt} + ocamlmktop -o mytop graphics.cma + ./mytop +\end{alltt} +or (if dynamic linking of C libraries is supported on your platform), +start "ocaml" and type "#load \"graphics.cma\";;". + +Here are the graphics mode specifications supported by +"Graphics.open_graph" on +the X11 implementation of this library: +the argument to "Graphics.open_graph" has the format +"\""{\it display-name} {\it geometry\/}"\"", +where {\it display-name} is the name of the X-windows display to +connect to, and {\it geometry} is a standard X-windows geometry +specification. The two components are separated by a space. Either can +be omitted, or both. 
Examples: +\begin{options} +\item["Graphics.open_graph \"foo:0\""] +connects to the display "foo:0" and creates a window with the default geometry +\item["Graphics.open_graph \"foo:0 300x100+50-0\""] +connects to the display "foo:0" and creates a window 300 pixels wide +by 100 pixels tall, at location $(50,0)$ +\item["Graphics.open_graph \" 300x100+50-0\""] +connects to the default display and creates a window 300 pixels wide +by 100 pixels tall, at location $(50,0)$ +\item["Graphics.open_graph \"\""] +connects to the default display and creates a window with the default +geometry. +\end{options} +\end{unix} + +\begin{windows} +This library is available both for standalone compiled programs and +under the toplevel application "ocamlwin.exe". For the latter, this +library must be loaded in-core by typing +\begin{verbatim} + #load "graphics.cma";; +\end{verbatim} +\end{windows} + +The screen coordinates are interpreted as shown in the figure below. +Notice that the coordinate system used is the same as in mathematics: +$y$ increases from the bottom of the screen to the top of the screen, +and angles are measured counterclockwise (in degrees). +Drawing is clipped to the screen. 
+% +\begin{latexonly} +\begin{center} +\setlength{\unitlength}{0.5mm} +\begin{picture}(130,100)(-10,-10) +\thicklines +\put(-10,0){\vector(1,0){130}} +\put(125,0){\makebox(0,0)[l]{$x$}} +\put(0,-10){\vector(0,1){100}} +\put(0,95){\makebox(0,0){$y$}} +\thinlines +\put(100,80){\line(-1,0){105}} +\put(100,80){\line(0,-1){85}} +\put(95,75){\makebox(0,0)[tr]{Screen}} +\put(100,-10){\makebox(0,0){\tt size\_x()}} +\put(-10,80){\makebox(0,0)[r]{\tt size\_y()}} +\put(30,40){\makebox(4,4){\rule{2mm}{2mm}}} +\put(36,40){pixel at $(x,y)$} +\put(30,40){\line(-1,0){35}} +\put(30,-10){\makebox(0,0){$x$}} +\put(30,40){\line(0,-1){45}} +\put(-10,40){\makebox(0,0)[r]{$y$}} +\end{picture} +\end{center} +\end{latexonly} + +\begin{htmlonly} +\begin{center} +\imgsrc{libgraph.gif} +\end{center} +\end{htmlonly} +% + +\ifouthtml +\begin{links} +\item \ahref{libref/Graphics.html}{Module \texttt{Graphics}: machine-independent graphics primitives} +\end{links} +\else +\input{Graphics.tex} +\fi diff --git a/manual/manual/library/libgraph.fig b/manual/manual/library/libgraph.fig new file mode 100644 index 0000000000..55a6d1de33 --- /dev/null +++ b/manual/manual/library/libgraph.fig @@ -0,0 +1,29 @@ +#FIG 3.2 +Landscape +Center +Inches +Letter +100.00 +Single +-2 +1200 2 +2 1 0 1 0 7 0 0 -1 0.000 0 0 7 1 0 2 + 1 1 1.00 60.00 120.00 + 1050 3375 4575 3375 +2 1 0 1 0 7 0 0 -1 0.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 1200 3525 1200 825 +2 1 0 1 0 7 0 0 -1 0.000 0 0 7 0 0 3 + 1125 1200 3750 1200 3750 3450 +2 1 0 1 0 7 0 0 -1 0.000 0 0 -1 0 0 3 + 1125 2400 2475 2400 2475 3450 +2 2 0 1 0 0 0 0 20 0.000 0 0 7 0 0 5 + 2475 2400 2550 2400 2550 2325 2475 2325 2475 2400 +4 0 0 0 0 0 12 0.0000 4 135 525 2325 1500 Screen\001 +4 0 0 0 0 0 12 0.0000 4 180 990 2175 2250 point at (x,y)\001 +4 0 0 0 0 0 12 0.0000 4 90 90 2400 3600 x\001 +4 0 0 0 0 0 12 0.0000 4 135 90 975 2475 y\001 +4 0 0 0 0 0 12 0.0000 4 180 450 1050 750 y axis\001 +4 0 0 0 0 14 12 0.0000 4 180 840 225 1200 size_y()\001 +4 0 0 0 0 14 12 
0.0000 4 165 840 3375 3600 size_x()\001 +4 0 0 0 0 0 12 0.0000 4 135 450 4650 3375 x axis\001 diff --git a/manual/manual/library/libgraph.png b/manual/manual/library/libgraph.png Binary files differnew file mode 100644 index 0000000000..5841bfc805 --- /dev/null +++ b/manual/manual/library/libgraph.png diff --git a/manual/manual/library/liblabltk.etex b/manual/manual/library/liblabltk.etex new file mode 100644 index 0000000000..dcfaa6e8be --- /dev/null +++ b/manual/manual/library/liblabltk.etex @@ -0,0 +1,96 @@ +\chapter{The LablTk library: Tcl/Tk GUI interface} +\pdfchapterfold{-1}{The LablTk library: Tcl/Tk GUI interface} +%HEVEA\cutname{liblabltk.html} + +The "labltk" library provides access to the Tcl/Tk GUI from +OCaml programs. This interface is generated in an automated way, and +you should refer to Tcl/Tk books and man pages for detailed +information on the behavior of the numerous functions. We also suggest +using "ocamlbrowser" to see the types of the various functions, which +are the best documentation for the library itself. + +\smallskip\noindent +Programs that use the "labltk" library must be linked as follows: +\begin{alltt} + ocamlc \var{other options} -I +labltk labltk.cma \var{other files} + ocamlopt \var{other options} -I +labltk labltk.cmxa \var{other files} +\end{alltt} + +\begin{unix} +The "labltk" library is available for any system with Tcl/Tk installed, +starting from Tcl/Tk 8.0 up to Tcl/Tk 8.6. Beware that some beta +versions may have compatibility problems. + +If the library was not compiled correctly, try running the +"configure" script again with the option "-tkdefs" \var{switches}, +where \var{switches} is a list of C-style inclusion paths leading to +the right "tcl.h" and "tk.h", for instance +"\"-I/usr/local/include/tcl8.4 -I/usr/local/include/tk8.4\"". + +A script is installed to make it easier to use the "labltk" +library as a toplevel.
+\begin{options} +\item["labltk"] +This is a toplevel including the "labltk" library, and the path is +already set so as to allow the use of the various modules. It also +includes code for the Unix and Str libraries. You can use it +in place of "ocaml". +\end{options} +\end{unix} + +\begin{windows} +The "labltk" library has been precompiled for use with Tcl/Tk 8.5. +You must first have it installed on your system. +It can be downloaded from \\ +"http://www.activestate.com/products/ActiveTcl/". +After installing it, you must put the dynamically loaded libraries +"tcl85.dll" and "tk85.dll" (from the "bin" directory of the Tcl +installation) in a directory included in your path. + +No toplevel is available, but you can load the library from the +standard toplevel with the following commands. +\begin{quote} +\begin{verbatim} +# #directory "+labltk";; +# #load "labltk.cma";; +\end{verbatim} +\end{quote} +You can also load it directly from the command line. +\begin{quote} +\begin{verbatim} +C:\ocaml\bin> ocaml -I +labltk labltk.cma +\end{verbatim} +\end{quote} +\end{windows} + +The "labltk" library is composed of a large number of modules. +\begin{quote} +\begin{verbatim} +Bell Imagebitmap Place +Button Imagephoto Radiobutton +Canvas Label Scale +Checkbutton Listbox Scrollbar +Clipboard Menu Selection +Dialog Menubutton Text +Entry Message Tk +Focus Option Tkwait +Frame Optionmenu Toplevel +Grab Pack Winfo +Grid Palette Wm +\end{verbatim} +\end{quote} + +Giving a detailed account of each of these modules would be impractical +here. We will just present some of the basic functions in the module +"Tk". Note that for most other modules information can be found in the +Tcl "man" page of their name.
+ +\ifouthtml +\begin{links} +\item \ahref{libref/Tk.html}{The \texttt{Tk} library: Basic functions and types for LablTk} +\end{links} + +\else +\input{Tk.tex} +\fi + diff --git a/manual/manual/library/libnum.etex b/manual/manual/library/libnum.etex new file mode 100644 index 0000000000..6c66f46031 --- /dev/null +++ b/manual/manual/library/libnum.etex @@ -0,0 +1,39 @@ +\chapter{The num library: arbitrary-precision integer and rational arithmetic} +\pdfchapterfold{-3}{The num library: arbitrary-precision integer and rational arithmetic} +%HEVEA\cutname{libnum.html} + +The "num" library implements integer arithmetic and rational +arithmetic in arbitrary precision. + +More documentation on the functions provided in this library can be found +in {\em The CAML Numbers Reference Manual\/} by +Valérie Ménissier-Morain, technical report 141, INRIA, July 1992 +(available electronically, +\url{http://hal.inria.fr/docs/00/07/00/27/PDF/RT-0141.pdf}). + +Programs that use the "num" library must be linked as follows: +\begin{alltt} + ocamlc \var{other options} nums.cma \var{other files} + ocamlopt \var{other options} nums.cmxa \var{other files} +\end{alltt} +For interactive use of the "num" library, do: +\begin{alltt} + ocamlmktop -o mytop nums.cma + ./mytop +\end{alltt} +or (if dynamic linking of C libraries is supported on your platform), +start "ocaml" and type "#load \"nums.cma\";;".
+ +\ifouthtml +\begin{links} +\item \ahref{libref/Num.html}{Module \texttt{Num}: operation on arbitrary-precision numbers} +\item \ahref{libref/Big\_int.html}{Module \texttt{Big\_int}: operations on arbitrary-precision integers} +\item \ahref{libref/Arith\_status.html}{Module \texttt{Arith\_status}: flags that control rational arithmetic} +\end{links} +\else +\input{Num.tex} +\input{Bigint.tex} +\input{Arithstatus.tex} +\fi + + diff --git a/manual/manual/library/libstr.etex b/manual/manual/library/libstr.etex new file mode 100644 index 0000000000..a1939f8a31 --- /dev/null +++ b/manual/manual/library/libstr.etex @@ -0,0 +1,32 @@ +\chapter{The str library: regular expressions and string processing} +\pdfchapterfold{-1}{The str library: regular expressions and string processing} +%HEVEA\cutname{libstr.html} + +The "str" library provides high-level string processing functions, +some based on regular expressions. It is intended to support the kind +of file processing that is usually performed with scripting languages +such as "awk", "perl" or "sed". + +Programs that use the "str" library must be linked as follows: +\begin{alltt} + ocamlc \var{other options} str.cma \var{other files} + ocamlopt \var{other options} str.cmxa \var{other files} +\end{alltt} +For interactive use of the "str" library, do: +\begin{alltt} + ocamlmktop -o mytop str.cma + ./mytop +\end{alltt} +or (if dynamic linking of C libraries is supported on your platform), +start "ocaml" and type "#load \"str.cma\";;". 
+ +\ifouthtml +\begin{links} +\item \ahref{libref/Str.html}{Module \texttt{Str}: regular expressions and string processing} +\end{links} + +\else +\input{Str.tex} +\fi + + diff --git a/manual/manual/library/libthreads.etex b/manual/manual/library/libthreads.etex new file mode 100644 index 0000000000..af23291d02 --- /dev/null +++ b/manual/manual/library/libthreads.etex @@ -0,0 +1,60 @@ +\chapter{The threads library} +\label{c:threads}\cutname{threads.html} +\pdfchapterfold{-5}{The threads library} +%HEVEA\cutname{libthreads.html} + +The "threads" library allows concurrent programming in OCaml. +It provides multiple threads of control (also called lightweight +processes) that execute concurrently in the same memory space. Threads +communicate by in-place modification of shared data structures, or by +sending and receiving data on communication channels. + +The "threads" library is implemented by time-sharing on a single +processor. It will not take advantage of multi-processor machines. +Using this library will therefore never make programs run +faster. However, many programs are easier to write when structured as +several communicating processes. + +Two implementations of the "threads" library are available, depending +on the capabilities of the operating system: +\begin{itemize} +\item System threads. This implementation builds on the OS-provided threads +facilities: POSIX 1003.1c threads for Unix, and Win32 threads for +Windows. When available, system threads support both bytecode and +native-code programs. +\item VM-level threads. This implementation performs time-sharing and +context switching at the level of the OCaml virtual machine (bytecode +interpreter). It is available on Unix systems, and supports only +bytecode programs. It cannot be used with native-code programs. 
+\end{itemize} +Programs that use system threads must be linked as follows: +\begin{alltt} + ocamlc -thread \var{other options} unix.cma threads.cma \var{other files} + ocamlopt -thread \var{other options} unix.cmxa threads.cmxa \var{other files} +\end{alltt} +Compilation units that use the "threads" library must also be compiled with +the "-thread" option (see chapter~\ref{c:camlc}). + +Programs that use VM-level threads must be compiled with the "-vmthread" +option to "ocamlc" (see chapter~\ref{c:camlc}), and be linked as follows: +\begin{alltt} + ocamlc -vmthread \var{other options} threads.cma \var{other files} +\end{alltt} +Compilation units that use the "threads" library must also be compiled with +the "-vmthread" option (see chapter~\ref{c:camlc}). + +\ifouthtml +\begin{links} +\item \ahref{libref/Thread.html}{Module \texttt{Thread}: lightweight threads} +\item \ahref{libref/Mutex.html}{Module \texttt{Mutex}: locks for mutual exclusion} +\item \ahref{libref/Condition.html}{Module \texttt{Condition}: condition variables to synchronize between threads} +\item \ahref{libref/Event.html}{Module \texttt{Event}: first-class synchronous communication} +\item \ahref{libref/ThreadUnix.html}{Module \texttt{ThreadUnix}: thread-compatible system calls} +\end{links} +\else +\input{Thread.tex} +\input{Mutex.tex} +\input{Condition.tex} +\input{Event.tex} +\input{ThreadUnix.tex} +\fi diff --git a/manual/manual/library/libunix.etex b/manual/manual/library/libunix.etex new file mode 100644 index 0000000000..a79f5b2d75 --- /dev/null +++ b/manual/manual/library/libunix.etex @@ -0,0 +1,92 @@ +\chapter{The unix library: Unix system calls} +\pdfchapterfold{-1}{The unix library: Unix system calls} +%HEVEA\cutname{libunix.html} + +The "unix" library makes many Unix +system calls and system-related library functions available to +OCaml programs. This chapter describes briefly the functions +provided.
Refer to sections 2~and~3 of the Unix manual for more +details on the behavior of these functions. + +Not all functions are provided by all Unix variants. If some functions +are not available, they will raise "Invalid_argument" when called. + +Programs that use the "unix" library must be linked as follows: +\begin{alltt} + ocamlc \var{other options} unix.cma \var{other files} + ocamlopt \var{other options} unix.cmxa \var{other files} +\end{alltt} +For interactive use of the "unix" library, do: +\begin{alltt} + ocamlmktop -o mytop unix.cma + ./mytop +\end{alltt} +or (if dynamic linking of C libraries is supported on your platform), +start "ocaml" and type "#load \"unix.cma\";;". + +\begin{windows} +A fairly complete emulation of the Unix system calls is provided in +the Windows version of OCaml. The end of this chapter gives +more information on the functions that are not supported under Windows. +\end{windows} + +\ifouthtml +\begin{links} +\item \ahref{libref/Unix.html}{Module \texttt{Unix}: Unix system calls} +\item \ahref{libref/UnixLabels.html}{Module \texttt{UnixLabels}: Labeled + Unix system calls} +\end{links} +\else +\input{Unix.tex} + +\section{Module \texttt{UnixLabels}: labelized version of the interface} +\label{UnixLabels} +\index{UnixLabels (module)@\verb~UnixLabels~ (module)}% +\pdfsection{Module UnixLabels: labelized version of the interface} + +This module is identical to "Unix"~(\ref{Unix}), and only differs by +the addition of labels. You may see these labels directly by looking +at "unixLabels.mli", or by using the "ocamlbrowser" tool. +\fi + +\newpage +\begin{windows} +The Cygwin port of OCaml fully implements all functions from +the Unix module. The native Win32 ports implement a subset of them. +Below is a list of the functions that are not implemented, or only +partially implemented, by the Win32 ports. Functions not mentioned are +fully implemented and behave as described previously in this chapter.
+ +\begin{tableau}{|l|p{8cm}|}{Functions}{Comment} +\entree{"fork"}{not implemented, use "create_process" or threads} +\entree{"wait"}{not implemented, use "waitpid"} +\entree{"waitpid"}{can only wait for a given PID, not any child process} +\entree{"getppid"}{not implemented (meaningless under Windows)} +\entree{"nice"}{not implemented} +\entree{"truncate", "ftruncate"}{not implemented} +\entree{"link", "symlink", "readlink"}{not implemented (no links under +Windows)} +\entree{"access"}{execute permission "X_OK" cannot be tested, + it just tests for read permission instead} +\entree{"fchmod"}{not implemented} +\entree{"chown", "fchown"}{not implemented (make no sense on a DOS +file system)} +\entree{"umask"}{not implemented} +\entree{"mkfifo"}{not implemented} +\entree{"kill", "pause"}{not implemented (no inter-process signals in Windows)} +\entree{"alarm"}{not implemented} +\entree{"times"}{partially implemented, will not report timings for child +processes} +\entree{"getitimer", "setitimer"}{not implemented} +\entree{"getuid", "getgid"}{always return 1} +\entree{"getgid", "getegid", "getgroups"}{not implemented} +\entree{"setuid", "setgid"}{not implemented} +\entree{"getpwnam", "getpwuid"}{always raise "Not_found"} +\entree{"getgrnam", "getgrgid"}{always raise "Not_found"} +\entree{type "socket_domain"}{the domains "PF_UNIX" and "PF_INET6" +are not supported; "PF_INET" is fully supported} +\entree{"establish_server"}{not implemented; use threads} +\entree{terminal functions ("tc*")}{not implemented} +\end{tableau} + +\end{windows} diff --git a/manual/manual/library/stdlib.etex b/manual/manual/library/stdlib.etex new file mode 100644 index 0000000000..e791feab59 --- /dev/null +++ b/manual/manual/library/stdlib.etex @@ -0,0 +1,175 @@ +\chapter{The standard library} \label{c:stdlib}\cutname{stdlib.html} +\pdfchapterfold{-32}{The standard library} + +This chapter describes the functions provided by the OCaml +standard library. 
The modules from the standard library are +automatically linked with the user's object code files by the "ocamlc" +command. Hence, these modules can be used in standalone programs without +having to add any ".cmo" file on the command line for the linking +phase. Similarly, in interactive use, these globals can be used in +toplevel phrases without having to load any ".cmo" file in memory. + +Unlike the "Pervasives" module from the core library, the modules from the +standard library are not automatically ``opened'' when a compilation +starts, or when the toplevel system is launched. Hence it is necessary +to use qualified identifiers to refer to the functions provided by these +modules, or to add "open" directives. + +\label{stdlib:top} + +\section*{Conventions} + +For easy reference, the modules are listed below in alphabetical order +of module names. +For each module, the declarations from its signature are printed +one by one in typewriter font, followed by a short comment. +All modules and the identifiers they export are indexed at the end of +this report. + +\begin{latexonly} +\section*{Overview} + +Here is a short listing, by theme, of the standard library modules. 
+ +\subsubsection*{Data structures:} +\begin{tabular}{lll} +"Char" & p.~\pageref{Char} & character operations \\ +"String" & p.~\pageref{String} & string operations \\ +"Bytes" & p.~\pageref{Bytes} & operations on byte sequences\\ +"Array" & p.~\pageref{Array} & array operations \\ +"List" & p.~\pageref{List} & list operations \\ +"StdLabels" &p.~\pageref{StdLabels} & labelized versions of +the above 4 modules \\ +"Sort" & p.~\pageref{Sort} & sorting and merging lists \\ +"Hashtbl" & p.~\pageref{Hashtbl} & hash tables and hash functions \\ +"Random" & p.~\pageref{Random} & pseudo-random number generator \\ +"Set" & p.~\pageref{Set} & sets over ordered types \\ +"Map" & p.~\pageref{Map} & association tables over ordered types \\ +"MoreLabels" &p.~\pageref{MoreLabels} & labelized versions of +"Hashtbl", "Set", and "Map" \\ +"Oo" & p.~\pageref{Oo} & useful functions on objects \\ +"Stack" & p.~\pageref{Stack} & last-in first-out stacks \\ +"Queue" & p.~\pageref{Queue} & first-in first-out queues \\ +"Buffer" & p.~\pageref{Buffer} & buffers that grow on demand \\ +"Lazy" & p.~\pageref{Lazy} & delayed evaluation \\ +"Weak" & p.~\pageref{Weak} & references that don't prevent objects +from being garbage-collected +\end{tabular} +\subsubsection*{Arithmetic:} +\begin{tabular}{lll} +"Complex" & p.~\pageref{Complex} & Complex numbers \\ +"Int32" & p.~\pageref{Int32} & operations on 32-bit integers \\ +"Int64" & p.~\pageref{Int64} & operations on 64-bit integers \\ +"Nativeint" & p.~\pageref{Nativeint} & operations on platform-native +integers +\end{tabular} +\subsubsection{Input/output:} +\begin{tabular}{lll} +"Format" & p.~\pageref{Format} & pretty printing with automatic +indentation and line breaking \\ +"Marshal" & p.~\pageref{Marshal} & marshaling of data structures \\ +"Printf" & p.~\pageref{Printf} & formatting printing functions \\ +"Scanf" & p.~\pageref{Scanf} & formatted input functions \\ +"Digest" & p.~\pageref{Digest} & MD5 message digest \\ +\end{tabular} 
+\subsubsection{Parsing:} +\begin{tabular}{lll} +"Genlex" & p.~\pageref{Genlex} & a generic lexer over streams \\ +"Lexing" & p.~\pageref{Lexing} & the run-time library for lexers generated by "ocamllex" \\ +"Parsing" & p.~\pageref{Parsing} & the run-time library for parsers generated by "ocamlyacc" \\ +"Stream" & p.~\pageref{Stream} & basic functions over streams \\ +\end{tabular} +\subsubsection{System interface:} +\begin{tabular}{lll} +"Arg" & p.~\pageref{Arg} & parsing of command line arguments \\ +"Callback" & p.~\pageref{Callback} & registering OCaml functions to +be called from C \\ +"Filename" & p.~\pageref{Filename} & operations on file names \\ +"Gc" & p.~\pageref{Gc} & memory management control and statistics \\ +"Printexc" & p.~\pageref{Printexc} & a catch-all exception handler \\ +"Sys" & p.~\pageref{Sys} & system interface \\ +\end{tabular} +\end{latexonly} + +\ifouthtml +\begin{links} +\item \ahref{libref/Arg.html}{Module \texttt{Arg}: parsing of command line arguments} +\item \ahref{libref/Array.html}{Module \texttt{Array}: array operations} +\item \ahref{libref/ArrayLabels.html}{Module \texttt{ArrayLabels}: array operations (with labels)} +\item \ahref{libref/Buffer.html}{Module \texttt{Buffer}: extensible buffers} +\item \ahref{libref/Bytes.html}{Module \texttt{Bytes}: byte sequences} +\item \ahref{libref/Callback.html}{Module \texttt{Callback}: registering OCaml values with the C runtime} +\item \ahref{libref/Char.html}{Module \texttt{Char}: character operations} +\item \ahref{libref/Complex.html}{Module \texttt{Complex}: Complex numbers} +\item \ahref{libref/Digest.html}{Module \texttt{Digest}: MD5 message digest} +\item \ahref{libref/Filename.html}{Module \texttt{Filename}: operations on file names} +\item \ahref{libref/Format.html}{Module \texttt{Format}: pretty printing} +\item \ahref{libref/Gc.html}{Module \texttt{Gc}: memory management control and statistics; finalized values} +\item \ahref{libref/Genlex.html}{Module \texttt{Genlex}: a 
generic lexical analyzer} +\item \ahref{libref/Hashtbl.html}{Module \texttt{Hashtbl}: hash tables and hash functions} +\item \ahref{libref/Int32.html}{Module \texttt{Int32}: 32-bit integers} +\item \ahref{libref/Int64.html}{Module \texttt{Int64}: 64-bit integers} +\item \ahref{libref/Lazy.html}{Module \texttt{Lazy}: deferred computations} +\item \ahref{libref/Lexing.html}{Module \texttt{Lexing}: the run-time library for lexers generated by \texttt{ocamllex}} +\item \ahref{libref/List.html}{Module \texttt{List}: list operations} +\item \ahref{libref/ListLabels.html}{Module \texttt{ListLabels}: list operations (with labels)} +\item \ahref{libref/Map.html}{Module \texttt{Map}: association tables over ordered types} +\item \ahref{libref/Marshal.html}{Module \texttt{Marshal}: marshaling of data structures} +\item \ahref{libref/MoreLabels.html}{Module \texttt{MoreLabels}: Include modules \texttt{Hashtbl}, \texttt{Map} and \texttt{Set} with labels} +\item \ahref{libref/Nativeint.html}{Module \texttt{Nativeint}: processor-native integers} +\item \ahref{libref/Oo.html}{Module \texttt{Oo}: object-oriented extension} +\item \ahref{libref/Parsing.html}{Module \texttt{Parsing}: the run-time library for parsers generated by \texttt{ocamlyacc}} +\item \ahref{libref/Printexc.html}{Module \texttt{Printexc}: facilities for printing exceptions} +\item \ahref{libref/Printf.html}{Module \texttt{Printf}: formatting printing functions} +\item \ahref{libref/Queue.html}{Module \texttt{Queue}: first-in first-out queues} +\item \ahref{libref/Random.html}{Module \texttt{Random}: pseudo-random number generator (PRNG)} +\item \ahref{libref/Scanf.html}{Module \texttt{Scanf}: formatted input functions} +\item \ahref{libref/Set.html}{Module \texttt{Set}: sets over ordered types} +\item \ahref{libref/Sort.html}{Module \texttt{Sort}: sorting and merging lists} +\item \ahref{libref/Stack.html}{Module \texttt{Stack}: last-in first-out stacks} +\item \ahref{libref/StdLabels.html}{Module 
\texttt{StdLabels}: Include modules \texttt{Array}, \texttt{List} and \texttt{String} with labels} +\item \ahref{libref/Stream.html}{Module \texttt{Stream}: streams and parsers} +\item \ahref{libref/String.html}{Module \texttt{String}: string operations} +\item \ahref{libref/StringLabels.html}{Module \texttt{StringLabels}: string operations (with labels)} +\item \ahref{libref/Sys.html}{Module \texttt{Sys}: system interface} +\item \ahref{libref/Weak.html}{Module \texttt{Weak}: arrays of weak pointers} +\end{links} +\else +\input{Arg.tex} +\input{Array.tex} +\input{Buffer.tex} +\input{Bytes.tex} +\input{Callback.tex} +\input{Char.tex} +\input{Complex.tex} +\input{Digest.tex} +\input{Filename.tex} +\input{Format.tex} +\input{Gc.tex} +\input{Genlex.tex} +\input{Hashtbl.tex} +\input{Int32.tex} +\input{Int64.tex} +\input{Lazy.tex} +\input{Lexing.tex} +\input{List.tex} +\input{Map.tex} +\input{Marshal.tex} +\input{MoreLabels.tex} +\input{Nativeint.tex} +\input{Oo.tex} +\input{Parsing.tex} +\input{Printexc.tex} +\input{Printf.tex} +\input{Queue.tex} +\input{Random.tex} +\input{Scanf.tex} +\input{Set.tex} +\input{Sort.tex} +\input{Stack.tex} +\input{StdLabels.tex} +\input{Stream.tex} +\input{String.tex} +\input{Sys.tex} +\input{Weak.tex} +\fi diff --git a/manual/manual/library/tk.mli b/manual/manual/library/tk.mli new file mode 100644 index 0000000000..d3c8d199fa --- /dev/null +++ b/manual/manual/library/tk.mli @@ -0,0 +1,192 @@ +(* $Id$ *) + +(** Basic functions and types for LablTk *) + +open Widget + +(** {6 Initialization and termination} *) + +val openTk : ?display:string -> ?clas:string -> unit -> toplevel widget + (** Initialize LablTk and open a toplevel window. + [display] is described according to the X11 conventions. + [clas] is used for the X11 resource mechanism. *) +val mainLoop : unit -> unit + (** Start the main event loop *) +val closeTk : unit -> unit + (** Quit the main loop and close all open windows. 
*) +val destroy : 'a Widget.widget -> unit + (** Destroy an individual widget. *) + +(** {6 Application wide commands} *) + +val update : unit -> unit + (** Synchronize display with internal state. *) + +val appname_get : unit -> string +val appname_set : string -> unit + (** Get or set the application name. *) + +(** {6 Dimensions} *) + +type units = [`Pix of int | `Cm of float | `In of float | `Mm of float | `Pt of float] +val pixels : units -> int + (** Converts various on-screen units to pixels, + respective to the default display. Available units are + pixels, centimeters, inches, millimeters and points *) + +(** {6 Widget layout commands} *) + +type anchor = [`Center|`E|`N|`Ne|`Nw|`S|`Se|`Sw|`W] +type fillMode = [`Both|`None|`X|`Y] +type side = [`Bottom|`Left|`Right|`Top] +val pack : + ?after:'a Widget.widget -> + ?anchor:anchor -> + ?before:'b Widget.widget -> + ?expand:bool -> + ?fill:fillMode -> + ?inside:'c Widget.widget -> + ?ipadx:int -> + ?ipady:int -> + ?padx:int -> + ?pady:int -> + ?side:side -> + 'd Widget.widget list -> unit + (** Pack a widget inside its parent, + using the standard layout engine. *) +val grid : + ?column:int -> + ?columnspan:int -> + ?inside:'a Widget.widget -> + ?ipadx:int -> + ?ipady:int -> + ?padx:int -> + ?pady:int -> + ?row:int -> + ?rowspan:int -> + ?sticky:string -> 'b Widget.widget list -> unit + (** Pack a widget inside its parent, using the grid layout engine. *) + +type borderMode = [`Ignore|`Inside|`Outside] +val place : + ?anchor:anchor -> + ?bordermode:borderMode -> + ?height:int -> + ?inside:'a Widget.widget -> + ?relheight:float -> + ?relwidth:float -> + ?relx:float -> + ?rely:float -> + ?width:int -> + ?x:int -> ?y:int -> 'b Widget.widget -> unit + (** Pack a widget inside its parent, at absolute coordinates. 
*) + +val raise_window : + ?above:'a Widget.widget -> 'b Widget.widget -> unit +val lower_window : + ?below:'a Widget.widget -> 'b Widget.widget -> unit + (** Raise or lower the window associated to a widget. *) + +(** {6 Event handling} *) + +type modifier = + [ `Control | `Shift | `Lock + | `Button1 | `Button2 | `Button3 | `Button4 | `Button5 + | `Double | `Triple + | `Mod1 | `Mod2 | `Mod3 | `Mod4 | `Mod5 | `Meta | `Alt ] + +type event = + [ `ButtonPress | `ButtonPressDetail of int + | `ButtonRelease | `ButtonReleaseDetail of int + | `Circulate | `ColorMap | `Configure | `Destroy + | `Enter | `Expose | `FocusIn | `FocusOut | `Gravity + | `KeyPress | `KeyPressDetail of string + | `KeyRelease | `KeyReleaseDetail of string + | `Leave | `Map | `Motion | `Property + | `Reparent | `Unmap | `Visibility + | `Modified of modifier list * event ] + +(** An event can be either a basic X event, or modified by a + key or mouse modifier. *) + +type eventInfo = + { mutable ev_Above: int; + mutable ev_ButtonNumber: int; + mutable ev_Count: int; + mutable ev_Detail: string; + mutable ev_Focus: bool; + mutable ev_Height: int; + mutable ev_KeyCode: int; + mutable ev_Mode: string; + mutable ev_OverrideRedirect: bool; + mutable ev_Place: string; + mutable ev_State: string; + mutable ev_Time: int; + mutable ev_Width: int; + mutable ev_MouseX: int; + mutable ev_MouseY: int; + mutable ev_Char: string; + mutable ev_BorderWidth: int; + mutable ev_SendEvent: bool; + mutable ev_KeySymString: string; + mutable ev_KeySymInt: int; + mutable ev_RootWindow: int; + mutable ev_SubWindow: int; + mutable ev_Type: int; + mutable ev_Widget: Widget.any Widget.widget; + mutable ev_RootX: int; + mutable ev_RootY: int } + +(** Event related information accessible in callbacks. 
*) + +type eventField = + [ `Above | `ButtonNumber | `Count | `Detail | `Focus | `Height + | `KeyCode | `Mode | `OverrideRedirect | `Place | `State + | `Time | `Width | `MouseX | `MouseY | `Char | `BorderWidth + | `SendEvent | `KeySymString | `KeySymInt | `RootWindow + | `SubWindow | `Type | `Widget | `RootX | `RootY ] + +(** In order to access the above event information, one has to pass + a list of required event fields to the [bind] function. *) + +val bind : + events:event list -> + ?extend:bool -> + ?breakable:bool -> + ?fields:eventField list -> + ?action:(eventInfo -> unit) -> + 'a Widget.widget -> unit + (** Bind a succession of [events] on a widget to an [action]. + If [extend] is true then the binding is added after existing + ones, otherwise it replaces them. + [breakable] should be true when [break] is to be called inside + the action. + [action] is called with the [fields] required set in + an [eventInfo] structure. Other fields should not be accessed. + If [action] is omitted then existing bindings are removed. *) + +val bind_class : + events:event list -> + ?extend:bool -> + ?breakable:bool -> + ?fields:eventField list -> + ?action:(eventInfo -> unit) -> + ?on:'a Widget.widget -> + string -> unit + (** Same thing for all widgets of a given class. If a widget + is given with label [~on:], the binding will be removed as + soon as it is destroyed. *) +val bind_tag : + events:event list -> + ?extend:bool -> + ?breakable:bool -> + ?fields:eventField list -> + ?action:(eventInfo -> unit) -> + ?on:'a Widget.widget -> + string -> unit + (** Same thing for all widgets having a given tag *) + +val break : unit -> unit + (** Used inside a bound action, do not call other actions + after this one. This is only possible if this action + was bound with [~breakable:true].
*) diff --git a/manual/manual/macros.hva b/manual/manual/macros.hva new file mode 100644 index 0000000000..4a118efc57 --- /dev/null +++ b/manual/manual/macros.hva @@ -0,0 +1,113 @@ +%%Colors for links +\def\visited@color{\#006000} +\def\link@color{\#00A000} +\def\hover@color{\@getstylecolor{subsection}} +\newstyle{a:link}{color:\link@color;text-decoration:underline;} +\newstyle{a:visited}{color:\visited@color;text-decoration:underline;} +\newstyle{a:hover}{color:black;text-decoration:none;background-color:\hover@color} +%%% +\newcommand{\input@color}{\htmlcolor{006000}} +\newcommand{\output@color}{\maroon} +\newcommand{\machine}{\tt} +\newenvironment{machineenv}{\begin{alltt}}{\end{alltt}} +\newcommand{\firstline}{\black\#\input@color\ } +\newcommand{\nextline}{\ \ } +\newcommand{\@zyva}{\firstline\renewcommand{\?}{\nextline}} +\newenvironment{camlunder}{\@style{U}}{} +\newcommand{\caml}{\begin{alltt}\renewcommand{\;}{}\renewcommand{\\}{\char92}\def\<{\begin{camlunder}}\def\>{\end{camlunder}}\activebracefalse} +\let\?=\@zyva +\newcommand{\endcaml}{\activebracetrue\end{alltt}} +\renewcommand{\:}{\renewcommand{\?}{\@zyva}\output@color} +\newcommand{\var}[1]{\textit{#1}} + +\newenvironment{library}{}{} +\newcounter{page} +\newenvironment{comment}{\begin{quote}}{\end{quote}} +\newcommand{\nth}[2]{\({#1}_{#2}\)} +\newenvironment{options}{\begin{description}}{\end{description}} + + +%%venant de macros.tex + +\def\versionspecific#1{\begin{quote}\textsf{#1:}\quad} +\def\unix{\versionspecific{Unix}} +\def\endunix{\end{quote}} +\def\macos{\versionspecific{MacOS~9}} +\def\endmacos{\end{quote}} +\def\windows{\versionspecific{Windows}} +\def\endwindows{\end{quote}} + +\def\requirements{\trivlist \item[\hskip\labelsep {\bf Requirements.}]} +\def\endrequirements{\endtrivlist} +\def\installation{\trivlist \item[\hskip\labelsep {\bf Installation.}]} +\def\endinstallation{\endtrivlist} +\def\troubleshooting{\trivlist \item[\hskip\labelsep {\bf Troubleshooting.}]} 
+\def\endtroubleshooting{\endtrivlist} + +\newtheorem{gcrule}{Rule} + +% Pour les tables de priorites et autres tableaux a deux colonnes, encadres + +\def\entree#1#2{#1 & #2 \\} +\def\tableau#1#2#3{% +\par\begin{center}% +\begin{tabular*}{.8\linewidth}{#1}% +\multicolumn{1}{c}{\textbf{#2}} & +\multicolumn{1}{c}{\textbf{#3}} \\ +%%#2 & #3 \\% +}% +\def\endtableau{\end{tabular*}\end{center}\par} + +% L'environnement library (pour composer les descriptions des modules +% de bibliotheque). + + +\def\restoreindent{\begingroup\let\@listI=\@savedlistI} +\def\endrestoreindent{\endgroup} + + +% PDF stuff + +\def\pdfchapterfold#1#2{} +\def\pdfsection#1{} +\def\pdfchapter{\pdfchapterfold{0}} + +%%% Pour camlidl + +\def\transl#1{$[\![\mbox{#1}]\!]$} + +% Pour l'index +\usepackage{multind} +\let\indexentry=\index +\renewcommand{\index}[1]{\indexentry{\jobname}{#1}} +\def\ikwd#1{\indexentry{\jobname.kwd}{#1}} +% nth + +\def\th{^{\mbox{\scriptsize th}}} +\renewcommand{\hbox}[1]{\mbox{#1}} + +% Notations pour les metavariables +\def\nmth#1#2#3{\({#1}_{#2}^{#3}\)} +\def\optvar#1{[\var{#1}\/]} +\def\event{§§} +\def\fromoneto#1#2{$#1 = 1,\ldots{} , #2$} + +\newcommand{\vfill}{} +\def\number{} +\def\year{2013} + +% Pour alltt +\def\rminalltt#1{{\rm #1}} +\def\goodbreak{\ \\} +\def\@savedlistI{} + +%List of links with no space around items +\newstyle{.li-links}{margin:0ex 0ex;} +\newenvironment{links} +{\setenvclass{itemize}{ftoc2}\setenvclass{li-itemize}{li-links}\itemize} +{\enditemize} + +% Pour le chapitre ocamlbuild +\newcommand{\mathscr}[1]{{\mathcal{#1}}} +\newcommand{\ocb}{\texttt{ocamlbuild}\xspace} +\newcommand{\tags}{\texttt{\_tags}\xspace} diff --git a/manual/manual/macros.tex b/manual/manual/macros.tex new file mode 100644 index 0000000000..4a60c5f3b5 --- /dev/null +++ b/manual/manual/macros.tex @@ -0,0 +1,255 @@ +\makeatletter +% Pour hevea +\newif\ifouthtml\outhtmlfalse +\newcommand{\cutname}[1]{} +% Notations pour les metavariables +\def\var#1{{\it#1}} 
+\def\nth#1#2{${\it#1}_{#2}$} +\def\nmth#1#2#3{${\it#1}_{#2}^{#3}$} +\def\optvar#1{\textrm{[}\var{#1}\/\textrm{]}} +\def\event{$\bowtie$} +\def\fromoneto#1#2{$#1 = 1, \ldots, #2$} + +% Pour avoir les exposants sur la ligne au-dessus (???) + +\ifplaintext +\fontdimen14\tensy=12pt +\fi + +% Numerotation +\setcounter{secnumdepth}{2} % Pour numeroter les \subsection +\setcounter{tocdepth}{1} % Pour ne pas mettre les \subsection + % dans la table des matieres + +% Pour avoir "_" qui marche en mode math et en mode normal +\catcode`\_=13 +\catcode`\=8 +\def\_{\hbox{\tt\char95}} +\def_{\ifmmode\else\_\fi} + +\ifplaintext +\def\ttstretch{\tt} +\else +\def\ttstretch{\tt\spaceskip=5.77pt plus 1.83pt minus 1.22pt} +% La fonte cmr10 a normalement des espaces de 5.25pt non extensibles. +% En 11 pt ca fait 5.77 pt. On lui ajoute la meme flexibilite que +% cmr10 agrandie a 11 pt. +\fi + +% Pour la traduction "xxxx" -> {\machine{xxxx}} faite par texquote2 +\def\machine#1{\mbox{\ttstretch{#1}}} + +% Pour la traduction "\begin{verbatim}...\end{verbatim}" +% -> "\begin{machineenv}...\end{machineenv}" +% faite aussi par texquote2. 
+\newenvironment{machineenv}{\alltt}{\endalltt} + +% Environnements + +\newlength{\versionwidth} +\setbox0=\hbox{\bf Windows:} \setlength{\versionwidth}{\wd0} + +\def\versionspecific#1{ + \begin{description}\item[#1:]~\\} + +\def\unix{\versionspecific{Unix}} +\def\endunix{\end{description}} +%\def\macos{\versionspecific{MacOS 9}} +%\def\endmacos{\end{description}} +\def\windows{\versionspecific{Windows}} +\def\endwindows{\end{description}} + +\def\requirements{\trivlist \item[\hskip\labelsep {\bf Requirements.}]} +\def\endrequirements{\endtrivlist} +\def\installation{\trivlist \item[\hskip\labelsep {\bf Installation.}]} +\def\endinstallation{\endtrivlist} +\def\troubleshooting{\trivlist \item[\hskip\labelsep {\bf Troubleshooting.}]} +\def\endtroubleshooting{\endtrivlist} + +\newtheorem{gcrule}{Rule} + +% Pour les tables de priorites et autres tableaux a deux colonnes, encadres + +\def\tableau#1#2#3{% +\begin{center} +\begin{tabular}{#1} +\hline +#2 & #3 \\ +\hline +} +\def\endtableau{\hline\end{tabular}\end{center}} +\def\entree#1#2{#1 & #2 \\} + +% L'environnement option + +\def\optionitem[#1]{\if@noparitem \@donoparitem + \else \if@inlabel \indent \par \fi + \ifhmode \unskip\unskip \par \fi + \if@newlist \if@nobreak \@nbitem \else + \addpenalty\@beginparpenalty + \addvspace\@topsep \addvspace{-\parskip}\fi + \else \addpenalty\@itempenalty \addvspace\itemsep + \fi + \global\@inlabeltrue +\fi +\everypar{\global\@minipagefalse\global\@newlistfalse + \if@inlabel\global\@inlabelfalse \hskip -\parindent \box\@labels + \penalty\z@ \fi + \everypar{}}\global\@nobreakfalse +\if@noitemarg \@noitemargfalse \if@nmbrlist \refstepcounter{\@listctr}\fi \fi +\setbox\@tempboxa\hbox{\makelabel{#1}}% +\global\setbox\@labels +\ifdim \wd\@tempboxa >\labelwidth + \hbox{\unhbox\@labels + \hskip -\leftmargin + \box\@tempboxa}\hfil\break + \else + \hbox{\unhbox\@labels + \hskip -\leftmargin + \hbox to\leftmargin {\makelabel{#1}\hfil}} + \fi + \ignorespaces} + +\def\optionlabel#1{\bf #1} 
+\def\options{\list{}{\let\makelabel\optionlabel\let\@item\optionitem}} +\def\endoptions{\endlist} + +% L'environnement library (pour composer les descriptions des modules +% de bibliotheque). + +\def\comment{\penalty200\list{}{}\item[]} +\def\endcomment{\endlist\penalty-100} + +\def\library{ +\begingroup +\raggedright +\let\@savedlistI=\@listI% +\def\@listI{\leftmargin\leftmargini\parsep 0pt plus 1pt\topsep 0pt plus 2pt}% +\itemsep 0pt +\topsep 0pt plus 2pt +\partopsep 0pt +} + +\def\endlibrary{ +\endgroup +} + +\def\restoreindent{\begingroup\let\@listI=\@savedlistI} +\def\endrestoreindent{\endgroup} + +% ^^A...^^A: compose l'interieur en \tt, comme \verb + +\catcode`\^^A=\active +\def{% +\begingroup\catcode``=13\@noligs\ttstretch\let\do\@makeother\dospecials% +\def\@xobeysp{\leavevmode\penalty100\ }% +\@vobeyspaces\frenchspacing\catcode`\^^A=\active\def{\endgroup}} + +% Pour l'index + +\let\indexentry=\index +\def\index{\indexentry{\jobname}} +\def\ikwd{\indexentry{\jobname.kwd}} + +% Les en-tetes personnalises + +\pagestyle{myheadings} +\def\partmark#1{\markboth{Part \thepart. \ #1}{}} +\def\chaptermark#1{\markright{Chapter \thechapter. \ #1}} + +% nth + +\def\th{^{\hbox{\scriptsize th}}} + +% Pour annuler l'espacement vertical qui suit un "verbatim" +\def\cancelverbatim{\vspace{-\topsep}\vspace{-\parskip}}% exact. + +% Pour annuler l'espacement vertical entre deux \item consecutifs dans \options +\def\cancelitemspace{\vspace{-8mm}}% determine empiriquement + +% Pour faire la cesure apres _ dans les identificateurs +\def\={\discretionary{}{}{}} +\def\cuthere{\discretionary{}{}{}} + +% Pour la coupure en petits documents + +\let\mysection=\section + +%%% Augmenter l'espace entre numero de section +% et nom de section dans la table des matieres. 
+ +\ifplaintext\else +\def\l@section{\@dottedtocline{1}{1.5em}{2.8em}} % D'origine: 2.3 +\fi + +% Pour alltt + +\def\rminalltt#1{{\rm #1}} + +% redefinition de l'environnement alltt pour que les {} \ et % soient +% dans la bonne fonte + +\let\@oldalltt=\alltt +\let\@oldendalltt=\endalltt +\renewenvironment{alltt}{% +\begingroup% +\renewcommand{\{}{\char`\{}% +\renewcommand{\}}{\char`\}}% +\renewcommand{\\}{\char`\\}% +\renewcommand{\%}{\char`\%}% +\@oldalltt% +}{% +\@oldendalltt% +\endgroup% +} + +% Index stuff -- cf multind.sty + +\def\printindex#1#2{\@restonecoltrue\if@twocolumn\@restonecolfalse\fi + \columnseprule \z@ \columnsep 35pt + \newpage \twocolumn[{\Large\bf #2 \vskip4ex}] + \markright{\uppercase{#2}} + \addcontentsline{toc}{section}{#2} + \pdfsection{#2} + \@input{#1.ind}} + +% PDF stuff -- no longer needed, Hyperref does the job + +\def\pdfchapterfold#1#2{} +\def\pdfchapter#1{} +\def\pdfsection#1{} + +%\ifpdf +%\newcount\pdflabel +%\pdflabel=1 +%\def\pdfchapterfold#1#2{ +%\pdfdest num \pdflabel fit +%\pdfoutline goto num \pdflabel count #1 {\arabic{chapter}. #2} +%\global\advance\pdflabel by 1 +%} +%\def\pdfsection#1{ +%\pdfdest num \pdflabel fit +%\pdfoutline goto num \pdflabel {#1} +%\global\advance\pdflabel by 1 +%} +%\else +%\def\pdfchapterfold#1#2{} +%\def\pdfsection#1{} +%\fi +% +%\def\pdfchapter{\pdfchapterfold{0}} + +%%% Pour camlidl + +\def\transl#1{$[\![\mbox{#1}]\!]$} + +%%% Pour les references des modules +\newcommand{\moduleref}[1]{\ref{#1}} +%%% Fin des hacks + +\makeatother + +% Pour le chapitre ocamlbuild +\newcommand{\mathscr}[1]{{\mathcal{#1}}} +\newcommand{\ocb}{\texttt{ocamlbuild}\xspace} +\newcommand{\tags}{\texttt{\_tags}\xspace} diff --git a/manual/manual/manual.hva b/manual/manual/manual.hva new file mode 100644 index 0000000000..4c8d59d93e --- /dev/null +++ b/manual/manual/manual.hva @@ -0,0 +1,4 @@ +\input{book.hva} +\input{fancysection.hva} +\input{macros.hva} +\newif\ifouthtml\outhtmltrue
\ No newline at end of file diff --git a/manual/manual/manual.inf b/manual/manual/manual.inf new file mode 100644 index 0000000000..4681ce7e14 --- /dev/null +++ b/manual/manual/manual.inf @@ -0,0 +1,120 @@ +\input{book.hva} +\renewcommand{\@indexsection}[1]{\chapter{#1}} +\newcommand{\black}{\htmlcolor{#000000}} +\newcommand{\machine}{\tt} +\newenvironment{machineenv}{\begin{alltt}}{\end{alltt}} +\newenvironment{camlunder}{\@style{U}}{} +\newcommand{\caml}{\begin{alltt}\renewcommand{\\}{\char92}\def\<{\begin{camlunder}}\def\>{\end{camlunder}}\activebracefalse} +\newcommand{\endcaml}{\activebracetrue\end{alltt}} +\newcommand{\?}{\black\#\blue } +\renewcommand{\:}{\maroon} +\newcommand{\var}[1]{\textit{#1}} + +\newenvironment{library}{}{} +\newcounter{page} +\newenvironment{comment}{\begin{quote}}{\end{quote}} +\newcommand{\nth}[2]{\({#1}_{#2}\)} +\newenvironment{options}{\begin{description}}{\end{description}} + + +%%venant de macros.tex +\newif\ifouthtml\outhtmlfalse +\def\versionspecific#1{ +\quad\textsf{#1:} +\begin{quote}} + +\def\unix{\versionspecific{Unix}} +\def\endunix{\end{quote}} +\def\macos{\versionspecific{MacOS}} +\def\endmacos{\end{quote}} +\def\windows{\versionspecific{Windows}} +\def\endwindows{\end{quote}} + +\def\requirements{\trivlist \item[\hskip\labelsep {\bf Requirements.}]} +\def\endrequirements{\endtrivlist} +\def\installation{\trivlist \item[\hskip\labelsep {\bf Installation.}]} +\def\endinstallation{\endtrivlist} +\def\troubleshooting{\trivlist \item[\hskip\labelsep {\bf Troubleshooting.}]} +\def\endtroubleshooting{\endtrivlist} + +\newtheorem{gcrule}{Rule} + +% Pour les tables de priorites et autres tableaux a deux colonnes, encadres + +%\def\entree#1#2{#1 & #2 \\} +%\def\tableau#1#2#3{% +%\par\begin{center}% +%\begin{tabular}{#1}% +%\multicolumn{1}{c}{\textbf{#2}} & +%\multicolumn{1}{c}{\textbf{#3}} \\ +%%#2 & #3 \\% +%}% +%\def\endtableau{\end{tabular}\end{center}\par} + +% Pour les tables de priorites et autres tableaux a deux colonnes, 
encadres + +\def\tableau#1#2#3{% +\begin{center} +\begin{tabular}{#1} +\hline +\multicolumn{1}{|c|}{\textbf{#2}} & \multicolumn{1}{c|}{\textbf{#3}} \\ +\hline +} +\def\endtableau{\hline\end{tabular}\end{center}} +\def\entree#1#2{#1 & #2 \\} + + + +% L'environnement library (pour composer les descriptions des modules +% de bibliotheque). + + +\def\restoreindent{\begingroup\let\@listI=\@savedlistI} +\def\endrestoreindent{\endgroup} + + +% PDF stuff + +\def\pdfchapterfold#1#2{} +\def\pdfsection#1{} +\def\pdfchapter{\pdfchapterfold{0}} + +%%% Pour camlidl + +\def\transl#1{$[\![\mbox{#1}]\!]$} + +% Pour l'index +\usepackage{multind} +\let\indexentry=\index +\renewcommand{\index}[1]{\indexentry{\jobname}{#1}} +\def\ikwd#1{\indexentry{\jobname.kwd}{#1}} + + +% nth +\def\th{^{\mbox{\scriptsize th}}} +\renewcommand{\hbox}[1]{\mbox{#1}} + +% Notations pour les metavariables +\def\nmth#1#2#3{\({#1}_{#2}^{#3}\)} +\def\optvar#1{[\var{#1}\/]} +\def\event{§§} +\def\fromoneto#1#2{$#1 = 1,\ldots{} , #2$} + +\newcommand{\vfill}{} +\def\number{} +\def\year{2013} + +% Pour alltt + +\def\rminalltt#1{{\rm #1}} + +\def\goodbreak{\ \\} + +\def\@savedlistI{} + +%% +% Pour le chapitre ocamlbuild +\newcommand{\mathscr}[1]{{\mathcal{#1}}} +\newcommand{\ocb}{\texttt{ocamlbuild}\xspace} +\newcommand{\tags}{\texttt{\_tags}\xspace} +\newcommand{\@cup}{|\_|} diff --git a/manual/manual/manual.info.header b/manual/manual/manual.info.header new file mode 100644 index 0000000000..7466515837 --- /dev/null +++ b/manual/manual/manual.info.header @@ -0,0 +1,4 @@ +INFO-DIR-SECTION OCaml Programming Language +START-INFO-DIR-ENTRY +* ocaml: (ocaml). 
OCaml Reference Manual +END-INFO-DIR-ENTRY diff --git a/manual/manual/manual.tex b/manual/manual/manual.tex new file mode 100644 index 0000000000..f50cff41fa --- /dev/null +++ b/manual/manual/manual.tex @@ -0,0 +1,30 @@ +\documentclass[11pt]{book} +\usepackage[latin1]{inputenc} +%HEVEA\@def@charset{US-ASCII}% +\usepackage{alltt} +\usepackage{fullpage} +\usepackage{syntaxdef} +\usepackage{multind} +\usepackage{html} +\usepackage{textcomp} +\usepackage{caml-sl} +\usepackage{ocamldoc} +\usepackage{xspace} +\newif\ifplaintext +\plaintextfalse +%\newif\ifpdf +%\pdffalse +\input{macros.tex} + +\usepackage{hyperref} +%\makeatletter \def\@wrindex#1#2{\xdef \@indexfile{\csname #1@idxfile\endcsname}\@@wrindex#2||\\}\makeatother +\def\th{^{\hbox{\scriptsize th}}} + +\raggedbottom +\input{version.tex} + +%HEVEA\setcounter{cuttingdepth}{1} +%HEVEA\title{The OCaml system, release \ocamlversion} +\input{allfiles.tex} + + diff --git a/manual/manual/pdfmanual.tex b/manual/manual/pdfmanual.tex new file mode 100644 index 0000000000..98db4e6e28 --- /dev/null +++ b/manual/manual/pdfmanual.tex @@ -0,0 +1,31 @@ +%\pdfoutput=1 +\pdfpagewidth=21cm +\pdfpageheight=11in +\pdfcompresslevel=7 + +\documentclass[11pt]{book} + +\usepackage[latin1]{inputenc} +\usepackage{alltt} +\usepackage{fullpage} +\usepackage{syntaxdef} +\usepackage{multind} +\usepackage{html} +\usepackage{textcomp} +\usepackage{caml-sl} +\usepackage{ocamldoc} +\usepackage{xspace} + +\newif\ifplaintext +\plaintextfalse +%\newif\ifpdf +%\pdftrue +\input macros.tex + +\usepackage{hyperref} +\def\th{^{\hbox{\scriptsize th}}} + +\raggedbottom +\input{version.tex} + +\input allfiles.tex diff --git a/manual/manual/plaintext.tex b/manual/manual/plaintext.tex new file mode 100644 index 0000000000..86201b4b05 --- /dev/null +++ b/manual/manual/plaintext.tex @@ -0,0 +1,17 @@ +\documentclass[11pt]{report} + +\usepackage{plaintext} +\usepackage[latin1]{inputenc} +\usepackage{alltt} +\usepackage{fullpage} +\usepackage{syntaxdef} 
+\usepackage{multind} +\usepackage{html} +\usepackage{caml-sl} + +\newif\ifplaintext +\plaintexttrue +%\newif\ifpdf +%\pdffalse +\input macros.tex +\input allfiles.tex diff --git a/manual/manual/refman/.cvsignore b/manual/manual/refman/.cvsignore new file mode 100644 index 0000000000..81ccbe7105 --- /dev/null +++ b/manual/manual/refman/.cvsignore @@ -0,0 +1,2 @@ +*.tex +*.htex diff --git a/manual/manual/refman/.gitignore b/manual/manual/refman/.gitignore new file mode 100644 index 0000000000..81ccbe7105 --- /dev/null +++ b/manual/manual/refman/.gitignore @@ -0,0 +1,2 @@ +*.tex +*.htex diff --git a/manual/manual/refman/Makefile b/manual/manual/refman/Makefile new file mode 100644 index 0000000000..641537b139 --- /dev/null +++ b/manual/manual/refman/Makefile @@ -0,0 +1,21 @@ +FILES= refman.tex lex.tex names.tex values.tex const.tex types.tex \ + patterns.tex expr.tex typedecl.tex modtypes.tex modules.tex compunit.tex \ + exten.tex classes.tex + +TRANSF=../../tools/transf +TEXQUOTE=../../tools/texquote2 + +ALLFILES=$(FILES) + +all: $(ALLFILES) + +clean: + rm -f $(ALLFILES) + +.SUFFIXES: +.SUFFIXES: .etex .tex + +.etex.tex: + $(TRANSF) < $*.etex | $(TEXQUOTE) > $*.tex + +$(ALLFILES): $(TRANSF) $(TEXQUOTE) diff --git a/manual/manual/refman/classes.etex b/manual/manual/refman/classes.etex new file mode 100644 index 0000000000..925e1f7033 --- /dev/null +++ b/manual/manual/refman/classes.etex @@ -0,0 +1,507 @@ +\section{Classes} +\pdfsection{Classes} +%HEVEA\cutname{classes.html} +Classes are defined using a small language, similar to the module +language. + +\subsection{Class types} + +Class types are the class-level equivalent of type expressions: they +specify the general shape and type properties of classes. 
+ +\ikwd{object\@\texttt{object}} +\ikwd{end\@\texttt{end}} +\ikwd{inherit\@\texttt{inherit}} +\ikwd{val\@\texttt{val}} +\ikwd{mutable\@\texttt{mutable}} +\ikwd{method\@\texttt{method}} +\ikwd{private\@\texttt{private}} +\ikwd{virtual\@\texttt{virtual}} +\ikwd{constraint\@\texttt{constraint}} + +\begin{syntax} +class-type: + [['?']label-name':'] typexpr '->' class-type + | class-body-type +; +class-body-type: + 'object' ['(' typexpr ')'] {class-field-spec} 'end' + | ['[' typexpr {',' typexpr} ']'] classtype-path +; +%\end{syntax} \begin{syntax} +class-field-spec: + 'inherit' class-body-type + | 'val' ['mutable'] ['virtual'] inst-var-name ':' typexpr + | 'val' 'virtual' 'mutable' inst-var-name ':' typexpr + | 'method' ['private'] ['virtual'] method-name ':' poly-typexpr + | 'method' 'virtual' 'private' method-name ':' poly-typexpr + | 'constraint' typexpr '=' typexpr +\end{syntax} + +\subsubsection*{Simple class expressions} + +The expression @classtype-path@ is equivalent to the class type bound to +the name @classtype-path@. Similarly, the expression +@'[' typexpr_1 ',' \ldots typexpr_n ']' classtype-path@ is equivalent to +the parametric class type bound to the name @classtype-path@, in which +type parameters have been instantiated to respectively @typexpr_1@, +\ldots @typexpr_n@. + +\subsubsection*{Class function type} + +The class type expression @typexpr '->' class-type@ is the type of +class functions (functions from values to classes) that take as +argument a value of type @typexpr@ and return as result a class of +type @class-type@. + +\subsubsection*{Class body type} + +The class type expression +@'object' ['(' typexpr ')'] {class-field-spec} 'end'@ +is the type of a class body. It specifies its instance variables and +methods. In this type, @typexpr@ is matched against the self type, therefore +providing a name for the self type. 
+ +A class body will match a class body type if it provides definitions +for all the components specified in the class body type, and these +definitions meet the type requirements given in the class body type. +Furthermore, all methods either virtual or public present in the class +body must also be present in the class body type (on the other hand, some +instance variables and concrete private methods may be omitted). A +virtual method will match a concrete method, which makes it possible +to forget its implementation. An immutable instance variable will match a +mutable instance variable. + +\subsubsection*{Inheritance} + +\ikwd{inherit\@\texttt{inherit}} + +The inheritance construct @'inherit' class-body-type@ provides for inclusion of +methods and instance variables from other class types. +The instance variable and method types from @class-body-type@ are added +into the current class type. + +\subsubsection*{Instance variable specification} + +\ikwd{val\@\texttt{val}} +\ikwd{mutable\@\texttt{mutable}} +\ikwd{virtual\@\texttt{virtual}} + +A specification of an instance variable is written +@'val' ['mutable'] ['virtual'] inst-var-name ':' typexpr@, where +@inst-var-name@ +is the name of the instance variable and @typexpr@ its expected type. +% +The flag @'mutable'@ indicates whether this instance variable can be +physically modified. +% +The flag @'virtual'@ indicates that this instance variable is not +initialized. It can be initialized later through inheritance. + +An instance variable specification will hide any previous +specification of an instance variable of the same name. + +\subsubsection*{Method specification} +\label{sec-methspec} + +\ikwd{method\@\texttt{method}} +\ikwd{private\@\texttt{private}} + +The specification of a method is written +@'method' ['private'] method-name ':' poly-typexpr@, where +@method-name@ is the name of the method and @poly-typexpr@ its +expected type, possibly polymorphic. 
The flag @'private'@ indicates +that the method cannot be accessed from outside the object. + +The polymorphism may be left implicit in public method specifications: +any type variable which is not bound to a class parameter and does not +appear elsewhere inside the class specification will be assumed to be +universal, and made polymorphic in the resulting method type. +Writing an explicit polymorphic type will disable this behaviour. + +If several specifications are present for the same method, they +must have compatible types. +Any non-private specification of a method forces it to be public. + +\subsubsection*{Virtual method specification} + +\ikwd{virtual\@\texttt{virtual}} +\ikwd{method\@\texttt{method}} +\ikwd{private\@\texttt{private}} + +A virtual method specification is written @'method' ['private'] +'virtual' method-name ':' poly-typexpr@, where @method-name@ is the +name of the method and @poly-typexpr@ its expected type. + +\subsubsection*{Constraints on type parameters} + +\ikwd{constraint\@\texttt{constraint}} + +The construct @'constraint' typexpr_1 '=' typexpr_2@ forces the two +type expressions to be equal. This is typically used to specify type +parameters: in this way, they can be bound to specific type +expressions. + +\subsection{Class expressions} + +Class expressions are the class-level equivalent of value expressions: +they evaluate to classes, thus providing implementations for the +specifications expressed in class types. 
+ +\ikwd{object\@\texttt{object}} +\ikwd{end\@\texttt{end}} +\ikwd{fun\@\texttt{fun}} +\ikwd{let\@\texttt{let}} +\ikwd{rec\@\texttt{rec}} +\ikwd{in\@\texttt{in}} +\ikwd{and\@\texttt{and}} +\ikwd{inherit\@\texttt{inherit}} +\ikwd{as\@\texttt{as}} +\ikwd{val\@\texttt{val}} +\ikwd{mutable\@\texttt{mutable}} +\ikwd{method\@\texttt{method}} +\ikwd{private\@\texttt{private}} +\ikwd{virtual\@\texttt{virtual}} +\ikwd{constraint\@\texttt{constraint}} +\ikwd{initializer\@\texttt{initializer}} + +\begin{syntax} +class-expr: + class-path + | '[' typexpr {',' typexpr} ']' class-path + | '(' class-expr ')' + | '(' class-expr ':' class-type ')' + | class-expr {{argument}} + | 'fun' {{parameter}} '->' class-expr + | 'let' ['rec'] let-binding {'and' let-binding} 'in' class-expr + | 'object' class-body 'end' +; +%BEGIN LATEX +\end{syntax} \begin{syntax} +%END LATEX +class-field: + 'inherit' class-expr ['as' lowercase-ident] + | 'val' ['mutable'] inst-var-name [':' typexpr] '=' expr + | 'val' ['mutable'] 'virtual' inst-var-name ':' typexpr + | 'val' 'virtual' 'mutable' inst-var-name ':' typexpr + | 'method' ['private'] method-name {parameter} [':' typexpr] '=' expr + | 'method' ['private'] method-name ':' poly-typexpr '=' expr + | 'method' ['private'] 'virtual' method-name ':' poly-typexpr + | 'method' 'virtual' 'private' method-name ':' poly-typexpr + | 'constraint' typexpr '=' typexpr + | 'initializer' expr +\end{syntax} + +\subsubsection*{Simple class expressions} + +The expression @class-path@ evaluates to the class bound to the name +@class-path@. Similarly, the expression +@'[' typexpr_1 ',' \ldots typexpr_n ']' class-path@ +evaluates to the parametric class bound to the name @class-path@, +in which type parameters have been instantiated respectively to +@typexpr_1@, \ldots @typexpr_n@. + +The expression @'(' class-expr ')'@ evaluates to the same class as +@class-expr@. 
+ +The expression @'(' class-expr ':' class-type ')'@ checks that +@class-type@ matches the type of @class-expr@ (that is, that the +implementation @class-expr@ meets the type specification +@class-type@). The whole expression evaluates to the same class as +@class-expr@, except that all components not specified in +@class-type@ are hidden and can no longer be accessed. + +\subsubsection*{Class application} + +Class application is denoted by juxtaposition of (possibly labeled) +expressions. It denotes the class whose constructor is the first +expression applied to the given arguments. The arguments are +evaluated as for expression application, but the constructor itself will +only be evaluated when objects are created. In particular, side-effects +caused by the application of the constructor will only occur at object +creation time. + +\subsubsection*{Class function} + +The expression @'fun' [['?']label-name':']pattern '->' class-expr@ evaluates +to a function from values to classes. +When this function is applied to a value \var{v}, this value is +matched against the pattern @pattern@ and the result is the result of +the evaluation of @class-expr@ in the extended environment. + +Conversion from functions with default values to functions with +patterns only works identically for class functions as for normal +functions. + +The expression +\begin{center} +@"fun" parameter_1 \ldots parameter_n "->" class-expr@ +\end{center} +is a short form for +\begin{center} +@"fun" parameter_1 "->" \ldots "fun" parameter_n "->" expr@ +\end{center} + +\subsubsection*{Local definitions} + +The {\tt let} and {\tt let rec} constructs bind value names locally, +as for the core language expressions. + +If a local definition occurs at the very beginning of a class +definition, it will be evaluated when the class is created (just as if +the definition was outside of the class). +Otherwise, it will be evaluated when the object constructor is called. 
+ + +\subsubsection*{Class\label{ss:class-body} body} +\begin{syntax} +class-body: ['(' pattern [':' typexpr] ')'] { class-field } +\end{syntax} +The expression +@'object' class-body 'end'@ denotes +a class body. This is the prototype for an object: it lists the +instance variables and methods of an object of this class. + +A class body is a class value: it is not evaluated at once. Rather, +its components are evaluated each time an object is created. + +In a class body, the pattern @'(' pattern [':' typexpr] ')'@ is +matched against self, therefore providing a binding for self and self +type. Self can only be used in methods and initializers. + +Self type cannot be a closed object type, so that the class remains +extensible. + +Since OCaml 4.01, it is an error if the same method or instance +variable name is defined several times in the same class body. + +\subsubsection*{Inheritance} + +\ikwd{inherit\@\texttt{inherit}} + +The inheritance construct @'inherit' class-expr@ allows reusing +methods and instance variables from other classes. The class +expression @class-expr@ must evaluate to a class body. The instance +variables, methods and initializers from this class body are added +into the current class. The addition of a method will override any +previously defined method of the same name. + +\ikwd{as\@\texttt{as}} +An ancestor can be bound by appending @'as' lowercase-ident@ +to the inheritance construct. @lowercase-ident@ is not a true +variable and can only be used to select a method, i.e. in an expression +@lowercase-ident '#' method-name@. This gives access to the +method @method-name@ as it was defined in the parent class even if it is +redefined in the current class. +The scope of this ancestor binding is limited to the current class. +The ancestor method may be called from a subclass but only indirectly. 
+ +\subsubsection*{Instance variable definition} + +\ikwd{val\@\texttt{val}} +\ikwd{mutable\@\texttt{mutable}} + +The definition @'val' ['mutable'] inst-var-name '=' expr@ adds an +instance variable @inst-var-name@ whose initial value is the value of +expression @expr@. +% +The flag @'mutable'@ allows physical modification of this variable by +methods. + +An instance variable can only be used in the methods and +initializers that follow its definition. + +Since version 3.10, redefinitions of a visible instance variable with +the same name do not create a new variable, but are merged, using the +last value for initialization. They must have identical types and +mutability. +However, if an instance variable is hidden by +omitting it from an interface, it will be kept distinct from +other instance variables with the same name. + +\subsubsection*{Virtual instance variable definition} + +\ikwd{virtual\@\texttt{virtual}} +\ikwd{val\@\texttt{val}} +\ikwd{mutable\@\texttt{mutable}} + +A variable specification is written @'val' ['mutable'] 'virtual' +inst-var-name ':' typexpr@. It specifies whether the variable is +modifiable, and gives its type. + +Virtual instance variables were added in version 3.10. + +\subsubsection*{Method definition} + +\ikwd{method\@\texttt{method}} +\ikwd{private\@\texttt{private}} + +A method definition is written @'method' method-name '=' expr@. The +definition of a method overrides any previous definition of this +method. The method will be public (that is, not private) if any of +the definitions states so. + +A private method, @'method' 'private' method-name '=' expr@, is a +method that can only be invoked on self (from other methods of the +same object, defined in this class or one of its subclasses). This +invocation is performed using the expression +@value-name '#' method-name@, where @value-name@ is directly bound to +self at the beginning of the class definition. Private methods do +not appear in object types. 
A method may have both public and private +definitions, but as soon as there is a public one, all subsequent +definitions will be made public. + +Methods may have an explicitly polymorphic type, allowing them to be +used polymorphically in programs (even for the same object). The +explicit declaration may be done in one of three ways: (1) by giving an +explicit polymorphic type in the method definition, immediately after +the method name, {\em i.e.} +@'method' ['private'] method-name ':' {{"'" ident}} '.' typexpr '=' +expr@; (2) by a forward declaration of the explicit polymorphic type +through a virtual method definition; (3) by importing such a +declaration through inheritance and/or constraining the type of {\em +self}. + +Some special expressions are available in method bodies for +manipulating instance variables and duplicating self: +\begin{syntax} +expr: + \ldots + | inst-var-name '<-' expr + | '{<' [ inst-var-name '=' expr { ';' inst-var-name '=' expr } [';'] ] '>}' +\end{syntax} + +The expression @inst-var-name '<-' expr@ modifies in-place the current +object by replacing the value associated to @inst-var-name@ by the +value of @expr@. Of course, this instance variable must have been +declared mutable. + +The expression +@'{<' inst-var-name_1 '=' expr_1 ';' \ldots ';' inst-var-name_n '=' expr_n '>}'@ +evaluates to a copy of the current object in which the values of +instance variables @inst-var-name_1, \ldots, inst-var-name_n@ have +been replaced by the values of the corresponding expressions @expr_1, +\ldots, expr_n@. + +\subsubsection*{Virtual method definition} + +\ikwd{virtual\@\texttt{virtual}} +\ikwd{method\@\texttt{method}} +\ikwd{private\@\texttt{private}} + +A method specification is written @'method' ['private'] 'virtual' +method-name ':' poly-typexpr@. It specifies whether the method is +public or private, and gives its type. If the method is intended to be +polymorphic, the type must be explicitly polymorphic. 
+ +\subsubsection*{Constraints on type parameters} + +\ikwd{constraint\@\texttt{constraint}} + +The construct @'constraint' typexpr_1 '=' typexpr_2@ forces the two +type expressions to be equal. This is typically used to specify type +parameters: in that way they can be bound to specific type +expressions. + +\subsubsection*{Initializers} + +\ikwd{initializer\@\texttt{initializer}} + +A class initializer @'initializer' expr@ specifies an expression that +will be evaluated whenever an object is created from the class, once +all its instance variables have been initialized. + +\subsection{Class definitions} +\label{s:classdef} + +\ikwd{class\@\texttt{class}} +\ikwd{and\@\texttt{and}} +\ikwd{virtual\@\texttt{virtual}} + +\begin{syntax} +class-definition: + 'class' class-binding { 'and' class-binding } +; +class-binding: + ['virtual'] ['[' type-parameters ']'] class-name + {parameter} [':' class-type] \\ '=' class-expr +; +type-parameters: + "'" ident { "," "'" ident } +\end{syntax} + +A class definition @'class' class-binding { 'and' class-binding }@ is +recursive. Each @class-binding@ defines a @class-name@ that can be +used in the whole expression except for inheritance. It can also be +used for inheritance, but only in the definitions that follow its own. + +A class binding binds the class name @class-name@ to the value of +expression @class-expr@. It also binds the class type @class-name@ to +the type of the class, and defines two type abbreviations: +@class-name@ and @'#' class-name@. The first one is the type of +objects of this class, while the second is more general as it unifies +with the type of any object belonging to a subclass (see +section~\ref{s:sharp-types}). + +\subsubsection*{Virtual class} + +\ikwd{virtual\@\texttt{virtual}} +A class must be flagged virtual if one of its methods is virtual (that +is, appears in the class type, but is not actually defined). +Objects cannot be created from a virtual class. 
+ +\subsubsection*{Type parameters} + +The class type parameters correspond to the ones of the class type and +of the two type abbreviations defined by the class binding. They must +be bound to actual types in the class definition using type +constraints. So that the abbreviations are well-formed, type +variables of the inferred type of the class must either be type +parameters or be bound in the constraint clause. + +\subsection{Class specifications} +\label{s:class-spec} + +\ikwd{class\@\texttt{class}} +\ikwd{and\@\texttt{and}} +\ikwd{virtual\@\texttt{virtual}} + +\begin{syntax} +class-specification: + 'class' class-spec { 'and' class-spec } +; +class-spec: + ['virtual'] ['[' type-parameters ']'] class-name ':' + class-type +\end{syntax} + +This is the counterpart in signatures of class definitions. +A class specification matches a class definition if they have the same +type parameters and their types match. + +\subsection{Class type definitions} +\label{s:classtype} + +\ikwd{class\@\texttt{class}} +\ikwd{type\@\texttt{type}} +\ikwd{and\@\texttt{and}} +\ikwd{virtual\@\texttt{virtual}} + +\begin{syntax} +classtype-definition: + 'class' 'type' classtype-def + { 'and' classtype-def } +; +classtype-def: + ['virtual'] ['[' type-parameters ']'] class-name '=' class-body-type +\end{syntax} + +A class type definition @'class' class-name '=' class-body-type@ +defines an abbreviation @class-name@ for the class body type +@class-body-type@. As for class definitions, two type abbreviations +@class-name@ and @'#' class-name@ are also defined. The definition can +be parameterized by some type parameters. If any method in the class +type body is virtual, the definition must be flagged @'virtual'@. + +Two class type definitions match if they have the same type parameters +and they expand to matching types. 
diff --git a/manual/manual/refman/compunit.etex b/manual/manual/refman/compunit.etex new file mode 100644 index 0000000000..6c8c09f790 --- /dev/null +++ b/manual/manual/refman/compunit.etex @@ -0,0 +1,42 @@ +\section{Compilation units} +\pdfsection{Compilation units} +%HEVEA\cutname{compunit.html} + +\begin{syntax} +unit-interface: { specification [';;'] } +; +unit-implementation: [ module-items ] +\end{syntax} + +Compilation units bridge the module system and the separate +compilation system. A compilation unit is composed of two parts: an +interface and an implementation. The interface contains a sequence of +specifications, just as the inside of a @'sig' \ldots 'end'@ +signature expression. The implementation contains a sequence of +definitions and expressions, just as the inside of a +@'struct' \ldots 'end'@ module +expression. A compilation unit also has a name @unit-name@, derived +from the names of the files containing the interface and the +implementation (see chapter~\ref{c:camlc} for more details). A +compilation unit behaves roughly as the module definition +\begin{center} +@'module' unit-name ':' 'sig' unit-interface 'end' '=' + 'struct' unit-implementation 'end'@ +\end{center} + +A compilation unit can refer to other compilation units by their +names, as if they were regular modules. For instance, if "U" is a +compilation unit that defines a type "t", other compilation units can +refer to that type under the name "U.t"; they can also refer to "U" as +a whole structure. Except for names of other compilation units, a unit +interface or unit implementation must not have any other free variables. 
+In other terms, the type-checking and compilation of an interface or +implementation proceeds in the initial environment +\begin{center} +@name_1 ':' 'sig' specification_1 'end' \ldots + name_n ':' 'sig' specification_n 'end'@ +\end{center} +where @name_1 \ldots name_n@ are the names of the other +compilation units available in the search path (see +chapter~\ref{c:camlc} for more details) and @specification_1 \ldots +specification_n@ are their respective interfaces. diff --git a/manual/manual/refman/const.etex b/manual/manual/refman/const.etex new file mode 100644 index 0000000000..113077e7c0 --- /dev/null +++ b/manual/manual/refman/const.etex @@ -0,0 +1,26 @@ +\section{Constants} +\pdfsection{Constants} +%HEVEA\cutname{const.html} + +\begin{syntax} +constant: + integer-literal + | float-literal + | char-literal + | string-literal + | constr + | "false" + | "true" + | "("")" + | "begin" "end" + | "[""]" + | "[|""|]" + | "`"tag-name +\end{syntax} + +The syntactic class of constants comprises literals from the four +base types (integers, floating-point numbers, characters, character +strings), and constant constructors from both normal and polymorphic +variants, as well as the special constants @"false"@, @"true"@, @"("")"@, +@"[""]"@, and @"[|""|]"@, which behave like constant constructors, and +@"begin" "end"@, which is equivalent to @'('')'@. diff --git a/manual/manual/refman/directive.etex b/manual/manual/refman/directive.etex new file mode 100644 index 0000000000..c942667089 --- /dev/null +++ b/manual/manual/refman/directive.etex @@ -0,0 +1,25 @@ +\section{Directives} \label{s:directives} + +\begin{syntax} +directive: + '#' 'open' string + | '#' 'close' string + | '#' ident string +\end{syntax} + +Directives control the behavior of the compiler. They apply to the +remainder of the current compilation unit. 
+ +The two directives \verb"#open" and \verb"#close" modify the list of +opened modules, that the compiler uses to complete unqualified +identifiers, as described in section~\ref{s:names}. The directive +@'#open' string@ adds the module whose name is given by the string +constant @string@ to the list of opened modules, in first position. +The directive @'#close' string@ removes the first occurrence of the +module whose name is given by the string constant @string@ from the +list of opened modules. + +Implementations can provide other directives, provided they follow the +syntax @'#' ident string@, where @ident@ is the name of the directive, +and the string constant @string@ is the argument to the directive. The +behavior of these additional directives is implementation-dependent. diff --git a/manual/manual/refman/expr.etex b/manual/manual/refman/expr.etex new file mode 100644 index 0000000000..4e9548ff84 --- /dev/null +++ b/manual/manual/refman/expr.etex @@ -0,0 +1,837 @@ +\section{Expressions\label{s:value-expr}} +\pdfsection{Expressions} +%HEVEA\cutname{expr.html} +\ikwd{in\@\texttt{in}|see{\texttt{let}}} +\ikwd{and\@\texttt{and}|see{\texttt{let}, \texttt{type}, \texttt{class}}} +\ikwd{rec\@\texttt{rec}|see{\texttt{let}}} +\ikwd{let\@\texttt{let}} +\ikwd{try\@\texttt{try}} +\ikwd{function\@\texttt{function}} +\ikwd{fun\@\texttt{fun}} +\ikwd{with\@\texttt{with}|see{\texttt{match}, \texttt{try}}} +\ikwd{done\@\texttt{done}|see{\texttt{while}, \texttt{for}}} +\ikwd{do\@\texttt{do}|see{\texttt{while}, \texttt{for}}} +\ikwd{downto\@\texttt{downto}|see{\texttt{for}}} +\ikwd{to\@\texttt{to}|see{\texttt{for}}} +\ikwd{for\@\texttt{for}} +\ikwd{else\@\texttt{else}|see{\texttt{if}}} +\ikwd{then\@\texttt{then}|see{\texttt{if}}} +\ikwd{if\@\texttt{if}} +\ikwd{of\@\texttt{of}|see{\texttt{type}, \texttt{exception}}} +\ikwd{or\@\texttt{or}} +\ikwd{match\@\texttt{match}} +\ikwd{begin\@\texttt{begin}} +\ikwd{end\@\texttt{end}} +\ikwd{when\@\texttt{when}} +\ikwd{new\@\texttt{new}} 
+\ikwd{object\@\texttt{object}} + +\begin{syntax} +expr: + value-path + | constant + | '(' expr ')' + | 'begin' expr 'end' + | '(' expr ':' typexpr ')' + | expr {{',' expr}} + | constr expr + | "`"tag-name expr + | expr '::' expr + | '[' expr { ';' expr } [';'] ']' + | '[|' expr { ';' expr } [';'] '|]' + | '{' field '=' expr { ';' field '=' expr } [';'] '}' + | '{' expr 'with' field '=' expr { ';' field '=' expr } [';'] '}' + | expr {{ argument }} + | prefix-symbol expr + | '-' expr + | '-.' expr + | expr infix-op expr + | expr '.' field + | expr '.' field '<-' expr + | expr '.(' expr ')' + | expr '.(' expr ')' '<-' expr + | expr '.[' expr ']' + | expr '.[' expr ']' '<-' expr + | 'if' expr 'then' expr [ 'else' expr ] + | 'while' expr 'do' expr 'done' + | 'for' value-name '=' expr ( 'to' || 'downto' ) expr 'do' expr 'done' + | expr ';' expr + | 'match' expr 'with' pattern-matching + | 'function' pattern-matching + | 'fun' {{ parameter }} '->' expr + | 'try' expr 'with' pattern-matching + | 'let' ['rec'] let-binding { 'and' let-binding } 'in' expr + | 'new' class-path + | 'object' class-body 'end' + | expr '#' method-name + | inst-var-name + | inst-var-name '<-' expr + | '(' expr ':>' typexpr ')' + | '(' expr ':' typexpr ':>' typexpr ')' + | '{<' [ inst-var-name '=' expr { ';' inst-var-name '=' expr } [';'] ] '>}' +; +%BEGIN LATEX +\end{syntax} \begin{syntax} +%END LATEX +argument: + expr + | '~' label-name + | '~' label-name ':' expr + | '?' label-name + | '?' label-name ':' expr +; +%\end{syntax} \begin{syntax} +pattern-matching: + [ '|' ] pattern ['when' expr] '->' expr + { '|' pattern ['when' expr] '->' expr } +; +let-binding: + pattern '=' expr + | value-name { parameter } [':' typexpr] [':>' typexpr] '=' expr +; +parameter: + pattern + | '~' label-name + | '~' '(' label-name [':' typexpr] ')' + | '~' label-name ':' pattern + | '?' label-name + | '?' '(' label-name [':' typexpr] ['=' expr] ')' + | '?' label-name ':' pattern + | '?' 
label-name ':' '(' pattern [':' typexpr] ['=' expr] ')' +\end{syntax} + +The table below shows the relative precedences and associativity of +operators and non-closed constructions. The constructions with higher +precedence come first. For infix and prefix symbols, we write +``"*"\ldots'' to mean ``any symbol starting with "*"''. +\ikwd{or\@\texttt{or}}% +\ikwd{if\@\texttt{if}}% +\ikwd{fun\@\texttt{fun}}% +\ikwd{function\@\texttt{function}}% +\ikwd{match\@\texttt{match}}% +\ikwd{try\@\texttt{try}}% +\ikwd{let\@\texttt{let}}% +\begin{tableau}{|l|l|}{Construction or operator}{Associativity} +\entree{prefix-symbol}{--} +\entree{". .( .[ .{" (see section~\ref{s:bigarray-access})}{--} +\entree{"#"\ldots}{--} +\entree{function application, constructor application, tag + application, "assert" (see~\ref{s:assert}), + "lazy" (see~\ref{s:lazy})}{left} +\entree{"- -." (prefix)}{--} +\entree{"**"\ldots" lsl lsr asr"}{right} +\entree{"*"\ldots" /"\ldots" %"\ldots" mod land lor lxor"}{left} + %% "`"@ident@"`" +\entree{"+"\ldots" -"\ldots}{left} +\entree{"::"}{right} +\entree{{\tt \char64}\ldots " ^"\ldots}{right} +\entree{"="\ldots" <"\ldots" >"\ldots" |"\ldots" &"\ldots" $"\ldots" !="}{left} +\entree{"& &&"}{right} +\entree{"or ||"}{right} +\entree{","}{--} +\entree{"<- :="}{right} +\entree{"if"}{--} +\entree{";"}{right} +\entree{"let match fun function try"}{--} +\end{tableau} + +\subsection{Basic expressions} + +\subsubsection*{Constants} + +An expression consisting in a constant evaluates to this constant. + +\subsubsection*{Value paths} \label{expr:var} + +An expression consisting in an access path evaluates to the value bound to +this path in the current evaluation environment. The path can +be either a value name or an access path to a value component of a module. + +\subsubsection*{Parenthesized expressions} +\ikwd{begin\@\texttt{begin}} +\ikwd{end\@\texttt{end}} + +The expressions @'(' expr ')'@ and @'begin' expr 'end'@ have the same +value as @expr@. 
The two constructs are semantically equivalent, but it +is good style to use @'begin' \ldots 'end'@ inside control structures: +\begin{alltt} + if \ldots then begin \ldots ; \ldots end else begin \ldots ; \ldots end +\end{alltt} +and @'(' \ldots ')'@ for the other grouping situations. + +Parenthesized expressions can contain a type constraint, as in @'(' +expr ':' typexpr ')'@. This constraint forces the type of @expr@ to be +compatible with @typexpr@. + +Parenthesized expressions can also contain coercions +@'(' expr [':' typexpr] ':>' typexpr')'@ (see +subsection~\ref{s:coercions} below). + + +\subsubsection*{Function application} + +Function application is denoted by juxtaposition of (possibly labeled) +expressions. The expression @expr argument_1 \ldots argument_n@ +evaluates the expression @expr@ and those appearing in @argument_1@ +to @argument_n@. The expression @expr@ must evaluate to a +functional value $f$, which is then applied to the values of +@argument_1, \ldots, argument_n@. + +The order in which the expressions @expr, argument_1, \ldots, +argument_n@ are evaluated is not specified. + +Arguments and parameters are matched according to their respective +labels. Argument order is irrelevant, except among arguments with the +same label, or no label. + +If a parameter is specified as optional (label prefixed by @"?"@) in the +type of @expr@, the corresponding argument will be automatically +wrapped with the constructor "Some", except if the argument itself is +also prefixed by @"?"@, in which case it is passed as is. +% +If a non-labeled argument is passed, and its corresponding parameter +is preceded by one or several optional parameters, then these +parameters are {\em defaulted}, {\em i.e.} the value "None" will be +passed for them. +% +All other missing parameters (without corresponding argument), both +optional and non-optional, will be kept, and the result of the +function will still be a function of these missing parameters to the +body of $f$. 
+ +As a special case, if the function has a known arity, all the +arguments are unlabeled, and their number matches the number of +non-optional parameters, then labels are ignored and non-optional +parameters are matched in their definition order. Optional arguments +are defaulted. + +In all cases but exact match of order and labels, without optional +parameters, the function type should be known at the application +point. This can be ensured by adding a type constraint. Principality +of the derivation can be checked in the "-principal" mode. + +\subsubsection*{Function definition} + +Two syntactic forms are provided to define functions. The first form +is introduced by the keyword "function": +\ikwd{function\@\texttt{function}} + +$$\begin{array}{rlll} +\token{function} & \textsl{pattern}_1 & \token{->} & \textsl{expr}_1 \\ +\token{|} & \ldots \\ +\token{|} & \textsl{pattern}_n & \token{->} & \textsl{expr}_n +\end{array}$$ +This expression evaluates to a functional value with one argument. +When this function is applied to a value \var{v}, this value is +matched against each pattern @pattern_1@ to @pattern_n@. +If one of these matchings succeeds, that is, if the value \var{v} +matches the pattern @pattern_i@ for some \var{i}, +then the expression @expr_i@ associated to the selected pattern +is evaluated, and its value becomes the value of the function +application. The evaluation of @expr_i@ takes place in an +environment enriched by the bindings performed during the matching. + +If several patterns match the argument \var{v}, the one that occurs +first in the function definition is selected. If none of the patterns +matches the argument, the exception "Match_failure" is raised. 
+% +\index{Matchfailure\@\verb`Match_failure`} + +\medskip + +The other form of function definition is introduced by the keyword "fun": +\ikwd{fun\@\texttt{fun}} +\begin{center} +@"fun" parameter_1 \ldots parameter_n "->" expr@ +\end{center} +This expression is equivalent to: +\begin{center} +@"fun" parameter_1 "->" \ldots "fun" parameter_n "->" expr@ +\end{center} + +The parameter patterns @"~"lab@ and @"~("lab [":" typ]")"@ +are shorthands for respectively @"~"lab":"lab@ and +@"~"lab":("lab [":" typ]")"@, and similarly for their optional +counterparts. + +A function of the form @"fun" "?" lab ":(" pattern '=' expr_0 ')' '->' +expr@ is equivalent to +\begin{center} +@"fun" "?" lab ":" ident '->' + "let" pattern '=' + "match" ident "with" "Some" ident "->" ident '|' "None" '->' expr_0 + "in" expr@ +\end{center} +where @ident@ +is a fresh variable, except that it is unspecified when @expr_0@ is evaluated. + +After these two transformations, expressions are of the form +\begin{center} +@"fun" [label_1] pattern_1 "->" \ldots "fun" [label_n] pattern_n "->" expr@ +\end{center} +If we ignore labels, which will only be meaningful at function +application, this is equivalent to +\begin{center} +@"function" pattern_1 "->" \ldots "function" pattern_n "->" expr@ +\end{center} +That is, the @"fun"@ expression above evaluates to a curried function +with \var{n} arguments: after applying this function $n$ times to the +values @@v@_1 \ldots @v@_n@, the values will be matched +in parallel against the patterns @pattern_1 \ldots pattern_n@. +If the matching succeeds, the function returns the value of @expr@ in +an environment enriched by the bindings performed during the matchings. +If the matching fails, the exception "Match_failure" is raised. 
+ +\subsubsection*{Guards in pattern-matchings} + +\ikwd{when\@\texttt{when}} +The cases of a pattern matching (in the @"function"@, @"match"@ and +@"try"@ constructs) can include guard expressions, which are +arbitrary boolean expressions that must evaluate to "true" for the +match case to be selected. Guards occur just before the @"->"@ token and +are introduced by the @"when"@ keyword: + +$$\begin{array}{rlll} +\token{function} & \nt{pattern}_1 \; [\token{when} \; \nt{cond}_1] & \token{->} & \nt{expr}_1 \\ +\token{|} & \ldots \\ +\token{|} & \nt{pattern}_n \; [\token{when} \; \nt{cond}_n] & \token{->} & \nt{expr}_n +\end{array}$$ + + +Matching proceeds as described before, except that if the value +matches some pattern @pattern_i@ which has a guard @@cond@_i@, then the +expression @@cond@_i@ is evaluated (in an environment enriched by the +bindings performed during matching). If @@cond@_i@ evaluates to "true", +then @expr_i@ is evaluated and its value returned as the result of the +matching, as usual. But if @@cond@_i@ evaluates to "false", the matching +is resumed against the patterns following @pattern_i@. + +\subsubsection*{Local definitions} \label{s:localdef} + +\ikwd{let\@\texttt{let}} + +The @"let"@ and @"let" "rec"@ constructs bind value names locally. +The construct +\begin{center} +@"let" pattern_1 "=" expr_1 "and" \ldots "and" pattern_n "=" expr_n "in" expr@ +\end{center} +evaluates @expr_1 \ldots expr_n@ in some unspecified order and matches +their values against the patterns @pattern_1 \ldots pattern_n@. If the +matchings succeed, @expr@ is evaluated in the environment enriched by +the bindings performed during matching, and the value of @expr@ is +returned as the value of the whole @"let"@ expression. If one of the +matchings fails, the exception "Match_failure" is raised. 
+% +\index{Matchfailure\@\verb`Match_failure`} + +An alternate syntax is provided to bind variables to functional +values: instead of writing +\begin{center} +@"let" ident "=" "fun" parameter_1 \ldots parameter_m "->" expr@ +\end{center} +in a @"let"@ expression, one may instead write +\begin{center} +@"let" ident parameter_1 \ldots parameter_m "=" expr@ +\end{center} + +\medskip +\noindent +Recursive definitions of names are introduced by @"let" "rec"@: +\begin{center} +@"let" "rec" pattern_1 "=" expr_1 "and" \ldots "and" pattern_n "=" expr_n + "in" expr@ +\end{center} +The only difference with the @"let"@ construct described above is +that the bindings of names to values performed by the +pattern-matching are considered already performed when the expressions +@expr_1@ to @expr_n@ are evaluated. That is, the expressions @expr_1@ +to @expr_n@ can reference identifiers that are bound by one of the +patterns @pattern_1, \ldots, pattern_n@, and expect them to have the +same value as in @expr@, the body of the @"let" "rec"@ construct. + +The recursive definition is guaranteed to behave as described above if +the expressions @expr_1@ to @expr_n@ are function definitions +(@"fun" \ldots@ or @"function" \ldots@), and the patterns @pattern_1 +\ldots pattern_n@ are just value names, as in: +\begin{center} +@"let" "rec" name_1 "=" "fun" \ldots +"and" \ldots +"and" name_n "=" "fun" \ldots +"in" expr@ +\end{center} +This defines @name_1 \ldots name_n@ as mutually recursive functions +local to @expr@. + +The behavior of other forms of @"let" "rec"@ definitions is +implementation-dependent. The current implementation also supports +a certain class of recursive definitions of non-functional values, +as explained in section~\ref{s:letrecvalues}. + +\subsection{Control structures} + +\subsubsection*{Sequence} + +The expression @expr_1 ";" expr_2@ evaluates @expr_1@ first, then +@expr_2@, and returns the value of @expr_2@. 
+ +\subsubsection*{Conditional} +\ikwd{if\@\texttt{if}} + +The expression @"if" expr_1 "then" expr_2 "else" expr_3@ evaluates to +the value of @expr_2@ if @expr_1@ evaluates to the boolean @"true"@, +and to the value of @expr_3@ if @expr_1@ evaluates to the boolean +@"false"@. + +The @"else" expr_3@ part can be omitted, in which case it defaults to +@"else" "()"@. + +\subsubsection*{Case expression}\ikwd{match\@\texttt{match}} + +The expression +$$\begin{array}{rlll} +\token{match} & \textsl{expr} \\ +\token{with} & \textsl{pattern}_1 & \token{->} & \textsl{expr}_1 \\ +\token{|} & \ldots \\ +\token{|} & \textsl{pattern}_n & \token{->} & \textsl{expr}_n +\end{array}$$ +matches the value of @expr@ against the patterns @pattern_1@ to +@pattern_n@. If the matching against @pattern_i@ succeeds, the +associated expression @expr_i@ is evaluated, and its value becomes the +value of the whole @'match'@ expression. The evaluation of +@expr_i@ takes place in an environment enriched by the bindings +performed during matching. If several patterns match the value of +@expr@, the one that occurs first in the @'match'@ expression is +selected. If none of the patterns match the value of @expr@, the +exception "Match_failure" is raised. +% +\index{Matchfailure\@\verb`Match_failure`} + +\subsubsection*{Boolean operators} + +The expression @expr_1 '&&' expr_2@ evaluates to @'true'@ if both +@expr_1@ and @expr_2@ evaluate to @'true'@; otherwise, it evaluates to +@'false'@. The first component, @expr_1@, is evaluated first. The +second component, @expr_2@, is not evaluated if the first component +evaluates to @'false'@. Hence, the expression @expr_1 '&&' expr_2@ behaves +exactly as +\begin{center} +@'if' expr_1 'then' expr_2 'else' 'false'@. +\end{center} + +The expression @expr_1 '||' expr_2@ evaluates to @'true'@ if one of +the expressions +@expr_1@ and @expr_2@ evaluates to @'true'@; otherwise, it evaluates to +@'false'@. The first component, @expr_1@, is evaluated first. 
The +second component, @expr_2@, is not evaluated if the first component +evaluates to @'true'@. Hence, the expression @expr_1 '||' expr_2@ behaves +exactly as +\begin{center} +@'if' expr_1 'then' 'true' 'else' expr_2@. +\end{center} + +\ikwd{or\@\texttt{or}} +The boolean operators @'&'@ and @'or'@ are deprecated synonyms for +(respectively) @'&&'@ and @'||'@. + +\subsubsection*{Loops} + +\ikwd{while\@\texttt{while}} +The expression @'while' expr_1 'do' expr_2 'done'@ repeatedly +evaluates @expr_2@ while @expr_1@ evaluates to @'true'@. The loop +condition @expr_1@ is evaluated and tested at the beginning of each +iteration. The whole @'while' \ldots 'done'@ expression evaluates to +the unit value @'()'@. + +\ikwd{for\@\texttt{for}} +The expression @'for' name '=' expr_1 'to' expr_2 'do' expr_3 'done'@ +first evaluates the expressions @expr_1@ and @expr_2@ (the boundaries) +into integer values \var{n} and \var{p}. Then, the loop body @expr_3@ is +repeatedly evaluated in an environment where @name@ is successively +bound to the values + $n$, $n+1$, \ldots, $p-1$, $p$. + The loop body is never evaluated if $n > p$. + + +The expression @'for' name '=' expr_1 'downto' expr_2 'do' expr_3 'done'@ +evaluates similarly, except that @name@ is successively bound to the values + $n$, $n-1$, \ldots, $p+1$, $p$. + The loop body is never evaluated if $n < p$. + + +In both cases, the whole @'for'@ expression evaluates to the unit +value @'()'@. + +\subsubsection*{Exception handling} +\ikwd{try\@\texttt{try}} + +The expression +$$\begin{array}{rlll} +\token{try~} & \textsl{expr} \\ +\token{with} & \textsl{pattern}_1 & \token{->} & \textsl{expr}_1 \\ +\token{|} & \ldots \\ +\token{|} & \textsl{pattern}_n & \token{->} & \textsl{expr}_n +\end{array}$$ +evaluates the expression @expr@ and returns its value if the +evaluation of @expr@ does not raise any exception. 
If the evaluation +of @expr@ raises an exception, the exception value is matched against +the patterns @pattern_1@ to @pattern_n@. If the matching against +@pattern_i@ succeeds, the associated expression @expr_i@ is evaluated, +and its value becomes the value of the whole @'try'@ expression. The +evaluation of @expr_i@ takes place in an environment enriched by the +bindings performed during matching. If several patterns match the +exception value, the one that occurs first in the @'try'@ expression is +selected. If none of the patterns matches the exception value, the +exception is raised again, thereby transparently ``passing +through'' the @'try'@ construct. + +\subsection{Operations on data structures} + +\subsubsection*{Products} + +The expression @expr_1 ',' \ldots ',' expr_n@ evaluates to the +\var{n}-tuple of the values of expressions @expr_1@ to @expr_n@. The +evaluation order of the subexpressions is not specified. + +\subsubsection*{Variants} + +The expression @constr expr@ evaluates to the unary variant value +whose constructor is @constr@, and whose argument is the value of +@expr@. Similarly, the expression @constr '(' expr_1 ',' \ldots ',' +expr_n ')'@ evaluates to the n-ary variant value whose constructor is +@constr@ and whose arguments are the values of @expr_1, \ldots, +expr_n@. + +The expression @constr '('expr_1, \ldots, expr_n')'@ evaluates to the +variant value whose constructor is @constr@, and whose arguments are +the values of @expr_1 \ldots expr_n@. + +For lists, some syntactic sugar is provided. The expression +@expr_1 '::' expr_2@ stands for the constructor @'(' '::' ')' @ +applied to the arguments @'(' expr_1 ',' expr_2 ')'@, and therefore +evaluates to the list whose head is the value of @expr_1@ and whose tail +is the value of @expr_2@. 
The expression @'[' expr_1 ';' \ldots ';' +expr_n ']'@ is equivalent to @expr_1 '::' \ldots '::' expr_n '::' +'[]'@, and therefore evaluates to the list whose elements are the +values of @expr_1@ to @expr_n@. + +\subsubsection*{Polymorphic variants} + +The expression @"`"tag-name expr@ evaluates to the polymorphic variant +value whose tag is @tag-name@, and whose argument is the value of @expr@. + +\subsubsection*{Records} + +The expression @'{' field_1 '=' expr_1 ';' \ldots ';' field_n '=' +expr_n '}'@ evaluates to the record value +$\{ field_1 = v_1; \ldots; field_n = v_n \}$ +where $v_i$ is the value of @expr_i@ for \fromoneto{i}{n}. +The fields @field_1@ to @field_n@ must all belong to the same record +type; each field of this record type must appear exactly +once in the record expression, though they can appear in any +order. The order in which @expr_1@ to @expr_n@ are evaluated is not +specified. + +The expression +@"{" expr "with" field_1 "=" expr_1 ";" \ldots ";" field_n "=" expr_n "}"@ +builds a fresh record with fields @field_1 \ldots field_n@ equal to +@expr_1 \ldots expr_n@, and all other fields having the same value as +in the record @expr@. In other terms, it returns a shallow copy of +the record @expr@, except for the fields @field_1 \ldots field_n@, +which are initialized to @expr_1 \ldots expr_n@. + +The expression @expr_1 '.' field@ evaluates @expr_1@ to a record +value, and returns the value associated to @field@ in this record +value. + +The expression @expr_1 '.' field '<-' expr_2@ evaluates @expr_1@ to a record +value, which is then modified in-place by replacing the value +associated to @field@ in this record by the value of +@expr_2@. This operation is permitted only if @field@ has been +declared @'mutable'@ in the definition of the record type. The whole +expression @expr_1 '.' field '<-' expr_2@ evaluates to the unit value +@'()'@. 
+ +\subsubsection*{Arrays} + +The expression @'[|' expr_1 ';' \ldots ';' expr_n '|]'@ evaluates to +an \var{n}-element array, whose elements are initialized with the values of +@expr_1@ to @expr_n@ respectively. The order in which these +expressions are evaluated is unspecified. + +The expression @expr_1 '.(' expr_2 ')'@ returns the value of element +number @expr_2@ in the array denoted by @expr_1@. The first element +has number 0; the last element has number $n-1$, where \var{n} is the +size of the array. The exception "Invalid_argument" is raised if the +access is out of bounds. + +The expression @expr_1 '.(' expr_2 ')' '<-' expr_3@ modifies in-place +the array denoted by @expr_1@, replacing element number @expr_2@ by +the value of @expr_3@. The exception "Invalid_argument" is raised if +the access is out of bounds. The value of the whole expression is @'()'@. + +\subsubsection*{Strings} + +The expression @expr_1 '.[' expr_2 ']'@ returns the value of character +number @expr_2@ in the string denoted by @expr_1@. The first character +has number 0; the last character has number $n-1$, where \var{n} is the +length of the string. The exception "Invalid_argument" is raised if the +access is out of bounds. + +The expression @expr_1 '.[' expr_2 ']' '<-' expr_3@ modifies in-place +the string denoted by @expr_1@, replacing character number @expr_2@ by +the value of @expr_3@. The exception "Invalid_argument" is raised if +the access is out of bounds. The value of the whole expression is @'()'@. + +{\bf Note:} this possibility is offered only for backward +compatibility with older versions of OCaml and will be removed in a +future version. New code should use byte sequences and the "Bytes.set" +function. 
+ +\subsection{Operators} + +Symbols from the class @infix-symbol@, as well as the keywords +@"*"@, @"+"@, @"-"@, @'-.'@, @"="@, @"!="@, @"<"@, @">"@, @"or"@, @"||"@, +@"&"@, @"&&"@, @":="@, @"mod"@, @"land"@, @"lor"@, @"lxor"@, @"lsl"@, @"lsr"@, +and @"asr"@ can appear in infix position (between two +expressions). Symbols from the class @prefix-symbol@, as well as +the keywords @"-"@ and @"-."@ +can appear in prefix position (in front of an expression). + +Infix and prefix symbols do not have a fixed meaning: they are simply +interpreted as applications of functions bound to the names +corresponding to the symbols. The expression @prefix-symbol expr@ is +interpreted as the application @'(' prefix-symbol ')' +expr@. Similarly, the expression @expr_1 infix-symbol expr_2@ is +interpreted as the application @'(' infix-symbol ')' expr_1 expr_2@. + +The table below lists the symbols defined in the initial environment +and their initial meaning. (See the description of the core +library module "Pervasives" in chapter~\ref{c:corelib} for more +details). Their meaning may be changed at any time using +@"let" "(" infix-op ")" name_1 name_2 "=" \ldots@ + +Note: the operators @'&&'@, @'||'@, and @'~-'@ are handled specially +and it is not advisable to change their meaning. + +The keywords @'-'@ and @'-.'@ can appear both as infix and +prefix operators. When they appear as prefix operators, they are +interpreted respectively as the functions @'(~-)'@ and @'(~-.)'@. + +%% Conversely, a regular function identifier can also be used as an infix +%% operator by enclosing it in backquotes: @expr_1 '`' ident '`' expr_2@ +%% is interpreted as the application @ident expr_1 expr_2@. + +\begin{tableau}{|l|p{12cm}|}{Operator}{Initial meaning} +\entree{"+"}{Integer addition.} +\entree{"-" (infix)}{Integer subtraction.} +\entree{"~- -" (prefix)}{Integer negation.} +\entree{"*"}{Integer multiplication.} +\entree{"/"}{Integer division. 
+ Raise "Division_by_zero" if second argument is zero.} +\entree{"mod"}{Integer modulus. Raise + "Division_by_zero" if second argument is zero.} +\entree{"land"}{Bitwise logical ``and'' on integers.} +\entree{"lor"}{Bitwise logical ``or'' on integers.} +\entree{"lxor"}{Bitwise logical ``exclusive or'' on integers.} +\entree{"lsl"}{Bitwise logical shift left on integers.} +\entree{"lsr"}{Bitwise logical shift right on integers.} +\entree{"asr"}{Bitwise arithmetic shift right on integers.} +\entree{"+."}{Floating-point addition.} +\entree{"-." (infix)}{Floating-point subtraction.} +\entree{"~-. -." (prefix)}{Floating-point negation.} +\entree{"*."}{Floating-point multiplication.} +\entree{"/."}{Floating-point division.} +\entree{"**"}{Floating-point exponentiation.} +\entree{{\tt\char64} }{List concatenation.} +\entree{"^" }{String concatenation.} +\entree{"!" }{Dereferencing (return the current + contents of a reference).} +\entree{":="}{Reference assignment (update the + reference given as first argument with the value of the second + argument).} +\entree{"=" }{Structural equality test.} +\entree{"<>" }{Structural inequality test.} +\entree{"==" }{Physical equality test.} +\entree{"!=" }{Physical inequality test.} +\entree{"<" }{Test ``less than''.} +\entree{"<=" }{Test ``less than or equal''.} +\entree{">" }{Test ``greater than''.} +\entree{">=" }{Test ``greater than or equal''.} +\entree{"&& &"}{Boolean conjunction.} +\entree{"|| or"}{Boolean disjunction.} +\end{tableau} + +\subsection{Objects} \label{s:objects} + +\subsubsection*{Object creation} + +\ikwd{new\@\texttt{new}} + +When @class-path@ evaluates to a class body, @'new' class-path@ +evaluates to a new object containing the instance variables and +methods of this class. + +When @class-path@ evaluates to a class function, @'new' class-path@ +evaluates to a function expecting the same number of arguments and +returning a new object of this class. 
+ +\subsubsection*{Immediate object creation} + +\ikwd{object\@\texttt{object}} + +Directly creating an object through the @'object' class-body 'end'@ +construct is operationally equivalent to locally defining a @'class' +class-name '=' 'object' class-body 'end'@ ---see sections +\ref{ss:class-body} and following for the syntax of @class-body@--- +and immediately creating a single object from it by @'new' class-name@. + +The typing of immediate objects is slightly different from explicitly +defining a class in two respects. First, the inferred object type may +contain free type variables. Second, since the class body of an +immediate object will never be extended, its self type can be unified +with a closed object type. + +\subsubsection*{Method invocation} + +The expression @expr '#' method-name@ invokes the method +@method-name@ of the object denoted by @expr@. + +If @method-name@ is a polymorphic method, its type should be known at +the invocation site. This is true for instance if @expr@ is the name +of a fresh object (@'let' ident = 'new' class-path \dots @) or if +there is a type constraint. Principality of the derivation can be +checked in the "-principal" mode. + +\subsubsection*{Accessing and modifying instance variables} + +The instance variables of a class are visible only in the body of the +methods defined in the same class or a class that inherits from the +class defining the instance variables. The expression @inst-var-name@ +evaluates to the value of the given instance variable. The expression +@inst-var-name '<-' expr@ assigns the value of @expr@ to the instance +variable @inst-var-name@, which must be mutable. The whole expression +@inst-var-name '<-' expr@ evaluates to @"()"@. + + +\subsubsection*{Object duplication} + +An object can be duplicated using the library function "Oo.copy" +(see +\ifouthtml \ahref{libref/Oo.html}{Module \texttt{Oo}}\else +section~\ref{Oo}\fi). 
Inside a method, the expression +@ '{<' inst-var-name '=' expr { ';' inst-var-name '=' expr } '>}'@ +returns a copy of self with the given instance variables replaced by +the values of the associated expressions; other instance variables +have the same value in the returned object as in self. + +\subsection{Coercions} \label{s:coercions} + +Expressions whose type contains object or polymorphic variant types +can be explicitly coerced (weakened) to a supertype. +% +The expression @'('expr ':>' typexpr')'@ coerces the expression @expr@ +to type @typexpr@. +% +The expression @'('expr ':' typexpr_1 ':>' typexpr_2')'@ coerces the +expression @expr@ from type @typexpr_1@ to type @typexpr_2@. + +The former operator will sometimes fail to coerce an expression @expr@ +from a type @typ_1@ to a type @typ_2@ +even if type @typ_1@ is a subtype of type +@typ_2@: in the current implementation it only expands two levels of +type abbreviations containing objects and/or polymorphic variants, +keeping only recursion when it is explicit in the class type (for objects). +As an exception to the above algorithm, if both the inferred type of @expr@ +and @typ_2@ are ground ({\em i.e.} do not contain type variables), the +former operator behaves as the latter one, taking the inferred type of +@expr@ as @typ_1@. In case of failure with the former operator, +the latter one should be used. + +It is only possible to coerce an expression @expr@ from type +@typ_1@ to type @typ_2@, if the type of @expr@ is an instance of +@typ_1@ (as for a type annotation), and @typ_1@ is a subtype +of @typ_2@. The type of the coerced expression is an +instance of @typ_2@. If the types contain variables, +they may be instantiated by the subtyping algorithm, but this is only +done after determining whether @typ_1@ is a potential subtype of +@typ_2@. This means that typing may fail during this latter +unification step, even if some instance of @typ_1@ is a subtype of +some instance of @typ_2@. 
+% +In the following paragraphs we describe the subtyping relation used. + +\subsubsection*{Object types} + +A fixed object type admits as subtype any object type that includes all +its methods. The types of the methods shall be subtypes of those in +the supertype. Namely, +\begin{center} +@ '<' met_1 ':' typ_1 ';' \dots ';' met_n ':' typ_n '>' @ +\end{center} +is a supertype of +\begin{center} +@ '<' met_1 ':' typ@$'_1$@ ';' \dots ';' met_n ':' typ@$'_n$@ ';' +met@$_{n+1}$@ ':' typ@$'_{n+1}$@ ';' \dots ';' met@$_{n+m}$@ ':' typ@$'_{n+m}$@ +~[';' '..'] '>' @ +\end{center} +which may contain an ellipsis ".." if every @typ_i@ is a supertype of +the corresponding @typ@$'_i$. + +A monomorphic method type can be a supertype of a polymorphic method +type. Namely, if @typ@ is an instance of @typ@$'$, then @ "'"@a@_1 +\dots "'"@a@_n '.' typ@$'$ is a subtype of @typ@. + +Inside a class definition, newly defined types are not available for +subtyping, as the type abbreviations are not yet completely +defined. There is an exception for coercing @@self@@ to the (exact) +type of its class: this is allowed if the type of @@self@@ does not +appear in a contravariant position in the class type, {\em i.e.} if +there are no binary methods. + +\subsubsection*{Polymorphic variant types} + +A polymorphic variant type @typ@ is a subtype of another polymorphic +variant type @typ@$'$ if the upper bound of @typ@ ({\em i.e.} the +maximum set of constructors that may appear in an instance of @typ@) +is included in the lower bound of @typ@$'$, and the types of arguments +for the constructors of @typ@ are subtypes of those in +@typ@$'$. 
Namely, +\begin{center} +@ "["["<"] "`"C_1 "of" typ_1 "|" \dots "|" "`"C_n "of" typ_n "]" @ +\end{center} +which may be a shrinkable type, is a subtype of +\begin{center} +@ "["[">"] "`"C_1 "of" typ@$'_1$@ "|" \dots "|" "`"C_n "of" typ@$'_n$@ + "|" "`"C@$_{n+1}$@ "of" typ@$'_{n+1}$@ "|" \dots "|" "`"C@$_{n+m}$@ "of" + typ@$'_{n+m}$@ "]" @ +\end{center} +which may be an extensible type, if every @typ_i@ is a subtype of @typ@$'_i$. + +\subsubsection*{Variance} + +Other types do not introduce new subtyping, but they may propagate the +subtyping of their arguments. For instance, @typ_1 "*" typ_2@ is a +subtype of @typ@$'_1$@ "*" typ@$'_2$ when @typ_1@ and @typ_2@ are +respectively subtypes of @typ@$'_1$ and @typ@$'_2$. +For function types, the relation is more subtle: +@typ_1 "->" typ_2@ is a subtype of @typ@$'_1$@~"->" typ@$'_2$ +if @typ_1@ is a supertype of @typ@$'_1$ and @typ_2@ is a +subtype of @typ@$'_2$. For this reason, function types are covariant in +their second argument (like tuples), but contravariant in their first +argument. Mutable types, like "array" or "ref", are neither covariant +nor contravariant; they are nonvariant, that is, they do not propagate +subtyping. + +For user-defined types, the variance is automatically inferred: a +parameter is covariant if it has only covariant occurrences, +contravariant if it has only contravariant occurrences, +variance-free if it has no occurrences, and nonvariant otherwise. +A variance-free parameter may change freely through subtyping; it does +not have to be a subtype or a supertype. +% +For abstract and private types, the variance must be given explicitly +(see section~\ref{s:type-defs}), +otherwise the default is nonvariant. This is also the case for +constrained arguments in type definitions. 
+ +%% \newpage diff --git a/manual/manual/refman/exten.etex b/manual/manual/refman/exten.etex new file mode 100644 index 0000000000..83d6a23a00 --- /dev/null +++ b/manual/manual/refman/exten.etex @@ -0,0 +1,2059 @@ +\chapter{Language extensions} \label{c:extensions} +\pdfchapter{Language extensions} +%HEVEA\cutname{extn.html} + +This chapter describes language extensions and convenience features +that are implemented in OCaml, but not described in the +OCaml reference manual. + +\section{Integer literals for types \texttt{int32}, \texttt{int64} + and \texttt{nativeint}} + +(Introduced in Objective Caml 3.07) + +\begin{syntax} +constant: ... + | int32-literal + | int64-literal + | nativeint-literal +; +int32-literal: integer-literal 'l' +; +int64-literal: integer-literal 'L' +; +nativeint-literal: integer-literal 'n' +\end{syntax} + +An integer literal can be followed by one of the letters "l", "L" or "n" +to indicate that this integer has type "int32", "int64" or "nativeint" +respectively, instead of the default type "int" for integer literals. +\index{int32\@\verb`int32`} +\index{int64\@\verb`int64`} +\index{nativeint\@\verb`nativeint`} +The library modules "Int32"[\moduleref{Int32}], +"Int64"[\moduleref{Int64}] and "Nativeint"[\moduleref{Nativeint}] +provide operations on these integer types. + +\section{Streams and stream parsers} \label{s:streams} + +(Removed in Objective Caml 3.03) + +The syntax for streams and stream parsers is no longer part of the +OCaml language, but available through a Camlp4 syntax +extension. See the Camlp4 reference manual for more information. +Support for basic operations on streams is still available through the +"Stream"[\moduleref{Stream}] module of the standard library. +OCaml programs +that use the stream parser syntax should be compiled with the +"-pp camlp4o" option to "ocamlc" and "ocamlopt". 
For interactive use, +run "ocaml" and issue the \verb'#load "dynlink.cma";;' command, +followed by the \verb'#load "camlp4o.cma";;' command. + +\section{Recursive definitions of values} \label{s:letrecvalues} + +(Introduced in Objective Caml 1.00) + +As mentioned in section~\ref{s:localdef}, the @'let' 'rec'@ binding +construct, in addition to the definition of recursive functions, +also supports a certain class of recursive definitions of +non-functional values, such as +\begin{center} +@"let" "rec" name_1 "=" "1" "::" name_2 +"and" name_2 "=" "2" "::" name_1 +"in" expr@ +\end{center} +which binds @name_1@ to the cyclic list "1::2::1::2::"\ldots, and +@name_2@ to the cyclic list "2::1::2::1::"\ldots +Informally, the class of accepted definitions consists of those +definitions where the defined names occur only inside function +bodies or as argument to a data constructor. + +More precisely, consider the expression: +\begin{center} +@"let" "rec" name_1 "=" expr_1 "and" \ldots "and" name_n "=" expr_n "in" expr@ +\end{center} +It will be accepted if each one of @expr_1 \ldots expr_n@ is +statically constructive with respect to @name_1 \ldots name_n@, +is not immediately linked to any of @name_1 \ldots name_n@, +and is not an array constructor whose arguments have abstract type. 
+ +An expression @@e@@ is said to be {\em statically constructive +with respect to} the variables @name_1 \ldots name_n@ if at least +one of the following conditions is true: +\begin{itemize} +\item @@e@@ has no free occurrence of any of @name_1 \ldots name_n@ +\item @@e@@ is a variable +\item @@e@@ has the form @"fun" \ldots "->" \ldots@ +\item @@e@@ has the form @"function" \ldots "->" \ldots@ +\item @@e@@ has the form @"lazy" "(" \ldots ")"@ +\item @@e@@ has one of the following forms, where each one of + @expr_1 \ldots expr_m@ is statically constructive with respect to + @name_1 \ldots name_n@, and @expr_0@ is statically constructive with + respect to @name_1 \ldots name_n, xname_1 \ldots xname_m@: + \begin{itemize} + \item @"let" ["rec"] xname_1 "=" expr_1 "and" \ldots + "and" xname_m "=" expr_m "in" expr_0@ + \item @"let" "module" \ldots "in" expr_1@ + \item @constr "("expr_1"," \ldots "," expr_m")"@ + \item @"`"tag-name "("expr_1"," \ldots "," expr_m")"@ + \item @"[|" expr_1";" \ldots ";" expr_m "|]"@ + \item @"{" field_1 "=" expr_1";" \ldots ";" field_m = expr_m "}"@ + \item @"{" expr_1 "with" field_2 "=" expr_2";" \ldots ";" + field_m = expr_m "}"@ where @expr_1@ is not immediately + linked to @name_1 \ldots name_n@ + \item @"(" expr_1"," \ldots "," expr_m ")"@ + \item @expr_1";" \ldots ";" expr_m@ + \end{itemize} +\end{itemize} + +An expression @@e@@ is said to be {\em immediately linked to} the variable +@name@ in the following cases: +\begin{itemize} +\item @@e@@ is @name@ +\item @@e@@ has the form @expr_1";" \ldots ";" expr_m@ where @expr_m@ + is immediately linked to @name@ +\item @@e@@ has the form @"let" ["rec"] xname_1 "=" expr_1 "and" \ldots + "and" xname_m "=" expr_m "in" expr_0@ where @expr_0@ is immediately + linked to @name@ or to one of the @xname_i@ such that @expr_i@ + is immediately linked to @name@. +\end{itemize} + +\section{Range patterns} \label{s:range-patterns} + +(Introduced in Objective Caml 1.00) + +\begin{syntax} +pattern: ... 
+ | char-literal '..' char-literal +\end{syntax} + +In patterns, OCaml recognizes the form +@"'" @c@ "'" ".." "'" @d@ "'"@ as shorthand for the pattern +\begin{center} +@"'" @c@ "'" "|" "'" @c@_1 "'" "|" "'" @c@_2 "'" "|" \ldots + "|" "'" @c@_n "'" "|" "'" @d@ "'"@ +\end{center} +where \nth{c}{1}, \nth{c}{2}, \ldots, \nth{c}{n} are the characters +that occur between \var{c} and \var{d} in the ASCII character set. For +instance, the pattern "'0'"@'..'@"'9'" matches all characters that are digits. + +\section{Assertion checking}\label{s:assert} +\ikwd{assert\@\texttt{assert}} + +(Introduced in Objective Caml 1.06) + +\begin{syntax} +expr: ... + | 'assert' expr +\end{syntax} + +OCaml supports the @"assert"@ construct to check debugging assertions. +The expression @"assert" expr@ evaluates the expression @expr@ and +returns @"()"@ if @expr@ evaluates to @"true"@. If it evaluates to +@"false"@ the exception +"Assert_failure" is raised with the source file name and the +location of @expr@ as arguments. Assertion +checking can be turned off with the "-noassert" compiler option. In +this case, @expr@ is not evaluated at all. + +As a special case, @"assert false"@ is reduced to +@'raise' '('@"Assert_failure ..."@')'@, which gives it a polymorphic +type. This means that it can be used in place of any expression (for +example as a branch of any pattern-matching). It also means that +the @"assert false"@ ``assertions'' cannot be turned off by the +"-noassert" option. +% +\index{Assertfailure\@\verb`Assert_failure`} + + +\section{Lazy evaluation} \label{s:lazy} +\ikwd{lazy\@\texttt{lazy}} + +\subsection{Lazy expressions} +(Introduced in Objective Caml 1.06) + +\begin{syntax} +expr: ... + | 'lazy' expr +\end{syntax} + +The expression @"lazy" expr@ returns a value \var{v} of type "Lazy.t" that +encapsulates the computation of @expr@. The argument @expr@ is not +evaluated at this point in the program. 
Instead, its evaluation will +be performed the first time the function "Lazy.force" is applied to the value +\var{v}, returning the actual value of @expr@. Subsequent applications +of "Lazy.force" to \var{v} do not evaluate @expr@ again. Applications +of "Lazy.force" may be implicit through pattern matching (see below). + +\subsection{Lazy patterns} +(Introduced in Objective Caml 3.11) + +\begin{syntax} +pattern: ... + | 'lazy' pattern +\end{syntax} + +The pattern @"lazy" pattern@ matches a value \var{v} of type "Lazy.t", +provided @pattern@ matches the result of forcing \var{v} with +"Lazy.force". A successful match of a pattern containing @"lazy"@ +sub-patterns forces the corresponding parts of the value being matched, even +those that imply no test such as @"lazy" value-name@ or @"lazy" "_"@. +Matching a value with a @pattern-matching@ where some patterns +contain @"lazy"@ sub-patterns may imply forcing parts of the value, +even when the pattern selected in the end has no @"lazy"@ sub-pattern. + +For more information, see the description of module "Lazy" in the +standard library ( +\ifouthtml +\ahref{libref/Lazy.html}{Module \texttt{Lazy}}\else section~\ref{Lazy}\fi). +% +\index{Lazy (module)\@\verb`Lazy` (module)}% +\index{force\@\verb`force`}% + +\section{Local modules} +\ikwd{let\@\texttt{let}} +\ikwd{module\@\texttt{module}} + +(Introduced in Objective Caml 2.00) + +\begin{syntax} +expr: + ... + | 'let' 'module' module-name { '(' module-name ':' module-type ')' } + [ ':' module-type ] \\ '=' module-expr 'in' expr +\end{syntax} + +The expression +@"let" "module" module-name "=" module-expr "in" expr@ +locally binds the module expression @module-expr@ to the identifier +@module-name@ during the evaluation of the expression @expr@. +It then returns the value of @expr@. 
For example: +\begin{verbatim} + let remove_duplicates comparison_fun string_list = + let module StringSet = + Set.Make(struct type t = string + let compare = comparison_fun end) in + StringSet.elements + (List.fold_right StringSet.add string_list StringSet.empty) +\end{verbatim} + +\section{Recursive modules} \label{s-recursive-modules} +\ikwd{module\@\texttt{module}} +\ikwd{rec\@\texttt{rec}} + +(Introduced in Objective Caml 3.07) + +% TODO: relaxed syntax + +\begin{syntax} +definition: + ... + | 'module' 'rec' module-name ':' module-type '=' module-expr \\ + { 'and' module-name ':' module-type '=' module-expr } +; +specification: + ... + | 'module' 'rec' module-name ':' module-type + { 'and' module-name':' module-type } +\end{syntax} + +Recursive module definitions, introduced by the @"module rec"@ \ldots +@"and"@ \ldots\ construction, generalize regular module definitions +@'module' module-name '=' module-expr@ and module specifications +@'module' module-name ':' module-type@ by allowing the defining +@module-expr@ and the @module-type@ to refer recursively to the module +identifiers being defined. 
A typical example of a recursive module +definition is: +\begin{verbatim} + module rec A : sig + type t = Leaf of string | Node of ASet.t + val compare: t -> t -> int + end + = struct + type t = Leaf of string | Node of ASet.t + let compare t1 t2 = + match (t1, t2) with + (Leaf s1, Leaf s2) -> Pervasives.compare s1 s2 + | (Leaf _, Node _) -> 1 + | (Node _, Leaf _) -> -1 + | (Node n1, Node n2) -> ASet.compare n1 n2 + end + and ASet : Set.S with type elt = A.t + = Set.Make(A) +\end{verbatim} +It can be given the following specification: +\begin{verbatim} + module rec A : sig + type t = Leaf of string | Node of ASet.t + val compare: t -> t -> int + end + and ASet : Set.S with type elt = A.t +\end{verbatim} + +This is an experimental extension of OCaml: the class of +recursive definitions accepted, as well as its dynamic semantics are +not final and subject to change in future releases. + +Currently, the compiler requires that all dependency cycles between +the recursively-defined module identifiers go through at least one +``safe'' module. A module is ``safe'' if all value definitions that +it contains have function types @typexpr_1 '->' typexpr_2@. Evaluation of a +recursive module definition proceeds by building initial values for +the safe modules involved, binding all (functional) values to +@'fun' '_' '->' 'raise' @"Undefined_recursive_module". The defining +module expressions are then evaluated, and the initial values +for the safe modules are replaced by the values thus computed. If a +function component of a safe module is applied during this computation +(which corresponds to an ill-founded recursive definition), the +"Undefined_recursive_module" exception is raised. + +Note that, in the @specification@ case, the @module-type@s must be +parenthesized if they use the @'with' mod-constraint@ construct. 
+ +\section{Private types} +\ikwd{private\@\texttt{private}} + +Private type declarations in module signatures, of the form +"type t = private ...", enable libraries to +reveal some, but not all aspects of the implementation of a type to +clients of the library. In this respect, they strike a middle ground +between abstract type declarations, where no information is revealed +on the type implementation, and data type definitions and type +abbreviations, where all aspects of the type implementation are +publicized. Private type declarations come in three flavors: for +variant and record types (section~\ref{s-private-types-variant}), +for type abbreviations (section~\ref{s-private-types-abbrev}), +and for row types (section~\ref{s-private-rows}). + +\subsection{Private variant and record types} \label{s-private-types-variant} + +(Introduced in Objective Caml 3.07) + +\begin{syntax} +type-representation: + ... + | '=' 'private' [ '|' ] constr-decl { '|' constr-decl } + | '=' 'private' '{' field-decl { ';' field-decl } [ ';' ] '}' +\end{syntax} + +Values of a variant or record type declared @"private"@ +can be de-structured normally in pattern-matching or via +the @expr '.' field@ notation for record accesses. However, values of +these types cannot be constructed directly by constructor application +or record construction. Moreover, assignment on a mutable field of a +private record type is not allowed. + +The typical use of private types is in the export signature of a +module, to ensure that construction of values of the private type always +goes through the functions provided by the module, while still allowing +pattern-matching outside the defining module. 
For example: +\begin{verbatim} + module M : sig + type t = private A | B of int + val a : t + val b : int -> t + end + = struct + type t = A | B of int + let a = A + let b n = assert (n > 0); B n + end +\end{verbatim} +Here, the @"private"@ declaration ensures that in any value of type +"M.t", the argument to the "B" constructor is always a positive integer. + +With respect to the variance of their parameters, private types are +handled like abstract types. That is, if a private type has +parameters, their variance is the one explicitly given by prefixing +the parameter by a `"+"' or a `"-"', it is invariant otherwise. + +\subsection{Private type abbreviations} \label{s-private-types-abbrev} + +(Introduced in Objective Caml 3.11) + +\begin{syntax} +type-equation: + ... + | '=' 'private' typexpr +\end{syntax} + +Unlike a regular type abbreviation, a private type abbreviation +declares a type that is distinct from its implementation type @typexpr@. +However, coercions from the type to @typexpr@ are permitted. +Moreover, the compiler ``knows'' the implementation type and can take +advantage of this knowledge to perform type-directed optimizations. +For ambiguity reasons, @typexpr@ cannot be an object or polymorphic +variant type, but a similar behaviour can be obtained through private +row types. + +The following example uses a private type abbreviation to define a +module of nonnegative integers: +\begin{verbatim} + module N : sig + type t = private int + val of_int: int -> t + val to_int: t -> int + end + = struct + type t = int + let of_int n = assert (n >= 0); n + let to_int n = n + end +\end{verbatim} +The type "N.t" is incompatible with "int", ensuring that nonnegative +integers and regular integers are not confused. However, if "x" has +type "N.t", the coercion "(x :> int)" is legal and returns the +underlying integer, just like "N.to_int x". 
Deep coercions are also +supported: if "l" has type "N.t list", the coercion "(l :> int list)" +returns the list of underlying integers, like "List.map N.to_int l" +but without copying the list "l". + +Note that the coercion @"(" expr ":>" typexpr ")"@ is actually an abbreviated +form, +and will only work in presence of private abbreviations if neither the +type of @expr@ nor @typexpr@ contain any type variables. If they do, +you must use the full form @"(" expr ":" typexpr_1 ":>" typexpr_2 ")"@ where +@typexpr_1@ is the expected type of @expr@. Concretely, this would be "(x : +N.t :> int)" and "(l : N.t list :> int list)" for the above examples. + +\subsection{Private row types} \label{s-private-rows} +\ikwd{private\@\texttt{private}} + +(Introduced in Objective Caml 3.09) + +\begin{syntax} +type-equation: + ... + | '=' 'private' typexpr +\end{syntax} + +Private row types are type abbreviations where part of the +structure of the type is left abstract. Concretely @typexpr@ in the +above should denote either an object type or a polymorphic variant +type, with some possibility of refinement left. If the private +declaration is used in an interface, the corresponding implementation +may either provide a ground instance, or a refined private type. +\begin{verbatim} + module M : sig type c = private < x : int; .. > val o : c end = + struct + class c = object method x = 3 method y = 2 end + let o = new c + end +\end{verbatim} +This declaration does more than hiding the "y" method, it also makes +the type "c" incompatible with any other closed object type, meaning +that only "o" will be of type "c". In that respect it behaves +similarly to private record types. But private row types are +more flexible with respect to incremental refinement. This feature can +be used in combination with functors. +\begin{verbatim} + module F(X : sig type c = private < x : int; .. > end) = + struct + let get_x (o : X.c) = o#x + end + module G(X : sig type c = private < x : int; y : int; .. 
> end) = + struct + include F(X) + let get_y (o : X.c) = o#y + end +\end{verbatim} + +A polymorphic variant type [t], for example +\begin{verbatim} + type t = [ `A of int | `B of bool ] +\end{verbatim} +can be refined in two ways. A definition [u] may add new fields to [t], +and the declaration +\begin{verbatim} + type u = private [> t] +\end{verbatim} +will keep those new fields abstract. Construction of values of type +[u] is possible using the known variants of [t], but any +pattern-matching will require a default case to handle the potential +extra fields. Dually, a declaration [v] may restrict the fields of [t] +through abstraction: the declaration +\begin{verbatim} + type v = private [< t > `A] +\end{verbatim} +corresponds to private variant types. One cannot create a value of the +private type [v], except using the constructors that are explicitly +listed as present, "(`A n)" in this example; yet, when +pattern-matching on a [v], one should assume that any of the +constructors of [t] could be present. + +Similarly to abstract types, the variance of type parameters +is not inferred, and must be given explicitly. + + +\section{Local opens} +\ikwd{let\@\texttt{let}} +\ikwd{open\@\texttt{open}} + +(Introduced in OCaml 3.12) + +\begin{syntax} +expr: + ... + | "let" "open" module-path "in" expr + | module-path '.(' expr ')' +\end{syntax} + +The expressions +@"let" "open" module-path "in" expr@ +and +@module-path'.('expr')'@ are strictly equivalent. They +locally open the module referred to by the module path @module-path@ in +the scope of the expression @expr@. + +Restricting opening to the scope of a single expression instead of a +whole structure allows one to benefit from shorter syntax to refer to +components of the opened module, without polluting the global +scope. Also, this can make the code easier to read (the open statement is +closer to where it is used) and to refactor (because the code +fragment is more self-contained). 
+ +\paragraph{Local opens for delimited expressions} (Introduced in OCaml 4.02) + +\begin{syntax} +expr: + ... + | module-path '.[' expr ']' + | module-path '.[|' expr '|]' + | module-path '.{' expr '}' + | module-path '.{<' expr '>}' +\end{syntax} + +When the body of a local open expression is delimited by @'[' ']'@, @'[|' '|]'@, @'{' '}'@, or @'{<' '>}'@, the parentheses can be omitted. For example, @module-path'.['expr']'@ is equivalent to @module-path'.(['expr'])'@, and @module-path'.[|' expr '|]'@ is equivalent to @module-path'.([|' expr '|])'@. + +\section{Record notations} + +(Introduced in OCaml 3.12) + +\begin{syntax} +pattern: + ... + | '{' field ['=' pattern] { ';' field ['=' pattern] } [';' '_' ] [';'] '}' +; +expr: + ... + | '{' field ['=' expr] { ';' field ['=' expr] } [';'] '}' + | '{' expr 'with' field ['=' expr] { ';' field ['=' expr] } [';'] '}' +\end{syntax} + +In a record pattern or a record construction expression, a single +identifier @id@ stands for @id '=' id@, and a qualified identifier +@module-path '.' id@ stands for @module-path '.' id '=' id@. +For example, assuming +the record type +\begin{verbatim} + type point = { x: float; y: float } +\end{verbatim} +has been declared, the following expressions are equivalent: +\begin{verbatim} + let x = 1 and y = 2 in { x = x; y = y } + let x = 1 and y = 2 in { x; y } + let x = 1 and y = 2 in { x = x; y } +\end{verbatim} +Likewise, the following functions are equivalent: +\begin{verbatim} + fun {x = x; y = y} -> x + y + fun {x; y} -> x + y +\end{verbatim} + +Optionally, a record pattern can be terminated by @';' '_'@ to convey the +fact that not all fields of the record type are listed in the record +pattern and that it is intentional. By default, the compiler ignores +the @';' '_'@ annotation. If warning 9 is turned on, +the compiler will warn when a record pattern fails to list all fields of +the corresponding record type and is not terminated by @';' '_'@. 
+Continuing the "point" example above, +\begin{verbatim} + fun {x} -> x + 1 +\end{verbatim} +\noindent will warn if warning 9 is on, while +\begin{verbatim} + fun {x; _} -> x + 1 +\end{verbatim} +\noindent will not warn. This warning can help spot program points where +record patterns may need to be modified after new fields are added to a +record type. + +\section{Explicit polymorphic type annotations} +\ikwd{let\@\texttt{let}} + +(Introduced in OCaml 3.12) + +\begin{syntax} +let-binding: + ... + | value-name ':' poly-typexpr '=' expr +\end{syntax} + +Polymorphic type annotations in @"let"@-definitions behave in a way +similar to polymorphic methods: they explicitly require the defined +value to be polymorphic, and allow one to use this polymorphism in +recursive occurrences (when using @"let" "rec"@). Note however that this +is a normal polymorphic type, unifiable with any instance of +itself. + +There are two possible applications of this feature. One is polymorphic +recursion: +\begin{verbatim} + type 'a t = Leaf of 'a | Node of ('a * 'a) t + let rec depth : 'a. 'a t -> 'b = function + Leaf _ -> 1 + | Node x -> 1 + depth x +\end{verbatim} +Note that "'b" is not explicitly polymorphic here, and it will +actually be unified with "int". + +The other application is to ensure that some definition is sufficiently +polymorphic. +\caml +\?let id : \textquotesingle\-a. \textquotesingle\-a -> \textquotesingle\-a = \<fun x -> x+1\> ;; +\:Error: This definition has type int -> int which is less general than +\: \textquotesingle\-a. \textquotesingle\-a -> \textquotesingle\-a +\endcaml + +\section{Locally abstract types} +\ikwd{type\@\texttt{type}} +\ikwd{fun\@\texttt{fun}} + +(Introduced in OCaml 3.12) + +\begin{syntax} +parameter: + ... 
+ | '(' "type" typeconstr-name ')' +\end{syntax} + +The expression @"fun" '(' "type" typeconstr-name ')' "->" expr@ introduces a +type constructor named @typeconstr-name@ which is considered abstract +in the scope of the sub-expression, but then replaced by a fresh type +variable. Note that contrary to what the syntax could suggest, the +expression @"fun" '(' "type" typeconstr-name ')' "->" expr@ itself does not +suspend the evaluation of @expr@ as a regular abstraction would. The +syntax has been chosen to fit nicely in the context of function +declarations, where it is generally used. It is possible to freely mix +regular function parameters with pseudo type parameters, as in: +\begin{verbatim} + let f = fun (type t) (foo : t list) -> ... +\end{verbatim} +and even use the alternative syntax for declaring functions: +\begin{verbatim} + let f (type t) (foo : t list) = ... +\end{verbatim} + +This construction is useful because the type constructor it introduces +can be used in places where a type variable is not allowed. For +instance, one can use it to define an exception in a local module +within a polymorphic function. +\begin{verbatim} + let f (type t) () = + let module M = struct exception E of t end in + (fun x -> M.E x), (function M.E x -> Some x | _ -> None) +\end{verbatim} + +Here is another example: +\begin{verbatim} + let sort_uniq (type s) (cmp : s -> s -> int) = + let module S = Set.Make(struct type t = s let compare = cmp end) in + fun l -> + S.elements (List.fold_right S.add l S.empty) +\end{verbatim} + +It is also extremely useful for first-class modules and GADTs. + +\paragraph{Polymorphic syntax} (Introduced in OCaml 4.00) + +\begin{syntax} +let-binding: + ... + | value-name ":" "type" {{ typeconstr-name }} "." typexpr "=" expr +; +class-field: + ... + | 'method' ['private'] method-name ':' 'type' + {{ typeconstr-name }} '.' typexpr '=' expr + | 'method!' ['private'] method-name ':' 'type' + {{ typeconstr-name }} '.' 
typexpr '=' expr +\end{syntax} + +The @"(type" typeconstr-name")"@ syntax construction by itself does not make +polymorphic the type variable it introduces, but it can be combined +with explicit polymorphic annotations where needed. +The above rule is provided as syntactic sugar to make this easier: +\begin{verbatim} + let rec f : type t1 t2. t1 * t2 list -> t1 = ... +\end{verbatim} +\noindent +is automatically expanded into +\begin{verbatim} + let rec f : 't1 't2. 't1 * 't2 list -> 't1 = + fun (type t1) (type t2) -> (... : t1 * t2 list -> t1) +\end{verbatim} + +The same feature is provided for method definitions. +The @'method!'@ form combines this extension with the +``explicit overriding'' extension described in +section~\ref{s:explicit-overriding}. + +\section{First-class modules}\label{s-first-class-modules} +\ikwd{module\@\texttt{module}} +\ikwd{val\@\texttt{val}} +\ikwd{with\@\texttt{with}} + +(Introduced in OCaml 3.12; pattern syntax and package type inference +introduced in 4.00; structural comparison of package types introduced in 4.02.) + +\begin{syntax} +typexpr: + ... + | '(''module' package-type')' +; +module-expr: + ... + | '(''val' expr [':' package-type]')' +; +expr: + ... + | '(''module' module-expr [':' package-type]')' +; +pattern: + ... + | '(''module' module-name [':' package-type]')' +; +package-type: + modtype-path + | modtype-path 'with' package-constraint { 'and' package-constraint } +; +package-constraint: + 'type' typeconstr '=' typexpr +; +\end{syntax} + +Modules are typically thought of as static components. This extension +makes it possible to pack a module as a first-class value, which can +later be dynamically unpacked into a module. + +The expression @'(' 'module' module-expr ':' package-type ')'@ converts the +module (structure or functor) denoted by module expression @module-expr@ +to a value of the core language that encapsulates this module. The +type of this core language value is @'(' 'module' package-type ')'@. 
+The @package-type@ annotation can be omitted if it can be inferred +from the context. + +Conversely, the module expression @'(' 'val' expr ':' package-type ')'@ +evaluates the core language expression @expr@ to a value, which must +have type @'module' package-type@, and extracts the module that was +encapsulated in this value. Again @package-type@ can be omitted if the +type of @expr@ is known. + +The pattern @'(' 'module' module-name ':' package-type ')'@ matches a +package with type @package-type@ and binds it to @module-name@. +It is not allowed in toplevel let bindings. +Again @package-type@ can be omitted if it can be inferred from the +enclosing pattern. + +The @package-type@ syntactic class appearing in the @'(' 'module' +package-type ')'@ type expression and in the annotated forms represents a +subset of module types. +This subset consists of named module types with optional constraints +of a limited form: only non-parametrized types can be specified. + +For type-checking purposes (and starting from OCaml 4.02), package types +are compared using the structural comparison of module types. + +In general, the module expression @'(' "val" expr ":" package-type +')'@ cannot be used in the body of a functor, because this could cause +unsoundness in conjunction with applicative functors. +Since OCaml 4.02, this is relaxed in two ways: +if @package-type@ does not contain nominal type declarations ({\em + i.e.} types that are created with a proper identity), then this +expression can be used anywhere, and even if it contains such types +it can be used inside the body of a generative +functor, described in section~\ref{s:generative-functors}. +It can also be used anywhere in the context of a local module binding +@'let' 'module' module-name '=' '(' "val" expr_1 ":" package-type ')' + "in" expr_2@. + +\paragraph{Basic example} A typical use of first-class modules is to +select at run-time among several implementations of a signature. 
+Each implementation is a structure that we can encapsulate as a +first-class module, then store in a data structure such as a hash +table: +\begin{verbatim} + module type DEVICE = sig ... end + let devices : (string, (module DEVICE)) Hashtbl.t = Hashtbl.create 17 + + module SVG = struct ... end + let _ = Hashtbl.add devices "SVG" (module SVG : DEVICE) + + module PDF = struct ... end + let _ = Hashtbl.add devices "PDF" (module PDF: DEVICE) +\end{verbatim} +We can then select one implementation based on command-line +arguments, for instance: +\begin{verbatim} + module Device = + (val (try Hashtbl.find devices (parse_cmdline()) + with Not_found -> eprintf "Unknown device %s\n"; exit 2) + : DEVICE) +\end{verbatim} +Alternatively, the selection can be performed within a function: +\begin{verbatim} + let draw_using_device device_name picture = + let module Device = + (val (Hashtbl.find devices device_name) : DEVICE) + in + Device.draw picture +\end{verbatim} + +\paragraph{Advanced examples} +With first-class modules, it is possible to parametrize some code over the +implementation of a module without using a functor. + +\begin{verbatim} + let sort (type s) (module Set : Set.S with type elt = s) l = + Set.elements (List.fold_right Set.add l Set.empty) + val sort : (module Set.S with type elt = 'a) -> 'a list -> 'a list +\end{verbatim} + +To use this function, one can wrap the "Set.Make" functor: + +\begin{verbatim} + let make_set (type s) cmp = + let module S = Set.Make(struct + type t = s + let compare = cmp + end) in + (module S : Set.S with type elt = s) + val make_set : ('a -> 'a -> int) -> (module Set.S with type elt = 'a) +\end{verbatim} + +\iffalse +Another advanced use of first-class modules is to encode existential +types. In particular, they can be used to simulate generalized +algebraic data types (GADT). 
To demonstrate this, we first define a type +of witnesses for type equalities: + +\begin{verbatim} + module TypEq : sig + type ('a, 'b) t + val apply: ('a, 'b) t -> 'a -> 'b + val refl: ('a, 'a) t + val sym: ('a, 'b) t -> ('b, 'a) t + end = struct + type ('a, 'b) t = ('a -> 'b) * ('b -> 'a) + let refl = (fun x -> x), (fun x -> x) + let apply (f, _) x = f x + let sym (f, g) = (g, f) + end +\end{verbatim} + +We can then define a parametrized algebraic data type whose +constructors provide some information about the type parameter: + +\begin{verbatim} + module rec Typ : sig + module type PAIR = sig + type t and t1 and t2 + val eq: (t, t1 * t2) TypEq.t + val t1: t1 Typ.typ + val t2: t2 Typ.typ + end + + type 'a typ = + | Int of ('a, int) TypEq.t + | String of ('a, string) TypEq.t + | Pair of (module PAIR with type t = 'a) + end = Typ +\end{verbatim} + +Values of type "'a typ" are supposed to be runtime representations for +the type "'a". The constructors "Int" and "String" are easy: they +directly give a witness of type equality between the parameter "'a" +and the ground types "int" (resp. "string"). The constructor "Pair" is +more complex. One wants to give a witness of type equality between +"'a" and a type of the form "t1 * t2" together with the representations +for "t1" and "t2". However, these two types are unknown. The code above +shows how to use first-class modules to simulate existentials. 
+ +Here is how to construct values of type "'a typ": + +\begin{verbatim} + let int = Typ.Int TypEq.refl + + let str = Typ.String TypEq.refl + + let pair (type s1) (type s2) t1 t2 = + let module P = struct + type t = s1 * s2 + type t1 = s1 + type t2 = s2 + let eq = TypEq.refl + let t1 = t1 + let t2 = t2 + end in + let pair = (module P : Typ.PAIR with type t = s1 * s2) in + Typ.Pair pair +\end{verbatim} + +And finally, here is an example of a polymorphic function that takes the +runtime representation of some type "'a" and a value of the same type, +then pretty-prints the value into a string: + +\begin{verbatim} + open Typ + let rec to_string: 'a. 'a Typ.typ -> 'a -> string = + fun (type s) t x -> + match t with + | Int eq -> string_of_int (TypEq.apply eq x) + | String eq -> Printf.sprintf "%S" (TypEq.apply eq x) + | Pair p -> + let module P = (val p : PAIR with type t = s) in + let (x1, x2) = TypEq.apply P.eq x in + Printf.sprintf "(%s,%s)" (to_string P.t1 x1) (to_string P.t2 x2) +\end{verbatim} + +Note that this function uses an explicit polymorphic annotation to obtain +polymorphic recursion. +\fi + +\section{Recovering the type of a module} + +\ikwd{module\@\texttt{module}} +\ikwd{type\@\texttt{type}} +\ikwd{include\@\texttt{include}} + +(Introduced in OCaml 3.12) + +\begin{syntax} +module-type: + ... + | 'module' 'type' 'of' module-expr +\end{syntax} + +The construction @'module' 'type' 'of' module-expr@ expands to the module type +(signature or functor type) inferred for the module expression @module-expr@. +To make this module type reusable in many situations, it is +intentionally not strengthened: abstract types and datatypes are not +explicitly related with the types of the original module. +For the same reason, module aliases in the inferred type are expanded. + +A typical use, in conjunction with the signature-level @'include'@ +construct, is to extend the signature of an existing structure. 
+In that case, one wants to keep the types equal to types in the +original module. This can be done using the following idiom. +\begin{verbatim} + module type MYHASH = sig + include module type of struct include Hashtbl end + val replace: ('a, 'b) t -> 'a -> 'b -> unit + end +\end{verbatim} +The signature "MYHASH" then contains all the fields of the signature +of the module "Hashtbl" (with strengthened type definitions), plus the +new field "replace". An implementation of this signature can be +obtained easily by using the @'include'@ construct again, but this +time at the structure level: +\begin{verbatim} + module MyHash : MYHASH = struct + include Hashtbl + let replace t k v = remove t k; add t k v + end +\end{verbatim} + +Another application where the absence of strengthening comes in handy is +to provide an alternative implementation for an existing module. +\begin{verbatim} + module MySet : module type of Set = struct + ... + end +\end{verbatim} +This idiom guarantees that "MySet" is compatible with Set, but allows +it to represent sets internally in a different way. + +\section{Substituting inside a signature} +\ikwd{with\@\texttt{with}} +\ikwd{module\@\texttt{module}} +\ikwd{type\@\texttt{type}} + +(Introduced in OCaml 3.12) + +\begin{syntax} +mod-constraint: + ... + | 'type' [type-params] typeconstr-name ':=' typexpr + | 'module' module-name ':=' extended-module-path +\end{syntax} + +``Destructive'' substitution (@'with' ... ':=' ...@) behaves essentially like +normal signature constraints (@'with' ... '=' ...@), but it additionally removes +the redefined type or module from the signature. There are a number of +restrictions: one can only remove types and modules at the outermost +level (not inside submodules), and in the case of @'with type'@ the +definition must be another type constructor with the same type +parameters. + +A natural application of destructive substitution is merging two +signatures sharing a type name.
+\begin{verbatim} + module type Printable = sig + type t + val print : Format.formatter -> t -> unit + end + module type Comparable = sig + type t + val compare : t -> t -> int + end + module type PrintableComparable = sig + include Printable + include Comparable with type t := t + end +\end{verbatim} + +One can also use this to completely remove a field: +\caml +\?module type S = Comparable with type t := int;; +\:module type S = sig val compare : int -> int -> int end +\endcaml +or to rename one: +\caml +\?module type S = sig +\? type u +\? include Comparable with type t := u +\?end;; +\:module type S = sig type u val compare : u -> u -> int end +\endcaml + +Note that you can also remove manifest types, by substituting with the +same type. +\caml +\?module type ComparableInt = Comparable with type t = int ;; +\:module type ComparableInt = sig type t = int val compare : t -> t -> int end +\?module type CompareInt = ComparableInt with type t := int ;; +\:module type CompareInt = sig val compare : int -> int -> int end +\endcaml + +\section{Type-level module aliases} +\ikwd{module\@\texttt{module}} +\label{s:module-alias} + +(Introduced in OCaml 4.02) + +\begin{syntax} +specification: + ... + | 'module' module-name '=' module-path +\end{syntax} + +The above specification, inside a signature, only matches a module +definition equal to @module-path@. Conversely, a type-level module +alias can be matched by itself, or by any supertype of the type of the +module it references. + +There are several restrictions on @module-path@: +\begin{enumerate} +\item it should be of the form \(M_0.M_1...M_n\) ({\em i.e.} without + functor applications); +\item inside the body of a functor, \(M_0\) should not be one of the + functor parameters; +\item inside a recursive module definition, \(M_0\) should not be one of + the recursively defined modules. +\end{enumerate} + +Such specifications are also inferred. 
Namely, when @P@ is a path +satisfying the above constraints, +\caml +\?module N = P +\endcaml +has type +\caml +\:module N = P +\endcaml + +Type-level module aliases are used when checking module path +equalities. That is, in a context where module name @N@ is known to be +an alias for @P@, not only do these two module paths check as equal, but +@F(N)@ and @F(P)@ are also recognized as equal. In the default +compilation mode, this is the only difference with the previous +approach of module aliases having just the same module type as the +module they reference. + +When the compiler flag @'-no-alias-deps'@ is enabled, type-level +module aliases are also exploited to avoid introducing dependencies +between compilation units. Namely, a module alias referring to a +module inside another compilation unit does not introduce a link-time +dependency on that compilation unit, as long as it is not +dereferenced; it still introduces a compile-time dependency if the +interface needs to be read, {\em i.e.} if the module is a submodule +of the compilation unit, or if some type components are referred to. +Additionally, accessing a module alias introduces a link-time +dependency on the compilation unit containing the module referenced by +the alias, rather than the compilation unit containing the alias. +Note that these differences in link-time behavior may be incompatible +with the previous behavior, as some compilation units might not be +extracted from libraries, and their side-effects ignored. + +These weakened dependencies make it possible to use module aliases in +place of the @'-pack'@ mechanism. Suppose that you have a library +@'Mylib'@ composed of modules @'A'@ and @'B'@. Using @'-pack'@, one +would issue the command line +\begin{verbatim} + ocamlc -pack a.cmo b.cmo -o mylib.cmo +\end{verbatim} +and as a result obtain a @'Mylib'@ compilation unit, containing +physically @'A'@ and @'B'@ as submodules, and with no dependencies on +their respective compilation units.
+Here is a concrete example of a possible alternative approach: +\begin{enumerate} +\item Rename the files containing @'A'@ and @'B'@ to @'Mylib_A'@ and + @'Mylib_B'@. +\item Create a packing interface @'Mylib.ml'@, containing the + following lines. +\begin{verbatim} + module A = Mylib_A + module B = Mylib_B +\end{verbatim} +\item Compile @'Mylib.ml'@ using @'-no-alias-deps'@, and the other + files using @'-no-alias-deps'@ and @'-open' 'Mylib'@ (the last one is + equivalent to adding the line @'open!' 'Mylib'@ at the top of each + file). +\begin{verbatim} + ocamlc -c -no-alias-deps Mylib.ml + ocamlc -c -no-alias-deps -open Mylib Mylib_*.mli Mylib_*.ml +\end{verbatim} +\item Finally, create a library containing all the compilation units, + and export all the compiled interfaces. +\begin{verbatim} + ocamlc -a Mylib*.cmo -o Mylib.cma +\end{verbatim} +\end{enumerate} +This approach lets you access @'A'@ and @'B'@ directly inside the +library, and as @'Mylib.A'@ and @'Mylib.B'@ from outside. +It also has the advantage that @'Mylib'@ is no longer monolithic: if +you use @'Mylib.A'@, only @'Mylib_A'@ will be linked in, not +@'Mylib_B'@. +%Note that in the above @'Mylib.cmo'@ is actually empty, and one could +%name the interface @'Mylib.mli'@, but this would require that all +%clients are compiled with the @'-no-alias-deps'@ flag. + +\section{Explicit overriding in class definitions}\label{s:explicit-overriding} +\ikwd{method.\@\texttt{method\char33}} +\ikwd{val.\@\texttt{val\char33}} +\ikwd{inherit.\@\texttt{inherit\char33}} + +(Introduced in OCaml 3.12) + +\begin{syntax} +class-field: + ... + | 'inherit!' class-expr ['as' lowercase-ident] + | 'val!' ['mutable'] inst-var-name [':' typexpr] '=' expr + | 'method!' ['private'] method-name {parameter} [':' typexpr] '=' expr + | 'method!' 
['private'] method-name ':' poly-typexpr '=' expr +\end{syntax} + +The keywords @"inherit!"@, @"val!"@ and @"method!"@ have the same semantics +as @"inherit"@, @"val"@ and @"method"@, but they additionally require the +definition they introduce to be an overriding. Namely, @"method!"@ +requires @method-name@ to be already defined in this class, @"val!"@ +requires @inst-var-name@ to be already defined in this class, and +@"inherit!"@ requires @class-expr@ to override some definitions. +If no such overriding occurs, an error is signaled. + +As a side-effect, these 3 keywords avoid the warnings~7 +(method override) and~13 (instance variable override). +Note that warning~7 is disabled by default. + +\section{Overriding in open statements}\label{s:explicit-overriding-open} +\ikwd{open.\@\texttt{open\char33}} + +(Introduced in OCaml 4.01) + +\begin{syntax} +definition: + ... + | 'open!' module-path +; +specification: + ... + | 'open!' module-path +; +expr: + ... + | 'let' 'open!' module-path 'in' expr +\end{syntax} + +Since OCaml 4.01, @"open"@ statements shadowing an existing identifier +(which is later used) trigger the warning 44. Adding a @"!"@ +character after the @"open"@ keyword indicates that such a shadowing is +intentional and should not trigger the warning. + + +\section{Generalized algebraic datatypes} \ikwd{type\@\texttt{type}} +\ikwd{match\@\texttt{match}} + +(Introduced in OCaml 4.00) + +\begin{syntax} +constr-decl: + ... + | constr-name ':' [ typexpr { '*' typexpr } '->' ] typexpr +; +type-param: + ... + | [variance] '_' +\end{syntax} + +Generalized algebraic datatypes, or GADTs, extend usual sum types in +two ways: constraints on type parameters may change depending on the +value constructor, and some type variables may be existentially +quantified. +Adding constraints is done by giving an explicit return type +(the rightmost @typexpr@ in the above syntax), where type parameters +are instantiated. 
+This return type must use the same type constructor as the type being +defined, and have the same number of parameters. +Variables are made existential when they appear inside a constructor's +argument, but not in its return type. + +Since the use of a return type often eliminates the need to name type +parameters in the left-hand side of a type definition, one can replace +them with anonymous types @"_"@ in that case. + +The constraints associated to each constructor can be recovered +through pattern-matching. +Namely, if the type of the scrutinee of a pattern-matching contains +a locally abstract type, this type can be refined according to the +constructor used. +These extra constraints are only valid inside the corresponding branch +of the pattern-matching. +If a constructor has some existential variables, fresh locally +abstract types are generated, and they must not escape the +scope of this branch. + +Here is a concrete example: +\begin{verbatim} + type _ term = + | Int : int -> int term + | Add : (int -> int -> int) term + | App : ('b -> 'a) term * 'b term -> 'a term + + let rec eval : type a. a term -> a = function + | Int n -> n (* a = int *) + | Add -> (fun x y -> x+y) (* a = int -> int -> int *) + | App(f,x) -> (eval f) (eval x) + (* eval called at types (b->a) and b for fresh b *) + + let two = eval (App (App (Add, Int 1), Int 1)) + val two : int = 2 +\end{verbatim} + +Type inference for GADTs is notoriously hard. +This is due to the fact some types may become ambiguous when escaping +from a branch. +For instance, in the "Int" case above, "n" could have either type "int" +or "a", and they are not equivalent outside of that branch. +As a first approximation, type inference will always work if a +pattern-matching is annotated with types containing no free type +variables (both on the scrutinee and the return type). +This is the case in the above example, thanks to the type annotation +containing only locally abstract types. 
+ +In practice, type inference is a bit more clever than that: type +annotations do not need to be immediately on the pattern-matching, and +the types do not have to be always closed. +As a result, it is usually enough to only annotate functions, as in +the example above. Type annotations are +propagated in two ways: for the scrutinee, they follow the flow of +type inference, in a way similar to polymorphic methods; for the +return type, they follow the structure of the program, they are split +on functions, propagated to all branches of a pattern matching, +and go through tuples, records, and sum types. +Moreover, the notion of ambiguity used is stronger: a type is only +seen as ambiguous if it was mixed with incompatible types (equated by +constraints), without type annotations between them. +For instance, the following program types correctly. +\begin{verbatim} + let rec sum : type a. a term -> _ = fun x -> + let y = + match x with + | Int n -> n + | Add -> 0 + | App(f,x) -> sum f + sum x + in y + 1 + val sum : 'a term -> int = <fun> +\end{verbatim} +Here the return type "int" is never mixed with "a", so it is seen as +non-ambiguous, and can be inferred. +When using such partial type annotations we strongly suggest +specifying the "-principal" mode, to check that inference is +principal. + +The exhaustiveness check is aware of GADT constraints, and can +automatically infer that some cases cannot happen. +For instance, the following pattern matching is correctly seen as +exhaustive (the "Add" case cannot happen). +\begin{verbatim} + let get_int : int term -> int = function + | Int n -> n + | App(_,_) -> 0 +\end{verbatim} + +\paragraph{Advanced examples} +The "term" type we have defined above is an {\em indexed} type, where +a type parameter reflects a property of the value contents. +Another use of GADTs is {\em singleton} types, where a GADT value +represents exactly one type. 
This value can be used as runtime +representation for this type, and a function receiving it can have a +polytypic behavior. + +Here is an example of a polymorphic function that takes the +runtime representation of some type "t" and a value of the same type, +then pretty-prints the value as a string: +\begin{verbatim} + type _ typ = + | Int : int typ + | String : string typ + | Pair : 'a typ * 'b typ -> ('a * 'b) typ + + let rec to_string: type t. t typ -> t -> string = + fun t x -> + match t with + | Int -> string_of_int x + | String -> Printf.sprintf "%S" x + | Pair(t1,t2) -> + let (x1, x2) = x in + Printf.sprintf "(%s,%s)" (to_string t1 x1) (to_string t2 x2) +\end{verbatim} + +Another frequent application of GADTs is equality witnesses. +\begin{verbatim} + type (_,_) eq = Eq : ('a,'a) eq + + let cast : type a b. (a,b) eq -> a -> b = fun Eq x -> x +\end{verbatim} +Here type "eq" has only one constructor, and by matching on it one +adds a local constraint allowing the conversion between "a" and "b". +By building such equality witnesses, one can make equal types which +are syntactically different. + +Here is an example using both singleton types and equality witnesses +to implement dynamic types. +\begin{verbatim} + let rec eq_type : type a b. a typ -> b typ -> (a,b) eq option = + fun a b -> + match a, b with + | Int, Int -> Some Eq + | String, String -> Some Eq + | Pair(a1,a2), Pair(b1,b2) -> + begin match eq_type a1 b1, eq_type a2 b2 with + | Some Eq, Some Eq -> Some Eq + | _ -> None + end + | _ -> None + + type dyn = Dyn : 'a typ * 'a -> dyn + + let get_dyn : type a. a typ -> dyn -> a option = + fun a (Dyn(b,x)) -> + match eq_type a b with + | None -> None + | Some Eq -> Some x +\end{verbatim} + +\section{Syntax for Bigarray access}\label{s:bigarray-access} + +(Introduced in Objective Caml 3.00, deprecated in 4.03) + +This extension has been superseded by the customizable index operators extension~\ref{s:index-operators}. 
+Some source compatibility problems are documented in~\ref{s:bigarray-indexop-compatibility}. + +\section{Attributes}\label{s:attributes} + +(Introduced in OCaml 4.02) + +Attributes are ``decorations'' of the syntax tree which are mostly +ignored by the type-checker but can be used by external tools. An +attribute is made of an identifier and a payload, which can be a +structure, a type expression (prefixed with ":") or a pattern +(prefixed with "?") optionally followed by a "when" clause: + + +\begin{syntax} +attr-id: + lowercase-ident + | capitalized-ident + | attr-id '.' attr-id +; +attr-payload: + [ module-items ] + | ':' typexpr + | '?' pattern ['when' expr] +; +\end{syntax} + +The first form of attributes is attached with a postfix notation on +``algebraic'' categories: + +\begin{syntax} +attribute: + '[@' attr-id attr-payload ']' +; +expr: ... + | expr attribute +; +typexpr: ... + | typexpr attribute +; +pattern: ... + | pattern attribute +; +module-expr: ... + | module-expr attribute +; +module-type: ... + | module-type attribute +; +class-expr: ... + | class-expr attribute +; +class-type: ... + | class-type attribute +; +\end{syntax} + +This form of attributes can also be inserted after the @'`'tag-name@ +in polymorphic variant type expressions (@tag-spec-first@, @tag-spec@, +@tag-spec-full@) or after the @method-name@ in @method-type@. + +The same syntactic form is also used to attach attributes to labels and +constructors in type declarations: + +\begin{syntax} +field-decl: + ['mutable'] field-name ':' poly-typexpr {attribute} +; +constr-decl: + (constr-name || '()') [ 'of' typexpr { '*' typexpr } ] {attribute} +; +\end{syntax} + + +The second form of attributes are attached to ``blocks'' such as type +declarations, class fields, etc: + +\begin{syntax} +item-attribute: + '[@@' attr-id attr-payload ']' +; +typedef: ... 
+ | typedef item-attribute +; +exception-definition: + 'exception' constr-name { attribute } [ 'of' typexpr { '*' typexpr } ] + | 'exception' constr-name '=' constr +; +module-items: + [';;'] ( definition || expr { item-attribute } ) { [';;'] definition || ';;' expr { item-attribute } } [';;'] +; +class-binding: ... + | class-binding item-attribute +; +class-spec: ... + | class-spec item-attribute +; +classtype-def: ... + | classtype-def item-attribute +; +definition: + 'let' ['rec'] let-binding { 'and' let-binding } + | 'external' value-name ':' typexpr '=' external-declaration { item-attribute } + | type-definition + | exception-definition { item-attribute } + | class-definition + | classtype-definition + | 'module' module-name { '(' module-name ':' module-type ')' } + [ ':' module-type ] \\ '=' module-expr { item-attribute } + | 'module' 'type' modtype-name '=' module-type { item-attribute } + | 'open' module-path { item-attribute } + | 'include' module-expr { item-attribute } + | 'module' 'rec' module-name ':' module-type '=' \\ + module-expr { item-attribute } \\ + { 'and' module-name ':' module-type '=' module-expr \\ + { item-attribute } } +; +specification: + 'val' value-name ':' typexpr { item-attribute } + | 'external' value-name ':' typexpr '=' external-declaration { item-attribute } + | type-definition + | 'exception' constr-decl { item-attribute } + | class-specification + | classtype-definition + | 'module' module-name ':' module-type { item-attribute } + | 'module' module-name { '(' module-name ':' module-type ')' } + ':' module-type { item-attribute } + | 'module' 'type' modtype-name { item-attribute } + | 'module' 'type' modtype-name '=' module-type { item-attribute } + | 'open' module-path { item-attribute } + | 'include' module-type { item-attribute } +; +class-field-spec: ... + | class-field-spec item-attribute +; +class-field: ... 
+ | class-field item-attribute +; +\end{syntax} + +A third form of attributes appears as stand-alone structure or +signature items in the module or class sub-languages. They are not +attached to any specific node in the syntax tree: + +\begin{syntax} +floating-attribute: + '[@@@' attr-id attr-payload ']' +; +definition: ... + | floating-attribute +; +specification: ... + | floating-attribute +; +class-field-spec: ... + | floating-attribute +; +class-field: ... + | floating-attribute +; +\end{syntax} + +(Note: contrary to what the grammar above describes, @item-attributes@ +cannot be attached to these floating attributes in @class-field-spec@ +and @class-field@.) + + +It is also possible to specify attributes on expressions using an +infix syntax. This applies to all expressions starting with one or +two keywords: "assert", "begin", "for", "fun", "function", "if", +"lazy", "let", "let module", "let open", "match", "new", "object", +"try", "while". Those expressions support adding one or several +attributes just after those initial keyword(s). For instance: + +\begin{verbatim} +let [@foo][@bar x] x = 2 in x + 1 === (let x = 2 in x + 1)[@foo][@bar x] +begin[@foo] ... end === (begin ... end)[@foo] +\end{verbatim} + +\subsection{Built-in attributes} + +Some attributes are understood by the type-checker: +\begin{itemize} +\item + ``ocaml.warning'' or ``warning'', with a string literal payload. + This can be used as a floating attribute in a + signature/structure/object/object type. The string is parsed and has + the same effect as the "-w" command-line option, in the scope between + the attribute and the end of the current + signature/structure/object/object type. The attribute can also be + used on an expression, + in which case its scope is limited to that expression. + Note that it is not well-defined which scope is used for a specific + warning. This is implementation-dependent and can change between versions.
+ For instance, warnings triggered by the ``ppwarning'' attribute (see below) + are issued using the global warning configuration. +\item + ``ocaml.warnerror'' or ``warnerror'', with a string literal payload. + Same as ``ocaml.warning'', for the "-warn-error" command-line option. +\item + ``ocaml.deprecated'' or ``deprecated''. + Can be applied to most kinds of items in signatures or + structures. When the element is later referenced, a warning (3) is + triggered. If the payload of the attribute is a string literal, + the warning message includes this text. +\item + ``ocaml.ppwarning'' or ``ppwarning'', in any context, with + a string literal payload. The text is reported as warning (22) + by the compiler (currently, the warning location is the location + of the string payload). This is mostly useful for preprocessors which + need to communicate warnings to the user. This could also be used + to mark explicitly some code location for further inspection. +\end{itemize} + +\begin{verbatim} +module X = struct + [@@@warning "+9"] (* locally enable warning 9 in this structure *) + ... +end + +let x = begin[@warning "+9"] ... end in .... + +type t = A | B + [@@deprecated "Please use type 's' instead."] + + +let f x = + assert (x >= 0) [@ppwarning "TODO: remove this later"]; + .... + +\end{verbatim} + + +\section{Extension nodes}\label{s:extension-nodes} + +(Introduced in OCaml 4.02) + +Extension nodes are generic placeholders in the syntax tree. They are +rejected by the type-checker and are intended to be ``expanded'' by external +tools such as "-ppx" rewriters. + +Extension nodes share the same notion of identifier and payload as +attributes~\ref{s:attributes}. + +The first form of extension node is used for ``algebraic'' categories: + +\begin{syntax} +extension: + '[%' attr-id attr-payload ']' +; +expr: ... + | extension +; +typexpr: ... + | extension +; +pattern: ... + | extension +; +module-expr: ... + | extension +; +module-type: ... + | extension +; +class-expr: ...
+ | extension +; +class-type: ... + | extension +; +\end{syntax} + +A second form of extension node can be used in structures and +signatures, both in the module and object languages: + +\begin{syntax} +item-extension: + '[%%' attr-id attr-payload ']' +; +definition: ... + | item-extension +; +specification: ... + | item-extension +; +class-field-spec: ... + | item-extension +; +class-field: ... + | item-extension +; +\end{syntax} + +An infix form is available for extension nodes as expressions, when +the payload is a single expression. This form applies to all +expressions starting with one or two keywords: the percent sign and then +an extension identifier immediately follow the initial keyword(s). + +Examples: + +\begin{verbatim} +let%foo x = 2 in x + 1 === [%foo let x = 2 in x + 1] +begin%foo ... end === [%foo begin ... end] +\end{verbatim} + +When this form is used together with the infix syntax for attributes, +the attributes are considered to apply to the payload: + +\begin{verbatim} +let%foo[@bar] x = 2 in x + 1 === [%foo (let x = 2 in x + 1) [@bar]] +\end{verbatim} + +\section{Quoted strings}\label{s:quoted-strings} + +(Introduced in OCaml 4.02) + +Quoted strings provide a different lexical syntax to write string +literals in OCaml code. This can be used to embed pieces of foreign +syntax fragments in OCaml code, to be interpreted by a "-ppx" filter or +just a library. + +\begin{syntax} +string-literal: ... + | '{' quoted-string-id '|' ........ '|' quoted-string-id '}' +; +quoted-string-id: + { 'a'...'z' || '_' } +; +\end{syntax} + +The opening delimiter has the form "{id|" where "id" is a (possibly + empty) sequence of lowercase letters and underscores. The + corresponding closing delimiter is "|id}" (with the same +identifier). Unlike regular OCaml string literals, quoted +strings do not interpret any character in a special way.
+ +Example: + +\begin{verbatim} +String.length {|\"|} (* returns 2 *) +String.length {foo|\"|foo} (* returns 2 *) +\end{verbatim} + + +\section{Exception cases in pattern matching}\label{s:exception-match} + +(Introduced in OCaml 4.02) + +A new form of exception patterns is allowed, only as a toplevel +pattern under a "match"..."with" pattern-matching (other occurrences +are rejected by the type-checker). + +\begin{syntax} +pattern: ... + | 'exception' pattern +; +\end{syntax} + +Cases with such a toplevel pattern are called ``exception cases'', +as opposed to regular ``value cases''. Exception cases are applied +when the evaluation of the matched expression raises an exception. +The exception value is then matched against all the exception cases +and re-raised if none of them accepts the exception (as for a +"try"..."with" block). Since the bodies of all exception and value +cases are outside the scope of the exception handler, they are all +considered to be in tail-position: if the "match"..."with" block +itself is in tail position in the current function, any function call +in tail position in one of the case bodies results in an actual tail +call. + +It is an error if all cases are exception cases in a given pattern +matching. + +\section{Extensible variant types}\label{s:extensible-variants} + +(Introduced in OCaml 4.02) + +\begin{syntax} +type-representation: + ... + | '=' '..' +; +specification: + ... + | 'type' [type-params] typeconstr type-extension-spec +; +definition: + ... + | 'type' [type-params] typeconstr type-extension-def +; +type-extension-spec: '+=' ['private'] ['|'] constr-decl { '|' constr-decl } +; +type-extension-def: '+=' ['private'] ['|'] constr-def { '|' constr-def } +; +constr-def: + constr-decl + | constr-name '=' constr +; +\end{syntax} + +Extensible variant types are variant types which can be extended with +new variant constructors. Extensible variant types are defined using +"..". New variant constructors are added using "+=".
+\begin{verbatim} + type attr = .. + + type attr += Str of string + + type attr += + | Int of int + | Float of float +\end{verbatim} + +Pattern matching on an extensible variant type requires a default case +to handle unknown variant constructors: +\begin{verbatim} + let to_string = function + | Str s -> s + | Int i -> string_of_int i + | Float f -> string_of_float f + | _ -> "?" +\end{verbatim} + +A preexisting example of an extensible variant type is the built-in +"exn" type used for exceptions. Indeed, exception constructors can be +declared using the type extension syntax: +\begin{verbatim} + type exn += Exc of int +\end{verbatim} + +Extensible variant constructors can be rebound to a different name. This +allows exporting variants from another module. +\begin{verbatim} + type Expr.attr += Str = Expr.Str +\end{verbatim} + +Extensible variant constructors can be declared "private". As with +regular variants, this prevents them from being constructed directly by +constructor application while still allowing them to be de-structured in +pattern-matching. + + +\section{Generative functors}\label{s:generative-functors} + +(Introduced in OCaml 4.02) + +\begin{syntax} +module-expr: + ... + | 'functor' '()' '->' module-expr + | module-expr '()' +; +definition: + ... + | 'module' module-name { '(' module-name ':' module-type ')' || '()' } + [ ':' module-type ] \\ '=' module-expr +; +module-type: + ... + | 'functor' '()' '->' module-type +; +specification: + ... + | 'module' module-name { '(' module-name ':' module-type ')' || '()' } + ':' module-type +; +\end{syntax} + +A generative functor takes a unit "()" argument. +In order to use it, one must necessarily apply it to this unit argument, +ensuring that all type components in the result of the functor behave +in a generative way, {\em i.e.} they are different from types obtained +by other applications of the same functor. 
+This is equivalent to taking an argument of signature "sig end", and always +applying to "struct end", but not to some defined module (in the +latter case, applying twice to the same module would return identical +types). + +As a side-effect of this generativity, one is allowed to unpack +first-class modules in the body of generative functors. + +\section{Extension operators} \label{s:ext-ops} +(Introduced in OCaml 4.02.2) +\begin{syntax} +infix-symbol: + ... + | "#" {operator-char} "#" {operator-char '|' "#"} +; +\end{syntax} + +Operator names starting with a "#" character and containing more than +one "#" character in their name are accepted during parsing and +rejected during type-checking. These operators can therefore not be +used directly in vanilla OCaml. However, "-ppx" rewriters and other +external tools can use this parser leniency to extend the language +with new extension-specific "#"-operators. + +\section{Customizable index operators} \label{s:index-operators} + +(Introduced in OCaml 4.03) + +\begin{syntax} +expr: + ... + | expr '.(' expr ')' + | expr '.(' expr ')' '<-' expr + | expr '.[' expr ']' + | expr '.[' expr ']' '<-' expr + | expr '.{' expr '}' + | expr '.{' expr '}' '<-' expr +; +operator: + ... + | '.' '(' ')' + | '.' '(' ')' '<-' + | '.' '[' ']' + | '.' '[' ']' '<-' + | '.' '{' '}' + | '.' '{' '}' '<-' +\end{syntax} + +This extension provides syntactic sugar for getting and setting +elements for custom array-like types. The quintessential examples of +array-like types are the standard array, string, bytes and bigarray +types.
For these types, elements can be accessed and assigned with the +well-known syntax +% +\begin{center}\begin{tabular}{lll} \hline + & access & assignment \\ \hline +array & @expr_a".("expr_i")"@ & @expr_a".("expr_i")" "<-" expr_v@ \\ +string or bytes & @expr_s".["expr_i"]"@ & @expr_s".["expr_i"]" "<-" expr_v@ \\ +bigarray & @expr_s".{"expr_i"}"@ & @expr_s".{"expr_i"}" "<-" expr_v@ \\ \hline +\end{tabular}\end{center} + +This extension generalizes this standard syntax by associating these +access and assignment operations with three new families of index +operators: +% +\begin{center}\begin{tabular}{lll} \hline + & access & assignment \\ \hline +array-like & ".()" & ".()<-" \\ +string-like & ".[]" & ".[]<-" \\ +bigarray-like & ".{}" & ".{}<-" \\ \hline +\end{tabular}\end{center} + +The concrete syntaxes "array.(index)",\dots, "bigarray.{index}<-value" +are then redefined as syntactic sugar for these new operators: +% +\begin{center}\begin{tabular}{lll} \hline + & concrete syntax & translation \\ \hline +array-like & @expr_a'.('expr_i')'@ & @'(' '.()' ')' expr_a expr_i@ \\ + & @expr_a'.('expr_i')' '<-' expr_v@ & @'(' '.()<-' ')' expr_a expr_i expr_v@ \\ \hline +string-like & @expr_s'.['expr_i']'@ & @'(' '.[]' ')' expr_s expr_i@ \\ + & @expr_s'.['expr_i']' '<-' expr_v@ & @'(' '.[]<-' ')' expr_s expr_i expr_v@ \\ \hline +bigarray-like & @expr_s'.{'expr_i^*'}'@ & @'(' '.{}' ')' expr_s expr_i^*@ \\ + & @expr_s'.{'expr_i^*'}' '<-' expr_v@ & @'(' '.{}<-' ')' expr_s expr_i^* expr_v@ \\ \hline +\end{tabular}\end{center} +% +Note that the syntactic sugar for the "( .{} )" bigarray operator +family presents some particularities to support multidimensional +arrays. These particularities are detailed in +subsection~\ref{s:m-index-operators}. + +This new syntactic sugar is used in the Pervasives +module[\moduleref{Pervasives}] to redefine the standard syntax for the +array and string types. Similarly, the +"Bigarray"[\moduleref{Bigarray}] library uses the "( .{} )" operator +family.
+ +Customized access and assignment operators can be defined for +arbitrary types by overriding the corresponding operators. For +instance, we can define a Python-like dictionary: +% +\begin{verbatim} +module Dict = struct + include Hashtbl + let ( .[] ) tabl index = find tabl index + let ( .[]<- ) tabl index value = add tabl index value +end + +let dict = + let open Dict in + let dict = create 10 in + dict.["one"] <- 1; + dict.["two"] <- 2; + dict + +let () = + let open Dict in + assert( dict.["one"] = 1 ) +\end{verbatim} + +\subsection{Multidimensional index operators}\label{s:m-index-operators} + +\begin{syntax} +expr: + ... + | expr '.{' expr { ',' expr } '}' + | expr '.{' expr { ',' expr } '}' '<-' expr +; +operator: + ... + | '.' '{' ',' '}' + | '.' '{' ',' ',' '}' + | '.' '{' ',' '..' ',' '}' + | '.' '{' ',' '}' '<-' + | '.' '{' ',' ',' '}' '<-' + | '.' '{' ',' '..' ',' '}' '<-' + + +\end{syntax} + +In addition to the two regular "( .{} )" and "( .{}<- )" index +operators, the "( .{} )" operator family includes $6$ specific index +operators.
These operators are designed to lighten access and +assignment for multidimensional arrays of dimension $2$, $3$ and $n>3$: +% +\begin{center}\begin{tabular}{lll} \hline + & access & assignment \\ \hline +dimension $1$ & ".{}" & ".{}<-" \\ +dimension $2$ & ".{,}" & ".{,}<-" \\ +dimension $3$ & ".{,,}" & ".{,,}<-" \\ +dimension $n>3$ & ".{,..,}" & ".{,..,}<-" \\ \hline +\end{tabular}\end{center} +% +The concrete syntaxes @expr_a'.{'expr_1,..,expr_n'}'@ and +@expr_a'.{'expr_1,..,expr_n'}' '<-' expr_v@ are translated to the +corresponding $n$-dimensional operators according to the number $n$ +of comma-separated expressions inside the braces: +% +\begin{center}\begin{tabular}{lll} \hline +dimension & concrete syntax & translation \\ \hline +dimension $1$ & @expr_a'.{'expr_i'}'@ & @'(.{})' expr_a expr_i@ \\ + & @expr_a'.{'expr_i'}' '<-' expr_v@ & @'(.{}<-)' expr_a expr_i expr_v@ \\ \hline +dimension $2$ & @expr_a'.{'i_1,i_2'}'@ & @'(.{,})' expr_a i_1 i_2@ \\ + & @expr_a'.{'i_1,i_2'}' '<-' expr_v@ & @'(.{,}<-)' expr_a i_1 i_2 expr_v@ \\ \hline +dimension $3$ & @expr_a'.{'i_1,i_2,i_3'}'@ & @'(.{,,})' expr_a i_1 i_2 i_3@ \\ + & @expr_a'.{'i_1,i_2,i_3'}' '<-' expr_v@ & @'(.{,,}<-)' expr_a i_1 i_2 i_3 expr_v@ \\ \hline +dimension $n>3$ & @expr_a'.{'i_1,\ldots,i_n'}'@ & @'(.{,..,})' expr_a '[|'i_1';'\ldots';'i_n'|]'@ \\ + & @expr_a'.{'i_1,\ldots,i_n'}' '<-' expr_v@ & @'(.{,..,}<-)' expr_a '[|'i_1';'\ldots';'i_n'|]' expr_v@ \\ \hline +\end{tabular}\end{center} +% +Note that for multidimensional arrays of dimension $n > 3$, all the +index arguments are packed into a single array which is then passed as +an argument to the "( .{,..,} )" or "( .{,..,}<- )" operators. + +These multidimensional index operators can also be redefined to be +used with custom types.
For instance, we can implement a unified "'a +tensor" type for vectors and matrices and define separate access +operators for vectors and matrices using "( .{} )" and "( .{,} )": +% +\begin{verbatim} +type 'a tensor = { dim: int; array: float array } constraint 'a = [< `Vector | `Matrix ] +let ( .{} ) ( v: [`Vector] tensor ) i = v.array.(i) +let ( .{,} ) ( mat: [`Matrix] tensor ) i j = mat.array.( i*mat.dim + j ) +... +let x = v.{i} (* vector access *) +let m = mat.{i,j} (* matrix access *) +\end{verbatim} + +As another example, the "Bigarray"[\moduleref{Bigarray}] library defines these operators as +% +\begin{tableau}{ll}{operator}{function} +\entree{ "( .{} )" } {"Array1.get"} +\entree{ "( .{}<- )" } {"Array1.set"} +\entree{ "( .{,} )" } {"Array2.get"} +\entree{ "( .{,}<- )" } {"Array2.set"} +\entree{ "( .{,,} )" } {"Array3.get"} +\entree{ "( .{,,}<- )" } {"Array3.set"} +\entree{ "( .{,..,} )" } {"Genarray.get"} +\entree{ "( .{,..,}<- )" } {"Genarray.set"} +\end{tableau} +% +With these definitions, it is then possible to use the short syntax +@bigarray'.{'index'}'@ with bigarray values by opening the "Bigarray" +module in scope. + +\subsection{Backward compatibility warning for the "Bigarray" library} \label{s:bigarray-indexop-compatibility} + +One of the reasons behind the existence of the 6 special +@"( .{"\ldots"} )"@ operators is to preserve backward compatibility +with the "Bigarray" library special syntax. However, this extension +\emph{does partially break} source compatibility with the bigarray +syntax extension: before OCaml 4.03, it was possible to use the +@bigarray'.{'index'}'@ syntax without opening the "Bigarray" +module. This usage is no longer possible since the @"(.{"\ldots"})"@ +index operators are now defined inside the "Bigarray" module. This +problem can be fixed by opening the "Bigarray" module (or by bringing +in scope the index operators defined in the "Bigarray" module).
diff --git a/manual/manual/refman/impl.etex b/manual/manual/refman/impl.etex new file mode 100644 index 0000000000..ec7117eb33 --- /dev/null +++ b/manual/manual/refman/impl.etex @@ -0,0 +1,54 @@ +\section{Module implementations} + +\begin{syntax} +implementation: + { impl-phrase ';;' } +; +impl-phrase: + expr + | value-definition + | type-definition + | exception-definition + | directive +; +value-definition: + 'let' ['rec'] let-binding { 'and' let-binding } +\end{syntax} + +A module implementation consists in a sequence of implementation +phrases, terminated by double semicolons. An implementation phrase is +either an expression, a value definition, a type or exception +definition, or a directive. At run-time, implementation phrases are +evaluated sequentially, in the order in which they appear in the +module implementation. + +Implementation phrases consisting in an expression are +evaluated for their side-effects. + +Value definitions bind global value variables in the same way as a +@'let' \ldots 'in' \ldots@ expression binds local variables. The +expressions are evaluated, and their values are matched against the +left-hand sides of the @'='@ signs, as explained in +section~\ref{s:localdef}. If the matching succeeds, the bindings of +identifiers to values performed during matching are interpreted as +bindings to the global value variables whose local name is the +identifier, and whose module name is the name of the module. +If the matching fails, the exception \verb"Match_failure" is +raised. The scope of these bindings is the phrases that follow the +value definition in the module implementation. + +Type and exception definitions introduce type constructors, variant +constructors and record fields as described in sections +\ref{s:typdef}~and~\ref{s:excdef}. +The scope of these definitions is the phrases that follow the +definition in the module implementation.
The evaluation of an +implementation phrase consisting in a type or exception definition +produces no effect at run-time. + +Directives modify the behavior of the compiler on the subsequent +phrases of the module implementation, as described in +section~\ref{s:directives}. The evaluation of an implementation phrase +consisting in a directive produces no effect at run-time. Directives +apply only to the module currently being compiled; in particular, they +have no effect on other modules that refer to globals exported by the +module being compiled. diff --git a/manual/manual/refman/intf.etex b/manual/manual/refman/intf.etex new file mode 100644 index 0000000000..b85353a96c --- /dev/null +++ b/manual/manual/refman/intf.etex @@ -0,0 +1,55 @@ +\section{Module interfaces} +\ikwd{value\@\texttt{value}} +\ikwd{type\@\texttt{type}} +\ikwd{exception\@\texttt{exception}} + +\begin{syntax} +interface: + { intf-phrase ';;' } +; +intf-phrase: + value-declaration + | type-definition + | exception-definition + | directive +; +value-declaration: + 'value' ident ':' typexpr { 'and' ident ':' typexpr } +\end{syntax} + +Module interfaces declare the global objects (value variables, type +constructors, variant constructors, record fields) that a module +exports, that is, makes available to other modules. +Other modules can refer to these globals using qualified identifiers +or the \verb"#open" directive, as explained in section~\ref{s:names}. + +A module interface consists in a sequence of interface +phrases, terminated by double semicolons. An interface phrase is +either a value declaration, a type definition, an exception +definition, or a directive. + +Value declarations declare global value variables that are +exported by the module implementation, and the types with which they are +exported. The module implementation must define these variables, with +types at least as general as the types declared in the interface. 
The +scope of the bindings for these global variables extends from the +module implementation itself to all modules that refer to those variables. + +Type or exception definitions introduce type constructors, variant +constructors and record fields as described in sections +\ref{s:typdef}~and~\ref{s:excdef}. Exception definitions and type +definitions that are not abstract type declarations also take effect +in the module implementation; that is, the type constructors, variant +constructors and record fields they define are considered bound on +entrance to the module implementation, and can be referred to by the +implementation phrases. Type definitions that are not abstract type +declarations must not be redefined in the module implementation. In +contrast, the type constructors that are declared abstract in a module +interface must be defined in the module implementation, with the same names. + +Directives modify the behavior of the compiler on the subsequent +phrases of the module interface, as described in +section~\ref{s:directives}. Directives apply only to the interface +currently being compiled; in particular, they have no effect on other +modules that refer to globals exported by the interface being +compiled. diff --git a/manual/manual/refman/lex.etex b/manual/manual/refman/lex.etex new file mode 100644 index 0000000000..98f1e6547b --- /dev/null +++ b/manual/manual/refman/lex.etex @@ -0,0 +1,274 @@ +\section{Lexical conventions} +\pdfsection{Lexical conventions} +%HEVEA\cutname{lex.html} +\subsubsection*{Blanks} + +The following characters are considered as blanks: space, +horizontal tabulation, carriage return, line feed and form feed. Blanks are +ignored, but they separate adjacent identifiers, literals and +keywords that would otherwise be confused as one single identifier, +literal or keyword. 
+ +\subsubsection*{Comments} + +Comments are introduced by the two characters @"(*"@, with no +intervening blanks, and terminated by the characters @"*)"@, with +no intervening blanks. Comments are treated as blank characters. +Comments do not occur inside string or character literals. Nested +comments are handled correctly. + +\subsubsection*{Identifiers} + +\begin{syntax} +ident: ( letter || "_" ) { letter || "0" \ldots "9" || "_" || "'" } ; +capitalized-ident: ("A" \ldots "Z") { letter || "0" \ldots "9" || "_" || "'" } ; +lowercase-ident: + ("a" \ldots "z" || "_") { letter || "0" \ldots "9" || "_" || "'" } ; +letter: "A" \ldots "Z" || "a" \ldots "z" +\end{syntax} + +Identifiers are sequences of letters, digits, "_" (the underscore +character), and "'" (the single quote), starting with a +letter or an underscore. +Letters contain at least the 52 lowercase and uppercase +letters from the ASCII set. The current implementation +also recognizes as letters some characters from the ISO +8859-1 set (characters 192--214 and 216--222 as uppercase letters; +characters 223--246 and 248--255 as lowercase letters). This +feature is deprecated and should be avoided for future compatibility. + +All characters in an identifier are +meaningful. The current implementation accepts identifiers up to +16000000 characters in length. + +In many places, OCaml makes a distinction between capitalized +identifiers and identifiers that begin with a lowercase letter. The +underscore character is considered a lowercase letter for this +purpose. 
+ +\subsubsection*{Integer literals} + +\begin{syntax} +integer-literal: + ["-"] ("0"\ldots"9") { "0"\ldots"9" || "_" } + | ["-"] ("0x"||"0X") ("0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f") + { "0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f"||"_" } + | ["-"] ("0o"||"0O") ("0"\ldots"7") { "0"\ldots"7"||"_" } + | ["-"] ("0b"||"0B") ("0"\ldots"1") { "0"\ldots"1"||"_" } +\end{syntax} + +An integer literal is a sequence of one or more digits, optionally +preceded by a minus sign. By default, integer literals are in decimal +(radix 10). The following prefixes select a different radix: +\begin{tableau}{|l|l|}{Prefix}{Radix} +\entree{"0x", "0X"}{hexadecimal (radix 16)} +\entree{"0o", "0O"}{octal (radix 8)} +\entree{"0b", "0B"}{binary (radix 2)} +\end{tableau} +(The initial @"0"@ is the digit zero; the @"O"@ for octal is the letter O.) +The interpretation of integer literals that fall outside the range of +representable integer values is undefined. + +For convenience and readability, underscore characters (@"_"@) are accepted +(and ignored) within integer literals. + +\subsubsection*{Floating-point literals} + +\begin{syntax} +float-literal: + ["-"] ("0"\ldots"9") { "0"\ldots"9"||"_" } ["." { "0"\ldots"9"||"_" }] + [("e"||"E") ["+"||"-"] ("0"\ldots"9") { "0"\ldots"9"||"_" }] +\end{syntax} + +Floating-point decimals consist in an integer part, a decimal part and +an exponent part. The integer part is a sequence of one or more +digits, optionally preceded by a minus sign. The decimal part is a +decimal point followed by zero, one or more digits. +The exponent part is the character @"e"@ or @"E"@ followed by an +optional @"+"@ or @"-"@ sign, followed by one or more digits. +The decimal part or the exponent part can be omitted but not both, to +avoid ambiguity with integer literals. +The interpretation of floating-point literals that fall outside the +range of representable floating-point values is undefined. 
+ +For convenience and readability, underscore characters (@"_"@) are accepted +(and ignored) within floating-point literals. + +\subsubsection*{Character literals} +\label{s:characterliteral} + +\begin{syntax} +char-literal: + "'" regular-char "'" + | "'" escape-sequence "'" +; +escape-sequence: + "\" ( "\" || '"' || "'" || "n" || "t" || "b" || "r" || space ) + | "\" ("0"\ldots"9") ("0"\ldots"9") ("0"\ldots"9") + | "\x" ("0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f") + ("0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f") +\end{syntax} + +Character literals are delimited by @"'"@ (single quote) characters. +The two single quotes enclose either one character different from +@"'"@ and @'\'@, or one of the escape sequences below: +\begin{tableau}{|l|l|}{Sequence}{Character denoted} +\entree{"\\\\"}{backslash ("\\")} +\entree{"\\\""}{double quote ("\"")} +\entree{"\\'"}{single quote ("'")} +\entree{"\\n"}{linefeed (LF)} +\entree{"\\r"}{carriage return (CR)} +\entree{"\\t"}{horizontal tabulation (TAB)} +\entree{"\\b"}{backspace (BS)} +\entree{"\\"\var{space}}{space (SPC)} +\entree{"\\"\var{ddd}}{the character with ASCII code \var{ddd} in decimal} +\entree{"\\x"\var{hh}}{the character with ASCII code \var{hh} in hexadecimal} +\end{tableau} + +\subsubsection*{String literals} +\label{s:stringliteral} + +\begin{syntax} +string-literal: + '"' { string-character } '"' +; +string-character: + regular-string-char + | escape-sequence + | '\' newline { space || tab } +\end{syntax} + +String literals are delimited by @'"'@ (double quote) characters. +The two double quotes enclose a sequence of either characters +different from @'"'@ and @'\'@, or escape sequences from the +table given above for character literals. + +To allow splitting long string literals across lines, the sequence +"\\"\var{newline}~\var{spaces-or-tabs} (a backslash at the end of a line +followed by any number of spaces and horizontal tabulations at the +beginning of the next line) is ignored inside string literals. 
+ +The current implementation places practically no restrictions on the +length of string literals. + +\subsubsection*{Naming labels} +\label{s:labelname} + +To avoid ambiguities, naming labels in expressions cannot just be defined +syntactically as the sequence of the three tokens "~", @ident@ and +":", and have to be defined at the lexical level. + +\begin{syntax} +label-name: lowercase-ident +; +label: "~" label-name ":" +; +optlabel: "?" label-name ":" +\end{syntax} + +Naming labels come in two flavours: @label@ for normal arguments and +@optlabel@ for optional ones. They are simply distinguished by their +first character, either "~" or "?". + +Despite @label@ and @optlabel@ being lexical entities in expressions, +their expansions @'~' label-name ':'@ and @'?' label-name ':'@ will be +used in grammars, for the sake of readability. Note also that inside +type expressions, this expansion can be taken literally, {\em i.e.} +there are really 3 tokens, with optional blanks between them. + +\subsubsection*{Prefix and infix symbols} + +%% || '`' lowercase-ident '`' + +\begin{syntax} +infix-symbol: + ('=' || '<' || '>' || '@' || '^' || '|' || '&' || + '+' || '-' || '*' || '/' || '$' || '%') { operator-char } + | "#" {{ operator-char }} +; +prefix-symbol: + '!' { operator-char } + | ('?' || '~') {{ operator-char }} +; +operator-char: + '!' || '$' || '%' || '&' || '*' || '+' || '-' || '.' || + '/' || ':' || '<' || '=' || '>' || '?' || '@' || + '^' || '|' || '~' +\end{syntax} + +Sequences of ``operator characters'', such as "<=>" or "!!", +are read as a single token from the @infix-symbol@ or @prefix-symbol@ +class. These symbols are parsed as prefix and infix operators inside +expressions, but otherwise behave like normal identifiers. +%% Identifiers starting with a lowercase letter and enclosed +%% between backquote characters @'`' lowercase-ident '`'@ are also parsed +%% as infix operators. 
+ +\subsubsection*{Keywords} + +The identifiers below are reserved as keywords, and cannot be employed +otherwise: +\begin{verbatim} + and as assert asr begin class + constraint do done downto else end + exception external false for fun function + functor if in include inherit initializer + land lazy let lor lsl lsr + lxor match method mod module mutable + new object of open or private + rec sig struct then to true + try type val virtual when while + with nonrec +\end{verbatim} +% +\goodbreak% +% +The following character sequences are also keywords: +% +%% FIXME the token >] is not used anywhere in the syntax +% +\begin{alltt} +" != # & && ' ( ) * + , -" +" -. -> . .. : :: := :> ; ;; <" +" <- = > >] >} ? [ [< [> [| ]" +" _ ` { {< | |] || } ~" +\end{alltt} +% +Note that the following identifiers are keywords of the Camlp4 +extensions and should be avoided for compatibility reasons. +% +\begin{verbatim} + parser value $ $$ $: <: << >> ?? +\end{verbatim} + +\subsubsection*{Ambiguities} + +Lexical ambiguities are resolved according to the ``longest match'' +rule: when a character sequence can be decomposed into two tokens in +several different ways, the decomposition retained is the one with the +longest first token. + +\subsubsection*{Line number directives} + +\begin{syntax} +linenum-directive: + '#' {{"0" \ldots "9"}} + | '#' {{"0" \ldots "9"}} '"' { string-character } '"' +\end{syntax} + +Preprocessors that generate OCaml source code can insert line number +directives in their output so that error messages produced by the +compiler contain line numbers and file names referring to the source +file before preprocessing, instead of after preprocessing. +A line number directive is composed of a @"#"@ (sharp sign), followed by +a positive integer (the source line number), optionally followed by a +character string (the source file name). +Line number directives are treated as blanks during lexical +analysis. 
+ +% FIXME spaces and tabs are allowed before and after the number +% FIXME ``string-character'' is inaccurate: everything is allowed except +% CR, LF, and doublequote; moreover, backslash escapes are not +% interpreted (especially backslash-doublequote) +% FIXME any number of random characters are allowed (and ignored) at the +% end of the line, except CR and LF. diff --git a/manual/manual/refman/modtypes.etex b/manual/manual/refman/modtypes.etex new file mode 100644 index 0000000000..e500496cc1 --- /dev/null +++ b/manual/manual/refman/modtypes.etex @@ -0,0 +1,290 @@ +\section{Module types (module specifications)} +\pdfsection{Module types (module specifications)} +%HEVEA\cutname{modtypes.html} + +Module types are the module-level equivalent of type expressions: they +specify the general shape and type properties of modules. + +\ikwd{sig\@\texttt{sig}} +\ikwd{end\@\texttt{end}} +\ikwd{functor\@\texttt{functor}} +\ikwd{with\@\texttt{with}} +\ikwd{val\@\texttt{val}} +\ikwd{external\@\texttt{external}} +\ikwd{type\@\texttt{type}} +\ikwd{exception\@\texttt{exception}} +\ikwd{class\@\texttt{class}} +\ikwd{module\@\texttt{module}} +\ikwd{open\@\texttt{open}} +\ikwd{include\@\texttt{include}} + +\begin{syntax} +module-type: + modtype-path + | 'sig' { specification [';;'] } 'end' + | 'functor' '(' module-name ':' module-type ')' '->' module-type + | module-type 'with' mod-constraint { 'and' mod-constraint } + | '(' module-type ')' +; +mod-constraint: + 'type' [type-params] typeconstr type-equation + | 'module' module-path '=' extended-module-path +; +%BEGIN LATEX +\end{syntax} +\begin{syntax} +%END LATEX +specification: + 'val' value-name ':' typexpr + | 'external' value-name ':' typexpr '=' external-declaration + | type-definition + | 'exception' constr-decl + | class-specification + | classtype-definition + | 'module' module-name ':' module-type + | 'module' module-name { '(' module-name ':' module-type ')' } + ':' module-type + | 'module' 'type' modtype-name + | 
'module' 'type' modtype-name '=' module-type + | 'open' module-path + | 'include' module-type +\end{syntax} + +\subsection{Simple module types} + +The expression @modtype-path@ is equivalent to the module type bound +to the name @modtype-path@. +The expression @'(' module-type ')'@ denotes the same type as +@module-type@. + +\subsection{Signatures} + +\ikwd{sig\@\texttt{sig}} +\ikwd{end\@\texttt{end}} + +Signatures are type specifications for structures. Signatures +@'sig' \ldots 'end'@ are collections of type specifications for value +names, type names, exceptions, module names and module type names. A +structure will match a signature if the structure provides definitions +(implementations) for all the names specified in the signature (and +possibly more), and these definitions meet the type requirements given +in the signature. + +An optional @";;"@ is allowed after each specification in a +signature. It serves as a syntactic separator with no semantic +meaning. + +\subsubsection*{Value specifications} + +\ikwd{val\@\texttt{val}} + +A specification of a value component in a signature is written +@'val' value-name ':' typexpr@, where @value-name@ is the name of the +value and @typexpr@ its expected type. + +\ikwd{external\@\texttt{external}} + +The form @'external' value-name ':' typexpr '=' external-declaration@ +is similar, except that it requires in addition the name to be +implemented as the external function specified in @external-declaration@ +(see chapter~\ref{c:intf-c}). + +\subsubsection*{Type specifications} + +\ikwd{type\@\texttt{type}} + +A specification of one or several type components in a signature is +written @'type' typedef { 'and' typedef }@ and consists of a sequence +of mutually recursive definitions of type names. + +Each type definition in the signature specifies an optional type +equation @'=' typexpr@ and an optional type representation +@'=' constr-decl \ldots@ or @'=' '{' field-decl \ldots '}'@. 
+The implementation of the type name in a matching structure must +be compatible with the type expression specified in the equation (if +given), and have the specified representation (if given). Conversely, +users of that signature will be able to rely on the type equation +or type representation, if given. More precisely, we have the +following four situations: + +\begin{description} +\item[Abstract type: no equation, no representation.] ~ \\ +Names that are defined as abstract types in a signature can be +implemented in a matching structure by any kind of type definition +(provided it has the same number of type parameters). The exact +implementation of the type will be hidden to the users of the +structure. In particular, if the type is implemented as a variant type +or record type, the associated constructors and fields will not be +accessible to the users; if the type is implemented as an +abbreviation, the type equality between the type name and the +right-hand side of the abbreviation will be hidden from the users of the +structure. Users of the structure consider that type as incompatible +with any other type: a fresh type has been generated. + +\item[Type abbreviation: an equation @'=' typexpr@, no representation.] ~ \\ +The type name must be implemented by a type compatible with @typexpr@. +All users of the structure know that the type name is +compatible with @typexpr@. + +\item[New variant type or record type: no equation, a representation.] ~ \\ +The type name must be implemented by a variant type or record type +with exactly the constructors or fields specified. All users of the +structure have access to the constructors or fields, and can use them +to create or inspect values of that type. However, users of the +structure consider that type as incompatible with any other type: a +fresh type has been generated. + +\item[Re-exported variant type or record type: an equation, +a representation.] 
~ \\ +This case combines the previous two: the representation of the type is +made visible to all users, and no fresh type is generated. +\end{description} + +\subsubsection*{Exception specification} + +\ikwd{exception\@\texttt{exception}} + +The specification @'exception' constr-decl@ in a signature requires the +matching structure to provide an exception with the name and arguments +specified in the definition, and makes the exception available to all +users of the structure. + +\subsubsection*{Class specifications} + +\ikwd{class\@\texttt{class}} + +A specification of one or several classes in a signature is written +@'class' class-spec { 'and' class-spec }@ and consists of a sequence +of mutually recursive definitions of class names. + +Class specifications are described more precisely in +section~\ref{s:class-spec}. + +\subsubsection*{Class type specifications} + +\ikwd{class\@\texttt{class}} +\ikwd{type\@\texttt{type}} + +A specification of one or several class types in a signature is +written @'class' 'type' classtype-def@ @{ 'and' classtype-def }@ and +consists of a sequence of mutually recursive definitions of class type +names. Class type specifications are described more precisely in +section~\ref{s:classtype}. + +\subsubsection*{Module specifications} + +\ikwd{module\@\texttt{module}} + +A specification of a module component in a signature is written +@'module' module-name ':' module-type@, where @module-name@ is the +name of the module component and @module-type@ its expected type. +Modules can be nested arbitrarily; in particular, functors can appear +as components of structures and functor types as components of +signatures.
+ +For specifying a module component that is a functor, one may write +\begin{center} +@'module' module-name '(' name_1 ':' module-type_1 ')' + \ldots '(' name_n ':' module-type_n ')' + ':' module-type@ +\end{center} +instead of +\begin{center} +@'module' module-name ':' + 'functor' '(' name_1 ':' module-type_1 ')' '->' \ldots + '->' module-type@ +\end{center} + +\subsubsection*{Module type specifications} + +\ikwd{type\@\texttt{type}} +\ikwd{module\@\texttt{module}} + +A module type component of a signature can be specified either as a +manifest module type or as an abstract module type. + +An abstract module type specification +@'module' 'type' modtype-name@ allows the name @modtype-name@ to be +implemented by any module type in a matching signature, but hides the +implementation of the module type to all users of the signature. + +A manifest module type specification +@'module' 'type' modtype-name '=' module-type@ +requires the name @modtype-name@ to be implemented by the module type +@module-type@ in a matching signature, but makes the equality between +@modtype-name@ and @module-type@ apparent to all users of the signature. + +\subsubsection{Opening a module path} + +\ikwd{open\@\texttt{open}} + +The expression @'open' module-path@ in a signature does not specify +any components. It simply affects the parsing of the following items +of the signature, allowing components of the module denoted by +@module-path@ to be referred to by their simple names @name@ instead of +path accesses @module-path '.' name@. The scope of the @"open"@ +stops at the end of the signature expression. + +\subsubsection{Including a signature} + +\ikwd{include\@\texttt{include}} + +The expression @'include' module-type@ in a signature performs textual +inclusion of the components of the signature denoted by @module-type@. +It behaves as if the components of the included signature were copied +at the location of the @'include'@. 
The @module-type@ argument must +refer to a module type that is a signature, not a functor type. + +\subsection{Functor types} + +\ikwd{functor\@\texttt{functor}} + +The module type expression +@'functor' '(' module-name ':' module-type_1 ')' '->' module-type_2@ +is the type of functors (functions from modules to modules) that take +as argument a module of type @module-type_1@ and return as result a +module of type @module-type_2@. The module type @module-type_2@ can +use the name @module-name@ to refer to type components of the actual +argument of the functor. No restrictions are placed on the type of the +functor argument; in particular, a functor may take another functor as +argument (``higher-order'' functor). + +\subsection{The "with" operator} + +\ikwd{with\@\texttt{with}} + +Assuming @module-type@ denotes a signature, the expression +@module-type 'with' mod-constraint@ @{ 'and' mod-constraint }@ denotes +the same signature where type equations have been added to some of the +type specifications, as described by the constraints following the +"with" keyword. The constraint @'type' [type-parameters] typeconstr +'=' typexpr@ adds the type equation @'=' typexpr@ to the specification +of the type component named @typeconstr@ of the constrained signature. +The constraint @'module' module-path '=' extended-module-path@ adds +type equations to all type components of the sub-structure denoted by +@module-path@, making them equivalent to the corresponding type +components of the structure denoted by @extended-module-path@. 
+ +For instance, if the module type name "S" is bound to the signature +\begin{verbatim} + sig type t module M: (sig type u end) end +\end{verbatim} +then "S with type t=int" denotes the signature +\begin{verbatim} + sig type t=int module M: (sig type u end) end +\end{verbatim} +and "S with module M = N" denotes the signature +\begin{verbatim} + sig type t module M: (sig type u=N.u end) end +\end{verbatim} +A functor taking two arguments of type "S" that share their "t" component +is written +\begin{verbatim} + functor (A: S) (B: S with type t = A.t) ... +\end{verbatim} + +Constraints are added left to right. After each constraint has been +applied, the resulting signature must be a subtype of the signature +before the constraint was applied. Thus, the @'with'@ operator can +only add information on the type components of a signature, but never +remove information. diff --git a/manual/manual/refman/modules.etex b/manual/manual/refman/modules.etex new file mode 100644 index 0000000000..574b0ff410 --- /dev/null +++ b/manual/manual/refman/modules.etex @@ -0,0 +1,231 @@ +\section{Module\label{s:module-expr} expressions (module implementations)} +\pdfsection{Module expressions (module implementations)} +%HEVEA\cutname{modules.html} + +Module expressions are the module-level equivalent of value +expressions: they evaluate to modules, thus providing implementations +for the specifications expressed in module types. 
+ +\ikwd{struct\@\texttt{struct}} +\ikwd{end\@\texttt{end}} +\ikwd{functor\@\texttt{functor}} +\ikwd{let\@\texttt{let}} +\ikwd{external\@\texttt{external}} +\ikwd{type\@\texttt{type}} +\ikwd{exception\@\texttt{exception}} +\ikwd{class\@\texttt{class}} +\ikwd{module\@\texttt{module}} +\ikwd{open\@\texttt{open}} +\ikwd{include\@\texttt{include}} + +\begin{syntax} +module-expr: + module-path + | 'struct' [ module-items ] 'end' + | 'functor' '(' module-name ':' module-type ')' '->' module-expr + | module-expr '(' module-expr ')' + | '(' module-expr ')' + | '(' module-expr ':' module-type ')' +; +module-items: + {';;'} ( definition || expr ) { {';;'} ( definition || ';;' expr) } {';;'} +; +%\end{syntax} \begin{syntax} +definition: + 'let' ['rec'] let-binding { 'and' let-binding } + | 'external' value-name ':' typexpr '=' external-declaration + | type-definition + | exception-definition + | class-definition + | classtype-definition + | 'module' module-name { '(' module-name ':' module-type ')' } + [ ':' module-type ] \\ '=' module-expr + | 'module' 'type' modtype-name '=' module-type + | 'open' module-path + | 'include' module-expr +\end{syntax} + +\subsection{Simple module expressions} + +The expression @module-path@ evaluates to the module bound to the name +@module-path@. + +The expression @'(' module-expr ')'@ evaluates to the same module as +@module-expr@. + +The expression @'(' module-expr ':' module-type ')'@ checks that the +type of @module-expr@ is a subtype of @module-type@, that is, that all +components specified in @module-type@ are implemented in +@module-expr@, and their implementation meets the requirements given +in @module-type@. In other terms, it checks that the implementation +@module-expr@ meets the type specification @module-type@. The whole +expression evaluates to the same module as @module-expr@, except that +all components not specified in @module-type@ are hidden and can no +longer be accessed. 
+ +\subsection{Structures} + +\ikwd{struct\@\texttt{struct}} +\ikwd{end\@\texttt{end}} + +Structures @'struct' \ldots 'end'@ are collections of definitions for +value names, type names, exceptions, module names and module type +names. The definitions are evaluated in the order in which they appear +in the structure. The scopes of the bindings performed by the +definitions extend to the end of the structure. As a consequence, a +definition may refer to names bound by earlier definitions in the same +structure. + +For compatibility with toplevel phrases (chapter~\ref{c:camllight}), +optional @";;"@ are allowed after and before each definition in a structure. These +@";;"@ have no semantic meaning. Similarly, an @expr@ preceded by ";;" is allowed as +a component of a structure. It is equivalent to @'let' '_' '=' expr@, i.e. @expr@ is +evaluated for its side-effects but is not bound to any identifier. If @expr@ is +the first component of a structure, the preceding ";;" can be omitted. + +\subsubsection*{Value definitions} + +\ikwd{let\@\texttt{let}} + +A value definition @'let' ['rec'] let-binding { 'and' let-binding }@ +binds value names in the same way as a @'let' \ldots 'in' \ldots@ expression +(see section~\ref{s:localdef}). The value names appearing in the +left-hand sides of the bindings are bound to the corresponding values +in the right-hand sides. + +\ikwd{external\@\texttt{external}} + +A value definition @'external' value-name ':' typexpr '=' external-declaration@ +implements @value-name@ as the external function specified in +@external-declaration@ (see chapter~\ref{c:intf-c}). + +\subsubsection*{Type definitions} + +\ikwd{type\@\texttt{type}} + +A definition of one or several type components is written +@'type' typedef { 'and' typedef }@ and consists of a sequence +of mutually recursive definitions of type names. 
+ +\subsubsection*{Exception definitions} + +\ikwd{exception\@\texttt{exception}} + +Exceptions are defined with the syntax @'exception' constr-decl@ +or @'exception' constr-name '=' constr@. + +\subsubsection*{Class definitions} + +\ikwd{class\@\texttt{class}} + +A definition of one or several classes is written @'class' +class-binding { 'and' class-binding }@ and consists of a sequence of +mutually recursive definitions of class names. Class definitions are +described more precisely in section~\ref{s:classdef}. + +\subsubsection*{Class type definitions} + +\ikwd{class\@\texttt{class}} +\ikwd{type\@\texttt{type}} + +A definition of one or several class types is written +@'class' 'type' classtype-def { 'and' classtype-def }@ and consists of +a sequence of mutually recursive definitions of class type names. +Class type definitions are described more precisely in +section~\ref{s:classtype}. + +\subsubsection*{Module definitions} + +\ikwd{module\@\texttt{module}} + +The basic form for defining a module component is +@'module' module-name '=' module-expr@, which evaluates @module-expr@ and binds +the result to the name @module-name@. + +One can write +\begin{center} +@'module' module-name ':' module-type '=' module-expr@ +\end{center} +instead of +\begin{center} +@'module' module-name '=' '(' module-expr ':' module-type ')'@. +\end{center} +Another derived form is +\begin{center} +@'module' module-name '(' name_1 ':' module-type_1 ')' \ldots + '(' name_n ':' module-type_n ')' '=' module-expr@ +\end{center} +which is equivalent to +\begin{center} +@'module' module-name '=' + 'functor' '(' name_1 ':' module-type_1 ')' '->' \ldots + '->' module-expr@ +\end{center} + +\subsubsection*{Module type definitions} + +\ikwd{type\@\texttt{type}} +\ikwd{module\@\texttt{module}} + +A definition for a module type is written +@'module' 'type' modtype-name '=' module-type@. +It binds the name @modtype-name@ to the module type denoted by the +expression @module-type@. 
+ +\subsubsection*{Opening a module path} + +\ikwd{open\@\texttt{open}} + +The expression @'open' module-path@ in a structure does not define any +components nor perform any bindings. It simply affects the parsing of +the following items of the structure, allowing components of the +module denoted by @module-path@ to be referred to by their simple names +@name@ instead of path accesses @module-path '.' name@. The scope of +the @"open"@ stops at the end of the structure expression. + +\subsubsection*{Including the components of another structure} + +\ikwd{include\@\texttt{include}} + +The expression @'include' module-expr@ in a structure re-exports in +the current structure all definitions of the structure denoted by +@module-expr@. For instance, if the identifier "S" is bound to the +module +\begin{verbatim} + struct type t = int let x = 2 end +\end{verbatim} +the module expression +\begin{verbatim} + struct include S let y = (x + 1 : t) end +\end{verbatim} +is equivalent to the module expression +\begin{verbatim} + struct type t = S.t let x = S.x let y = (x + 1 : t) end +\end{verbatim} +The difference between @'open'@ and @'include'@ is that @'open'@ +simply provides short names for the components of the opened +structure, without defining any components of the current structure, +while @'include'@ also adds definitions for the components of the +included structure. + +\subsection{Functors} + +\subsubsection*{Functor definition} + +\ikwd{functor\@\texttt{functor}} + +The expression @'functor' '(' module-name ':' module-type ')' '->' +module-expr@ evaluates to a functor that takes as argument modules of +the type @module-type@, binds @module-name@ to these modules, +evaluates @module-expr@ in the extended environment, and returns the +resulting modules as results. No restrictions are placed on the type of the +functor argument; in particular, a functor may take another functor as +argument (``higher-order'' functor). 
+ +\subsubsection*{Functor application} + +The expression @module-expr_1 '(' module-expr_2 ')'@ evaluates +@module-expr_1@ to a functor and @module-expr_2@ to a module, and +applies the former to the latter. The type of @module-expr_2@ must +match the type expected for the arguments of the functor @module-expr_1@. + diff --git a/manual/manual/refman/names.etex b/manual/manual/refman/names.etex new file mode 100644 index 0000000000..acdaa0d71d --- /dev/null +++ b/manual/manual/refman/names.etex @@ -0,0 +1,144 @@ +\section{Names} \label{s:names} +\pdfsection{Names} +%HEVEA\cutname{names.html} + +Identifiers are used to give names to several classes of language +objects and refer to these objects by name later: +\begin{itemize} +\item value names (syntactic class @value-name@), +\item value constructors and exception constructors (class @constr-name@), +\item labels (@label-name@, defined in section~\ref{s:labelname}), +\item polymorphic variant tags (@tag-name@), +\item type constructors (@typeconstr-name@), +\item record fields (@field-name@), +\item class names (@class-name@), +\item method names (@method-name@), +\item instance variable names (@inst-var-name@), +\item module names (@module-name@), +\item module type names (@modtype-name@). +\end{itemize} +These eleven name spaces are distinguished both by the context and by the +capitalization of the identifier: whether the first letter of the +identifier is in lowercase (written @lowercase-ident@ below) or in +uppercase (written @capitalized-ident@). Underscore is considered a +lowercase letter for this purpose. + +\ikwd{false\@\texttt{false}} +\ikwd{true\@\texttt{true}} + +\subsubsection*{Naming objects} + +\begin{syntax} +value-name: + lowercase-ident + | '(' operator-name ')' +; +operator-name: + prefix-symbol || infix-op +; +infix-op: + infix-symbol + | '*' || '+' || '-' || '-.' 
|| '=' || '!=' || '<' || '>' || 'or' || '||' + || '&' || '&&' || ':=' + | 'mod' || 'land' || 'lor' || 'lxor' || 'lsl' || 'lsr' || 'asr' +; +constr-name: + capitalized-ident +; +tag-name: + capitalized-ident +; +typeconstr-name: + lowercase-ident +; +field-name: + lowercase-ident +; +module-name: + capitalized-ident +; +modtype-name: + ident +; +class-name: + lowercase-ident +; +inst-var-name: + lowercase-ident +; +method-name: + lowercase-ident +\end{syntax} +As shown above, prefix and infix symbols as well as some keywords can +be used as value names, provided they are written between parentheses. +The capitalization rules are summarized in the table below. + +\begin{tableau}{|l|l|}{Name space}{Case of first letter} +\entree{Values}{lowercase} +\entree{Constructors}{uppercase} +\entree{Labels}{lowercase} +\entree{Polymorphic variant tags}{uppercase} +\entree{Exceptions}{uppercase} +\entree{Type constructors}{lowercase} +\entree{Record fields}{lowercase} +\entree{Classes}{lowercase} +\entree{Instance variables}{lowercase} +\entree{Methods}{lowercase} +\entree{Modules}{uppercase} +\entree{Module types}{any} +\end{tableau} + +{\it Note on polymorphic variant tags:\/} the current implementation accepts +lowercase variant tags in addition to capitalized variant tags, but we +suggest you avoid lowercase variant tags for portability and +compatibility with future OCaml versions. + +\subsubsection*{Referring to named objects} + +\begin{syntax} +value-path: + [ module-path '.' ] value-name +; +constr: + [ module-path '.' ] constr-name +; +typeconstr: + [ extended-module-path '.' ] typeconstr-name +; +field: + [ module-path '.' ] field-name +; +modtype-path: + [ extended-module-path '.' ] modtype-name +; +class-path: + [ module-path '.' ] class-name +; +classtype-path: + [ extended-module-path '.' ] class-name +; +module-path: + module-name { '.' module-name } +; +extended-module-path: + extended-module-name { '.' 
extended-module-name } +; +extended-module-name: + module-name { '(' extended-module-path ')' } +\end{syntax} + +A named object can be referred to either by its name (following the +usual static scoping rules for names) or by an access path @prefix '.' name@, +where @prefix@ designates a module and @name@ is the name of an object +defined in that module. The first component of the path, @prefix@, is +either a simple module name or an access path @name_1 '.' name_2 \ldots@, +in case the defining module is itself nested inside other modules. +For referring to type constructors, module types, or class types, +the @prefix@ can +also contain simple functor applications (as in the syntactic class +@extended-module-path@ above) in case the defining module is the +result of a functor application. + +Label names, tag names, method names and instance variable names need +not be qualified: the former three are global labels, while the latter +are local to a class. diff --git a/manual/manual/refman/patterns.etex b/manual/manual/refman/patterns.etex new file mode 100644 index 0000000000..fa286e0886 --- /dev/null +++ b/manual/manual/refman/patterns.etex @@ -0,0 +1,149 @@ +\section{Patterns} +\pdfsection{Patterns} +\ikwd{as\@\texttt{as}} +%HEVEA\cutname{patterns.html} +\begin{syntax} +pattern: + value-name + | '_' + | constant + | pattern 'as' value-name + | '(' pattern ')' + | '(' pattern ':' typexpr ')' + | pattern '|' pattern + | constr pattern + | "`"tag-name pattern + | "#"typeconstr + | pattern {{ ',' pattern }} + | '{' field '=' pattern { ';' field '=' pattern } [ ';' ] '}' + | '[' pattern { ';' pattern } [ ';' ] ']' + | pattern '::' pattern + | '[|' pattern { ';' pattern } [ ';' ] '|]' +\end{syntax} + +The table below shows the relative precedences and associativity of +operators and non-closed pattern constructions. The constructions with +higher precedences come first. +\ikwd{as\@\texttt{as}} +\begin{tableau}{|l|l|}{Operator}{Associativity} +\entree{".." 
(see section~\ref{s:range-patterns})}{--} +\entree{"lazy" (see section~\ref{s:lazy})}{--} +\entree{Constructor application, Tag application}{right} +\entree{"::"}{right} +\entree{","}{--} +\entree{"|"}{left} +\entree{"as"}{--} +\end{tableau} + +Patterns are templates that allow selecting data structures of a +given shape, and binding identifiers to components of the data +structure. This selection operation is called pattern matching; its +outcome is either ``this value does not match this pattern'', or +``this value matches this pattern, resulting in the following bindings +of names to values''. + +\subsubsection*{Variable patterns} + +A pattern that consists in a value name matches any value, +binding the name to the value. The pattern @"_"@ also matches +any value, but does not bind any name. + +Patterns are {\em linear\/}: a variable cannot be bound several times by +a given pattern. In particular, there is no way to test for equality +between two parts of a data structure using only a pattern (but +@"when"@ guards can be used for this purpose). + +\subsubsection*{Constant patterns} + +A pattern consisting in a constant matches the values that +are equal to this constant. + +%% FIXME for negative numbers, blanks are allowed between the minus +%% sign and the first digit. + +\subsubsection*{Alias patterns} +\ikwd{as\@\texttt{as}} + +The pattern @pattern_1 "as" value-name@ matches the same values as +@pattern_1@. If the matching against @pattern_1@ is successful, +the name @value-name@ is bound to the matched value, in addition to the +bindings performed by the matching against @pattern_1@. + +\subsubsection*{Parenthesized patterns} + +The pattern @"(" pattern_1 ")"@ matches the same values as +@pattern_1@. A type constraint can appear in a +parenthesized pattern, as in @"(" pattern_1 ":" typexpr ")"@. This +constraint forces the type of @pattern_1@ to be compatible with +@typexpr@. 
+ +\subsubsection*{``Or'' patterns} + +The pattern @pattern_1 "|" pattern_2@ represents the logical ``or'' of +the two patterns @pattern_1@ and @pattern_2@. A value matches +@pattern_1 "|" pattern_2@ if it matches @pattern_1@ or +@pattern_2@. The two sub-patterns @pattern_1@ and @pattern_2@ +must bind exactly the same identifiers to values having the same types. +Matching is performed from left to right. +More precisely, +in case some value~$v$ matches @pattern_1 "|" pattern_2@, the bindings +performed are those of @pattern_1@ when $v$ matches @pattern_1@. +Otherwise, value~$v$ matches @pattern_2@ whose bindings are performed. + + +\subsubsection*{Variant patterns} + +The pattern @constr '(' pattern_1 ',' \ldots ',' pattern_n ')'@ matches +all variants whose +constructor is equal to @constr@, and whose arguments match +@pattern_1 \ldots pattern_n@. It is a type error if $n$ is not the +number of arguments expected by the constructor. + +The pattern @constr '_'@ matches all variants whose constructor is +@constr@. + +The pattern @pattern_1 "::" pattern_2@ matches non-empty lists whose +heads match @pattern_1@, and whose tails match @pattern_2@. + +The pattern @"[" pattern_1 ";" \ldots ";" pattern_n "]"@ matches lists +of length $n$ whose elements match @pattern_1@ \ldots @pattern_n@, +respectively. This pattern behaves like +@pattern_1 "::" \ldots "::" pattern_n "::" "[]"@. + +\subsubsection*{Polymorphic variant patterns} + +The pattern @"`"tag-name pattern_1@ matches all polymorphic variants +whose tag is equal to @tag-name@, and whose argument matches +@pattern_1@. + +\subsubsection*{Polymorphic variant abbreviation patterns} + +If the type @["('a,'b,"\ldots")"] typeconstr = "[" "`"tag-name_1 typexpr_1 "|" +\ldots "|" "`"tag-name_n typexpr_n"]"@ is defined, then the pattern @"#"typeconstr@ +is a shorthand for the following or-pattern: +@"(" "`"tag-name_1"(_" ":" typexpr_1")" "|" \ldots "|" "`"tag-name_n"(_" +":" typexpr_n"))"@. 
It matches all values of type @"[<" typeconstr "]"@. + +\subsubsection*{Tuple patterns} + +The pattern @pattern_1 "," \ldots "," pattern_n@ matches $n$-tuples +whose components match the patterns @pattern_1@ through @pattern_n@. That +is, the pattern matches the tuple values $(v_1, \ldots, v_n)$ such that +@pattern_i@ matches $v_i$ for \fromoneto{i}{n}. + +\subsubsection*{Record patterns} + +The pattern @"{" field_1 "=" pattern_1 ";" \ldots ";" field_n "=" +pattern_n "}"@ matches records that define at least the fields +@field_1@ through @field_n@, and such that the value associated to +@field_i@ matches the pattern @pattern_i@, for \fromoneto{i}{n}. +The record value can define more fields than @field_1@ \ldots +@field_n@; the values associated to these extra fields are not taken +into account for matching. + +\subsubsection*{Array patterns} + +The pattern @"[|" pattern_1 ";" \ldots ";" pattern_n "|]"@ +matches arrays of length $n$ such that the $i$-th array element +matches the pattern @pattern_i@, for \fromoneto{i}{n}. + diff --git a/manual/manual/refman/refman.etex b/manual/manual/refman/refman.etex new file mode 100644 index 0000000000..a7daea0231 --- /dev/null +++ b/manual/manual/refman/refman.etex @@ -0,0 +1,48 @@ +\chapter{The OCaml language} \label{c:refman} +\pdfchapterfold{-12}{Reference manual for the OCaml language} +%HEVEA\cutname{language.html} + +%better html output that way, sniff. +%HEVEA\subsection*{Foreword} +%BEGIN LATEX +\section*{Foreword} +%END LATEX + +This document is intended as a reference manual for the OCaml +language. It lists the language constructs, and gives their precise +syntax and informal semantics. It is by no means a tutorial +introduction to the language: there is not a single example. A good +working knowledge of OCaml is assumed. + +No attempt has been made at mathematical rigor: words are employed +with their intuitive meaning, without further definition. 
As a +consequence, the typing rules have been left out, by lack of the +mathematical framework required to express them, while they are +definitely part of a full formal definition of the language. + + +\subsection*{Notations} + +The syntax of the language is given in BNF-like notation. Terminal +symbols are set in typewriter font (@'like' 'this'@). +Non-terminal symbols are set in italic font (@like that@). +Square brackets @[\ldots]@ denote optional components. Curly brackets +@{\ldots}@ denote zero, one or several repetitions of the enclosed +components. Curly brackets with a trailing plus sign @{{\ldots}}@ +denote one or several repetitions of the enclosed components. +Parentheses @(\ldots)@ denote grouping. + +%HEVEA\cutdef{section} +\input{lex} +\input{values} +\input{names} +\input{types} +\input{const} +\input{patterns} +\input{expr} +\input{typedecl} +\input{classes} +\input{modtypes} +\input{modules} +\input{compunit} +%HEVEA\cutend diff --git a/manual/manual/refman/typedecl.etex b/manual/manual/refman/typedecl.etex new file mode 100644 index 0000000000..f28a2110c7 --- /dev/null +++ b/manual/manual/refman/typedecl.etex @@ -0,0 +1,209 @@ +\section{Type and exception definitions} +%HEVEA\cutname{typedecl.html}% +\pdfsection{Type and exception definitions} + +\subsection{Type definitions} +\label{s:type-defs} + +Type definitions bind type constructors to data types: either +variant types, record types, type abbreviations, or abstract data +types. They also bind the value constructors and record fields +associated with the definition. 
+ +\ikwd{type\@\texttt{type}} + +\begin{syntax} +type-definition: + 'type' ['nonrec'] typedef { 'and' typedef } +; +typedef: + [type-params] typeconstr-name type-information +; +type-information: + [type-equation] [type-representation] { type-constraint } +; +type-equation: + '=' typexpr +; +type-representation: + '=' ['|'] constr-decl { '|' constr-decl } + | '=' '{' field-decl { ';' field-decl } [';'] '}' +; +type-params: + type-param + | '(' type-param { "," type-param } ')' +; +type-param: + [variance] "'" ident +; +variance: + '+' + | '-' +; +constr-decl: + (constr-name || '()') [ 'of' typexpr { '*' typexpr } ] +; +field-decl: + ['mutable'] field-name ':' poly-typexpr +; +type-constraint: + 'constraint' "'" ident '=' typexpr +\end{syntax} +\ikwd{mutable\@\texttt{mutable}} +\ikwd{constraint\@\texttt{constraint}} + +Type definitions are introduced by the "type" keyword, and +consist in one or several simple definitions, possibly mutually +recursive, separated by the "and" keyword. Each simple definition +defines one type constructor. + +A simple definition consists in a lowercase identifier, possibly +preceded by one or several type parameters, and followed by an +optional type equation, then an optional type representation, and then +a constraint clause. The identifier is the name of the type +constructor being defined. + +In the right-hand side of type definitions, references to one of the +type constructor names being defined are considered as recursive, +unless "type" is followed by "nonrec". The "nonrec" keyword was +introduced in OCaml 4.02.2. + +The optional type parameters are either one type variable @"'" ident@, +for type constructors with one parameter, or a list of type variables +@"('"ident_1,\ldots,"'"ident_n")"@, for type constructors with several +parameters. Each type parameter may be prefixed by a variance +constraint @"+"@ (resp. @"-"@) indicating that the parameter is +covariant (resp. contravariant). 
These type parameters can appear in +the type expressions of the right-hand side of the definition, +optionally restricted by a variance constraint; {\em i.e.\/} a +covariant parameter may only appear on the right side of a functional +arrow (more precisely, follow the left branch of an even number of +arrows), and a contravariant parameter only on the left side (left branch of +an odd number of arrows). If the type has a representation or +an equation, and the parameter is free ({\em i.e.\/} not bound via a +type constraint to a constructed type), its variance constraint is +checked but subtyping {\em etc.\/} will use the inferred variance of the +parameter, which may be less restrictive; otherwise ({\em i.e.\/} for abstract +types or non-free parameters), the variance must be given explicitly, +and the parameter is invariant if no variance is given. + +The optional type equation @'=' typexpr@ makes the defined type +equivalent to the type expression @typexpr@: +one can be substituted for the other during typing. +If no type equation is given, a new type is generated: the defined type +is incompatible with any other type. + +The optional type representation describes the data structure +representing the defined type, by giving the list of associated +constructors (if it is a variant type) or associated fields (if it is +a record type). If no type representation is given, nothing is +assumed on the structure of the type besides what is stated in the +optional type equation. + +The type representation @'=' ['|'] constr-decl { '|' constr-decl }@ +describes a variant type. The constructor declarations +@constr-decl_1, \ldots, constr-decl_n@ describe the constructors +associated to this variant type. The constructor +declaration @constr-name 'of' typexpr_1 '*' \ldots '*' typexpr_n@ +declares the name @constr-name@ as a non-constant constructor, whose +arguments have types @typexpr_1@ \ldots @typexpr_n@. 
+The constructor declaration @constr-name@ +declares the name @constr-name@ as a constant +constructor. Constructor names must be capitalized. + +The type representation @'=' '{' field-decl { ';' field-decl } [';'] '}'@ +describes a record type. The field declarations @field-decl_1, \ldots, +field-decl_n@ describe the fields associated to this record type. +The field declaration @field-name ':' poly-typexpr@ declares +@field-name@ as a field whose argument has type @poly-typexpr@. +The field declaration @'mutable' field-name ':' poly-typexpr@ +\ikwd{mutable\@\texttt{mutable}} +behaves similarly; in addition, it allows physical modification of +this field. +Immutable fields are covariant, mutable fields are non-variant. +Both mutable and immutable fields may have explicitly polymorphic +types. The polymorphism of the contents is statically checked whenever +a record value is created or modified. Extracted values may have their +types instantiated. + +The two components of a type definition, the optional equation and the +optional representation, can be combined independently, giving +rise to four typical situations: + +\begin{description} +\item[Abstract type: no equation, no representation.] ~\\ +When appearing in a module signature, this definition specifies +nothing on the type constructor, besides its number of parameters: +its representation is hidden and it is assumed incompatible with any +other type. + +\item[Type abbreviation: an equation, no representation.] ~\\ +This defines the type constructor as an abbreviation for the type +expression on the right of the @'='@ sign. + +\item[New variant type or record type: no equation, a representation.] ~\\ +This generates a new type constructor and defines associated +constructors or fields, through which values of that type can be +directly built or inspected. + +\item[Re-exported variant type or record type: an equation, +a representation.] 
~\\ +In this case, the type constructor is defined as an abbreviation for +the type expression given in the equation, but in addition the +constructors or fields given in the representation remain attached to +the defined type constructor. The type expression in the equation part +must agree with the representation: it must be of the same kind +(record or variant) and have exactly the same constructors or fields, +in the same order, with the same arguments. +\end{description} + +The type variables appearing as type parameters can optionally be +prefixed by "+" or "-" to indicate that the type constructor is +covariant or contravariant with respect to this parameter. This +variance information is used to decide subtyping relations when +checking the validity of @":>"@ coercions (see section \ref{s:coercions}). + +For instance, "type +'a t" declares "t" as an abstract type that is +covariant in its parameter; this means that if the type $\tau$ is a +subtype of the type $\sigma$, then $\tau " t"$ is a subtype of $\sigma +" t"$. Similarly, "type -'a t" declares that the abstract type "t" is +contravariant in its parameter: if $\tau$ is a subtype of $\sigma$, then +$\sigma " t"$ is a subtype of $\tau " t"$. If no "+" or "-" variance +annotation is given, the type constructor is assumed non-variant in the +corresponding parameter. For instance, the abstract type declaration +"type 'a t" means that $\tau " t"$ is neither a subtype nor a +supertype of $\sigma " t"$ if $\tau$ is a subtype of $\sigma$. + +The variance indicated by the "+" and "-" annotations on parameters +is enforced only for abstract and private types, or when there are +type constraints. +Otherwise, for abbreviations, variant and record types without type +constraints, the variance properties of the type constructor +are inferred from its definition, and the variance annotations are +only checked for conformance with the definition. 
+ +\ikwd{constraint\@\texttt{constraint}} +The construct @ 'constraint' "'" ident '=' typexpr @ allows the +specification of +type parameters. Any actual type argument corresponding to the type +parameter @ident@ has to be an instance of @typexpr@ (more precisely, +@ident@ and @typexpr@ are unified). Type variables of @typexpr@ can +appear in the type equation and the type declaration. + +\subsection{Exception definitions} \label{s:excdef} +\ikwd{exception\@\texttt{exception}} + +\begin{syntax} +exception-definition: + 'exception' constr-name [ 'of' typexpr { '*' typexpr } ] + | 'exception' constr-name '=' constr +\end{syntax} + +Exception definitions add new constructors to the built-in variant +type \verb"exn" of exception values. The constructors are declared as +for a definition of a variant type. + +The form @'exception' constr-name ['of' typexpr {'*' typexpr}]@ +generates a new exception, distinct from all other exceptions in the system. +The form @'exception' constr-name '=' constr@ +gives an alternate name to an existing exception. diff --git a/manual/manual/refman/types.etex b/manual/manual/refman/types.etex new file mode 100644 index 0000000000..fabd5e00ac --- /dev/null +++ b/manual/manual/refman/types.etex @@ -0,0 +1,233 @@ +\section{Type expressions} +\pdfsection{Type expressions} +%HEVEA\cutname{types.html} +\ikwd{as\@\texttt{as}} + +\begin{syntax} +typexpr: + "'" ident + | "_" + | '(' typexpr ')' + | [['?']label-name':'] typexpr '->' typexpr + | typexpr {{ '*' typexpr }} + | typeconstr + | typexpr typeconstr + | '(' typexpr { ',' typexpr } ')' typeconstr + | typexpr 'as' "'" ident + | polymorphic-variant-type + | '<' ['..'] '>' + | '<' method-type { ';' method-type } [';' || ';' '..'] '>' + | '#' class-path + | typexpr '#' class-path + | '(' typexpr { ',' typexpr } ')' '#' class-path +; +poly-typexpr: + typexpr + | {{ "'" ident }} '.' 
typexpr +; +method-type: + method-name ':' poly-typexpr +\end{syntax} + +The table below shows the relative precedences and associativity of +operators and non-closed type constructions. The constructions with +higher precedences come first. +\ikwd{as\@\texttt{as}} +\begin{tableau}{|l|l|}{Operator}{Associativity} +\entree{Type constructor application}{--} +\entree{"#"}{--} +\entree{"*"}{--} +\entree{"->"}{right} +\entree{"as"}{--} +\end{tableau} + +Type expressions denote types in definitions of data types as well as +in type constraints over patterns and expressions. + +\subsubsection*{Type variables} + +The type expression @"'" ident@ stands for the type variable named +@ident@. The type expression @"_"@ stands for an anonymous type variable. +In data type definitions, type variables are names for the +data type parameters. In type constraints, they represent unspecified +types that can be instantiated by any type to satisfy the type +constraint. In general the scope of a named type variable is the +whole top-level phrase where it appears, and it can only be +generalized when leaving +this scope. Anonymous variables have no such restriction. +In the following cases, the scope of named type variables is +restricted to the type expression where they appear: 1) for universal +(explicitly polymorphic) type variables; 2) for type variables that +only appear in public method specifications (as those variables will +be made universal, as described in section~\ref{sec-methspec}); +3) for variables used as aliases, when the type they are aliased to +would be invalid in the scope of the enclosing definition ({\it i.e.} +when it contains free universal type variables, or locally +defined types.) + +\subsubsection*{Parenthesized types} + +The type expression @"(" typexpr ")"@ denotes the same type as +@typexpr@. 
+ +\subsubsection*{Function types} + +The type expression @typexpr_1 '->' typexpr_2@ denotes the type of +functions mapping arguments of type @typexpr_1@ to results of type +@typexpr_2@. + +@label-name ':' typexpr_1 '->' typexpr_2@ denotes the same function type, but +the argument is labeled @label@. + +@'?' label-name ':' typexpr_1 '->' typexpr_2@ denotes the type of functions +mapping an optional labeled argument of type @typexpr_1@ to results of +type @typexpr_2@. That is, the physical type of the function will be +@typexpr_1 "option" '->' typexpr_2@. + +\subsubsection*{Tuple types} + +The type expression @typexpr_1 '*' \ldots '*' typexpr_n@ +denotes the type of tuples whose elements belong to types @typexpr_1, +\ldots typexpr_n@ respectively. + +\subsubsection*{Constructed types} + +Type constructors with no parameter, as in @typeconstr@, are type +expressions. + +The type expression @typexpr typeconstr@, where @typeconstr@ is a type +constructor with one parameter, denotes the application of the unary type +constructor @typeconstr@ to the type @typexpr@. + +The type expression @(typexpr_1,\ldots,typexpr_n) typeconstr@, where +@typeconstr@ is a type constructor with $n$ parameters, denotes the +application of the $n$-ary type constructor @typeconstr@ to the types +@typexpr_1@ through @typexpr_n@. + +\subsubsection*{Aliased and recursive types} + +\ikwd{as\@\texttt{as}} + +The type expression @typexpr 'as' "'" ident@ denotes the same type as +@typexpr@, and also binds the type variable @ident@ to type @typexpr@ both +in @typexpr@ and in other types. In general the scope of an alias is +the same as for a named type variable, and covers the whole enclosing +definition. If the type variable +@ident@ actually occurs in @typexpr@, a recursive type is created. Recursive +types for which there exists a recursive path that does not contain +an object or polymorphic variant type constructor are rejected, except +when the "-rectypes" mode is selected. 
+ +If @"'" ident@ denotes an explicit polymorphic variable, and @typexpr@ +denotes either an object or polymorphic variant type, the row variable +of @typexpr@ is captured by @"'" ident@, and quantified upon. + +\subsubsection*{Polymorphic variant types} + +\begin{syntax} +polymorphic-variant-type: + '[' tag-spec-first { '|' tag-spec } ']' + | '[>' [ tag-spec ] { '|' tag-spec } ']' + | '[<' ['|'] tag-spec-full { '|' tag-spec-full } + [ '>' {{ '`'tag-name }} ] ']' +; +%\end{syntax} \begin{syntax} +tag-spec-first: + '`'tag-name [ 'of' typexpr ] + | [ typexpr ] '|' tag-spec +; +tag-spec: + '`'tag-name [ "of" typexpr ] + | typexpr +; +tag-spec-full: + '`'tag-name [ "of" ['&'] typexpr { '&' typexpr } ] + | typexpr +\end{syntax} + +Polymorphic variant types describe the values a polymorphic variant +may take. + +The first case is an exact variant type: all possible tags are +known, with their associated types, and they can all be present. +Its structure is fully known. + +The second case is an open variant type, describing a polymorphic +variant value: it gives the list of all tags the value could take, +with their associated types. This type is still compatible with a +variant type containing more tags. A special case is the unknown +type, which does not define any tag, and is compatible with any +variant type. + +The third case is a closed variant type. It gives information about +all the possible tags and their associated types, and which tags are +known to potentially appear in values. The exact variant type (first +case) is +just an abbreviation for a closed variant type where all possible tags +are also potentially present. + +In all three cases, tags may be either specified directly in the +@'`'tag-name ["of" typexpr]@ form, or indirectly through a type +expression, which must expand to an +exact variant type, whose tag specifications are inserted in its +place. + +Full specifications of variant tags are only used for non-exact closed +types. 
They can be understood as a conjunctive type for the argument: +it is intended to have all the types enumerated in the +specification. + +Such conjunctive constraints may be unsatisfiable. In such a case the +corresponding tag may not be used in a value of this type. This +does not mean that the whole type is not valid: one can still use +other available tags. +Conjunctive constraints are mainly intended as output from the type +checker. When they are used in source programs, unsolvable constraints +may cause early failures. + +\subsubsection*{Object types} + +An object type +@'<' [method-type { ';' method-type }] '>'@ +is a record of method types. + +Each method may have an explicit polymorphic type: @{{ "'" ident }} +'.' typexpr@. Explicit polymorphic variables have a local scope, and +an explicit polymorphic type can only be unified to an +equivalent one, where only the order and names of polymorphic +variables may change. + +The type @'<' {method-type ';'} '..' '>'@ is the +type of an object whose method names and types are described by +@method-type_1, \ldots, method-type_n@, and possibly some other +methods represented by the ellipsis. This ellipsis actually is +a special kind of type variable (called {\em row variable} in the +literature) that stands for any number of extra method types. + +\subsubsection*{\#-types} +\label{s:sharp-types} + +The type @'#' class-path@ is a special kind of abbreviation. This +abbreviation unifies with the type of any object belonging to a subclass +of class @class-path@. +% +It is handled in a special way as it usually hides a type variable (an +ellipsis, representing the methods that may be added in a subclass). +In particular, it vanishes when the ellipsis gets instantiated. +% +Each type expression @'#' class-path@ defines a new type variable, so +type @'#' class-path '->' '#' class-path@ is usually not the same as +type @('#' class-path 'as' "'" ident) '->' "'" ident@. 
+% + +Use of \#-types to abbreviate polymorphic variant types is deprecated. +If @@t@@ is an exact variant type then @"#"@t@@ translates to @"[<" @t@"]"@, +and @"#"@t@"[>" "`"tag_1 \dots"`"tag_k"]"@ translates to +@"[<" @t@ ">" "`"tag_1 \dots"`"tag_k"]"@ + +\subsubsection*{Variant and record types} + +There are no type expressions describing (defined) variant types nor +record types, since those are always named, i.e. defined before use +and referred to by name. Type definitions are described in +section~\ref{s:type-defs}. diff --git a/manual/manual/refman/values.etex b/manual/manual/refman/values.etex new file mode 100644 index 0000000000..d5d01f0f28 --- /dev/null +++ b/manual/manual/refman/values.etex @@ -0,0 +1,97 @@ +\section{Values} +\pdfsection{Values} +%HEVEA\cutname{values.html} + +This section describes the kinds of values that are manipulated by +OCaml programs. + +\subsection{Base values} + +\subsubsection*{Integer numbers} + +Integer values are integer numbers from $-2^{30}$ to $2^{30}-1$, that +is $-1073741824$ to $1073741823$. The implementation may support a +wider range of integer values: on 64-bit platforms, the current +implementation supports integers ranging from $-2^{62}$ to $2^{62}-1$. + +\subsubsection*{Floating-point numbers} + +Floating-point values are numbers in floating-point representation. +The current implementation uses double-precision floating-point +numbers conforming to the IEEE 754 standard, with 53 bits of mantissa +and an exponent ranging from $-1022$ to $1023$. + +\subsubsection*{Characters} + +Character values are represented as 8-bit integers between 0 and 255. +Character codes between 0 and 127 are interpreted following the ASCII +standard. The current implementation interprets character codes +between 128 and 255 following the ISO 8859-1 standard. + +\subsubsection*{Character strings} \label{s:string-val} + +String values are finite sequences of characters. 
The current +implementation supports strings containing up to $2^{24} - 5$ +characters (16777211 characters); on 64-bit platforms, the limit is +$2^{57} - 9$. + +\subsection{Tuples} + +Tuples of values are written @'('@v@_1',' \ldots',' @v@_n')'@, standing for the +$n$-tuple of values @@v@_1@ to @@v@_n@. The current implementation +supports tuples of up to $2^{22} - 1$ elements (4194303 elements). + +\subsection{Records} + +Record values are labeled tuples of values. The record value written +@'{' field_1 '=' @v@_1';' \ldots';' field_n '=' @v@_n '}'@ associates the value +@@v@_i@ to the record field @field_i@, for $i = 1 \ldots n$. The current +implementation supports records with up to $2^{22} - 1$ fields +(4194303 fields). + +\subsection{Arrays} + +Arrays are finite, variable-sized sequences of values of the same +type. The current implementation supports arrays containing up to +$2^{22} - 1$ elements (4194303 elements) unless the elements are +floating-point numbers (2097151 elements in this case); on 64-bit +platforms, the limit is $2^{54} - 1$ for all arrays. + +\subsection{Variant values} + +Variant values are either a constant constructor, or a non-constant +constructor applied to a number of values. The former case is written +@constr@; the latter case is written @constr '('@v@_1',' ... ',' @v@_n +')'@, where the @@v@_i@ are said to be the arguments of the non-constant +constructor @constr@. The parentheses may be omitted if there is only +one argument. + +The following constants are treated like built-in constant +constructors: +\begin{tableau}{|l|l|}{Constant}{Constructor} +\entree{"false"}{the boolean false} +\entree{"true"}{the boolean true} +\entree{"()"}{the ``unit'' value} +\entree{"[]"}{the empty list} +\end{tableau} + +The current implementation limits each variant type to have at most +246 non-constant constructors and $2^{30}-1$ constant constructors. 
+ +\subsection{Polymorphic variants} + +Polymorphic variants are an alternate form of variant values, not +belonging explicitly to a predefined variant type, and following +specific typing rules. They can be either constant, written +@"`"tag-name@, or non-constant, written @"`"tag-name'('@v@')'@. + +\subsection{Functions} + +Functional values are mappings from values to values. + +\subsection{Objects} + +Objects are composed of a hidden internal state which is a +record of instance variables, and a set of methods for accessing and +modifying these variables. The structure of an object is described by +the toplevel class that created it. diff --git a/manual/manual/style.css b/manual/manual/style.css new file mode 100644 index 0000000000..fc3887e54a --- /dev/null +++ b/manual/manual/style.css @@ -0,0 +1,32 @@ +a:visited {color : #416DFF; text-decoration : none; } +a:link {color : #416DFF; text-decoration : none;} +a:hover {color : Red; text-decoration : none; background-color: #5FFF88} +a:active {color : Red; text-decoration : underline; } +.keyword { font-weight : bold ; color : Red } +.keywordsign { color : #C04600 } +.superscript { font-size : 4 } +.subscript { font-size : 4 } +.comment { color : Green } +.constructor { color : Blue } +.type { color : #5C6585 } +.string { color : Maroon } +.warning { color : Red ; font-weight : bold } +.info { margin-left : 3em; margin-right : 3em } +.code { color : #465F91 ; } +h1 { font-size : 20pt ; text-align: center; } +h2 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90BDFF ;padding: 2px; } +h3 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90DDFF ;padding: 2px; } +h4 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90EDFF ;padding: 2px; } +h5 { font-size : 20pt ; border: 1px solid #000000; margin-top: 
5px; margin-bottom: 2px;text-align: center; background-color: #90FDFF ;padding: 2px; } +h6 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90BDFF ; padding: 2px; } +div.h7 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90DDFF ; padding: 2px; } +div.h8 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #F0FFFF ; padding: 2px; } +div.h9 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #FFFFFF ; padding: 2px; } +.typetable { border-style : hidden } +.indextable { border-style : hidden } +.paramstable { border-style : hidden ; padding: 5pt 5pt} +body { background-color : White } +tr { background-color : White } +td.typefieldcomment { background-color : #FFFFFF } +pre { margin-bottom: 4px } +div.sig_block {margin-left: 2em}
\ No newline at end of file diff --git a/manual/manual/texstuff/.cvsignore b/manual/manual/texstuff/.cvsignore new file mode 100644 index 0000000000..84eade834c --- /dev/null +++ b/manual/manual/texstuff/.cvsignore @@ -0,0 +1,13 @@ +*.aux +*.dvi +*.idx +*.ilg +*.ind +*.log +*.toc +*.ipr +*.txt +*.pdf +*.ps +pdfmanual.out +manual.out diff --git a/manual/manual/texstuff/.gitignore b/manual/manual/texstuff/.gitignore new file mode 100644 index 0000000000..84eade834c --- /dev/null +++ b/manual/manual/texstuff/.gitignore @@ -0,0 +1,13 @@ +*.aux +*.dvi +*.idx +*.ilg +*.ind +*.log +*.toc +*.ipr +*.txt +*.pdf +*.ps +pdfmanual.out +manual.out diff --git a/manual/manual/textman/.cvsignore b/manual/manual/textman/.cvsignore new file mode 100644 index 0000000000..7247584502 --- /dev/null +++ b/manual/manual/textman/.cvsignore @@ -0,0 +1,5 @@ +manual.txt +manual.hmanual.kwd +*.haux +*.hind +*.htoc diff --git a/manual/manual/textman/.gitignore b/manual/manual/textman/.gitignore new file mode 100644 index 0000000000..7247584502 --- /dev/null +++ b/manual/manual/textman/.gitignore @@ -0,0 +1,5 @@ +manual.txt +manual.hmanual.kwd +*.haux +*.hind +*.htoc diff --git a/manual/manual/tutorials/.cvsignore b/manual/manual/tutorials/.cvsignore new file mode 100644 index 0000000000..81ccbe7105 --- /dev/null +++ b/manual/manual/tutorials/.cvsignore @@ -0,0 +1,2 @@ +*.tex +*.htex diff --git a/manual/manual/tutorials/.gitignore b/manual/manual/tutorials/.gitignore new file mode 100644 index 0000000000..81ccbe7105 --- /dev/null +++ b/manual/manual/tutorials/.gitignore @@ -0,0 +1,2 @@ +*.tex +*.htex diff --git a/manual/manual/tutorials/Makefile b/manual/manual/tutorials/Makefile new file mode 100644 index 0000000000..8dc6eee9ae --- /dev/null +++ b/manual/manual/tutorials/Makefile @@ -0,0 +1,19 @@ +FILES= coreexamples.tex lablexamples.tex objectexamples.tex moduleexamples.tex advexamples.tex + +CAMLLATEX=../../tools/caml-tex2 +TEXQUOTE=../../tools/texquote2 + +ALLFILES=$(FILES) + +all: 
$(ALLFILES) + +clean: + rm -f $(ALLFILES) + +.SUFFIXES: +.SUFFIXES: .etex .tex + +.etex.tex: + $(CAMLLATEX) -n 80 -o - $*.etex | $(TEXQUOTE) > $*.tex + +$(ALLFILES): $(CAMLLATEX) $(TEXQUOTE) diff --git a/manual/manual/tutorials/advexamples.etex b/manual/manual/tutorials/advexamples.etex new file mode 100644 index 0000000000..fff3f10836 --- /dev/null +++ b/manual/manual/tutorials/advexamples.etex @@ -0,0 +1,644 @@ +\chapter{Advanced examples with classes and modules} +\pdfchapterfold{-3}{Tutorial: Advanced examples with classes and modules} +%HEVEA\cutname{advexamples.html} +\label{c:advexamples} + +{\it (Chapter written by Didier Rémy)} + +\bigskip + +\noindent + +In this chapter, we show some larger examples using objects, classes +and modules. We review many of the object features simultaneously on +the example of a bank account. We show how modules taken from the +standard library can be expressed as classes. Lastly, we describe a +programming pattern known as {\em virtual types} through the example +of window managers. + +\section{Extended example: bank accounts} +\pdfsection{Extended example: bank accounts} +\label{ss:bank-accounts} + +In this section, we illustrate most aspects of Object and inheritance +by refining, debugging, and specializing the following +initial naive definition of a simple bank account. (We reuse the +module "Euro" defined at the end of chapter~\ref{c:objectexamples}.) +\begin{caml_eval} +module type MONEY = + sig + type t + class c : float -> + object ('a) + val repr : t + method value : t + method print : unit + method times : float -> 'a + method leq : 'a -> bool + method plus : 'a -> 'a + end + end;; +module Euro : MONEY = + struct + type t = float + class c x = + object (self : 'a) + val repr = x + method value = repr + method print = print_float repr + method times k = {< repr = k *. x >} + method leq (p : 'a) = repr <= p#value + method plus (p : 'a) = {< repr = x +. 
p#value >} + end + end;; +\end{caml_eval} +\begin{caml_example} +let euro = new Euro.c;; +let zero = euro 0.;; +let neg x = x#times (-1.);; +class account = + object + val mutable balance = zero + method balance = balance + method deposit x = balance <- balance # plus x + method withdraw x = + if x#leq balance then (balance <- balance # plus (neg x); x) else zero + end;; +let c = new account in c # deposit (euro 100.); c # withdraw (euro 50.);; +\end{caml_example} +We now refine this definition with a method to compute interest. +\begin{caml_example} +class account_with_interests = + object (self) + inherit account + method private interest = self # deposit (self # balance # times 0.03) + end;; +\end{caml_example} +We make the method "interest" private, since clearly it should not be +called freely from the outside. Here, it is only made accessible to subclasses +that will manage monthly or yearly updates of the account. + +We should soon fix a bug in the current definition: the deposit method can +be used for withdrawing money by depositing negative amounts. We can +fix this directly: +\begin{caml_example} +class safe_account = + object + inherit account + method deposit x = if zero#leq x then balance <- balance#plus x + end;; +\end{caml_example} +However, the bug might be fixed more safely by the following definition: +\begin{caml_example} +class safe_account = + object + inherit account as unsafe + method deposit x = + if zero#leq x then unsafe # deposit x + else raise (Invalid_argument "deposit") + end;; +\end{caml_example} +In particular, this does not require the knowledge of the implementation of +the method "deposit". + +To keep track of operations, we extend the class with a mutable field +"history" and a private method "trace" to add an operation in the +log. Then each method to be traced is redefined. 
+\begin{caml_example} +type 'a operation = Deposit of 'a | Retrieval of 'a;; +class account_with_history = + object (self) + inherit safe_account as super + val mutable history = [] + method private trace x = history <- x :: history + method deposit x = self#trace (Deposit x); super#deposit x + method withdraw x = self#trace (Retrieval x); super#withdraw x + method history = List.rev history + end;; +\end{caml_example} +%% \label{ss:bank:initializer} +One may wish to open an account and simultaneously deposit some initial +amount. Although the initial implementation did not address this +requirement, it can be achieved by using an initializer. +\begin{caml_example} +class account_with_deposit x = + object + inherit account_with_history + initializer balance <- x + end;; +\end{caml_example} +A better alternative is: +\begin{caml_example} +class account_with_deposit x = + object (self) + inherit account_with_history + initializer self#deposit x + end;; +\end{caml_example} +Indeed, the latter is safer since the call to "deposit" will automatically +benefit from safety checks and from the trace. +Let's test it: +\begin{caml_example} +let ccp = new account_with_deposit (euro 100.) in +let _balance = ccp#withdraw (euro 50.) in +ccp#history;; +\end{caml_example} +Closing an account can be done with the following polymorphic function: +\begin{caml_example} +let close c = c#withdraw c#balance;; +\end{caml_example} +Of course, this applies to all sorts of accounts. + +Finally, we gather several versions of the account into a module "Account" +abstracted over some currency. +\begin{caml_example*} +let today () = (01,01,2000) (* an approximation *) +module Account (M:MONEY) = + struct + type m = M.c + let m = new M.c + let zero = m 0. 
+ + class bank = + object (self) + val mutable balance = zero + method balance = balance + val mutable history = [] + method private trace x = history <- x::history + method deposit x = + self#trace (Deposit x); + if zero#leq x then balance <- balance # plus x + else raise (Invalid_argument "deposit") + method withdraw x = + if x#leq balance then + (balance <- balance # plus (neg x); self#trace (Retrieval x); x) + else zero + method history = List.rev history + end + + class type client_view = + object + method deposit : m -> unit + method history : m operation list + method withdraw : m -> m + method balance : m + end + + class virtual check_client x = + let y = if (m 100.)#leq x then x + else raise (Failure "Insufficient initial deposit") in + object (self) initializer self#deposit y end + + module Client (B : sig class bank : client_view end) = + struct + class account x : client_view = + object + inherit B.bank + inherit check_client x + end + + let discount x = + let c = new account x in + if today() < (1998,10,30) then c # deposit (m 100.); c + end + end;; +\end{caml_example*} +This shows the use of modules to group several class definitions that can in +fact be thought of as a single unit. This unit would be provided by a bank +for both internal and external uses. +This is implemented as a functor that abstracts over the currency so that +the same code can be used to provide accounts in different currencies. + +The class "bank" is the {\em real} implementation of the bank account (it +could have been inlined). This is the one that will be used for further +extensions, refinements, etc. Conversely, the client will only be given the client view. +\begin{caml_example*} +module Euro_account = Account(Euro);; +module Client = Euro_account.Client (Euro_account);; +new Client.account (new Euro.c 100.);; +\end{caml_example*} +Hence, the clients do not have direct access to the "balance", nor the +"history" of their own accounts. 
Their only way to change their balance is +to deposit or withdraw money. It is important to give the clients +a class and not just the ability to create accounts (such as the +promotional "discount" account), so that they can +personalize their account. +For instance, a client may refine the "deposit" and "withdraw" methods +so as to do his own financial bookkeeping, automatically. On the +other hand, the function "discount" is given as such, with no +possibility for further personalization. + +It is important to provide the client's view as a functor +"Client" so that client accounts can still be built after a possible +specialization of the "bank". +The functor "Client" may remain unchanged and be passed +the new definition to initialize a client's view of the extended account. +\begin{caml_example*} +module Investment_account (M : MONEY) = + struct + type m = M.c + module A = Account(M) + + class bank = + object + inherit A.bank as super + method deposit x = + if (new M.c 1000.)#leq x then + print_string "Would you like to invest?"; + super#deposit x + end + + module Client = A.Client + end;; +\end{caml_example*} +\begin{caml_eval} +module Euro_account = Investment_account (Euro);; +module Client = Euro_account.Client (Euro_account);; +new Client.account (new Euro.c 100.);; +\end{caml_eval} +The functor "Client" may also be redefined when some new features of the +account can be given to the client. 
+\begin{caml_example*} +module Internet_account (M : MONEY) = + struct + type m = M.c + module A = Account(M) + + class bank = + object + inherit A.bank + method mail s = print_string s + end + + class type client_view = + object + method deposit : m -> unit + method history : m operation list + method withdraw : m -> m + method balance : m + method mail : string -> unit + end + + module Client (B : sig class bank : client_view end) = + struct + class account x : client_view = + object + inherit B.bank + inherit A.check_client x + end + end + end;; +\end{caml_example*} +\begin{caml_eval} +module Euro_account = Internet_account (Euro);; +module Client = Euro_account.Client (Euro_account);; +new Client.account (new Euro.c 100.);; +\end{caml_eval} + + +\section{Simple modules as classes} +\pdfsection{Simple modules as classes} +\label{ss:modules-as-classes} + +One may wonder whether it is possible to treat primitive types such as +integers and strings as objects. Although this is usually uninteresting +for integers or strings, there may be some situations where +this is desirable. The class "money" above is such an example. +We show here how to do it for strings. + +\subsection{Strings} +\label{module:string} + +A naive definition of strings as objects could be: +\begin{caml_example} +class ostring s = + object + method get n = String.get s n + method print = print_string s + method escaped = new ostring (String.escaped s) + end;; +\end{caml_example} +However, the method "escaped" returns an object of the class "ostring", +and not an object of the current class. Hence, if the class is further +extended, the method "escaped" will only return an object of the parent +class. +\begin{caml_example} +class sub_string s = + object + inherit ostring s + method sub start len = new sub_string (String.sub s start len) + end;; +\end{caml_example} +As seen in section \ref{ss:binary-methods}, the solution is to use +functional update instead. 
We need to create an instance variable +containing the representation "s" of the string. +\begin{caml_example} +class better_string s = + object + val repr = s + method get n = String.get repr n + method print = print_string repr + method escaped = {< repr = String.escaped repr >} + method sub start len = {< repr = String.sub s start len >} + end;; +\end{caml_example} +As shown in the inferred type, the methods "escaped" and "sub" now return +objects of the same type as the one of the class. + +Another difficulty is the implementation of the method "concat". +In order to concatenate a string with another string of the same class, +one must be able to access the instance variable externally. Thus, a method +"repr" returning s must be defined. Here is the correct definition of +strings: +\begin{caml_example} +class ostring s = + object (self : 'mytype) + val repr = s + method repr = repr + method get n = String.get repr n + method print = print_string repr + method escaped = {< repr = String.escaped repr >} + method sub start len = {< repr = String.sub s start len >} + method concat (t : 'mytype) = {< repr = repr ^ t#repr >} + end;; +\end{caml_example} +Another constructor of the class string can be defined to return a new +string of a given length: +\begin{caml_example} +class cstring n = ostring (String.make n ' ');; +\end{caml_example} +Here, exposing the representation of strings is probably harmless. We +could also hide the representation of strings as we hid the currency in the +class "money" of section~\ref{ss:friends}. + +\subsubsection{Stacks} +\label{module:stack} + +There is sometimes an alternative between using modules or classes for +parametric data types. +Indeed, there are situations when the two approaches are quite similar. 
+For instance, a stack can be straightforwardly implemented as a class: +\begin{caml_example} +exception Empty;; +class ['a] stack = + object + val mutable l = ([] : 'a list) + method push x = l <- x::l + method pop = match l with [] -> raise Empty | a::l' -> l <- l'; a + method clear = l <- [] + method length = List.length l + end;; +\end{caml_example} +However, writing a method for iterating over a stack is more +problematic. A method "fold" would have type +"('b -> 'a -> 'b) -> 'b -> 'b". Here "'a" is the parameter of the stack. +The parameter "'b" is not related to the class "'a stack" but to the +argument that will be passed to the method "fold". +%The intuition is that method "fold" should be polymorphic, i.e. of type +%"All ('a) ('b -> 'a -> 'b) -> 'b -> 'b". +A naive approach is to make "'b" an extra parameter of class "stack": +\begin{caml_example} +class ['a, 'b] stack2 = + object + inherit ['a] stack + method fold f (x : 'b) = List.fold_left f x l + end;; +\end{caml_example} +However, the method "fold" of a given object can only be +applied to functions that all have the same type: +\begin{caml_example} +let s = new stack2;; +s#fold ( + ) 0;; +s;; +\end{caml_example} +A better solution is to use polymorphic methods, which were +introduced in OCaml version 3.05. Polymorphic methods make +it possible to treat the type variable "'b" in the type of "fold" as +universally quantified, giving "fold" the polymorphic type +"Forall 'b. ('b -> 'a -> 'b) -> 'b -> 'b". +An explicit type declaration on the method "fold" is required, since +the type checker cannot infer the polymorphic type by itself. +\begin{caml_example} +class ['a] stack3 = + object + inherit ['a] stack + method fold : 'b. ('b -> 'a -> 'b) -> 'b -> 'b + = fun f x -> List.fold_left f x l + end;; +\end{caml_example} + +% However, the nice correspondence between the implementations of stacks as +% modules or classes is a very particular case. 
+ +% XXX Maps + +\subsection{Hashtbl} +\label{module:hashtbl} + +A simplified version of object-oriented hash tables should have the +following class type. +\begin{caml_example} +class type ['a, 'b] hash_table = + object + method find : 'a -> 'b + method add : 'a -> 'b -> unit + end;; +\end{caml_example} +A simple implementation, which is quite reasonable for small hash tables is +to use an association list: +\begin{caml_example} +class ['a, 'b] small_hashtbl : ['a, 'b] hash_table = + object + val mutable table = [] + method find key = List.assoc key table + method add key valeur = table <- (key, valeur) :: table + end;; +\end{caml_example} +A better implementation, and one that scales up better, is to use a +true hash table\ldots\ whose elements are small hash tables! +\begin{caml_example} +class ['a, 'b] hashtbl size : ['a, 'b] hash_table = + object (self) + val table = Array.init size (fun i -> new small_hashtbl) + method private hash key = + (Hashtbl.hash key) mod (Array.length table) + method find key = table.(self#hash key) # find key + method add key = table.(self#hash key) # add key + end;; +\end{caml_example} + +% problem + +% solution + +\subsection{Sets} +\label{module:set} + +Implementing sets leads to another difficulty. Indeed, the method +"union" needs to be able to access the internal representation of +another object of the same class. + +This is another instance of friend functions as seen in section +\ref{ss:friends}. Indeed, this is the same mechanism used in the module +"Set" in the absence of objects. + +In the object-oriented version of sets, we only need to add an additional +method "tag" to return the representation of a set. Since sets are +parametric in the type of elements, the method "tag" has a parametric type +"'a tag", concrete within +the module definition but abstract in its signature. +From outside, it will then be guaranteed that two objects with a method "tag" +of the same type will share the same representation. 
+\begin{caml_example*} +module type SET = + sig + type 'a tag + class ['a] c : + object ('b) + method is_empty : bool + method mem : 'a -> bool + method add : 'a -> 'b + method union : 'b -> 'b + method iter : ('a -> unit) -> unit + method tag : 'a tag + end + end;; +module Set : SET = + struct + let rec merge l1 l2 = + match l1 with + [] -> l2 + | h1 :: t1 -> + match l2 with + [] -> l1 + | h2 :: t2 -> + if h1 < h2 then h1 :: merge t1 l2 + else if h1 > h2 then h2 :: merge l1 t2 + else merge t1 l2 + type 'a tag = 'a list + class ['a] c = + object (_ : 'b) + val repr = ([] : 'a list) + method is_empty = (repr = []) + method mem x = List.exists (( = ) x) repr + method add x = {< repr = merge [x] repr >} + method union (s : 'b) = {< repr = merge repr s#tag >} + method iter (f : 'a -> unit) = List.iter f repr + method tag = repr + end + end;; +\end{caml_example*} + +\section{The subject/observer pattern} +\pdfsection{The subject/observer pattern} +\label{ss:subject-observer} + +The following example, known as the subject/observer pattern, is often +presented in the literature as a difficult inheritance problem with +inter-connected classes. +The general pattern amounts to the definition of a pair of two +classes that recursively interact with one another. + +The class "observer" has a distinguished method "notify" that requires +two arguments, a subject and an event to execute an action. +\begin{caml_example} +class virtual ['subject, 'event] observer = + object + method virtual notify : 'subject -> 'event -> unit + end;; +\end{caml_example} +The class "subject" remembers a list of observers in an instance variable, +and has a distinguished method "notify_observers" to broadcast the message +"notify" to all observers with a particular event "e". 
+\begin{caml_example} +class ['observer, 'event] subject = + object (self) + val mutable observers = ([]:'observer list) + method add_observer obs = observers <- (obs :: observers) + method notify_observers (e : 'event) = + List.iter (fun x -> x#notify self e) observers + end;; +\end{caml_example} +The difficulty usually lies in defining instances of the pattern above +by inheritance. This can be done in a natural and obvious manner in +OCaml, as shown on the following example manipulating windows. +\begin{caml_example} +type event = Raise | Resize | Move;; +let string_of_event = function + Raise -> "Raise" | Resize -> "Resize" | Move -> "Move";; +let count = ref 0;; +class ['observer] window_subject = + let id = count := succ !count; !count in + object (self) + inherit ['observer, event] subject + val mutable position = 0 + method identity = id + method move x = position <- position + x; self#notify_observers Move + method draw = Printf.printf "{Position = %d}\n" position; + end;; +class ['subject] window_observer = + object + inherit ['subject, event] observer + method notify s e = s#draw + end;; +\end{caml_example} +As can be expected, the type of "window" is recursive. +\begin{caml_example} +let window = new window_subject;; +\end{caml_example} +However, the two classes of "window_subject" and "window_observer" are not +mutually recursive. +\begin{caml_example} +let window_observer = new window_observer;; +window#add_observer window_observer;; +window#move 1;; +\end{caml_example} + +Classes "window_observer" and "window_subject" can still be extended by +inheritance. For instance, one may enrich the "subject" with new +behaviors and refine the behavior of the observer. 
+\begin{caml_example} +class ['observer] richer_window_subject = + object (self) + inherit ['observer] window_subject + val mutable size = 1 + method resize x = size <- size + x; self#notify_observers Resize + val mutable top = false + method raise = top <- true; self#notify_observers Raise + method draw = Printf.printf "{Position = %d; Size = %d}\n" position size; + end;; +class ['subject] richer_window_observer = + object + inherit ['subject] window_observer as super + method notify s e = if e <> Raise then s#raise; super#notify s e + end;; +\end{caml_example} +We can also create a different kind of observer: +\begin{caml_example} +class ['subject] trace_observer = + object + inherit ['subject, event] observer + method notify s e = + Printf.printf + "<Window %d <== %s>\n" s#identity (string_of_event e) + end;; +\end{caml_example} +and attach several observers to the same object: +\begin{caml_example} +let window = new richer_window_subject;; +window#add_observer (new richer_window_observer);; +window#add_observer (new trace_observer);; +window#move 1; window#resize 2;; +\end{caml_example} + +%\subsection{Classes used as modules with inheritance} +% +% to be filled for next release... +% +% an example of stateless objects used to provide inheritance in modules +% + + +% LocalWords: objectexamples bsection init caml val int Oo succ incr ref +% LocalWords: typecheck leq bool cp eval sig struct ABSPOINT Abspoint iter neg +% LocalWords: accu mem rec repr Euro euro ccp inlined ostring len concat OCaml diff --git a/manual/manual/tutorials/coreexamples.etex b/manual/manual/tutorials/coreexamples.etex new file mode 100644 index 0000000000..fcc60dce95 --- /dev/null +++ b/manual/manual/tutorials/coreexamples.etex @@ -0,0 +1,611 @@ +\chapter{The core language} \label{c:core-xamples} +\pdfchapterfold{-9}{Tutorial: The core language} +%HEVEA\cutname{coreexamples.html} + +This part of the manual is a tutorial introduction to the +OCaml language. 
A good familiarity with programming in a conventional +language (say, Pascal or C) is assumed, but no prior exposure to +functional languages is required. The present chapter introduces the +core language. Chapter~\ref{c:moduleexamples} deals with the +module system, chapter~\ref{c:objectexamples} with the +object-oriented features, chapter~\ref{c:labl-examples} with +extensions to the core language (labeled arguments and polymorphic +variants), and chapter~\ref{c:advexamples} gives some advanced examples. + +\section{Basics} +\pdfsection{Basics} + +For this overview of OCaml, we use the interactive system, which +is started by running "ocaml" from the Unix shell, or by launching the +"OCamlwin.exe" application under Windows. This tutorial is presented +as the transcript of a session with the interactive system: +lines starting with "#" represent user input; the system responses are +printed below, without a leading "#". + +Under the interactive system, the user types OCaml phrases terminated +by ";;" in response to the "#" prompt, and the system compiles them +on the fly, executes them, and prints the outcome of evaluation. +Phrases are either simple expressions, or "let" definitions of +identifiers (either values or functions). +\begin{caml_example} +1+2*3;; +let pi = 4.0 *. atan 1.0;; +let square x = x *. x;; +square (sin pi) +. square (cos pi);; +\end{caml_example} +The OCaml system computes both the value and the type for +each phrase. Even function parameters need no explicit type declaration: +the system infers their types from their usage in the +function. Notice also that integers and floating-point numbers are +distinct types, with distinct operators: "+" and "*" operate on +integers, but "+." and "*." operate on floats.
+\begin{caml_example} +1.0 * 2;; +\end{caml_example} + +Recursive functions are defined with the "let rec" binding: +\begin{caml_example} +let rec fib n = + if n < 2 then n else fib (n-1) + fib (n-2);; +fib 10;; +\end{caml_example} + +\section{Data types} +\pdfsection{Data types} + +In addition to integers and floating-point numbers, OCaml offers the +usual basic data types: booleans, characters, and immutable character strings. +\begin{caml_example} +(1 < 2) = false;; +'a';; +"Hello world";; +\end{caml_example} + +Predefined data structures include tuples, arrays, and lists. General +mechanisms for defining your own data structures are also provided. +They will be covered in more detail later; for now, we concentrate on lists. +Lists are either given in extension as a bracketed list of +semicolon-separated elements, or built from the empty list "[]" +(pronounce ``nil'') by adding elements in front using the "::" +(``cons'') operator. +\begin{caml_example} +let l = ["is"; "a"; "tale"; "told"; "etc."];; +"Life" :: l;; +\end{caml_example} +As with all other OCaml data structures, lists do not need to be +explicitly allocated and deallocated from memory: all memory +management is entirely automatic in OCaml. Similarly, there is no +explicit handling of pointers: the OCaml compiler silently introduces +pointers where necessary. + +As with most OCaml data structures, inspecting and destructuring lists +is performed by pattern-matching. List patterns have the exact same +shape as list expressions, with identifiers representing unspecified +parts of the list.
As an example, here is insertion sort on a list: +\begin{caml_example} +let rec sort lst = + match lst with + [] -> [] + | head :: tail -> insert head (sort tail) +and insert elt lst = + match lst with + [] -> [elt] + | head :: tail -> if elt <= head then elt :: lst else head :: insert elt tail +;; +sort l;; +\end{caml_example} + +The type inferred for "sort", "'a list -> 'a list", means that "sort" +can actually apply to lists of any type, and returns a list of the +same type. The type "'a" is a {\em type variable}, and stands for any +given type. The reason why "sort" can apply to lists of any type is +that the comparisons ("=", "<=", etc.) are {\em polymorphic} in OCaml: +they operate between any two values of the same type. This makes +"sort" itself polymorphic over all list types. +\begin{caml_example} +sort [6;2;5;3];; +sort [3.14; 2.718];; +\end{caml_example} + +The "sort" function above does not modify its input list: it builds +and returns a new list containing the same elements as the input list, +in ascending order. There is actually no way in OCaml to modify +in-place a list once it is built: we say that lists are {\em immutable} +data structures. Most OCaml data structures are immutable, but a few +(most notably arrays) are {\em mutable}, meaning that they can be +modified in-place at any time. + +\section{Functions as values} +\pdfsection{Functions as values} + +OCaml is a functional language: functions in the full mathematical +sense are supported and can be passed around freely just as any other +piece of data. For instance, here is a "deriv" function that takes any +float function as argument and returns an approximation of its +derivative function: +\begin{caml_example} +let deriv f dx = function x -> (f (x +. dx) -. f x) /. 
dx;; +let sin' = deriv sin 1e-6;; +sin' pi;; +\end{caml_example} +Even function composition is definable: +\begin{caml_example} +let compose f g = function x -> f (g x);; +let cos2 = compose square cos;; +\end{caml_example} + +Functions that take other functions as arguments are called +``functionals'', or ``higher-order functions''. Functionals are +especially useful to provide iterators or similar generic operations +over a data structure. For instance, the standard OCaml library +provides a "List.map" functional that applies a given function to each +element of a list, and returns the list of the results: +\begin{caml_example} +List.map (function n -> n * 2 + 1) [0;1;2;3;4];; +\end{caml_example} +This functional, along with a number of other list and array +functionals, is predefined because it is often useful, but there is +nothing magic with it: it can easily be defined as follows. +\begin{caml_example} +let rec map f l = + match l with + [] -> [] + | hd :: tl -> f hd :: map f tl;; +\end{caml_example} + +\section{Records and variants} +\pdfsection{Records and variants} +\label{s:tut-recvariants} + +User-defined data structures include records and variants. Both are +defined with the "type" declaration. Here, we declare a record type to +represent rational numbers. +\begin{caml_example} +type ratio = {num: int; denom: int};; +let add_ratio r1 r2 = + {num = r1.num * r2.denom + r2.num * r1.denom; + denom = r1.denom * r2.denom};; +add_ratio {num=1; denom=3} {num=2; denom=5};; +\end{caml_example} + +The declaration of a variant type lists all possible shapes for values +of that type. Each case is identified by a name, called a constructor, +which serves both for constructing values of the variant type and +inspecting them by pattern-matching. Constructor names are capitalized +to distinguish them from variable names (which must start with a +lowercase letter). 
For instance, here is a variant +type for doing mixed arithmetic (integers and floats): +\begin{caml_example} +type number = Int of int | Float of float | Error;; +\end{caml_example} +This declaration expresses that a value of type "number" is either an +integer, a floating-point number, or the constant "Error" representing +the result of an invalid operation (e.g. a division by zero). + +Enumerated types are a special case of variant types, where all +alternatives are constants: +\begin{caml_example} +type sign = Positive | Negative;; +let sign_int n = if n >= 0 then Positive else Negative;; +\end{caml_example} + +To define arithmetic operations for the "number" type, we use +pattern-matching on the two numbers involved: +\begin{caml_example} +let add_num n1 n2 = + match (n1, n2) with + (Int i1, Int i2) -> + (* Check for overflow of integer addition *) + if sign_int i1 = sign_int i2 && sign_int (i1 + i2) <> sign_int i1 + then Float(float i1 +. float i2) + else Int(i1 + i2) + | (Int i1, Float f2) -> Float(float i1 +. f2) + | (Float f1, Int i2) -> Float(f1 +. float i2) + | (Float f1, Float f2) -> Float(f1 +. f2) + | (Error, _) -> Error + | (_, Error) -> Error;; +add_num (Int 123) (Float 3.14159);; +\end{caml_example} + +The most common usage of variant types is to describe recursive data +structures. Consider for example the type of binary trees: +\begin{caml_example} +type 'a btree = Empty | Node of 'a * 'a btree * 'a btree;; +\end{caml_example} +This definition reads as follows: a binary tree containing values of +type "'a" (an arbitrary type) is either empty, or is a node containing +one value of type "'a" and two subtrees containing also values of type +"'a", that is, two "'a btree". + +Operations on binary trees are naturally expressed as recursive functions +following the same structure as the type definition itself.
For +instance, here are functions performing lookup and insertion in +ordered binary trees (elements increase from left to right): +\begin{caml_example} +let rec member x btree = + match btree with + Empty -> false + | Node(y, left, right) -> + if x = y then true else + if x < y then member x left else member x right;; +let rec insert x btree = + match btree with + Empty -> Node(x, Empty, Empty) + | Node(y, left, right) -> + if x <= y then Node(y, insert x left, right) + else Node(y, left, insert x right);; +\end{caml_example} + +\section{Imperative features} +\pdfsection{Imperative features} + +Though all examples so far were written in purely applicative style, +OCaml is also equipped with full imperative features. This includes the +usual "while" and "for" loops, as well as mutable data structures such +as arrays. Arrays are either given in extension between "[|" and "|]" +brackets, or allocated and initialized with the "Array.make" +function, then filled up later by assignments. For instance, the +function below sums two vectors (represented as float arrays) componentwise. +\begin{caml_example} +let add_vect v1 v2 = + let len = min (Array.length v1) (Array.length v2) in + let res = Array.make len 0.0 in + for i = 0 to len - 1 do + res.(i) <- v1.(i) +. v2.(i) + done; + res;; +add_vect [| 1.0; 2.0 |] [| 3.0; 4.0 |];; +\end{caml_example} + +Record fields can also be modified by assignment, provided they are +declared "mutable" in the definition of the record type: +\begin{caml_example} +type mutable_point = { mutable x: float; mutable y: float };; +let translate p dx dy = + p.x <- p.x +. dx; p.y <- p.y +. dy;; +let mypoint = { x = 0.0; y = 0.0 };; +translate mypoint 1.0 2.0;; +mypoint;; +\end{caml_example} + +OCaml has no built-in notion of variable -- identifiers whose current +value can be changed by assignment. (The "let" binding is not an +assignment, it introduces a new identifier with a new scope.) 
+However, the standard library provides references, which are mutable +indirection cells (or one-element arrays), with operators "!" to fetch +the current contents of the reference and ":=" to assign the contents. +Variables can then be emulated by "let"-binding a reference. For +instance, here is an in-place insertion sort over arrays: +\begin{caml_example} +let insertion_sort a = + for i = 1 to Array.length a - 1 do + let val_i = a.(i) in + let j = ref i in + while !j > 0 && val_i < a.(!j - 1) do + a.(!j) <- a.(!j - 1); + j := !j - 1 + done; + a.(!j) <- val_i + done;; +\end{caml_example} + +References are also useful to write functions that maintain a current +state between two calls to the function. For instance, the following +pseudo-random number generator keeps the last returned number in a +reference: +\begin{caml_example} +let current_rand = ref 0;; +let random () = + current_rand := !current_rand * 25713 + 1345; + !current_rand;; +\end{caml_example} + +Again, there is nothing magical with references: they are implemented as +a single-field mutable record, as follows. +\begin{caml_example} +type 'a ref = { mutable contents: 'a };; +let ( ! ) r = r.contents;; +let ( := ) r newval = r.contents <- newval;; +\end{caml_example} + +In some special cases, you may need to store a polymorphic function in +a data structure, keeping its polymorphism. Without user-provided +type annotations, this is not allowed, as polymorphism is only +introduced on a global level. However, you can give explicitly +polymorphic types to record fields. +\begin{caml_example} +type idref = { mutable id: 'a. 'a -> 'a };; +let r = {id = fun x -> x};; +let g s = (s.id 1, s.id true);; +r.id <- (fun x -> print_string "called id\n"; x);; +g r;; +\end{caml_example} + +\section{Exceptions} +\pdfsection{Exceptions} + +OCaml provides exceptions for signalling and handling exceptional +conditions. Exceptions can also be used as a general-purpose non-local +control structure. 
Exceptions are declared with the "exception" +construct, and signalled with the "raise" operator. For instance, the +function below for taking the head of a list uses an exception to +signal the case where an empty list is given. +\begin{caml_example} +exception Empty_list;; +let head l = + match l with + [] -> raise Empty_list + | hd :: tl -> hd;; +head [1;2];; +head [];; +\end{caml_example} + +Exceptions are used throughout the standard library to signal cases +where the library functions cannot complete normally. For instance, +the "List.assoc" function, which returns the data associated with a +given key in a list of (key, data) pairs, raises the predefined +exception "Not_found" when the key does not appear in the list: +\begin{caml_example} +List.assoc 1 [(0, "zero"); (1, "one")];; +List.assoc 2 [(0, "zero"); (1, "one")];; +\end{caml_example} + +Exceptions can be trapped with the "try"\ldots"with" construct: +\begin{caml_example} +let name_of_binary_digit digit = + try + List.assoc digit [0, "zero"; 1, "one"] + with Not_found -> + "not a binary digit";; +name_of_binary_digit 0;; +name_of_binary_digit (-1);; +\end{caml_example} + +The "with" part is actually a regular pattern-matching on the +exception value. Thus, several exceptions can be caught by one +"try"\ldots"with" construct. Also, finalization can be performed by +trapping all exceptions, performing the finalization, then raising +again the exception: +\begin{caml_example} +let temporarily_set_reference ref newval funct = + let oldval = !ref in + try + ref := newval; + let res = funct () in + ref := oldval; + res + with x -> + ref := oldval; + raise x;; +\end{caml_example} + +\section{Symbolic processing of expressions} +\pdfsection{Symbolic processing of expressions} + +We finish this introduction with a more complete example +representative of the use of OCaml for symbolic processing: formal +manipulations of arithmetic expressions containing variables. 
The +following variant type describes the expressions we shall manipulate: +\begin{caml_example} +type expression = + Const of float + | Var of string + | Sum of expression * expression (* e1 + e2 *) + | Diff of expression * expression (* e1 - e2 *) + | Prod of expression * expression (* e1 * e2 *) + | Quot of expression * expression (* e1 / e2 *) +;; +\end{caml_example} + +We first define a function to evaluate an expression given an +environment that maps variable names to their values. For simplicity, +the environment is represented as an association list. +\begin{caml_example} +exception Unbound_variable of string;; +let rec eval env exp = + match exp with + Const c -> c + | Var v -> + (try List.assoc v env with Not_found -> raise (Unbound_variable v)) + | Sum(f, g) -> eval env f +. eval env g + | Diff(f, g) -> eval env f -. eval env g + | Prod(f, g) -> eval env f *. eval env g + | Quot(f, g) -> eval env f /. eval env g;; +eval [("x", 1.0); ("y", 3.14)] (Prod(Sum(Var "x", Const 2.0), Var "y"));; +\end{caml_example} + +Now for a real symbolic processing, we define the derivative of an +expression with respect to a variable "dv": +\begin{caml_example} +let rec deriv exp dv = + match exp with + Const c -> Const 0.0 + | Var v -> if v = dv then Const 1.0 else Const 0.0 + | Sum(f, g) -> Sum(deriv f dv, deriv g dv) + | Diff(f, g) -> Diff(deriv f dv, deriv g dv) + | Prod(f, g) -> Sum(Prod(f, deriv g dv), Prod(deriv f dv, g)) + | Quot(f, g) -> Quot(Diff(Prod(deriv f dv, g), Prod(f, deriv g dv)), + Prod(g, g)) +;; +deriv (Quot(Const 1.0, Var "x")) "x";; +\end{caml_example} + +\section{Pretty-printing and parsing} +\pdfsection{Pretty-printing and parsing} + +As shown in the examples above, the internal representation (also +called {\em abstract syntax\/}) of expressions quickly becomes hard to +read and write as the expressions get larger. 
We need a printer and a +parser to go back and forth between the abstract syntax and the {\em +concrete syntax}, which in the case of expressions is the familiar +algebraic notation (e.g. "2*x+1"). + +For the printing function, we take into account the usual precedence +rules (i.e. "*" binds tighter than "+") to avoid printing unnecessary +parentheses. To this end, we maintain the current operator precedence +and print parentheses around an operator only if its precedence is +less than the current precedence. +\begin{caml_example} +let print_expr exp = + (* Local function definitions *) + let open_paren prec op_prec = + if prec > op_prec then print_string "(" in + let close_paren prec op_prec = + if prec > op_prec then print_string ")" in + let rec print prec exp = (* prec is the current precedence *) + match exp with + Const c -> print_float c + | Var v -> print_string v + | Sum(f, g) -> + open_paren prec 0; + print 0 f; print_string " + "; print 0 g; + close_paren prec 0 + | Diff(f, g) -> + open_paren prec 0; + print 0 f; print_string " - "; print 1 g; + close_paren prec 0 + | Prod(f, g) -> + open_paren prec 2; + print 2 f; print_string " * "; print 2 g; + close_paren prec 2 + | Quot(f, g) -> + open_paren prec 2; + print 2 f; print_string " / "; print 3 g; + close_paren prec 2 + in print 0 exp;; +let e = Sum(Prod(Const 2.0, Var "x"), Const 1.0);; +print_expr e; print_newline ();; +print_expr (deriv e "x"); print_newline ();; +\end{caml_example} + +%%%%%%%%%%% Should be moved to the camlp4 documentation. +%% Parsing (transforming concrete syntax into abstract syntax) is usually +%% more delicate. 
OCaml offers several tools to help write parsers: +%% on the one hand, OCaml versions of the lexer generator Lex and the +%% parser generator Yacc (see chapter~\ref{c:ocamlyacc}), which handle +%% LALR(1) languages using push-down automata; on the other hand, a +%% predefined type of streams (of characters or tokens) and +%% pattern-matching over streams, which facilitate the writing of +%% recursive-descent parsers for LL(1) languages. An example using +%% "ocamllex" and "ocamlyacc" is given in +%% chapter~\ref{c:ocamlyacc}. Here, we will use stream parsers. +%% The syntactic support for stream parsers is provided by the Camlp4 +%% preprocessor, which can be loaded into the interactive toplevel via +%% the "#load" directives below. +%% +%% \begin{caml_example} +%% #load "dynlink.cma";; +%% #load "camlp4o.cma";; +%% open Genlex;; +%% let lexer = make_lexer ["("; ")"; "+"; "-"; "*"; "/"];; +%% \end{caml_example} +%% For the lexical analysis phase (transformation of the input text into +%% a stream of tokens), we use a ``generic'' lexer provided in the +%% standard library module "Genlex". The "make_lexer" function takes a +%% list of keywords and returns a lexing function that ``tokenizes'' an +%% input stream of characters. Tokens are either identifiers, keywords, +%% or literals (integer, floats, characters, strings). Whitespace and +%% comments are skipped. +%% \begin{caml_example} +%% let token_stream = lexer (Stream.of_string "1.0 +x");; +%% Stream.next token_stream;; +%% Stream.next token_stream;; +%% Stream.next token_stream;; +%% \end{caml_example} +%% +%% The parser itself operates by pattern-matching on the stream of +%% tokens. As usual with recursive descent parsers, we use several +%% intermediate parsing functions to reflect the precedence and +%% associativity of operators. 
Pattern-matching over streams is more +%% powerful than on regular data structures, as it allows recursive calls +%% to parsing functions inside the patterns, for matching sub-components of +%% the input stream. See the Camlp4 documentation for more details. +%% +%% %Already said above +%% %In order to use stream parsers at toplevel, we must first load the +%% %"camlp4" preprocessor. +%% %\begin{caml_example} +%% %#load"camlp4o.cma";; +%% %\end{caml_example} +%% %Then we are ready to define our parser. +%% \begin{caml_example} +%% let rec parse_expr = parser +%% [< e1 = parse_mult; e = parse_more_adds e1 >] -> e +%% and parse_more_adds e1 = parser +%% [< 'Kwd "+"; e2 = parse_mult; e = parse_more_adds (Sum(e1, e2)) >] -> e +%% | [< 'Kwd "-"; e2 = parse_mult; e = parse_more_adds (Diff(e1, e2)) >] -> e +%% | [< >] -> e1 +%% and parse_mult = parser +%% [< e1 = parse_simple; e = parse_more_mults e1 >] -> e +%% and parse_more_mults e1 = parser +%% [< 'Kwd "*"; e2 = parse_simple; e = parse_more_mults (Prod(e1, e2)) >] -> e +%% | [< 'Kwd "/"; e2 = parse_simple; e = parse_more_mults (Quot(e1, e2)) >] -> e +%% | [< >] -> e1 +%% and parse_simple = parser +%% [< 'Ident s >] -> Var s +%% | [< 'Int i >] -> Const(float i) +%% | [< 'Float f >] -> Const f +%% | [< 'Kwd "("; e = parse_expr; 'Kwd ")" >] -> e;; +%% let parse_expression = parser [< e = parse_expr; _ = Stream.empty >] -> e;; +%% \end{caml_example} +%% +%% Composing the lexer and parser, we finally obtain a function to read +%% an expression from a character string: +%% \begin{caml_example} +%% let read_expression s = parse_expression (lexer (Stream.of_string s));; +%% read_expression "2*(x+y)";; +%% \end{caml_example} +%% A small puzzle: why do we get different results in the following two +%% examples? +%% \begin{caml_example} +%% read_expression "x - 1";; +%% read_expression "x-1";; +%% \end{caml_example} +%% Answer: the generic lexer provided by "Genlex" recognizes negative +%% integer literals as one integer token. 
Hence, "x-1" is read as +%% the token "Ident \"x\"" followed by the token "Int(-1)"; this sequence +%% does not match any of the parser rules. On the other hand, +%% the second space in "x - 1" causes the lexer to return the three +%% expected tokens: "Ident \"x\"", then "Kwd \"-\"", then "Int(1)". + +\section{Standalone OCaml programs} +\pdfsection{Standalone OCaml programs} + +All examples given so far were executed under the interactive system. +OCaml code can also be compiled separately and executed +non-interactively using the batch compilers "ocamlc" and "ocamlopt". +The source code must be put in a file with extension ".ml". It +consists of a sequence of phrases, which will be evaluated at runtime +in their order of appearance in the source file. Unlike in interactive +mode, types and values are not printed automatically; the program must +call printing functions explicitly to produce some output. Here is a +sample standalone program to print Fibonacci numbers: +\begin{verbatim} +(* File fib.ml *) +let rec fib n = + if n < 2 then 1 else fib (n-1) + fib (n-2);; +let main () = + let arg = int_of_string Sys.argv.(1) in + print_int (fib arg); + print_newline (); + exit 0;; +main ();; +\end{verbatim} +"Sys.argv" is an array of strings containing the command-line +parameters. "Sys.argv.(1)" is thus the first command-line parameter. +The program above is compiled and executed with the following shell +commands: +\begin{verbatim} +$ ocamlc -o fib fib.ml +$ ./fib 10 +89 +$ ./fib 20 +10946 +\end{verbatim} + +More complex standalone OCaml programs are typically composed of +multiple source files, and can link with precompiled libraries. +Chapters~\ref{c:camlc} and~\ref{c:nativecomp} explain how to use the +batch compilers "ocamlc" and "ocamlopt". Recompilation of +multi-file OCaml projects can be automated using the "ocamlbuild" +compilation manager, documented in chapter~\ref{c:ocamlbuild}. 
diff --git a/manual/manual/tutorials/lablexamples.etex b/manual/manual/tutorials/lablexamples.etex new file mode 100644 index 0000000000..8cf2c7a3ad --- /dev/null +++ b/manual/manual/tutorials/lablexamples.etex @@ -0,0 +1,489 @@ +\chapter{Labels and variants} \label{c:labl-examples} +\pdfchapterfold{-2}{Tutorial: Labels and variants} +%HEVEA\cutname{lablexamples.html} +{\it (Chapter written by Jacques Garrigue)} + +\bigskip + +\noindent This chapter gives an overview of the new features in +OCaml 3: labels, and polymorphic variants. + +\section{Labels} +\pdfsection{Labels} + +If you have a look at modules ending in "Labels" in the standard +library, you will see that function types have annotations you did not +have in the functions you defined yourself. + +\begin{caml_example} +ListLabels.map;; +StringLabels.sub;; +\end{caml_example} + +Such annotations of the form "name:" are called {\em labels}. They are +meant to document the code, allow more checking, and give more +flexibility to function application. +You can give such names to arguments in your programs, by prefixing them +with a tilde "~". + +\begin{caml_example} +let f ~x ~y = x - y;; +let x = 3 and y = 2 in f ~x ~y;; +\end{caml_example} + +When you want to use distinct names for the variable and the label +appearing in the type, you can use a naming label of the form +"~name:". This also applies when the argument is not a variable. + +\begin{caml_example} +let f ~x:x1 ~y:y1 = x1 - y1;; +f ~x:3 ~y:2;; +\end{caml_example} + +Labels obey the same rules as other identifiers in OCaml, that is you +cannot use a reserved keyword (like "in" or "to") as a label. + +Formal parameters and arguments are matched according to their +respective labels\footnote{This corresponds to the commuting label mode +of Objective Caml 3.00 through 3.02, with some additional flexibility +on total applications.
The so-called classic mode ("-nolabels" +option) is now deprecated for normal use.}, the absence of label +being interpreted as the empty label. +% +This allows commuting arguments in applications. One can also +partially apply a function on any argument, creating a new function of +the remaining parameters. + +\begin{caml_example} +let f ~x ~y = x - y;; +f ~y:2 ~x:3;; +ListLabels.fold_left;; +ListLabels.fold_left [1;2;3] ~init:0 ~f:( + );; +ListLabels.fold_left ~init:0;; +\end{caml_example} + +If several arguments of a function bear the same label (or no label), +they will not commute among themselves, and order matters. But they +can still commute with other arguments. + +\begin{caml_example} +let hline ~x:x1 ~x:x2 ~y = (x1, x2, y);; +hline ~x:3 ~y:2 ~x:5;; +\end{caml_example} + +As an exception to the above parameter matching rules, if an +application is total (omitting all optional arguments), labels may be +omitted. +In practice, many applications are total, so that labels can often be +omitted. +\begin{caml_example} +f 3 2;; +ListLabels.map succ [1;2;3];; +\end{caml_example} +But beware that functions like "ListLabels.fold_left" whose result +type is a type variable will never be considered as totally applied. +\begin{caml_example} +ListLabels.fold_left ( + ) 0 [1;2;3];; +\end{caml_example} + +When a function is passed as an argument to a higher-order function, +labels must match in both types. Neither adding nor removing labels +is allowed. +\begin{caml_example} +let h g = g ~x:3 ~y:2;; +h f;; +h ( + );; +\end{caml_example} +Note that when you don't need an argument, you can still use a wildcard +pattern, but you must prefix it with the label. +\begin{caml_example} +h (fun ~x:_ ~y -> y+1);; +\end{caml_example} + +\subsection{Optional arguments} + +An interesting feature of labeled arguments is that they can be made +optional. For optional parameters, the question mark "?" replaces the +tilde "~" of non-optional ones, and the label is also prefixed by "?"
+in the function type. +Default values may be given for such optional parameters. + +\begin{caml_example} +let bump ?(step = 1) x = x + step;; +bump 2;; +bump ~step:3 2;; +\end{caml_example} + +A function taking some optional arguments must also take at least one +non-optional argument. The criterion for deciding whether an optional +argument has been omitted is the non-labeled application of an +argument appearing after this optional argument in the function type. +Note that if that argument is labeled, you will only be able to +eliminate optional arguments through the special case for total +applications. + +\begin{caml_example} +let test ?(x = 0) ?(y = 0) () ?(z = 0) () = (x, y, z);; +test ();; +test ~x:2 () ~z:3 ();; +\end{caml_example} + +Optional parameters may also commute with non-optional or unlabeled +ones, as long as they are applied simultaneously. By nature, optional +arguments do not commute with unlabeled arguments applied +independently. +\begin{caml_example} +test ~y:2 ~x:3 () ();; +test () () ~z:1 ~y:2 ~x:3;; +(test () ()) ~z:1;; +\end{caml_example} +Here "(test () ())" is already "(0,0,0)" and cannot be further +applied. + +Optional arguments are actually implemented as option types. If +you do not give a default value, you have access to their internal +representation, "type 'a option = None | Some of 'a". You can then +provide different behaviors when an argument is present or not. + +\begin{caml_example} +let bump ?step x = + match step with + | None -> x * 2 + | Some y -> x + y +;; +\end{caml_example} + +It may also be useful to relay an optional argument from a function +call to another. This can be done by prefixing the applied argument +with "?". This question mark disables the wrapping of optional +argument in an option type. 
+ +\begin{caml_example} +let test2 ?x ?y () = test ?x ?y () ();; +test2 ?x:None;; +\end{caml_example} + +\subsection{Labels and type inference} +\label{ss:label-inference} + +While they provide an increased comfort for writing function +applications, labels and optional arguments have the pitfall that they +cannot be inferred as completely as the rest of the language. + +You can see it in the following two examples. +\begin{caml_example} +let h' g = g ~y:2 ~x:3;; +h' f;; +let bump_it bump x = + bump ~step:2 x;; +bump_it bump 1;; +\end{caml_example} +The first case is simple: "g" is passed "~y" and then "~x", but "f" +expects "~x" and then "~y". This is correctly handled if we know the +type of "g" to be "x:int -> y:int -> int" in advance, but otherwise +this causes the above type clash. The simplest workaround is to apply +formal parameters in a standard order. + +The second example is more subtle: while we intended the argument +"bump" to be of type "?step:int -> int -> int", it is inferred as +"step:int -> int -> 'a". +% +These two types being incompatible (internally normal and optional +arguments are different), a type error occurs when applying "bump_it" +to the real "bump". + +We will not try here to explain in detail how type inference works. +One must just understand that there is not enough information in the +above program to deduce the correct type of "g" or "bump". That is, +there is no way to know whether an argument is optional or not, or +which is the correct order, by looking only at how a function is +applied. The strategy used by the compiler is to assume that there are +no optional arguments, and that applications are done in the right +order. + +The right way to solve this problem for optional parameters is to add +a type annotation to the argument "bump". 
+\begin{caml_example} +let bump_it (bump : ?step:int -> int -> int) x = + bump ~step:2 x;; +bump_it bump 1;; +\end{caml_example} +In practice, such problems appear mostly when using objects whose +methods have optional arguments, so that writing the type of object +arguments is often a good idea. + +Normally the compiler generates a type error if you attempt to pass to +a function a parameter whose type is different from the expected one. +However, in the specific case where the expected type is a non-labeled +function type, and the argument is a function expecting optional +parameters, the compiler will attempt to transform the argument to +have it match the expected type, by passing "None" for all optional +parameters. + +\begin{caml_example} +let twice f (x : int) = f(f x);; +twice bump 2;; +\end{caml_example} + +This transformation is coherent with the intended semantics, +including side-effects. That is, if the application of optional +parameters shall produce side-effects, these are delayed until the +received function is really applied to an argument. + +\subsection{Suggestions for labeling} + +Like for names, choosing labels for functions is not an easy task. A +good labeling is a labeling which + +\begin{itemize} +\item makes programs more readable, +\item is easy to remember, +\item when possible, allows useful partial applications. +\end{itemize} + +We explain here the rules we applied when labeling OCaml +libraries. + +To speak in an ``object-oriented'' way, one can consider that each +function has a main argument, its {\em object}, and other arguments +related with its action, the {\em parameters}. To permit the +combination of functions through functionals in commuting label mode, the +object will not be labeled. Its role is clear from the function +itself. The parameters are labeled with names reminding of +their nature or their role. The best labels combine nature and +role. 
When this is not possible the role is to be preferred, since the +nature will +often be given by the type itself. Obscure abbreviations should be +avoided. +\begin{alltt} +"ListLabels.map : f:('a -> 'b) -> 'a list -> 'b list" +UnixLabels.write : file_descr -> buf:bytes -> pos:int -> len:int -> unit +\end{alltt} + +When there are several objects of same nature and role, they are all +left unlabeled. +\begin{alltt} +"ListLabels.iter2 : f:('a -> 'b -> 'c) -> 'a list -> 'b list -> unit" +\end{alltt} + +When there is no preferable object, all arguments are labeled. +\begin{alltt} +BytesLabels.blit : + src:bytes -> src_pos:int -> dst:bytes -> dst_pos:int -> len:int -> unit +\end{alltt} + +However, when there is only one argument, it is often left unlabeled. +\begin{alltt} +BytesLabels.create : int -> bytes +\end{alltt} +This principle also applies to functions of several arguments whose +return type is a type variable, as long as the role of each argument +is not ambiguous. Labeling such functions may lead to awkward error +messages when one attempts to omit labels in an application, as we +have seen with "ListLabels.fold_left". + +Here are some of the label names you will find throughout the +libraries. + +\begin{tableau}{|l|l|}{Label}{Meaning} +\entree{"f:"}{a function to be applied} +\entree{"pos:"}{a position in a string, array or byte sequence} +\entree{"len:"}{a length} +\entree{"buf:"}{a byte sequence or string used as buffer} +\entree{"src:"}{the source of an operation} +\entree{"dst:"}{the destination of an operation} +\entree{"init:"}{the initial value for an iterator} +\entree{"cmp:"}{a comparison function, {\it e.g.} "Pervasives.compare"} +\entree{"mode:"}{an operation mode or a flag list} +\end{tableau} + +All these are only suggestions, but keep in mind that the +choice of labels is essential for readability. Bizarre choices will +make the program harder to maintain. 
+ +In the ideal, the right function name with right labels should be +enough to understand the function's meaning. Since one can get this +information with OCamlBrowser or the "ocaml" toplevel, the documentation +is only used when a more detailed specification is needed. + +\begin{caml_eval} +#label false;; +\end{caml_eval} + + +\section{Polymorphic variants} +\pdfsection{Polymorphic variants} + +Variants as presented in section~\ref{s:tut-recvariants} are a +powerful tool to build data structures and algorithms. However they +sometimes lack flexibility when used in modular programming. This is +due to the fact every constructor reserves a name to be used with a +unique type. One cannot use the same name in another type, or consider +a value of some type to belong to some other type with more +constructors. + +With polymorphic variants, this original assumption is removed. That +is, a variant tag does not belong to any type in particular, the type +system will just check that it is an admissible value according to its +use. You need not define a type before using a variant tag. A variant +type will be inferred independently for each of its uses. + +\subsection*{Basic use} + +In programs, polymorphic variants work like usual ones. You just have +to prefix their names with a backquote character "`". +\begin{caml_example} +[`On; `Off];; +`Number 1;; +let f = function `On -> 1 | `Off -> 0 | `Number n -> n;; +List.map f [`On; `Off];; +\end{caml_example} +"[>`Off|`On] list" means that to match this list, you should at +least be able to match "`Off" and "`On", without argument. +"[<`On|`Off|`Number of int]" means that "f" may be applied to "`Off", +"`On" (both without argument), or "`Number" $n$ where +$n$ is an integer. +The ">" and "<" inside the variant types show that they may still be +refined, either by defining more tags or by allowing less. As such, they +contain an implicit type variable. 
Because each of the variant types +appears only once in the whole type, their implicit type variables are +not shown. + +The above variant types were polymorphic, allowing further refinement. +When writing type annotations, one will most often describe fixed +variant types, that is types that cannot be refined. This is +also the case for type abbreviations. Such types do not contain "<" or +">", but just an enumeration of the tags and their associated types, +just like in a normal datatype definition. +\begin{caml_example} +type 'a vlist = [`Nil | `Cons of 'a * 'a vlist];; +let rec map f : 'a vlist -> 'b vlist = function + | `Nil -> `Nil + | `Cons(a, l) -> `Cons(f a, map f l) +;; +\end{caml_example} + +\subsection*{Advanced use} + +Type-checking polymorphic variants is a subtle thing, and some +expressions may result in more complex type information. + +\begin{caml_example} +let f = function `A -> `C | `B -> `D | x -> x;; +f `E;; +\end{caml_example} +Here we are seeing two phenomena. First, since this matching is open +(the last case catches any tag), we obtain the type "[> `A | `B]" +rather than "[< `A | `B]" in a closed matching. Then, since "x" is +returned as is, input and return types are identical. The notation "as +'a" denotes such type sharing. If we apply "f" to yet another tag +"`E", it gets added to the list. + +\begin{caml_example} +let f1 = function `A x -> x = 1 | `B -> true | `C -> false +let f2 = function `A x -> x = "a" | `B -> true ;; +let f x = f1 x && f2 x;; +\end{caml_example} +Here "f1" and "f2" both accept the variant tags "`A" and "`B", but the +argument of "`A" is "int" for "f1" and "string" for "f2". In "f"'s +type "`C", only accepted by "f1", disappears, but both argument types +appear for "`A" as "int & string". This means that if we +pass the variant tag "`A" to "f", its argument should be {\em both} +"int" and "string". Since there is no such value, "f" cannot be +applied to "`A", and "`B" is the only accepted input. 
+Even if a value has a fixed variant type, one can still give it a +larger type through coercions. Coercions are normally written with +both the source type and the destination type, but in simple cases the +source type may be omitted. +\begin{caml_example} +type 'a wlist = [`Nil | `Cons of 'a * 'a wlist | `Snoc of 'a wlist * 'a];; +let wlist_of_vlist l = (l : 'a vlist :> 'a wlist);; +let open_vlist l = (l : 'a vlist :> [> 'a vlist]);; +fun x -> (x :> [`A|`B|`C]);; +\end{caml_example} + +You may also selectively coerce values through pattern matching. +\begin{caml_example} +let split_cases = function + | `Nil | `Cons _ as x -> `A x + | `Snoc _ as x -> `B x +;; +\end{caml_example} +When an or-pattern composed of variant tags is wrapped inside an +alias-pattern, the alias is given a type containing only the tags +enumerated in the or-pattern. This allows for many useful idioms, like +incremental definition of functions. + +\begin{caml_example} +let num x = `Num x +let eval1 eval (`Num x) = x +let rec eval x = eval1 eval x ;; +let plus x y = `Plus(x,y) +let eval2 eval = function + | `Plus(x,y) -> eval x + eval y + | `Num _ as x -> eval1 eval x +let rec eval x = eval2 eval x ;; +\end{caml_example} + +To make this even more comfortable, you may use type definitions as +abbreviations for or-patterns. That is, if you have defined "type +myvariant = [`Tag1 of int | `Tag2 of bool]", then the pattern "#myvariant" is +equivalent to writing "(`Tag1(_ : int) | `Tag2(_ : bool))". +\begin{caml_eval} +type myvariant = [`Tag1 of int | `Tag2 of bool];; +\end{caml_eval} + +Such abbreviations may be used alone, +\begin{caml_example} +let f = function + | #myvariant -> "myvariant" + | `Tag3 -> "Tag3";; +\end{caml_example} +or combined with aliases. 
+\begin{caml_example} +let g1 = function `Tag1 _ -> "Tag1" | `Tag2 _ -> "Tag2";; +let g = function + | #myvariant as x -> g1 x + | `Tag3 -> "Tag3";; +\end{caml_example} + +\subsection{Weaknesses of polymorphic variants} + +After seeing the power of polymorphic variants, one may wonder why +they were added to core language variants, rather than replacing them. + +The answer is twofold. One first aspect is that while being pretty +efficient, the lack of static type information allows for fewer +optimizations, and makes polymorphic variants slightly heavier than +core language ones. However noticeable differences would only +appear on huge data structures. + +More important is the fact that polymorphic variants, while being +type-safe, result in a weaker type discipline. That is, core language +variants do actually much more than ensuring type-safety, they also +check that you use only declared constructors, that all constructors +present in a data-structure are compatible, and they enforce typing +constraints to their parameters. + +For this reason, you must be more careful about making types explicit +when you use polymorphic variants. When you write a library, this is +easy since you can describe exact types in interfaces, but for simple +programs you are probably better off with core language variants. + +Beware also that some idioms make trivial errors very hard to find. +For instance, the following code is probably wrong but the compiler +has no way to see it. +\begin{caml_example} +type abc = [`A | `B | `C] ;; +let f = function + | `As -> "A" + | #abc -> "other" ;; +let f : abc -> string = f ;; +\end{caml_example} +You can avoid such risks by annotating the definition itself. 
+\begin{caml_example} +let f : abc -> string = function + | `As -> "A" + | #abc -> "other" ;; +\end{caml_example} diff --git a/manual/manual/tutorials/moduleexamples.etex b/manual/manual/tutorials/moduleexamples.etex new file mode 100644 index 0000000000..4e70775f9c --- /dev/null +++ b/manual/manual/tutorials/moduleexamples.etex @@ -0,0 +1,311 @@ +\chapter{The module system} \label{c:moduleexamples} +\pdfchapterfold{-5}{Tutorial: The module system} +%HEVEA\cutname{moduleexamples.html} + +This chapter introduces the module system of OCaml. + +\section{Structures} +\pdfsection{Structures} + +A primary motivation for modules is to package together related +definitions (such as the definitions of a data type and associated +operations over that type) and enforce a consistent naming scheme for +these definitions. This avoids running out of names or accidentally +confusing names. Such a package is called a {\em structure} and +is introduced by the "struct"\ldots"end" construct, which contains an +arbitrary sequence of definitions. The structure is usually given a +name with the "module" binding. 
Here is for instance a structure +packaging together a type of priority queues and their operations: +\begin{caml_example} +module PrioQueue = + struct + type priority = int + type 'a queue = Empty | Node of priority * 'a * 'a queue * 'a queue + let empty = Empty + let rec insert queue prio elt = + match queue with + Empty -> Node(prio, elt, Empty, Empty) + | Node(p, e, left, right) -> + if prio <= p + then Node(prio, elt, insert right p e, left) + else Node(p, e, insert right prio elt, left) + exception Queue_is_empty + let rec remove_top = function + Empty -> raise Queue_is_empty + | Node(prio, elt, left, Empty) -> left + | Node(prio, elt, Empty, right) -> right + | Node(prio, elt, (Node(lprio, lelt, _, _) as left), + (Node(rprio, relt, _, _) as right)) -> + if lprio <= rprio + then Node(lprio, lelt, remove_top left, right) + else Node(rprio, relt, left, remove_top right) + let extract = function + Empty -> raise Queue_is_empty + | Node(prio, elt, _, _) as queue -> (prio, elt, remove_top queue) + end;; +\end{caml_example} +Outside the structure, its components can be referred to using the +``dot notation'', that is, identifiers qualified by a structure name. +For instance, "PrioQueue.insert" is the function "insert" defined +inside the structure "PrioQueue" and "PrioQueue.queue" is the type +"queue" defined in "PrioQueue". +\begin{caml_example} +PrioQueue.insert PrioQueue.empty 1 "hello";; +\end{caml_example} + +\section{Signatures} +\pdfsection{Signatures} + +Signatures are interfaces for structures. A signature specifies +which components of a structure are accessible from the outside, and +with which type. It can be used to hide some components of a structure +(e.g. local function definitions) or export some components with a +restricted type. For instance, the signature below specifies the three +priority queue operations "empty", "insert" and "extract", but not the +auxiliary function "remove_top". 
Similarly, it makes the "queue" type +abstract (by not providing its actual representation as a concrete type). +\begin{caml_example} +module type PRIOQUEUE = + sig + type priority = int (* still concrete *) + type 'a queue (* now abstract *) + val empty : 'a queue + val insert : 'a queue -> int -> 'a -> 'a queue + val extract : 'a queue -> int * 'a * 'a queue + exception Queue_is_empty + end;; +\end{caml_example} +Restricting the "PrioQueue" structure by this signature results in +another view of the "PrioQueue" structure where the "remove_top" +function is not accessible and the actual representation of priority +queues is hidden: +\begin{caml_example} +module AbstractPrioQueue = (PrioQueue : PRIOQUEUE);; +AbstractPrioQueue.remove_top;; +AbstractPrioQueue.insert AbstractPrioQueue.empty 1 "hello";; +\end{caml_example} +The restriction can also be performed during the definition of the +structure, as in +\begin{verbatim} +module PrioQueue = (struct ... end : PRIOQUEUE);; +\end{verbatim} +An alternate syntax is provided for the above: +\begin{verbatim} +module PrioQueue : PRIOQUEUE = struct ... end;; +\end{verbatim} + +\section{Functors} +\pdfsection{Functors} + +Functors are ``functions'' from structures to structures. They are used to +express parameterized structures: a structure \var{A} parameterized by a +structure \var{B} is simply a functor \var{F} with a formal parameter +\var{B} (along with the expected signature for \var{B}) which returns +the actual structure \var{A} itself. The functor \var{F} can then be +applied to one or several implementations \nth{B}{1} \ldots \nth{B}{n} +of \var{B}, yielding the corresponding structures +\nth{A}{1} \ldots \nth{A}{n}. 
+ +For instance, here is a structure implementing sets as sorted lists, +parameterized by a structure providing the type of the set elements +and an ordering function over this type (used to keep the sets +sorted): +\begin{caml_example} +type comparison = Less | Equal | Greater;; +module type ORDERED_TYPE = + sig + type t + val compare: t -> t -> comparison + end;; +module Set = + functor (Elt: ORDERED_TYPE) -> + struct + type element = Elt.t + type set = element list + let empty = [] + let rec add x s = + match s with + [] -> [x] + | hd::tl -> + match Elt.compare x hd with + Equal -> s (* x is already in s *) + | Less -> x :: s (* x is smaller than all elements of s *) + | Greater -> hd :: add x tl + let rec member x s = + match s with + [] -> false + | hd::tl -> + match Elt.compare x hd with + Equal -> true (* x belongs to s *) + | Less -> false (* x is smaller than all elements of s *) + | Greater -> member x tl + end;; +\end{caml_example} +By applying the "Set" functor to a structure implementing an ordered +type, we obtain set operations for this type: +\begin{caml_example} +module OrderedString = + struct + type t = string + let compare x y = if x = y then Equal else if x < y then Less else Greater + end;; +module StringSet = Set(OrderedString);; +StringSet.member "bar" (StringSet.add "foo" StringSet.empty);; +\end{caml_example} + +\section{Functors and type abstraction} +\pdfsection{Functors and type abstraction} + +As in the "PrioQueue" example, it would be good style to hide the +actual implementation of the type "set", so that users of the +structure will not rely on sets being lists, and we can switch later +to another, more efficient representation of sets without breaking +their code. 
This can be achieved by restricting "Set" by a suitable +functor signature: +\begin{caml_example} +module type SETFUNCTOR = + functor (Elt: ORDERED_TYPE) -> + sig + type element = Elt.t (* concrete *) + type set (* abstract *) + val empty : set + val add : element -> set -> set + val member : element -> set -> bool + end;; +module AbstractSet = (Set : SETFUNCTOR);; +module AbstractStringSet = AbstractSet(OrderedString);; +AbstractStringSet.add "gee" AbstractStringSet.empty;; +\end{caml_example} + +In an attempt to write the type constraint above more elegantly, +one may wish to name the signature of the structure +returned by the functor, then use that signature in the constraint: +\begin{caml_example} +module type SET = + sig + type element + type set + val empty : set + val add : element -> set -> set + val member : element -> set -> bool + end;; +module WrongSet = (Set : functor(Elt: ORDERED_TYPE) -> SET);; +module WrongStringSet = WrongSet(OrderedString);; +WrongStringSet.add "gee" WrongStringSet.empty;; +\end{caml_example} +The problem here is that "SET" specifies the type "element" +abstractly, so that the type equality between "element" in the result +of the functor and "t" in its argument is forgotten. Consequently, +"WrongStringSet.element" is not the same type as "string", and the +operations of "WrongStringSet" cannot be applied to strings. +As demonstrated above, it is important that the type "element" in the +signature "SET" be declared equal to "Elt.t"; unfortunately, this is +impossible above since "SET" is defined in a context where "Elt" does +not exist. 
To overcome this difficulty, OCaml provides a +"with type" construct over signatures that allows enriching a signature +with extra type equalities: +\begin{caml_example} +module AbstractSet2 = + (Set : functor(Elt: ORDERED_TYPE) -> (SET with type element = Elt.t));; +\end{caml_example} + +As in the case of simple structures, an alternate syntax is provided +for defining functors and restricting their result: +\begin{verbatim} +module AbstractSet2(Elt: ORDERED_TYPE) : (SET with type element = Elt.t) = + struct ... end;; +\end{verbatim} + +Abstracting a type component in a functor result is a powerful +technique that provides a high degree of type safety, as we now +illustrate. Consider an ordering over character strings that is +different from the standard ordering implemented in the +"OrderedString" structure. For instance, we compare strings without +distinguishing upper and lower case. +\begin{caml_example} +module NoCaseString = + struct + type t = string + let compare s1 s2 = + OrderedString.compare (String.lowercase s1) (String.lowercase s2) + end;; +module NoCaseStringSet = AbstractSet(NoCaseString);; +NoCaseStringSet.add "FOO" AbstractStringSet.empty;; +\end{caml_example} +Note that the two types "AbstractStringSet.set" and +"NoCaseStringSet.set" are not compatible, and values of these +two types do not match. This is the correct behavior: even though both +set types contain elements of the same type (strings), they are built +upon different orderings of that type, and different invariants need +to be maintained by the operations (being strictly increasing for the +standard ordering and for the case-insensitive ordering). Applying +operations from "AbstractStringSet" to values of type +"NoCaseStringSet.set" could give incorrect results, or build +lists that violate the invariants of "NoCaseStringSet". 
+ +\section{Modules and separate compilation} +\pdfsection{Modules and separate compilation} + +All examples of modules so far have been given in the context of the +interactive system. However, modules are most useful for large, +batch-compiled programs. For these programs, it is a practical +necessity to split the source into several files, called compilation +units, that can be compiled separately, thus minimizing recompilation +after changes. + +In OCaml, compilation units are special cases of structures +and signatures, and the relationship between the units can be +explained easily in terms of the module system. A compilation unit \var{A} +comprises two files: +\begin{itemize} +\item the implementation file \var{A}".ml", which contains a sequence +of definitions, analogous to the inside of a "struct"\ldots"end" +construct; +\item the interface file \var{A}".mli", which contains a sequence of +specifications, analogous to the inside of a "sig"\ldots"end" +construct. +\end{itemize} +These two files together define a structure named \var{A} as if +the following definition was entered at top-level: +\begin{alltt} +module \var{A}: sig (* \hbox{contents of file} \var{A}.mli *) end + = struct (* \hbox{contents of file} \var{A}.ml *) end;; +\end{alltt} +The files that define the compilation units can be compiled separately +using the "ocamlc -c" command (the "-c" option means ``compile only, do +not try to link''); this produces compiled interface files (with +extension ".cmi") and compiled object code files (with extension +".cmo"). When all units have been compiled, their ".cmo" files are +linked together using the "ocamlc" command. 
For instance, the following +commands compile and link a program composed of two compilation units +"Aux" and "Main": +\begin{verbatim} +$ ocamlc -c Aux.mli # produces aux.cmi +$ ocamlc -c Aux.ml # produces aux.cmo +$ ocamlc -c Main.mli # produces main.cmi +$ ocamlc -c Main.ml # produces main.cmo +$ ocamlc -o theprogram Aux.cmo Main.cmo +\end{verbatim} +The program behaves exactly as if the following phrases were entered +at top-level: +\begin{alltt} +module Aux: sig (* \rminalltt{contents of} Aux.mli *) end + = struct (* \rminalltt{contents of} Aux.ml *) end;; +module Main: sig (* \rminalltt{contents of} Main.mli *) end + = struct (* \rminalltt{contents of} Main.ml *) end;; +\end{alltt} +In particular, "Main" can refer to "Aux": the definitions and +declarations contained in "Main.ml" and "Main.mli" can refer to +definitions in "Aux.ml", using the "Aux."\var{ident} notation, provided +these definitions are exported in "Aux.mli". + +The order in which the ".cmo" files are given to "ocamlc" during the +linking phase determines the order in which the module definitions +occur. Hence, in the example above, "Aux" appears first and "Main" can +refer to it, but "Aux" cannot refer to "Main". + +Note that only top-level structures can be mapped to +separately-compiled files, but neither functors nor module types. +However, all module-class objects can appear as components of a +structure, so the solution is to put the functor or module type +inside a structure, which can then be mapped to a file. 
diff --git a/manual/manual/tutorials/objectexamples.etex b/manual/manual/tutorials/objectexamples.etex new file mode 100644 index 0000000000..609ac170db --- /dev/null +++ b/manual/manual/tutorials/objectexamples.etex @@ -0,0 +1,1266 @@ +\chapter{Objects in OCaml} +\label{c:objectexamples} +\pdfchapterfold{-15}{Tutorial: Objects in OCaml} +%HEVEA\cutname{objectexamples.html} +{\it (Chapter written by Jérôme Vouillon, Didier Rémy and Jacques Garrigue)} + +\bigskip + +\noindent This chapter gives an overview of the object-oriented features of +OCaml. Note that the relation between object, class and type +in OCaml is very different from that in mainstream +object-oriented languages like Java or C++, so that you should not +assume that similar keywords mean the same thing. + +\begin{htmlonly} + +\ref{ss:classes-and-objects} Classes and objects \\ +\ref{ss:immediate-objects} Immediate objects \\ +\ref{ss:reference-to-self} Reference to self \\ +\ref{ss:initializers} Initializers \\ +\ref{ss:virtual-methods} Virtual methods \\ +\ref{ss:private-methods} Private methods \\ +\ref{ss:class-interfaces} Class interfaces \\ +\ref{ss:inheritance} Inheritance \\ +\ref{ss:multiple-inheritance} Multiple inheritance \\ +\ref{ss:parameterized-classes} Parameterized classes \\ +\ref{ss:polymorphic-methods} Polymorphic methods \\ +\ref{ss:using-coercions} Using coercions \\ +\ref{ss:functional-objects} Functional objects \\ +\ref{ss:cloning-objects} Cloning objects \\ +\ref{ss:recursive-classes} Recursive classes \\ +\ref{ss:binary-methods} Binary methods \\ +\ref{ss:friends} Friends \\ + +%%\ref{s:advanced-examples} {\bf Advanced examples} +%% +%%\ref{ss:bank-accounts} An extended example of bank accounts \\ +%%\ref{ss:modules-as-classes} Simple modules as classes: +%% \ref{module:string} Strings +%% \ref{module:stack} Stacks +%% \ref{module:hashtbl} Hash tables +%% \ref{module:set} Sets \\ +%%\ref{ss:subject-observer} The subject/observer pattern \\ + +\end{htmlonly} + 
+\section{Classes and objects} +\pdfsection{Classes and objects} +\label{ss:classes-and-objects} + +The class "point" below defines one instance variable "x" and two methods +"get_x" and "move". The initial value of the instance variable is "0". +The variable "x" is declared mutable, so the method "move" can change +its value. +\begin{caml_example} +class point = + object + val mutable x = 0 + method get_x = x + method move d = x <- x + d + end;; +\end{caml_example} + +We now create a new point "p", instance of the "point" class. +\begin{caml_example} +let p = new point;; +\end{caml_example} +Note that the type of "p" is "point". This is an abbreviation +automatically defined by the class definition above. It stands for the +object type "<get_x : int; move : int -> unit>", listing the methods +of class "point" along with their types. + +We now invoke some methods on "p": +\begin{caml_example} +p#get_x;; +p#move 3;; +p#get_x;; +\end{caml_example} + +The evaluation of the body of a class only takes place at object +creation time. Therefore, in the following example, the instance +variable "x" is initialized to different values for two different +objects. +\begin{caml_example} +let x0 = ref 0;; +class point = + object + val mutable x = incr x0; !x0 + method get_x = x + method move d = x <- x + d + end;; +new point#get_x;; +new point#get_x;; +\end{caml_example} + +The class "point" can also be abstracted over the initial values of +the "x" coordinate. 
+\begin{caml_example} +class point = fun x_init -> + object + val mutable x = x_init + method get_x = x + method move d = x <- x + d + end;; +\end{caml_example} +Like in function definitions, the definition above can be +abbreviated as: +\begin{caml_example} +class point x_init = + object + val mutable x = x_init + method get_x = x + method move d = x <- x + d + end;; +\end{caml_example} +An instance of the class "point" is now a function that expects an +initial parameter to create a point object: +\begin{caml_example} +new point;; +let p = new point 7;; +\end{caml_example} +The parameter "x_init" is, of course, visible in the whole body of the +definition, including methods. For instance, the method "get_offset" +in the class below returns the position of the object relative to its +initial position. +\begin{caml_example} +class point x_init = + object + val mutable x = x_init + method get_x = x + method get_offset = x - x_init + method move d = x <- x + d + end;; +\end{caml_example} +%Instance variables can only be used inside methods. For instance it would +%not be possible to define +%\begin{caml_example} +%class point x_init = +% object +% val mutable x = x_init +% val origin = x +% method get_offset = x - origin +% method move d = x <- x + d +% end;; +%\end{caml_example} +Expressions can be evaluated and bound before defining the object body +of the class. This is useful to enforce invariants. For instance, +points can be automatically adjusted to the nearest point on a grid, +as follows: +\begin{caml_example} +class adjusted_point x_init = + let origin = (x_init / 10) * 10 in + object + val mutable x = origin + method get_x = x + method get_offset = x - origin + method move d = x <- x + d + end;; +\end{caml_example} +(One could also raise an exception if the "x_init" coordinate is not +on the grid.) In fact, the same effect could here be obtained by +calling the definition of class "point" with the value of the +"origin". 
+\begin{caml_example} +class adjusted_point x_init = point ((x_init / 10) * 10);; +\end{caml_example} +An alternate solution would have been to define the adjustment in +a special allocation function: +\begin{caml_example} +let new_adjusted_point x_init = new point ((x_init / 10) * 10);; +\end{caml_example} +However, the former pattern is generally more appropriate, since +the code for adjustment is part of the definition of the class and will be +inherited. + +This ability provides class constructors as can be found in other +languages. Several constructors can be defined this way to build objects of +the same class but with different initialization patterns; an +alternative is to use initializers, as described below in section +\ref{ss:initializers}. + +\section{Immediate objects} +\pdfsection{Immediate objects} +\label{ss:immediate-objects} + +There is another, more direct way to create an object: create it +without going through a class. + +The syntax is exactly the same as for class expressions, but the +result is a single object rather than a class. All the constructs +described in the rest of this section also apply to immediate objects. +\begin{caml_example} +let p = + object + val mutable x = 0 + method get_x = x + method move d = x <- x + d + end;; +p#get_x;; +p#move 3;; +p#get_x;; +\end{caml_example} + +Unlike classes, which cannot be defined inside an expression, +immediate objects can appear anywhere, using variables from their +environment. +\begin{caml_example} +let minmax x y = + if x < y then object method min = x method max = y end + else object method min = y method max = x end;; +\end{caml_example} + +Immediate objects have two weaknesses compared to classes: their types +are not abbreviated, and you cannot inherit from them. But these two +weaknesses can be advantages in some situations, as we will see +in sections \ref{ss:reference-to-self} and \ref{ss:parameterized-classes}. 
+ +\section{Reference to self} +\pdfsection{Reference to self} +\label{ss:reference-to-self} + +A method or an initializer can send messages to self (that is, +the current object). For that, self must be explicitly bound, here to +the variable "s" ("s" could be any identifier, even though we will +often choose the name "self".) +\begin{caml_example} +class printable_point x_init = + object (s) + val mutable x = x_init + method get_x = x + method move d = x <- x + d + method print = print_int s#get_x + end;; +let p = new printable_point 7;; +p#print;; +\end{caml_example} +Dynamically, the variable "s" is bound at the invocation of a method. In +particular, when the class "printable_point" is inherited, the variable +"s" will be correctly bound to the object of the subclass. + +A common problem with self is that, as its type may be extended in +subclasses, you cannot fix it in advance. Here is a simple example. +\begin{caml_example} +let ints = ref [];; +class my_int = + object (self) + method n = 1 + method register = ints := self :: !ints + end;; +\end{caml_example} +You can ignore the first two lines of the error message. What matters +is the last one: putting self into an external reference would make it +impossible to extend it through inheritance. +We will see in section \ref{ss:using-coercions} a workaround to this +problem. +Note however that, since immediate objects are not extensible, the +problem does not occur with them. +\begin{caml_example} +let my_int = + object (self) + method n = 1 + method register = ints := self :: !ints + end;; +\end{caml_example} + +\section{Initializers} +\pdfsection{Initializers} +\label{ss:initializers} + +Let-bindings within class definitions are evaluated before the object +is constructed. It is also possible to evaluate an expression +immediately after the object has been built. Such code is written as +an anonymous hidden method called an initializer. Therefore, it can +access self and the instance variables. 
+\begin{caml_example} +class printable_point x_init = + let origin = (x_init / 10) * 10 in + object (self) + val mutable x = origin + method get_x = x + method move d = x <- x + d + method print = print_int self#get_x + initializer print_string "new point at "; self#print; print_newline () + end;; +let p = new printable_point 17;; +\end{caml_example} +Initializers cannot be overridden. On the contrary, all initializers are +evaluated sequentially. +Initializers are particularly useful to enforce invariants. +Another example can be seen in section \ref{ss:bank-accounts}. + + +\section{Virtual methods} +\pdfsection{Virtual methods and variables} +\label{ss:virtual-methods} + +It is possible to declare a method without actually defining it, using +the keyword "virtual". This method will be provided later in +subclasses. A class containing virtual methods must be flagged +"virtual", and cannot be instantiated (that is, no object of this class +can be created). It still defines type abbreviations (treating virtual methods +as other methods.) +\begin{caml_example} +class virtual abstract_point x_init = + object (self) + method virtual get_x : int + method get_offset = self#get_x - x_init + method virtual move : int -> unit + end;; +class point x_init = + object + inherit abstract_point x_init + val mutable x = x_init + method get_x = x + method move d = x <- x + d + end;; +\end{caml_example} + +Instance variables can also be declared as virtual, with the same effect +as with methods. +\begin{caml_example} +class virtual abstract_point2 = + object + val mutable virtual x : int + method move d = x <- x + d + end;; +class point2 x_init = + object + inherit abstract_point2 + val mutable x = x_init + method get_offset = x - x_init + end;; +\end{caml_example} + +\section{Private methods} +\pdfsection{Private methods} +\label{ss:private-methods} + +Private methods are methods that do not appear in object interfaces. 
+They can only be invoked from other methods of the same object. +\begin{caml_example} +class restricted_point x_init = + object (self) + val mutable x = x_init + method get_x = x + method private move d = x <- x + d + method bump = self#move 1 + end;; +let p = new restricted_point 0;; +p#move 10;; +p#bump;; +\end{caml_example} +Note that this is not the same thing as private and protected methods +in Java or C++, which can be called from other objects of the same +class. This is a direct consequence of the independence between types +and classes in OCaml: two unrelated classes may produce +objects of the same type, and there is no way at the type level to +ensure that an object comes from a specific class. However a possible +encoding of friend methods is given in section \ref{ss:friends}. + +Private methods are inherited (they are by default visible in subclasses), +unless they are hidden by signature matching, as described below. + +Private methods can be made public in a subclass. +\begin{caml_example} +class point_again x = + object (self) + inherit restricted_point x + method virtual move : _ + end;; +\end{caml_example} +The annotation "virtual" here is only used to mention a method without +providing its definition. Since we didn't add the "private" +annotation, this makes the method public, keeping the original +definition. + +An alternative definition is +\begin{caml_example} +class point_again x = + object (self : < move : _; ..> ) + inherit restricted_point x + end;; +\end{caml_example} +The constraint on self's type is requiring a public "move" method, and +this is sufficient to override "private". + +One could think that a private method should remain private in a subclass. 
+However, since the method is visible in a subclass, it is always possible +to pick its code and define a method of the same name that runs that +code, so yet another (heavier) solution would be: +\begin{caml_example} +class point_again x = + object + inherit restricted_point x as super + method move = super#move + end;; +\end{caml_example} + +Of course, private methods can also be virtual. Then, the keywords must +appear in this order "method private virtual". + +\section{Class interfaces} +\pdfsection{Class interfaces} +\label{ss:class-interfaces} + + +%XXX Differentiate class type and class interface ? + +Class interfaces are inferred from class definitions. They may also +be defined directly and used to restrict the type of a class. Like class +declarations, they also define a new type abbreviation. +\begin{caml_example} +class type restricted_point_type = + object + method get_x : int + method bump : unit +end;; +fun (x : restricted_point_type) -> x;; +\end{caml_example} +In addition to program documentation, class interfaces can be used to +constrain the type of a class. Both concrete instance variables and concrete +private methods can be hidden by a class type constraint. Public +methods and virtual members, however, cannot. +\begin{caml_example} +class restricted_point' x = (restricted_point x : restricted_point_type);; +\end{caml_example} +Or, equivalently: +\begin{caml_example} +class restricted_point' = (restricted_point : int -> restricted_point_type);; +\end{caml_example} +The interface of a class can also be specified in a module +signature, and used to restrict the inferred signature of a module. 
+\begin{caml_example} +module type POINT = sig + class restricted_point' : int -> + object + method get_x : int + method bump : unit + end +end;; +module Point : POINT = struct + class restricted_point' = restricted_point +end;; +\end{caml_example} + +\section{Inheritance} +\pdfsection{Inheritance} +\label{ss:inheritance} + +We illustrate inheritance by defining a class of colored points that +inherits from the class of points. This class has all instance +variables and all methods of class "point", plus a new instance +variable "c" and a new method "color". +\begin{caml_example} +class colored_point x (c : string) = + object + inherit point x + val c = c + method color = c + end;; +let p' = new colored_point 5 "red";; +p'#get_x, p'#color;; +\end{caml_example} +A point and a colored point have incompatible types, since a point has +no method "color". However, the function "get_succ_x" below is a generic +function applying method "get_x" to any object "p" that has this +method (and possibly some others, which are represented by an ellipsis +in the type). Thus, it applies to both points and colored points. +\begin{caml_example} +let get_succ_x p = p#get_x + 1;; +get_succ_x p + get_succ_x p';; +\end{caml_example} +Methods need not be declared previously, as shown by the example: +\begin{caml_example} +let set_x p = p#set_x;; +let incr p = set_x p (get_succ_x p);; +\end{caml_example} + +\section{Multiple inheritance} +\pdfsection{Multiple inheritance} +\label{ss:multiple-inheritance} + +Multiple inheritance is allowed. Only the last definition of a method +is kept: the redefinition in a subclass of a method that was visible in +the parent class overrides the definition in the parent class. +Previous definitions of a method can be reused by binding the related +ancestor. Below, "super" is bound to the ancestor "printable_point". +The name "super" is a pseudo value identifier that can only be used to +invoke a super-class method, as in "super#print".
+\begin{caml_example} +class printable_colored_point y c = + object (self) + val c = c + method color = c + inherit printable_point y as super + method print = + print_string "("; + super#print; + print_string ", "; + print_string (self#color); + print_string ")" + end;; +let p' = new printable_colored_point 17 "red";; +p'#print;; +\end{caml_example} +A private method that has been hidden in the parent class is no longer +visible, and is thus not overridden. Since initializers are treated as +private methods, all initializers along the class hierarchy are evaluated, +in the order they are introduced. + +\section{Parameterized classes} +\pdfsection{Parameterized classes} +\label{ss:parameterized-classes} + +Reference cells can be implemented as objects. +The naive definition fails to typecheck: +\begin{caml_example} +class ref x_init = + object + val mutable x = x_init + method get = x + method set y = x <- y + end;; +\end{caml_example} +The reason is that at least one of the methods has a polymorphic type +(here, the type of the value stored in the reference cell), thus +either the class should be parametric, or the method type should be +constrained to a monomorphic type. A monomorphic instance of the class could +be defined by: +\begin{caml_example} +class ref (x_init:int) = + object + val mutable x = x_init + method get = x + method set y = x <- y + end;; +\end{caml_example} +Note that since immediate objects do not define a class type, they have +no such restriction. +\begin{caml_example} +let new_ref x_init = + object + val mutable x = x_init + method get = x + method set y = x <- y + end;; +\end{caml_example} +On the other hand, a class for polymorphic references must explicitly +list the type parameters in its declaration. Class type parameters are +listed between "[" and "]". The type parameters must also be +bound somewhere in the class body by a type constraint. 
+\begin{caml_example} +class ['a] ref x_init = + object + val mutable x = (x_init : 'a) + method get = x + method set y = x <- y + end;; +let r = new ref 1 in r#set 2; (r#get);; +\end{caml_example} +The type parameter in the declaration may actually be constrained in the +body of the class definition. In the class type, the actual value of +the type parameter is displayed in the "constraint" clause. +\begin{caml_example} +class ['a] ref_succ (x_init:'a) = + object + val mutable x = x_init + 1 + method get = x + method set y = x <- y + end;; +\end{caml_example} +Let us consider a more complex example: define a circle, whose center +may be any kind of point. We put an additional type +constraint in method "move", since no free variables must remain +unaccounted for by the class type parameters. +\begin{caml_example} +class ['a] circle (c : 'a) = + object + val mutable center = c + method center = center + method set_center c = center <- c + method move = (center#move : int -> unit) + end;; +\end{caml_example} +An alternate definition of "circle", using a "constraint" clause in +the class definition, is shown below. The type "#point" used below in +the "constraint" clause is an abbreviation produced by the definition +of class "point". This abbreviation unifies with the type of any +object belonging to a subclass of class "point". It actually expands to +"< get_x : int; move : int -> unit; .. >". This leads to the following +alternate definition of "circle", which has slightly stronger +constraints on its argument, as we now expect "center" to have a +method "get_x". +\begin{caml_example} +class ['a] circle (c : 'a) = + object + constraint 'a = #point + val mutable center = c + method center = center + method set_center c = center <- c + method move = center#move + end;; +\end{caml_example} +The class "colored_circle" is a specialized version of class +"circle" that requires the type of the center to unify with +"#colored_point", and adds a method "color". 
Note that when specializing a +parameterized class, the instance of the type parameter must always be +explicitly given. It is again written between "[" and "]". +\begin{caml_example} +class ['a] colored_circle c = + object + constraint 'a = #colored_point + inherit ['a] circle c + method color = center#color + end;; +\end{caml_example} + +\section{Polymorphic methods} +\pdfsection{Polymorphic methods} +\label{ss:polymorphic-methods} + +While parameterized classes may be polymorphic in their contents, they +are not enough to allow polymorphism of method use. + +A classical example is defining an iterator. +\begin{caml_example} +List.fold_left;; +class ['a] intlist (l : int list) = + object + method empty = (l = []) + method fold f (accu : 'a) = List.fold_left f accu l + end;; +\end{caml_example} +At first glance, we seem to have a polymorphic iterator; however, this +does not work in practice. +\begin{caml_example} +let l = new intlist [1; 2; 3];; +l#fold (fun x y -> x+y) 0;; +l;; +l#fold (fun s x -> s ^ string_of_int x ^ " ") "";; +\end{caml_example} +Our iterator works, as its first use for summation shows. However, +since objects themselves are not polymorphic (only their constructors +are), using the "fold" method fixes its type for this individual object. +Our next attempt to use it as a string iterator fails. + +The problem here is that quantification was wrongly located: it is +not the class we want to be polymorphic, but the "fold" method. +This can be achieved by giving an explicitly polymorphic type in the +method definition. +\begin{caml_example} +class intlist (l : int list) = + object + method empty = (l = []) + method fold : 'a.
('a -> int -> 'a) -> 'a -> 'a = + fun f accu -> List.fold_left f accu l + end;; +let l = new intlist [1; 2; 3];; +l#fold (fun x y -> x+y) 0;; +l#fold (fun s x -> s ^ string_of_int x ^ " ") "";; +\end{caml_example} +As you can see in the class type shown by the compiler, while +polymorphic method types must be fully explicit in class definitions +(appearing immediately after the method name), quantified type +variables can be left implicit in class descriptions. Why require types +to be explicit? The problem is that "(int -> int -> int) -> int -> +int" would also be a valid type for "fold", and it happens to be +incompatible with the polymorphic type we gave (automatic +instantiation only works for toplevel type variables, not for inner +quantifiers, where it becomes an undecidable problem). So the compiler +cannot choose between those two types, and must be helped. + +However, the type can be completely omitted in the class definition if +it is already known, through inheritance or type constraints on self. +Here is an example of method overriding. +\begin{caml_example*} +class intlist_rev l = + object + inherit intlist l + method fold f accu = List.fold_left f accu (List.rev l) + end;; +\end{caml_example*} +The following idiom separates description and definition. +\begin{caml_example*} +class type ['a] iterator = + object method fold : ('b -> 'a -> 'b) -> 'b -> 'b end;; +class intlist l = + object (self : int #iterator) + method empty = (l = []) + method fold f accu = List.fold_left f accu l + end;; +\end{caml_example*} +Note here the "(self : int #iterator)" idiom, which ensures that this +object implements the interface "iterator". + +Polymorphic methods are called in exactly the same way as normal +methods, but you should be aware of some limitations of type +inference. Namely, a polymorphic method can only be called if its +type is known at the call site. Otherwise, the method will be assumed +to be monomorphic, and given an incompatible type.
+\begin{caml_example} +let sum lst = lst#fold (fun x y -> x+y) 0;; +sum l;; +\end{caml_example} +The workaround is easy: you should put a type constraint on the +parameter. +\begin{caml_example} +let sum (lst : _ #iterator) = lst#fold (fun x y -> x+y) 0;; +\end{caml_example} +Of course the constraint may also be an explicit method type. +Only occurrences of quantified variables are required. +\begin{caml_example} +let sum lst = + (lst : < fold : 'a. ('a -> _ -> 'a) -> 'a -> 'a; .. >)#fold (+) 0;; +\end{caml_example} + +Another use of polymorphic methods is to allow some form of implicit +subtyping in method arguments. We have already seen in section +\ref{ss:inheritance} how some functions may be polymorphic in the +class of their argument. This can be extended to methods. +\begin{caml_example} +class type point0 = object method get_x : int end;; +class distance_point x = + object + inherit point x + method distance : 'a. (#point0 as 'a) -> int = + fun other -> abs (other#get_x - x) + end;; +let p = new distance_point 3 in +(p#distance (new point 8), p#distance (new colored_point 1 "blue"));; +\end{caml_example} +Note here the special syntax "(#point0 as 'a)" we have to use to +quantify the extensible part of "#point0". As for the variable binder, +it can be omitted in class specifications. If you want polymorphism +inside an object field, it must be quantified independently. +\begin{caml_example} +class multi_poly = + object + method m1 : 'a. (< n1 : 'b. 'b -> 'b; .. > as 'a) -> _ = + fun o -> o#n1 true, o#n1 "hello" + method m2 : 'a 'b. (< n2 : 'b -> bool; .. > as 'a) -> 'b -> _ = + fun o x -> o#n2 x + end;; +\end{caml_example} +In method "m1", "o" must be an object with at least a method "n1", +itself polymorphic. In method "m2", the argument of "n2" and "x" must +have the same type, which is quantified at the same level as "'a". + +\section{Using coercions} +\pdfsection{Using coercions} +\label{ss:using-coercions} + +Subtyping is never implicit.
There are, however, two ways to perform +subtyping. The most general construction is fully explicit: both the +domain and the codomain of the type coercion must be given. + +We have seen that points and colored points have incompatible types. +For instance, they cannot be mixed in the same list. However, a +colored point can be coerced to a point, hiding its "color" method: +\begin{caml_example} +let colored_point_to_point cp = (cp : colored_point :> point);; +let p = new point 3 and q = new colored_point 4 "blue";; +let l = [p; (colored_point_to_point q)];; +\end{caml_example} +An object of type "t" can be seen as an object of type "t'" +only if "t" is a subtype of "t'". For instance, a point cannot be +seen as a colored point. +\begin{caml_example} +(p : point :> colored_point);; +\end{caml_example} +Indeed, narrowing coercions without runtime checks would be unsafe. +Runtime type checks might raise exceptions, and they would require +the presence of type information at runtime, which is not the case in +the OCaml system. +For these reasons, there is no such operation available in the language. + +Be aware that subtyping and inheritance are not related. Inheritance is a +syntactic relation between classes while subtyping is a semantic relation +between types. For instance, the class of colored points could have been +defined directly, without inheriting from the class of points; the type of +colored points would remain unchanged and thus still be a subtype of +points. +% Conversely, the class "int_comparable" inherits from class +%"comparable", but type "int_comparable" is not a subtype of "comparable". +%\begin{caml_example} +%function x -> (x : int_comparable :> comparable);; +%\end{caml_example} + +The domain of a coercion can often be omitted. For instance, one can +define: +\begin{caml_example} +let to_point cp = (cp :> point);; +\end{caml_example} +In this case, the function "colored_point_to_point" is an instance of the +function "to_point". 
This is not always true, however. The fully +explicit coercion is more precise and is sometimes unavoidable. +Consider, for example, the following class: +\begin{caml_example} +class c0 = object method m = {< >} method n = 0 end;; +\end{caml_example} +The object type "c0" is an abbreviation for "<m : 'a; n : int> as 'a". +Consider now the type declaration: +\begin{caml_example} +class type c1 = object method m : c1 end;; +\end{caml_example} +The object type "c1" is an abbreviation for the type "<m : 'a> as 'a". +The coercion from an object of type "c0" to an object of type "c1" is +correct: +\begin{caml_example} +fun (x:c0) -> (x : c0 :> c1);; +\end{caml_example} +%%% FIXME come up with a better example. +% However, the domain of the coercion cannot be omitted here: +% \begin{caml_example} +% fun (x:c0) -> (x :> c1);; +% \end{caml_example} +However, the domain of the coercion cannot always be omitted. +In that case, the solution is to use the explicit form. +% +Sometimes, a change in the class-type definition can also solve the problem: +\begin{caml_example} +class type c2 = object ('a) method m : 'a end;; +fun (x:c0) -> (x :> c2);; +\end{caml_example} +While class types "c1" and "c2" are different, both object types +"c1" and "c2" expand to the same object type (same method names and types). +Yet, when the domain of a coercion is left implicit and its co-domain +is an abbreviation of a known class type, then the class type, rather +than the object type, is used to derive the coercion function. This +allows leaving the domain implicit in most cases when coercing from a +subclass to its superclass. +% +The type of a coercion can always be seen as below: +\begin{caml_example} +let to_c1 x = (x :> c1);; +let to_c2 x = (x :> c2);; +\end{caml_example} +Note the difference between these two coercions: in the case of "to_c2", +the type +"#c2 = < m : 'a; ..
> as 'a" is polymorphically recursive (according +to the explicit recursion in the class type of "c2"); hence the +success of applying this coercion to an object of class "c0". +On the other hand, in the first case, "c1" was only expanded and +unrolled twice to obtain "< m : < m : c1; .. >; .. >" (remember "#c1 = +< m : c1; .. >"), without introducing recursion. +You may also note that the type of "to_c2" is "#c2 -> c2" while +the type of "to_c1" is more general than "#c1 -> c1". This is not always true, +since there are class types for which some instances of "#c" are not subtypes +of "c", as explained in section~\ref{ss:binary-methods}. Yet, for +parameterless classes the coercion "(_ :> c)" is always more general than +"(_ : #c :> c)". +%If a class type exposes the type of self through one of its parameters, this +%is no longer true. Here is a counter-example. +%\begin{caml_example} +%class type ['a] c = object ('a) method m : 'a end;; +%let to_c x = (x :> _ c);; +%\end{caml_example} + + +A common problem may occur when one tries to define a coercion to a +class "c" while defining class "c". The problem is due to the type +abbreviation not being completely defined yet, and so its subtypes are not +clearly known. Then, a coercion "(_ :> c)" or "(_ : #c :> c)" is taken to be +the identity function, as in +\begin{caml_example} +function x -> (x :> 'a);; +\end{caml_example} +As a consequence, if the coercion is applied to "self", as in the +following example, the type of "self" is unified with the closed type +"c" (a closed object type is an object type without ellipsis). This +would constrain the type of self to be closed and is thus rejected. +Indeed, the type of self cannot be closed: this would prevent any +further extension of the class. Therefore, a type error is generated +when the unification of this type with another type would result in a +closed object type.
+\begin{caml_example} +class c = object method m = 1 end +and d = object (self) + inherit c + method n = 2 + method as_c = (self :> c) +end;; +\end{caml_example} +However, the most common instance of this problem, coercing self to +its current class, is detected as a special case by the type checker, +and properly typed. +\begin{caml_example} +class c = object (self) method m = (self :> c) end;; +\end{caml_example} +This allows the following idiom, keeping a list of all objects +belonging to a class or its subclasses: +\begin{caml_example} +let all_c = ref [];; +class c (m : int) = + object (self) + method m = m + initializer all_c := (self :> c) :: !all_c + end;; +\end{caml_example} +This idiom can in turn be used to retrieve an object whose type has +been weakened: +\begin{caml_example} +let rec lookup_obj obj = function [] -> raise Not_found + | obj' :: l -> + if (obj :> < >) = (obj' :> < >) then obj' else lookup_obj obj l ;; +let lookup_c obj = lookup_obj obj !all_c;; +\end{caml_example} +The type "< m : int >" we see here is just the expansion of "c", due +to the use of a reference; we have succeeded in getting back an object +of type "c". + +\medskip +The previous coercion problem can often be avoided by first +defining the abbreviation, using a class type: +\begin{caml_example} +class type c' = object method m : int end;; +class c : c' = object method m = 1 end +and d = object (self) + inherit c + method n = 2 + method as_c = (self :> c') +end;; +\end{caml_example} +It is also possible to use a virtual class. Inheriting from this class +simultaneously forces all methods of "c" to have the same +type as the methods of "c'". 
+\begin{caml_example} +class virtual c' = object method virtual m : int end;; +class c = object (self) inherit c' method m = 1 end;; +\end{caml_example} +One could think of defining the type abbreviation directly: +\begin{caml_example*} +type c' = <m : int>;; +\end{caml_example*} +However, the abbreviation "#c'" cannot be defined directly in a similar way. +It can only be defined by a class or a class-type definition. +This is because a "#"-abbreviation carries an implicit anonymous +variable ".." that cannot be explicitly named. +The closest you can get to it is: +\begin{caml_example*} +type 'a c'_class = 'a constraint 'a = < m : int; .. >;; +\end{caml_example*} +with an extra type variable capturing the open object type. + +\section{Functional objects} +\pdfsection{Functional objects} +\label{ss:functional-objects} + +It is possible to write a version of class "point" without assignments +on the instance variables. The override construct "{< ... >}" returns a copy of +``self'' (that is, the current object), possibly changing the value of +some instance variables. +\begin{caml_example} +class functional_point y = + object + val x = y + method get_x = x + method move d = {< x = x + d >} + end;; +let p = new functional_point 7;; +p#get_x;; +(p#move 3)#get_x;; +p#get_x;; +\end{caml_example} +Note that the type abbreviation "functional_point" is recursive, which can +be seen in the class type of "functional_point": the type of self is "'a" +and "'a" appears inside the type of the method "move". + +The above definition of "functional_point" is not equivalent +to the following: +\begin{caml_example} +class bad_functional_point y = + object + val x = y + method get_x = x + method move d = new bad_functional_point (x+d) + end;; +\end{caml_example} +While objects of either class will behave the same, objects of their +subclasses will be different. In a subclass of "bad_functional_point", +the method "move" will +keep returning an object of the parent class.
On the contrary, in a +subclass of "functional_point", the method "move" will return an +object of the subclass. + +Functional update is often used in conjunction with binary methods +as illustrated in section \ref{module:string}. + +\section{Cloning objects} +\pdfsection{Cloning objects} +\label{ss:cloning-objects} + +Objects can also be cloned, whether they are functional or imperative. +The library function "Oo.copy" makes a shallow copy of an object. That is, +it returns a new object that has the same methods and instance +variables as its argument. The +instance variables are copied but their contents are shared. +Assigning a new value to an instance variable of the copy (using a method +call) will not affect instance variables of the original, and conversely. +A deeper assignment (for example if the instance variable is a reference cell) +will of course affect both the original and the copy. + +The type of "Oo.copy" is the following: +\begin{caml_example} +Oo.copy;; +\end{caml_example} +The keyword "as" in that type binds the type variable "'a" to +the object type "< .. >". Therefore, "Oo.copy" takes an object with +any methods (represented by the ellipsis), and returns an object of +the same type. The type of "Oo.copy" is different from type "< .. > -> +< .. >" as each ellipsis represents a different set of methods. +Ellipsis actually behaves as a type variable. +\begin{caml_example} +let p = new point 5;; +let q = Oo.copy p;; +q#move 7; (p#get_x, q#get_x);; +\end{caml_example} +In fact, "Oo.copy p" will behave as "p#copy" assuming that a public +method "copy" with body "{< >}" has been defined in the class of "p". + +Objects can be compared using the generic comparison functions "=" and "<>". +Two objects are equal if and only if they are physically equal. In +particular, an object and its copy are not equal. +\begin{caml_example} +let q = Oo.copy p;; +p = q, p = p;; +\end{caml_example} +Other generic comparisons such as ("<", "<=", ...) 
can also be used on +objects. The +relation "<" defines an unspecified but strict ordering on objects. The +ordering relationship between two objects is fixed once and for all after the +two objects have been created and it is not affected by mutation of fields. + +Cloning and override have a non-empty intersection. +They are interchangeable when used within an object and without +overriding any field: +\begin{caml_example} +class copy = + object + method copy = {< >} + end;; +class copy = + object (self) + method copy = Oo.copy self + end;; +\end{caml_example} +Only the override can be used to actually override fields, and +only the "Oo.copy" primitive can be used externally. + +Cloning can also be used to provide facilities for saving and +restoring the state of objects. +\begin{caml_example} +class backup = + object (self : 'mytype) + val mutable copy = None + method save = copy <- Some {< copy = None >} + method restore = match copy with Some x -> x | None -> self + end;; +\end{caml_example} +The above definition will only back up one level. +The backup facility can be added to any class by using multiple inheritance. +\begin{caml_example} +class ['a] backup_ref x = object inherit ['a] ref x inherit backup end;; +let rec get p n = if n = 0 then p # get else get (p # restore) (n-1);; +let p = new backup_ref 0 in +p # save; p # set 1; p # save; p # set 2; +[get p 0; get p 1; get p 2; get p 3; get p 4];; +\end{caml_example} +We can define a variant of backup that retains all copies. (We also +add a method "clear" to manually erase all copies.)
+\begin{caml_example} +class backup = + object (self : 'mytype) + val mutable copy = None + method save = copy <- Some {< >} + method restore = match copy with Some x -> x | None -> self + method clear = copy <- None + end;; +\end{caml_example} +\begin{caml_example} +class ['a] backup_ref x = object inherit ['a] ref x inherit backup end;; +let p = new backup_ref 0 in +p # save; p # set 1; p # save; p # set 2; +[get p 0; get p 1; get p 2; get p 3; get p 4];; +\end{caml_example} + + + +\section{Recursive classes} +\pdfsection{Recursive classes} +\label{ss:recursive-classes} + +Recursive classes can be used to define objects whose types are +mutually recursive. +\begin{caml_example} +class window = + object + val mutable top_widget = (None : widget option) + method top_widget = top_widget + end +and widget (w : window) = + object + val window = w + method window = window + end;; +\end{caml_example} +Although their types are mutually recursive, the classes "widget" and +"window" are themselves independent. + + +\section{Binary methods} +\pdfsection{Binary methods} +\label{ss:binary-methods} + +A binary method is a method which takes an argument of the same type +as self. The class "comparable" below is a template for classes with a +binary method "leq" of type "'a -> bool" where the type variable "'a" +is bound to the type of self. Therefore, "#comparable" expands to "< +leq : 'a -> bool; .. > as 'a". We see here that the binder "as" also +allows writing recursive types. +\begin{caml_example} +class virtual comparable = + object (_ : 'a) + method virtual leq : 'a -> bool + end;; +\end{caml_example} +We then define a subclass "money" of "comparable". The class "money" +simply wraps floats as comparable objects. We will extend it below with +more operations. We have to use a type constraint on the class parameter "x" +because the primitive "<=" is a polymorphic function in +OCaml. 
The "inherit" clause ensures that the type of objects +of this class is an instance of "#comparable". +\begin{caml_example} +class money (x : float) = + object + inherit comparable + val repr = x + method value = repr + method leq p = repr <= p#value + end;; +\end{caml_example} +% not explained: mutability can be hidden +Note that the type "money" is not a subtype of type +"comparable", as the self type appears in contravariant position +in the type of method "leq". +Indeed, an object "m" of class "money" has a method "leq" +that expects an argument of type "money" since it accesses +its "value" method. Considering "m" of type "comparable" would allow a +call to method "leq" on "m" with an argument that does not have a method +"value", which would be an error. + +Similarly, the type "money2" below is not a subtype of type "money". +\begin{caml_example} +class money2 x = + object + inherit money x + method times k = {< repr = k *. repr >} + end;; +\end{caml_example} +It is however possible to define functions that manipulate objects of +type either "money" or "money2": the function "min" +will return the minimum of any two objects whose type unifies with +"#comparable". The type of "min" is not the same as "#comparable -> +#comparable -> #comparable", as the abbreviation "#comparable" hides a +type variable (an ellipsis). Each occurrence of this abbreviation +generates a new variable. +\begin{caml_example} +let min (x : #comparable) y = + if x#leq y then x else y;; +\end{caml_example} +This function can be applied to objects of type "money" +or "money2". +\begin{caml_example} +(min (new money 1.3) (new money 3.1))#value;; +(min (new money2 5.0) (new money2 3.14))#value;; +\end{caml_example} + +More examples of binary methods can be found in sections +\ref{module:string} and \ref{module:set}. + +Note the use of override for method "times". +Writing "new money2 (k *. repr)" instead of "{< repr = k *. 
repr >}" +would not behave well with inheritance: in a subclass "money3" of "money2" +the "times" method would return an object of class "money2" but not of class +"money3" as would be expected. + +The class "money" could naturally carry another binary method. Here is a +direct definition: +\begin{caml_example} +class money x = + object (self : 'a) + val repr = x + method value = repr + method print = print_float repr + method times k = {< repr = k *. repr >} + method leq (p : 'a) = repr <= p#value + method plus (p : 'a) = {< repr = repr +. p#value >} + end;; +\end{caml_example} + +\section{Friends} +\pdfsection{Friends} +\label{ss:friends} + +The above class "money" reveals a problem that often occurs with binary +methods. In order to interact with other objects of the same class, the +representation of "money" objects must be revealed, using a method such as +"value". If we remove all binary methods (here "plus" and "leq"), +the representation can easily be hidden inside objects by removing the method +"value" as well. However, this is not possible as soon as some binary +method requires access to the representation of objects of the same +class (other than self). +\begin{caml_example} +class safe_money x = + object (self : 'a) + val repr = x + method print = print_float repr + method times k = {< repr = k *. repr >} + end;; +\end{caml_example} +Here, the representation of the object is known only to a particular object. +To make it available to other objects of the same class, we are forced to +make it available to the whole world. However we can easily restrict the +visibility of the representation using the module system. 
+\begin{caml_example*} +module type MONEY = + sig + type t + class c : float -> + object ('a) + val repr : t + method value : t + method print : unit + method times : float -> 'a + method leq : 'a -> bool + method plus : 'a -> 'a + end + end;; +module Euro : MONEY = + struct + type t = float + class c x = + object (self : 'a) + val repr = x + method value = repr + method print = print_float repr + method times k = {< repr = k *. repr >} + method leq (p : 'a) = repr <= p#value + method plus (p : 'a) = {< repr = repr +. p#value >} + end + end;; +\end{caml_example*} +Another example of friend functions may be found in section +\ref{module:set}. These examples occur when a group of objects (here +objects of the same class) and functions should see each other's internal +representation, while their representation should be hidden from the +outside. The solution is always to define all friends in the same module, +give access to the representation and use a signature constraint to make the +representation abstract outside the module. + + + +% LocalWords: typecheck monomorphic uncaptured Subtyping subtyping leq repr Oo +% LocalWords: val sig bool Euro struct OCaml Vouillon Didier int ref incr init +% LocalWords: succ mytype rec + diff --git a/manual/styles/altindex.sty b/manual/styles/altindex.sty new file mode 100644 index 0000000000..d236e71416 --- /dev/null +++ b/manual/styles/altindex.sty @@ -0,0 +1,39 @@ +%% An attempt to have several index files +%% +%% Defines \altindex{filename}{word to index} +%% and \makealtindex{filename} +%% +%% It is possible to define a macro for each index as follows: +%% \newcommand{\myindex}{\altindex{myindexfile}} +%% +%% This code is not really clean, there are still a number of things +%% that I don't understand... but it works. + +%% \makealtindex{filename} opens filename.idx for writing. 
+ +\def\makealtindex#1{\if@filesw + \expandafter\newwrite\csname @#1altindexfile\endcsname + \immediate\openout\expandafter\csname @#1altindexfile\endcsname=#1.idx + \typeout{Writing alternate index file #1.idx}\fi} + +%% \@wraltindex assumes that a trailing `\fi' will get bound +%% to #2. So, it `eats' it as second parameter and reinserts it. +%% Quick and dirty, I know... +%% Writes the index entry #3 into #1. + +\def\@wraltindex#1#2#3{\let\thepage\relax + \xdef\@gtempa{\write#1{\string + \indexentry{#3}{\thepage}}}\fi\endgroup\@gtempa + \if@nobreak \ifvmode\nobreak\fi\fi\@esphack} + +%% \altindex{filename}{index entry} does nothing if +%% \@<filename>altindexfile is \relax (i.e. filename.idx not open). +%% Otherwise, writes the index entry, and closes the whole stuff (some +%% groups, and some \if). + +\def\altindex#1{\@bsphack\begingroup + \def\protect##1{\string##1\space}\@sanitize + \@ifundefined{@#1altindexfile}% + {\endgroup\@esphack}% + {\@wraltindex{\expandafter\csname @#1altindexfile\endcsname}} +} diff --git a/manual/styles/caml-sl.sty b/manual/styles/caml-sl.sty new file mode 100644 index 0000000000..c4061e4a4d --- /dev/null +++ b/manual/styles/caml-sl.sty @@ -0,0 +1,43 @@ +% CAML style option, for use with the caml-latex filter. + +\typeout{Document Style option `caml-sl' <7 Apr 92>.} + +{\catcode`\^^M=\active % + \gdef\@camlinputline#1^^M{\normalsize\tt\# #1\par} % + \gdef\@camloutputline#1^^M{\small\ttfamily\slshape#1\par} } % +\def\@camlblankline{\medskip} +\chardef\@camlbackslash="5C +\def\@bunderline{\setbox0\hbox\bgroup\let\par\@parinunderline} + +\def \@parinunderline {\futurelet \@next \@@parinunderline} +\def \@@parinunderline {\ifx \@next \? 
\let \@do \@@par@inunderline \else \let \@do \@@@parinunderline \fi \@do} +\def \@@par@inunderline #1{\@eunderline\@oldpar\?\@bunderline} +\def \@@@parinunderline {\@eunderline\@oldpar\@bunderline} +\def\@eunderline{\egroup\underline{\box0}} +\def\@camlnoop{} + +\def\caml{ + \bgroup + \flushleft + \parindent 0pt + \parskip 0pt + \let\do\@makeother\dospecials + \catcode13=\active % 13 = ^M = CR + \catcode92=0 % 92 = \ + \catcode32=\active % 32 = SPC + \frenchspacing + \@vobeyspaces + \let\@oldpar\par + \let\?\@camlinputline + \let\:\@camloutputline + \let\;\@camlblankline + \let\<\@bunderline + \let\>\@eunderline + \let\\\@camlbackslash + \let\-\@camlnoop +} + +\def\endcaml{ + \endflushleft + \egroup\noindent +} diff --git a/manual/styles/caml.sty b/manual/styles/caml.sty new file mode 100644 index 0000000000..3f5753caaa --- /dev/null +++ b/manual/styles/caml.sty @@ -0,0 +1,31 @@ +% CAML style option, for use with the caml-latex filter. + +\typeout{Document Style option `caml' <7 Apr 92>.} + +{\catcode`\^^M=\active % + \gdef\@camlinputline#1^^M{\tt\##1\par} % + \gdef\@camloutputline#1^^M{\tt#1\par} } % +\def\@camlblankline{\medskip} +\chardef\@camlbackslash="5C + +\def\caml{ + \bgroup + \flushleft + \parindent 0pt + \parskip 0pt + \let\do\@makeother\dospecials + \catcode`\^^M=\active + \catcode`\\=0 + \catcode`\ \active + \frenchspacing + \@vobeyspaces + \let\?\@camlinputline + \let\:\@camloutputline + \let\;\@camlblankline + \let\\\@camlbackslash +} + +\def\endcaml{ + \endflushleft + \egroup\noindent +} diff --git a/manual/styles/doc.tfm b/manual/styles/doc.tfm Binary files differnew file mode 100644 index 0000000000..d010f29edd --- /dev/null +++ b/manual/styles/doc.tfm diff --git a/manual/styles/docbf.tfm b/manual/styles/docbf.tfm Binary files differnew file mode 100644 index 0000000000..d010f29edd --- /dev/null +++ b/manual/styles/docbf.tfm diff --git a/manual/styles/docit.tfm b/manual/styles/docit.tfm Binary files differnew file mode 100644 index 
0000000000..d010f29edd --- /dev/null +++ b/manual/styles/docit.tfm diff --git a/manual/styles/docmi.tfm b/manual/styles/docmi.tfm Binary files differnew file mode 100644 index 0000000000..d010f29edd --- /dev/null +++ b/manual/styles/docmi.tfm diff --git a/manual/styles/docrm.tfm b/manual/styles/docrm.tfm Binary files differnew file mode 100644 index 0000000000..d010f29edd --- /dev/null +++ b/manual/styles/docrm.tfm diff --git a/manual/styles/doctt.tfm b/manual/styles/doctt.tfm Binary files differnew file mode 100644 index 0000000000..d010f29edd --- /dev/null +++ b/manual/styles/doctt.tfm diff --git a/manual/styles/fullpage.sty b/manual/styles/fullpage.sty new file mode 100644 index 0000000000..6ecbeb761b --- /dev/null +++ b/manual/styles/fullpage.sty @@ -0,0 +1,2 @@ +\marginparwidth 0pt \oddsidemargin 0pt \evensidemargin 0pt \marginparsep 0pt +\topmargin 0pt \textwidth 6.5in \textheight 8.5 in diff --git a/manual/styles/html.sty b/manual/styles/html.sty new file mode 100644 index 0000000000..6a9e92535c --- /dev/null +++ b/manual/styles/html.sty @@ -0,0 +1,222 @@ +% LaTeX2HTML Version 0.6.4 : html.sty +% +% This file contains definitions of LaTeX commands which are +% processed in a special way by the translator. +% For example, there are commands for embedding external hypertext links, +% for cross-references between documents or for including +% raw HTML. +% This file includes the comments.sty file v2.0 by Victor Eijkhout +% In most cases these commands do nothing when processed by LaTeX. + +% Modifications: +% +% nd = Nikos Drakos <nikos@cbl.leeds.ac.uk> +% jz = Jelle van Zeijl <jvzeijl@isou17.estec.esa.nl> + +% jz 22-APR-94 - Added support for htmlref +% nd - Created + + + +% Exit if the style file is already loaded +% (suggested by Lee Shombert <las@potomac.wash.inmet.com> +\ifx \htmlstyloaded\relax \endinput\else\let\htmlstyloaded\relax\fi + +%%% LINKS TO EXTERNAL DOCUMENTS +% +% This can be used to provide links to arbitrary documents. 
+% The first argument should be the text that is going to be +% highlighted and the second argument a URL. +% The hyperlink will appear as a hyperlink in the HTML +% document and as a footnote in the dvi or ps files. +% +\newcommand{\htmladdnormallinkfoot}[2]{ #1\footnote{#2}} + +% This is an alternative definition of the command above which +% will ignore the URL in the dvi or ps files. +\newcommand{\htmladdnormallink}[2]{ #1 } + +% This command takes as argument a URL pointing to an image. +% The image will be embedded in the HTML document but will +% be ignored in the dvi and ps files. +% +\newcommand{\htmladdimg}[1]{ } + +%%% CROSS-REFERENCES BETWEEN (LOCAL OR REMOTE) DOCUMENTS +% +% This can be used to refer to symbolic labels in other Latex +% documents that have already been processed by the translator. +% The arguments should be: +% #1 : the URL to the directory containing the external document +% #2 : the path to the labels.pl file of the external document. +% If the external document lives on a remote machine then labels.pl +% must be copied on the local machine. +% +%e.g. \externallabels{http://cbl.leeds.ac.uk/nikos/WWW/doc/tex2html/latex2html} +% {/usr/cblelca/nikos/tmp/labels.pl} +% The arguments are ignored in the dvi and ps files. +% +\newcommand{\externallabels}[2]{ } + +% This complements the \externallabels command above. The argument +% should be a label defined in another latex document and will be +% ignored in the dvi and ps files. +% +\newcommand{\externalref}[1]{ } + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Comment.sty version 2.0, 19 June 1992 +% selectively in/exclude pieces of text: the user can define new +% comment versions, and each is controlled separately. +% This style can be used with plain TeX or LaTeX, and probably +% most other packages too. 
+% +% Examples of use in LaTeX and TeX follow \endinput +% +% Author +% Victor Eijkhout +% Department of Computer Science +% University Tennessee at Knoxville +% 104 Ayres Hall +% Knoxville, TN 37996 +% USA +% +% eijkhout@cs.utk.edu +% +% Usage: all text included in between +% \comment ... \endcomment +% or \begin{comment} ... \end{comment} +% is discarded. The closing command should appear on a line +% of its own. No starting spaces, nothing after it. +% This environment should work with arbitrary amounts +% of comment. +% +% Other 'comment' environments are defined by +% and are selected/deselected with +% \includecomment{versiona} +% \excludecomment{versionb} +% +% These environments are used as +% \versiona ... \endversiona +% or \begin{versiona} ... \end{versiona} +% with the closing command again on a line of its own. +% +% Basic approach: +% to comment something out, scoop up every line in verbatim mode +% as macro argument, then throw it away. +% For inclusions, both the opening and closing commands +% are defined as noop +% +% Changed \next to \html@next to prevent clashes with other sty files +% (mike@emn.fr) +% Changed \html@next to \htmlnext so the \makeatletter and +% \makeatother commands could be removed (they were causing other +% style files - changebar.sty - to crash) (nikos@cbl.leeds.ac.uk) + + +\def\makeinnocent#1{\catcode`#1=12 } +\def\csarg#1#2{\expandafter#1\csname#2\endcsname} + +\def\ThrowAwayComment#1{\begingroup + \def\CurrentComment{#1}% + \let\do\makeinnocent \dospecials + \makeinnocent\^^L% and whatever other special cases + \endlinechar`\^^M \catcode`\^^M=12 \xComment} +{\catcode`\^^M=12 \endlinechar=-1 % + \gdef\xComment#1^^M{\def\test{#1} + \csarg\ifx{PlainEnd\CurrentComment Test}\test + \let\htmlnext\endgroup + \else \csarg\ifx{LaLaEnd\CurrentComment Test}\test + \edef\htmlnext{\endgroup\noexpand\end{\CurrentComment}} + \else \let\htmlnext\xComment + \fi \fi \htmlnext} +} + +\def\includecomment + 
#1{\expandafter\def\csname#1\endcsname{}% + \expandafter\def\csname end#1\endcsname{}} +\def\excludecomment + #1{\expandafter\def\csname#1\endcsname{\ThrowAwayComment{#1}}% + {\escapechar=-1\relax + \csarg\xdef{PlainEnd#1Test}{\string\\end#1}% + \csarg\xdef{LaLaEnd#1Test}{\string\\end\string\{#1\string\}}% + }} + +\excludecomment{comment} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%% RAW HTML +% +% Enclose raw HTML between a \begin{rawhtml} and \end{rawhtml}. +% The html environment ignores its body +% +\excludecomment{rawhtml} + +%%% HTML ONLY +% +% Enclose LaTeX constructs which will only appear in the +% HTML output and will be ignored by LaTeX with +% \begin{htmlonly} and \end{htmlonly} +% +\excludecomment{htmlonly} + +%%% LaTeX ONLY +% Enclose LaTeX constructs which will only appear in the +% DVI output and will be ignored by latex2html with +%\begin{latexonly} and \end{latexonly} +% +\newenvironment{latexonly}{}{} + +%%% HYPERREF +% Suggested by Eric M. Carol <eric@ca.utoronto.utcc.enfm> +% Similar to \ref but accepts conditional text. +% The first argument is HTML text which will become ``hyperized'' +% (underlined). +% The second and third arguments are text which will appear only in the paper +% version (DVI file), enclosing the fourth argument which is a reference to a label. +% +%e.g. \hyperref{using the tracer}{using the tracer (see Section}{)}{trace} +% where there is a corresponding \label{trace} +% +\newcommand{\hyperref}[4]{#2\ref{#4}#3} + +%%% HTMLREF +% Reference in HTML version only. +% Mix between \htmladdnormallink and \hyperref. +% First arg is text for in both versions, second is label for use in HTML +% version. +\newcommand{\htmlref}[2]{#1} + +%%% HTMLIMAGE +% This command can be used inside any environment that is converted +% into an inlined image (eg a "figure" environment) in order to change +% the way the image will be translated. 
The argument of \htmlimage +% is really a string of options separated by commas ie +% [scale=<scale factor>],[external],[thumbnail=<reduction factor> +% The scale option allows control over the size of the final image. +% The ``external'' option will cause the image not to be inlined +% (images are inlined by default). External images will be accessible +% via a hypertext link. +% The ``thumbnail'' option will cause a small inlined image to be +% placed in the caption. The size of the thumbnail depends on the +% reduction factor. The use of the ``thumbnail'' option implies +% the ``external'' option. +% +% Example: +% \htmlimage{scale=1.5,external,thumbnail=0.2} +% will cause a small thumbnail image 1/5th of the original size to be +% placed in the final document, pointing to an external image 1.5 +% times bigger than the original. +% +\newcommand{\htmlimage}[1]{} + +%%% HTMLADDTONAVIGATION +% This command appends its argument to the buttons in the navigation +% panel. It is ignored by LaTeX. +% +% Example: +% \htmladdtonavigation{\htmladdnormallink +% {\htmladdimg{http://server/path/to/gif}} +% {http://server/path}} +\newcommand{\htmladdtonavigation}[1]{} diff --git a/manual/styles/isolatin.sty b/manual/styles/isolatin.sty new file mode 100644 index 0000000000..9a6850979d --- /dev/null +++ b/manual/styles/isolatin.sty @@ -0,0 +1,174 @@ +% 1-Jun-1992 +% +% File bases on iso1ibm.tex Version 1.0 of May, 9 1990 +\message{ISO-latin-1 input coding, version 0.9 of 1-Jun-1992.} +% +% For input of 8 bits character. +% This allows reading ISO-8859 Latin-1 codes. 
+% +\chardef \atcode = \the \catcode `\@ +\catcode `\@ = 11 +% +\catcode160=13 \def^^a0{{\bf?}} % 160 '240, "a0 +\catcode161=13 \def^^a1{!`} % 161 '241, "a1 +\catcode162=13 \def^^a2{{\bf?}} % 162 '242, "a2 +\catcode163=13 \def^^a3{\pounds{}} % 163 '243, "a3 +\catcode164=13 \def^^a4{{\bf?}} % 164 '244, "a4 +\catcode165=13 \def^^a5{{\bf?}} % 165 '245, "a5 +\catcode166=13 \def^^a6{$\vert$} % 166 '246, "a6 +\catcode167=13 \def^^a7{\S{}} % 167 '247, "a7 \S{} ISO-1, +\catcode168=13 \def^^a8{\"{ }} % 168 '250, "a8 +\catcode169=13 \def^^a9{\copyright{}}% 169, '251, "a9 +\catcode170=13 \def^^aa{{\bf?}} % 170 '252, "aa +\catcode171=13 % 171 '253, "ab, +\@ifundefined{lguill}{\def^^ab{$<<$}}{\def^^ab{\lguill}} +\catcode172=13 \def^^ac{{\bf?}} % 172 '254, "ac +\catcode173=13 \def^^ad{{\bf?}} % 173 '255 "ad +\catcode174=13 \def^^ae{{\bf?}} % 174 '256, "ae +\catcode175=13 \def^^af{{\bf?}} % 175 '257, "af +\catcode176=13 \def^^b0{{\bf?}} % 176 '260, "b0 ?? \No +\catcode177=13 \def^^b1{$\pm$} % 177 '261, "b1 ISO-1 plus-minus +\catcode178=13 \def^^b2{${}^2$} % 178, '262, "b2 +\catcode179=13 \def^^b3{${}^3$} % 179, '263, "b3 +\catcode180=13 \def^^b4{\'{ }} % 180, '264, "b4 +\catcode181=13 \def^^b5{{\bf?}} % 181, '265, "b5 +\catcode182=13 \def^^b6{\P{}} % 182, '266, "b6 +\catcode183=13 \def^^b7{$\cdot$} % 183, '267, "b7 +\catcode184=13 \def^^b8{\c{ }} % 184, '270, "b8 +\catcode185=13 \def^^b9{${}^1$} % 185, '271, "b9 +\catcode186=13 \def^^ba{{\bf?}} % 186, '272, "ba +\catcode187=13 % 187, '273, "bb +\@ifundefined{rguill}{\def^^bb{$>>$}}{\def^^bb{\rguill}} +\catcode188=13 \def^^bc{$\frac 1 4$} % 188, '274, "bc +\catcode189=13 \def^^bd{$\frac 1 2$} % 189, '275, "bd +\catcode190=13 \def^^be{$\frac 3 4$} % 190, '276, "be +\catcode191=13 \def^^bf{?`} % 191, '277, "bf +\catcode192=13 \def^^c0{\`A} % 192, '300, "c0 +\@ifundefined{@grave@A@grave@}{\def^^c0{\`A}}{\let^^c0=\@grave@A@grave@} +\catcode193=13 \def^^c1{\'A} % 193, '301, "c1 
+\@ifundefined{@acute@A@acute@}{\def^^c1{\'A}}{\let^^c1=\@acute@A@acute@} +\catcode194=13 \def^^c2{\^A} % 194, '302, "c2 +\@ifundefined{@circflx@A@circflx@}{\def^^c2{\^A}}{\let^^c2=\@circflx@A@circflx@} +\catcode195=13 \def^^c3{\~A} % 195, '303, "c3 +\@ifundefined{@tileda@A@tilda@}{\def^^c3{\~A}}{\let^^c3=\@tileda@A@tilda@} +\catcode196=13 \def^^c4{\"A} % 196, '304, "c4 +\@ifundefined{@Umlaut@A@Umlaut@}{\def^^c4{\"A}}{\let^^c4=\@Umlaut@A@Umlaut@} +\catcode197=13 \def^^c5{\AA{}} % 197, '305, "c5 +\@ifundefined{@A@A@}{\def^^c5{\AA{}}}{\let^^c5=\@A@A@} +\catcode198=13 \def^^c6{\AE{}} % 198, '306, "c6 +\@ifundefined{@A@E@}{\def^^c6{\AE{}}}{\let^^c6=\@A@E@} +\catcode199=13 \def^^c7{\c{C}} % 199, '307, "c7 +\@ifundefined{@cedilla@C@cedilla}{\def^^c7{\c{C}}}{\let^^c7=\@cedilla@C@cedilla} +\catcode200=13 \def^^c8{\`E} % 200, '310, "c8 +\@ifundefined{@grave@E@grave@}{\def^^c8{\`E}}{\let^^c8=\@grave@E@grave@} +\catcode201=13 \def^^c9{\'E} % 201, '311, "c9 +\@ifundefined{@acute@E@acute@}{\def^^c9{\'E}}{\let^^c9=\@acute@E@acute@} +\catcode202=13 \def^^ca{\^E} % 202, '312, "ca +\@ifundefined{@circflx@E@circflx@}{\def^^ca{\^E}}{\let^^ca=\@circflx@E@circflx@} +\catcode203=13 \def^^cb{{\"E}} % 203, '313, "cb +\@ifundefined{@Umlaut@E@Umlaut@}{\def^^cb{\"E}}{\let^^cb=\@Umlaut@E@Umlaut@} +\catcode204=13 \def^^cc{\`I} % 204, '314, "cc +\@ifundefined{@grave@I@grave@}{\def^^cc{\`I}}{\let^^cc=\@grave@I@grave@} +\catcode205=13 \def^^cd{\'I} % 205, '315, "cd +\@ifundefined{@acute@I@acute@}{\def^^cd{\'I}}{\let^^cd=\@acute@I@acute@} +\catcode206=13 \def^^ce{\^I} % 206, '316, "ce +\@ifundefined{@circflx@I@circflx@}{\def^^ce{\^I}}{\let^^ce=\@circflx@I@circflx@} +\catcode207=13 \def^^cf{{\"I}} % 207, '317, "cf +\@ifundefined{@Umlaut@I@Umlaut@}{\def^^cf{\"I}}{\let^^cf=\@Umlaut@I@Umlaut@} +\catcode208=13 \def^^d0{\rlap{\raise0.3ex\hbox{--}}D} % 208, '320, "d0 +\@ifundefined{@Eth@}{}{\let^^d0=\@Eth@} +\catcode209=13 \def^^d1{¥} % 209, '321, "d1 
+\@ifundefined{@tileda@N@tilda@}{\def^^d1{\~N}}{\let^^d1\@tileda@N@tilda@} +\catcode210=13 \def^^d2{\`O} % 210, '322, "d2 +\@ifundefined{@grave@O@grave@}{\def^^d2{\`O}}{\let^^d2=\@grave@O@grave@} +\catcode211=13 \def^^d3{\'O} % 211, '323, "d3 +\@ifundefined{@acute@O@acute@}{\def^^d3{\'O}}{\let^^d3\@acute@O@acute@} +\catcode212=13 \def^^d4{\^O} % 212, '324, "d4 +\@ifundefined{@circflx@O@circflx@}{\def^^d4{\^O}}{\let^^d4=\@circflx@O@circflx@} +\catcode213=13 \def^^d5{\~O} % 213, '325, "d5 +\@ifundefined{@tileda@O@tilda@}{\def^^d5{\~O}}{\let^^d5\@tileda@O@tilda@} +\catcode214=13 \def^^d6{\"O} % 214, '326, "d6 +\@ifundefined{@Umlaut@O@Umlaut@}{\def^^d6{\"O}}{\let^^d6=\@Umlaut@O@Umlaut@} +\catcode215=13 \def^^d7{$\times$}% 215, '327, "d7 +\catcode216=13 \def^^d8{\O{}} % 216, '330, "d8 +\@ifundefined{@OOO@}{\def^^d8{\O{}}}{\let^^d8=\@OOO@} +\catcode217=13 \def^^d9{\`U} % 217, '331, "d9 +\@ifundefined{@grave@U@grave@}{\def^^d9{\`U}}{\let^^d9=\@grave@U@grave@} +\catcode218=13 \def^^da{\'U} % 218, '332, "da +\@ifundefined{@acute@U@acute@}{\def^^da{\'U}}{\let^^da=\@acute@U@acute@} +\catcode219=13 \def^^db{\^U} % 219, '333, "db +\@ifundefined{@circflx@U@circflx@}{\def^^db{\^U}}{\let^^db=\@circflx@U@circflx@} +\catcode220=13 \def^^dc{\"U} % 220, '334, "dc +\@ifundefined{@Umlaut@U@Umlaut@}{\def^^dc{\"U}}{\let^^dc=\@Umlaut@U@Umlaut@} +\catcode221=13 \def^^dd{{\'Y}} % 221, '335, "dd +\@ifundefined{@acute@Y@acute@}{\def^^dd{\'Y}}{\let^^dd=\@acute@Y@acute@} +\catcode222=13 \def^^de{\lower 0.7ex \hbox{l}\hskip-1ex\relax b} % 222, '336, "de +\@ifundefined{@Thorn@}{}{\let^^de=\@Thorn@} +\catcode223=13 \def^^df{\ss{}} % 223, '337, "df +\@ifundefined{@sss@}{\def^^df{\ss{}}}{\let^^df=\@sss@} +\catcode224=13 \def^^e0{\`a} % 224, '340, "e0 +\@ifundefined{@grave@a@grave@}{\def^^e0{\`a}}{\let^^e0=\@grave@a@grave@} +\catcode225=13 \def^^e1{\'a} % 225, '341, "e1 +\@ifundefined{@acute@a@acute@}{\def^^e1{\'a}}{\let^^e1=\@acute@a@acute@} +\catcode226=13 \def^^e2{\^a} % 226, '342, "e2 
+\@ifundefined{@circflx@a@circflx@}{\def^^e2{\^a}}{\let^^e2=\@circflx@a@circflx@} +\catcode227=13 \def^^e3{\~a} % 227, '343, "e3 +\@ifundefined{@tileda@a@tilda@}{\def^^e3{\~a}}{\let^^e3=\@tileda@a@tilda@} +\catcode228=13 \def^^e4{\"a} % 228, '344, "e4 +\@ifundefined{@Umlaut@a@Umlaut@}{\def^^e4{\"a}}{\let^^e4=\@Umlaut@a@Umlaut@} +\catcode229=13 \def^^e5{\aa{}} % 229, '345, "e5 +\@ifundefined{@a@a@}{\def^^e5{\aa{}}}{\let^^e5=\@a@a@} +\catcode230=13 \def^^e6{\ae{}} % 230, '346, "e6 +\@ifundefined{@a@e@}{\def^^e6{\ae{}}}{\let^^e6=\@a@e@} +\catcode231=13 \def^^e7{\c{c}} % 231, '347, "e7 +\@ifundefined{@cedilla@c@cedilla}{\def^^e7{\c{c}}}{\let^^e7=\@cedilla@c@cedilla} +\catcode232=13 \def^^e8{\`e} % 232, '350, "e8 +\@ifundefined{@grave@e@grave@}{\def^^e8{\`e}}{\let^^e8=\@grave@e@grave@} +\catcode233=13 \def^^e9{\'e} % 233, '351, "e9 +\@ifundefined{@acute@e@acute@}{\def^^e9{\'e}}{\let^^e9=\@acute@e@acute@} +\catcode234=13 \def^^ea{\^e} % 234, '352, "ea +\@ifundefined{@circflx@e@circflx@}{\def^^ea{\^e}}{\let^^ea=\@circflx@e@circflx@} +\catcode235=13 \def^^eb{\"e} % 235, '353, "eb +\@ifundefined{@Umlaut@e@Umlaut@}{\def^^eb{\"e}}{\let^^eb=\@Umlaut@e@Umlaut@} +\catcode236=13 \def^^ec{\`{\i}} % 236, '354, "ec +\@ifundefined{@grave@i@grave@}{\def^^ec{\`{\i}}}{\let^^ec=\@grave@i@grave@} +\catcode237=13 \def^^ed{\'{\i}} % 237, '355, "ed +\@ifundefined{@acute@i@acute@}{\def^^ed{\'{\i}}}{\let^^ed=\@acute@i@acute@} +\catcode238=13 \def^^ee{\^{\i}} % 238, '356, "ee +\@ifundefined{@circflx@i@circflx@}{\def^^ee{\^{\i}}}{\let^^ee=\@circflx@i@circflx@} +\catcode239=13 \def^^ef{\"{\i}} % 239, '357, "ef +\@ifundefined{@Umlaut@i@Umlaut@}{\def^^ef{\"{\i}}}{\let^^ef=\@Umlaut@i@Umlaut@} +\catcode240=13 \def^^f0{$\partial$} % 240, '360, "f0 +\@ifundefined{@eth@}{\def^^f0{$\partial$}}{\let^^f0=\@eth@} +\catcode241=13 \def^^f1{\~n} % 241, '361, "f1 +\@ifundefined{@tileda@n@tilda@}{\def^^f1{\~n}}{\let^^f1\@tileda@n@tilda@} +\catcode242=13 \def^^f2{\`o} % 242, '362, "f2 
+\@ifundefined{@grave@o@grave@}{\def^^f2{\`o}}{\let^^f2=\@grave@o@grave@} +\catcode243=13 \def^^f3{\'o} % 243, '363, "f3 +\@ifundefined{@acute@o@acute@}{\def^^f3{\'o}}{\let^^f3\@acute@o@acute@} +\catcode244=13 \def^^f4{\^o} % 244, '364, "f4 +\@ifundefined{@circflx@o@circflx@}{\def^^f4{\^o}}{\let^^f4=\@circflx@o@circflx@} +\catcode245=13 \def^^f5{\~o} % 245, '365, "f5 +\@ifundefined{@tileda@o@tilda@}{\def^^f5{\~o}}{\let^^f5\@tileda@o@tilda@} +\catcode246=13 \def^^f6{\"o} % 246, '366, "f6 +\@ifundefined{@Umlaut@o@Umlaut@}{\def^^f6{\"o}}{\let^^f6=\@Umlaut@o@Umlaut@} +\catcode247=13 \def^^f7{$\div$} % 247, '367, "f7 +\catcode248=13 \def^^f8{\o{}} % 248, '370, "f8 +\@ifundefined{@ooo@}{\def^^f8{\o{}}}{\let^^f8=\@ooo@} +\catcode249=13 \def^^f9{\`u} % 249, '371, "f9 +\@ifundefined{@grave@u@grave@}{\def^^f9{\`u}}{\let^^f9=\@grave@u@grave@} +\catcode250=13 \def^^fa{\'u} % 250, '372, "fa +\@ifundefined{@acute@u@acute@}{\def^^fa{\'u}}{\let^^fa=\@acute@u@acute@} +\catcode251=13 \def^^fb{\^u} % 251, '373, "fb +\@ifundefined{@circflx@u@circflx@}{\def^^fb{\^u}}{\let^^fb=\@circflx@u@circflx@} +\catcode252=13 \def^^fc{\"u} % 252, '374, "fc +\@ifundefined{@Umlaut@u@Umlaut@}{\def^^fc{\"u}}{\let^^fc=\@Umlaut@u@Umlaut@} +\catcode253=13 \def^^fd{\'y} % 253, '375, "fd +\@ifundefined{@acute@y@acute@}{\def^^fd{\'y}}{\let^^fd=\@acute@y@acute@} +\catcode254=13 \def^^fe{\lower 0.8ex\hbox{l}\hskip-1ex\relax b} % 254, '376, "fe +\@ifundefined{@thorn@}{}{\let^^fe=\@thorn@} +\catcode255=13 \def^^ff{\"y} % 255, '377, "ff +\@ifundefined{@Umlaut@y@Umlaut@}{\def^^ff{\"y}}{\let^^ff=\@Umlaut@y@Umlaut@} +\catcode `\@ = \the \atcode +\endinput +% End of iso-latin-1.tex diff --git a/manual/styles/multicols.sty b/manual/styles/multicols.sty new file mode 100644 index 0000000000..2d94548855 --- /dev/null +++ b/manual/styles/multicols.sty @@ -0,0 +1,176 @@ +% Save file as: MULTICOLS.STY Source: FILESERV@SHSU.BITNET +% multicols.sty version 1.0 +% Allows for multiple column typesetting +% From TUGboat, voulme 
10 (1989), No. 3 +% +% Frank Mittelback +% Electronic Data Systems +% (Deutschland) GmbH +% Eisenstrasse 56 +% D-6090 Russelsheim +% Federal Republic of Germany +% Bitnet: pzf5hz@drueds2 +% +% Variables: +% \premulticols - If the space left on the page is less than this, a new +% page is started before the multiple columns. Otherwise, a \vskip +% of \multicolsep is added. +% \postmulticols - analogous to \premulticols +% \columnseprule - the width of the rule separating the columns. +% +% Commands: +% \raggedcolumns - don't align bottom lines of columns +% \flushcolumns - align bottom lines (default) +% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\@ifundefined{mult@cols}{}{\endinput} + +\def\multicols#1{\col@number#1\relax + \ifnum\col@number<\@ne + \@warning{Using '\number\col@number' columns doesn't seem a good idea.^^J + I therefore use two columns instead}% + \col@number\tw@ \fi + \@ifnextchar[\mult@cols{\mult@cols[]}} + +\def\mult@cols[#1]{\@ifnextchar[% + {\mult@@cols{#1}}% + {\mult@@cols{#1}[\premulticols]}} + +\def\mult@@cols#1[#2]{% + \enough@room#2% + #1\par\addvspace\multicolsep + \begingroup + \prepare@multicols\ignorespaces} + +\def\enough@room#1{\par \penalty\z@ + \page@free \pagegoal + \advance \page@free -\pagetotal + \ifdim \page@free <#1\newpage \fi} + +\def\prepare@multicols{% + \output{\global\setbox\partial@page + \vbox{\unvbox\@cclv}}\eject + \vbadness9999 \hbadness5000 + \tolerance\multicoltolerance + \doublecol@number\col@number + \multiply\doublecol@number\tw@ + \advance\baselineskip\multicolbaselineskip + \advance\@colroom-\ht\partial@page + \vsize\col@number\@colroom + \advance\vsize\c@collectmore\baselineskip + \hsize\columnwidth \advance\hsize\columnsep + \advance\hsize-\col@number\columnsep + \divide\hsize\col@number + \linewidth\hsize + \output{\multi@columnout}% + \multiply\count\footins\col@number + \multiply\skip \footins\col@number + \reinsert@footnotes} + +\def\endmulticols{\par\penalty\z@ + 
\output{\balance@columns}\eject + \endgroup \reinsert@footnotes + \global\c@unbalance\z@ + \enough@room\postmulticols + \addvspace\multicolsep} + +\newcount\c@unbalance \c@unbalance = 0 +\newcount\c@collectmore \c@collectmore = 0 +\newcount\col@number +\newcount\doublecol@number +\newcount\multicoltolerance \multicoltolerance = 9999 +\newdimen\page@free +\newdimen\premulticols \premulticols = 50pt +\newdimen\postmulticols \postmulticols = 20pt +\newskip\multicolsep \multicolsep = 12pt plus 4pt minus 3pt +\newskip\multicolbaselineskip \multicolbaselineskip=0pt +\newbox\partial@page + +\def\process@cols#1#2{\count@#1\relax + \loop #2% + \advance\count@\tw@ + \ifnum\count@<\doublecol@number + \repeat} + +\def\page@sofar{\unvbox\partial@page + \process@cols\z@{\wd\count@\hsize}% + \hbox to\textwidth{% + \process@cols\tw@{\box\count@ + \hss\vrule\@width\columnseprule\hss}% + \box\z@}} + +\def\reinsert@footnotes{\ifvoid\footins\else + \insert\footins{\unvbox\footins}\fi} + +\def\multi@columnout{% + \ifnum\outputpenalty <-\@Mi + \speci@ls \else + \splittopskip\topskip + \splitmaxdepth\maxdepth + \dimen@\@colroom + \divide\skip\footins\col@number + \ifvoid\footins \else + \advance\dimen@-\skip\footins + \advance\dimen@-\ht\footins \fi + \process@cols\tw@{\setbox\count@ + \vsplit\@cclv to\dimen@}% + \setbox\z@\vsplit\@cclv to\dimen@ + \ifvoid\@cclv \else + \unvbox\@cclv + \penalty\outputpenalty + \fi + \setbox\@cclv\vbox{\page@sofar}% + \@makecol\@outputpage + \global\@colroom\@colht + \process@deferreds + \global\vsize\col@number\@colroom + \global\advance\vsize + \c@collectmore\baselineskip + \multiply\skip\footins\col@number\fi} + +\def\speci@ls{% + \typeout{floats and marginpars not allowed inside `multicols' environment}% + \unvbox\@cclv\reinsert@footnotes + \gdef\@currlist{}} + +\def\process@deferreds{% + \@floatplacement + \begingroup + \let\@tempb\@deferlist + \gdef\@deferlist{}% + \let\@elt\@scolelt + \@tempb \endgroup} + +\newif\ifshr@nking + +\def\raggedcolumns{% 
+ \@bsphack\shr@nkingtrue\@esphack} +\def\flushcolumns{% switch back from ragged to flush columns by clearing \ifshr@nking + \@bsphack\shr@nkingfalse\@esphack} + +\def\balance@columns{% + \splittopskip\topskip + \splitmaxdepth\maxdepth + \setbox\z@\vbox{\unvbox\@cclv}\dimen@\ht\z@ + \advance\dimen@\col@number\topskip + \advance\dimen@-\col@number\baselineskip + \divide\dimen@\col@number + \advance\dimen@\c@unbalance\baselineskip + {\vbadness\@M \loop + {\process@cols\@ne{\global\setbox\count@\box\voidb@x}}% + \global\setbox\@ne\copy\z@ + {\process@cols\thr@@{\global\setbox\count@\vsplit\@ne to\dimen@}}% + \ifshr@nking \global\setbox\thr@@\vbox{\unvbox\thr@@}% + \fi + \ifdim\ht\@ne >\ht\thr@@ + \global\advance\dimen@\p@ + \repeat}% + \dimen@\ht\thr@@ + \process@cols\z@{\@tempcnta\count@ + \advance\@tempcnta\@ne + \setbox\count@\vtop to\dimen@ + {\unvbox\@tempcnta + \ifshr@nking\vfill\fi}}% + \global\vsize\@colroom + \global\advance\vsize\ht\partial@page + \page@sofar} diff --git a/manual/styles/multind.sty b/manual/styles/multind.sty new file mode 100644 index 0000000000..ef91c28df0 --- /dev/null +++ b/manual/styles/multind.sty @@ -0,0 +1,65 @@ +% indexes document style option for producing multiple indexes +% for use with the modified book style, CHbook.sty +% Written by F.W. Long, Version 1.1, 12 August 1991. + +% Modified by F.W. Long, Version 1.1a, 29 August 1991 +% to get the index heading correctly spaced. + +% Modified by F.W. Long, Version 1.1b, 31 August 1991 +% to remove the abbreviation \ix (which should be in the document, not here). + +% Modified \makeindex and \index commands to allow multiple indexes +% in both cases the first parameter is the index name. +% They now work more like \@starttoc and \addcontentsline. +% \index is no longer defined inside \makeindex but determines +% whether the appropriate file is defined before writing to it. 
+ +\def\makeindex#1{\begingroup + \makeatletter + \if@filesw \expandafter\newwrite\csname #1@idxfile\endcsname + \expandafter\immediate\openout \csname #1@idxfile\endcsname #1.idx\relax + \typeout{Writing index file #1.idx }\fi \endgroup} + +\def\index#1{\@bsphack\begingroup + \def\protect##1{\string##1\space}\@sanitize + \@wrindex{#1}} + +% \@wrindex now checks that the appropriate file is defined. + +\def\@wrindex#1#2{\let\thepage\relax + \xdef\@gtempa{\@ifundefined{#1@idxfile}{}{\expandafter + \write\csname #1@idxfile\endcsname{\string + \indexentry{#2}{\thepage}}}}\endgroup\@gtempa + \if@nobreak \ifvmode\nobreak\fi\fi\@esphack} + +% Modified \printindex command to allow multiple indexes. +% This now takes over much of the work of \theindex. +% Again, the first parameter is the index name. +% The second parameter is the index title (as printed). + +\newif\if@restonecol +\def\printindex#1#2{\@restonecoltrue\if@twocolumn\@restonecolfalse\fi + \columnseprule \z@ \columnsep 35pt + \newpage \twocolumn[{\Large\bf #2 \vskip4ex}] + \markright{\uppercase{#2}} + \addcontentsline{toc}{section}{#2} + \@input{#1.ind}} + +% The following index commands are taken from book.sty. +% \theindex is modified to not start a chapter. 
+ +\def\theindex{\parindent\z@ + \parskip\z@ plus .3pt\relax\let\item\@idxitem} +\def\@idxitem{\par\hangindent 40pt} +\def\subitem{\par\hangindent 40pt \hspace*{20pt}} +\def\subsubitem{\par\hangindent 40pt \hspace*{30pt}} +\def\endtheindex{\if@restonecol\onecolumn\else\clearpage\fi} +\def\indexspace{\par \vskip 10pt plus 5pt minus 3pt\relax} + +% the command \ix allows an abbreviation for the general index + +%\def\ix#1{#1\index{general}{#1}} + +% define the \see command from makeidx.sty + +\def\see#1#2{{\em see\/} #1} diff --git a/manual/styles/ocamldoc.hva b/manual/styles/ocamldoc.hva new file mode 100644 index 0000000000..58b7bb1219 --- /dev/null +++ b/manual/styles/ocamldoc.hva @@ -0,0 +1,20 @@ +\usepackage{alltt} +\newenvironment{ocamldoccode}{\begin{alltt}}{\end{alltt}} +\newenvironment{ocamldocdescription}{\begin{quote}}{\end{quote}} +\newenvironment{ocamldoccomment}{\begin{quote}}{\end{quote}} + + +\newenvironment{ocamldocindent}{\list{}{}\item\relax}{\endlist} +\newenvironment{ocamldocsigend} + {\noindent\quad\texttt{sig}\ocamldocindent} + {\endocamldocindent + \noindent\quad\texttt{end}\medskip} +\newenvironment{ocamldocobjectend} + {\noindent\quad\texttt{object}\ocamldocindent} + {\endocamldocindent + \noindent\quad\texttt{end}\medskip} + +\newcommand{\moduleref}[1]{\ifhtml\ahref{libref/#1.html}{\texttt{#1}}\fi} + +% For processing .tex generated by ocamldoc (for text manual) +\newcommand{\ocamldocvspace}[1]{\vspace{#1}}
\ No newline at end of file diff --git a/manual/styles/ocamldoc.sty b/manual/styles/ocamldoc.sty new file mode 100644 index 0000000000..b176c9b141 --- /dev/null +++ b/manual/styles/ocamldoc.sty @@ -0,0 +1,75 @@ + +%% Support macros for LaTeX documentation generated by ocamldoc. +%% This file is in the public domain; do what you want with it. + +\NeedsTeXFormat{LaTeX2e} +\ProvidesPackage{ocamldoc} + [2001/12/04 v1.0 ocamldoc support] + +\newenvironment{ocamldoccode}{% + \bgroup + \leftskip\@totalleftmargin + \rightskip\z@skip + \parindent\z@ + \parfillskip\@flushglue + \parskip\z@skip + %\noindent + \@@par\smallskip + \@tempswafalse + \def\par{% + \if@tempswa + \leavevmode\null\@@par\penalty\interlinepenalty + \else + \@tempswatrue + \ifhmode\@@par\penalty\interlinepenalty\fi + \fi} + \obeylines + \verbatim@font + \let\org@prime~% + \@noligs + \let\org@dospecials\dospecials + \g@remfrom@specials{\\} + \g@remfrom@specials{\{} + \g@remfrom@specials{\}} + \let\do\@makeother + \dospecials + \let\dospecials\org@dospecials + \frenchspacing\@vobeyspaces + \everypar \expandafter{\the\everypar \unpenalty}} +{\egroup\par} + +\def\g@remfrom@specials#1{% + \def\@new@specials{} + \def\@remove##1{% + \ifx##1#1\else + \g@addto@macro\@new@specials{\do ##1}\fi} + \let\do\@remove\dospecials + \let\dospecials\@new@specials + } + +\newenvironment{ocamldocdescription} +{\list{}{\rightmargin0pt \topsep0pt}\raggedright\item\noindent\relax\ignorespaces} +{\endlist\medskip} + +\newenvironment{ocamldoccomment} +{\list{}{\leftmargin 2\leftmargini \rightmargin0pt \topsep0pt}\raggedright\item\noindent\relax} +{\endlist} + +\let \ocamldocparagraph \paragraph +\def \paragraph #1{\ocamldocparagraph {#1}\noindent} +\let \ocamldocsubparagraph \subparagraph +\def \subparagraph #1{\ocamldocsubparagraph {#1}\noindent} + +\let\ocamldocvspace\vspace + +\newenvironment{ocamldocindent}{\list{}{}\item\relax}{\endlist} +\newenvironment{ocamldocsigend} + {\noindent\quad\texttt{sig}\ocamldocindent} + 
{\endocamldocindent\vskip -\lastskip + \noindent\quad\texttt{end}\medskip} +\newenvironment{ocamldocobjectend} + {\noindent\quad\texttt{object}\ocamldocindent} + {\endocamldocindent\vskip -\lastskip + \noindent\quad\texttt{end}\medskip} + +\endinput diff --git a/manual/styles/plaintext.sty b/manual/styles/plaintext.sty new file mode 100644 index 0000000000..2d1b366ce5 --- /dev/null +++ b/manual/styles/plaintext.sty @@ -0,0 +1,268 @@ +% Plain text style file. + +\typeout{Style option Plaintext} + +% Version from John Pavel's dvidoc.sty, March 1987 +% Heavily hacked by Xavier Leroy, June 1993. + +% Redefine all fonts to be the "doc" pseudo-font, with fixed spacing. +% Since rm, tt and mi have different character encodings, we keep +% several copies of the doc font, with different names, so that dvi2txt +% can select the right encoding according to the name. Also, we use +% different names for boldface and italics, so that dvi2txt can select +% the right style whenever possible. + +\def\sl{\rm} +\def\sc{\rm} + +\def\vpt{} +\def\vipt{} +\def\viipt{} +\def\viiipt{} +\def\ixpt{} +\def\xipt{} +\def\xiipt{} +\def\xivpt{} +\def\xviipt{} +\def\xxpt{} +\def\xxvpt{} + +%%% for i in fiv six sev egt nin ten elv twl frtn svnt twty twfv; do +%%% echo "\\font\\${i}rm = docrm" +%%% echo "\\font\\${i}mi = docmi" +%%% echo "\\font\\${i}sy = cmsy10" +%%% echo "\\font\\${i}it = docit" +%%% echo "\\font\\${i}bf = docbf" +%%% echo "\\font\\${i}tt = doctt" +%%% echo "\\font\\${i}sf = docrm" +%%% done + +\font\fivrm = docrm +\font\fivmi = docmi +\font\fivsy = cmsy10 +\font\fivit = docit +\font\fivbf = docbf +\font\fivtt = doctt +\font\fivsf = docrm +\font\sixrm = docrm +\font\sixmi = docmi +\font\sixsy = cmsy10 +\font\sixit = docit +\font\sixbf = docbf +\font\sixtt = doctt +\font\sixsf = docrm +\font\sevrm = docrm +\font\sevmi = docmi +\font\sevsy = cmsy10 +\font\sevit = docit +\font\sevbf = docbf +\font\sevtt = doctt +\font\sevsf = docrm +\font\egtrm = docrm +\font\egtmi = docmi 
+\font\egtsy = cmsy10 +\font\egtit = docit +\font\egtbf = docbf +\font\egttt = doctt +\font\egtsf = docrm +\font\ninrm = docrm +\font\ninmi = docmi +\font\ninsy = cmsy10 +\font\ninit = docit +\font\ninbf = docbf +\font\nintt = doctt +\font\ninsf = docrm +\font\tenrm = docrm +\font\tenmi = docmi +\font\tensy = cmsy10 +\font\tenit = docit +\font\tenbf = docbf +\font\tentt = doctt +\font\tensf = docrm +\font\elvrm = docrm +\font\elvmi = docmi +\font\elvsy = cmsy10 +\font\elvit = docit +\font\elvbf = docbf +\font\elvtt = doctt +\font\elvsf = docrm +\font\twlrm = docrm +\font\twlmi = docmi +\font\twlsy = cmsy10 +\font\twlit = docit +\font\twlbf = docbf +\font\twltt = doctt +\font\twlsf = docrm +\font\frtnrm = docrm +\font\frtnmi = docmi +\font\frtnsy = cmsy10 +\font\frtnit = docit +\font\frtnbf = docbf +\font\frtntt = doctt +\font\frtnsf = docrm +\font\svtnrm = docrm +\font\svtnmi = docmi +\font\svtnsy = cmsy10 +\font\svtnit = docit +\font\svtnbf = docbf +\font\svtntt = doctt +\font\svtnsf = docrm +\font\twtyrm = docrm +\font\twtymi = docmi +\font\twtysy = cmsy10 +\font\twtyit = docit +\font\twtybf = docbf +\font\twtytt = doctt +\font\twtysf = docrm +\font\twfvrm = docrm +\font\twfvmi = docmi +\font\twfvsy = cmsy10 +\font\twfvit = docit +\font\twfvbf = docbf +\font\twfvtt = doctt +\font\twfvsf = docrm + +\rm + +% Dimensions + +\hsize 78 em % 78 characters per line so fit any screen +\textwidth 78 em +\raggedright % Do not try to align on the right +\parindent=2em % Two blanks for paragraph indentation +\def\enspace{\kern 1em} \def\enskip{\hskip 1em\relax} + +% Vertical skips may best be multiples of \baselineskip +\baselineskip=12pt % 6 lines per inch +\normalbaselineskip=\baselineskip +\vsize 58\baselineskip % 58 lines per page +\textheight 58\baselineskip +\voffset=0pt +\parskip=0pt +\smallskipamount=0pt +\medskipamount= \baselineskip +\bigskipamount=2\baselineskip +\raggedbottom % do not try to align the page bottom + +% By default itemize is done with bullets, which 
don't look good. + +\def\labelitemi{-} +\def\labelitemii{-} +\def\labelitemiii{-} +\def\labelitemiv{-} + +% Fix up table of contents. Default latex uses fractional spacing between +% the section number and title. This comes out as no space in the doc file +% so we add a space to numberline, and expand tempdima by one em to allow +% for it. Also, go out of math mode for the dot in the leader. Dots in +% math mode turn out to be colons! +% +\def\@dottedtocline#1#2#3#4#5{\ifnum #1>\c@tocdepth \else + \vskip \z@ plus .2pt + {\hangindent #2\relax \rightskip \@tocrmarg \parfillskip -\rightskip + \parindent #2\relax\@afterindenttrue + \interlinepenalty\@M + \leavevmode + \@tempdima #3\relax + \addtolength\@tempdima{1em} + #4\nobreak\leaders\hbox to 2em{\hss.\hss}\hfill \nobreak \hbox to\@pnumwidth{\hfil\rm #5}\par}\fi} +\def\numberline#1{\advance\hangindent\@tempdima \hbox to\@tempdima{#1\hfil}\ } +% +% Can't really do superscripts, so do footnotes with [] +% +\def\@makefnmark{\hbox{(\@thefnmark)}} +\long\def\@makefntext#1{\parindent 1em\noindent + \hbox to 3em{\hss\@thefnmark.}\ #1} +\skip\footins 24pt plus 4pt minus 2pt +\def\footnoterule{\kern-12\p@ +\hbox to .4\columnwidth{\leaders\hbox{-}\hfill}} +% +% \arrayrulewidth 1em \doublerulesep 1em +% +% Some fairly obvious hacks. No odd/even pages in doc files. Can't do the +% fancy TeX symbols. 
+% +\oddsidemargin 0pt \evensidemargin 0pt +\def\TeX{TeX} +\def\LaTeX{LaTeX} +\def\SliTeX{SliTeX} +\def\BibTeX{BibTeX} +% +% special versions of stuff from xxx10.sty, since only one font size +% +\def\@normalsize{\@setsize\normalsize{12pt}\xpt\@xpt +\abovedisplayskip 12pt +\belowdisplayskip 12pt +\abovedisplayshortskip 12pt +\belowdisplayshortskip 12pt +\let\@listi\@listI} % Setting of \@listi added 9 Jun 87 +\let\small\@normalsize +\let\footnotesize\@normalsize +\normalsize +\footnotesep 12pt +\labelsep 10pt +\def\@listI{\leftmargin\leftmargini \parsep 12pt% +\topsep 12pt% +\partopsep 0pt% +\itemsep 0pt} +\let\@listi\@listI +\let\@listii\@listI +\let\@listiii\@listI +\let\@listiv\@listI +\let\@listv\@listI +\let\@listvi\@listI +\@listI + +% We had sort of random numbers of blank lines around section numbers. +% Turns out they used various fractional spacing. Rather than depend +% upon the definition of startsection, just wrap something around it +% that normalizes the arguments to 12pt. Negative args have special +% meanings. 
+\let\@oldstartsec\@startsection +\def\@startsection#1#2#3#4#5#6{ + \@tempskipa #4\relax + \@tempskipb #5\relax + \ifdim \@tempskipa <\z@ \@tempskipa -12pt \else \@tempskipa 12pt \fi + \ifdim \@tempskipb >\z@ \@tempskipb 12pt \fi +\@oldstartsec{#1}{#2}{#3}{\@tempskipa}{\@tempskipb}{#6} +} + +% To get even spacing in the table of contents + +\def\@pnumwidth{3em} + +\def\l@part#1#2{\addpenalty{-\@highpenalty}% + \addvspace{12pt}% space above part line + \begingroup + \@tempdima 3em + \parindent \z@ \rightskip \@pnumwidth + \parfillskip -\@pnumwidth + {\large \bf + \leavevmode + #1\hfil \hbox to\@pnumwidth{\hss #2}}\par + \nobreak + \global\@nobreaktrue + \everypar{\global\@nobreakfalse\everypar{}}%% suggested by + + \endgroup} + +\def\l@chapter#1#2{\addpenalty{-\@highpenalty}% + \vskip 12pt + \@tempdima 2em + \begingroup + \parindent \z@ \rightskip \@pnumwidth + \parfillskip -\@pnumwidth + \bf + \leavevmode + \advance\leftskip\@tempdima + \hskip -\leftskip + #1\nobreak\hfil \nobreak\hbox to\@pnumwidth{\hss #2}\par + \penalty\@highpenalty + \endgroup} + +\def\l@section{\@dottedtocline{1}{2em}{3em}} +\def\l@subsection{\@dottedtocline{2}{4em}{3em}} +\def\l@subsubsection{\@dottedtocline{3}{7em}{4em}} +\def\l@paragraph{\@dottedtocline{4}{10em}{5em}} +\def\l@subparagraph{\@dottedtocline{5}{12em}{6em}} + diff --git a/manual/styles/scroll.sty b/manual/styles/scroll.sty new file mode 100644 index 0000000000..a344b03dce --- /dev/null +++ b/manual/styles/scroll.sty @@ -0,0 +1,5 @@ +% Modification to plaintext.sty to suppress page headings +% and make pages contiguous when processed with dvi2txt + +\pagestyle{empty} +\advance\voffset by -2\baselineskip diff --git a/manual/styles/syntaxdef.hva b/manual/styles/syntaxdef.hva new file mode 100644 index 0000000000..39dbff5fc1 --- /dev/null +++ b/manual/styles/syntaxdef.hva @@ -0,0 +1,157 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Hevea code for syntax definitions of the ocaml manual % 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Important commands +% \token, for typesetting grammar terminals +% \nonterm, for typesetting grammar non-terminals +% +% Beware: \nonterm introduces either a local anchor or a local reference +% -Anchors are introduced when \nonterm occurs in the first column of +% syntax definitions (environment 'syntax') +% - References are introduced everywhere else +% +% For pure typesetting effect without links (eg. to typeset 'e' as 'expr') +% use the \nt command (eg. \nt{e}). +% In syntax definitions, the tool 'transf' translates @word@ into \nt{word}. +% +% Warnings are produced +% - For references to non-defined non terminals +% - For multiple definitions of the same non-terminal +% Warnings can be avoided for a given non-terminal 'expr' by issuing +% the command \stx@silent{'expr'} +% +%It is also possible to alias a nonterminal: +%\stx@alias{name}{othername} +%will make reference to 'name' point to the definition of non-terminal +%'othername' +\newif\ifspace +\def\addspace{\ifspace\;\spacefalse\fi} +\ifhtml +\newcommand{\token}[1]{\texttt{\blue#1}} +\else +\newcommand{\token}[1]{\texttt{#1}} +\fi +%%% warnings +\def\stx@warning#1#2{\@ifundefined{stx@#1@silent}{\hva@warn{#2}}{}} +\def\stx@silent#1{\def\csname stx@#1@silent\endcsname{}} +%%% Do not warn about those +%initial example +\stx@silent{like}\stx@silent{that}% +%Not defined +\stx@silent{regular-char}% +\stx@silent{regular-string-char}% +%\stx@silent{regular-char-str}% +\stx@silent{lowercase-ident}% +\stx@silent{capitalized-ident}% +\stx@silent{space}% +\stx@silent{tab}% +\stx@silent{newline}% +%Used in many places +\stx@silent{prefix}% +\stx@silent{name}% +\stx@silent{xname}% +%Not defined +\stx@silent{external-declaration}% +\stx@silent{unit-name}% +%%Redefined in exten.etex +\stx@silent{parameter}% +\stx@silent{pattern}% +\stx@silent{constr-decl}% +\stx@silent{type-param}% +\stx@silent{let-binding}% +\stx@silent{expr}% +\stx@silent{typexpr}% 
+\stx@silent{module-expr}% +\stx@silent{type-representation}% +\stx@silent{definition}% +\stx@silent{specification}% +\stx@silent{type-equation}% +\stx@silent{class-field}% +\stx@silent{mod-constraint}% +\stx@silent{module-type}% +\stx@silent{constant}% +%%Redefined in names.etex +\stx@silent{label-name}% +%%Not really defined in lexyacc.etex +\stx@silent{character-set}% +\stx@silent{symbol}% +%%Not defined in debugger.etex +\stx@silent{integer} +%%Not defined in ocamldoc.etex +\stx@silent{string} +\stx@silent{id} +\stx@silent{Exc} +\stx@silent{URL} +%%%%%%%%%%%%% +%% Aliases %% +%%%%%%%%%%%%% +\newcommand{\stx@alias}[2]{\def\csname stx@#1@alias\endcsname{#2}} +\stx@alias{typ}{typexpr}% +\stx@alias{met}{method-name}% +\stx@alias{tag}{tag-name}% +\stx@alias{lab}{label-name}% +\stx@alias{C}{constr-name} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%special anchor +\newstyle{a.syntax:link}{color:maroon;text-decoration:underline} +\newstyle{a.syntax:visited}{color:maroon;text-decoration:underline} +\newstyle{a.syntax:hover}{color:black;text-decoration:none;background-color:\#FF6060} +%compatibility for hevea-1.1?/heeva-2.?? 
+\ifu\@tr@url +\providecommand{\@tr@url}[1]{#1}\def\stx@id{NAME}\else +\def\stx@id{id}\fi +\newcommand{\@syntaxlocref}[2] +{\@aelement{href="\@print{#}\@tr@url{#1}" class="syntax"}{#2}} +\newcommand{\@syntaxaname}[2] +{\@aelement{\stx@id="#1" class="syntax"}{#2}} +%%Refer to anchor, internal: link to the anchor when it is recorded as existing, otherwise issue the undefined non-terminal warning and print the tag unlinked +%#1 -> anchor #2 -> visible tag +\def\@ref@anchor#1#2{% +\@ifundefined{stx@#1@exists} +{\stx@warning{#1}{Undefined non-terminal: '#1'}#2} +{\@syntaxlocref{#1}{#2}}} +%%Refer to anchor, going through the alias registered for the name when there is one +\def\ref@anchor#1{% +\ifu\csname stx@#1@alias\endcsname +\@ref@anchor{#1}{#1}\else +\@ref@anchor{\csname stx@#1@alias\endcsname}{#1}\fi} +\def\stx@exists#1{\def\csname stx@#1@exists\endcsname{}} +%%Define anchor: a first definition records the name in the aux file and emits the anchor; a repeated one warns, or becomes a plain reference when the name is silenced +\def\def@anchor#1{% +\@ifundefined{stx@#1} +{{\@nostyle\@auxdowrite{\string\stx@exists\{#1\}}}% +\gdef\csname stx@#1\endcsname{}\@syntaxaname{#1}{#1}} +{\@ifundefined{stx@#1@silent} +{\hva@warn{Redefinition of non-terminal '#1'}#1} +{\ref@anchor{#1}}}} +%%%Change \@anchor and initial definition, for html only, of course! 
+\ifhtml +\def\set@name{\let\@anchor\def@anchor} +\let\@anchor\ref@anchor +\else +\def\set@name{} +\def\@anchor{} +\fi +%%%Format non-terminal +\def\nt#1{\textit{\maroon#1}} +%%%Link for non-terminal and format +\def\nonterm#1{\addspace\nt{\@anchor{#1}}\spacetrue} +\def\brepet{\addspace\{} +\def\erepet{\}} +\def\boption{\addspace[} +\def\eoption{]} +\def\brepets{\addspace\{} +\def\erepets{\}^+} +\def\bparen{\addspace(} +\def\eparen{)} +\def\orelse{\mid \spacefalse} +\def\is{ & ::= & \spacefalse } +\def\alt{ \\ & \mid & \spacefalse } +\def\sep{ \\ \\ \spacefalse } +\def\cutline{} +\def\emptystring{\epsilon} +\def\syntax{$$\begin{array}{>{\set@name}rcl}\spacefalse} +\def\endsyntax{\end{array}$$} +\def\syntaxleft{$\begin{array}{>{\set@name}rcl}\spacefalse} +\def\endsyntaxleft{\end{array}$} +\def\synt#1{$\spacefalse#1$} diff --git a/manual/styles/syntaxdef.sty b/manual/styles/syntaxdef.sty new file mode 100644 index 0000000000..1db6f5bf61 --- /dev/null +++ b/manual/styles/syntaxdef.sty @@ -0,0 +1,26 @@ +\newif\ifspace +\def\addspace{\ifspace \; \spacefalse \fi} +\def\token#1{\addspace\hbox{\tt #1} \spacetrue} +\def\nonterm#1{\addspace\nt{#1} \spacetrue} +\def\nt#1{\hbox{\sl #1\/}} +\def\brepet{\addspace\{} +\def\erepet{\}} +\def\boption{\addspace[} +\def\eoption{]} +\def\brepets{\addspace\{} +\def\erepets{\}^+} +\def\bparen{\addspace(} +\def\eparen{)} +\def\orelse{\mid \spacefalse} +\def\is{ & ::= & \spacefalse } +\def\alt{ \\ & \mid & \spacefalse } +\def\cutline{ \\ & & \spacefalse } +\def\sep{ \\[2mm] \spacefalse } +\def\emptystring{\epsilon} +\def\syntax{$$\begin{array}{rrl}\spacefalse} +\def\endsyntax{\end{array}$$} +\def\syntaxleft{$\begin{array}{rrl}\spacefalse} +\def\endsyntaxleft{\end{array}$} +\let\oldldots=\ldots +\def\ldots{\spacefalse\oldldots} +\def\synt#1{$\spacefalse#1$} diff --git a/manual/styles/syntaxdeftxt.sty b/manual/styles/syntaxdeftxt.sty new file mode 100644 index 0000000000..370b6580d4 --- /dev/null +++ b/manual/styles/syntaxdeftxt.sty @@ -0,0 
+1,22 @@ +\newif\ifspace +\def\addspace{\ifspace\ \spacefalse\fi} +\def\token#1{\addspace\hbox{\tt #1}\spacetrue\ignorespaces} +%%% \def\nonterm#1{\addspace\hbox{\tt <#1>}\spacetrue\ignorespaces} +\def\nonterm#1{\addspace\hbox{\it #1}\spacetrue\ignorespaces} +\def\brepet{\addspace\hbox to1em{$\{$\hfil}\ignorespaces} +\def\erepet{\hbox to1em{$\}$\hfil}\ignorespaces} +\def\boption{\addspace[\ignorespaces} +\def\eoption{]\ignorespaces} +\def\brepets{\brepet\ignorespaces} +\def\erepets{\erepet+\ignorespaces} +\def\bparen{\addspace(\ignorespaces} +\def\eparen{)\ignorespaces} +\def\orelse{~\hbox to1em{$|$\hfil}~\spacefalse\ignorespaces} +\def\is{& ::= & \spacefalse\ignorespaces} +\def\alt{\\ & \hbox to1em{$|$\hfil} & \spacefalse } +\def\sep{\\[\baselineskip] \spacefalse} +\def\emptystring{nothing} +\def\syntax{\begin{center}\begin{tabular}{rrl}\spacefalse\ignorespaces} +\def\endsyntax{\end{tabular}\end{center}} +\def\ldots{\spacefalse...\ignorespaces} +\def\synt#1{$\spacefalse#1$} diff --git a/manual/tools/.gitignore b/manual/tools/.gitignore new file mode 100644 index 0000000000..db7f8368b1 --- /dev/null +++ b/manual/tools/.gitignore @@ -0,0 +1,12 @@ +transf.ml +texquote2 +htmltransf.ml +transf +htmlgen +htmlquote +latexscan.ml +dvi2txt +caml-tex2 +*.dSYM +*.cm[io] +*.o diff --git a/manual/tools/.ignore b/manual/tools/.ignore new file mode 100644 index 0000000000..12c72e4a58 --- /dev/null +++ b/manual/tools/.ignore @@ -0,0 +1,11 @@ +transf.ml +texquote2 +htmltransf.ml +transf +htmlgen +htmlquote +latexscan.ml +dvi2txt +caml-tex2 +*.dSYM +*.cm[io] diff --git a/manual/tools/Makefile b/manual/tools/Makefile new file mode 100644 index 0000000000..c46a77f2e7 --- /dev/null +++ b/manual/tools/Makefile @@ -0,0 +1,51 @@ +CFLAGS=-g -O + +all: texquote2 transf htmlquote htmlgen dvi2txt caml-tex2 + +dvi2txt: + cd dvi_to_txt; ${MAKE} + +transf: transf.cmo htmltransf.cmo transfmain.cmo + ocamlc -o transf -g transf.cmo htmltransf.cmo transfmain.cmo + +transf.ml: transf.mll + ocamllex 
+ transf.mll + +htmltransf.ml: htmltransf.mll + ocamllex htmltransf.mll + +htmlgen: latexmacros.cmo latexscan.cmo latexmain.cmo + ocamlc -o htmlgen -g latexmacros.cmo latexscan.cmo latexmain.cmo + +latexscan.ml: latexscan.mll + ocamllex latexscan.mll + +caml-tex2: caml_tex2.cmo + ocamlc -o caml-tex2 str.cma unix.cma caml_tex2.cmo +# ocamlc -custom -o caml-tex2 str.cma unix.cma caml-tex2.cmo \ +# -cclib -lunix -cclib -lstr + +.SUFFIXES: +.SUFFIXES: .ml .cmo .mli .cmi .c + +.ml.cmo: + ocamlc -c $< + +.mli.cmi: + ocamlc -c $< + +.c: + $(CC) $(CFLAGS) -o $@ $< + +# clean: remove generated sources and every binary built by `all'. The '#' of the Emacs autosave pattern is escaped so the shell does not read it as a comment. +clean: + rm -f transf.ml latexscan.ml htmltransf.ml + rm -f texquote2 transf htmlquote htmlgen dvi2txt caml-tex2 + rm -f transf.ml latex.ml + rm -f *.o *.cm? *.cmxa + rm -f *~ \#*\# + cd dvi_to_txt; ${MAKE} clean + +latexmacros.cmo: latexmacros.cmi +latexmain.cmo: latexscan.cmo +latexscan.cmo: latexmacros.cmi +transfmain.cmo: transf.cmo htmltransf.cmo diff --git a/manual/tools/caml-tex b/manual/tools/caml-tex new file mode 100755 index 0000000000..594fde500f --- /dev/null +++ b/manual/tools/caml-tex @@ -0,0 +1,131 @@ +#!/usr/bin/perl + +$camllight = "TERM=dumb ocaml"; +$camlbegin = "\\caml\n"; +$camlend = "\\endcaml\n"; +$camlin = "\\?"; +$camlout = "\\:"; +$camlblank = "\\;\n"; + +$linelen = 72; +$output = ""; +$cut_at_blanks = 0; + +while ($#ARGV >= 0) { + $_ = $ARGV[0]; + last unless (/^-/); + $linelen = $ARGV[1], shift, shift, next if (/^-n$/); + $output = $ARGV[1], shift, shift, next if (/^-o$/); + $camllight = $ARGV[1], shift, shift, next if (/^-caml$/); + $cut_at_blanks = 1, shift, next if (/^-w$/); + printf STDERR ("Unknown option '%s', ignored\n", $_); + shift; +} + +# First pass: extract the Caml phrases to evaluate + +open(ML, "> .input.ml") || die("Cannot create .input.ml : $!"); + +foreach $infile (@ARGV) { + open(IN, $infile) || die("Cannot open $infile : $!"); + while(<IN>) { + if (m/^\\begin{caml_(example|example\*|eval)}\s*$/) { + while(<IN>) { + last if m/^\\end{caml_(example|example\*|eval)}\s*$/; + print 
ML $_; + } + } + } + close(IN); +} + +close(ML); + +# Feed the phrases to a Caml toplevel + +open(TOPLEVEL, "$camllight 2>&1 < .input.ml |") || + die("Cannot start camllight : $!"); + +<TOPLEVEL>; <TOPLEVEL>; # skip the banner +$lastread = <TOPLEVEL>; +$lastread =~ s/^# //; + +# Second pass: shuffle the TeX source and the output of the toplevel + +if ($output) { + if ($output eq "-") { + open(OUT, ">&STDOUT"); + } else { + open(OUT, ">$output") || die("Cannot create $output: $!"); + } +} + +foreach $infile (@ARGV) { + open(IN, $infile) || die("Cannot open $infile: $!"); + if (! $output) { + $outfile = $infile; + $outfile =~ s/\.tex$//; + open(OUT, "> $outfile.ml.tex") || die("Cannot create $outfile.ml.tex: $!"); + } + while(<IN>) { + if (m/^\\begin{caml_example(\*?)}\s*$/) { + $omit_answer = $1; # true if caml_example*, false if caml_example + print OUT $camlbegin; + $severalphrases = 0; + while(<IN>) { + last if m/\\end{caml_example\*?}\s*$/; + print OUT $camlblank if ($severalphrases); + while(1) { + s/\\/\\\\/g; + print OUT $camlin, $_; + last if m/;; *$/; + $_ = <IN>; + } + while ($lastread =~ s/^ //) { } + while($lastread) { + last if $lastread =~ s/^# //; + print STDERR $lastread; + if (! 
$omit_answer) { + while (length($lastread) > $linelen) { + if ($cut_at_blanks) { + $cutpos = rindex($lastread, ' ', $linelen); + if ($cutpos == -1) { $cutpos = $linelen; } else { $cutpos++; } + } else { + $cutpos = $linelen; + } + $line = substr($lastread, 0, $cutpos); + $line =~ s/\\/\\\\/g; + print OUT $camlout, $line, "\n"; + $lastread = substr($lastread, $cutpos, + length($lastread) - $cutpos); + } + $lastread =~ s/\\/\\\\/g; + print OUT $camlout, $lastread; + } + $lastread = <TOPLEVEL>; + } + $severalphrases = 1; + } + print OUT $camlend; + } + elsif (m/^\\begin{caml_eval}\s*$/) { + while(<IN>) { + last if m/^\\end{caml_eval}\s*$/; + if (m/;; *$/) { + while ($lastread =~ s/^ //) { } + while($lastread) { + last if $lastread =~ s/^#//; + print STDERR $lastread; + $lastread = <TOPLEVEL>; + } + } + } + } + else { + print OUT $_; + } + } + close(IN); +} + +close(TOPLEVEL); diff --git a/manual/tools/caml_tex2.ml b/manual/tools/caml_tex2.ml new file mode 100644 index 0000000000..d2f6ceaea9 --- /dev/null +++ b/manual/tools/caml_tex2.ml @@ -0,0 +1,165 @@ +(* $Id$ *) + +open StdLabels +open Printf +open Str + +let camlbegin = "\\caml\n" +let camlend = "\\endcaml\n" +let camlin = "\\\\?\\1" +let camlout = "\\\\:\\1" +let camlbunderline = "\\<" +let camleunderline = "\\>" + +let camllight = ref "TERM=norepeat ocaml" +let linelen = ref 72 +let outfile = ref "" +let cut_at_blanks = ref false +let files = ref [] + +let _ = + Arg.parse ["-n", Arg.Int (fun n -> linelen := n), "line length"; + "-o", Arg.String (fun s -> outfile := s), "output"; + "-caml", Arg.String (fun s -> camllight := s), "toplevel"; + "-w", Arg.Set cut_at_blanks, "cut at blanks"] + (fun s -> files := s :: !files) + "caml-tex2: " + +let (~!) 
= + let memo = ref [] in + fun key -> + try List.assq key !memo + with Not_found -> + let data = Str.regexp key in + memo := (key, data) :: !memo; + data + +let caml_input, caml_output = + let cmd = !camllight ^ " 2>&1" in + try Unix.open_process cmd with _ -> failwith "Cannot start toplevel" +let () = + at_exit (fun () -> ignore (Unix.close_process (caml_input, caml_output))); + ignore (input_line caml_input); + ignore (input_line caml_input) + +let read_output () = + let input = ref (input_line caml_input) in + input := replace_first ~!"^# *" "" !input; + let underline = + if string_match ~!"Characters *\\([0-9]+\\)-\\([0-9]+\\):$" !input 0 + then + let b = int_of_string (matched_group 1 !input) + and e = int_of_string (matched_group 2 !input) in + input := input_line caml_input; + b, e + else 0, 0 + in + let output = Buffer.create 256 in + while not (string_match ~!".*\"end_of_input\"$" !input 0) do + prerr_endline !input; + Buffer.add_string output !input; + Buffer.add_char output '\n'; + input := input_line caml_input; + done; + Buffer.contents output, underline + +let escape_specials s = + let s1 = global_replace ~!"\\\\" "\\\\\\\\" s in + let s2 = global_replace ~!"'" "\\\\textquotesingle\\\\-" s1 in + let s3 = global_replace ~!"`" "\\\\textasciigrave\\\\-" s2 in + s3 + +let process_file file = + prerr_endline ("Processing " ^ file); + let ic = try open_in file with _ -> failwith "Cannot read input file" in + let oc = + try if !outfile = "-" then + stdout + else if !outfile = "" then + open_out (replace_first ~!"\\.tex$" "" file ^ ".ml.tex") + else + open_out_gen [Open_wronly; Open_creat; Open_append; Open_text] + 0o666 (* octal rw-rw-rw-, subject to umask; the former 0x666 was hexadecimal *) !outfile + with _ -> failwith "Cannot open output file" in + try while true do + let input = ref (input_line ic) in + if string_match ~!"\\\\begin{caml_example\\(\\*?\\)}[ \t]*$" + !input 0 + then begin + let omit_answer = matched_group 1 !input = "*" in + output_string oc camlbegin; + let first = ref true in + let read_phrase () = + let 
phrase = Buffer.create 256 in + while + let input = input_line ic in + if string_match ~!"\\\\end{caml_example\\*?}[ \t]*$" + input 0 + then raise End_of_file; + if Buffer.length phrase > 0 then Buffer.add_char phrase '\n'; + Buffer.add_string phrase input; + not (string_match ~!".*;;[ \t]*$" input 0) + do + () + done; + Buffer.contents phrase + in + try while true do + let phrase = read_phrase () in + fprintf caml_output "%s\n" phrase; + flush caml_output; + output_string caml_output "\"end_of_input\";;\n"; + flush caml_output; + let output, (b, e) = read_output () in + let phrase = + if b < e then begin + let start = String.sub phrase ~pos:0 ~len:b + and underlined = String.sub phrase ~pos:b ~len:(e-b) + and rest = + String.sub phrase ~pos:e ~len:(String.length phrase - e) + in + String.concat "" + [escape_specials start; "\\<"; + escape_specials underlined; "\\>"; + escape_specials rest] + end else + escape_specials phrase in + (* Special characters may also appear in output strings -Didier *) + let output = escape_specials output in + let phrase = global_replace ~!"^\\(.\\)" camlin phrase + and output = global_replace ~!"^\\(.\\)" camlout output in + if not !first then output_string oc "\\;\n"; + fprintf oc "%s\n" phrase; + if not omit_answer then fprintf oc "%s" output; + flush oc; + first := false + done + with End_of_file -> output_string oc camlend + end + else if string_match ~!"\\\\begin{caml_eval}[ \t]*$" !input 0 + then begin + while input := input_line ic; + not (string_match ~!"\\\\end{caml_eval}[ \t]*$" !input 0) + do + fprintf caml_output "%s\n" !input; + if string_match ~!".*;;[ \t]*$" !input 0 then begin + flush caml_output; + output_string caml_output "\"end_of_input\";;\n"; + flush caml_output; + ignore (read_output ()) + end + done + end else begin + fprintf oc "%s\n" !input; + flush oc + end + done with + End_of_file -> close_in ic; close_out oc + +let _ = + if !outfile <> "-" && !outfile <> "" then begin + try close_out (open_out !outfile) + 
+ with _ -> failwith "Cannot open output file" + end; + List.iter process_file (List.rev !files) + diff --git a/manual/tools/dvi_to_txt/Makefile b/manual/tools/dvi_to_txt/Makefile new file mode 100644 index 0000000000..852996a991 --- /dev/null +++ b/manual/tools/dvi_to_txt/Makefile @@ -0,0 +1,8 @@ +OBJS=io.o interp.o output.o main.o print.o print_rtf.o print_styl.o +CFLAGS=-g + +../dvi2txt: $(OBJS) + $(CC) $(CFLAGS) -o ../dvi2txt $(OBJS) + +# clean: the '#' of the Emacs autosave pattern is escaped so the shell does not read it as a comment. +clean: + rm -f ../dvi2txt *.o *~ \#*\# diff --git a/manual/tools/dvi_to_txt/dvi.h b/manual/tools/dvi_to_txt/dvi.h new file mode 100644 index 0000000000..8dfb25dcbe --- /dev/null +++ b/manual/tools/dvi_to_txt/dvi.h @@ -0,0 +1,8 @@ +enum { + SET_CHAR_0=0, SET_CHAR_127=127, SET1=128, SET2, SET3, SET4, SET_RULE, + PUT1, PUT2, PUT3, PUT4, PUT_RULE, NOP, BOP, EOP, PUSH, POP, RIGHT1, + RIGHT2, RIGHT3, RIGHT4, W0, W1, W2, W3, W4, X0, X1, X2, X3, X4, DOWN1, + DOWN2, DOWN3, DOWN4, Y0, Y1, Y2, Y3, Y4, Z0, Z1, Z2, Z3, Z4, + FNT_NUM_0=171, FNT_NUM_63=234, FNT1=235, FNT2, FNT3, FNT4, XXX1, XXX2, + XXX3, XXX4, FNT_DEF1, FNT_DEF2, FNT_DEF3, FNT_DEF4, PRE, POST, POST_POST +}; diff --git a/manual/tools/dvi_to_txt/interp.c b/manual/tools/dvi_to_txt/interp.c new file mode 100644 index 0000000000..e50aed36c0 --- /dev/null +++ b/manual/tools/dvi_to_txt/interp.c @@ -0,0 +1,305 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "io.h" +#include "dvi.h" +#include "output.h" + +#define SEEK_CUR 1 + +int h, v, w, x, y, z, sp; +int currfont; +int encoding; +int style; + +#define FONT_NAME_SIZE 31 +#define NUM_FONTS 256 + +struct { + char name[FONT_NAME_SIZE+1]; + int encoding; + int style; +} font[NUM_FONTS]; + +#define TYPEWRITER 0 +#define ROMAN 1 +#define MATH_ITALIC 2 +#define MATH_SYMBOL 3 +#define MATH_EXTENSION 4 +#define LINE_SEGMENTS 5 +#define CIRCLE_SEGMENTS 6 +#define LATEX_SYMBOLS 7 + +char * transcode[] = { +/* 
0.......+.......1.......+.......2.......+.......3.......+.......4.......+.......5.......+.......6.......+.......7.......+....... */ +/* TYPEWRITER */ + "GDTLXPSUPYO##################### !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~#", +/* ROMAN */ + "GDTLXPSUPYO***** 0'!\"#$%&'()*+,-./0123456789:;!=??@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\"]^.`abcdefghijklmnopqrstuvwxyz--\"~#", +/* MATH_ITALIC */ + "GDTLXPSUPYOabgdezhtiklmnxpystupxyoeuorsp----`'><0123456789.,</>*dABCDEFGHIJKLMNOPQRSTUVWXYZ#####labcdefghijklmnopqrstuvwxyzij###", +/* MATH_SYMBOL */ + "-.x*/###+-x/.ooo==##<><>==##<><><>||####<>||####'#####/|###0RIT##ABCDEFGHIJKLMNOPQRSTUVWXYZ###########{}<>||||\\|################", +/* MATH_EXTENSION */ + "()[]||||{}<>||##()[]||||{}<>||##()[]||||{}<>||##########################################################[]||||{}################", +/* LINE_SEGMENTS */ + "/||||| _ / / _/ // _ / / __// / _ / \\||||| \\ \\ \\ _\\ \\\\ _ \\ \\ __\\\\ \\ _ \\ ", +/* CIRCLE_SEGMENTS */ + " ", +/* LATEX_SYMBOLS */ + " <<>> U#O0 ~~[] " +}; + +#define STACK_SIZE 100 + +struct { int sh, sv, sw, sx, sy, sz; } stack[STACK_SIZE]; + +struct known_font_struct { + char * prefix; + int encoding, style; +} known_fonts[] = { + "docrm", ROMAN, PLAIN, + "doctt", TYPEWRITER, MONOSPACED, + "docit", ROMAN, ITALICS, + "docbf", ROMAN, BOLD, + "docmi", MATH_ITALIC, PLAIN, + "cmsy", MATH_SYMBOL, PLAIN, + "cmex", MATH_EXTENSION, PLAIN, + "line", LINE_SEGMENTS, PLAIN, + "lcircle", CIRCLE_SEGMENTS, PLAIN, + "lasy", LATEX_SYMBOLS, PLAIN +}; + +void fontdef(input, fontnum) + FILE * input; + int fontnum; +{ + int a, l, i; + + assert(fontnum >= 0 && fontnum < NUM_FONTS); + fseek(input, 12, SEEK_CUR); /* skip c, s and d parameters */ + a = get8u(input); + l = get8u(input); + assert(l < FONT_NAME_SIZE); + fseek(input, a, SEEK_CUR); /* skip the "area" part */ + fread(font[fontnum].name, 1, l, input); /* read the font name */ + font[fontnum].name[l] = 0; + for (i = 0; + i < 
sizeof(known_fonts) / sizeof(struct known_font_struct); + i++) { + if (strncmp(font[fontnum].name, known_fonts[i].prefix, + strlen(known_fonts[i].prefix)) == 0) { + font[fontnum].encoding = known_fonts[i].encoding; + font[fontnum].style = known_fonts[i].style; + return; + } + } + fprintf(stderr, "Warning: unknown font `%s'\n", font[fontnum].name); + font[fontnum].encoding = ROMAN; + font[fontnum].style = PLAIN; +} + +void setfont(fontnum) + int fontnum; +{ + currfont = fontnum; + encoding = font[fontnum].encoding; + style = font[fontnum].style; +} + +int outchar(c) + int c; +{ + if (c < 0 || c > 127) + out(h, v, '#', PLAIN); + else + out(h, v, transcode[encoding][c], style); + return scalex; +} + +void outrule(height, width) + int height, width; +{ + char c; + int dx, dy; + + if (height <= 0 || width <= 0) return; + c = height >= width ? '|' : '-'; + dy = 0; + do { + dx = 0; + do { + out(h + dx, v - dy, c, PLAIN); + dx += scalex; + } while (dx <= width); + dy += scaley; + } while (dy < height); +} + +void interprete(input) + FILE * input; +{ + int c, n, height, width, mag; + + sp = 0; + c = get8u(input); + n = get8u(input); + if (c != PRE || n != 2) { + fprintf(stderr, "File does not start with DVI preamble.\n"); + exit(2); + } + (void) get32s(input); + (void) get32s(input); + mag = get32s(input); + scalex = SCALEX * mag / 1000; + scaley = SCALEY * mag / 1000; + n = get8u(input); + fseek(input, n, SEEK_CUR); /* skip comment */ + + begin_document(); + + while (1) { + c = get8u(input); + if (c >= SET_CHAR_0 && c <= SET_CHAR_127) + h += outchar(c); + else if (c >= FNT_NUM_0 && c <= FNT_NUM_63) + setfont(c - FNT_NUM_0); + else switch(c) { + case SET1: + h += outchar(get8u(input)); break; + case SET2: + h += outchar(get16u(input)); break; + case SET3: + h += outchar(get24u(input)); break; + case SET4: + h += outchar(get32s(input)); break; + case SET_RULE: + height = get32s(input); + width = get32s(input); + outrule(height, width); + h += width; + break; + case PUT1: + 
(void) outchar(get8u(input)); break; + case PUT2: + (void) outchar(get16u(input)); break; + case PUT3: + (void) outchar(get24u(input)); break; + case PUT4: + (void) outchar(get32s(input)); break; + case PUT_RULE: + height = get32s(input); + width = get32s(input); + outrule(height, width); + break; + case NOP: + break; + case BOP: + clear_page(); + h = v = w = x = y = z = 0; + sp = 0; + fseek(input, 44, SEEK_CUR); /* skip c0...c9 and ptr to previous page */ + break; + case EOP: + output_page(); + break; + case PUSH: + assert(sp < STACK_SIZE); + stack[sp].sh = h; stack[sp].sv = v; stack[sp].sw = w; + stack[sp].sx = x; stack[sp].sy = y; stack[sp].sz = z; + sp++; + break; + case POP: + assert(sp > 0); + sp--; + h = stack[sp].sh; v = stack[sp].sv; w = stack[sp].sw; + x = stack[sp].sx; y = stack[sp].sy; z = stack[sp].sz; + break; + case RIGHT1: + h += get8s(input); break; + case RIGHT2: + h += get16s(input); break; + case RIGHT3: + h += get24s(input); break; + case RIGHT4: + h += get32s(input); break; + case W0: + h += w; break; + case W1: + w = get8s(input); h += w; break; + case W2: + w = get16s(input); h += w; break; + case W3: + w = get24s(input); h += w; break; + case W4: + w = get32s(input); h += w; break; + case X0: + h += x; break; + case X1: + x = get8s(input); h += x; break; + case X2: + x = get16s(input); h += x; break; + case X3: + x = get24s(input); h += x; break; + case X4: + x = get32s(input); h += x; break; + case DOWN1: + v += get8s(input); break; + case DOWN2: + v += get16s(input); break; + case DOWN3: + v += get24s(input); break; + case DOWN4: + v += get32s(input); break; + case Y0: + v += y; break; + case Y1: + y = get8s(input); v += y; break; + case Y2: + y = get16s(input); v += y; break; + case Y3: + y = get24s(input); v += y; break; + case Y4: + y = get32s(input); v += y; break; + case Z0: + v += z; break; + case Z1: + z = get8s(input); v += z; break; + case Z2: + z = get16s(input); v += z; break; + case Z3: + z = get24s(input); v += z; break; + 
case Z4: + z = get32s(input); v += z; break; + case FNT1: + setfont(get8u(input)); break; + case FNT2: + setfont(get16u(input)); break; + case FNT3: + setfont(get24u(input)); break; + case FNT4: + setfont(get32s(input)); break; + case XXX1: + n = get8u(input); fseek(input, n, SEEK_CUR); break; + case XXX2: + n = get16u(input); fseek(input, n, SEEK_CUR); break; + case XXX3: + n = get24u(input); fseek(input, n, SEEK_CUR); break; + case XXX4: + n = get32s(input); fseek(input, n, SEEK_CUR); break; + case FNT_DEF1: + fontdef(input, get8u(input)); break; + case FNT_DEF2: + fontdef(input, get16u(input)); break; + case FNT_DEF3: + fontdef(input, get24u(input)); break; + case FNT_DEF4: + fontdef(input, get32s(input)); break; + case POST: + end_document(); return; + default: + assert(0); + } + } +} diff --git a/manual/tools/dvi_to_txt/io.c b/manual/tools/dvi_to_txt/io.c new file mode 100644 index 0000000000..5dfe1ccc55 --- /dev/null +++ b/manual/tools/dvi_to_txt/io.c @@ -0,0 +1,43 @@ +#include <stdio.h> +#include "io.h" + +int get16u(input) + FILE * input; +{ + int b1 = getc(input); + int b2 = getc(input); + return (b1 << 8) + b2; +} +int get16s(input) + FILE * input; +{ + int b1 = (schar) getc(input); + int b2 = getc(input); + return (b1 << 8) + b2; +} +int get24u(input) + FILE * input; +{ + int b1 = getc(input); + int b2 = getc(input); + int b3 = getc(input); + return (b1 << 16) + (b2 << 8) + b3; +} +int get24s(input) + FILE * input; +{ + int b1 = (schar) getc(input); + int b2 = getc(input); + int b3 = getc(input); + return (b1 << 16) + (b2 << 8) + b3; +} +int get32s(input) + FILE * input; +{ + int b1 = (schar) getc(input); + int b2 = getc(input); + int b3 = getc(input); + int b4 = getc(input); + return (b1 << 24) + (b2 << 16) + (b3 << 8) + b4; +} + diff --git a/manual/tools/dvi_to_txt/io.h b/manual/tools/dvi_to_txt/io.h new file mode 100644 index 0000000000..f9a800d40c --- /dev/null +++ b/manual/tools/dvi_to_txt/io.h @@ -0,0 +1,10 @@ +#ifdef __STDC__ +typedef signed char 
schar; +#else +typedef char schar; +#endif + +#define get8u(input) getc(input) +#define get8s(input) (schar) getc(input) + +int get16u(), get16s(), get24u(), get24s(), get32u(), get32s(); diff --git a/manual/tools/dvi_to_txt/main.c b/manual/tools/dvi_to_txt/main.c new file mode 100644 index 0000000000..0fb03c4341 --- /dev/null +++ b/manual/tools/dvi_to_txt/main.c @@ -0,0 +1,47 @@ +#include <stdio.h> +#include "output.h" + +void interprete(FILE *input); + +char * input_name; + +int main(argc, argv) + int argc; + char ** argv; +{ + FILE * f; + int i; + + output_device = OUTPUT_PLAIN; + standout_tt = 0; + for (i = 1; i < argc && argv[i][0] == '-'; i++) { + switch(argv[i][1]) { + case 'p': + output_device = OUTPUT_PRINTER; break; + case 'r': + output_device = OUTPUT_RTF; break; + case 's': + output_device = OUTPUT_STYL; break; + case 't': + standout_tt = 1; break; + default: + fprintf(stderr, "Unknown option `%s', ignored\n", argv[i]); + } + } + if (i >= argc) { + input_name = "unknown.dvi"; + interprete(stdin); + } else { + for (/*nothing*/; i < argc; i++) { + f = fopen(argv[i], "r"); + if (f == NULL) { + perror(argv[i]); + continue; + } + input_name = argv[i]; + interprete(f); + fclose(f); + } + } + return 0; +} diff --git a/manual/tools/dvi_to_txt/output.c b/manual/tools/dvi_to_txt/output.c new file mode 100644 index 0000000000..fc79ed8066 --- /dev/null +++ b/manual/tools/dvi_to_txt/output.c @@ -0,0 +1,209 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "output.h" + +void null(), print_FF(), plain_line(), printer_line(); +void begin_rtf_document(), end_rtf_document(), end_rtf_page(), rtf_line(); +void begin_styl_page(), end_styl_page(), styl_line(); + +struct output_device { + void (*begin_document)(); + void (*end_document)(); + void (*begin_page)(); + void (*end_page)(); + void (*line)(); +} device[] = { + null, null, null, print_FF, plain_line, + null, null, null, print_FF, printer_line, + begin_rtf_document, end_rtf_document, null, 
end_rtf_page, rtf_line, + null, null, begin_styl_page, end_styl_page, styl_line +}; + +#define SIZEX 160 + +struct line { + int ypos; + int len; + char * contents; + char * styles; + struct line * next_in_bucket; +}; + +#define NBUCKETS 101 + +struct line * screenlines[NBUCKETS]; + +int numlines; + +char * xmalloc(size) + int size; +{ + char * res = (char *) malloc(size); + if (res == NULL) { + fprintf(stderr, "Out of memory\n"); + exit(2); + } + return res; +} + +char * xrealloc(ptr, size) + char * ptr; + int size; +{ + char * res = (char *) realloc(ptr, size); + if (res == NULL) { + fprintf(stderr, "Out of memory\n"); + exit(2); + } + return res; +} + +void begin_document() +{ + device[output_device].begin_document(); +} + +void end_document() +{ + device[output_device].end_document(); +} + +void clear_page() +{ + int i; + + for (i = 0; i < NBUCKETS; i++) screenlines[i] = NULL; + numlines = 0; +} + +void out(x, y, c, style) + int x, y; + char c; + char style; +{ + unsigned int h; + struct line * line; + + h = ((unsigned int) y) % NBUCKETS; + line = screenlines[h]; + while (line != NULL && line->ypos != y) line = line->next_in_bucket; + if (line == NULL) { + line = (struct line *) xmalloc(sizeof(struct line)); + line->ypos = y; + line->len = 80; + line->contents = (char *) xmalloc(line->len); + memset(line->contents, ' ', line->len); + line->styles = (char *) xmalloc(line->len); + memset(line->styles, PLAIN, line->len); + line->next_in_bucket = screenlines[h]; + screenlines[h] = line; + numlines++; + } + x = x / scalex; + if (x < 0) return; + while (x >= line->len) { + int newlen = 2 * line->len; + line->contents = (char *) xrealloc(line->contents, newlen); + memset(line->contents + line->len, ' ', newlen - line->len); + line->styles = (char *) xrealloc(line->styles, newlen); + memset(line->styles + line->len, PLAIN, newlen - line->len); + line->len = newlen; + } + line->contents[x] = c; + line->styles[x] = style; +} + +static void free_bucket(l) + struct line * 
l; +{ + if (l != NULL) { + free(l->contents); + free(l->styles); + free_bucket(l->next_in_bucket); + free(l); + } +} + +static void free_buckets() +{ + int i; + for (i = 0; i < NBUCKETS; i++) free_bucket(screenlines[i]); +} + +static int compare_lines(l1, l2) + struct line ** l1, ** l2; +{ + return (**l1).ypos - (**l2).ypos; +} + +void output_page() +{ + struct line ** lines; + struct line * l; + int i, j, k, y; + char * p, * q, * style_p, * style_q, * s; + + device[output_device].begin_page(); + + /* First, sort the lines by y coordinate */ + lines = (struct line **) malloc(numlines * sizeof(struct line *)); + if (lines == NULL) { + printf("*** Out of memory ***\n\014"); + free_buckets(); + return; + } + j = 0; + for (i = 0; i < NBUCKETS; i++) + for (l = screenlines[i]; l != NULL; l = l->next_in_bucket) + lines[j++] = l; + qsort(lines, numlines, sizeof(struct line *), compare_lines); + + /* Output the lines */ + + y = 0; + for (i = 0; i < numlines; i++) { + /* Emit blank lines to reach the current line ypos */ + while (lines[i]->ypos - y >= 3 * scaley / 2) { + device[output_device].line(NULL, NULL, 0); + y += scaley; + } + /* If next line is close to current line, attempt to merge them */ + while (i + 1 < numlines && + lines[i+1]->ypos - lines[i]->ypos < scaley) { + p = lines[i]->contents; + q = lines[i+1]->contents; + style_p = lines[i]->styles; + style_q = lines[i+1]->styles; + for (j = lines[i]->len; j < lines[i+1]->len; j++) + if (q[j] != ' ') goto cannot_merge; + for (j = lines[i+1]->len; j < lines[i]->len; j++) + if (p[j] != ' ') goto cannot_merge; + k = lines[i]->len; + if (k > lines[i+1]->len) k = lines[i+1]->len; + for (j = 0; j < k; j++) + if (p[j] != ' ' && q[j] != ' ') goto cannot_merge; + /* Seems OK, do the merging */ + for (j = 0; j < k; j++) + if (p[j] != ' ') { + q[j] = p[j]; + style_q[j] = style_p[j]; + } + /* Now consider next line */ + i++; + } + cannot_merge: + /* Now print the current line */ + p = lines[i]->contents; + q = p + lines[i]->len; 
+ while (q >= p && *--q == ' ') /*nothing*/; + device[output_device].line(p, lines[i]->styles, q-p+1); + /* Go on with next line */ + y = lines[i]->ypos; + } + + device[output_device].end_page(); + free(lines); + free_buckets(); +} + diff --git a/manual/tools/dvi_to_txt/output.h b/manual/tools/dvi_to_txt/output.h new file mode 100644 index 0000000000..43ce317b35 --- /dev/null +++ b/manual/tools/dvi_to_txt/output.h @@ -0,0 +1,24 @@ +#define SCALEX 404685 +#define SCALEY 786432 + +int scalex; +int scaley; + +#define PLAIN 0 +#define ITALICS 1 +#define BOLD 2 +#define MONOSPACED 3 + +void begin_document(); +void end_document(); +void clear_page(); +void output_page(); +void out(); + +int output_device; +int standout_tt; + +#define OUTPUT_PLAIN 0 +#define OUTPUT_PRINTER 1 +#define OUTPUT_RTF 2 +#define OUTPUT_STYL 3 diff --git a/manual/tools/dvi_to_txt/print.c b/manual/tools/dvi_to_txt/print.c new file mode 100644 index 0000000000..d0f02cc349 --- /dev/null +++ b/manual/tools/dvi_to_txt/print.c @@ -0,0 +1,41 @@ +#include <stdio.h> +#include "output.h" + +/* Low-level output functions */ + +void null() +{ +} + +void print_FF() +{ + putchar('\014'); +} + +void plain_line(txt, style, len) + char * txt, * style; + int len; +{ + fwrite(txt, 1, len, stdout); + putchar('\n'); +} + +void printer_line(txt, style, len) + char * txt, * style; + int len; +{ + for (/*nothing*/; len > 0; len--, txt++, style++) { + putchar(*txt); + switch(*style) { + case ITALICS: + putchar('\b'); putchar('_'); break; + case BOLD: + putchar('\b'); putchar(*txt); break; + case MONOSPACED: + if (standout_tt) { putchar('\b'); putchar(*txt); } + break; + } + } + putchar('\n'); +} + diff --git a/manual/tools/dvi_to_txt/print_rtf.c b/manual/tools/dvi_to_txt/print_rtf.c new file mode 100644 index 0000000000..c12c67a321 --- /dev/null +++ b/manual/tools/dvi_to_txt/print_rtf.c @@ -0,0 +1,80 @@ +#include <stdio.h> +#include "output.h" + +/* Rich Text Format */ + +void begin_rtf_document() +{ + 
printf("{\\rtf1\\ansi\\deff0\n"); + printf("{\\fonttbl{\\f0\\fmodern Courier;}}\n"); + printf("\\f0\\fs20\n"); +} + +void end_rtf_document() +{ + printf("}\n"); +} + +void end_rtf_page() +{ + printf("\\page\n"); +} + +void rtf_line(txt, style, len) + char * txt, * style; + int len; +{ + int currstyle; + + for (currstyle = PLAIN; len > 0; len--, txt++, style++) { + if (*txt != ' ') { + switch(*style) { + case PLAIN: + if (currstyle != PLAIN) { + putchar('}'); + currstyle = PLAIN; + } + break; + case ITALICS: + if (currstyle != ITALICS) { + if (currstyle != PLAIN) putchar('}'); + printf("{\\i "); + currstyle = ITALICS; + } + break; + case BOLD: + if (currstyle != BOLD) { + if (currstyle != PLAIN) putchar('}'); + printf("{\\b "); + currstyle = BOLD; + } + break; + case MONOSPACED: + if (standout_tt) { + if (currstyle != BOLD) { + if (currstyle != PLAIN) putchar('}'); + printf("{\\b "); + currstyle = BOLD; + } + } else { + if (currstyle != PLAIN) { + putchar('}'); + currstyle = PLAIN; + } + } + break; + } + } + switch(*txt) { + case '\\': + case '{': + case '}': + putchar('\\'); putchar(*txt); break; + default: + putchar(*txt); break; + } + } + if (currstyle != PLAIN) putchar('}'); + printf("\\par\n"); +} + diff --git a/manual/tools/dvi_to_txt/print_styl.c b/manual/tools/dvi_to_txt/print_styl.c new file mode 100644 index 0000000000..39135f23b7 --- /dev/null +++ b/manual/tools/dvi_to_txt/print_styl.c @@ -0,0 +1,90 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "output.h" + +/* Macintosh STYL tables */ + +extern char * input_name; + +static FILE * text; +static FILE * styl; +static int partnum = 0; +static int currstyle; +static int currstart; +static int currpos; + +static void output_current_style() +{ + int style_code; + + switch(currstyle) { + case PLAIN: + style_code = 0; break; + case ITALICS: + style_code = 2; break; + case BOLD: + style_code = 1 + 32; break; /* bold condensed */ + case MONOSPACED: + style_code = standout_tt ? 
1 + 32 : 0; break; + } + fprintf(styl, "%d %d Monaco %d 9 0 0 0\n", currstart, currpos, style_code); +} + + +static void output_style_change(newstyle) + int newstyle; +{ + if (!standout_tt && (newstyle == PLAIN && currstyle == MONOSPACED || + newstyle == MONOSPACED && currstyle == PLAIN)) { + currstyle = newstyle; + return; + } + output_current_style(); + currstyle = newstyle; + currstart = currpos; +} + +void begin_styl_page() +{ + char name[1024], buffer[1024]; + int n; + + strcpy(name, input_name); + n = strlen(name); + if (n >= 4 && strcmp(name + n - 4, ".dvi") == 0) name[n - 4] = 0; + partnum++; + sprintf(buffer, "%s.%03d.txt", name, partnum); + text = fopen(buffer, "w"); + if (text == NULL) { perror(buffer); exit(2); } + sprintf(buffer, "%s.%03d.stl", name, partnum); + styl = fopen(buffer, "w"); + if (styl == NULL) { perror(buffer); exit(2); } + currstyle = PLAIN; + currstart = 0; + currpos = 0; +} + +void end_styl_page() +{ + output_current_style(); + fclose(text); + fclose(styl); +} + +void styl_line(txt, style, len) + char * txt, * style; + int len; +{ + for (/*nothing*/; len > 0; len--, txt++, style++, currpos++) { + putc(*txt, text); + if (*txt != ' ' && *style != currstyle) { + output_style_change(*style); + } + } + putc('\n', text); + currpos++; +} + + + diff --git a/manual/tools/fix_index.sh b/manual/tools/fix_index.sh new file mode 100755 index 0000000000..7dd1ab421f --- /dev/null +++ b/manual/tools/fix_index.sh @@ -0,0 +1,52 @@ +#!/bin/sh + +# usage: fix_index.sh <file>.idx + +# This script works around a hyperref bug: hyperref does not handle +# quotes in \index arguments properly. +# +# Symptom: +# When \index{-pipe-pipe@\verb`("|"|)`} appears in your .tex, the hyperref +# package mangles it and produces this line in your .idx: +# \indexentry{(-pipe-pipe)@\verb`("|hyperindexformat{\"}}{292} +# instead of the expected: +# \indexentry{(-pipe-pipe)@\verb`("|"|)`|hyperpage}{292} +# +# This is because it fails to handle quoted characters correctly. 
+# +# The workaround: +# Look for the buggy line in the given .idx file and change it. + +# Note: this bug will happen every time you have a | (pipe) character +# in an index entry (properly quoted with a " (double-quote) before it). +# We fix only the one case that appears in the OCaml documentation. +# We do not attempt a general solution because hyperref erases part +# of the argument, so we cannot recover the correct string from its +# output. + +# Note 2013-06-19: +# The above was for the || operator in the stdlib's Pervasives module. +# Now we have the same problem with the |> operator that was added +# to the same module in commit 13739, hence the second special case. + +usage(){ + echo "usage: fix_index.sh <file>.idx" >&2 + exit 2 +} + +case $# in + 1) ;; + *) usage;; +esac + +ed "$1" <<'EOF' +/-pipe-pipe/s/verb`("|hyperindexformat{\\"}/verb`("|"|)`|hyperpage/ +/-pipe-gt/s/verb`("|hyperindexformat{\\>)`}/verb`("|>)`|hyperpage/ +w +q +EOF + +case $? in + 0) echo "fix_index.sh: fixed $1 successfully.";; + *) echo "fix_index.sh: some error occurred."; exit 2;; +esac diff --git a/manual/tools/format-intf b/manual/tools/format-intf new file mode 100755 index 0000000000..0228ecc18e --- /dev/null +++ b/manual/tools/format-intf @@ -0,0 +1,153 @@ +#!/usr/bin/perl + +$sep = "\246"; + +$html = 0; +if ($ARGV[0] eq "-html") { + $html = 1; + shift; +} + +# Skip initial junk + +while(($_ = <>) && ! 
m/^\(\* Module \[(.*)\]:/) { } +m/^\(\* Module \[(.*)\]:/; +$modname = $1; +chop; +s/^\(\* *//; +s/ *\*\) *$//; +s/\[/{\\tt /g; +s/\]/}/g; +print "\\section{$_}\n\n"; +$label = $modname; $label =~ s/[^A-Za-z0-9]//g; +print "\\label{s:$label}\n"; +print "\\index{$modname (module)@\\verb~$modname~ (module)}%\n\n"; +s/{\\tt //g; +s/}//g; +s/_//g; +print "\\pdfsection{$_}\n\n"; + +$incomment = 0; +$inverbatim = 0; + +line: +while(<>) { + chop; + last line if /^\s*\(\*--/; + if (s/^\(\*- //) { + s/ *\*\)$//; + } + if (m/^\s*\(\*\*\*\s*(.*)\*\)\s*$/) { + if ($inverbatim) { + do end_verbatim(); + } + print "\\subsection*{", $1, "}\n"; + next line; + } + if (m/^\s*\(\*\*\s*(.*)\*\)\s*$/) { + if ($inverbatim) { + do end_verbatim(); + } + print "\\subsubsection*{", $1, "}\n"; + next line; + } + if (s/^\s*\(\*//) { + if ($inverbatim) { + do end_verbatim(); + } + print "\\begin{comment}\n"; + $incomment = 1; + } + if ($incomment) { + $endcomment = s/\*\)\s*$//; + if (m/^\s*\[\s*$/) { + print "\\begin{restoreindent}\n" unless $html; + print "\\begin{verbatim}\n"; + while (($_ = <>) && ! m/^\s*\]\s*$/) { + print $_; + } + print "\\end{verbatim}\n"; + print "\\end{restoreindent}\n" unless $html; + } else { + if (s/^-//) { + print "\\\\"; + print "[\\smallskipamount]" unless $html; + } + s/^\s*//; + $count = 0; + foreach $part (split(/(\\?[\[\]])/, $_)) { + if ($part eq "[") { + print ($count == 0 ? "\\verb$sep" : "["); + $count++; + } elsif ($part eq "]") { + $count--; + print ($count == 0 ? "$sep" : "]"); + } elsif ($part =~ m/^\\([\[\]])$/) { + print $1; + } else { + print $part; + } + } + } + if ($endcomment) { + print "\n\\end{comment}"; + $incomment = 0; + $inverbatim = 0; + } + } else { + next line if /^$/; + if (! 
$inverbatim) { + print "\\begin{verbatim}\n"; + $inverbatim = 1; + } + s/^external /val /; + s/ = ("[^"]*"\s*)+$//; + next line if /^\s*$/; + s/^val \( ([^ )]+) \)/val (\1)/; + { + do indexentry($1, " (operator)"), last + if (m/^val \(([^)]*)\)/); + do indexentry($1, ""), last + if (m/^val ([a-zA-Z0-9_']*)/); + do indexentry($1, " (type)"), last + if (m/^type\s.*([a-zA-Z0-9_']*)\s*=/); + do indexentry($1, " (exception)"), last + if (m/^exception ([a-zA-Z0-9_']*)/); + do indexentry($1, " (module type)"), last + if (m/^module type ([a-zA-Z0-9_']*)/); + do indexentry($1, " (functor)"), last + if (m/^module ([a-zA-Z0-9_']*)\s*\(/); + do indexentry($1, " (module)"), last + if (m/^module ([a-zA-Z0-9_']*)/); + } + print $_; + } + print "\n"; +} +do end_verbatim() if $inverbatim; +print "\\end{comment}\n" if $incomment; + +sub indexentry { + local ($_, $comment) = @_; + return if m/^$/ || m/^[a-zA-Z]$/; + s/([@|!])/"$1/g; + if (! m|`|) { + $s = "`"; + } elsif (! m|~|) { + $s = "~"; + } elsif (! m/\|/) { + $s = "|"; + } else { + die("Can't find quote character for $_"); + } + push (@index, "\\index{$_$comment@\\verb$s$_$s$comment}"); +} + +sub end_verbatim { + print "\\end{verbatim}\n"; + foreach $idx (@index) { + print $idx, "%\n"; + } + undef(@index); + $inverbatim = 0; +} diff --git a/manual/tools/htmlcut b/manual/tools/htmlcut new file mode 100755 index 0000000000..be079abb2b --- /dev/null +++ b/manual/tools/htmlcut @@ -0,0 +1,111 @@ +#!/usr/local/bin/perl +# Split an HTML file into smaller nodes. +# Split at <H1> headers and also at some <H2> headers. 
+ +$h0 = "H0"; +$h1 = "H1"; +$h2 = "H2"; + +# Parse options + +option: +while(1) { + $_ = $ARGV[0]; + if (/^-([0-9]+)$/) { + $split2[$1] = 1; + } + elsif (/^-article/) { + $h0 = "H1"; + $h1 = "H2"; + $h2 = "H3"; + } + else { + last option; + } + shift(@ARGV); +} + +$infile = $ARGV[0]; + +# Find URL's for the links + +$level0 = 0; +$level1 = 0; +$uselabel = 1; +open(INPUT, $infile); +while(<INPUT>) { + if (m|^<$h0>(.*)</$h0>|o) { + $level0++; + $currfile = "node" . ($level1 + 1) . ".html"; + $lblnum = $level0; + $uselabel = 0; + } + if (m|^<$h1>(.*)</$h1>|o) { + $level1++; + $level2 = 0; + $currfile = "node$level1.html"; + $lblnum = $level1; + $uselabel = 1; + } + if (m|^<$h2>(.*)</$h2>|o) { + $level2++; + if ($split2[$level1]) { $currfile = "node$level1.$level2.html"; } + $lblnum = "$level1.$level2"; + } + s|<A NAME="([^"]*)"></A>|do set_url($1)|ige; +} + +sub set_url { + local ($lbl) = @_; + if ($uselabel) { + $url{$lbl} = "$currfile#$lbl"; + } else { + $url{$lbl} = $currfile; + } + $label{$lbl} = $lblnum; +} + +# Cut the file + +$level1 = 0; +open(INPUT, $infile); +while(<INPUT>) { + if (m|^<$h0>(.*)</$h0>|o) { + if ($level2 > 0) { print FILE1 "</UL>\n"; } + select(STDOUT); + if ($level1 >= 1) { print "</UL>"; } + print "<$h2>$1</$h2>\n"; + if ($level1 >= 1) { print "<UL>"; } + next; + } + if (m|^<$h1>(.*)</$h1>|o) { + if ($level2 > 0) { print FILE1 "</UL>\n"; } + $level1++; + $level2 = 0; + select(STDOUT); + if ($level1 == 1) { print "<HR><BR><UL>\n"; } + print "<LI><A HREF=\"node$level1.html\">$1</A>\n"; + open(FILE1, "> node$level1.html"); + select(FILE1); + &print_title($1); + } + if ($split2[$level1] && m|^<$h2>(.*)</$h2>|o) { + $level2++; + select(FILE1); + if ($level2 == 1) { print "<HR><BR><UL>\n"; } + print "<LI><A HREF=\"node$level1.$level2.html\">$1</A>\n"; + open(FILE2, "> node$level1.$level2.html"); + select(FILE2); + &print_title($1); + } + s|<A HREF="#([^"]*)">X</A>|'<A HREF="' . $url{$1} . '">' . $label{$1} . 
'</A>'|ige; + print $_; +} +select(STDOUT); +if ($level1 >= 1) { print "</UL>\n"; } + +sub print_title { + local ($title) = @_; + $title =~ s|<[a-zA-Z/]+>||g; + print "<TITLE>$title</TITLE>\n"; +} diff --git a/manual/tools/htmlquote.c b/manual/tools/htmlquote.c new file mode 100644 index 0000000000..1d11eca545 --- /dev/null +++ b/manual/tools/htmlquote.c @@ -0,0 +1,87 @@ +#include <stdio.h> +#include <ctype.h> + +#define LINE_LENGTH 1024 + +char line[LINE_LENGTH]; + +int isprefix(s, pref) + char * s; + char * pref; +{ + while (1) { + if (*pref == 0) return 1; + if (*s == 0) return 0; + if (*s != *pref) return 0; + s++; + pref++; + } +} + +int main(argc, argv) + int argc; + char * argv []; +{ + unsigned char * p; + int c; + int inquote; + int inverb; + int inverbatim; + + inverbatim = 0; + inquote = 0; + + while(fgets(line, LINE_LENGTH, stdin) != NULL) { + if (inverbatim) { + fputs(line, stdout); + if (isprefix(line, "\\end{verbatim") + || isprefix(line, "\\end{alltt}")) inverbatim = 0; + continue; + } + if (isprefix(line, "\\begin{verbatim") + || isprefix(line, "\\begin{alltt}")) { + fputs(line, stdout); + inverbatim = 1; + continue; + } + inverb = 0; + for (p = (unsigned char *) line; *p != 0; p++) { + c = *p; + if (inverb) { + if (c == inverb) inverb = 0; + putchar(c); + continue; + } + switch(c) { + case '"': + if (inquote) { + fputs("\001", stdout); + inquote = 0; + } else { + fputs("\\verb\001", stdout); + inquote = 1; + } + break; + case '\\': + if (isprefix(p, "\\verb") && p[5] != 0 && !isalpha(p[5])) { + inverb = p[5]; + p = p + 5; + fputs("\\verb", stdout); + putchar(inverb); + } else if (inquote) { + if (p[1] == '"' || p[1] == '\\') { + c = p[1]; + p++; + } + putchar(c); + } else { + putchar('\\'); + } + break; + default: + putchar(c); + } + } + } + return 0; +} diff --git a/manual/tools/htmltbl b/manual/tools/htmltbl new file mode 100755 index 0000000000..4b7b41a7db --- /dev/null +++ b/manual/tools/htmltbl @@ -0,0 +1,134 @@ +#!/usr/local/bin/perl + 
+while (<>) { + if (m|^<tbl[> ]|) { + while (! m|</tbl>$|) { $_ .= <>; } + s/\n//g; + print "<pre>\n"; + do format_table($_); + print "</pre>\n"; + } else { + print $_; + } +} + +sub format_table { +# On input, $_ contains: +# <tbl [border]><th>Header 1<th>Header2<th>...<th>Header M<tr> +# <td>Data11<td>Data12<td>...<td>Data1M<tr> +# ... +# <td>DataN1<td>DataN2<td>...<td>DataNM<tr> +# </tbl> + +# Extract the entries and compute the number of lines and columns + + $numlines = 0; + $numcols = 0; + $border = 0; + $header = 0; + $x = 0; + $y = 0; + foreach $_ (split(/(<tbl[ a-zA-Z]*>|<th>|<td>|<tr>|<\/tbl>)/, $_)) { + if (/^$/) { next; } + elsif (/<tbl border>/) { $border = 1; } + elsif (/<tr>/i) { + if ($x > $numcols) { $numcols = $x; } + $x = 0; + $y++; + } + elsif (/<th>/) { $header = 1; } + elsif (!/(<tbl[ a-zA-Z]*>|<th>|<td>|<tr>|<\/tbl>)/) { + s|</?[a-zA-Z]*>||g; # Remove embedded tags + s/^\s*//; # and initial blanks + s/\s*$//; # and final blanks + s/\s\s\s*/ /g; # and extra blanks + s/</</g; # Unescape HTML specials + s/>/>/g; + s/&/&/g; + $entry{$x, $y} = $_; + $x++; + } + } + $numlines = $y; + +# Compute the max width of each column + + $totalwidth = 0; + + for ($x = 0; $x < $numcols; $x++) { + $max = 0; + for ($y = 0; $y < $numlines; $y++) { + $len = length($entry{$x, $y}); + if ($len > $max) { $max = $len; } + } + $width[$x] = $max; + $totalwidth += $max; + } + +# If it does not fit in one line, turn wide fields into multi-line fields + + if ($totalwidth >= 65) { + $totalwidth = 0; + $maxwidth = 65 / $numcols; + for ($x = 0; $x < $numcols; $x++) { + if ($width[$x] > $maxwidth) { + if ($x < $numcols - 1) { + $width[$x] = $maxwidth; + } else { + $width[$x] = 70 - $totalwidth; + } + } + $totalwidth += $width[$x]; + } + } + +# Compute the separators + + if ($border) { + $horsep = '+-'; + for ($x = 0; $x < $numcols; $x++) { + if ($x > 0) { $horsep .= '-+-'; } + $horsep .= '-' x $width[$x]; + } + $horsep .= '-+'; + $verleft = '| '; + $versep = ' | '; + $verright 
= ' |'; + } else { + $horsep = ''; + $verleft = ' '; + $versep = ' '; + $verright = ' '; + } + +# Print the table + print $horsep, "\n"; + for ($y = 0; $y < $numlines; $y++) { + do { + $overflow = 0; + print $verleft; + for ($x = 0; $x < $numcols; $x++) { + if ($x > 0) { print $versep; } + $_ = $entry{$x, $y}; + if (length($_) > $width[$x]) { + $pos = rindex($_, ' ', $width[$x]); + if ($pos < 0) { $pos = $width[$x]; } else { $pos++; } + $entry{$x, $y} = substr($_, $pos); + $_ = substr($_, 0, $pos - 1); + $overflow = 1; + } else { + $entry{$x, $y} = ''; + } + $len = length($_); + s/&/&/g; + s/</</g; + s/>/>/g; + print $_, ' ' x ($width[$x] - $len); + } + print $verright, "\n"; + } while($overflow); + if ($header && $y == 0) { print $horsep, "\n"; } + } + print $horsep, "\n"; +} + diff --git a/manual/tools/htmlthread b/manual/tools/htmlthread new file mode 100755 index 0000000000..fa66cdb25c --- /dev/null +++ b/manual/tools/htmlthread @@ -0,0 +1,58 @@ +#!/usr/local/bin/perl +# Insert Next/Previous/Contents buttons in a set of pages. 
+ +@pages = sort fragmentorder @ARGV; + +sub fragmentorder { + $a =~ /^node([0-9]+)/; $na = $1; + if ($a =~ /^node[0-9]+\.([0-9]+)\.html/) { $fa = $1; } else { $fa = 0; } + $b =~ /^node([0-9]+)/; $nb = $1; + if ($b =~ /^node[0-9]+\.([0-9]+)\.html/) { $fb = $1; } else { $fb = 0; } + return (($na <=> $nb) || ($fa <=> $fb)); +} + +for ($i = 0; $i <= $#pages; $i++) { + open(SRC, $pages[$i]); + open(DST, "> newpage.html"); + select(DST); + $_ = <SRC>; # Title line + print "<HEAD>\n"; + print $_; + do links(); + print "</HEAD>\n"; + print "<BODY>\n"; + do buttons(); + print "<HR>\n"; + $numlines = 0; + while (<SRC>) { + $numlines++; + print $_; + } + if ($numlines >= 40) { + print "<HR>\n"; + do buttons(); + } + close(SRC); + close(DST); + rename("newpage.html", $pages[$i]); +} + +sub links { + if ($i > 0) { + print '<LINK REL="Prev" HREF="', $pages[$i-1], "\">\n"; + } + if ($i < $#pages) { + print '<LINK REL="Next" HREF="', $pages[$i+1], "\">\n"; + } + print "<LINK REL=\"ToC\" HREF=\"index.html\">\n"; +} + +sub buttons { + if ($i > 0) { + print '<A HREF="', $pages[$i-1], '"><IMG SRC="previous_motif.gif" ALT="Previous"></A>', "\n"; + } + if ($i < $#pages) { + print '<A HREF="', $pages[$i+1], '"><IMG SRC="next_motif.gif" ALT="Next"></A>', "\n"; + } + print '<A HREF="index.html"><IMG SRC="contents_motif.gif" ALT="Contents"></A>', "\n"; +} diff --git a/manual/tools/htmltransf.mll b/manual/tools/htmltransf.mll new file mode 100644 index 0000000000..3db5e31f37 --- /dev/null +++ b/manual/tools/htmltransf.mll @@ -0,0 +1,117 @@ +{ +open Lexing;; + +let need_space = + ref false;; + +let addspace () = + if !need_space then begin print_char ' '; need_space := false end;; +} + +rule main = parse + "\\begin{syntax}" { + print_string "\\begin{rawhtml}\n<PRE>\n"; + need_space := false; + syntax lexbuf; + print_string "</PRE>\n\\end{rawhtml}\n"; + main lexbuf } + | "\\@" { + print_string "@"; + main lexbuf } + | "@" { + print_string "%\n\\begin{rawhtml}"; + need_space := false; + syntax 
lexbuf; + print_string "\\end{rawhtml}%\n"; + main lexbuf } + | _ { + print_char (lexeme_char lexbuf 0); main lexbuf } + | eof { + () } + +and syntax = parse + "\\end{syntax}" { () } + | "@" { () } + | '\'' { + addspace(); + print_string "<font color=\"blue\"><code>"; + inquote lexbuf; + print_string "</code></font>"; + need_space := true; + syntax lexbuf } + | '\"' { + addspace(); + print_string "<font color=\"blue\"><code>"; + indoublequote lexbuf; + print_string "</code></font>"; + need_space := true; + syntax lexbuf } + | ['a'-'z'] ['a'-'z' '0'-'9' '-'] * { + addspace(); + print_string "<i>"; + print_string (lexeme lexbuf); + print_string "</i>"; + need_space := true; + syntax lexbuf } + | '\\' ['a'-'z''A'-'Z'] + { + begin match lexeme lexbuf with + "\\ldots" -> print_string "..."; need_space := false + | s -> Printf.eprintf "Warning: %s ignored.\n" s + end; + syntax lexbuf } + | '_' _ { + print_string "<SUB>"; + print_char(lexeme_char lexbuf 1); + print_string "</SUB>"; + syntax lexbuf } + | '^' _ { + print_string "<SUP>"; + print_char(lexeme_char lexbuf 1); + print_string "</SUP>"; + syntax lexbuf } + | ":" { + print_string ":\n "; + need_space := false; + syntax lexbuf } + | "|" { + print_string "\n | "; + need_space := false; + syntax lexbuf } + | ";" { + print_string "\n\n"; + need_space := false; + syntax lexbuf } + | [ '{' '[' '('] { + addspace(); print_string (lexeme lexbuf); syntax lexbuf } + | [ '}' ']' ')'] { + print_string (lexeme lexbuf); syntax lexbuf } + | "{{" { + addspace(); print_string "{"; syntax lexbuf } + | "}}" { + print_string "}+"; syntax lexbuf } + | "||" { + print_string " | "; need_space := false; syntax lexbuf } + | [ ' ' '\n' '\t' '~'] { + syntax lexbuf } + | [ ',' ] { + print_char(lexeme_char lexbuf 0); syntax lexbuf } + | _ { + Printf.eprintf "Warning: %s ignored at char %d.\n" + (lexeme lexbuf) (lexeme_start lexbuf); + syntax lexbuf } + +and inquote = parse + '\'' { () } + | '&' { print_string "&"; inquote lexbuf } + | '<' { 
print_string "<"; inquote lexbuf } + | '>' { print_string ">"; inquote lexbuf } + | _ { print_char (lexeme_char lexbuf 0); inquote lexbuf } + +and indoublequote = parse + '"' { () } + | '&' { print_string "&"; indoublequote lexbuf } + | '<' { print_string "<"; indoublequote lexbuf } + | '>' { print_string ">"; indoublequote lexbuf } + | _ { print_char (lexeme_char lexbuf 0); indoublequote lexbuf } + + diff --git a/manual/tools/latexmacros.ml b/manual/tools/latexmacros.ml new file mode 100644 index 0000000000..7353b49443 --- /dev/null +++ b/manual/tools/latexmacros.ml @@ -0,0 +1,149 @@ +type action = + Print of string + | Print_arg + | Skip_arg;; + +let cmdtable = (Hashtbl.create 19 : (string, action list) Hashtbl.t);; + +let def_macro name action = + Hashtbl.add cmdtable name action;; + +let find_macro name = + try + Hashtbl.find cmdtable name + with Not_found -> + prerr_string "Unknown macro: "; prerr_endline name; [];; + +(* General LaTeX macros *) + +def_macro "\\part" + [Print "<H0>"; Print_arg; Print "</H0>\n"]; +def_macro "\\chapter" + [Print "<H1>"; Print_arg; Print "</H1>\n"]; +def_macro "\\chapter*" + [Print "<H1>"; Print_arg; Print "</H1>\n"]; +def_macro "\\section" + [Print "<H2>"; Print_arg; Print "</H2>\n"]; +def_macro "\\section*" + [Print "<H2>"; Print_arg; Print "</H2>\n"]; +def_macro "\\subsection" + [Print "<H3>"; Print_arg; Print "</H3>\n"]; +def_macro "\\subsection*" + [Print "<H3>"; Print_arg; Print "</H3>\n"]; +def_macro "\\subsubsection" + [Print "<H4>"; Print_arg; Print "</H4>\n"]; +def_macro "\\subsubsection*" + [Print "<H4>"; Print_arg; Print "</H4>\n"]; +def_macro "\\paragraph" + [Print "<B>"; Print_arg; Print "</B> \n"]; +def_macro "\\begin{alltt}" [Print "<pre>"]; +def_macro "\\end{alltt}" [Print "</pre>"]; +def_macro "\\begin{itemize}" [Print "<p><ul>"]; +def_macro "\\end{itemize}" [Print "</ul>"]; +def_macro "\\begin{enumerate}" [Print "<p><ol>"]; +def_macro "\\end{enumerate}" [Print "</ol>"]; +def_macro "\\begin{description}" [Print 
"<p><dl>"]; +def_macro "\\end{description}" [Print "</dl>"]; +def_macro "\\begin{center}" [Print "<blockquote>"]; +def_macro "\\end{center}" [Print "</blockquote>"]; +def_macro "\\begin{quote}" [Print "<blockquote>"]; +def_macro "\\end{quote}" [Print "</blockquote>"]; +def_macro "\\begin{quotation}" [Print "<blockquote>"]; +def_macro "\\end{quotation}" [Print "</blockquote>"]; +def_macro "\\smallskip" []; +def_macro "\\medskip" []; +def_macro "\\bigskip" []; +def_macro "\\markboth" [Skip_arg; Skip_arg]; +def_macro "\\ldots" [Print "..."]; +def_macro "\\ " [Print " "]; +def_macro "\\{" [Print "{"]; +def_macro "\\}" [Print "}"]; +def_macro "\\%" [Print "%"]; +def_macro "\\$" [Print "$"]; +def_macro "\\#" [Print "#"]; +def_macro "\\/" []; +def_macro "\\newpage" []; +def_macro "\\label" [Print "<A name=\""; Print_arg; Print "\"></A>"]; +def_macro "\\ref" [Print "<A href=\"#"; Print_arg; Print "\">X</A>"]; +def_macro "\\pageref" [Print "<A href=\"#"; Print_arg; Print "\">X</A>"]; +def_macro "\\index" [Skip_arg]; +def_macro "\\oe" [Print "oe"]; +def_macro "\\&" [Print "&"]; +def_macro "\\_" [Print "_"]; +def_macro "\\leq" [Print "<="]; +def_macro "\\geq" [Print ">="]; +def_macro "\\hbox" [Print_arg]; +def_macro "\\copyright" [Print "\169"]; +def_macro "\\noindent" []; +def_macro "\\begin{flushleft}" [Print "<blockquote>"]; +def_macro "\\end{flushleft}" [Print "</blockquote>"]; +def_macro "\\\\" [Print "<br>"]; +def_macro "\\begin{htmlonly}" []; +def_macro "\\end{htmlonly}" []; +();; + +(* Macros specific to the Caml manual *) + +def_macro "\\begin{options}" [Print "<p><dl>"]; +def_macro "\\end{options}" [Print "</dl>"]; +def_macro "\\var" [Print "<i>"; Print_arg; Print "</i>"]; +def_macro "\\optvar" [Print "[<i>"; Print_arg; Print "</i>]"]; +def_macro "\\nth" [Print "<i>"; Print_arg; + Print "</i><sub>"; Print_arg; Print "</sub>"]; +def_macro "\\nmth" [Print "<i>"; Print_arg; + Print "</i><sub>"; Print_arg; + Print "</sub><sup>"; Print_arg; + Print "</sup>"]; +def_macro 
"\\begin{unix}" [Print "<dl><dt><b>Unix:</b><dd>"]; +def_macro "\\end{unix}" [Print "</dl>"]; +def_macro "\\begin{macos}" [Print "<dl><dt><b>MacOS:</b><dd>"]; +def_macro "\\end{macos}" [Print "</dl>"]; +def_macro "\\begin{windows}" [Print "<dl><dt><b>Windows:</b><dd>"]; +def_macro "\\end{windows}" [Print "</dl>"]; +def_macro "\\begin{requirements}" [Print "<dl><dt><b>Requirements:</b><dd>"]; +def_macro "\\end{requirements}" [Print "</dl>"]; +def_macro "\\begin{troubleshooting}" [Print "<dl><dt><b>Troubleshooting:</b><dd>"]; +def_macro "\\end{troubleshooting}" [Print "</dl>"]; +def_macro "\\begin{installation}" [Print "<dl><dt><b>Installation:</b><dd>"]; +def_macro "\\end{installation}" [Print "</dl>"]; +def_macro "\\index" [Skip_arg]; +def_macro "\\ikwd" [Skip_arg]; +def_macro "\\th" [Print "-th"]; +def_macro "\\begin{library}" []; +def_macro "\\end{library}" []; +def_macro "\\begin{comment}" [Print "<dl><dd>"]; +def_macro "\\end{comment}" [Print "</dl>"]; +def_macro "\\begin{tableau}" + [Skip_arg; + Print "<table border>\n<tr><th>"; + Print_arg; + Print "</th><th>"; + Print_arg; + Print "</th></tr>"]; +def_macro "\\entree" + [Print "<tr><td>"; Print_arg; + Print "</td><td>"; Print_arg; Print "</td></tr>"]; +def_macro "\\end{tableau}" [Print "</table>"]; +def_macro "\\begin{gcrule}" [Print "<dl><dt><b>Rule:</b><dd>"]; +def_macro "\\end{gcrule}" [Print "</dl>"]; +def_macro "\\begin{tableauoperateurs}" + [Print "<table border>\n<tr><th>Operator</th><th>Associated ident</th><th>Behavior in the default environment</th></tr>"]; +def_macro "\\end{tableauoperateurs}" [Print "</table>\n"]; +def_macro "\\entreeoperateur" + [Print "<tr><td>"; Print_arg; Print "</td><td>"; Print_arg; + Print "</td><td>"; Print_arg; Print "</td></tr>"]; +def_macro "\\fromoneto" + [Print "<i>"; Print_arg; Print "</i> = 1, ..., <i>"; + Print_arg; Print "</i>"]; +def_macro "\\caml" [Print "<pre>"]; +def_macro "\\endcaml" [Print "</pre>"]; +def_macro "\\<" [Print "<u>"]; +def_macro "\\>" [Print 
"</u>"]; +def_macro "\\rminalltt" [Print_arg]; +def_macro "\\event" [Print "<font color=\"red\">*</font>"]; +def_macro "\\pdfchapter" [Skip_arg]; +def_macro "\\pdfchapterfold" [Skip_arg; Skip_arg]; +def_macro "\\pdfsection" [Skip_arg]; +def_macro "\\transl" [Print "<"; Print_arg; Print ">"]; +();; + diff --git a/manual/tools/latexmacros.mli b/manual/tools/latexmacros.mli new file mode 100644 index 0000000000..e7e4066f0e --- /dev/null +++ b/manual/tools/latexmacros.mli @@ -0,0 +1,8 @@ +type action = + Print of string + | Print_arg + | Skip_arg;; + +val find_macro: string -> action list;; + +val def_macro: string -> action list -> unit;; diff --git a/manual/tools/latexmain.ml b/manual/tools/latexmain.ml new file mode 100644 index 0000000000..02d936f58b --- /dev/null +++ b/manual/tools/latexmain.ml @@ -0,0 +1,4 @@ +let main () = + Latexscan.main (Lexing.from_channel stdin);; + +Printexc.print main (); exit 0;; diff --git a/manual/tools/latexscan.mll b/manual/tools/latexscan.mll new file mode 100644 index 0000000000..1acc523144 --- /dev/null +++ b/manual/tools/latexscan.mll @@ -0,0 +1,166 @@ +{ +open Lexing;; +open Latexmacros;; + +let delimiter = ref (char_of_int 0);; + +let upto delim lexfun lexbuf = + let old_delim = !delimiter in + delimiter := delim; + lexfun lexbuf; + delimiter := old_delim;; + +let verb_delim = ref (char_of_int 0);; + +let brace_nesting = ref 0;; + +let rindex c s = + let rec find i = + if i < 0 then raise Not_found else + if s.[i] = c then i else find (i-1) in + find (String.length s - 1);; + +let first_caml_line = ref true;; +let in_caml = ref false;; +} + +rule main = parse +(* Comments *) + '%' [^ '\n'] * '\n' { main lexbuf } +(* Paragraphs *) + | "\n\n" '\n' * + { print_string "<P>\n"; main lexbuf } +(* Font changes *) + | "{\\it" " "* | "{\\em" " "* + { print_string "<i>"; upto '}' main lexbuf; + print_string "</i>"; main lexbuf } + | "{\\bf" " "* { print_string "<b>"; upto '}' main lexbuf; + print_string "</b>"; main lexbuf } + | "{\\rm" 
" "* { print_string "<u>"; upto '}' main lexbuf; + print_string "</u>"; main lexbuf } + | "{\\tt" " "* { print_string "<tt>"; upto '}' main lexbuf; + print_string "</tt>"; main lexbuf } + | '"' { print_string "<tt>"; indoublequote lexbuf; + print_string "</tt>"; main lexbuf } +(* Verb, verbatim *) + | "\\verb" _ { verb_delim := lexeme_char lexbuf 5; + print_string "<tt>"; inverb lexbuf; print_string "</tt>"; + main lexbuf } + | "\\begin{verbatim}" + { print_string "<pre>"; inverbatim lexbuf; + print_string "</pre>"; main lexbuf } +(* Caml programs *) + | "\\caml" + { print_string "<pre>"; + first_caml_line := true; in_caml := false; + camlprog lexbuf; print_string "</pre>"; main lexbuf } +(* Raw html, latex only *) + | "\\begin{rawhtml}" + { rawhtml lexbuf; main lexbuf } + | "\\begin{latexonly}" + { latexonly lexbuf; main lexbuf } +(* Itemize and similar environments *) + | "\\item[" { print_string "<dt>"; upto ']' main lexbuf; + print_string "<dd>"; main lexbuf } + | "\\item" { print_string "<li>"; main lexbuf } +(* Math mode (hmph) *) + | "$" { main lexbuf } +(* Special characters *) + | "\\char" ['0'-'9']+ + { let lxm = lexeme lexbuf in + let code = String.sub lxm 5 (String.length lxm - 5) in + print_char(char_of_int(int_of_string code)); + main lexbuf } + | "<" { print_string "<"; main lexbuf } + | ">" { print_string ">"; main lexbuf } + | "~" { print_string " "; main lexbuf } +(* Definitions of very simple macros *) + | "\\def\\" (['A'-'Z' 'a'-'z']+ | [^ 'A'-'Z' 'a'-'z']) "{" [^ '{' '}']* "}" + { let s = lexeme lexbuf in + let l = String.length s in + let p = rindex '{' s in + let name = String.sub s 4 (p - 4) in + let expansion = String.sub s (p + 1) (l - p - 2) in + def_macro name [Print expansion]; + main lexbuf } +(* General case for environments and commands *) + | ("\\begin{" | "\\end{") ['A'-'Z' 'a'-'z']+ "}" | + "\\" (['A'-'Z' 'a'-'z']+ '*'? 
| [^ 'A'-'Z' 'a'-'z']) + { let exec_action = function + Print str -> print_string str + | Print_arg -> print_arg lexbuf + | Skip_arg -> skip_arg lexbuf in + List.iter exec_action (find_macro(lexeme lexbuf)); + main lexbuf } +(* Default rule for other characters *) + | eof { () } + | _ { let c = lexeme_char lexbuf 0 in + if c == !delimiter then () else (print_char c; main lexbuf) } + +and indoublequote = parse + '"' { () } + | "<" { print_string "<"; indoublequote lexbuf } + | ">" { print_string ">"; indoublequote lexbuf } + | "&" { print_string "&"; indoublequote lexbuf } + | "\\\"" { print_string "\""; indoublequote lexbuf } + | "\\\\" { print_string "\\"; indoublequote lexbuf } + | _ { print_char(lexeme_char lexbuf 0); indoublequote lexbuf } + +and inverb = parse + "<" { print_string "<"; inverb lexbuf } + | ">" { print_string ">"; inverb lexbuf } + | "&" { print_string "&"; inverb lexbuf } + | _ { let c = lexeme_char lexbuf 0 in + if c == !verb_delim then () + else (print_char c; inverb lexbuf) } +and inverbatim = parse + "<" { print_string "<"; inverbatim lexbuf } + | ">" { print_string ">"; inverbatim lexbuf } + | "&" { print_string "&"; inverbatim lexbuf } + | "\\end{verbatim}" { () } + | _ { print_char(lexeme_char lexbuf 0); inverbatim lexbuf } + +and camlprog = parse + "<" { print_string "<"; camlprog lexbuf } + | ">" { print_string ">"; camlprog lexbuf } + | "&" { print_string "&"; camlprog lexbuf } + | "\\?" 
{ if !first_caml_line then begin + print_string "# <FONT COLOR=\"blue\">"; + first_caml_line := false + end else + print_string " <FONT COLOR=\"blue\">"; + in_caml := true; + camlprog lexbuf } + | "\\:" { print_string "<FONT COLOR=\"green\">"; + in_caml := true; + camlprog lexbuf } + | "\\;" { first_caml_line := true; camlprog lexbuf } + | "\\\\" { print_string "\\"; camlprog lexbuf } + | "\\endcaml" { () } + | "\n" { if !in_caml then begin + print_string "</FONT>"; + in_caml := false + end; + print_char '\n'; + camlprog lexbuf } + | _ { print_char(lexeme_char lexbuf 0); camlprog lexbuf } + +and rawhtml = parse + "\\end{rawhtml}" { () } + | _ { print_char(lexeme_char lexbuf 0); rawhtml lexbuf } + +and latexonly = parse + "\\end{latexonly}" { () } + | _ { latexonly lexbuf } + +and print_arg = parse + [' ' '\n'] * "{" { upto '}' main lexbuf } + | _ { print_char(lexeme_char lexbuf 0); rawhtml lexbuf } + +and skip_arg = parse + "{" { incr brace_nesting; skip_arg lexbuf } + | "}" { decr brace_nesting; + if !brace_nesting > 0 then skip_arg lexbuf } + | _ { skip_arg lexbuf } + + diff --git a/manual/tools/texexpand b/manual/tools/texexpand new file mode 100755 index 0000000000..b2d8032f40 --- /dev/null +++ b/manual/tools/texexpand @@ -0,0 +1,40 @@ +#!/usr/local/bin/perl +# Expand \input commands + +@path = split(/:/, $ENV{'TEXINPUTS'}); + +while(<>) { + if (/^\\input\s*([^\s]*)/) { + do expand($1); + } else { + print $_; + } +} + +sub expand { + local ($filename) = @_; + local (*INPUT); + $filename =~ s/\.tex$//; + $filename = do find_in_path($filename); + open(INPUT, $filename) || (warn("cannot find $filename"), return); + print "%%% $filename\n"; + while(<INPUT>) { + if (/^\\input\s*([^\s]*)/) { + do expand($1); + } else { + print $_; + } + } + close(INPUT); +} + +sub find_in_path { + local ($name) = @_; + local ($dir); + foreach $dir (@path) { + return "$dir/$name.htex" if (-f "$dir/$name.htex"); + return "$dir/$name.tex" if (-f "$dir/$name.tex"); + } + return $name; +} 
+ diff --git a/manual/tools/texquote2.c b/manual/tools/texquote2.c new file mode 100644 index 0000000000..e2522acf75 --- /dev/null +++ b/manual/tools/texquote2.c @@ -0,0 +1,167 @@ +#include <stdio.h> +#include <ctype.h> + +char * transl[256]; + +#define LINE_LENGTH 1024 + +char line[LINE_LENGTH]; + +int isprefix(s, pref) + char * s; + char * pref; +{ + while (1) { + if (*pref == 0) return 1; + if (*s == 0) return 0; + if (*s != *pref) return 0; + s++; + pref++; + } +} + +int main(argc, argv) + int argc; + char * argv []; +{ + unsigned char * p; + int c; + int inquote; + int inverb; + int inverbatim_like; + int incaml; + int inverbatim = 0; + char *verbatim_end_in = ""; + char *verbatim_end_out = ""; + + for (c = 0; c < 256; c++) transl[c] = NULL; +#ifdef TIE_BLANKS + transl[' '] = "~"; + transl['\n'] = "~"; +#else + transl[' '] = "\\ "; + transl['\n'] = "\\ "; +#endif + transl['{'] = "{\\char123}"; + transl['}'] = "{\\char125}"; + transl['^'] = "{\\char94}"; + transl['_'] = "{\\char95}"; + transl['\\'] = "{\\char92}"; + transl['~'] = "{\\char126}"; + transl['$'] = "\\$"; + transl['&'] = "{\\char38}"; + transl['#'] = "\\#"; + transl['%'] = "\\%"; + transl['\''] = "{\\textquotesingle}"; + transl['`'] = "{\\textasciigrave}"; + inverbatim_like = 0; + incaml = 0; + inquote = 0; + inverbatim = 0; + + puts ("% THIS FILE IS GENERATED.\n"); + + while(fgets(line, LINE_LENGTH, stdin) != NULL) { + if (inverbatim_like) { + fputs(line, stdout); + if (isprefix(line, "\\end{caml_") + || isprefix(line, "\\end{rawhtml}")) inverbatim_like = 0; + continue; + } + if (incaml) { + fputs(line, stdout); + if (isprefix(line, "\\endcaml")) incaml = 0; + continue; + } + if (inverbatim){ + if (isprefix (line, verbatim_end_in)){ + fputs (verbatim_end_out, stdout); + inverbatim = 0; + }else{ + for (p = (unsigned char *) line; *p != 0; p++){ + c = *p; + if (c == ' ' || c == '\n' || transl[c] == NULL){ + putchar (c); + }else{ + fputs (transl[c], stdout); + } + } + } + continue; + } + if 
(isprefix(line, "\\begin{caml_") + || isprefix(line, "\\begin{rawhtml}")) { + fputs(line, stdout); + inverbatim_like = 1; + continue; + } + if (isprefix(line, "\\caml")) { + fputs(line, stdout); + incaml = 1; + continue; + } + if (isprefix (line, "\\begin{verbatim}")){ + fputs ("\\begin{machineenv}", stdout); + inverbatim = 1; + verbatim_end_in = "\\end{verbatim}"; + verbatim_end_out = "\\end{machineenv}"; + continue; + } + if (isprefix (line, "\\begin{ocamldoccode}")){ + fputs ("\\begin{ocamldoccode}", stdout); + inverbatim = 1; + verbatim_end_in = "\\end{ocamldoccode}"; + verbatim_end_out = "\\end{ocamldoccode}"; + continue; + } + inverb = 0; + for (p = (unsigned char *) line; *p != 0; p++) { + c = *p; + if (inverb) { + if (c == inverb){ + inverb = 0; + }else if (c == '\'' || c == '`'){ + fprintf (stderr, "Warning: %c found in \\verb\n", c); + } + putchar(c); + continue; + } + switch(c) { + case '"': + if (inquote) { + fputs("}}", stdout); + inquote = 0; + } else { + fputs("{\\machine{", stdout); + inquote = 1; + } + break; + case '\\': + if (inquote) { + if (p[1] == '"' || p[1] == '\\') { + c = p[1]; + p++; + } + if (transl[c] != NULL) + fputs(transl[c], stdout); + else + putchar(c); + } else if (isprefix(p, "\\verb") && p[5] != 0 && !isalpha(p[5])) { + inverb = p[5]; + p = p + 5; + fputs("\\verb", stdout); + putchar(inverb); + } else { + putchar('\\'); + } + break; + default: + if (inquote && transl[c] != NULL) + fputs(transl[c], stdout); + else + putchar(c); + } + } + } + return 0; +} diff --git a/manual/tools/transf.mll b/manual/tools/transf.mll new file mode 100644 index 0000000000..7d17b11b4d --- /dev/null +++ b/manual/tools/transf.mll @@ -0,0 +1,107 @@ +{ + open Lexing;; + open Printf;; + + let print_char_repr c = + match c with + | '\'' -> printf "{\\textquotesingle}" + | '`' -> printf "{\\textasciigrave}" + | _ -> printf "\\char%d" (int_of_char c); + ;; +} + +rule main = parse + "\\begin{syntax}" { + print_string "\\begin{syntax}"; + syntax lexbuf } + | 
"\\begin{verbatim}" { + print_string "\\begin{verbatim}"; + verbatim lexbuf } + | "\\@" { + print_string "@"; + main lexbuf } + | "@" { + print_string "\\synt{"; + syntax lexbuf } + | _ { + print_char (lexeme_char lexbuf 0); main lexbuf } + | eof { + () } + +and syntax = parse + "\\end{syntax}" { + print_string "\\end{syntax}"; + main lexbuf } + | "@" { + print_string "}"; + main lexbuf } + | '\'' { + print_string "\\token{"; + inquote lexbuf } + | '\"' { + print_string "\\token{"; + indoublequote lexbuf } + | "epsilon" { print_string "\\emptystring"; syntax lexbuf } + | ['a'-'z' 'A'-'Z'] ['a'-'z' 'A'-'Z' '0'-'9' '-'] * as lxm { + print_string "\\nonterm{"; + print_string lxm ; + print_string"}"; + syntax lexbuf } + | '@' (['a'-'z' 'A'-'Z'] ['a'-'z' 'A'-'Z' '0'-'9' '-'] * as lxm) '@' { + print_string "\\nt{"; + print_string lxm ; + print_string"}"; + syntax lexbuf } + + | '\\' ['a'-'z''A'-'Z'] + { + print_string (lexeme lexbuf); + syntax lexbuf } + | ['_' '^'] _ { + print_string (lexeme lexbuf); + syntax lexbuf } + | "{" { print_string "\\brepet{}"; syntax lexbuf } + | "}" { print_string "\\erepet{}"; syntax lexbuf } + | "{{" { print_string "\\brepets{}"; syntax lexbuf } + | "}}" { print_string "\\erepets{}"; syntax lexbuf } + | "[" { print_string "\\boption{}"; syntax lexbuf } + | "]" { print_string "\\eoption{}"; syntax lexbuf } + | "(" { print_string "\\bparen{}"; syntax lexbuf } + | ")" { print_string "\\eparen{}"; syntax lexbuf } + | "||" { print_string "\\orelse{}"; syntax lexbuf } + | ":" { print_string "\\is{}"; syntax lexbuf } + | "|" { print_string "\\alt{}"; syntax lexbuf } + | ";" { print_string "\\sep{}"; syntax lexbuf } + | "\\\\" { print_string "\\cutline{}"; syntax lexbuf } + | _ { + print_char (lexeme_char lexbuf 0); + syntax lexbuf } + +and inquote = parse + ['A'-'Z' 'a'-'z' '0'-'9'] { + print_char (lexeme_char lexbuf 0); + inquote lexbuf } + | '\'' { + print_string "}"; + syntax lexbuf } + | _ { + print_char_repr (lexeme_char lexbuf 0); + inquote 
lexbuf } + +and indoublequote = parse + ['A'-'Z' 'a'-'z' '0'-'9'] { + print_char (lexeme_char lexbuf 0); + indoublequote lexbuf } + | '"' { + print_string "}"; + syntax lexbuf } + | _ { + print_char_repr (lexeme_char lexbuf 0); + indoublequote lexbuf } + +and verbatim = parse + "\n\\end{verbatim}" { + print_string "\n\\end{verbatim}"; + main lexbuf } + | _ { + print_char (lexeme_char lexbuf 0); + verbatim lexbuf } diff --git a/manual/tools/transfmain.ml b/manual/tools/transfmain.ml new file mode 100644 index 0000000000..49d9840de4 --- /dev/null +++ b/manual/tools/transfmain.ml @@ -0,0 +1,8 @@ +let main() = + let lexbuf = Lexing.from_channel stdin in + if Array.length Sys.argv >= 2 && Sys.argv.(1) = "-html" + then Htmltransf.main lexbuf + else Transf.main lexbuf; + exit 0;; + +Printexc.print main ();; diff --git a/ocamlbuild/ocaml_specific.ml b/ocamlbuild/ocaml_specific.ml index b902810ad7..a73b7a5c92 100644 --- a/ocamlbuild/ocaml_specific.ml +++ b/ocamlbuild/ocaml_specific.ml @@ -698,15 +698,25 @@ ocaml_lib ~extern:true ~tag_name:"use_toplevel" "toplevellib";; ocaml_lib ~extern:true ~dir:"+ocamldoc" "ocamldoc";; ocaml_lib ~extern:true ~dir:"+ocamlbuild" ~tag_name:"use_ocamlbuild" "ocamlbuildlib";; -ocaml_lib ~extern:true ~dir:"+camlp4" ~tag_name:"use_camlp4" "camlp4lib";; -ocaml_lib ~extern:true ~dir:"+camlp4" ~tag_name:"use_old_camlp4" "camlp4";; -ocaml_lib ~extern:true ~dir:"+camlp4" ~tag_name:"use_camlp4_full" "camlp4fulllib";; +let camlp4dir = + Findlib.( + try + if sys_command "sh -c 'ocamlfind list >/dev/null' 2>/dev/null" != 0 + then raise (Findlib_error Cannot_run_ocamlfind); + (query "camlp4").location + with Findlib_error _ -> + "+camlp4" + );; + +ocaml_lib ~extern:true ~dir:camlp4dir ~tag_name:"use_camlp4" "camlp4lib";; +ocaml_lib ~extern:true ~dir:camlp4dir ~tag_name:"use_old_camlp4" "camlp4";; +ocaml_lib ~extern:true ~dir:camlp4dir ~tag_name:"use_camlp4_full" "camlp4fulllib";; flag ["ocaml"; "compile"; "use_camlp4_full"] - (S[A"-I"; 
A"+camlp4/Camlp4Parsers"; - A"-I"; A"+camlp4/Camlp4Printers"; - A"-I"; A"+camlp4/Camlp4Filters"]);; -flag ["ocaml"; "use_camlp4_bin"; "link"; "byte"] (A"+camlp4/Camlp4Bin.cmo");; -flag ["ocaml"; "use_camlp4_bin"; "link"; "native"] (A"+camlp4/Camlp4Bin.cmx");; + (S[A"-I"; A(camlp4dir^"/Camlp4Parsers"); + A"-I"; A(camlp4dir^"/Camlp4Printers"); + A"-I"; A(camlp4dir^"/Camlp4Filters")]);; +flag ["ocaml"; "use_camlp4_bin"; "link"; "byte"] (A(camlp4dir^"/Camlp4Bin.cmo"));; +flag ["ocaml"; "use_camlp4_bin"; "link"; "native"] (A(camlp4dir^"/Camlp4Bin.cmx"));; flag ["ocaml"; "debug"; "compile"; "byte"] (A "-g");; flag ["ocaml"; "debug"; "link"; "byte"; "program"] (A "-g");; diff --git a/stdlib/obj.ml b/stdlib/obj.ml index 5cb970b8e4..76a4e67b54 100644 --- a/stdlib/obj.ml +++ b/stdlib/obj.ml @@ -60,7 +60,7 @@ let int_tag = 1000 let out_of_heap_tag = 1001 let unaligned_tag = 1002 -let extension_slot x = +let extension_constructor x = let x = repr x in let slot = if (is_block x) && (tag x) <> object_tag && (size x) >= 1 then field x 0 @@ -68,24 +68,13 @@ let extension_slot x = in let name = if (is_block slot) && (tag slot) = object_tag then field slot 0 - else raise Not_found + else invalid_arg "Obj.extension_constructor" in - if (tag name) = string_tag then slot - else raise Not_found + if (tag name) = string_tag then (obj slot : extension_constructor) + else invalid_arg "Obj.extension_constructor" -let extension_name x = - try - let slot = extension_slot x in - (obj (field slot 0) : string) - with Not_found -> invalid_arg "Obj.extension_name" +let extension_name (slot : extension_constructor) = + (obj (field (repr slot) 0) : string) -let extension_id x = - try - let slot = extension_slot x in - (obj (field slot 1) : int) - with Not_found -> invalid_arg "Obj.extension_id" - -let extension_slot x = - try - extension_slot x - with Not_found -> invalid_arg "Obj.extension_slot" +let extension_id (slot : extension_constructor) = + (obj (field (repr slot) 1) : int) diff --git 
a/stdlib/obj.mli b/stdlib/obj.mli index 692ec3cfdc..5bc727940a 100644 --- a/stdlib/obj.mli +++ b/stdlib/obj.mli @@ -61,9 +61,9 @@ val int_tag : int val out_of_heap_tag : int val unaligned_tag : int (* should never happen @since 3.11.0 *) -val extension_name : 'a -> string -val extension_id : 'a -> int -val extension_slot : 'a -> t +val extension_constructor : 'a -> extension_constructor +val extension_name : extension_constructor -> string +val extension_id : extension_constructor -> int (** The following two functions are deprecated. Use module {!Marshal} instead. *) diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml index 9f00387669..b81142efd4 100644 --- a/stdlib/scanf.ml +++ b/stdlib/scanf.ml @@ -804,31 +804,6 @@ let scan_float width precision ib = scan_exp_part width ib, precision ;; -let scan_caml_float width precision ib = - let width = scan_optionally_signed_decimal_int width ib in - if width = 0 then bad_float () else - let c = Scanning.peek_char ib in - if Scanning.eof ib then bad_float () else - match c with - | '.' -> - let width = Scanning.store_char width ib c in - (* The effective width available for scanning the fractional part is - the minimum of declared precision and width left. *) - let precision = min width precision in - (* After scanning the fractional part with [precision] provisional width, - [width_precision] is left. *) - let width_precision = scan_frac_part precision ib in - (* Hence, scanning the fractional part took exactly - [precision - width_precision] chars. *) - let frac_width = precision - width_precision in - (* And new provisional width is [width - width_precision. *) - let width = width - frac_width in - scan_exp_part width ib - | 'e' | 'E' -> - scan_exp_part width ib - | _ -> bad_float () -;; - let check_case_insensitive_string width ib error str = let lowercase c = match c with | 'A' .. 
'Z' -> char_of_int (int_of_char c - int_of_char 'A' + int_of_char 'a') @@ -842,6 +817,7 @@ let check_case_insensitive_string width ib error str = width := Scanning.store_char !width ib c; done; !width +;; let scan_hex_float width precision ib = if width = 0 || Scanning.end_of_input ib then bad_hex_float (); @@ -872,7 +848,7 @@ let scan_hex_float width precision ib = match Scanning.peek_char ib with | 'p' | 'P' as c -> let width = Scanning.store_char width ib c in - if width = 0 then bad_hex_float (); + if width = 0 || Scanning.end_of_input ib then bad_hex_float (); scan_optionally_signed_decimal_int width ib | _ -> width ) @@ -885,6 +861,75 @@ let scan_hex_float width precision ib = if width = 0 || Scanning.end_of_input ib then bad_hex_float (); check_case_insensitive_string width ib bad_hex_float "nfinity" | _ -> bad_hex_float () +;; + +let scan_caml_float_rest width precision ib = + if width = 0 || Scanning.end_of_input ib then bad_float (); + let width = scan_decimal_digits width ib in + if width = 0 || Scanning.end_of_input ib then bad_float (); + match Scanning.peek_char ib with + | '.' as c -> + let width = Scanning.store_char width ib c in + (* The effective width available for scanning the fractional part is + the minimum of declared precision and width left. *) + let precision = min width precision in + (* After scanning the fractional part with [precision] provisional width, + [width_precision] is left. *) + let width_precision = scan_frac_part precision ib in + (* Hence, scanning the fractional part took exactly + [precision - width_precision] chars. *) + let frac_width = precision - width_precision in + (* And new provisional width is [width - width_precision. 
*) + let width = width - frac_width in + scan_exp_part width ib + | 'e' | 'E' -> + scan_exp_part width ib + | _ -> bad_float () +;; + +let scan_caml_float width precision ib = + if width = 0 || Scanning.end_of_input ib then bad_float (); + let width = scan_sign width ib in + if width = 0 || Scanning.end_of_input ib then bad_float (); + match Scanning.peek_char ib with + | '0' as c -> ( + let width = Scanning.store_char width ib c in + if width = 0 || Scanning.end_of_input ib then bad_float (); + match Scanning.peek_char ib with + | 'x' | 'X' as c -> ( + let width = Scanning.store_char width ib c in + if width = 0 || Scanning.end_of_input ib then bad_float (); + let width = scan_hexadecimal_int width ib in + if width = 0 || Scanning.end_of_input ib then bad_float (); + let width = match Scanning.peek_char ib with + | '.' as c -> ( + let width = Scanning.store_char width ib c in + if width = 0 || Scanning.end_of_input ib then width else + match Scanning.peek_char ib with + | 'p' | 'P' -> width + | _ -> + let precision = min width precision in + width - (precision - scan_hexadecimal_int precision ib) + ) + | 'p' | 'P' -> width + | _ -> bad_float () in + if width = 0 || Scanning.end_of_input ib then width else + match Scanning.peek_char ib with + | 'p' | 'P' as c -> + let width = Scanning.store_char width ib c in + if width = 0 || Scanning.end_of_input ib then bad_hex_float (); + scan_optionally_signed_decimal_int width ib + | _ -> width + ) + | _ -> + scan_caml_float_rest width precision ib + ) + | '1' .. 
'9' as c -> + let width = Scanning.store_char width ib c in + if width = 0 || Scanning.end_of_input ib then bad_float (); + scan_caml_float_rest width precision ib + | _ -> bad_float () +;; (* Scan a regular string: stops when encountering a space, if no scanning indication has been given; diff --git a/testsuite/tests/extension-constructor/Makefile b/testsuite/tests/extension-constructor/Makefile new file mode 100644 index 0000000000..299656b246 --- /dev/null +++ b/testsuite/tests/extension-constructor/Makefile @@ -0,0 +1,15 @@ +######################################################################### +# # +# OCaml # +# # +# Xavier Clerc, SED, INRIA Rocquencourt # +# # +# Copyright 2010 Institut National de Recherche en Informatique et # +# en Automatique. All rights reserved. This file is distributed # +# under the terms of the Q Public License version 1.0. # +# # +######################################################################### + +BASEDIR=../.. +include $(BASEDIR)/makefiles/Makefile.several +include $(BASEDIR)/makefiles/Makefile.common diff --git a/testsuite/tests/extension-constructor/test.ml b/testsuite/tests/extension-constructor/test.ml new file mode 100644 index 0000000000..781e72dd54 --- /dev/null +++ b/testsuite/tests/extension-constructor/test.ml @@ -0,0 +1,29 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Jeremie Dimino, Jane Street Europe *) +(* *) +(* Copyright 2015 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +type t = .. 
+ +module M = struct + type t += A + type t += B of int +end + +type t += C +type t += D of int * string + +let () = + assert (Obj.extension_constructor M.A == [%extension_constructor M.A]); + assert (Obj.extension_constructor (M.B 42) == [%extension_constructor M.B]); + assert (Obj.extension_constructor C == [%extension_constructor C ]); + assert (Obj.extension_constructor (D (42, "")) == [%extension_constructor D ]) + +let () = print_endline "OK" diff --git a/testsuite/tests/extension-constructor/test.reference b/testsuite/tests/extension-constructor/test.reference new file mode 100644 index 0000000000..d86bac9de5 --- /dev/null +++ b/testsuite/tests/extension-constructor/test.reference @@ -0,0 +1 @@ +OK diff --git a/testsuite/tests/lib-format/tformat.ml b/testsuite/tests/lib-format/tformat.ml index 13c6094bd1..ad0f16cc73 100644 --- a/testsuite/tests/lib-format/tformat.ml +++ b/testsuite/tests/lib-format/tformat.ml @@ -252,6 +252,36 @@ try test (sprintf "%.3F" 0.0042 = "0.004"); *) + say "\nh\n%!"; + test (sprintf "%+h" (+0.) = "+0x0p+0"); + test (sprintf "%+h" (-0.) = "-0x0p+0"); + test (sprintf "%+h" (+1.) = "+0x1p+0"); + test (sprintf "%+h" (-1.) = "-0x1p+0"); + test (sprintf "%+h" (+1024.) = "+0x1p+10"); + test (sprintf "%+h" (-1024.) = "-0x1p+10"); + test (sprintf "%h" 0x123.456 = "0x1.23456p+8"); + test (sprintf "%h" 0x123456789ABCDE. = "0x1.23456789abcdep+52"); + test (sprintf "%h" epsilon_float = "0x1p-52"); + test (sprintf "%h" nan = "nan"); + test (sprintf "%h" infinity = "infinity"); + test (sprintf "%h" neg_infinity = "-infinity"); + test (sprintf "%h" (4. *. atan 1.) = "0x1.921fb54442d18p+1"); + + say "\nH\n%!"; + test (sprintf "%+H" (+0.) = "+0X0P+0"); + test (sprintf "%+H" (-0.) = "-0X0P+0"); + test (sprintf "%+H" (+1.) = "+0X1P+0"); + test (sprintf "%+H" (-1.) = "-0X1P+0"); + test (sprintf "%+H" (+1024.) = "+0X1P+10"); + test (sprintf "%+H" (-1024.) 
= "-0X1P+10"); + test (sprintf "%H" 0X123.456 = "0X1.23456P+8"); + test (sprintf "%H" 0X123456789ABCDE. = "0X1.23456789ABCDEP+52"); + test (sprintf "%H" epsilon_float = "0X1P-52"); + test (sprintf "%H" nan = "NAN"); + test (sprintf "%H" infinity = "INFINITY"); + test (sprintf "%H" neg_infinity = "-INFINITY"); + test (sprintf "%H" (4. *. atan 1.) = "0X1.921FB54442D18P+1"); + say "\ne\n%!"; test (sprintf "%e" (-42.42) =* "-4.242000e+01"); test (sprintf "%-15e" (-42.42) =* "-4.242000e+01 "); diff --git a/testsuite/tests/lib-format/tformat.reference b/testsuite/tests/lib-format/tformat.reference index cf2b241ce7..461fe9dada 100644 --- a/testsuite/tests/lib-format/tformat.reference +++ b/testsuite/tests/lib-format/tformat.reference @@ -30,62 +30,66 @@ f 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 F 119 120 121 122 123 124 125 +h + 126 127 128 129 130 131 132 133 134 135 136 137 138 +H + 139 140 141 142 143 144 145 146 147 148 149 150 151 e - 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 + 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 E - 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 + 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 B - 162 163 + 188 189 ld/li positive - 164 165 166 167 168 169 170 171 172 + 190 191 192 193 194 195 196 197 198 ld/li negative - 173 174 175 176 177 178 179 180 181 + 199 200 201 202 203 204 205 206 207 lu positive - 182 183 184 185 186 187 188 189 190 + 208 209 210 211 212 213 214 215 216 lu negative - 191 + 217 lx positive - 192 193 194 195 196 197 198 199 200 + 218 219 220 221 222 223 224 225 226 lx negative - 201 + 227 lX positive - 202 203 204 205 206 207 208 209 210 + 228 229 230 231 232 233 234 235 236 lx negative - 211 + 237 lo positive - 212 213 214 215 216 217 218 219 220 + 238 239 240 241 242 243 244 245 246 lo negative - 221 + 247 Ld/Li positive - 222 223 224 225 226 227 228 229 230 + 248 249 250 251 
252 253 254 255 256 Ld/Li negative - 231 232 233 234 235 236 237 238 239 + 257 258 259 260 261 262 263 264 265 Lu positive - 240 241 242 243 244 245 246 247 248 + 266 267 268 269 270 271 272 273 274 Lu negative - 249 + 275 Lx positive - 250 251 252 253 254 255 256 257 258 + 276 277 278 279 280 281 282 283 284 Lx negative - 259 + 285 LX positive - 260 261 262 263 264 265 266 267 268 + 286 287 288 289 290 291 292 293 294 Lx negative - 269 + 295 Lo positive - 270 271 272 273 274 275 276 277 278 + 296 297 298 299 300 301 302 303 304 Lo negative - 279 + 305 a - 280 + 306 t - 281 + 307 {...%} - 282 + 308 (...%) - 283 + 309 ! % @ , and constants - 284 285 286 287 288 289 290 + 310 311 312 313 314 315 316 end of tests All tests succeeded. diff --git a/testsuite/tests/lib-printf/tprintf.ml b/testsuite/tests/lib-printf/tprintf.ml index cb4ee657b8..1535d0e37a 100644 --- a/testsuite/tests/lib-printf/tprintf.ml +++ b/testsuite/tests/lib-printf/tprintf.ml @@ -286,6 +286,36 @@ try test (sprintf "%.3F" 0.0042 = "0.004"); *) + printf "\nh\n%!"; + test (sprintf "%+h" (+0.) = "+0x0p+0"); + test (sprintf "%+h" (-0.) = "-0x0p+0"); + test (sprintf "%+h" (+1.) = "+0x1p+0"); + test (sprintf "%+h" (-1.) = "-0x1p+0"); + test (sprintf "%+h" (+1024.) = "+0x1p+10"); + test (sprintf "%+h" (-1024.) = "-0x1p+10"); + test (sprintf "%h" 0x123.456 = "0x1.23456p+8"); + test (sprintf "%h" 0x123456789ABCDE. = "0x1.23456789abcdep+52"); + test (sprintf "%h" epsilon_float = "0x1p-52"); + test (sprintf "%h" nan = "nan"); + test (sprintf "%h" infinity = "infinity"); + test (sprintf "%h" neg_infinity = "-infinity"); + test (sprintf "%h" (4. *. atan 1.) = "0x1.921fb54442d18p+1"); + + printf "\nH\n%!"; + test (sprintf "%+H" (+0.) = "+0X0P+0"); + test (sprintf "%+H" (-0.) = "-0X0P+0"); + test (sprintf "%+H" (+1.) = "+0X1P+0"); + test (sprintf "%+H" (-1.) = "-0X1P+0"); + test (sprintf "%+H" (+1024.) = "+0X1P+10"); + test (sprintf "%+H" (-1024.) 
= "-0X1P+10"); + test (sprintf "%H" 0X123.456 = "0X1.23456P+8"); + test (sprintf "%H" 0X123456789ABCDE. = "0X1.23456789ABCDEP+52"); + test (sprintf "%H" epsilon_float = "0X1P-52"); + test (sprintf "%H" nan = "NAN"); + test (sprintf "%H" infinity = "INFINITY"); + test (sprintf "%H" neg_infinity = "-INFINITY"); + test (sprintf "%H" (4. *. atan 1.) = "0X1.921FB54442D18P+1"); + printf "\ne\n%!"; test (sprintf "%e" (-42.42) =* "-4.242000e+01"); test (sprintf "%-15e" (-42.42) =* "-4.242000e+01 "); diff --git a/testsuite/tests/lib-printf/tprintf.reference b/testsuite/tests/lib-printf/tprintf.reference index 3a6c3f0dbc..de3dc1dbdd 100644 --- a/testsuite/tests/lib-printf/tprintf.reference +++ b/testsuite/tests/lib-printf/tprintf.reference @@ -30,62 +30,66 @@ f 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 F 85 86 87 88 89 90 91 +h + 92 93 94 95 96 97 98 99 100 101 102 103 104 +H + 105 106 107 108 109 110 111 112 113 114 115 116 117 e - 92 93 94 95 96 97 98 99 100 101 102 103 104 105 + 118 119 120 121 122 123 124 125 126 127 128 129 130 131 E - 106 107 108 109 110 111 112 113 114 115 116 117 118 119 + 132 133 134 135 136 137 138 139 140 141 142 143 144 145 B - 120 121 + 146 147 ld/li positive - 122 123 124 125 126 127 128 + 148 149 150 151 152 153 154 ld/li negative - 129 130 131 132 133 134 135 + 155 156 157 158 159 160 161 lu positive - 136 137 138 139 140 + 162 163 164 165 166 lu negative - 141 + 167 lx positive - 142 143 144 145 146 147 + 168 169 170 171 172 173 lx negative - 148 + 174 lX positive - 149 150 151 152 153 154 + 175 176 177 178 179 180 lx negative - 155 + 181 lo positive - 156 157 158 159 160 161 + 182 183 184 185 186 187 lo negative - 162 + 188 Ld/Li positive - 163 164 165 166 167 + 189 190 191 192 193 Ld/Li negative - 168 169 170 171 172 + 194 195 196 197 198 Lu positive - 173 174 175 176 177 + 199 200 201 202 203 Lu negative - 178 + 204 Lx positive - 179 180 181 182 183 184 + 205 206 207 208 209 210 Lx negative - 185 + 211 LX positive - 186 187 188 189 190 
191 + 212 213 214 215 216 217 Lx negative - 192 + 218 Lo positive - 193 194 195 196 197 198 + 219 220 221 222 223 224 Lo negative - 199 + 225 a - 200 + 226 t - 201 + 227 {...%} - 202 + 228 (...%) - 203 + 229 ! % @ , and constants - 204 205 206 207 208 209 210 + 230 231 232 233 234 235 236 end of tests All tests succeeded. diff --git a/testsuite/tests/lib-scanf/tscanf.ml b/testsuite/tests/lib-scanf/tscanf.ml index a93637ec04..c19bb367a7 100644 --- a/testsuite/tests/lib-scanf/tscanf.ml +++ b/testsuite/tests/lib-scanf/tscanf.ml @@ -104,6 +104,12 @@ let test4 () = test (test4 ()) ;; +let same_float x y = + let is_nan z = (z <> z) in + if is_nan x then is_nan y + else Int64.bits_of_float y = Int64.bits_of_float x +;; + (* e style. *) let test5 () = bscanf (Scanning.from_string "1e1") @@ -122,6 +128,54 @@ let test5 () = "%g %g %g %g" (fun b1 b2 b3 b4 -> b1 = 1.0 && b2 = 1.1 && b3 = 0.0 && b4 = 0.13) + && +(* F style *) + bscanf (Scanning.from_string "1.5 1.5e0 15e-1 0x1.8 0X1.8") + "%F %F %f %F %F" + (fun b1 b2 b3 b4 b5 -> b1 = b2 && b2 = b3 && b3 = b4 && b4 = b5) + && +(* h style *) + begin + let roundtrip x = + bscanf (Printf.ksprintf Scanning.from_string "%h" x) "%h" (same_float x) + in + roundtrip (+0.) && + roundtrip (-0.) && + roundtrip (+1.) && + roundtrip (-1.) && + roundtrip (+1024.) && + roundtrip (-1024.) && + roundtrip 0X123.456 && + roundtrip 0X123456789ABCDE. && + roundtrip epsilon_float && + roundtrip nan && + roundtrip infinity && + roundtrip neg_infinity && + roundtrip (4. *. atan 1.) && + true + end + && + + (* H style *) + begin + let roundtrip x = + bscanf (Printf.ksprintf Scanning.from_string "%H" x) "%H" (same_float x) + in + roundtrip (+0.) && + roundtrip (-0.) && + roundtrip (+1.) && + roundtrip (-1.) && + roundtrip (+1024.) && + roundtrip (-1024.) && + roundtrip 0X123.456 && + roundtrip 0X123456789ABCDE. && + roundtrip epsilon_float && + roundtrip nan && + roundtrip infinity && + roundtrip neg_infinity && + roundtrip (4. *. atan 1.) 
&& + true + end ;; test (test5 ()) diff --git a/testsuite/tests/typing-extension-constructor/Makefile b/testsuite/tests/typing-extension-constructor/Makefile new file mode 100644 index 0000000000..c9433b2ecb --- /dev/null +++ b/testsuite/tests/typing-extension-constructor/Makefile @@ -0,0 +1,15 @@ +######################################################################### +# # +# OCaml # +# # +# Xavier Clerc, SED, INRIA Rocquencourt # +# # +# Copyright 2010 Institut National de Recherche en Informatique et # +# en Automatique. All rights reserved. This file is distributed # +# under the terms of the Q Public License version 1.0. # +# # +######################################################################### + +BASEDIR=../.. +include $(BASEDIR)/makefiles/Makefile.toplevel +include $(BASEDIR)/makefiles/Makefile.common diff --git a/testsuite/tests/typing-extension-constructor/test.ml b/testsuite/tests/typing-extension-constructor/test.ml new file mode 100644 index 0000000000..f40f275269 --- /dev/null +++ b/testsuite/tests/typing-extension-constructor/test.ml @@ -0,0 +1,21 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Jeremie Dimino, Jane Street Europe *) +(* *) +(* Copyright 2015 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. 
*) +(* *) +(***********************************************************************) + +type t = ..;; +type t += A;; + +[%extension_constructor A];; +([%extension_constructor A] : extension_constructor);; + +type extension_constructor = int;; + +([%extension_constructor A] : extension_constructor);; diff --git a/testsuite/tests/typing-extension-constructor/test.ml.reference b/testsuite/tests/typing-extension-constructor/test.ml.reference new file mode 100644 index 0000000000..ae63c27823 --- /dev/null +++ b/testsuite/tests/typing-extension-constructor/test.ml.reference @@ -0,0 +1,13 @@ + +# type t = .. +# type t += A +# - : extension_constructor = <abstr> +# - : extension_constructor = <abstr> +# type extension_constructor = int +# Characters 2-28: + ([%extension_constructor A] : extension_constructor);; + ^^^^^^^^^^^^^^^^^^^^^^^^^^ +Error: This expression has type extension_constructor/16 + but an expression was expected of type + extension_constructor/1214 = int +# diff --git a/testsuite/tests/typing-extensions/extensions.ml b/testsuite/tests/typing-extensions/extensions.ml index 3539be332f..edb14c8b22 100644 --- a/testsuite/tests/typing-extensions/extensions.ml +++ b/testsuite/tests/typing-extensions/extensions.ml @@ -296,19 +296,22 @@ type foo += | Bar of int ;; -let n1 = Obj.extension_name Foo +let extension_name e = Obj.extension_name (Obj.extension_constructor e);; +let extension_id e = Obj.extension_id (Obj.extension_constructor e);; + +let n1 = extension_name Foo ;; -let n2 = Obj.extension_name (Bar 1) +let n2 = extension_name (Bar 1) ;; -let t = (Obj.extension_id (Bar 2)) = (Obj.extension_id (Bar 3)) (* true *) +let t = (extension_id (Bar 2)) = (extension_id (Bar 3)) (* true *) ;; -let f = (Obj.extension_id (Bar 2)) = (Obj.extension_id Foo) (* false *) +let f = (extension_id (Bar 2)) = (extension_id Foo) (* false *) ;; -let is_foo x = (Obj.extension_id Foo) = (Obj.extension_id x) +let is_foo x = (extension_id Foo) = (extension_id x) type foo += Foo ;; @@ 
-316,8 +319,8 @@ type foo += Foo let f = is_foo Foo ;; -let _ = Obj.extension_name 7 (* Invald_arg *) +let _ = Obj.extension_constructor 7 (* Invald_arg *) ;; -let _ = Obj.extension_id (object method m = 3 end) (* Invald_arg *) +let _ = Obj.extension_constructor (object method m = 3 end) (* Invald_arg *) ;; diff --git a/testsuite/tests/typing-extensions/extensions.ml.reference b/testsuite/tests/typing-extensions/extensions.ml.reference index 2f827076ec..2a9183f77e 100644 --- a/testsuite/tests/typing-extensions/extensions.ml.reference +++ b/testsuite/tests/typing-extensions/extensions.ml.reference @@ -119,6 +119,8 @@ Error: This extension does not match the definition of type bar # val y : exn * exn = (Foo (3, _), Bar (Some 5)) # type foo = .. # type foo += Foo | Bar of int +# val extension_name : 'a -> string = <fun> +# val extension_id : 'a -> int = <fun> # val n1 : string = "Foo" # val n2 : string = "Bar" # val t : bool = true @@ -126,6 +128,6 @@ Error: This extension does not match the definition of type bar # val is_foo : 'a -> bool = <fun> type foo += Foo # val f : bool = false -# Exception: Invalid_argument "Obj.extension_name". -# Exception: Invalid_argument "Obj.extension_id". +# Exception: Invalid_argument "Obj.extension_constructor". +# Exception: Invalid_argument "Obj.extension_constructor". # diff --git a/testsuite/tests/typing-gadts/pr6980.ml b/testsuite/tests/typing-gadts/pr6980.ml new file mode 100644 index 0000000000..d42002ad33 --- /dev/null +++ b/testsuite/tests/typing-gadts/pr6980.ml @@ -0,0 +1,11 @@ +type 'a t = [< `Foo | `Bar] as 'a;; +type 'a s = [< `Foo | `Bar | `Baz > `Bar] as 'a;; + +type 'a first = First : 'a second -> ('b t as 'a) first +and 'a second = Second : ('b s as 'a) second;; + +type aux = Aux : 'a t second * ('a -> int) -> aux;; + +let it : 'a. 
[< `Bar | `Foo > `Bar ] as 'a = `Bar;; + +let g (Aux(Second, f)) = f it;; diff --git a/testsuite/tests/typing-gadts/pr6980.ml.reference b/testsuite/tests/typing-gadts/pr6980.ml.reference new file mode 100644 index 0000000000..5fd89921c2 --- /dev/null +++ b/testsuite/tests/typing-gadts/pr6980.ml.reference @@ -0,0 +1,14 @@ + +# type 'a t = 'a constraint 'a = [< `Bar | `Foo ] +# type 'a s = 'a constraint 'a = [< `Bar | `Baz | `Foo > `Bar ] +# type 'a first = First : 'b t second -> ([< `Bar | `Foo ] as 'b) t first +and 'a second = Second : [< `Bar | `Baz | `Foo > `Bar ] s second +# type aux = Aux : ([< `Bar | `Foo ] as 'a) t second * ('a -> int) -> aux +# val it : [< `Bar | `Foo > `Bar ] = `Bar +# Characters 28-30: + let g (Aux(Second, f)) = f it;; + ^^ +Error: This expression has type [< `Bar | `Foo > `Bar ] + but an expression was expected of type [< `Bar | `Foo ] + Types for tag `Bar are incompatible +# diff --git a/testsuite/tests/typing-misc-bugs/pr6946_bad.ml b/testsuite/tests/typing-misc-bugs/pr6946_bad.ml new file mode 100644 index 0000000000..bbaefe9054 --- /dev/null +++ b/testsuite/tests/typing-misc-bugs/pr6946_bad.ml @@ -0,0 +1,2 @@ +external foo : int = "%ignore";; +let _ = foo ();; diff --git a/testsuite/tests/typing-modules-bugs/pr6981_ok.ml b/testsuite/tests/typing-modules-bugs/pr6981_ok.ml new file mode 100644 index 0000000000..e2b285b9d8 --- /dev/null +++ b/testsuite/tests/typing-modules-bugs/pr6981_ok.ml @@ -0,0 +1,10 @@ +module type S = +sig + type a + type b +end +module Foo + (Bar : S with type a = private [> `A]) + (Baz : S with type b = private < b : Bar.b ; .. 
>) = +struct +end diff --git a/testsuite/tests/typing-warnings/application.ml b/testsuite/tests/typing-warnings/application.ml new file mode 100644 index 0000000000..a0c420616f --- /dev/null +++ b/testsuite/tests/typing-warnings/application.ml @@ -0,0 +1,2 @@ +let _ = ignore (+);; +let _ = raise Exit 3;; diff --git a/testsuite/tests/typing-warnings/application.ml.reference b/testsuite/tests/typing-warnings/application.ml.reference new file mode 100644 index 0000000000..da825fd089 --- /dev/null +++ b/testsuite/tests/typing-warnings/application.ml.reference @@ -0,0 +1,13 @@ + +# Characters 15-18: + let _ = ignore (+);; + ^^^ +Warning 5: this function application is partial, +maybe some arguments are missing. +- : unit = () +# Characters 19-20: + let _ = raise Exit 3;; + ^ +Warning 20: this argument will not be used by the function. +Exception: Pervasives.Exit. +# diff --git a/tools/depend.ml b/tools/depend.ml index b9c84368e7..92b6cdc9ae 100644 --- a/tools/depend.ml +++ b/tools/depend.ml @@ -200,6 +200,12 @@ let rec add_expr bv exp = | Pexp_newtype (_, e) -> add_expr bv e | Pexp_pack m -> add_module bv m | Pexp_open (_ovf, m, e) -> open_module bv m.txt; add_expr bv e + | Pexp_extension ({ txt = ("ocaml.extension_constructor"|"extension_constructor"); _ }, + PStr [item]) -> + begin match item.pstr_desc with + | Pstr_eval ({ pexp_desc = Pexp_construct (c, None) }, _) -> add bv c + | _ -> () + end | Pexp_extension _ -> () | Pexp_unreachable -> () diff --git a/typing/ctype.ml b/typing/ctype.ml index c1df6c0b5f..d69fba248e 100644 --- a/typing/ctype.ml +++ b/typing/ctype.ml @@ -593,32 +593,17 @@ let duplicate_class_type ty = [expand_abbrev] (via [subst]) requires these expansions to be preserved. Does it worth duplicating this code ? 
*) -let rec iter_generalize tyl ty = +let rec generalize ty = let ty = repr ty in if (ty.level > !current_level) && (ty.level <> generic_level) then begin set_level ty generic_level; begin match ty.desc with Tconstr (_, _, abbrev) -> - iter_abbrev (iter_generalize tyl) !abbrev + iter_abbrev generalize !abbrev | _ -> () end; - iter_type_expr (iter_generalize tyl) ty - end else - tyl := ty :: !tyl - -let iter_generalize tyl ty = - simple_abbrevs := Mnil; - iter_generalize tyl ty - -let generalize ty = - iter_generalize (ref []) ty - -(* Efficient repeated generalisation of the same type *) -let iterative_generalization min_level tyl = - let tyl' = ref [] in - List.iter (iter_generalize tyl') tyl; - List.fold_right (fun ty l -> if ty.level <= min_level then l else ty::l) - !tyl' [] + iter_type_expr generalize ty + end (* Generalize the structure and lower the variables *) @@ -3291,19 +3276,23 @@ and eqtype_row rename type_pairs subst env row1 row2 = | _ -> raise (Unify [])) pairs +(* Must empty univar_pairs first *) +let eqtype_list rename type_pairs subst env tl1 tl2 = + univar_pairs := []; + let snap = Btype.snapshot () in + try eqtype_list rename type_pairs subst env tl1 tl2; backtrack snap + with exn -> backtrack snap; raise exn + +let eqtype rename type_pairs subst env t1 t2 = + eqtype_list rename type_pairs subst env [t1] [t2] + (* Two modes: with or without renaming of variables *) let equal env rename tyl1 tyl2 = try - univar_pairs := []; eqtype_list rename (TypePairs.create 11) (ref []) env tyl1 tyl2; true with Unify _ -> false -(* Must empty univar_pairs first *) -let eqtype rename type_pairs subst env t1 t2 = - univar_pairs := []; - eqtype rename type_pairs subst env t1 t2 - (*************************) (* Class type matching *) diff --git a/typing/ctype.mli b/typing/ctype.mli index 90bfcbe6de..6a0b506e06 100644 --- a/typing/ctype.mli +++ b/typing/ctype.mli @@ -90,8 +90,6 @@ val filter_row_fields: val generalize: type_expr -> unit (* Generalize in-place the 
given type *) -val iterative_generalization: int -> type_expr list -> type_expr list - (* Efficient repeated generalization of a type *) val generalize_expansive: Env.t -> type_expr -> unit (* Generalize the covariant part of a type, making contravariant branches non-generalizable *) diff --git a/typing/predef.ml b/typing/predef.ml index 2219d29a76..f59d0a07e3 100644 --- a/typing/predef.ml +++ b/typing/predef.ml @@ -41,6 +41,7 @@ and ident_int32 = ident_create "int32" and ident_int64 = ident_create "int64" and ident_lazy_t = ident_create "lazy_t" and ident_string = ident_create "string" +and ident_extension_constructor = ident_create "extension_constructor" let path_int = Pident ident_int and path_char = Pident ident_char @@ -57,6 +58,7 @@ and path_int32 = Pident ident_int32 and path_int64 = Pident ident_int64 and path_lazy_t = Pident ident_lazy_t and path_string = Pident ident_string +and path_extension_constructor = Pident ident_extension_constructor let type_int = newgenty (Tconstr(path_int, [], ref Mnil)) and type_char = newgenty (Tconstr(path_char, [], ref Mnil)) @@ -73,6 +75,7 @@ and type_int32 = newgenty (Tconstr(path_int32, [], ref Mnil)) and type_int64 = newgenty (Tconstr(path_int64, [], ref Mnil)) and type_lazy_t t = newgenty (Tconstr(path_lazy_t, [t], ref Mnil)) and type_string = newgenty (Tconstr(path_string, [], ref Mnil)) +and type_extension_constructor = newgenty (Tconstr(path_extension_constructor, [], ref Mnil)) let ident_match_failure = ident_create_predef_exn "Match_failure" and ident_out_of_memory = ident_create_predef_exn "Out_of_memory" @@ -213,7 +216,8 @@ let common_initial_env add_type add_extension empty_env = add_type ident_string decl_abstr ( add_type ident_char decl_abstr ( add_type ident_int decl_abstr ( - empty_env)))))))))))))))))))))))))) + add_type ident_extension_constructor decl_abstr ( + empty_env))))))))))))))))))))))))))) let build_initial_env add_type add_exception empty_env = let common = common_initial_env add_type 
add_exception empty_env in diff --git a/typing/predef.mli b/typing/predef.mli index 8cdac5a051..a5d4914a8a 100644 --- a/typing/predef.mli +++ b/typing/predef.mli @@ -29,6 +29,7 @@ val type_nativeint: type_expr val type_int32: type_expr val type_int64: type_expr val type_lazy_t: type_expr -> type_expr +val type_extension_constructor:type_expr val path_int: Path.t val path_char: Path.t @@ -45,6 +46,7 @@ val path_nativeint: Path.t val path_int32: Path.t val path_int64: Path.t val path_lazy_t: Path.t +val path_extension_constructor: Path.t val path_match_failure: Path.t val path_assert_failure : Path.t diff --git a/typing/printtyped.ml b/typing/printtyped.ml index 1e0423efb4..68ac2718e1 100644 --- a/typing/printtyped.ml +++ b/typing/printtyped.ml @@ -373,6 +373,8 @@ and expression i ppf x = module_expr i ppf me | Texp_unreachable -> line i ppf "Texp_unreachable" + | Texp_extension_constructor (li, _) -> + line i ppf "Texp_extension_constructor %a" fmt_longident li and value_description i ppf x = line i ppf "value_description %a %a\n" fmt_ident x.val_id fmt_location diff --git a/typing/tast_mapper.ml b/typing/tast_mapper.ml index ee2b9f3ec1..0524bd0e37 100644 --- a/typing/tast_mapper.ml +++ b/typing/tast_mapper.ml @@ -330,6 +330,8 @@ let expr sub x = Texp_pack (sub.module_expr sub mexpr) | Texp_unreachable -> Texp_unreachable + | Texp_extension_constructor _ as e -> + e in {x with exp_extra; exp_desc; exp_env} diff --git a/typing/typecore.ml b/typing/typecore.ml index 8fe012ce3c..0dcff0e57b 100644 --- a/typing/typecore.ml +++ b/typing/typecore.ml @@ -69,6 +69,8 @@ type error = | Exception_pattern_below_toplevel | Inlined_record_escape | Unrefuted_pattern of pattern + | Invalid_extension_constructor_payload + | Not_an_extension_constructor exception Error of Location.t * Env.t * error exception Error_forward of Location.error @@ -2839,6 +2841,27 @@ and type_expect_ ?in_function ?(recarg=Rejected) env sexp ty_expected = sexp.pexp_attributes) :: exp.exp_extra; } + + | 
Pexp_extension ({ txt = ("ocaml.extension_constructor"|"extension_constructor"); _ }, + payload) -> + begin match payload with + | PStr [ { pstr_desc = + Pstr_eval ({ pexp_desc = Pexp_construct (lid, None); _ }, _) + } ] -> + let path = + match (Typetexp.find_constructor env lid.loc lid.txt).cstr_tag with + | Cstr_extension (path, _) -> path + | _ -> raise (Error (lid.loc, env, Not_an_extension_constructor)) + in + rue { + exp_desc = Texp_extension_constructor (lid, path); + exp_loc = loc; exp_extra = []; + exp_type = instance_def Predef.type_extension_constructor; + exp_attributes = sexp.pexp_attributes; + exp_env = env } + | _ -> + raise (Error (loc, env, Invalid_extension_constructor_payload)) + end | Pexp_extension ext -> raise (Error_forward (Typetexp.error_of_extension ext)) @@ -3485,10 +3508,17 @@ and type_application env funct sargs = type_unknown_args args omitted ty_fun0 (sargs @ more_sargs) in - match funct.exp_desc, sargs with + let is_ignore funct = + match funct.exp_desc with + Texp_ident (_, _, {val_kind=Val_prim{Primitive.prim_name="%ignore"}}) -> + (try ignore (filter_arrow env (instance env funct.exp_type) Nolabel); + true + with Unify _ -> false) + | _ -> false + in + match sargs with (* Special case for ignore: avoid discarding warning *) - Texp_ident (_, _, {val_kind=Val_prim{Primitive.prim_name="%ignore"}}), - [Nolabel, sarg] -> + [Nolabel, sarg] when is_ignore funct -> let ty_arg, ty_res = filter_arrow env (instance env funct.exp_type) Nolabel in @@ -4213,6 +4243,12 @@ let report_error env ppf = function "This match case could not be refuted." "Here is an example of a value that would reach it:" Parmatch.top_pretty pat + | Invalid_extension_constructor_payload -> + fprintf ppf + "Invalid [%%extension_constructor] payload, a constructor is expected." + | Not_an_extension_constructor -> + fprintf ppf + "This constructor is not an extension constructor." 
let report_error env ppf err = wrap_printing_env env (fun () -> report_error env ppf err) diff --git a/typing/typecore.mli b/typing/typecore.mli index 06942a74a7..5434093ca6 100644 --- a/typing/typecore.mli +++ b/typing/typecore.mli @@ -114,6 +114,8 @@ type error = | Exception_pattern_below_toplevel | Inlined_record_escape | Unrefuted_pattern of Typedtree.pattern + | Invalid_extension_constructor_payload + | Not_an_extension_constructor exception Error of Location.t * Env.t * error exception Error_forward of Location.error diff --git a/typing/typedtree.ml b/typing/typedtree.ml index b8033795c3..16525a6f6c 100644 --- a/typing/typedtree.ml +++ b/typing/typedtree.ml @@ -105,6 +105,7 @@ and expression_desc = | Texp_object of class_structure * string list | Texp_pack of module_expr | Texp_unreachable + | Texp_extension_constructor of Longident.t loc * Path.t and meth = Tmeth_name of string diff --git a/typing/typedtree.mli b/typing/typedtree.mli index 89ef544717..24439d113c 100644 --- a/typing/typedtree.mli +++ b/typing/typedtree.mli @@ -205,6 +205,7 @@ and expression_desc = | Texp_object of class_structure * string list | Texp_pack of module_expr | Texp_unreachable + | Texp_extension_constructor of Longident.t loc * Path.t and meth = Tmeth_name of string diff --git a/typing/typedtreeIter.ml b/typing/typedtreeIter.ml index b3e5437cf2..419d67c709 100644 --- a/typing/typedtreeIter.ml +++ b/typing/typedtreeIter.ml @@ -347,6 +347,8 @@ module MakeIterator(Iter : IteratorArgument) : sig iter_module_expr mexpr | Texp_unreachable -> () + | Texp_extension_constructor _ -> + () end; Iter.leave_expression exp; diff --git a/typing/typedtreeMap.ml b/typing/typedtreeMap.ml index 5965b16a9d..6a4d8b4f8b 100644 --- a/typing/typedtreeMap.ml +++ b/typing/typedtreeMap.ml @@ -376,6 +376,8 @@ module MakeMap(Map : MapArgument) = struct Texp_pack (map_module_expr mexpr) | Texp_unreachable -> Texp_unreachable + | Texp_extension_constructor _ as e -> + e in let exp_extra = List.map map_exp_extra 
exp.exp_extra in Map.leave_expression { diff --git a/typing/typemod.ml b/typing/typemod.ml index 855488c609..3e2c8054a6 100644 --- a/typing/typemod.ml +++ b/typing/typemod.ml @@ -1104,6 +1104,7 @@ let rec type_module ?(alias=false) sttn funct_body anchor env smod = let (id, newenv), funct_body = match ty_arg with None -> (Ident.create "*", env), false | Some mty -> Env.enter_module ~arg:true name.txt mty env, true in + Ctype.init_def(Ident.current_time()); (* PR#6981 *) let body = type_module sttn funct_body None newenv sbody in rm { mod_desc = Tmod_functor(id, name, mty, body); mod_type = Mty_functor(id, ty_arg, body.mod_type); diff --git a/typing/untypeast.ml b/typing/untypeast.ml index 0a61cb9c5d..d2bdd28819 100644 --- a/typing/untypeast.ml +++ b/typing/untypeast.ml @@ -446,6 +446,11 @@ let expression sub exp = Pexp_pack (sub.module_expr sub mexpr) | Texp_unreachable -> Pexp_unreachable + | Texp_extension_constructor (lid, _) -> + Pexp_extension ({ txt = "ocaml.extension_constructor"; loc }, + PStr [ Str.eval ~loc + (Exp.construct ~loc (map_loc sub lid) None) + ]) in List.fold_right (exp_extra sub) exp.exp_extra (Exp.mk ~loc ~attrs desc) diff --git a/utils/config.mlp b/utils/config.mlp index c668d13214..09f639cd86 100644 --- a/utils/config.mlp +++ b/utils/config.mlp @@ -57,7 +57,7 @@ and cmxa_magic_number = "Caml1999Z014" and ast_impl_magic_number = "Caml1999M019" and ast_intf_magic_number = "Caml1999N018" and cmxs_magic_number = "Caml2007D002" -and cmt_magic_number = "Caml2012T006" +and cmt_magic_number = "Caml2012T007" let load_path = ref ([] : string list) |