diff options
author | michele.simionato <devnull@localhost> | 2007-12-19 05:11:48 +0000 |
---|---|---|
committer | michele.simionato <devnull@localhost> | 2007-12-19 05:11:48 +0000 |
commit | 63ec666f20135b0e7c74c9a9ecca8f7262979c4d (patch) | |
tree | c1d3f099855010ba70d5b80ecf1b4b8ad05267d0 /ml | |
parent | e50d231eb3ca09f476374ddbb3f69a58fc198c02 (diff) | |
download | micheles-63ec666f20135b0e7c74c9a9ecca8f7262979c4d.tar.gz |
Added a few missing files
Diffstat (limited to 'ml')
-rw-r--r-- | ml/index.html | 1819 | ||||
-rw-r--r-- | ml/index.txt | 7 | ||||
-rw-r--r-- | ml/simple-io.aml | 31 |
3 files changed, 1857 insertions, 0 deletions
diff --git a/ml/index.html b/ml/index.html new file mode 100644 index 0000000..95a90af --- /dev/null +++ b/ml/index.html @@ -0,0 +1,1819 @@ +<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Docutils 0.4: http://docutils.sourceforge.net/" /> +<title></title> +<style type="text/css"> + +.highlight { background: #f8f8f8; } +.highlight .c { color: #408080; font-style: italic } /* Comment */ +.highlight .err { border: 1px solid #FF0000 } /* Error */ +.highlight .k { color: #008000; font-weight: bold } /* Keyword */ +.highlight .o { color: #666666 } /* Operator */ +.highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #BC7A00 } /* Comment.Preproc */ +.highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #408080; font-style: italic } /* Comment.Special */ +.highlight .gd { color: #A00000 } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .gr { color: #FF0000 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #808080 } /* Generic.Output */ +.highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #0040D0 } /* Generic.Traceback */ +.highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +.highlight .kp { color: #008000 } /* Keyword.Pseudo */ +.highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #008000; font-weight: bold } /* Keyword.Type */ +.highlight .m { color: #666666 } /* Literal.Number */ +.highlight .s { color: #BA2121 } /* Literal.String */ +.highlight .na { color: #7D9029 } /* Name.Attribute */ +.highlight .nb { color: #008000 } /* Name.Builtin */ +.highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */ +.highlight .no { color: #880000 } /* Name.Constant */ +.highlight .nd { color: #AA22FF } /* Name.Decorator */ +.highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */ +.highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #0000FF } /* Name.Function */ +.highlight .nl { color: #A0A000 } /* Name.Label */ +.highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ +.highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #19177C } /* Name.Variable */ +.highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mf { color: #666666 } /* Literal.Number.Float */ +.highlight .mh { color: #666666 } /* Literal.Number.Hex */ +.highlight .mi { color: #666666 } /* Literal.Number.Integer */ +.highlight .mo { color: #666666 } /* Literal.Number.Oct */ +.highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +.highlight .sc { color: #BA2121 } /* Literal.String.Char */ +.highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +.highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +.highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ +.highlight .sx { color: #008000 } /* Literal.String.Other */ +.highlight .sr { color: #BB6688 } /* Literal.String.Regex */ +.highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +.highlight .ss { color: #19177C } /* Literal.String.Symbol */ +.highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +.highlight .vc { color: #19177C } /* Name.Variable.Class */ +.highlight .vg { color: #19177C } /* Name.Variable.Global */ +.highlight .vi { color: #19177C } /* Name.Variable.Instance */ +.highlight .il { color: #666666 } /* Literal.Number.Integer.Long */ + +</style> +</head> +<body> +<div class="document"> +<div class="contents topic"> +<p class="topic-title first"><a id="contents" name="contents">Contents</a></p> +<ul class="simple"> +<li><a class="reference" href="#functional-programming-for-dynamic-programmers-part-1" id="id23" name="id23">Functional Programming For Dynamic Programmers - Part 1</a><ul> +<li><a class="reference" href="#declaration-of-intents" id="id24" name="id24">Declaration of intents</a></li> +<li><a class="reference" href="#micro-introduction-to-functional-languages" id="id25" name="id25">Micro-introduction to functional languages</a></li> +<li><a class="reference" href="#getting-an-sml-compiler" id="id26" name="id26">Getting an SML compiler</a></li> +<li><a class="reference" href="#hello-world" id="id27" name="id27">Hello world</a></li> +</ul> +</li> +<li><a class="reference" href="#functional-programming-for-dynamic-programmers-part-2" id="id28" name="id28">Functional Programming For Dynamic Programmers - Part 2</a><ul> +<li><a class="reference" href="#functional-programming-and-input-output" id="id29" name="id29">Functional programming and Input/Output</a></li> +<li><a class="reference" href="#loops-and-recursion" id="id30" name="id30">Loops and recursion</a></li> +<li><a class="reference" href="#higher-order-functions" id="id31" name="id31">Higher order functions</a></li> +</ul> +</li> +<li><a class="reference" href="#functional-programming-for-dynamic-programmers-part-3" id="id32" name="id32">Functional Programming For Dynamic Programmers - Part 3</a><ul> +<li><a class="reference" href="#writing-your-first-module" id="id33" name="id33">Writing your first module</a></li> +<li><a class="reference" href="#importing-modules" id="id34" name="id34">Importing modules</a></li> +<li><a class="reference" href="#structures-are-not-first-class-values" id="id35" name="id35">Structures are not first class values</a></li> +<li><a class="reference" href="#abstract-signatures-and-abstract-types" id="id36" name="id36">Abstract signatures and abstract types</a></li> +</ul> +</li> +<li><a class="reference" href="#functional-programming-for-dynamic-programmers-part-4" id="id37" name="id37">Functional Programming For Dynamic Programmers - Part 4</a><ul> +<li><a class="reference" href="#academic-languages-vs-enterprise-languages" id="id38" name="id38">Academic languages vs enterprise languages</a></li> +<li><a class="reference" href="#string-interpolation-the-practical-way" id="id39" name="id39">String interpolation the practical way</a></li> +<li><a class="reference" href="#string-interpolation-the-mathematical-way" id="id40" name="id40">String interpolation the mathematical way</a></li> +</ul> +</li> +<li><a class="reference" href="#functional-programming-for-dynamic-programmers-part-5" id="id41" name="id41">Functional Programming For Dynamic Programmers - Part 5</a><ul> +<li><a class="reference" href="#the-standard-ml-type-system" id="id42" name="id42">The Standard ML type system</a></li> +<li><a class="reference" href="#more-on-datatypes" id="id43" name="id43">More on datatypes</a></li> +<li><a class="reference" href="#poor-man-s-polymorphism" id="id44" name="id44">Poor man's polymorphism</a></li> +<li><a class="reference" href="#input-and-output-revisited" id="id45" name="id45">Input and Output revisited</a></li> +</ul> +</li> +</ul> +</div> +<div class="section"> +<h1><a class="toc-backref" href="#id23" id="functional-programming-for-dynamic-programmers-part-1" name="functional-programming-for-dynamic-programmers-part-1">Functional Programming For Dynamic Programmers - Part 1</a></h1> +<table class="docutils field-list" frame="void" rules="none"> +<col class="field-name" /> +<col class="field-body" /> +<tbody valign="top"> +<tr class="field"><th class="field-name">author:</th><td class="field-body">Michele Simionato</td> +</tr> +<tr class="field"><th class="field-name">date:</th><td class="field-body">December 2007</td> +</tr> +</tbody> +</table> +<p>This is the first of a series of articles on functional programming in +statically typed languages. It is intended for dynamic programmers, i.e. +for programmers with a background in dynamically typed languages, such as Perl, +Python, Ruby, or languages in the Lisp family. The approch is eminently practical +and example-based; the main goal is to see if we can stole some good idea from +statically typed languages. In order to be concrete, I will consider languages +in the ML family, because they are pretty nice and much easier to understand +that Haskell.</p> +<div class="section"> +<h2><a class="toc-backref" href="#id24" id="declaration-of-intents" name="declaration-of-intents">Declaration of intents</a></h2> +<p>One may rightly wonder why to study functional languages, and in +particular statically-typed ones such as SML and Haskell: after all, +their relevance in enterprise programming is negligible, they are practically +unknown for system +administration task, and unless you are a computer science researcher +you have no need to know them. All that is true; on the other hand, if +you believed it, you would not be a dynamic programmer, you would have +stick with C and you would not be a reader of my papers. +A dynamic programmer is the kind of person who, when everbody says +"the Earth is flat", will wonder: "is the Earth <em>really</em> flat?". So, +even if everybody says that object orientation is good and that +inheritance is the best thing after sliced bread, a dynamic programmer +will be suspicious of these claims and will seek for alternatives. +Exactly in the same way he escaped from the enslaving by static typing +in old fashioned languages, he will escape from the enslaving by +dynamic typing languages, if he finds valid alternatives. A dynamic +programmer has a kind of Casanova complex, and he will never be loyal +to a single language. +In particular, a dynamic programmer will judge academic languages +worth of investigation: he may decided not to use them, but still he may get +some good ideas from them. After all, Python stole the list comprehension +idea from Haskell and iterators from Icon; Ruby stole many ideas from Scheme +and Smalltalk; Perl stole from everybody. +I always wanted to learn a modern statically-typed functional +language, at least to have something intelligent to say in the endless +usenet discussions on dynamic typing vs. static typing; recently, by +accident, I run into Standard ML (SML, ML stands for Meta Language) +and I found out it has a very clean syntax, so I decided to learn it <a class="footnote-reference" href="#id3" id="id1" name="id1">[1]</a>.</p> +<p>SML is practically unknown outside academia (its cousins Caml and F# are a +little more known, but they are certainly very far away from being +mainstream languages) and I am not interested in advocating it for use +in enterprise programming; after all in enterprise programming the technical +advantages of a language count very little compared to things like the support +you can get, the ability to find skilled programmers, the interoperability with +your existent codebase and development environment (I mean the tools and +libraries you use, the database, etc). My motivation here is to learn something +from SML, and to try to avoid mistakes in mainstream languages which have +been already fixed in SML. Let me give a concrete example: in the past I have been +using Zope and Twisted, which are enterprise-level Python frameworks. They +share a common implementation of interfaces. I have always had a bad gut +feeling about Zope interfaces: I always thought they were overkill, complex, and +not doing by default what they (In my opinion) should do, i.e. interface checking. +I always thought there should be a better solution (Python 3.0 actually will +have a simpler implementation of interfaces in the core language) +but until I studied SML I had no idea of how the solution should look like. +I think SML has the better module system I have ever seen; and my goal +here is to popularize it, trying to get some of SML ideas into Python frameworks.</p> +</div> +<div class="section"> +<h2><a class="toc-backref" href="#id25" id="micro-introduction-to-functional-languages" name="micro-introduction-to-functional-languages">Micro-introduction to functional languages</a></h2> +<p>Simply put, functional languages are programming languages that try +very hard to avoid mutation and side effects. There are no pure +functional languages in use, since an absolutely pure language would +have to rule out input and output and it would be pretty much useless; +nevertheless, some functional languages are purer that others. For +instance, there are languages which have some support for functional +programming, but are not considered functional (Python being an +example); others, like Lisp, which is regarded as the grandfather of +functional languages, that are pretty much regarded as impure +nowadays; Scheme is somewhat perceived as more functional than Common +Lisp, but this is debatable, and in any case it is not very pure; +Standard ML (SML) is often perceived as purer than Scheme, but mostly +because of syntactical convenience, not of technical merit; Haskell is +possibly the purest functional language commonly used, but still can +be used in an imperative way. This classification is sloppy and wrong +in more than one way; nevertheless it is useful (in my personal +opinion) in the sense that it gives and indication of how much the +language departs from a mainstream imperative language, and how +difficult it is to learn it. Functional languages come in two kinds: +dynamically typed (Lisp, Scheme, Erlang ...) and statically typed +(SML, OCaml, Haskell ...). Programmers used to dynamic typing (i.e +Pythonistas, Rubystas, etc) will have less trouble to understand +dynamically typed functional language, so it is more interesting (in +my opinion) to look at statically typed languages. After all, I think +a programmer should have at least a statically typed language in his +toolbox (C++ and Java do not count). SML is the simplest statically +language out there, so l decided to start with it.</p> +<p>In the following, I will often contrast the functional way with the imperative +way, and I will mostly use Python as an example of imperative language, +because it is a language that I know decently well, and because it is +so readable that the examples will be easy to understand even for +programmers not familiar with Python.</p> +</div> +<div class="section"> +<h2><a class="toc-backref" href="#id26" id="getting-an-sml-compiler" name="getting-an-sml-compiler">Getting an SML compiler</a></h2> +<p>SML is a language with many implementations, so if you want to try it, +you must decide which implementation to use. I have played with some +of them, including SML of New Jersey (<a class="reference" href="http://www.smlnj.org">http://www.smlnj.org</a>) and MLton +(<a class="reference" href="http://mlton.org">http://mlton.org</a>) but at the end I have decided to use Alice ML, +since it has a lot of innovative features, an interesting set of libraries, and +some clean syntax extensions. It is also a very dynamic version of SML, +so that dynamic programmers will not feel too constrained ;-) +On Ubuntu, you can install it with +<tt class="docutils literal"><span class="pre">apt-get</span> <span class="pre">install</span> <span class="pre">alice-runtime</span></tt> (having added +<tt class="docutils literal"><span class="pre">deb</span> <span class="pre">http://www.ps.uni-sb.de/alice/download/debian</span> <span class="pre">stable</span> <span class="pre">contrib</span></tt> to your +<tt class="docutils literal"><span class="pre">/etc/apt/sources.list</span></tt>) where on the Mac, you can download the +<tt class="docutils literal"><span class="pre">.dmg</span></tt> installer and add <tt class="docutils literal"><span class="pre">/Applications/Alice.app/Contents/Resources/bin</span></tt> +to your PATH. There is also an installer for Windows, but if you are a dynamic +programmer I do not expect you to use Windows, unless you are forced +to ;)</p> +<p>Having installed Alice you can start the "interpreter" or REPL +(read-eval-print-loop) <a class="footnote-reference" href="#id4" id="id2" name="id2">[2]</a> as follows:</p> +<pre class="literal-block"> +$ alice +Alice 1.4 ("Kraftwerk 'Equaliser' Album") mastered 2007/04/24 +### loaded signature from x-alice:/lib/system/Print +### loaded signature from x-alice:/lib/tools/Inspector +### loaded signature from x-alice:/lib/distribution/Remote +</pre> +<p>Now you can write your first mandatory "Hello world" program:</p> +<pre class="literal-block"> +- print "Hello world\n"; +Hello world +val it : unit = () +</pre> +<p>and you are ready to start. If you are an old timer Emacs user, you +may also want to install (Aqua)Emacs and the sml-mode for Emacs (Alice +has a custom version of it, that you can download from <a class="reference" href="http://www.ps.uni-sb.de/alice/download/sml-mode-4.0+alice.zip">here</a> ).</p> +<table class="docutils footnote" frame="void" id="id3" rules="none"> +<colgroup><col class="label" /><col /></colgroup> +<tbody valign="top"> +<tr><td class="label"><a class="fn-backref" href="#id1" name="id3">[1]</a></td><td>In principle, one may argue that choosing a language because of +its syntax is stupid and that one should look at its features, +libraries, reliability, etc; in practice, many languages are +more or less equivalent in terms of features, libraries and +reliability, so that the main factor for the choice of the +language is it syntax. Also, when you are a beginner and you +need to write only simple programs, syntax is all that +matters.</td></tr> +</tbody> +</table> +<table class="docutils footnote" frame="void" id="id4" rules="none"> +<colgroup><col class="label" /><col /></colgroup> +<tbody valign="top"> +<tr><td class="label"><a class="fn-backref" href="#id2" name="id4">[2]</a></td><td>Technically the Alice REPL is not an interpreter: the +expressions you enter are compiled on the fly and executed +immediately, not interpreted. However, the overall +user-experience is the same as if you had an +interpreter. This approach is commons in ML and Lisp implementations.</td></tr> +</tbody> +</table> +<p>It may be useful sometime to contrast Alice with more traditional ML +implementations (especially to see if the error messages are more +understandable with another compiler) so you want to run +<tt class="docutils literal"><span class="pre">apt-get</span> <span class="pre">install</span> <span class="pre">smlnj</span> <span class="pre">mlton</span></tt> as well.</p> +</div> +<div class="section"> +<h2><a class="toc-backref" href="#id27" id="hello-world" name="hello-world">Hello world</a></h2> +<p>In the previous section I have shown how the traditional "Hello world" +program looks in ML. ML is a language without statements (as usual in +functional languages; statements are actually a bad idea IMO, even +for imperative languages <a class="footnote-reference" href="#id7" id="id5" name="id5">[3]</a>) so <em>print</em> is a function taking a string in +input and returning an empty tuple in output, as you +can see from the last line of output</p> +<p><tt class="docutils literal"><span class="pre">val</span> <span class="pre">it:</span> <span class="pre">unit</span> <span class="pre">=</span> <span class="pre">()</span></tt></p> +<p>The empty tuple is of type <tt class="docutils literal"><span class="pre">unit</span></tt> and it is the sole representative of that +type (i.e. it is a singleton). In general, any object has a type; +for instance, if you enter the number 42 at the prompt you will get</p> +<pre class="literal-block"> +- 42; +val it: int = 42 +</pre> +<p>The REPL just prints a string representation of the value +you entered, together with its type (in this case, int).</p> +<p>A point to notice in the "Hello world" program +is the lack of parenthesis: we wrote <tt class="docutils literal"><span class="pre">print</span> +<span class="pre">"Hello</span> <span class="pre">world\n";</span></tt> and not <tt class="docutils literal"><span class="pre">print("Hello</span> <span class="pre">world\n")</span></tt>. This is +possible since the <tt class="docutils literal"><span class="pre">print</span></tt> function in ML takes only a <em>single</em> +argument; actually <em>all</em> functions in SML takes a single argument!</p> +<p>You may wonder how is it possible to program anything serious if all +functions take a single argument: the answer is that the single +argument can be of a composite type, for instance a tuple. So you may +define functions like the following</p> +<pre class="literal-block"> +- fun print2 (arg1, arg2)= (print arg1; print arg2); +val print2 : string * string -> unit = _fn +</pre> +<p>Here <tt class="docutils literal"><span class="pre">print2</span></tt> takes a single argument, which is a 2-tuple (a pair), +and returns the null (unit) value. The interesting thing is that the +compiler has been able to figure out by itself that the arguments must +be of string type, since <tt class="docutils literal"><span class="pre">print</span></tt> expects a string, and that +<tt class="docutils literal"><span class="pre">print2</span></tt> will return an <tt class="docutils literal"><span class="pre">unit</span></tt> value, since <tt class="docutils literal"><span class="pre">print</span></tt> does so: we +are seeing here the type -inferencer at work. We also see that a +function is just a value as any other value, i.e. functions are first +class objects, as usual in dynamics languages; its type is +<tt class="docutils literal"><span class="pre">string</span> <span class="pre">*</span> <span class="pre">string</span> <span class="pre">-></span> <span class="pre">unit</span></tt>.</p> +<p>Let us check that <tt class="docutils literal"><span class="pre">print2</span></tt> works as expected:</p> +<pre class="literal-block"> +- print2 ("hello ", "world"); +hello world +val it: unit = () +</pre> +<p>If you want to print a generic number of arguments, you can use a list:</p> +<pre class="literal-block"> +- fun printList lst = app print lst; +val printList : string list -> unit =_fn +- printList["Hello ", "World", "!", "\n"]; +Hello World! +val it : unit = () +</pre> +<p>The builtin function <tt class="docutils literal"><span class="pre">app</span></tt> apply the <tt class="docutils literal"><span class="pre">print</span></tt> function to each element of the +list from left to right.</p> +<p>ML does not have default arguments, so you cannot write the analogous +of this Python function:</p> +<pre class="literal-block"> +>>> def printUser(user="Michele"): +... print "Hello,", user +>>> printUser() +Hello, Michele +>>> printUser("Mario") +Hello, Mario +</pre> +<p>In Python, <tt class="docutils literal"><span class="pre">printUser</span></tt> can be called with zero or one arguments, but in ML this +is not possible, since functions with different arity are of a different type and +ML does not support function overloading; however, you can however work around +this feature <a class="footnote-reference" href="#id8" id="id6" name="id6">[4]</a> by using options. ML has a standard <em>option</em> type, used for +objects which may have a NONE value; any object can be converted into an option +by wrapping it with SOME. An example will make it clear. Let me define:</p> +<pre class="literal-block"> +- fun printUser(userOpt: string option) = + printList ["Hello, ", getOpt(userOpt, "Michele"), "\n"]; +val printUser : string option -> unit = _fn +</pre> +<p>where <em>getOpt(opt, default)</em> is a builtin function which returns the +value of the option, or the default if the option have value NONE. We +may call <em>printUser</em> as follows:</p> +<pre class="literal-block"> +- printUser NONE; +Hello, Michele +- printUser (SOME "Mario"); +Hello, Mario +</pre> +<p>The syntax is definitively ugly, and there are actually clever tricks +to implement default arguments in a nicer way, but they are too +advanced for this introduction. But stay tuned for the next issues ;)</p> +<table class="docutils footnote" frame="void" id="id7" rules="none"> +<colgroup><col class="label" /><col /></colgroup> +<tbody valign="top"> +<tr><td class="label"><a class="fn-backref" href="#id5" name="id7">[3]</a></td><td>Incidentally, in current Python <tt class="docutils literal"><span class="pre">print</span></tt> is a statement and it +does not return anything, but in Python 3000 <tt class="docutils literal"><span class="pre">print</span></tt> will be +function returning None</td></tr> +</tbody> +</table> +<table class="docutils footnote" frame="void" id="id8" rules="none"> +<colgroup><col class="label" /><col /></colgroup> +<tbody valign="top"> +<tr><td class="label"><a class="fn-backref" href="#id6" name="id8">[4]</a></td><td>The lack of function overloading may be seen as a feature or as +a limitation, depending on the reader.</td></tr> +</tbody> +</table> +<hr class="docutils" /> +<p><em>A long journey starts with the first step</em></p> +<blockquote> +-- old Chinese saying</blockquote> +</div> +</div> +<div class="section"> +<h1><a class="toc-backref" href="#id28" id="functional-programming-for-dynamic-programmers-part-2" name="functional-programming-for-dynamic-programmers-part-2">Functional Programming For Dynamic Programmers - Part 2</a></h1> +<table class="docutils field-list" frame="void" rules="none"> +<col class="field-name" /> +<col class="field-body" /> +<tbody valign="top"> +<tr class="field"><th class="field-name">author:</th><td class="field-body">Michele Simionato</td> +</tr> +<tr class="field"><th class="field-name">date:</th><td class="field-body">December 2007</td> +</tr> +</tbody> +</table> +<p>This is the second of a series of articles on functional programming in +statically typed languages. It is intended for dynamic programmers, i.e. +for programmers with a background in dynamically typed languages, such as Perl, +Python, Ruby, or languages in the Lisp family. The approch is eminently practical +and example-based; the main goal is to see if we can stole some good idea from +statically typed languages. In order to be concrete, I will consider languages +in the ML family, because they are pretty nice and much easier to understand +that Haskell.</p> +<div class="section"> +<h2><a class="toc-backref" href="#id29" id="functional-programming-and-input-output" name="functional-programming-and-input-output">Functional programming and Input/Output</a></h2> +<p>Input/output is the black ship of functional programming and it is +often relegated at the end of tutorials and sometimes even omitted, as +if shameful. Sometime, it is not even standardized and +implementation-dependent. On the other hand, <em>all</em> programs must +perform some kind of input/output, or at least of output, otherwise we +would have no way to see what a program did. Therefore input/output +<em>must</em> be explained at the beginning of any tutorial, not at the +end. I always hated books not telling me how to read a file until the +appendix! In this section I will explain how you can read a file in +SML. Reading a file is for some reason seen as "inpure" since it is +not functional. Let me explain that a bit more: in mathematics, a +function takes an argument in input, and returns an output; if I call +twice the function with the same argument, I get twice the <em>same</em> +output. In programming, a procedure reading a line from a file returns +a <em>different</em> line at each invocation, even if called with the same +arguments (the arguments maybe simply the empty tuple), so it is not a +function in the mathematical sense. On the other hand, a procedure +writing a line into a file, is also impure, since it is modifying an +object, the file itself. +In strict functional programming there is no +mutation, objects are declared once and for all times they do not +change. Functional programming in its purest form is therefore useless: +notice that even the paradigmatic "Hello world" program is not a functional +program, since the "Hello world" string is printed via an imperative side +effect. In other words, every functional programming language must find a +non-functional way to manage input-output. Haskell tries very hard to hide +non-functional constructs into monads; SML just uses traditional +imperative constructs to perform input/output. We already saw the +<tt class="docutils literal"><span class="pre">print</span></tt> function to write on stdout; to read from stdin, you can +define the following utility:</p> +<pre class="literal-block"> +- fun input prompt = (print prompt; getOpt(TextIO.inputLine TextIO.stdIn, "")) +val input : string -> string = _fn +</pre> +<p>The parenthesis here are needed since the right hand side of a function definition +must be an expression, and the parenthesis (plus the expression separator ";") +allows you to compose expressions in a single expression returning as value +the value of the last expression. <tt class="docutils literal"><span class="pre">TextIO</span></tt> is a module in the standard library +managing textual I/O and <tt class="docutils literal"><span class="pre">TextIO.inputLine</span> <span class="pre">inputFile</span></tt> is a function +returning a line wrapped into an option object or NONE if we arrived at the +end of the input file. In particular, in the case of standard input, +we arrive at the end when we give CTRL-D (in a Unix-like system); in +this case <tt class="docutils literal"><span class="pre">input</span></tt> will return the empty string, otherwise it returns the +input line, comprehensive of the newline character.</p> +<p>Here is an example of usage:</p> +<pre class="literal-block"> +- input "Enter a string: "; +Enter a string: hello +val it : string = "hello\n" +- input "Enter CTRL-D "; +- Enter CTRL-D +val it : string = "" +</pre> +<p>I we want to read a text file (assuming it fits in memory) +the simplest way is to use <tt class="docutils literal"><span class="pre">TextIO.inputAll</span></tt>:</p> +<pre class="literal-block"> +- fun fileToString filename = let + val file = TextIO.openIn filename + in + TextIO.inputAll file finally TextIO.closeIn file + end +</pre> +<p>Conversely, if we want to write a text on a file we can define:</p> +<pre class="literal-block"> +- fun saveFile (fname, content) = let + val out = TextIO.openOut fname + in + TextIO.output(out, content) finally TextIO.closeOut out + end; +</pre> +<p>The examples here use the standard <em>let</em> expression, which has the form:</p> +<pre class="literal-block"> +let + <bindings> +in + <expression; .. ; expression> +end +</pre> +<p>and returns as value the value of the last expression. Moreover, the examples +here use the <em>finally</em> construct, which is an Alice extension to SML, and +works as you would expect:</p> +<pre class="literal-block"> +<expression possibly raising exceptions> finally <cleanup expression> +</pre> +<p>the finally clause if executed always, even +if there is an exception in the first expression.</p> +<p>The examples here are very simple and do not address the issue or reading +or writing large files which do not fit in memory. But don't worry, +I will address those situations in the next issues. In order to do so, +however, I need to perform a digression on two of the most common +programming techniques in functional languages, recursion and higher order +functions. You, as a dynamic programmer, will probably already know +both concepts, but in languages such are Python, Perl, or even Common Lisp +they are not as pervasive as in truly functional languages.</p> +</div> +<div class="section"> +<h2><a class="toc-backref" href="#id30" id="loops-and-recursion" name="loops-and-recursion">Loops and recursion</a></h2> +<p>Perhaps the most common construct in imperative programming is the <em>for</em> loop; +in spite of that, <em>for</em> loops are usually missing in functional languages. In this +section I will explain and the way to work around this omission. +To start with a practical example, suppose we want to define a function +to count the number of lines in a text file, something akin to the following +Python code <a class="footnote-reference" href="#id10" id="id9" name="id9">[5]</a>:</p> +<pre class="literal-block"> +def countLines(fname): + counter = 0 + with file(fname) as f: + for line in f: + counter += 1 + return counter +</pre> +<table class="docutils footnote" frame="void" id="id10" rules="none"> +<colgroup><col class="label" /><col /></colgroup> +<tbody valign="top"> +<tr><td class="label"><a class="fn-backref" href="#id9" name="id10">[5]</a></td><td>We are using here the <tt class="docutils literal"><span class="pre">with</span></tt> statement, available in Python 2.5 by +importing it from the future: <tt class="docutils literal"><span class="pre">from</span> <span class="pre">__future__</span> <span class="pre">import</span> <span class="pre">with_statement</span></tt>.</td></tr> +</tbody> +</table> +<p>How can we implement it without a <em>for</em> loop? +One solution is to cheat and to use a <em>while</em> loop, which exists in SML:</p> +<pre class="literal-block"> +fun countLines fname = let + val counter = ref 0 + val inp = TextIO.openIn fname +in + while not (TextIO.endOfStream inp) do + (TextIO.inputLine inp; counter := !counter + 1) + finally TextIO.closeIn inp; + !counter +end +</pre> +<p>Some explanation about ML references is in order here. +ML does not allow to mutate a variable directly (as a matter of fact +most types in ML are immutable); if you want to increment an integer, +you must wrap it into a reference object, and you must mutate the +reference. Some simple experiment at the prompt should give you +the gist of what references are.</p> +<p>Put the integer 0 into a memory location and returns a reference to it:</p> +<pre class="literal-block"> +- val counter = ref 0; +val counter : int ref = ref 0 +</pre> +<p>Return the value in the location pointed by the reference:</p> +<pre class="literal-block"> +- !counter; +val it : int = 0 +</pre> +<p>Put in that location the value 0+1; return the empty tuple:</p> +<pre class="literal-block"> +- counter := !counter + 1; +val it : unit = () +</pre> +<p>Return the new value of the counter:</p> +<pre class="literal-block"> +- !counter +val it : int = 1 +</pre> +<p>From this explanation, it should be obvious what <tt class="docutils literal"><span class="pre">countLines</span></tt> does; +the implementation works, but it is very ugly and strongly discouraged. I +show it here, to prove that SML can be used in an imperative way, not +to suggest to code in this way. However, it may be +not obvious to re-implement <tt class="docutils literal"><span class="pre">countLines</span></tt> without mutation, unless +you have already coded in functional languages.</p> +<p>There is a standard tecnique to solve this kind of problem, i.e. the conversion of an +imperative loop into a functional recursion: <em>the accumulator tecnique</em>.</p> +<p>The accumulator is the mutating parameter in the imperative loop, in this case, the +counter. The accumulator tecnique consists in rewriting the loop +as a recursive function, promoting the accumulator to the rank of additional +parameter in the function signature:</p> +<pre class="literal-block"> +- fun countLines' (file, counter) = + case TextIO.inputLine file + of NONE => counter + | SOME line => countLines'(file, counter + 1); +val countLines' : TextIO.instream * int -> int = _fn +</pre> +<p>As you see, <tt class="docutils literal"><span class="pre">countLines'</span></tt> is a recursive function calling itself +with an incremented counter, until the file is read completely, then +it returns the final value of the accumulator. You can then rewrite the +original function as</p> +<pre class="literal-block"> +- fun countLines (fname) = let + val file = TextIO.openIn fname + in + countLines' (file, 0) + finally TextIO.closeIn file + end; +val countLines : string -> int = _fn +</pre> +<p>Recursion and accumulators are ubiquitous in functional programming, so the +time spent understanding this example is worth it. I will close this paragraph +by noting that the accumulator tecnique is also used to convert non-tail-call +recursive functions like the good old factorial:</p> +<pre class="literal-block"> +- fun fact 0 = 1 + | fact n = n*fact (n-1); +</pre> +<p>into a tail-call recursive function, i.e. a function returning either a value without +making a recursive call, or directly the result of a recursive call:</p> +<pre class="literal-block"> +- fun fact n = fact' (n, 1) + and fact' (0, acc) = acc + | fact' (n, acc) = fact' (n-1, acc*n); +</pre> +<p>Here I have use the <tt class="docutils literal"><span class="pre">and</span></tt> syntax to define two or more conceptually connected +functions in a single step.</p> +<p>Tail-call recursive functions are equivalent to imperative loops, in this case to +the Python code:</p> +<pre class="literal-block"> +def fact(n): + acc = 1 + while n > 0: + acc = acc*n + n = n-1 + return acc +</pre> +<p>Internally, the compiler of functional languages are able to translate tail-call +recursive functions back to imperative loops, so that the implementation is +very efficient. In a language like Python instead, which is not a truly functional +language, it is possible to write:</p> +<pre class="literal-block"> +def fact(n, acc): + if n == 0: + return acc + else: + return fact(n-1, acc*n) +</pre> +<p>but this function is not converted into a loop, you will have a loss of performance +and you will incur in the recursion limit if try to compute the factorial of a +large enough integer (say 1000) <a class="footnote-reference" href="#id12" id="id11" name="id11">[6]</a> .</p> +<table class="docutils footnote" frame="void" id="id12" rules="none"> +<colgroup><col class="label" /><col /></colgroup> +<tbody valign="top"> +<tr><td class="label"><a class="fn-backref" href="#id11" name="id12">[6]</a></td><td>Notice that Python does not make tail call optimization <em>on +purpose</em>, essentially for two reasons: 1) the language has a +philosophy of preferring iteration over recursion; 2) keeping the +recursive calls in the stack gives more informative tracebacks and +helps debugging. Since I am at that, let me notice that Python error +reporting and debugging features are infinitely superior to the ones +of any SML or Scheme implementation I have seen. This is not because +of lack of tail call optimization, it is because as a general +philosophy Python does not care about speed but cares about +programmer; moreover, despite the fact that SML +and Scheme are twice as old as Python, a lot more of work went into +Python than in SML/Scheme for what concerns practical issues.</td></tr> +</tbody> +</table> +</div> +<div class="section"> +<h2><a class="toc-backref" href="#id31" id="higher-order-functions" name="higher-order-functions">Higher order functions</a></h2> +<p>Just as recursion is pervasive in functional programming, so are +higher order functions. You may judge how much functional is a language +by measuring how good is the support for recursion and for higher order +functions. In this respect, ML shines since it has a particularly elegant syntax to +define higher order functions i.e. functions returning functions.</p> +<!-- higher order functions: http://en.wikipedia.org/wiki/Higher_order_functions --> +<p>The canonical example of higher order function is the adder:</p> +<pre class="literal-block"> +- fun adder n = fn x => x+n; +val adder : int -> int -> int = _fn +</pre> +<p>The notation <tt class="docutils literal"><span class="pre">fn</span> <span class="pre">x</span> <span class="pre">=></span> <span class="pre">x+n</span></tt> denotes what is usually called a lambda +function, according to the Lisp tradition; in this example it means +that the <tt class="docutils literal"><span class="pre">adder</span></tt> returns a function which adds <em>n</em> to any integer +number; for instance:</p> +<pre class="literal-block"> +- val add1 = adder 1; (* adds 1 to any number *) +val add1 : int -> int = _fn +- add1 1; +val it : int = 2 +</pre> +<p>Notice that ML use the notation <tt class="docutils literal"><span class="pre">int</span> <span class="pre">-></span> <span class="pre">int</span> <span class="pre">-></span> <span class="pre">int</span></tt> to denote the type of +the adder, which should be read as</p> +<blockquote> +<tt class="docutils literal"><span class="pre">int</span> <span class="pre">-></span> <span class="pre">(int</span> <span class="pre">-></span> <span class="pre">int)</span></tt></blockquote> +<p>i.e. <em>the arrow associates on the right</em> and should be interpreted as +"the adder is a function taking an <em>int</em> and returning a function <em>int->int</em>". +On the other hand, <em>function application associates on the left</em> and +`` adder 1 2`` should be read as</p> +<blockquote> +<tt class="docutils literal"><span class="pre">(adder</span> <span class="pre">1)</span> <span class="pre">2</span></tt></blockquote> +<p>An equivalent, but cleaner notation for the adder is</p> +<pre class="literal-block"> +- fun adder n x = x+n; +val adder : int -> int -> int = _fn +</pre> +<p>Notice the difference betwen <tt class="docutils literal"><span class="pre">adder(n,</span> <span class="pre">x)</span></tt>, denoting a function of +two arguments, and <tt class="docutils literal"><span class="pre">adder</span> <span class="pre">n</span> <span class="pre">x</span></tt> denoting an higher order function, +so that <tt class="docutils literal"><span class="pre">adder</span> <span class="pre">n</span></tt> is a function itself.</p> +<p>The <tt class="docutils literal"><span class="pre">adder</span></tt> is a simple example, +but it does not make justice to the usefulness of higher order functions. +To give a practical example, we may consider the problem of avoiding +the boilerplate when opening/closing a file. Lisp use for that a few +macros (<tt class="docutils literal"><span class="pre">with-input-from-file</span></tt>, <tt class="docutils literal"><span class="pre">with-output-to-file</span></tt>) whereas +Python use a special syntax (the <tt class="docutils literal"><span class="pre">with</span></tt> statement); in ML it is quite +natural to define a higher order function such as the following:</p> +<pre class="literal-block"> +- fun ` manage fname = let + val file = TextIO.openIn fname + in + manage file + finally TextIO.closeIn file + end; + val ` : (TextIO.instream -> 'a) -> string -> 'a = _fn +</pre> +<p>Here <tt class="docutils literal"><span class="pre">`</span></tt> is just an identifier for the higher order function; I could have used +for it a longer name, such as +<tt class="docutils literal"><span class="pre">higherOrderFunctionAddingAutomaticOpenCloseFunctionality</span></tt>, but I am lazy, +and also I wanted to show that in ML one has a considerable freedom in the choice +of valid identifiers. The action of <tt class="docutils literal"><span class="pre">`</span></tt> is clear: it takes a function which operates +on TextIO.stream objects (i.e. text files) and converts it into a function that +operates on strings (i.e. file names) and has opening/closing functionality +embedded. The type of the return value is not specified at this stage, and +this is indicated by the notation <em>'a</em> (to be read alpha), which denotes a +generic type. +Using <tt class="docutils literal"><span class="pre">`</span></tt> the <tt class="docutils literal"><span class="pre">fileToString</span></tt> function defined in the first +article of this series could be written as simply as:</p> +<pre class="literal-block"> +- val fileToString = `TextIO.inputAll; +val fileToString : string -> string = _fn +</pre> +<p>whereas <tt class="docutils literal"><span class="pre">countLines</span></tt> could be written as:</p> +<pre class="literal-block"> +- val countLines = `(fn file => countLines'(file, 0)) +val countLines : string -> int = _fn +</pre> +<p>You should begin to appreciate the power of higher order functions, now ;) +An evident weakness of the approach used here, is that it works only +for text files (actually only for files opened for input; we would need to +define a different higher order function for files opened for output); +if we wanted to wrap binary files, we would need +to define equivalent higher order functions using the <tt class="docutils literal"><span class="pre">BinIO</span></tt> library +instead of <tt class="docutils literal"><span class="pre">TextIO</span></tt>; then, if we wanted to use it for sockets, we would need +to define yet another higher order function; in general there +are infinite resources which can be opened and closed, and we could define +an infinite number of higher order functions doing all the same thing. +This is bad; fortunately this potentially infinite code duplication can be solved +using functors, but you will have to wait for the next articles to see how to do it ;)</p> +<hr class="docutils" /> +<p><em>Debugging is twice as hard as writing the code in the first +place. Therefore, if you write the code as cleverly as possible, you +are, by definition, not smart enough to debug it.</em> -- Brian W. Kernighan</p> +</div> +</div> +<div class="section"> +<h1><a class="toc-backref" href="#id32" id="functional-programming-for-dynamic-programmers-part-3" name="functional-programming-for-dynamic-programmers-part-3">Functional Programming For Dynamic Programmers - Part 3</a></h1> +<table class="docutils field-list" frame="void" rules="none"> +<col class="field-name" /> +<col class="field-body" /> +<tbody valign="top"> +<tr class="field"><th class="field-name">author:</th><td class="field-body">Michele Simionato</td> +</tr> +<tr class="field"><th class="field-name">date:</th><td class="field-body">December 2007</td> +</tr> +</tbody> +</table> +<p>This is the third of a series of articles on functional programming in +statically typed languages. It is intended for dynamic programmers, i.e. +for programmers with a background in dynamically typed languages, such as Perl, +Python, Ruby, or languages in the Lisp family. The approch is eminently practical +and example-based; the main goal is to see if we can stole some good idea from +statically typed languages. In order to be concrete, I will consider languages +in the ML family, because they are pretty nice and much easier to understand +that Haskell.</p> +<div class="section"> +<h2><a class="toc-backref" href="#id33" id="writing-your-first-module" name="writing-your-first-module">Writing your first module</a></h2> +<p>Usually programs start small, but they have an unfortunate tendency to +grow, getting larger and larger and eventually to explode. To control +this tendency, programmers have invented various techniques, which are +essentially all variations of a same idea: large programs should be +decomposed in smaller, more manageable, conceptually connected units +of code. These units of code go under various names, depending on the +language you are using; common terms are structures, classes, +namespaces, modules, packages, libraries, components etc. In this +article I will focus on the ML terminology and tecniques.</p> +<p>In SML, the basic mechanism of code control is the structure, which +takes the place of what is called a module in other languages. We +already encountered structures before, such as the <tt class="docutils literal"><span class="pre">TextIO</span></tt> +structure, containing functions such as <tt class="docutils literal"><span class="pre">TextIO.openIn</span></tt>, +<tt class="docutils literal"><span class="pre">TextIO.closeIn</span></tt> etc.</p> +<p>It is also possible to define custom structures. For instance, suppose +we want to implement Python-like file-iteration in SML in order to be +able to write things like <tt class="docutils literal"><span class="pre">Iter.app</span> <span class="pre">print</span> <span class="pre">(Iter.file</span> <span class="pre">fname)</span></tt> to +print all the lines in a text file. We can do so with the following +structure:</p> +<pre class="literal-block"> +- structure Iter = struct + + exception StopIteration + + fun app func iter = + (while true do func (iter ())) handle StopIteration => () + + fun map func iter = + fn () => func (iter ()) + + fun file fname = let + val inp = TextIO.openIn fname + in fn () => + (case TextIO.inputLine inp + of NONE => raise StopIteration + | SOME line => line) + handle err => (TextIO.closeIn inp; raise err) + end + end; + structure Iter : + sig + exception StopIteration + val app : ('a -> 'b) -> (unit -> 'a) -> unit + val map : ('a -> 'b) -> (unit -> 'a) -> unit -> 'b + val file : string -> unit -> string + end +</pre> +<p>We see many interesting things here. First of all, the REPL returns us +a string representation of the so-called <em>signature</em> of the structure, +i.e. a description of the types of the objects encapsulated by the +structure. Iterators have been implemented as thunks, i.e. functions +of type <tt class="docutils literal"><span class="pre">unit</span> <span class="pre">-></span> <span class="pre">'a</span></tt>; in particular, <tt class="docutils literal"><span class="pre">file</span></tt> is a higher order +function taking a filename and returning a string-value thunk: at each +call of the closure``Iter.file fname`` we get a different line of the +file. All types have been inferred correctly: <tt class="docutils literal"><span class="pre">Iter.app</span></tt> is an +higher order function taking a generic function <tt class="docutils literal"><span class="pre">'a->'b</span></tt> (which +means a function taking an unspecified type <tt class="docutils literal"><span class="pre">'a</span></tt> and returning an +unspecified type <tt class="docutils literal"><span class="pre">'b</span></tt>, possibly different from <tt class="docutils literal"><span class="pre">'a</span></tt>) and +converting it into a function <tt class="docutils literal"><span class="pre">iterator</span> <span class="pre">-></span> <span class="pre">unit</span></tt>, whereas +<tt class="docutils literal"><span class="pre">Iter.map</span></tt> applies a generic function to an iterator and returns an +iterator. For instance, suppose we want to define an utility to +convert to upper case a text file, by applying the function <a class="footnote-reference" href="#id15" id="id13" name="id13">[7]</a></p> +<blockquote> +<tt class="docutils literal"><span class="pre">-</span> <span class="pre">fun</span> <span class="pre">upper</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">String.map</span> <span class="pre">Char.toUpper</span> <span class="pre">str;</span></tt></blockquote> +<p>to each line; we can test it by writing a file like the following:</p> +<pre class="literal-block"> +$ cat three-lines.txt + line 1 + line 2 + line 3 +</pre> +<p>and by defining</p> +<pre class="literal-block"> +- val next = Iter.map upper (Iter.file "three-lines.txt"); +val next : unit -> string = _fn +</pre> +<p>With this definitions, we get the same behavior as in Python <a class="footnote-reference" href="#id16" id="id14" name="id14">[8]</a></p> +<pre class="literal-block"> +- next (); +val it : string = " LINE 1\n" +- next (); +val it : string = " LINE 2\n" +- next (); +val it : string = " LINE 3\n" +- next (); +Uncaught exception + StopIteration +</pre> +<table class="docutils footnote" frame="void" id="id15" rules="none"> +<colgroup><col class="label" /><col /></colgroup> +<tbody valign="top"> +<tr><td class="label"><a class="fn-backref" href="#id13" name="id15">[7]</a></td><td>In this example I am using the <tt class="docutils literal"><span class="pre">String</span></tt> and <tt class="docutils literal"><span class="pre">Char</span></tt> structures of the +SML standard library, documented here +<a class="reference" href="http://www.standardml.org/Basis/string.html">http://www.standardml.org/Basis/string.html</a> and here +<a class="reference" href="http://www.standardml.org/Basis/char.html">http://www.standardml.org/Basis/char.html</a></td></tr> +</tbody> +</table> +<table class="docutils footnote" frame="void" id="id16" rules="none"> +<colgroup><col class="label" /><col /></colgroup> +<tbody valign="top"> +<tr><td class="label"><a class="fn-backref" href="#id14" name="id16">[8]</a></td><td>In Python we would have</td></tr> +</tbody> +</table> +<pre class="literal-block"> +>>> it = itertools.imap(str.upper, file("three-lines.txt")) +>>> it.next() +' LINE 1\n' +>>> it.next() +' LINE 2\n' +>>> it.next() +' LINE 3\n' +>>> it.next() +Traceback (most recent call last): + File "<stdin>", line 1, in <module> +StopIteration +</pre> +</div> +<div class="section"> +<h2><a class="toc-backref" href="#id34" id="importing-modules" name="importing-modules">Importing modules</a></h2> +<p>Structures in the standard library are automatically available; on the other +hand, if you write your own structure and save it into a file, you need to +import the file before you can use it. The simplest way to import a file is +through the <tt class="docutils literal"><span class="pre">use</span></tt> expression:</p> +<pre class="literal-block"> +- use "<filename.aml>"; +</pre> +<p>This simply includes the imported file in the current program, and everything +is compiled together. In most SML implementation it is also possible to compile +a file as a standalone component; the details vary with the implementation. +In Alice you can just save the above structure into a file called <tt class="docutils literal"><span class="pre">iter.aml</span></tt> +and compile it as</p> +<blockquote> +<tt class="docutils literal"><span class="pre">alicec</span> <span class="pre">iter.aml</span></tt></blockquote> +<p>This creates a bytecode compiled file called <tt class="docutils literal"><span class="pre">iter.alc</span></tt>. +The compiled structure can be imported in your programs with the line</p> +<blockquote> +<tt class="docutils literal"><span class="pre">import</span> <span class="pre">structure</span> <span class="pre">Iter</span> <span class="pre">from</span> <span class="pre">"iter"</span></tt></blockquote> +<p>assuming <tt class="docutils literal"><span class="pre">iter.alc</span></tt> is in the same directory as your main program. +If you want to import in the program namespace all the objects defined +inside the structure, you can open it:</p> +<blockquote> +<tt class="docutils literal"><span class="pre">-</span> <span class="pre">open</span> <span class="pre">Iter;</span></tt></blockquote> +<p>this is however actively discouraged, to prevent namespace pollution, +i.e. name conflicts, since an exported name can shadow a pre-existent +name. There is actually a good use case for opening a structure, +i.e. inside another structure. In particular, it is possible to +redefine structures, augmenting them with additional objects. +For instance, we could supplement the <tt class="docutils literal"><span class="pre">Iter</span></tt> structure with +a routine to read binary files in chunks:</p> +<pre class="literal-block"> +- structure Iter = struct + open Iter + fun binfile (fname, chunksize) = let + val inp = BinIO.openIn fname + in + fn () => let + val vec = BinIO.inputN (inp, chunksize) (* read N bytes *) + in + if Word8Vector.length(vec) = 0 then raise StopIteration else vec + handle err => (BinIO.closeIn inp; raise err) + end + end +end; +</pre> +<p>Notice that <tt class="docutils literal"><span class="pre">BinIO.inputN</span></tt> returns a vector +of bytes, not a string; however, you can get a <em>bona fide</em> string by +applying the standard library function <a class="reference" href="http://www.standardml.org/Basis/byte.html">Byte.bytesToString</a> to +the returned chunks. Here is an example:</p> +<pre class="literal-block"> +- Byte.bytesToString (Iter.binfile("three-lines.txt", 40) ()); +val it : string = " line 1\n line 2\n line 3\n" +</pre> +<p>An importanting to notice is that <em>you can redefine even standard library +structures</em>, augmenting them with new features, or replacing objects +with others, even <em>with a different signature</em>. This is similar to what +happens in Ruby, where you can add methods even to builtin classes, +and should be done with care.</p> +</div> +<div class="section"> +<h2><a class="toc-backref" href="#id35" id="structures-are-not-first-class-values" name="structures-are-not-first-class-values">Structures are not first class values</a></h2> +<p>The problem with structures is that they are not first class values, i.e. they cannot +be passed to functions and they cannot be inspected. This is the reason why giving at +the prompt the name of a structure does not work:</p> +<pre class="literal-block"> +- TextIO; +1.0-1.6: unknown value or constructor `TextIO' +</pre> +<p><tt class="docutils literal"><span class="pre">TextIO</span></tt> is not recognized as the name of a know value. Structures live +in a completely separate namespace <a class="footnote-reference" href="#id18" id="id17" name="id17">[9]</a>, so that you can associate any value +to the name <tt class="docutils literal"><span class="pre">TextIO</span></tt> and still you can access the contents of the structure +without any problem:</p> +<pre class="literal-block"> +- val TextIO = "astring"; +1.4-1.10: warning: value identifier `TextIO' violates standard naming conventions, +which suggest value names of the form foo, foo', fooBar +val TextIO : string = "astring" + +- TextIO.print; +val it : string -> unit = _lazy +</pre> +<p>Notice the warning about violating standard naming conventions, since +a regular value such as a string should be denoted with a +non-capitalized identitifier, whereas capitalized identitifiers should +be reserved to structures.</p> +<p>Structures can be given name and aliases via the <tt class="docutils literal"><span class="pre">structure</span></tt> declaration; +for instance, if you want to give a shorter alias to <tt class="docutils literal"><span class="pre">TextIO</span></tt> you can define</p> +<blockquote> +<tt class="docutils literal"><span class="pre">-</span> <span class="pre">structure</span> <span class="pre">T</span> <span class="pre">=</span> <span class="pre">TextIO;</span></tt></blockquote> +<p>Structures can be arbitrarily nested, i.e they can contain +substructures at any level of nesting. For instance, if you want to +extract the substructure <tt class="docutils literal"><span class="pre">StreamIO</span></tt> from <tt class="docutils literal"><span class="pre">TextIO</span></tt> you can define</p> +<blockquote> +<tt class="docutils literal"><span class="pre">-</span> <span class="pre">structure</span> <span class="pre">S</span> <span class="pre">=</span> <span class="pre">TextIO.StreamIO;</span></tt></blockquote> +<p>The lack of first class structures is motivated by reasons of +(premature) optimization. Suppose structure where first class values: +then you could write things like</p> +<blockquote> +<tt class="docutils literal"><span class="pre">val</span> <span class="pre">S</span> <span class="pre">=</span> <span class="pre">if</span> <span class="pre">someRuntimeCondition()</span> <span class="pre">then</span> <span class="pre">struct</span> <span class="pre">...</span> <span class="pre">end</span> <span class="pre">else</span> <span class="pre">struct</span> <span class="pre">...</span> <span class="pre">end</span></tt></blockquote> +<p>and that would make life pretty hard for the compiler: basically, it would be +impossible to compile the structure once and for all, and you would be forced +to recompile it at runtime. In these days of just-in-time +compilation this is less of an issue than in the past, and in particular +Alice ML allows you to do that, by wrapping structures in packages. +But in order to discuss that, we must first discuss signatures, which is +the SML name for interfaces.</p> +<table class="docutils footnote" frame="void" id="id18" rules="none"> +<colgroup><col class="label" /><col /></colgroup> +<tbody valign="top"> +<tr><td class="label"><a class="fn-backref" href="#id17" name="id18">[9]</a></td><td>Readers familiar with Common Lisp will be familiar with the concept +of having separate namespaces for different kind of objects.</td></tr> +</tbody> +</table> +</div> +<div class="section"> +<h2><a class="toc-backref" href="#id36" id="abstract-signatures-and-abstract-types" name="abstract-signatures-and-abstract-types">Abstract signatures and abstract types</a></h2> +<p>Strictly speaking, it is not necessary to define signatures for your +own structures, since even if you do not specify a signature, the +compiler is able to figure out the complete signature, thanks to type +inference. The problem is exactly that: the compiler extracts the +<em>complete</em> signature, a.k.a. the <em>principal</em> signature of you library, +which is too much. Typically, you don't want to expose all the objects +in your structure, for safety reason (your structure may contain +private functions which should not be exported to client code), for +sake of simplicity (you may want to implement the <a class="reference" href="http://en.wikipedia.org/wiki/Facade_pattern">facade pattern</a>) +or for code reuse. Using computer science buzzwords, we may say that +<a class="reference" href="http://en.wikipedia.org/wiki/Information_hiding">information hiding</a> is implemented in SML by defining <em>abstract +signatures</em>, as opposed to the <em>concrete</em> (or principal) signatures we +saw until now.</p> +<p>To give a concrete example, let me consider the <tt class="docutils literal"><span class="pre">List</span></tt> and +<tt class="docutils literal"><span class="pre">Vector</span></tt> structures in the standard library: the <tt class="docutils literal"><span class="pre">List</span></tt> structure +defines the type <tt class="docutils literal"><span class="pre">list</span></tt>, whereas the <tt class="docutils literal"><span class="pre">Vector</span></tt> structure defines +the type <tt class="docutils literal"><span class="pre">vector</span></tt>; both types have a common alias <tt class="docutils literal"><span class="pre">t</span></tt>. +<tt class="docutils literal"><span class="pre">list</span></tt> and <tt class="docutils literal"><span class="pre">vector</span></tt> are container types, in the sense that a +list/vector may contain objects of any type, as long as all the elements +are homogenous, so you may have a <tt class="docutils literal"><span class="pre">string</span> <span class="pre">list</span></tt>, an <tt class="docutils literal"><span class="pre">int</span> <span class="pre">list</span></tt>, +a <tt class="docutils literal"><span class="pre">string</span> <span class="pre">list</span> <span class="pre">list</span></tt>, etc <a class="footnote-reference" href="#id20" id="id19" name="id19">[10]</a>:</p> +<pre class="literal-block"> +- val lst = [0,1,2]; +val lst : int list = [0, 1, 2] +- val vec = #[0,1,2]; +val vec : int vector = #[0, 1, 2] +- val lst = [["hello", "world"]]; +val lst : string list list = [["hello", "world"]] +</pre> +<p>Both <tt class="docutils literal"><span class="pre">List</span></tt> and <tt class="docutils literal"><span class="pre">Vector</span></tt> provide a function <tt class="docutils literal"><span class="pre">length</span></tt> returning the +number of elements of the list/vector, and a function <tt class="docutils literal"><span class="pre">sub</span></tt> return +the i-th element of the list/vector:</p> +<pre class="literal-block"> +- List.length(lst); +val it : int = 3 +- Vector.length(vec); +val it : int = 3 + +-List.sub(lst, 1) (*) take the second element of the list +val it : int = 1 +-Vector.sub(vec, 1) (*) take the second element of the vector +val it : int = 1 +</pre> +<p>We may abstract this common behavior by defining an abstract signature:</p> +<pre class="literal-block"> +- signature SIMPLE_SEQUENCE = sig + type 'a t + val length: 'a t -> int + val sub: 'a t * int -> 'a + end + sig + type 'a t + val length : 'a t -> int + val sub : 'a t * int -> 'a + end +</pre> +<p>Here the type t is <em>abstract</em>: it becomes concrete only if you <em>ascribe</em> +the signature to a concrete structure:</p> +<pre class="literal-block"> +- structure L=List:SIMPLE_SEQUENCE; +structure L : + sig + type t = list + val length : 'a List.t -> int + val sub : 'a List.t * int -> 'a + end = List + +- structure V=Vector:SIMPLE_SEQUENCE; +structure V : + sig + type t = vector + val length : 'a Vector.t -> int + val sub : 'a Vector.t * int -> 'a + end = Vector +</pre> +<p>i.e. the abstract type <tt class="docutils literal"><span class="pre">t</span></tt> is replaced by the concrete type <tt class="docutils literal"><span class="pre">list</span></tt> when the +signature is ascribed to the <tt class="docutils literal"><span class="pre">List</span></tt> structure and by the concrete type <tt class="docutils literal"><span class="pre">vector</span></tt> +when the signature is ascribed to the <tt class="docutils literal"><span class="pre">Vector</span></tt> structure. +Moreover, having ascribed the <tt class="docutils literal"><span class="pre">SIMPLE_SEQUENCE</span></tt> signature to the +structures <tt class="docutils literal"><span class="pre">L</span></tt> and <tt class="docutils literal"><span class="pre">V</span></tt>, we have automatically hidden all the additional +functionality of the original structures, so that for instance <tt class="docutils literal"><span class="pre">L.map</span></tt> +and <tt class="docutils literal"><span class="pre">V.map</span></tt> are not accessible, even if <tt class="docutils literal"><span class="pre">List.map</span></tt> and <tt class="docutils literal"><span class="pre">Vector.map</span></tt> +do exists:</p> +<pre class="literal-block"> +- L.map +1.2-1.5: unknown value or constructor `map' +- V.map; +1.2-1.5: unknown value or constructor `map' +</pre> +<p>In a sense, you may see ascribing the signature as a way of importing a selected +subset of the functionality of the original structure; you could even import a subset +of the names defined in the original structures in the current namespace +by opening the ascribed structure:</p> +<pre class="literal-block"> +- open Vector:SIMPLE_SEQUENCE; +structure _id20 : + sig + type t = vector + val length : 'a Vector.t -> int + val sub : 'a Vector.t * int -> 'a + end = Vector +val sub : 'a Vector.t * int -> 'a = _fn +val length : 'a Vector.t -> int = _fn +type t = Vector.t +</pre> +<p>Signatures allows much more than that, and the practical usage of abstract +signatures in SML will become clear once we will introduce the concept of +functors and packages. Nevertheless, I am sure that the +expert object oriented programmer has already understood the point +behind abstract signatures: they are nothing else than +interfaces. Code written to work with a <tt class="docutils literal"><span class="pre">SIMPLE_SEQUENCE</span></tt> interface +will automatically work with lists, vectors, and any data structure +(builtin <em>and</em> custom) satisfying the interface.</p> +<table class="docutils footnote" frame="void" id="id20" rules="none"> +<colgroup><col class="label" /><col /></colgroup> +<tbody valign="top"> +<tr><td class="label"><a class="fn-backref" href="#id19" name="id20">[10]</a></td><td>Notice that since SML is a functional language, both lists and +vectors are immutable.</td></tr> +</tbody> +</table> +<hr class="docutils" /> +<blockquote> +<p><em>divide et impera</em></p> +<blockquote> +-- old Roman saying</blockquote> +</blockquote> +</div> +</div> +<div class="section"> +<h1><a class="toc-backref" href="#id37" id="functional-programming-for-dynamic-programmers-part-4" name="functional-programming-for-dynamic-programmers-part-4">Functional Programming For Dynamic Programmers - Part 4</a></h1> +<table class="docutils field-list" frame="void" rules="none"> +<col class="field-name" /> +<col class="field-body" /> +<tbody valign="top"> +<tr class="field"><th class="field-name">author:</th><td class="field-body">Michele Simionato</td> +</tr> +<tr class="field"><th class="field-name">date:</th><td class="field-body">December 2007</td> +</tr> +</tbody> +</table> +<p>This is the fourth of a series of articles on functional programming in +statically typed languages. It is intended for dynamic programmers, i.e. +for programmers with a background in dynamically typed languages, such as Perl, +Python, Ruby, or languages in the Lisp family. The approch is eminently practical +and example-based; the main goal is to see if we can stole some good idea from +statically typed languages. In order to be concrete, I will consider languages +in the ML family, because they are pretty nice and much easier to understand +that Haskell.</p> +<div class="section"> +<h2><a class="toc-backref" href="#id38" id="academic-languages-vs-enterprise-languages" name="academic-languages-vs-enterprise-languages">Academic languages vs enterprise languages</a></h2> +<p>There is a widespread misconception that academic languages are clean but +unpractical, whereas enterprise languages are practical but dirty. I never +bought that argument, since I have seen examples of the opposite, and also +because I always believed that a language can be both clean <em>and</em> practical.</p> +<p>For instance in my opinion both Python and Ruby are reasonably clean and +reasonably practical and this is part of the reason why they are having so +much success lately. I believe the reason why they are so clean is that from +the beginning they managed to stay away from optimization, the root of all evil. +On the other hand, I am also convinced that it possible to write a language +which is both practical, clean and fast. Unfortunately, such a language does +not exist yet. SML is fast and somewhat clean but it is definitely not practical. +Here by practical I mean enterprise-ready.</p> +<p>There is an irriducible <a class="reference" href="http://en.wikipedia.org/wiki/Aporia">aporia</a> between enterprise programmers and +academics: people working on enterprise are facing every day problems +that are already solved; to them, the importanting thing is to avoid +reinventing the wheel. Given two languages, one that provides a solution +to a problem they have, and one providing the tools to build the solution +themselves, they prefer the first one. This is natural since in an enterprise +environment one has deadlines and it is essential to be as productive +as possible; of course the best way to be productive is to <em>not write</em> code, +and to re-use code written by others. +On the other hand, in academia one has to do with new problems, or +with new techniques to solve old problems: the proof of concept is +more important than the practical implementation. Also, academics +are (rightly) interested in writing papers, not in writing code. For all these +reasons it is clear that you cannot face an +academic language such as SML with an enterprise mindset. +For instance, if you come from an enterprise environment you will be used to +expect ready availabity of a nearly infinite number +of libraries doing everything you may need and more. This is certainly +the situation for all enterprise-level languages such as C, C++, Java, C#, +Perl, Python and now even Ruby, which is younger but rapidly spreading.</p> +<p>As I said many time, I assume that my readers are dynamic programmers, +well acquainted with scripting languages; I feel the urge warn them that the +situation in SML is very different than in Perl, Python or Ruby <a class="footnote-reference" href="#id22" id="id21" name="id21">[11]</a> . Here +are a few differences.</p> +<ol class="arabic"> +<li><p class="first">Scripting languages have a dominant (sometimes unique) +implementation; ML has dozens of implementations, each with +useful features which are not in the standard, so that +different implementations provides incompatible extensions.</p> +</li> +<li><p class="first">Scripting languages have a <a class="reference" href="http://en.wikipedia.org/wiki/BDFL">BDFL</a> (Benevolent Dictator For Life, +i.e. Guido van Rossum for Python, Larry Wall for Perl, Yukihiro +Matsumoto for Ruby) whereas SML is a language designed by +committee. That means that a scripting language can evolve much +<em>much</em> faster than a language designed by committee. Also, +languages designed by committee are compromises whereas +single-person languages are much more consistent.</p> +</li> +<li><p class="first">SML has a very small community: the support you can have in newsgroup +and mailing lists is simply not comparable to the support you can get in +the scripting languages mailing list and newsgroups.</p> +<p>if Andreas Rossberg won the first price +at the lottery and decided to leave for a tropical paradise, nobody would +provide support for Alice ML on the mailing list</p> +</li> +<li><p class="first">SML is a deserted language, even compared to non-mainstream languages +as OCaml, Haskell, Scheme, Common Lisp, Smalltalk, Tcl, ...; it is perhaps the +least used language I have ever seen and this is a pity.</p> +</li> +<li><p class="first">Generally speaking, if you want bindings for external libraries you have to write +it yourself; there are no standard bindings for GUI toolkits, database drivers, +scientific programming libraries.</p> +</li> +<li><p class="first">Scripting languages have traditionally a strong support for Web programming +which is next to absent in SML.</p> +</li> +</ol> +<p>All the issues I have just listed are just accidental, not +structural: in principle one could just solve them by writing code; in principle +a <a class="reference" href="http://en.wikipedia.org/wiki/DHH">DHH</a> could take an SML implementation and make it the next Ruby on Rails. +However that has not happened yet, so if you want to work with SML right now you +will be forced to reinvent the wheel. In this article I will reinvent a few wheels, +just to be able to give some meaninful example to the enterprise programmer +in the next articles.</p> +<table class="docutils footnote" frame="void" id="id22" rules="none"> +<colgroup><col class="label" /><col /></colgroup> +<tbody valign="top"> +<tr><td class="label"><a class="fn-backref" href="#id21" name="id22">[11]</a></td><td>Notice that I put Perl, Python and Ruby in the mainstream +languages, since even if the number of dynamic programmers is +inferior the number of say C/C++, Java or .NET programmers, +in practice scripting programmers have available nearly everything the +other languages have available, and much better languages.</td></tr> +</tbody> +</table> +</div> +<div class="section"> +<h2><a class="toc-backref" href="#id39" id="string-interpolation-the-practical-way" name="string-interpolation-the-practical-way">String interpolation the practical way</a></h2> +<p>Every language has some builtin mechanism to do string interpolation: C +has <tt class="docutils literal"><span class="pre">sprintf</span></tt>, Python has the <tt class="docutils literal"><span class="pre">%</span></tt> operator, Lisp has <tt class="docutils literal"><span class="pre">format</span></tt>, but +SML has none. Therefore I will provide here a very simple interpolation library +for SML, so that I will be able to provide more interesting examples later. +The library is based on list processing. +Lists are the most common data structures in +functional languages (they formed the core of Lisp at its beginning) +and you simply cannot live without them. You should look at the <a class="reference" href="http://www.standardml.org/Basis/list.html">basis library</a> +to see what you can do with lists. Here I will just say that SML lists are +linked lists in the same sense of Lisp or Scheme (with the difference +that they are immutable) and not as in Python (Python lists are actually arrays, +Python is abusing the name). Just as in Scheme, where</p> +<blockquote> +<tt class="docutils literal"><span class="pre">(list</span> <span class="pre">1</span> <span class="pre">2)</span></tt></blockquote> +<p>is a shortcut for</p> +<blockquote> +<tt class="docutils literal"><span class="pre">(cons</span> <span class="pre">1</span> <span class="pre">(cons</span> <span class="pre">2</span> <span class="pre">'()))</span></tt></blockquote> +<p>in ML</p> +<blockquote> +<tt class="docutils literal"><span class="pre">[1,</span> <span class="pre">2]</span></tt></blockquote> +<p>is a shortcut for</p> +<blockquote> +<tt class="docutils literal"><span class="pre">1::2::[]</span></tt></blockquote> +<p>and <tt class="docutils literal"><span class="pre">::</span></tt> is the <em>cons</em> operator (short for constructor). +If you don't know Scheme, a SML lists should be thought of as of a +nested Python list, i.e.</p> +<blockquote> +<tt class="docutils literal"><span class="pre">[1,</span> <span class="pre">2]</span> <span class="pre">(SML)</span> <span class="pre">=></span> <span class="pre">[1,</span> <span class="pre">[2,</span> <span class="pre">[]]]</span> <span class="pre">(Python)</span></tt></blockquote> +<p>Here is the code:</p> +<pre class="literal-block"> +$ cat format.aml +</pre> +<pre class="literal-block"> +(* +A library implementing a very simple string interpolation facility. +It exports a single higher order function 'format' converting a template +string into a 'string list -> string' function replacing the template +with the provided list of arguments. An example of usage is + +do print(format "The square of $ is $\n" ["3", "9"]) +*) + +signature FORMAT = sig + val format : string -> string list -> string +end + +structure Format = struct + exception ArityError of string + + fun checkArity(templN1, argsN) = let + val n1 = Int.toString (length templN1 - 1) + val n = Int.toString (length argsN) + in + if n1=n then () else raise ArityError("Expected "^n1^" arguments, got"^n) + end + + val rec interp' = + fn (t :: [], [], acc) => concat(rev (t :: acc)) + | (t :: templ, a :: args, acc) => interp'(templ, args, t :: a :: acc) + | _ => raise ArityError "This should never happen" + + and interp'' = + fn (templN1, argsN) => ( + checkArity (templN1, argsN); interp' (templN1, argsN, [])) + + and format = + fn templ => let + val templN1 = String.fields (fn c => c = #"$") templ + in + fn args => interp'' (templN1, args) + end + +end: FORMAT + +</pre> +<p>A few comments are in order.</p> +<ol class="arabic"> +<li><dl class="first docutils"> +<dt>We used the ability to define exceptions with a value:</dt> +<dd><p class="first last"><tt class="docutils literal"><span class="pre">exception</span> <span class="pre">ArityError</span> <span class="pre">of</span> <span class="pre">string</span></tt> means that <tt class="docutils literal"><span class="pre">ArityError</span></tt> accepts +a string argument (the error message).</p> +</dd> +</dl> +</li> +<li><p class="first">We used the standard library <tt class="docutils literal"><span class="pre">String.fields</span></tt> utility, which splits a string +according to a delimiter; in our case <tt class="docutils literal"><span class="pre">String.fields</span> <span class="pre">(fn</span> <span class="pre">c</span> <span class="pre">=></span> <span class="pre">c</span> <span class="pre">=</span> <span class="pre">#"$")</span> <span class="pre">templ</span></tt> +splits the template into a list a strings using the character <tt class="docutils literal"><span class="pre">$</span></tt> as delimiter. +In SML characters are specified with a sharp notation, so the character <tt class="docutils literal"><span class="pre">$</span></tt> +is written <tt class="docutils literal"><span class="pre">#"$"</span></tt>.</p> +</li> +<li><p class="first">At the core of the algorithm is the recursive function +<tt class="docutils literal"><span class="pre">interp'(templN1,</span> <span class="pre">argsN)</span></tt> which takes a list of N+1 strings (the +splitted template) and a list of N strings (the arguments) and returns the +interpolated string. For instance, for the template <tt class="docutils literal"><span class="pre">The</span> <span class="pre">square</span> <span class="pre">of</span> <span class="pre">$</span> <span class="pre">is</span> <span class="pre">$\n</span></tt> +which contains N=2 delimiters, +<tt class="docutils literal"><span class="pre">templN1</span></tt> would be <tt class="docutils literal"><span class="pre">["The</span> <span class="pre">square</span> <span class="pre">of</span> <span class="pre">",</span> <span class="pre">"</span> <span class="pre">is</span> <span class="pre">",</span> <span class="pre">"\n"]</span></tt>.</p> +</li> +<li><p class="first">The builtin <tt class="docutils literal"><span class="pre">concat</span></tt> concatenates a list of strings whereas <tt class="docutils literal"><span class="pre">rev</span></tt> reverts +a list; you are advised to read the documentation or any book on SML for more.</p> +</li> +</ol> +</div> +<div class="section"> +<h2><a class="toc-backref" href="#id40" id="string-interpolation-the-mathematical-way" name="string-interpolation-the-mathematical-way">String interpolation the mathematical way</a></h2> +<p>Functional languages have a reputation for abstractness +but in this series of articles I have focused solely on very concrete +earth-bound aspects. To correct the balance, in this section I will discuss +a few programming techniques which are quite common in +functional languages, but unheard of in "regular" languages, and that +require a mathematically-inclined mind to be understood. I am doing so +because these tricks are actually used a lot in functional languages, and +I don't want to give a false impression by ignoring them +and just discussing the easy things.</p> +<p>For instance, you should not believe that functional +programming is just about functions; there is an higher order +approach to functional programming, in which the primary objects +are not functions, but operators operating on functions, the so +called <em>combinators</em>. For people with a background in Physics, I will +say this is exactly analogous to the switch from the +Schroedinger picture, in which the emphasis is on the wave functions, +to the Heisenberg picture, in which the emphasis is on the quantum +operators.</p> +<p>Having warned my readers that this section is not for the faint of heart +(but you may safely skip it if you don't feel adventurous) +I will start from the concatenation operator</p> +<pre class="literal-block"> +^ : string * string -> string +</pre> +<p>which satifies the properties:</p> +<pre class="literal-block"> +a ^ (b ^ c) = (a ^ b) ^ c = a ^ b ^ c +a ^ "" = "" ^ a = a +</pre> +<p>Mathematically speaking, the set of strings in SML +is a <a class="reference" href="http://en.wikipedia.org/wiki/Monoid">monoid</a> with respect to concatenation and the empty +string "" is the identity element.</p> +<p>If you have a background in Mathematics or in Physics you will be familiar +with the theory of <a class="reference" href="http://en.wikipedia.org/wiki/Group_representation">group representations</a>; in particular groups (and monoids +too) can be represented with operators operating on function spaces. +In the case at hand, I can define a lift transformation converting plain +strings into operators by preserving the composition law:</p> +<pre class="literal-block"> +- fun L s f s' = f (s' ^ s); +val L : string -> (string -> 'a) -> string -> 'a = _fn +</pre> +<p>(this is the same as <tt class="docutils literal"><span class="pre">fun</span> <span class="pre">L</span> <span class="pre">s</span> <span class="pre">=</span> <span class="pre">fn</span> <span class="pre">f</span> <span class="pre">=></span> <span class="pre">fn</span> <span class="pre">s'</span> <span class="pre">=></span> <span class="pre">f</span> <span class="pre">(s'</span> <span class="pre">^</span> <span class="pre">s)</span></tt>). +In other words, <tt class="docutils literal"><span class="pre">L</span> <span class="pre">s</span></tt> is an operator (combinator) taking a function and +returning a function, with the property</p> +<blockquote> +<tt class="docutils literal"><span class="pre">L</span> <span class="pre">(s1</span> <span class="pre">^</span> <span class="pre">s2)</span> <span class="pre">=</span> <span class="pre">(L</span> <span class="pre">s1)</span> <span class="pre">o</span> <span class="pre">(L</span> <span class="pre">s2)</span></tt></blockquote> +<p>where <tt class="docutils literal"><span class="pre">o</span></tt> denotes the composition of operators. Just as <tt class="docutils literal"><span class="pre">L</span></tt> is an upgrade +operation, promoving a plain simple string to the celestial world of operators +in function spaces, I can define a downgrade operation <tt class="docutils literal"><span class="pre">l</span></tt>, demoving +celestial operators back to earthly strings:</p> +<pre class="literal-block"> +- fun l O = O Fn. id ""; +val l : (('a -> 'a) -> string -> 'b) -> 'b = _fn +</pre> +<p><tt class="docutils literal"><span class="pre">l</span></tt> takes the operator, applies it to the identity function and returns a function +which is then applied to the empty string, finally bringing back at home a +plain string; <tt class="docutils literal"><span class="pre">l</span></tt> is the inverse of <tt class="docutils literal"><span class="pre">L</span></tt>, i.e. <tt class="docutils literal"><span class="pre">l</span> <span class="pre">o</span> <span class="pre">L</span></tt> is the identity +in the space of strings where <tt class="docutils literal"><span class="pre">L</span> <span class="pre">o</span> <span class="pre">l</span></tt> is the identity in the space +of operators. You can try yourself at the prompt that</p> +<pre class="literal-block"> +- l(L"a" o L"b"); +val it : string = "ab" +</pre> +<p>so we succeded in making simple things hard and abstract. But not happy with +that, we are going to make things harder and more abstract, by defining +another combinator taking a function and returning an higher order function:</p> +<pre class="literal-block"> +- fun str f s s' = f (s ^ s') +val str : (string -> 'a) -> string -> string -> 'a = _fn +</pre> +<p>(this is the same as <tt class="docutils literal"><span class="pre">fun</span> <span class="pre">str</span> <span class="pre">f</span> <span class="pre">=</span> <span class="pre">fn</span> <span class="pre">s</span> <span class="pre">=></span> <span class="pre">fn</span> <span class="pre">s'</span> <span class="pre">=></span> <span class="pre">f</span> <span class="pre">(</span> <span class="pre">s</span> <span class="pre">^</span> <span class="pre">s')</span></tt>). +This combinator is so abstract than even when lift back to the mortal world +it is more than a mere string, it is actually the identity function on strings:</p> +<pre class="literal-block"> +- l str; +val it : string -> string = _fn +- (l str) "hello"; +val it : string = "hello" +</pre> +<p>The <tt class="docutils literal"><span class="pre">str</span></tt> combinator can be composed in the celestial word: when lift +back to the mortal world, it returns a higher order version of the concatenation +operator:</p> +<pre class="literal-block"> +- l (str o str); +val it : string -> string -> string = _fn + +- l (str o str) "hello" " world"; +val it : string = "hello world" +</pre> +<p>We can also compose <tt class="docutils literal"><span class="pre">str</span></tt> with other combinators and we can write things like:</p> +<pre class="literal-block"> +- l( L"a" o str o L"b" o str o L"c") "?" ":"; +val it : string = "a?b:c" +</pre> +<p>In other words, we have reinvented string interpolation the difficult way. +Still, there is at least an advantage of this approach with respect to the +approach we used in the previous paragraph: combinator-based string interpolation +happens at compile type and <em>mistakes are discovered by the compiler</em>, not +at runtime. Moreover, the approach can be easily extended to manage +different types: for instance, if we have to do with numbers, we +can define the combinators:</p> +<pre class="literal-block"> + - fun int f s n = f (s ^ (Int.toString n)) + val str : (string -> 'a) -> string -> string -> 'a = _fn + + - fun real f s n = f (s ^ (Real.toString n)) +val real : (string -> 'a) -> string -> real -> 'a = _fn +</pre> +<p>and use them as follows:</p> +<pre class="literal-block"> +- print (l(int o L" / " o int o L" = " o real o L"\n") 5 2 2.5); +5 / 2 = 2.5 +val it : unit = () +</pre> +<p>If you make a mistake like using an int instead of a real, or if you forget an +argument, you will get a compile time type error.</p> +<hr class="docutils" /> +<p><em>Give a man a fish and he will eat for a day. Teach a man to fish and he will eat +for the rest of his life.</em> -- Chinese proverb</p> +</div> +</div> +<div class="section"> +<h1><a class="toc-backref" href="#id41" id="functional-programming-for-dynamic-programmers-part-5" name="functional-programming-for-dynamic-programmers-part-5">Functional Programming For Dynamic Programmers - Part 5</a></h1> +<table class="docutils field-list" frame="void" rules="none"> +<col class="field-name" /> +<col class="field-body" /> +<tbody valign="top"> +<tr class="field"><th class="field-name">author:</th><td class="field-body">Michele Simionato</td> +</tr> +<tr class="field"><th class="field-name">date:</th><td class="field-body">December 2007</td> +</tr> +</tbody> +</table> +<p>This is the fifth of a series of articles on functional programming in +statically typed languages. It is intended for dynamic programmers, i.e. +for programmers with a background in dynamically typed languages, such as Perl, +Python, Ruby, or languages in the Lisp family. The approch is eminently practical +and example-based; the main goal is to see if we can stole some good idea from +statically typed languages. In order to be concrete, I will consider languages +in the ML family, because they are pretty nice and much easier to understand +that Haskell.</p> +<div class="section"> +<h2><a class="toc-backref" href="#id42" id="the-standard-ml-type-system" name="the-standard-ml-type-system">The Standard ML type system</a></h2> +<p>You, the dynamic programmer, are surely familiar with the object +system of languages such as Python and Ruby, or Common Lisp and +Smalltalk. In such languages you here often the mantra <em>everything is +an object</em> meaning that everything is a first class value which can +created and inspected at runtime. Moreover, any object has a class, +and classes themselves are objects, i.e. they are instances of +metaclasses. SML has a type system which is completely different from +the object system of dynamic language. There a lots of things which +are not first class values (structures, exceptions, signatures, ...) +and even types are such: in SML any value has a type, but types itself +are not values. Therefore passing the name of a type to the prompt +gives an error:</p> +<pre class="literal-block"> +- string; +1.0-1.6: unknown value or constructor `string' +</pre> +<p>Just as structures types live in a separated namespace. +SML types are not classes in any OO sense of the term, +you cannot introspect them, there are no methods and no inheritance, +thus the first step in order to learn the SML type system is to forget +everything you know about OOP. Having said that, you can certainly +program in an OO style in ML, but you don't use types for OOP, types are used +internally by the compiler, but from the user's point of view they are just +labels attached to values.</p> +<p>The type system of SML is extensible, and the user can define new types (i.e. +new labels) in terms of primitive types. The labels are just that; but together +with a new label, the definition of a new type includes the definition +of one or more functions, the <em>constructors</em> of the type, which are first class +values and extremely useful for pattern matching. +Let me give a concrete example: an +<tt class="docutils literal"><span class="pre">int_or_string</span></tt> datatype with two constructors <tt class="docutils literal"><span class="pre">INT</span></tt> and <tt class="docutils literal"><span class="pre">STR</span></tt>, defined +as follows:</p> +<pre class="literal-block"> +- datatype int_or_string = INT of int | STR of string; +datatype int_or_string = INT of int | STR of string +</pre> +<p>This definitions tells the compiler that the label <tt class="docutils literal"><span class="pre">int_or_string</span></tt> has to +be used when showing the signature of objects of the newly defined types; +but the important things are the constructors, which are able to convert (or <em>cast</em>) +primitive values to values of the new type:</p> +<pre class="literal-block"> +- val v1 = INT 1; +val v1 : int_or_string = INT 1 +- val v2 = STR "1"; +val v2 : int_or_string = STR "1" +</pre> +<p>Actually the constructors are perfectly ordinary functions:</p> +<pre class="literal-block"> +- INT; +val it : int -> int_or_string = _fn +- STR; +val it : string -> int_or_string = _fn +</pre> +<p>Constructors are essential since you can use them in <em>pattern matching</em>. +For instance, suppose you want to define an utility function casting integer to strings. +You can do so as follows:</p> +<pre class="literal-block"> +- fun valueToString (INT x) = Int.toString x + | valueToString (STR x) = x; +val valueToString : int_or_string -> string = _fn +</pre> +<p>Let me check that it works:</p> +<pre class="literal-block"> +- valueToString (INT 1); +val it : string = "1" +- valueToString (STR "1"); +val it : string = "1" +</pre> +<p>In essence, we have just implemented type dispatching: <tt class="docutils literal"><span class="pre">valueToString</span></tt> +is now emulating a C++ overloaded function (or a Lisp generic function) which +accepts both integers and strings and behaves differently according to the type. +This in spirit, but technically <tt class="docutils literal"><span class="pre">valueToString</span></tt> is just an ordinary SML function +which takes in input the <tt class="docutils literal"><span class="pre">int_or_string</span></tt> type, so we are somewhat cheating, +but it works ;)</p> +<p>Consider for instance the issue of defining heterogeneous collections, i.e. +collections containing different types; in SML we cannot define a list containing +an integer and a string:</p> +<pre class="literal-block"> +- [1, "two"]; + 1.0-1.2: ill-typed constructor argument: + int * string list +does not match argument type + int * int list +because type + string +does not unify with + int +</pre> +<p>but we can define:</p> +<pre class="literal-block"> +- [INT 1, STR "two"]; +val it : int_or_string list = [INT 1, STR "two"] +</pre> +<p>As Harper puts it, <em>heterogenity is a special case of homogenity</em>.</p> +<p>It is also possible to define parametric types, where the constructor(s) can +accept any type as argument. For instance, we can define a parametric box type:</p> +<pre class="literal-block"> +- datatype 'a box = Box of 'a; +datatype 'a box = Box of 'a + +- Box +val it : 'a -> 'a box = _fn +</pre> +<p><tt class="docutils literal"><span class="pre">box</span></tt> is a parametric type with constructor <tt class="docutils literal"><span class="pre">Box</span></tt> and parameter <tt class="docutils literal"><span class="pre">'a</span></tt> +(to be read <em>alpha</em>), which corresponds to a generic type, so that you +can define</p> +<pre class="literal-block"> +- Box(1); + val it : int box = Box 1 +- Box("hello"); + val it : string box = Box "hello" +- Box(fn x => 2*x); +val it : (int -> int) box = Box (_fn) +</pre> +<p>In technical terms, we say that SML support <em>parametric polymorphism</em>.</p> +<p>Notice that it is pretty easy to define a function extracting the inner value from a box:</p> +<pre class="literal-block"> +- fun unbox (Box x) = x; +</pre> +<p>The builtin <tt class="docutils literal"><span class="pre">Ref</span></tt> type works as a box and the builtin``!`` function works +as <tt class="docutils literal"><span class="pre">unbox</span></tt>, the different is that our <tt class="docutils literal"><span class="pre">Box</span></tt> is immutable, i.e. we do not +have an equivalent of the assigment function <tt class="docutils literal"><span class="pre">:=</span></tt>.</p> +</div> +<div class="section"> +<h2><a class="toc-backref" href="#id43" id="more-on-datatypes" name="more-on-datatypes">More on datatypes</a></h2> +<p>We may define composite types, i.e. types with +<em>syntactic arity</em> greater than one, like the following:</p> +<pre class="literal-block"> +- datatype string_int = STRING_INT of string * int; +datatype string_int = STRING_INT of string * int +</pre> +<p>The uppercase identifier is called the constructors of the datatype, and can +be used to make concrete instances of the type:</p> +<pre class="literal-block"> +- STRING_INT("hello", 1); +val it : string_int = STRING_INT ("hello", 1) +</pre> +<p>where the constructor is a first class value, being simply a function taking +a pair <tt class="docutils literal"><span class="pre">(string,</span> <span class="pre">int)</span></tt> as input:</p> +<pre class="literal-block"> +- STRING_INT; +val it : string * int -> string_int = _fn +</pre> +<p>A more interesting example could be a <em>named_function</em> type corresponding +to a pair <em>(function, name)</em>, where <em>function</em> can be of any functions, whereas +<em>name</em> is a string. +We can define it as follows:</p> +<pre class="literal-block"> +- datatype ('a, 'b) named_function = NAMED_FUNCTION of ('a->'b) * string; +datatype ('a, 'b) named_function = NAMED_FUNCTION of ('a -> 'b) * string +</pre> +<p><em>named_function</em> is a parametric type with parameter 'a (to be read <em>alpha</em>), +which corresponds to a generic type, and NAMED_FUNCTION is its associated +constructor:</p> +<pre class="literal-block"> +- NAMED_FUNCTION; +val it : ('a -> 'b) * string -> ('a, 'b) named_function = _fn +</pre> +<p>In other words, NAMED_FUNCTION is a function converting a pair (value, name), +where <em>value</em> can be of any type, into a <em>named_function</em> parametric type. +Here is an example:</p> +<pre class="literal-block"> +- NAMED_FUNCTION (fn x=>2*x, "double"); + (* giving a name to the function x=>2*x *) +val it = NAMED_FUNCTION (fn,"double") : (int,int) named_function +</pre> +<p>SML also allows to define enumeration types, like the following one:</p> +<pre class="literal-block"> +- datatype color = RED|GREEN|BLUE; +datatype color = BLUE | GREEN | RED +</pre> +<p>but for enumeration types the name is rather improper, since they are +just values:</p> +<pre class="literal-block"> +- RED; +val it : color = RED + +- GREEN; +val it : color = GREEN + +- BLUE; +val it : color = BLUE +</pre> +<p>Finally, SML let you define aliases for previously defined types, +or builtin types, by using the <tt class="docutils literal"><span class="pre">type</span></tt> keyword:</p> +<pre class="literal-block"> +- type c = color; +type c = color + +- type func=named_function; + type func=named_function +- type ls = List.list; + type ls = List.list +</pre> +<p>This is especially convenient when writing signatures.</p> +</div> +<div class="section"> +<h2><a class="toc-backref" href="#id44" id="poor-man-s-polymorphism" name="poor-man-s-polymorphism">Poor man's polymorphism</a></h2> +<p>Possibly the most used word in statically typed languages is <em>polymorphism</em>. +The word does not exist in dynamic languages, since dynamically typed languages +are polymorphic by design, whereas in statically typed languages you have +to work to achieve polymorphism, i.e. the ability to define functions accepting +generic types. In order to give a poor man' example, suppose we want to +define a few polymorphic utilities accepting both lists and vectors. We could +do so by defining a <tt class="docutils literal"><span class="pre">ListOrVector</span></tt> structure associated with a +<tt class="docutils literal"><span class="pre">list_or_vector</span></tt> datatype:</p> +<pre class="literal-block"> +- structure ListOrVector = struct + datatype 'a list_or_vector = LST of 'a list | VEC of 'a vector + type t = list_or_vector + fun length (LST x) = List.length x + | length (VEC x) = Vector.length x + fun sub (LST x, i) = List.sub (x,i) + | sub (VEC x, i) = Vector.sub (x,i) + end; + sig + datatype 'a list_or_vector = LST of 'a list | VEC of 'a vector + type t = ListOrVector.list_or_vector + val length : 'a ListOrVector.list_or_vector -> int + val sub : 'a ListOrVector.list_or_vector * int -> 'a + end +</pre> +<p>The structure <tt class="docutils literal"><span class="pre">ListOrVector</span></tt> is associated to the parametric datatype +<tt class="docutils literal"><span class="pre">'a</span> <span class="pre">list_or_vector</span></tt> and contains the two constructors <tt class="docutils literal"><span class="pre">ListOrVector.LST</span></tt> +and <tt class="docutils literal"><span class="pre">ListOrVector.VEC</span></tt>. Moreover we defined the label <tt class="docutils literal"><span class="pre">ListOrVector.t</span></tt> +to be an alias for <tt class="docutils literal"><span class="pre">ListOrVector.list_or_vector</span></tt>, for a reason that will become +clear in the next paragraph. Here are two examples of usage:</p> +<pre class="literal-block"> +- ListOrVector.length (ListOrVector.LST [1,2,3]); +val it : int = 3 + +- ListOrVector.length (ListOrVector.VEC #[1,2,3]); +val it : int = 3 +</pre> +<p>This approach to polymorphism works for simple things, but it is not practical, +nor extensible: this is the reason why SML provides an industrial strenght +mechanism to support polymorphism, <em>functors</em>.</p> +</div> +<div class="section"> +<h2><a class="toc-backref" href="#id45" id="input-and-output-revisited" name="input-and-output-revisited">Input and Output revisited</a></h2> +<p>In accordance with the example-oriented spirit of this series, I will +introduce functors with a motivating example, again in the arena of +input and output. +We saw in an earlier installament that the standard library provides two +structures <tt class="docutils literal"><span class="pre">TextIO</span></tt> and <tt class="docutils literal"><span class="pre">BinIO</span></tt> for managing text files and binary +files respectively; we also show that the two structures have many things +in common, and it is possibile to define a (sub)signature matching both.</p> +<p>Structures are not first class objects and +they cannot be passed to and returned from regular functions. +To circumvent this restriction, the authors of ML invented +the concept of <em>functor</em>, which is basically a <em>sui generis</em> function +taking structures in input and to returning structures in output. Functors +are not-first-class objects themselves and therefore they require a specific +declaration <tt class="docutils literal"><span class="pre">functor</span> <span class="pre">Name(Struct:SIGNATURE)</span> <span class="pre">=</span> <span class="pre">funct</span> <span class="pre">...</span> <span class="pre">end</span></tt>.</p> +<pre class="literal-block"> +signature SIMPLE_IO = sig + type instream + type outstream + type vector + val openIn: string-> instream + val closeIn: instream -> unit + val openOut: string-> outstream + val closeOut: outstream -> unit + val inputAll: instream -> vector + val output: outstream * vector -> unit + end + +functor ManagedIO(SimpleIO:SIMPLE_IO) = struct + + val inputAll = SimpleIO.inputAll + val output = SimpleIO.output + + fun withInputFile fname manage = let + val file = SimpleIO.openIn fname + in + manage file + finally SimpleIO.closeIn file + end + + fun withOutputFile fname manage = let + val file = SimpleIO.openOut fname + in + manage file + finally SimpleIO.closeOut file + end +end + +</pre> +<p>The functor here is important, since it shows how it is possible to write +generic code in SML. In this case, I have just written a library which is +able to wrap <em>any</em> structure matching the <tt class="docutils literal"><span class="pre">SimpleIO</span></tt> interface, and I have +avoided a potentially infinite code duplication.</p> +<p>The specification of which specific structure to use will be the job of the client +code, not of the library author; in particular a particular user of the library +may be interested in specializing it both for <tt class="docutils literal"><span class="pre">TextIO</span></tt> and <tt class="docutils literal"><span class="pre">BinIO</span></tt> +by writing:</p> +<pre class="literal-block"> +- structure T = ManagedIO(TextIO) +- structure B = ManagedIO(BinIO) +</pre> +<p>The operation of specializing a functor is also called <em>functor instantiation</em>; +since it happens in a structure declaration it is performed by the compiler +<em>at compile time</em>. The advantage is that the compiler can generate different optimized +code for the structures <tt class="docutils literal"><span class="pre">T</span></tt> and <tt class="docutils literal"><span class="pre">B</span></tt> in the <em>client</em> program.</p> +<blockquote> +<tt class="docutils literal"><span class="pre">-</span> <span class="pre">T.withInputFile</span> <span class="pre">"three-lines.txt"</span> <span class="pre">(print</span> <span class="pre">o</span> <span class="pre">T.inputAll)</span></tt></blockquote> +<hr class="docutils" /> +<blockquote> +<em>Such things are called individuals because each of them consists +of characteristics the collection of which will never be the +same for anything else. For the characteristics of Socrates will +never be in any other particular. But the characteristics of man — +I mean of the man that is general — will be the same in +several things, or rather in all particular men insofar as they +are men.</em> -- Porphyry</blockquote> +</div> +</div> +</div> +</body> +</html> diff --git a/ml/index.txt b/ml/index.txt new file mode 100644 index 0000000..b67e4fc --- /dev/null +++ b/ml/index.txt @@ -0,0 +1,7 @@ +.. contents:: + +.. include:: intro.txt +.. include:: loops.txt +.. include:: modules.txt +.. include:: text.txt +.. include:: types.txt diff --git a/ml/simple-io.aml b/ml/simple-io.aml new file mode 100644 index 0000000..af5aab5 --- /dev/null +++ b/ml/simple-io.aml @@ -0,0 +1,31 @@ +signature SIMPLE_IO = sig + type instream + type outstream + type vector + val openIn: string-> instream + val closeIn: instream -> unit + val openOut: string-> outstream + val closeOut: outstream -> unit + val inputAll: instream -> vector + val output: outstream * vector -> unit + end + +functor ManagedIO(SimpleIO:SIMPLE_IO) = struct + + val inputAll = SimpleIO.inputAll + val output = SimpleIO.output + + fun withInputFile fname manage = let + val file = SimpleIO.openIn fname + in + manage file + finally SimpleIO.closeIn file + end + + fun withOutputFile fname manage = let + val file = SimpleIO.openOut fname + in + manage file + finally SimpleIO.closeOut file + end +end |