diff options
Diffstat (limited to 'ghc/CONTRIB')
59 files changed, 3734 insertions, 0 deletions
diff --git a/ghc/CONTRIB/README b/ghc/CONTRIB/README new file mode 100644 index 0000000000..df029e90e4 --- /dev/null +++ b/ghc/CONTRIB/README @@ -0,0 +1,17 @@ +This directory contains contributed software/bits related to the +Glasgow Haskell compiler. + +fptags Denis Howe <dbh@doc.ic.ac.uk> + Bourne-shell script. + Create an emacs tags file for one or more functional programs. + +haskell.el A Haskell mode from Simon Marlow <simonm@dcs.glasgow.ac.uk>. + +haskel.gif Provided by Lennart Augustsson <augustss@cs.chalmers.se> + +mira2hs Denis Howe <dbh@doc.ic.ac.uk> + Bourne-shell script. + Convert Miranda code to Haskell, more-or-less. + +pphs Pretty-print Haskell code in LaTeX documents. Written by + Andrew Preece while a student at Glasgow. diff --git a/ghc/CONTRIB/fptags b/ghc/CONTRIB/fptags new file mode 100644 index 0000000000..be4b5a5c30 --- /dev/null +++ b/ghc/CONTRIB/fptags @@ -0,0 +1,53 @@ +#!/bin/sh + +#fptags - Create an emacs tags file for functional programs + +#Please send me a copy of any modifications you make. +#Denis Howe <dbh@doc.ic.ac.uk> +#0.00 20-Sep-1991 created +#0.01 09-Apr-1992 don't count ==, <=, >= as definition +#0.02 09-Feb-1994 fix bug in fix 0.01. Add /=. + +# partain: got it from wombat.doc.ic.ac.uk:pub + +#The algorithm for spotting identifiers is crude to the point of +#vulgarity. Any line containing an = is assumed to define an +#identifier. If there are no non-white characters before the = then +#the definition is assumed to start on the previous line. White +#characters are space, tab and > (for literate programs). The =s in +#the relations ==, <=, >= and /= are temporarily transformed while +#searching for =s. + +#The tags file is not in the format produced by ctags but rather, +#that produced by etags and used by GNU-Emacs's find-tag command. + +#Does not tag constructors in sum data types. + +#The tags file, TAGS, is created in the current directory. It +#contains an entry for each argument file. The entry begins with a +#line containing just ^L. The next line contains the filename, a +#comma and the number of following bytes before the next ^L or EOF. +#Subsequent lines should give the location within the argument file of +#identifier definitions. Each line contains a prefix of a line from +#the argument file, a ^?, the line number within the argument file, a +#comma and the position of the start of that line in the argument file +#(first character = 1). + +[ -z "$1" ] && echo usage: $0 files && exit 1 +exec > TAGS +tf=/tmp/fp$$ +for f +do echo "" + sed 's/==//g + s/>=/>/g + s/<=/</g + s|/=|/|g' $f | awk ' + /^[> ]*=/{ print prevline "" NR-1 "," prevpos; } + /[^> ].*=/{ print $0 "" NR "," pos; } + { prevline = $0; prevpos = pos; pos += length($0)+1; } + ' pos=1 | sed 's/[ )]*=.*// + s//=/g' > $tf + echo -n $f,; echo `wc -c < $tf` #lose spaces + cat $tf +done +rm -f $tf diff --git a/ghc/CONTRIB/haskel.gif b/ghc/CONTRIB/haskel.gif Binary files differnew file mode 100644 index 0000000000..89b20abefc --- /dev/null +++ b/ghc/CONTRIB/haskel.gif diff --git a/ghc/CONTRIB/haskell.el b/ghc/CONTRIB/haskell.el new file mode 100644 index 0000000000..43461eb69f --- /dev/null +++ b/ghc/CONTRIB/haskell.el @@ -0,0 +1,185 @@ +;;; Haskell mode for emacs (c) Simon Marlow 11/1/92 + +;;; To: partain@dcs.gla.ac.uk +;;; Subject: Haskell mode for emacs +;;; Date: Mon, 14 Dec 92 17:41:56 +0000 +;;; From: Simon Marlow <simonm@dcs.gla.ac.uk> +;;; +;;; ... What it buys you: very little actually, but the nice things are +;;; +;;; (i) Pressing line feed indents the next line according to the +;;; previous one, +;;; (ii) Pressing Meta-; gives you a comment on the current line, +;;; (iii) For literate scripts, pressing line feed gives you a bird +;;; track on the next line if there was one on the previous +;;; line, and does the indentation +;;; (iv) For literate scripts, pressing Meta-Tab toggles a bird track +;;; on or off at the beginning of the current line, +;;; (v) There's a function for toggling bird tracks on all lines in a +;;; region. +;;; (vi) Emacs says "Haskell" or "Literate Haskell" in the mode line :-) +;;; +;;; You'll have to make the necessary changes in .emacs to load in the +;;; library automatically (you probably know what to do). ... + +(defvar haskell-mode-map () + "Keymap used in Haskell mode.") + +(defvar haskell-literate-mode-map () + "Keymap used in Haskell literate script mode.") + +(defvar haskell-mode-syntax-table () + "Syntax table for haskell mode.") + +(if haskell-mode-map + () + (setq haskell-mode-map (make-sparse-keymap)) + (define-key haskell-mode-map "\C-j" 'haskell-newline-and-indent)) + +(if haskell-literate-mode-map + () + (setq haskell-literate-mode-map (make-sparse-keymap)) + (define-key haskell-literate-mode-map "\C-j" 'haskell-literate-newline-and-indent) + (define-key haskell-literate-mode-map "\M-\C-i" 'haskell-literate-toggle-bird-track-line)) + +(if haskell-mode-syntax-table + () + (let ((i 0)) + (setq haskell-mode-syntax-table (make-syntax-table)) + (while (< i ?0) + (modify-syntax-entry i "." haskell-mode-syntax-table) + (setq i (1+ i))) + (while (< i (1+ ?9)) + (modify-syntax-entry i "_" haskell-mode-syntax-table) + (setq i (1+ i))) + (while (< i ?A) + (modify-syntax-entry i "." haskell-mode-syntax-table) + (setq i (1+ i))) + (while (< i (1+ ?Z)) + (modify-syntax-entry i "w" haskell-mode-syntax-table) + (setq i (1+ i))) + (while (< i ?a) + (modify-syntax-entry i "." haskell-mode-syntax-table) + (setq i (1+ i))) + (while (< i (1+ ?z)) + (modify-syntax-entry i "w" haskell-mode-syntax-table) + (setq i (1+ i))) + (while (< i 128) + (modify-syntax-entry i "." haskell-mode-syntax-table) + (setq i (1+ i))) + (modify-syntax-entry ? " " haskell-mode-syntax-table) + (modify-syntax-entry ?\t " " haskell-mode-syntax-table) + (modify-syntax-entry ?\n ">" haskell-mode-syntax-table) + (modify-syntax-entry ?\f ">" haskell-mode-syntax-table) + (modify-syntax-entry ?\" "\"" haskell-mode-syntax-table) + (modify-syntax-entry ?\' "w" haskell-mode-syntax-table) + (modify-syntax-entry ?_ "w" haskell-mode-syntax-table) + (modify-syntax-entry ?\\ "." haskell-mode-syntax-table) + (modify-syntax-entry ?\( "()" haskell-mode-syntax-table) + (modify-syntax-entry ?\) ")(" haskell-mode-syntax-table) + (modify-syntax-entry ?\[ "(]" haskell-mode-syntax-table) + (modify-syntax-entry ?\] ")[" haskell-mode-syntax-table) + (modify-syntax-entry ?{ "(}1" haskell-mode-syntax-table) + (modify-syntax-entry ?} "){4" haskell-mode-syntax-table) + (modify-syntax-entry ?- "_ 123" haskell-mode-syntax-table) + )) + +(defun haskell-vars () + (kill-all-local-variables) + (make-local-variable 'paragraph-start) + (setq paragraph-start (concat "^$\\|" page-delimiter)) + (make-local-variable 'paragraph-separate) + (setq paragraph-separate paragraph-start) + (make-local-variable 'comment-start) + (setq comment-start "--") + (make-local-variable 'comment-start-skip) + (setq comment-start-skip "--[^a-zA-Z0-9]*") + (make-local-variable 'comment-column) + (setq comment-column 40) + (make-local-variable 'comment-indent-hook) + (setq comment-indent-hook 'haskell-comment-indent)) + +(defun haskell-mode () + "Major mode for editing Haskell programs. +Blank lines separate paragraphs, Comments start with '--'. +Use Linefeed to do a newline and indent to the level of the previous line. +Tab simply inserts a TAB character. +Entry to this mode calls the value of haskell-mode-hook if non-nil." + (interactive) + (haskell-vars) + (setq major-mode 'haskell-mode) + (setq mode-name "Haskell") + (use-local-map haskell-mode-map) + (set-syntax-table haskell-mode-syntax-table) + (run-hooks 'haskell-mode-hook)) + +(defun haskell-literate-mode () + "Major mode for editing haskell programs in literate script form. +Linefeed produces a newline, indented maybe with a bird track on it. +M-TAB toggles the state of the bird track on the current-line. +Entry to this mode calls haskell-mode-hook and haskell-literate-mode-hook." + (interactive) + (haskell-vars) + (setq major-mode 'haskell-literate-mode) + (setq mode-name "Literate Haskell") + (use-local-map haskell-literate-mode-map) + (set-syntax-table haskell-mode-syntax-table) + (run-hooks 'haskell-mode-hook) + (run-hooks 'haskell-literate-mode-hook)) + +;; Find the indentation level for a comment.. +(defun haskell-comment-indent () + (skip-chars-backward " \t") + ;; if the line is blank, put the comment at the beginning, + ;; else at comment-column + (if (bolp) 0 (max (1+ (current-column)) comment-column))) + +;; Newline, and indent according to the previous line's indentation. +;; Don't forget to use 'indent-tabs-mode' if you require tabs to be used +;; for indentation. +(defun haskell-newline-and-indent () + (interactive) + (newline) + (let ((c 0)) + (save-excursion + (forward-line -1) + (back-to-indentation) + (setq c (if (eolp) 0 (current-column)))) + (indent-to c))) ;ident new line to this level + +;;; Functions for literate scripts + +;; Newline and maybe add a bird track, indent +(defun haskell-literate-newline-and-indent () + (interactive) + (newline) + (let ((bird-track nil) (indent-column 0)) + (save-excursion + (forward-line -1) + (if (= (following-char) ?>) (setq bird-track t)) + (skip-chars-forward "^ \t") + (skip-chars-forward " \t") + (setq indent-column (if (eolp) 0 (current-column)))) + (if bird-track (insert-char ?> 1)) + (indent-to indent-column))) + +;; Toggle bird-track ][ +(defun haskell-literate-toggle-bird-track-line () + (interactive) + (save-excursion + (beginning-of-line) + (if (= (following-char) ? ) + (progn (delete-char 1) (insert-char ?> 1)) + (if (= (following-char) ?>) + (progn (delete-char 1) (insert-char ? 1)) + (progn (insert-char ?> 1) (insert-char ? 1)))))) + +(defun haskell-literate-toggle-bird-track-region (start end) + (interactive "r") + (save-excursion + (goto-char start) + (while (<= (point) end) + (beginning-of-line) + (haskell-literate-toggle-bird-track-line) + (forward-line 1)))) + diff --git a/ghc/CONTRIB/haskell_poem b/ghc/CONTRIB/haskell_poem new file mode 100644 index 0000000000..1f8218648a --- /dev/null +++ b/ghc/CONTRIB/haskell_poem @@ -0,0 +1,58 @@ +From: dsmith@lucy.cs.waikato.ac.nz +Subject: A Haskell Lover's Plea +Date: Thu, 16 Mar 1995 21:06:35 -0500 +To: haskell-dist@dcs.gla.ac.uk + + + A Haskell Lover's Plea + +Why should I renounce for you, dear Haskell, +My much yearned for side-effects? +Why should I face the software dragons +Without my weapon, my manly spear of destruction? +They call you non-strict, oh so elegant and pure Ariel. +Yet side-effect celibacy is surely severe. + + Your flesh is too weak, you brutish beast. + The tarpit demons of software hell await you! + This sinful habit in which you indulge + Does more harm than good. + Restrain yourself! And you too will see + The wondrous and refined joys of referential transparency! + +Alas, I can do without goto, without call/cc. +But sans side-effects, I am lost and forlorn, can't you see? +Oh, lady fairer yet than admirable Miranda (tm), +Scheme's prolix, parenthetical tedium +Is no match for your elegant syntax. What's more, +Your list comprehensions outshine even Prolog for sure... + + Ah, flatter me not, you low-spirited Caliban! + Do you not know what advantages await + Those who renounce destructive update? + Start with an immaculate high-level specification, + Throw in some algebraic code transformation. + Soon you will have a provably correct and maintainable implementation. + +Show mercy on mere mortals like me! +How I dream still of the efficient pleasures of pointer manipulation! +How I too wish to mutate memory with thoughts born of von Neumann earthiness! +Relent! Relent! Let me have my assignment, my printf, my gensym. +Let me fulfill my destructive impulses. +Let me set bang. Let me update. Let me assign. Let me mutate. + + Fear not, lowly beast, I have heard your pleas. + To satisfy your low-level desire + I'll give you monads, linear types, MADTs, + Even single-threaded polymorphic lambda calculi. + My beauty may suffer, still I will aspire + To let you do (within typeful limits) what you please. + +Rejoice! Rejoice! I'm free! I'm free! +The best of both worlds is mine at last. +Oh, infinite progeny of Church, Hope, and ML, +I curry favor not when I say: +Scan me right, fold me left, +Lazy lady of many shapes, you've got class. + + Don Smith (dsmith@cs.waikato.ac.nz) diff --git a/ghc/CONTRIB/mira2hs b/ghc/CONTRIB/mira2hs new file mode 100644 index 0000000000..1ad61040f7 --- /dev/null +++ b/ghc/CONTRIB/mira2hs @@ -0,0 +1,364 @@ +#!/bin/sh + +# mira2hs - Convert Miranda to Haskell (or Gofer) + +# usage: mira2hs [infile [outfile]] +# +# Input defaults to stdin, output defaults to <infile>.hs or stdout if +# input is stdin + +# Copyright Denis Howe 1992 +# +# Permission is granted to make and distribute verbatim or modified +# copies of this program, provided that every such copy or derived +# work carries the above copyright notice and is distributed under +# terms identical to these. +# +# Miranda is a trademark of Research Software Limited. +# (E-mail: mira-request@ukc.ac.uk). +# +# Denis Howe <dbh@doc.ic.ac.uk> + +# NOTE: This program needs a sed which understands \<word\> regular +# expressions, eg. Sun or GNU sed (gsed). + +# partain: got it from wombat.doc.ic.ac.uk:pub + +# 1.05 18 Sep 1992 zip -> zipPair +# 1.04 29 Jul 1992 Improve handling of ==, -- and whitespace round guards +# $infix -> `infix` +# 1.03 24 Apr 1992 Incorporate Lennart's miranda.hs functions +# Replace most Miranda fns & operators +# Use \<word\> patterns, ';' -> ',' in list comprehension +# Provide example main functions +# 1.02 30 Mar 1992 Mods to header, fix handling of type,type +# Comment out String definition, Bool ops +# num -> Int, = -> == in guards +# 1.01 10 Dec 1991 Convert type names to initial capital +# 1.00 27 Sep 1991 Initial version advertised to net + +# Does NOT handle: +# continued inequalities (a < x < b) +# boolean '=' operator -> '==' (except in guards) +# main function +# multi-line type definitions +# guards on different line from body +# diagonalised list comprehensions (//) +# repeated variables in patterns (eg. LHS of function) +# filemode -> statusFile, getenv -> getEnv, read -> readFile, system +# include directives +# conflicts with prelude identifiers + +# Miranda's num type (Integral+Floating) is changed to Int so won't +# work for non-intger nums. Miranda has irrefutable ("lazy") tuple +# patterns so you may need to add a ~, like ~(x,y) in Haskell. +# Haskell functions "length" and "not" may need parentheses round +# their arguments. + +# mira2hs copes equally well with literate and illiterate scripts. It +# doesn't care what characters lines begins with - it assumes +# everything is code. It will convert code even inside comments. +# +# For literate programs you will have to turn the standard header into +# literate form and rename the output .lhs. You might want to do this +# to (a copy of) mira2hs itself if you have lots of literate progs. + +# ToDo: = inside brackets -> == + +if [ -n "$1" ] +then in=$1 + out=`basename $in .m`.hs +else in="Standard input" +fi +[ -n "$2" ] && out=$2 +tmp=/tmp/m2h$$ +script=${tmp}s + +# Prepend a standard header and some function definitions. +echo -- $in converted to Haskell by $USER on `date` > $tmp +cat << "++++" >> $tmp +module Main (main) where + +-------------------- mira2hs functions -------------------- + +cjustify :: Int -> String -> String +cjustify n s = spaces l ++ s ++ spaces r + where + m = n - length s + l = div m 2 + r = m - l + +e :: (Floating a) => a +e = exp 1 + +hugenum :: (RealFloat a) => a +hugenum = encodeFloat (r^d-1) (n-d) + where r = floatRadix hugenum + d = floatDigits hugenum + (_,n) = floatRange hugenum + +subscripts :: [a] -> [Int] -- Miranda index +subscripts xs = f xs 0 + where f [] n = [] + f (_:xs) n = n : f xs (n+1) + +integer :: (RealFrac a) => a -> Bool +integer x = x == fromIntegral (truncate x) + +lay :: [String] -> String +lay = concat . map (++"\n") + +layn :: [String] -> String +layn = concat . zipWith f [1..] + where + f :: Int -> String -> String + f n x = rjustify 4 (show n) ++ ") " ++ x ++ "\n" + +limit :: (Eq a) => [a] -> a +limit (x:y:ys) | x == y = x + | otherwise = limit (y:ys) +limit _ = error "limit: bad use" + +ljustify :: Int -> String -> String +ljustify n s = s ++ spaces (n - length s) + +member :: (Eq a) => [a] -> a -> Bool +member xs x = elem x xs + +merge :: (Ord a) => [a] -> [a] -> [a] +merge [] ys = ys +merge xs [] = xs +merge xxs@(x:xs) yys@(y:ys) | x <= y = x : merge xs yys + | otherwise = y : merge xxs ys + +numval :: (Num a) => String -> a +numval cs = read cs + +postfix :: [a] -> a -> [a] +postfix xs x = xs ++ [x] + +rep :: Int -> b -> [b] +rep n x = take n (repeat x) + +rjustify :: Int -> String -> String +rjustify n s = spaces (n - length s) ++ s + +seq :: (Eq a) => a -> b -> b +seq x y = if x == x then y else y + +shownum :: (Num a) => a -> String +shownum x = show x + +sort :: (Ord a) => [a] -> [a] +sort x | n <= 1 = x + | otherwise = merge (sort (take n2 x)) (sort (drop n2 x)) + where n = length x + n2 = div n 2 +spaces :: Int -> String +spaces 0 = "" +spaces n = ' ' : spaces (n-1) + +tinynum :: (RealFloat a) => a +tinynum = encodeFloat 1 (n-d) + where r = floatRadix tinynum + d = floatDigits tinynum + (n,_) = floatRange tinynum + +undef :: a +undef = error "undefined" + +zipPair (x,y) = zip x y + +-- Following is UNTESTED +data Sys_message = + Stdout String | Stderr String | Tofile String String | + Closefile String | Appendfile String | +-- System String | + Exit Int + +doSysMessages :: [Sys_message] -> Dialogue +doSysMessages requests responses = doMsgs requests [] + +doMsgs [] afs = [] +doMsgs ((Appendfile f):rs) afs = doMsgs rs (f:afs) +doMsgs ((Exit n) :rs) afs = [] +doMsgs (r :rs) afs + = doMsg r : doMsgs rs afs + where doMsg (Stdout s) = AppendChan stdout s + doMsg (Stderr s) = AppendChan stderr s + doMsg (Tofile f s) | elem f afs = AppendFile f s + | otherwise = WriteFile f s + doMsg (Closefile f) + = error "doSysMessages{mira2hs}: Closefile sys_message not supported" +-- doMsg (Closefile f) = CloseFile f -- optional +-- doMsg (System cmd) +-- = error "doSysMessages{mira2hs}: System sys_message not supported" + +-- Pick a main. (If I was clever main would be an overloaded fn :-). +main :: Dialogue +-- main = printString s -- s :: String +-- main = interact f -- f :: String -> String +-- main = doSysMessages l -- l :: [Sys_message] +-- main = print x -- x :: (Text a) => a + +printString :: String -> Dialogue +printString s = appendChan stdout s abort done + +-------------------- mira2hs functions end -------------------- + +++++ +# It's amazing what sed can do. +sed ' +# Type synonyms and constructed types: insert "type" or "data". Add a +# dummy :: to flag this line to the type name munging below. Beware +# ====== in comments. +/[^=]==[^=]/s/\(.*=\)=/::type \1/g +/::=/s/\(.*\)::=/::data \1=/g +# Change type variable *s to "a"s +/::/s/\*/a/g +# List length & various other renamed functions (# reused below). +s/ *# */ length /g +s/\<arctan\>/atan/g +s/\<code\>/ord/g +s/\<converse\>/flip/g +s/\<decode\>/chr/g +s/\<dropwhile\>/dropWhile/g +s/\<digit\>/isDigit/g +s/\<entier\>/floor/g +s/\<hd\>/head/g +s/\<index\>/subscripts/g +s/\<letter\>/isAlpha/g +s/\<map2\>/zipWith/g +s/\<max\>/maximum/g +s/\<max2\>/max/g +s/\<min\>/minimum/g +s/\<min2\>/min/g +s/\<mkset\>/nub/g +s/\<neg\>/negate/g +s/\<scan\>/scanl/g +s/\<tl\>/tail/g +# Miranda uncurried zip -> zipPair (above). Do before zip2->zip. +s/\<zip\>/zipPair/g +# Miranda curried zip2 -> zip +s/\<zip2\>/zip/g +# Haskel div and mod are functions, not operators +s/\<div\>/\`div\`/g +s/\<mod\>/\`mod\`/g +# Locate commas introducing guards by temporarily changing others. +# Replace comma with # when after || or unmatched ( or [ or before +# unmatched ) or ] or in string or char constants. Replace +# matched () not containing commas with _<_ _>_ and matched [] +# with _{_ _}_ and repeat until no substitutions. +: comma +s/\(||.*\),/\1#/g +s/\([[(][^])]*\),/\1#/g +s/,\([^[(]*[])]\)/#\1/g +s/(\([^),]*\))/_<_\1_>_/g +s/\[\([^],]*\)\]/_{_\1_}_/g +s/"\(.*\),\(.*\)"/"\1#\2"/g +'"#change quotes +s/','/'#'/g +"'#change quotes +t comma +# Restore () and [] +s/_<_/(/g +s/_>_/)/g +s/_{_/[/g +s/_}_/]/g +# The only commas left now introduce guards, remove optional "if" +s/,[ ]*if/,/g +s/[ ]*,[ ]*/,/g +# Temporarily change ~=, <=, >=. +s%~=%/_eq_%g +s/<=/<_eq_/g +s/>=/>_eq_/g +# Replace every = in guard with == (do after type synonyms) +: neq +s/\(,.*[^=]\)=\([^=]\)/\1==\2/ +t neq +# Fix other equals +s/_eq_/=/g +# Replace <pattern> = <rhs> , <guard> with <pattern> | (<guard>) = <rhs> +s/=\(..*\),\(..*\)/| (\2) =\1/g +s/(otherwise)/otherwise/g +# Restore other commas +s/#/,/g +# List difference. Beware ------ in comments. +s/\([^-]\)--\([^-]\)/\1\\\\\2/g +# Comments (do after list diff) +s/||/--/g +s/--|/---/g +# Boolean not, or, and (do after comments) +s/ *~ */ not /g +s% *\\/ *% || %g +s/&/&&/g +# list indexing +s/!/!!/g +# Locate semicolon in list comprehensions by temporarily replacing ones +# in string or char constants with #. Replace matched [] not +# containing semicolon with _{_ _}_ and repeat until no substitutions. +: semico +s/\[\([^];]*\)\]/_{_\1_}_/g +s/"\([^;"]*\);\([^;"]*\)"/"\1#\2"/g +'"#change quotes +s/';'/'#'/g +"'# change quotes +t semico +# Remaining [ ] must contain semicolons which we change to comas. +: lcomp +s/\(\[[^;]*\);/\1,/g +s/;\([^;]*\]\)/,\1/g +t lcomp +# Restore [] and other semicolons +s/_{_/[/g +s/_}_/]/g +s/#/;/g +# Miranda dollar turns a function into an infix operator +s/\$\([_A-Za-z0-9'\'']\{1,\}\)/`\1`/g +' $1 >> $tmp + +# Create a sed script to change the first letter of each type name to +# upper case. +# Dummy definitions for predefined types (num is special). +( + echo ::type char = + echo ::type bool = + echo ::type sys_message = + cat $tmp +) | \ +# Find type definitions & extract type names +sed -n '/::data[ ].*=/{ +h;s/::data[ ]*\([^ =]\).*/\1/p +y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/;p +g;s/::data[ ]*[^ =]\([^ =]*\).*=.*/\1/p +} +/::type[ ].*=/{ +h;s/::type[ ]*\([^ =]\).*/\1/p +y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/;p +g;s/::type[ ]*[^ =]\([^ =]*\).*=.*/\1/p +}' | \ +# Read lower case initial, upper case inital and rest of type name. +# Type is always after "::". +( +echo ": loop" +while read h; read H; read t +do echo "/::/s/\<$h$t\>/$H$t/g" +done +cat << "++++" +# num -> Int +/::/s/\<num\>/Int/g +# Loop round to catch type,type,.. +t loop +# Remove the dummy :: flags from type definitions. +s/::type/type/ +s/::data/data/ +# Comment out string type if defined. +s/\(type[ ]*String[ ]*=\)/-- \1/ +++++ +) > $script + +if [ "$out" ] +then exec > $out +fi +sed -f $script $tmp +rm -f ${tmp}* diff --git a/ghc/CONTRIB/pphs/Jmakefile b/ghc/CONTRIB/pphs/Jmakefile new file mode 100644 index 0000000000..24d546c7e7 --- /dev/null +++ b/ghc/CONTRIB/pphs/Jmakefile @@ -0,0 +1,16 @@ +SuffixRule_c_o() + +BuildPgmFromOneCFile(pphs) + +InstallBinaryTarget(pphs,$(INSTBINDIR_GHC)) + +/* These .dvi-ish rules are not right, but so what? [WDP 94/09] */ + +docs/UserGuide.dvi: docs/UserGuide.tex + $(RM) $@ + (cd docs && ../$(LTX) UserGuide.tex) + +/* Student project final report */ +docs/Report.dvi: docs/Report.tex + $(RM) $@ + (cd docs && ../$(LTX) Report.tex) diff --git a/ghc/CONTRIB/pphs/README b/ghc/CONTRIB/pphs/README new file mode 100644 index 0000000000..a99d81e8f5 --- /dev/null +++ b/ghc/CONTRIB/pphs/README @@ -0,0 +1,18 @@ +"pphs" is a Haskell code pretty-printer, written by Andrew Preece as a +senior honours project at Glasgow. + +== original README ======================================== + +* * RELEASE directory * * + +To find out how to use pphs read the User Guide by +typing xdvi User_Guide + +If you put the output of pphs into a file called Haskell.tex +then you can use Wrapper.tex to produce a ``stand alone'' +dvi file of your program. Just run latex on Wrapper.tex +by typing latex Wrapper then view with xdvi Wrapper + +* * MAINTENANCE directory * * + +Code for pphs program, LaTeX file, report, Makefile, etc. diff --git a/ghc/CONTRIB/pphs/docs/Code.tex b/ghc/CONTRIB/pphs/docs/Code.tex new file mode 100644 index 0000000000..5437457350 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Code.tex @@ -0,0 +1,53 @@ +\chapter{Project code} + +\section{The program code - {\tt pphs.c}} \label{prog-code} + +\newpage % 8 pages of code a2ps (21.4.94) +\setcounter{page}{50} + +\section{The style file - {\tt pphs.sty}} \label{style-code} + +\begin{verbatim} +% ========================================= +% Definitions for use with the pphs program +% ========================================= + +\typeout{For use with the pphs program} + +% Definitions of commands used by pphs + +\newbox\foo +\def\skipover#1{\setbox\foo\hbox{#1}\hskip\wd\foo} +\def\plusplus{\hbox{$+\mkern-7.5mu+$}} +\def\xspa#1{\hskip#1ex} +\def\bareq{\setbox\foo\hbox{$=$}\makebox[\wd\foo]{$|$}} + +% User-redefinable commands - typefaces + +\def\keyword{\bf} +\def\iden{\it} +\def\stri{\rm} +\def\com{\rm} +\def\numb{\rm} + +% User-redefinable commands - quote marks + +\def\forquo{\hbox{\rm '}} +\def\escquo{\hbox{\rm '}} +\end{verbatim} + +\section{The make file - {\tt Makefile}} \label{make-code} + +\begin{verbatim} +# Makefile for A Preece's program... etc. + +default: + @echo "Type make pphs to create the program." + +pphs: pphs.c + cc -o pphs pphs.c + +test: pphs + pphs test + latex test.tex +\end{verbatim}
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/Error_Messages.tex b/ghc/CONTRIB/pphs/docs/Error_Messages.tex new file mode 100644 index 0000000000..e53c960eb9 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Error_Messages.tex @@ -0,0 +1,36 @@ +\chapter{Error messages given} + +The {\tt pphs} program generates error messages to {\tt stderr}, +with error codes. Normal operation of the program will be +indicated by error code {\tt 0}. + +\section{\tt Call with one file name} + +Error code {\tt 1} is produced when {\tt pphs} is not called with +exactly one filename. Either no filename was given, or too many +filenames were given. Call {\tt pphs} again with one filename. + +\section{\tt File could not be opened} + +Error code {\tt 2} is produced when the filename given when {\tt pphs} +was called could not be opened. This could be because it did not exist, +or was read-protected. Call {\tt pphs} again with a filename that exists +and is readable. + +\section{\tt Stack is too big} + +Error code {\tt 3} is produced when the program has used up too much of +the computer's memory. It is not possible to run {\tt pphs} on this file +without getting more memory for the computer to use. + +\section{\tt Queue is too big} + +Error code {\tt 4} is produced when the program has used up too much of +the computer's memory. It is not possible to run {\tt pphs} on this file +without getting more memory for the computer to use. + +\section{\tt Stack underflow} + +Error code {\tt 5} is produced when the program attempts to remove an item +from a stack in memory that doesn't exist. This should not happen in the +{\tt pphs} program. diff --git a/ghc/CONTRIB/pphs/docs/External_Specification.tex b/ghc/CONTRIB/pphs/docs/External_Specification.tex new file mode 100644 index 0000000000..4190680670 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/External_Specification.tex @@ -0,0 +1,117 @@ +\section{External specification} + +The program is to be run in UNIX by typing {\tt pphs} followed by the +filename containing the Haskell code requiring to be typeset. This will +produce the \LaTeX\ code to stdout. If there is some error, +a suitable error message is to be printed to stderr. The user may, if +desired, direct the output to another file by typing {\tt pphs infilename > outfilename}. +In this case, any error messages must still go to the screen and not the file. + +The input filename may be given in its entirety or the {\tt .hs} extension may be omitted. +In the case where there are two files with the same name, except that one has the +{\tt .hs} extension, to run the program on the file with the extension to its name +the complete filename will be typed. + +The output will consist of the \LaTeX\ code to produce the typeset Haskell +code. As this is to be made easily insertable into another \LaTeX\ document, the output +will not contain any header information such as declarations or definitions. These, +however, can +be contained in a style file which the user will include in their main document. + +Keywords and identifiers are to be distinguished in the result as typeset. +The default for keywords is to be boldface and for identifiers italics. +Numbers not forming part of an identifier are to be in roman by default +while math is to be used where appropriate. + +Haskell uses ASCII characters and combinations of ASCII characters +to substitute for mathematical characters not present on the +keyboard. Where this happens, the program is to replace the ASCII character(s) +with the corresponding mathematical character using the special \LaTeX\ commands +to generate them. The single characters are: +\begin{quote} +\begin{tabular}[t]{@{}cc@{}} +Haskell & Math\\ +{\tt *} & $\times$ +\end{tabular} +\end{quote} +The double characters are: +\begin{quote} +\begin{tabular}[t]{@{}cc@{}} +Haskell & Math\\ +{\tt ++} & {\hbox{$+\mkern-7.5mu+$}}\\ +{\tt :+} & {:}{+}\\ +{\tt <=} & $\leq$\\ +{\tt >=} & $\geq$\\ +{\tt <-} & $\leftarrow$\\ +{\tt ->} & $\rightarrow$\\ +{\tt =>} & $\Rightarrow$ +\end{tabular} +\end{quote} + +The \LaTeX\ system uses special characters to aid with the typesetting. +They are: +\begin{quote} +\(\#\ \$\ \%\ \&\ \char'176\ \_\ \char'136\ \hbox{$\setminus$}\ \hbox{$\cal \char'146\ \char'147$}\) +\end{quote} +These characters may +appear in the input, so the program must generate the correct \LaTeX\ code to +print them and +avoid having them mess up the typesetting process. + +As the output when typeset must have the same layout as the input, the program +must get the linebreaks and indentation right. As \LaTeX\ is primarily designed for normal +text, it would ignore the linebreaks and indentation in the Haskell file. Therefore +the program must insert them using the correct typesetting commands. In the case of +linebreaks it must recognise where these occur, but for indentation it must also work out +how much space needs to be inserted. + +There are two types of indentation in Haskell programs: left-hand and internal. +For the former, the program must work out what the start of the line is aligned +under in the input file. It then has to calculate how much space is required +to get the line of text to line up with this in the output once typeset. +Take, for instance, the following Haskell example input: +\begin{quote} +\begin{verbatim} +foobar a b = c + where + c = (a, b) +\end{verbatim} +\end{quote} +Notice that the {\tt w} of {\tt where} on the second line lines up +under the {\tt =} on +the first line. Similarly, the {\tt c} on the third line is aligned under the +final letter of {\tt where} on the second line. The result as typeset must +get the indentation correct like this: +\begin{quote} +\begin{tabbing} +foobar a b = c\\ +\newbox\foo +{\setbox\foo\hbox{foobar a b }\hskip\wd\foo}where\\ +{\setbox\foo\hbox{foobar a b wher}\hskip\wd\foo}c = (a, b) +\end{tabbing} +\end{quote} + +For internal indentation, the program must first recognise where it has +occurred. It must then insert the correct amount of space to get alignment +in the output. As \LaTeX\ uses variable-width characters, extra space +may be needed in lines preceding a line within an internal alignment section. +This is necessary if a lower line which +aligns in the input file is longer up to the alignment point, +due to the variable-width characters, than its predecessors +once it has been properly typeset. For example: +\begin{quote} +\begin{verbatim} +lilli :: a +wmwm :: b +\end{verbatim} +\end{quote} +becomes +\begin{quote} +\begin{tabular}[t]{@{}l@{\ }c@{\ }l} +lilli & :: & a\\ +wmwm & :: & b\\ +\end{tabular} +\end{quote} +Notice how {\tt lilli} is longer than {\tt wmwm} in the input file style +using fixed-width font but shorter when using the variable-width font +of the typeset output. diff --git a/ghc/CONTRIB/pphs/docs/Faults.tex b/ghc/CONTRIB/pphs/docs/Faults.tex new file mode 100644 index 0000000000..1c38984bb7 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Faults.tex @@ -0,0 +1,66 @@ +\chapter{Things that don't work} \label{faults} + +The {\tt pphs} program has some deficiencies that cause it to not always produce the +correct code. These are detailed in this chapter. + +\section{Internal alignment} + +The program can deal only with simple internal alignment. It cannot deal with a +situation where there is more than one column where internal alignment is occurring +on the same line. This can occur when two sections of internal +alignment overlap by having lines in common or where one section is wholly within another. +When this happens, {\tt pphs} will only +line up one occurrence of internal alignment on each line. + +Related is left alignment under a section of internal alignment. Take this earlier example. +\begin{quote} +\input{Haskell_leftindent1} +\end{quote} +This is how this code is typeset by {\tt pphs}: +\begin{quote} +\input{LaTeX_leftindent1} +\end{quote} +Notice how the {\bf where} on the third line doesn't line up under the {\it gcd\/}$'$ on +the second. The reason for this +is the \LaTeX\ {\tt tabular} section does not respect any spaces that occur at the end +of the right hand edge of the left hand column such as those after +{\tt gcd x y} and instead moves the central column left +so it is only one space away from the longest piece of text in the left hand column, +in this case {\iden gcd\/}\xspa1 {\iden x\/}\xspa1 {\iden y\/}. +The left indentation of the lines under the internal alignment section does not take this +movement into account and so if a line is indented beyond the end of the text in the first +column of the last line of the internal alignment section then it may be incorrectly +positioned and therefore will not align with what it was aligned with in the original +program. Should a piece of text in the left hand column be longer once typeset than what was +previously the longest, due to the variable-width characters used by \LaTeX , +then the second and third columns will get moved to the right, and so, similarly, +any code indented under the other columns will be wrongly positioned. + +Where a section of internal alignment coincides with the bottom of the user's page, +it can run off the bottom of the page. This is because the {\tt tabular} environment +used for internal alignment sections does not allow pagebreaks. Therefore the pagebreak +will come after the section has been completed. + +\section{Mathematical symbols} + +Mathematical symbols are always written in math font. This means that where, say, +comments are re-defined to be in typewriter font, as in the following +example, any mathematical symbols in the comments +will appear in math font, rather than typewriter font. +\begin{quote} +\def\com{\tt} +\input{LaTeX_comment} +\end{quote} + +\section{Left indentation} + +Where a line is indented beyond the end of its predecessor and aligns under another +line, but when typeset, the predecessor becomes longer than the indentation level +due to the variable-width characters, the line's indentation will appear to be under the +predecessor line. + +\section{Floating point numbers} + +Currently {\tt pphs} will recognise strings such as {\tt 3.} or {\tt 5.6e} as +valid floating point numbers. This needs rectifying so only valid floating +point numbers are recognised.
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/Future_Work.tex b/ghc/CONTRIB/pphs/docs/Future_Work.tex new file mode 100644 index 0000000000..4bf7b89692 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Future_Work.tex @@ -0,0 +1,30 @@ +\chapter{Things remaining to be implemented} + +Due to pressure of time, not everything that was planned to be included in +{\tt pphs} was implemented. This chapter details these things. + +\section{Faults} + +The faults detailed in Chapter~\ref{faults} remain to be rectified. The fault +regarding multiple columns of internal alignment would, it seems, require a +major rethink on the way internal alignment is handled by {\tt pphs}, perhaps +using the {\tt tabbing} environment with tabs and tabstops, rather than the +{\tt tabular} environment as at present. This could also +be extended to left indentation to solve the problem with indentation under +internal alignment section. Elimination of the {\tt tabular} sections would solve +the problem of pagebreaks during internal alignment sections. + +\section{Parsing} + +Currently, {\tt pphs} only does limited parsing. This could be altered to +give a full parse by restructuring into Lex. This would be better because +it would allow sections of code to be classified more easily once they were +broken down. + +\section{Literate Haskell} + +It has been suggested that {\tt pphs} be extended to accept Literate Haskell +files as input. This is where the program code lines all start with {\tt >} +and plain text is written between sections of code to document the file. +This would be called by an additional option, say {\tt -l}, and would typeset +the sections of Haskell code, whilst leaving the text sections alone.
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/Haskell_char.tex b/ghc/CONTRIB/pphs/docs/Haskell_char.tex new file mode 100644 index 0000000000..265b063bce --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Haskell_char.tex @@ -0,0 +1,7 @@ +\begin{verbatim} +-- Character functions + +minChar, maxChar :: Char +minChar = '\0' +maxChar = '\255' +\end{verbatim}
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/Haskell_internalalign1.tex b/ghc/CONTRIB/pphs/docs/Haskell_internalalign1.tex new file mode 100644 index 0000000000..b4942bb9c2 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Haskell_internalalign1.tex @@ -0,0 +1,12 @@ +% From Haskell report PreludeComlex.hs +\begin{verbatim} +instance (RealFloat a) => Num (Complex a) where + (x:+y) + (x':+y') = (x+x') :+ (y+y') + (x:+y) - (x':+y') = (x-x') :+ (y-y') + (x:+y) * (x':+y') = (x*x'-y*y') :+ (x*y'+y*x') + negate (x:+y) = negate x :+ negate y + abs z = magnitude z :+ 0 + signum 0 = 0 + signum z@(x:+y) = x/r :+ y/r where r = magnitude z + fromInteger n = fromInteger n :+ 0 +\end{verbatim}
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/Haskell_internalalign2.tex b/ghc/CONTRIB/pphs/docs/Haskell_internalalign2.tex new file mode 100644 index 0000000000..80d17b6a16 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Haskell_internalalign2.tex @@ -0,0 +1,4 @@ +\begin{verbatim} +fst :: (a,b) -> a +fst (x,_) = x +\end{verbatim} diff --git a/ghc/CONTRIB/pphs/docs/Haskell_leftindent1.tex b/ghc/CONTRIB/pphs/docs/Haskell_leftindent1.tex new file mode 100644 index 0000000000..aac11d82e8 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Haskell_leftindent1.tex @@ -0,0 +1,7 @@ +\begin{verbatim} +gcd :: Int -> Int -> Int +gcd x y = gcd' (abs x) (abs y) + where gcd' x 0 = x + gcd' x y = gcd' y (x `rem` y) +\end{verbatim} + diff --git a/ghc/CONTRIB/pphs/docs/Haskell_leftindent2.tex b/ghc/CONTRIB/pphs/docs/Haskell_leftindent2.tex new file mode 100644 index 0000000000..09533c8a08 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Haskell_leftindent2.tex @@ -0,0 +1,9 @@ +% From cvh/Public/GBC/Source/Gm7.hs +\begin{verbatim} +eval :: GmState -> [GmState] +eval state = state: restStates + where + restStates | gmFinal state = [] + | otherwise = eval nextState + nextState = doAdmin (step state) +\end{verbatim} diff --git a/ghc/CONTRIB/pphs/docs/Haskell_math.tex b/ghc/CONTRIB/pphs/docs/Haskell_math.tex new file mode 100644 index 0000000000..2e67e31e05 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Haskell_math.tex @@ -0,0 +1,5 @@ +\begin{verbatim} +-- list concatenation (right-associative) +(++) :: [a] -> [a] -> [a] +xs ++ ys = foldr (:) ys xs +\end{verbatim}
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/Haskell_simple.tex b/ghc/CONTRIB/pphs/docs/Haskell_simple.tex new file mode 100644 index 0000000000..4ca2bb50c7 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Haskell_simple.tex @@ -0,0 +1,5 @@ +\begin{verbatim} +foobar a b = c + where + c = a + b +\end{verbatim}
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/Haskell_string1.tex b/ghc/CONTRIB/pphs/docs/Haskell_string1.tex new file mode 100644 index 0000000000..0284da1e3c --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Haskell_string1.tex @@ -0,0 +1,8 @@ +\begin{verbatim} +-- File and channel names: + +stdin = "stdin" +stdout = "stdout" +stderr = "stderr" +stdecho = "stdecho" +\end{verbatim}
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/Haskell_typewriter.tex b/ghc/CONTRIB/pphs/docs/Haskell_typewriter.tex new file mode 100644 index 0000000000..a8518c3e76 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Haskell_typewriter.tex @@ -0,0 +1,7 @@ +\begin{verbatim} +Horrible typewriter font + where + everything is the same + fixed width characters + no highlighting +\end{verbatim}
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/How.tex b/ghc/CONTRIB/pphs/docs/How.tex new file mode 100644 index 0000000000..10120131f8 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/How.tex @@ -0,0 +1,465 @@ +\chapter{How it does it} + +This chapter explains in detail how the program {\tt pphs} was implemented +from a programmer's viewpoint. It was implemented in the C programming +language, as this is a commonly used language often used for writing UNIX tools. +The program code is shown in Appendix~\ref{prog-code} and the makefile in +Appendix~\ref{make-code}. + +\section{General sequence of events} + +When the {\tt pphs} program is run, the program first finds out what, if any, +options it has been called with. If any have been specified, the appropriate +variables are set. The program then checks it has been called with exactly one +further argument. If not, the program terminates with an +explanatory error message. If called correctly, the program then checks that the +supplied argument is the name of a file that exists and is readable. +The program is normally used +on files ending with a {\tt .hs} extension. When called with a filename +with no extension and that file is not found, then it appends the extension and searches +for that file. If no file with that name is found or the file is unreadable, an +error message is produced and the program terminates. If the file is found, the +program starts the typesetting process by writing out the opening +\LaTeX\ command to {\tt stdout}. +This defines the \LaTeX\ environment which the program exploits to do the typesetting. +It then initialises the variables used in the program. + +This done, the first character is read. The program enters a loop and keeps +reading characters until the end of the file is reached. As each character is read +in, its typeface is established and it is stored with its typeface in something +called the {\em line store\/}. If any left indentation is +encountered, the correct characters to be skipped are identified from the {\em left +indentation stack} and copied into the line store. Internal alignment is checked +for and if any is found, appropriate variables are set accordingly. Each stored line is +added to both the left indentation stack and the {\em writing queue}. When the value of the +internal alignment changes, or it has been established that the first line in the writing +queue is not part of any internal alignment section, the lines in the queue are written out. + +Once all the lines are written out, {\tt pphs} then writes the closing \LaTeX\ command +and terminates. + +\section{Basic storage unit for a line of code} \label{line-store} + +The basic storage unit used in {\tt pphs} is the line store unit. +This stores the details of one line of Haskell code. These are +the characters on the line, the typeface associated with each +character, the length of the line, the indentation level and the position of +any internal alignment in the line. + +In the C program, {\tt ElementType} is the structure used for this type. This has +five parts: +\begin{itemize} +\item {\tt chars} which stores the characters used on the line of Haskell +code + +\item {\tt typeface} which stores the typeface values associated with the +characters on that line + +\item {\tt indentation} which stores the level of the line's indentation + +\item {\tt length} which stores the length of the line + +\item {\tt col} which stores the column where any internal alignment occurs or +is set to {\tt 0} if there is none +\end{itemize} +The variable {\tt store} in the main program is of type {\tt ElementType} and +is used as the basic storage unit for the current line. Its C declaration is +\begin{quote} +\begin{verbatim} +typedef struct ElementType_Tag { + char chars[MAXLINELENGTH]; + enum face typeface[MAXLINELENGTH]; + int indentation, length, col; +} ElementType; +\end{verbatim} +\end{quote} + +\section{Stack of lines for left indentation} + +Due to \LaTeX 's variable width characters, {\tt pphs} cannot simply uses spaces +for the left indentation as in the input Haskell file. It has to work out how far +each line is indented by finding out what it is indented under. As each line is +completed, it is added to a stack of lines, each line being stored in a basic +storage unit. If the line at the top of the stack is of a greater or equal +indentation level and of a lesser or equal length, then it is no +longer required for calculating typeset indentation +and can be disposed of. Once all lines of greater indentation level have been removed +from the top of the stack, the current line can then be added. + +When a line's indentation level, in terms of the number of spaces used in the +input, has been determined, {\tt pphs} has to find +out the characters that determine the actual typeset length of the indentation. To get this, +{\tt pphs} looks down the stack until it comes to a line whose indentation is less than +that of the current line and whose length is greater than the indentation level of the +current line. Once a suitable line is found, its characters and typefaces are copied +into the line store of the current line; then the rest of the current line is read in, +overwriting the characters beyond the indentation level. If there is no line preceding +the current one that is as long as the indentation level of the current line, spaces +are placed in the line store instead. + +A special case has been made for left indentation. Most of the time, the left-hand edge +of the characters will be aligned, but where a {\tt |} is aligned under an {\tt =} sign, it is +centered under the sign. This will be the case for any further {\tt |} symbols aligned +under this {\tt =} sign. + +The type {\tt StackType} is used in the program for the stack. This makes a stack of +the basic line storage units of {\tt ElementType}, together with a set of functions available +for use with stacks. These are {\tt CreateStack}, which returns an empty stack; +{\tt IsEmptyStack}, which returns {\tt 1} if the stack which it is called with is empty, +{\tt 0} otherwise; {\tt Push}, which takes a stack and an element and returns the stack +with the element pushed onto the top; {\tt Top}, which takes a stack and returns the top +element of the stack; {\tt Pop}, which takes a stack and returns it with the top element +removed; and {\tt PopSym}, which is the same as {\tt Pop} except that it does not free the +memory used by the top element - this function was found necessary to fix a fault caused by +returning to a stack's previous state, having popped off elements in the interim period. + +\section{Internal alignment identification} + +Internal alignment is deemed to have occurred when a character matches the one +immediately above it, the preceding characters in both lines are spaces, and there is +more than one space preceding the character on at least one of the lines. + +To check for this in {\tt pphs}, the current position on the line, indicated by +the linecounter, must be greater than one because either the current line or +the previous line will be required to have two spaces before the current position. The current +line will be located in the line store and the previous line will be at the rear of the queue +of lines waiting to be written out. + +One special case has been implemented for internal alignment. This is to allow Haskell +type declarations, such as in the example below, to align with their corresponding function +definitions. +\begin{quote} +\input{Haskell_internalalign2} +\end{quote} +The {\tt =} sign can be under either the first or second {\tt :} symbol for the +internal alignment to be recognised. + +\section{Typefaces and mathematical characters} + +Each character has a typeface value associated with it. Normally, this will +indicate the type of token the character is part of, either keyword, identifier, +string, comment, number or maths symbol, but where Haskell uses an ASCII character +simulation of a mathematical character or some other special symbol, the typeface +value will indicate this as well. + +In the program, the typeface values are of the +enumerated type called {\tt face}, which has the values shown in Table~\ref{tf-val}. +They are used in the basic storage unit {\tt ElementType} in the {\tt typeface} part. + +\begin{table} +\begin{center} +\begin{tabular}{|c|l|} \hline +{\em value\/} & {\em indicates\/} \\ \hline +{\tt KW} & keyword \\ +{\tt ID} & identifier \\ +{\tt IE} & exponent identifier \\ +{\tt ST} & string \\ +{\tt SE} & exponent string \\ +{\tt CO} & comment \\ +{\tt CE} & exponent comment \\ +{\tt NU} & number \\ +{\tt NE} & exponent number \\ +{\tt MA} & maths \\ +{\tt SP} & space \\ +{\tt LC} & line comment \\ +{\tt RC} & regional comment begin \\ +{\tt CR} & regional comment end \\ +{\tt BF} & backwards/forwards quote \\ +{\tt FQ} & forwards quote \\ +{\tt EQ} & escape quote \\ +{\tt DQ} & double quote begin \\ +{\tt QD} & double quote end \\ +{\tt EE} & escape double quote \\ +{\tt DC} & second part of double character \\ +{\tt DP} & double plus \\ +{\tt CP} & colon plus \\ +{\tt LE} & less than or equal to \\ +{\tt GE} & greater than or equal to \\ +{\tt LA} & left arrow \\ +{\tt RA} & right arrow \\ +{\tt RR} & double right arrow \\ +{\tt TI} & times \\ +{\tt EX} & double exponent character \\ +{\tt XP} & exponent \\ +{\tt BE} & bar aligned under equals \\ \hline +\end{tabular} +\end{center} +\caption{Typeface values} \label{tf-val} +\end{table} + +\subsection{Current character and retrospective update} + +The {\tt pphs} program has to determine the typeface of a character without knowledge of the +characters to follow. Therefore it allocates the value depending on the status +of various boolean variables. This may subsequently be found to be wrong once the remaining +characters of that token have been read. + +In the case of keywords and double characters, these are only identifiable +as such once all the characters of the token have been read in. Having established +the existence of a keyword or double character, {\tt pphs} then goes back and changes +the typeface values for the appropriate characters. + +The functions {\tt CheckForDoubleChar} and {\tt CheckForKeyword} perform this in the +program. + +\section{Writing lines out} + +Lines are written to {\tt stdout}, but not immediately on being read in. Instead they +are held back while it is established whether or not they form part of a section of +internal alignment. + +Before any typeset Haskell code is written, {\tt pphs} writes an opening \LaTeX\ command +{\tt \char'134 begin\char'173 tabbing\char'175 } to {\tt stdout}. This defines the +\LaTeX\ environment that the typeset code will be written in. At the end, +{\tt \char'134 end\char'173 tabbing\char'175 } is written to terminate this +environment. + +\subsection{The line queue} + +Lines are stored in a queue while they are waiting to be written out. +The elements of the queue are the basic line storage units described in +Section~\ref{line-store}. + +In the program, the queue is of type {\tt QueueType} +and a set of functions related to queues is available. This set consists of +{\tt CreateQueue}, which returns an empty queue; {\tt IsEmptyQueue}, which takes +a queue and returns {\tt 1} if the queue is empty, {\tt 0} otherwise; {\tt LengthOfQueue}, +which takes a queue and returns its length; {\tt FrontOfQueue}, which takes a queue and +returns a pointer to its front element; {\tt RearOfQueue}, which takes a queue and returns +a pointer to its rear element; {\tt AddToQueue}, which takes a queue and an element and +returns the queue with the element added to the rear; {\tt TakeFromQueue}, which takes +a queue and returns the queue with the front element removed. + +The last line in the queue is inspected to search +for internal alignment; if any is found, the internal alignment variable of that +line is altered accordingly. + +\subsection{When lines are written} + +The queue is written out by the function {\tt WriteQueue} when a section of internal +alignment is commenced or terminated +or when it has been established that there is no internal alignment involving the first line +in the queue. If the section being written out has been found to have +no internal alignment, then the last line is retained +in the queue because it may form part of the next section of internal alignment. + +At the end of the input, {\tt WriteRestOfQueue} writes all the lines remaining in the queue. +This is because the last line of Haskell code will not form part of any further section of +internal alignment and can therefore be written out. Facilities +are provided in the function {\tt WriteLine} to avoid writing the last newline +character at the end of the Haskell +file, as this would create an unwanted blank line in the final document. + +\subsection{Writing a line} + +The function {\tt WriteLine} is used in {\tt pphs} to write out one line. This is +called from either {\tt WriteQueue} or {\tt WriteRestOfQueue} and is supplied with +a basic line storage unit containing the line needing to be written out together with a +flag stating whether or not a \LaTeX\ newline character is required. + +If a line has any left indentation, this is written out first by calling the function +{\tt WriteSkipover}. The rest of the line is then written out by {\tt WriteWords} +followed if necessary by the newline character. Both these functions are given +the current line in the line store. + +\subsection{Writing left indentation} + +As \LaTeX\ uses variable width characters, fixed width spaces cannot be used for the +left indentation. Instead, the width of the characters above the current line needs +to be skipped. The {\tt \char'134 skipover} command, defined in the {\tt pphs.sty} +style file (see Section~\ref{style-file}), is used by the function {\tt WriteSkipover} +to get \LaTeX\ to do this. The command is supplied with the typefaces and characters +in the lines above, and, with this, \LaTeX\ creates the correct amount of +indentation in the typeset result. The typefaces and characters are written in +braces as the argument to {\tt \char'134 skipover} by calling {\tt WriteStartFace}, +{\tt WriteChar}, {\tt WriteSpaces} and {\tt WriteFinishFace}. The typeface functions +are called with the typeface value whereas the other two are given the line store, +current position and where the end of the skipover section is. + +Using this specially defined {\tt \char'134 skipover} command avoids having to get +information back from \LaTeX , therefore keeping the information flow unidirectional. + +\subsection{Writing the rest of a line} + +The function {\tt WriteWords} writes out the indented line once any left indentation +has been dealt with. Starting at the indentation level of the line, it uses the functions +{\tt WriteStartFace}, {\tt WriteChar}, {\tt WriteSpaces} and {\tt WriteFinishFace} to +write out each character and its typeface. The typeface functions are called with +the typeface value whereas the other two are given the line store, current position +and where the end of the line is. + +\subsection{Writing \LaTeX\ typeface commands} + +Every character has a typeface associated with it, so at the start and finish of every +line and every time the current typeface changes, typeface commands have to be written +out. This is done by the functions {\tt WriteStartFace} and {\tt WriteFinishFace}. +They write the appropriate \LaTeX\ typeface commands according to the typeface values +given as shown in Table~\ref{tf-comms}. To avoid complications, double characters have +their typefaces written out as part of the character command, therefore they need no +further typeface commands. Similarly, the user-redefinable quote mark characters +have their typeface defined in their definitions, so do not need any more typeface +commands. + +\begin{table} +\begin{center} +\begin{tabular}{|c|l|l|} \hline % ``commands'' to be over two columns +{\em value\/} & \multicolumn{2}{c|}{\em commands\/} \\ \cline{2-3} + & {\em begin\/} & {\em end\/} \\ \hline +{\tt KW} & {\tt \char'173 \char'134 keyword} & {\tt \char'134 /\char'175 }\\ +{\tt ID} & {\tt \char'173 \char'134 iden} & {\tt \char'134 /\char'175 }\\ +{\tt IE} & {\tt \char'173 \char'134 iden} & {\tt \char'134 /\char'175 \$ }\\ +{\tt ST} & {\tt \char'173 \char'134 stri} & {\tt \char'134 /\char'175 }\\ +{\tt SE} & {\tt \char'173 \char'134 stri} & {\tt \char'134 /\char'175 \$ }\\ +{\tt CO} & {\tt \char'173 \char'134 com} & {\tt \char'134 /\char'175 }\\ +{\tt CE} & {\tt \char'173 \char'134 com} & {\tt \char'134 /\char'175 \$ }\\ +{\tt NU} & {\tt \char'173 \char'134 numb} & {\tt \char'134 /\char'175 }\\ +{\tt NE} & {\tt \char'173 \char'134 numb} & {\tt \char'134 /\char'175 \$ }\\ +{\tt MA} & {\tt \$ } & {\tt \$ }\\ +{\tt SP} & & \\ +{\tt LC} & & \\ +{\tt RC} & & \\ +{\tt CR} & & \\ +{\tt BF} & & \\ +{\tt FQ} & & \\ \hline +\end{tabular} \hskip3mm \begin{tabular}{|c|l|l|} \hline +{\em value\/} & \multicolumn{2}{c|}{\em commands\/} \\ \cline{2-3} + & {\em begin\/} & {\em end\/} \\ \hline +{\tt EQ} & & \\ +{\tt DQ} & & \\ +{\tt QD} & & \\ +{\tt EE} & & \\ +{\tt DC} & & \\ +{\tt DP} & & \\ +{\tt CP} & & \\ +{\tt LE} & & \\ +{\tt GE} & & \\ +{\tt LA} & & \\ +{\tt RA} & & \\ +{\tt RR} & & \\ +{\tt TI} & {\tt \$ } & {\tt \$ } \\ +{\tt EX} & {\tt \$ } & \\ +{\tt XP} & {\tt \$ } & \\ +{\tt BE} & & \\ \hline +\end{tabular} +\end{center} +\caption{Typeface values and related \LaTeX\ commands} \label{tf-comms} +\end{table} + +\subsection{Writing characters} + +{\tt WriteChar} is the function which handles writing characters. It takes the line store, +the current position on the line and the end of the current section - either the skipover +section or the writing section - and returns the current position on the line which will +have been incremented if a double character has been written. If the first character of +a double character is the last character of a skipover section, it will not be written +so the indentation for that line will fall instead, below the start of the double +character in a line above. Most characters are written out as they were inputted, +but many require special \LaTeX\ code. + +As \LaTeX\ uses embedded typesetting commands, some characters are reserved for this +purpose. Should any of these characters appear in the input Haskell code, {\tt pphs} +has to produce the appropriate \LaTeX\ code to avoid these characters upsetting the typesetting +process. The characters and the replacement \LaTeX\ code are shown in Table~\ref{rep-chars}. +\begin{table} +\begin{center} +\begin{tabular}{|c|l|} \hline +{\em input\/} & {\em \LaTeX\ code output } \\ \hline +{\tt \#} & {\tt \char'134 \#} \\ +{\tt \$} & {\tt \char'134 \$} \\ +{\tt \%} & {\tt \char'134 \%} \\ +{\tt \&} & {\tt \char'134 \&} \\ +{\tt \char'176 } & {\tt \char'134 char'176 } \\ +{\tt \_} & {\tt \char'134 \_} \\ +{\tt \char'134} & {\tt \char'134 hbox\char'173 \$setminus\$\char'175 } \\ +{\tt \char'173} & {\tt \char'134 hbox\char'173 \$\char'134 cal \char'134 char'146 \$\char'175 } \\ +{\tt \char'175} & {\tt \char'134 hbox\char'173 \$\char'134 cal \char'134 char'147 \$\char'175 } \\ +{\tt *} & {\tt \char'134 times}\\ \hline +\end{tabular} \hskip3mm \begin{tabular}{|c|l|} \hline +{\em input\/} & {\em \LaTeX\ code output } \\ \hline +{\tt ++} & {\tt \char'134 plusplus}\\ +{\tt :+} & {\tt \char'173 :\char'175 \char'173 +\char'175}\\ +{\tt <=} & {\tt \$\char'134 leq\$}\\ +{\tt >=} & {\tt \$\char'134 geq\$}\\ +{\tt <-} & {\tt \$\char'134 leftarrow\$}\\ +{\tt ->} & {\tt \$\char'134 rightarrow\$}\\ +{\tt =>} & {\tt \$\char'134 Rightarrow\$}\\ +{\tt \char'173 -} & {\tt \char'173 \char'134 com \char'134 \char'173 -\char'134 /\char'175 }\\ +{\tt -\char'175 } & {\tt \char'173 \char'134 com -\char'134 \char'175 \char'134 /\char'175 }\\ +{\tt --} & {\tt \char'173 \char'134 rm -\char'175 \char'173 \char'134 rm -\char'175 }\\ \hline +\end{tabular} +\end{center} +\caption{Haskell input and replacement \LaTeX\ code} \label{rep-chars} +\end{table} + +When a mathematical character needs written, {\tt WriteChar} outputs the \LaTeX\ code for +the character rather than the Haskell ASCII character simulation. Some of these +simulations use more than one character, so this could cause problems if some left +indentation is aligned under the second character of such a simulation. It has been +decided that in this case, the output from {\tt pphs} will cause the indented line +to align under the start of the double character rather than the centre or end of it. +The Haskell ASCII simulations and the \LaTeX\ codes that replaces them are shown in +Table~\ref{rep-chars}. The non-standard command {\tt \char'134 plusplus} is defined +in the {\tt pphs.sty} style file (see Section~\ref{style-file}). + +When a {\tt |} symbol is aligned under an {\tt =} sign at the left indentation, +{\tt \char'134 bareq} is output. This command is defined in the {\tt pphs.sty} +style file explained in Section~\ref{style-file} and causes \LaTeX\ to write the bar symbol +centrally in the space it would have taken to write an equals sign, thereby causing +the bar to be positioned centrally under the equals sign it is aligned under and the text +following the bar to align with that after the equals sign. + +For writing spaces, {\tt WriteSpaces}, called with the line store, current position and the +position of the end of the current section, first counts the number of consecutive spaces +to be written before writing out a {\tt \char'134 xspa} command with an argument of +the number of spaces needed. This makes the output code easier to read. The +{\tt \char'134 xspa} command is defined in the {\tt pphs.sty} style file explained +in Section~\ref{style-file}. Any tab characters are treated as spaces by {\tt pphs} +with the number of spaces they represent being calculated from the current position +on the line and the {\tt tablength} variable, which may have been changed from its +default of 8 by the {\tt -t} option at the program call. + +Numbers are written by {\tt WriteChar}, including floating point numbers. + +As \LaTeX\ provides several different quote marks, it was decided that the user +should be able to choose a preferred symbol. An input quote mark {\tt '} can +either be a prime or a quote mark in the output. This requires the program to +determine which it is. In program code this is fine, but in comments or strings +the marks won't necessarily be used in a manner from which it can easily be +determined which symbol is required. In program code, an input {\tt '} is deemed +to be a quote mark if either it is preceded by punctuation or a quote has +already been opened; otherwise it is a prime. Of the quote marks, these can +either be for actual quotes or an escape quote where a quote mark is being quoted. +Special cases has been implemented when the input file contains a quote within a comment +started with a backquote and ended with a forwards quote, and for \LaTeX\ style +quotes in comments started with two backquotes and ended with two forwards quote +marks. All input {\tt '} in strings, other than escape quotes, are treated +as primes. In strings, an input {\tt '} may be an apostrophe, however, there is +little way of telling this.\label{string-apostrophe} One of five different pieces +of \LaTeX\ code can be produced having received {\tt '} as input. +\begin{itemize} +\item {\tt \char'134 forquo} for a forwards quote mark +\item {\tt \char'134 escquo} for an escape (quoted) quote mark +\item {\tt \char'173 \char'134 com '\char'134 /\char'175 } for a forward quote ending a quote +in a comment opened by a backquote +\item {\tt \char'173 \char'134 com ''\char'134 /\char'175 } for two forward quotes ending a quote +in a comment opened by two backquotes +\item {\tt '} for a prime which will be in the math font +\end{itemize} +The first two are commands defined in the {\tt pphs.sty} style file and are +thus user-redefinable as described in Section~\ref{user-adj}. Backquotes, input +as {\tt `}, are either in the comment typeface for backquotes in comments or in +math font elsewhere. + +\subsection{Writing internal alignment} + +To commence a section of internal alignment, either of the functions {\tt WriteQueue} +or {\tt WriteRestOfQueue} write out +{\tt \char'134 begin\char'173 tabular\char'175 \char'173 @\char'173 \char'175 l@\char'173 \char'134 xspa1\char'175 c@\char'173 \char'175 l\char'175 } +before writing the first line of the section. This provides an environment +with three columns. The first column accommodates the Haskell code to the left of the +internal alignment, the second has the symbols that line up vertically, while the third +has the Haskell code to the right. The Haskell code is written complete with its \LaTeX\ +typesetting commands with the addition of {\tt \&} symbols denoting the breaks between +columns. Once the internal alignment section has been completed, the +{\tt \char'134 end\char'173 tabular\char'175 } command is written to terminate the +environment.
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/Introduction.tex b/ghc/CONTRIB/pphs/docs/Introduction.tex new file mode 100644 index 0000000000..141fb5940b --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Introduction.tex @@ -0,0 +1,137 @@ +\chapter{Introduction} + +Documents, such as papers for publication, often include sections +of program code. These have to be specially typeset, as default +typesetting modes are generally intended for plain prose. +It is therefore useful to have a special-purpose system for typesetting +programs for inserting into documents. +Haskell \cite{Haskell-report} is a fairly new functional programming language and does not +yet have a full range of tools available to use with the language, +including one to do typesetting. +The goal of this project, therefore, is to provide a tool to automatically +typeset Haskell programs. + +Many people use the \LaTeX\ system \cite{LaTeX-book} +for typesetting. This uses +embedded typesetting commands in the input to arrange the typesetting. +The typeset result has variable-width characters with a choice of +font styles and sizes available. The page-size, margins and layout +are also controllable by the user. Because \LaTeX\ is so widely used and +so flexible, the tool to be created will be +for use with the \LaTeX\ system. + +Haskell programs are generally written with editors that produce ASCII +text. This has fixed-width characters and one plain font. +Indentation and vertical alignment are simple because +fixed-width characters line up in columns, one below the other. +Haskell avoids having compulsory expression terminators +by using such indentation to delimit expressions. It is thus crucial +that this indentation is retained when the text is typeset. + +The \LaTeX\ system, however, uses variable-width characters, so the indentation +level becomes dependent on the characters under which the text is aligned. +The tabs and spaces that went to make +up the indentation in the original file have to be replaced with a +suitable amount of space to make the text line up with the position +it is aligned with in the original file. + +It is also desirable to have formatting improvements, such as +highlighting keywords and identifiers, as well as to have +proper mathematical characters inserted in place of the +Haskell ASCII approximations. A tool could do this as well. + +Currently the only way of typesetting Haskell program code is to +labouriously insert formatting +commands into the text by hand. The alternative is to print out the programs +verbatim with a plain ASCII-style fixed-width font, but it would be far better +if there were a tool to do the proper typesetting. + +\subsection*{Goals} + +The proposed tool is required to comply to the following requirements: +\begin{itemize} +\item The program must take a file with a Haskell program in it and produce +\LaTeX\ code to stdout. This code must produce the input Haskell program in +typeset style when run through +the \LaTeX\ program. The typeset result must be recognisable as having the same +layout as the input file's Haskell program had. + +\item The typeset result must preserve the parse of the program. + +\item The input file will contain only Haskell code. Any documentation in the file +will be in the form of comments. + +\item The input file will not have any embedded typesetting commands, so +the program must analyse the input and decide for itself what needs to be +done to produce the correct \LaTeX\ code. + +\item The \LaTeX\ code produced must be easy to incorporate into a \LaTeX\ +document such as a paper or book. Thus the produced code must be able +to be incorporated into documents of different page and font sizes. + +\item Keywords and identifiers must be highlightable so as to distinguish +them from the rest of the Haskell program. +The user should be allowed some choice in the typeface used for +highlighting. + +\item Proper mathematical symbols must replace ASCII approximations in the +typeset output. + +\item The program must accept as input +a file of any name and thus not use an inflexible built-in filename. + +\item The program must be in keeping with conventional UNIX style to fit in with +Haskell and \LaTeX , which are also run under UNIX. +\end{itemize} + +\noindent This report describes a program written to satisfy these needs. + +\subsection*{Background} + +Haskell, being a functional programming language, uses functions as its +sole means of programming. This is unlike traditional programming +languages such as C or Pascal, where assignments and procedures are also used. +Haskell also does not normally use expression terminators, such as semi-colons, +but instead relies on the layout of the +program and, in particular, the indentation to determine the context of +lines of code. Lines of code are positioned so they are aligned under particular +points on preceding lines, and this delimits expressions. It is thus +imperative that this indentation be replicated in any attempt to pretty-print +the program code. + +\LaTeX\ is a typesetting program that takes a file with embedded typesetting +commands and produces a file containing typeset text. This is commonly used when +writing documents such as papers and books for publication. Users of \LaTeX\ +can do many things, but anything fancy requires lots of typesetting commands to +be embedded into the input file. Thus typesetting a Haskell program in the +desired way is a considerable task. More simply, a +Haskell program can be displayed in \LaTeX's verbatim mode, but this uses a fixed-width +typewriter font. Verbatim mode does not recognise tab characters, however these can be +replaced with spaces. + +It will be assumed that the user is familiar with Haskell and at least familiar with +preparing basic textual documents with \LaTeX, although it is not required for the +user to understand many of the more involved parts of typesetting with \LaTeX. + +Already in existence is a program called `Phinew' written by Phil Wadler. +This can be found in {\tt \char'176 wadler/bin}. This required the user to supply +typesetting commands embedded in their Haskell programs, meaning that the +user would have to manually pre-process their Haskell code before using +Phinew. Although simpler +than typesetting in \LaTeX, it is still better to have a program +to do all the typesetting automatically, taking an unprepared Haskell +program as input. + +\subsection*{Outline} + +In the remaining sections of this report the functionality of the program written +are discussed; in particular, how all the various layout arrangements are dealt with. The way +in which the program goes about working out what to do is explained, +along with descriptions of the algorithm and data-structures used. Examples +of the input and resulting output are used to illustrate the capabilities +of the program. The various possibilities for the user to decide what happens +are explained, along with details on how to exploit them. The user will +need to know how to incorporate the results into a document so this +is also explained. Finally, the limitations and deficiencies of the +program are detailed complete with an outline of further possible work +which could rectify these problems and make the program more complete. diff --git a/ghc/CONTRIB/pphs/docs/LaTeX-code_simple.tex b/ghc/CONTRIB/pphs/docs/LaTeX-code_simple.tex new file mode 100644 index 0000000000..8110ca4a16 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/LaTeX-code_simple.tex @@ -0,0 +1,12 @@ +\begin{verbatim} +\begin{tabbing} +{\iden foobar\/}\xspa{1}{\iden a\/}\xspa{1}{\iden b\/} + \xspa{1}$=$\xspa{1}{\iden c\/}\\ +\skipover{{\iden foobar\/}\xspa{1}{\iden a\/}\xspa{1} + {\iden b\/}\xspa{1}}{\keyword where\/}\\ +\skipover{{\iden foobar\/}\xspa{1}{\iden a\/}\xspa{1} + {\iden b\/}\xspa{1}{\keyword wher\/}}{\iden c\/} + \xspa{1}$=$\xspa{1}{\iden a\/}\xspa{1}$+$\xspa{1} + {\iden b\/} +\end{tabbing} +\end{verbatim}
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/LaTeX_blankline.tex b/ghc/CONTRIB/pphs/docs/LaTeX_blankline.tex new file mode 100644 index 0000000000..1c1a67fe91 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/LaTeX_blankline.tex @@ -0,0 +1,6 @@ +\begin{tabbing} +{\iden foobar\/}\xspa{1}{\iden a\/}\xspa{1}{\iden b\/}\xspa{1}$=$\xspa{1}{\iden c\/}\\ +\skipover{{\iden foobar\/}\xspa{1}{\iden a\/}\xspa{1}{\iden b\/}\xspa{1}}{\keyword where\/}\\ +\skipover{{\iden foobar\/}\xspa{1}{\iden a\/}\xspa{1}{\iden b\/}\xspa{1}{\keyword wher\/}}{\iden c\/}\xspa{1}$=$\xspa{1}{\iden a\/}\xspa{1}$+$\xspa{1}{\iden b\/}\\ +\skipover{{\iden foobar\/}\xspa{1}{\iden a\/}\xspa{1}{\iden b\/}\xspa{1}{\keyword wher\/}} +\end{tabbing} diff --git a/ghc/CONTRIB/pphs/docs/LaTeX_char.tex b/ghc/CONTRIB/pphs/docs/LaTeX_char.tex new file mode 100644 index 0000000000..7b5a7c83c6 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/LaTeX_char.tex @@ -0,0 +1,9 @@ +\begin{tabbing} +{\rm -}{\rm -}\xspa{1}{\com Character\/}\xspa{1}{\com functions\/}\\ +\\ +\begin{tabular}{@{}l@{\xspa1}c@{}l} +{\iden minChar\/}$,$\xspa{1}{\iden maxChar\/}\xspa{8} & $::$ & \xspa{1}{\iden Char\/}\\ +{\iden minChar\/}\xspa{17} & $=$ & \xspa{1}\forquo {\stri \hbox{$\setminus$}\/}{\numb 0\/}\forquo \\ +{\iden maxChar\/}\xspa{17} & $=$ & \xspa{1}\forquo {\stri \hbox{$\setminus$}\/}{\numb 255\/}\forquo +\end{tabular} +\end{tabbing} diff --git a/ghc/CONTRIB/pphs/docs/LaTeX_comment.tex b/ghc/CONTRIB/pphs/docs/LaTeX_comment.tex new file mode 100644 index 0000000000..324be0b648 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/LaTeX_comment.tex @@ -0,0 +1,3 @@ +\begin{tabbing} +{\rm -}{\rm -}\xspa{1}{\com note\/}\xspa{1}{\com that\/}\xspa{1}{\com x\/}\xspa{1}$+$\xspa{1}{\com y\/}\xspa{1}$=$\xspa{1}{\com z\/} +\end{tabbing} diff --git a/ghc/CONTRIB/pphs/docs/LaTeX_internalalign1.tex b/ghc/CONTRIB/pphs/docs/LaTeX_internalalign1.tex new file mode 100644 index 0000000000..069691a88d --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/LaTeX_internalalign1.tex @@ -0,0 +1,13 @@ +\begin{tabbing} +\begin{tabular}{@{}l@{\xspa1}c@{}l} +{\keyword instance\/}\xspa{2}$(${\iden RealFloat\/}\xspa{1}{\iden a\/}$)$\xspa{1} & $\Rightarrow$ & \xspa{1}{\iden Num\/}\xspa{1}$(${\iden Complex\/}\xspa{1}{\iden a\/}$)$\xspa{2}{\keyword where\/}\\ +\skipover{{\keyword inst\/}}$(${\iden x\/}{:}{+}{\iden y\/}$)$\xspa{1}$+$\xspa{1}$(${\iden x\/}$'${:}{+}{\iden y\/}$')$\xspa{3} & $=$ & \xspa{2}$(${\iden x\/}$+${\iden x\/}$')$\xspa{1}{:}{+}\xspa{1}$(${\iden y\/}$+${\iden y\/}$')$\\ +\skipover{{\keyword inst\/}}$(${\iden x\/}{:}{+}{\iden y\/}$)$\xspa{1}$-$\xspa{1}$(${\iden x\/}$'${:}{+}{\iden y\/}$')$\xspa{3} & $=$ & \xspa{2}$(${\iden x\/}$-${\iden x\/}$')$\xspa{1}{:}{+}\xspa{1}$(${\iden y\/}$-${\iden y\/}$')$\\ +\skipover{{\keyword inst\/}}$(${\iden x\/}{:}{+}{\iden y\/}$)$\xspa{1}$\times $\xspa{1}$(${\iden x\/}$'${:}{+}{\iden y\/}$')$\xspa{3} & $=$ & \xspa{2}$(${\iden x\/}$\times ${\iden x\/}$'-${\iden y\/}$\times ${\iden y\/}$')$\xspa{1}{:}{+}\xspa{1}$(${\iden x\/}$\times ${\iden y\/}$'+${\iden y\/}$\times ${\iden x\/}$')$\\ +\skipover{{\keyword inst\/}}{\iden negate\/}\xspa{1}$(${\iden x\/}{:}{+}{\iden y\/}$)$\xspa{7} & $=$ & \xspa{2}{\iden negate\/}\xspa{1}{\iden x\/}\xspa{1}{:}{+}\xspa{1}{\iden negate\/}\xspa{1}{\iden y\/}\\ +\skipover{{\keyword inst\/}}{\iden abs\/}\xspa{1}{\iden z\/}\xspa{15} & $=$ & \xspa{2}{\iden magnitude\/}\xspa{1}{\iden z\/}\xspa{1}{:}{+}\xspa{1}{\numb 0\/}\\ +\skipover{{\keyword inst\/}}{\iden signum\/}\xspa{1}{\numb 0\/}\xspa{12} & $=$ & \xspa{2}{\numb 0\/}\\ +\skipover{{\keyword inst\/}}{\iden signum\/}\xspa{1}{\iden z@\/}$(${\iden x\/}{:}{+}{\iden y\/}$)$\xspa{5} & $=$ & \xspa{2}{\iden x\/}$/${\iden r\/}\xspa{1}{:}{+}\xspa{1}{\iden y\/}$/${\iden r\/}\xspa{2}{\keyword where\/}\xspa{1}{\iden r\/}\xspa{1}$=$\xspa{1}{\iden magnitude\/}\xspa{1}{\iden z\/}\\ +\skipover{{\keyword inst\/}}{\iden fromInteger\/}\xspa{1}{\iden n\/}\xspa{7} & $=$ & \xspa{2}{\iden fromInteger\/}\xspa{1}{\iden n\/}\xspa{1}{:}{+}\xspa{1}{\numb 0\/} +\end{tabular} +\end{tabbing} diff --git a/ghc/CONTRIB/pphs/docs/LaTeX_leftindent1.tex b/ghc/CONTRIB/pphs/docs/LaTeX_leftindent1.tex new file mode 100644 index 0000000000..e668990f3d --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/LaTeX_leftindent1.tex @@ -0,0 +1,8 @@ +\begin{tabbing} +\begin{tabular}{@{}l@{\xspa1}c@{}l} +{\iden gcd\/}\xspa{7} & $::$ & \xspa{1}{\iden Int\/}\xspa{1}$\rightarrow$\xspa{1}{\iden Int\/}\xspa{1}$\rightarrow$\xspa{1}{\iden Int\/}\\ +{\iden gcd\/}\xspa{1}{\iden x\/}\xspa{1}{\iden y\/}\xspa{4} & $=$ & \xspa{1}{\iden gcd\/}$'$\xspa{1}$(${\iden abs\/}\xspa{1}{\iden x\/}$)$\xspa{1}$(${\iden abs\/}\xspa{1}{\iden y\/}$)$\\ +\end{tabular}\\ +\skipover{{\iden gcd\/}\xspa{1}{\iden x\/}\xspa{1}{\iden y\/}\xspa{4}$=$\xspa{1}}{\keyword where\/}\xspa{1}{\iden gcd\/}$'$\xspa{1}{\iden x\/}\xspa{1}{\numb 0\/}\xspa{1}$=$\xspa{1}{\iden x\/}\\ +\skipover{{\iden gcd\/}\xspa{1}{\iden x\/}\xspa{1}{\iden y\/}\xspa{4}$=$\xspa{1}{\keyword where\/}\xspa{1}}{\iden gcd\/}$'$\xspa{1}{\iden x\/}\xspa{1}{\iden y\/}\xspa{1}$=$\xspa{1}{\iden gcd\/}$'$\xspa{1}{\iden y\/}\xspa{1}$(${\iden x\/}\xspa{1}$`${\iden rem\/}$`$\xspa{1}{\iden y\/}$)$ +\end{tabbing} diff --git a/ghc/CONTRIB/pphs/docs/LaTeX_leftindent2.tex b/ghc/CONTRIB/pphs/docs/LaTeX_leftindent2.tex new file mode 100644 index 0000000000..d175774169 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/LaTeX_leftindent2.tex @@ -0,0 +1,8 @@ +\begin{tabbing} +{\iden eval\/}\xspa{1}$::$\xspa{1}{\iden GmState\/}\xspa{1}$\rightarrow$\xspa{1}$[${\iden GmState\/}$]$\\ +{\iden eval\/}\xspa{1}{\iden state\/}\xspa{1}$=$\xspa{1}{\iden state\/}$:$\xspa{1}{\iden restStates\/}\\ +\skipover{{\iden eval\/}\xspa{1}{\iden state\/}\xspa{1}$=$\xspa{1}}{\keyword where\/}\\ +\skipover{{\iden eval\/}\xspa{1}{\iden state\/}\xspa{1}$=$\xspa{1}}{\iden restStates\/}\xspa{1}$|$\xspa{1}{\iden gmFinal\/}\xspa{1}{\iden state\/}\xspa{1}$=$\xspa{1}$[]$\\ +\skipover{{\iden eval\/}\xspa{1}{\iden state\/}\xspa{1}$=$\xspa{1}{\iden restStates\/}\xspa{1}}$|$\xspa{1}{\iden otherwise\/}\xspa{1}$=$\xspa{1}{\iden eval\/}\xspa{1}{\iden nextState\/}\\ +\skipover{{\iden eval\/}\xspa{1}{\iden state\/}\xspa{1}$=$\xspa{1}}{\iden nextState\/}\xspa{2}$=$\xspa{1}{\iden doAdmin\/}\xspa{1}$(${\iden step\/}\xspa{1}{\iden state\/}$)$ +\end{tabbing} diff --git a/ghc/CONTRIB/pphs/docs/LaTeX_math.tex b/ghc/CONTRIB/pphs/docs/LaTeX_math.tex new file mode 100644 index 0000000000..4b4198dde3 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/LaTeX_math.tex @@ -0,0 +1,7 @@ +\begin{tabbing} +{\rm -}{\rm -}\xspa{1}{\com list\/}\xspa{1}{\com concatenation\/}\xspa{1}$(${\com right\/}$-${\com associative\/}$)$\\ +\begin{tabular}{@{}l@{\xspa1}c@{}l} +$($\plusplus$)$\xspa{20} & $::$ & \xspa{1}$[${\iden a\/}$]$\xspa{1}$\rightarrow$\xspa{1}$[${\iden a\/}$]$\xspa{1}$\rightarrow$\xspa{1}$[${\iden a\/}$]$\\ +{\iden xs\/}\xspa{1}\plusplus\xspa{1}{\iden ys\/}\xspa{16} & $=$ & \xspa{2}{\iden foldr\/}\xspa{1}$(:)$\xspa{1}{\iden ys\/}\xspa{1}{\iden xs\/} +\end{tabular} +\end{tabbing} diff --git a/ghc/CONTRIB/pphs/docs/LaTeX_simple.tex b/ghc/CONTRIB/pphs/docs/LaTeX_simple.tex new file mode 100644 index 0000000000..956fc496c7 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/LaTeX_simple.tex @@ -0,0 +1,5 @@ +\begin{tabbing} +{\iden foobar\/}\xspa{1}{\iden a\/}\xspa{1}{\iden b\/}\xspa{1}$=$\xspa{1}{\iden c\/}\\ +\skipover{{\iden foobar\/}\xspa{1}{\iden a\/}\xspa{1}{\iden b\/}\xspa{1}}{\keyword where\/}\\ +\skipover{{\iden foobar\/}\xspa{1}{\iden a\/}\xspa{1}{\iden b\/}\xspa{1}{\keyword wher\/}}{\iden c\/}\xspa{1}$=$\xspa{1}{\iden a\/}\xspa{1}$+$\xspa{1}{\iden b\/} +\end{tabbing} diff --git a/ghc/CONTRIB/pphs/docs/LaTeX_string1.tex b/ghc/CONTRIB/pphs/docs/LaTeX_string1.tex new file mode 100644 index 0000000000..6472e1d6c1 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/LaTeX_string1.tex @@ -0,0 +1,10 @@ +\begin{tabbing} +{\rm -}{\rm -}\xspa{1}{\com File\/}\xspa{1}{\com and\/}\xspa{1}{\com channel\/}\xspa{1}{\com names\/}$:$\\ +\\ +\begin{tabular}{@{}l@{\xspa1}c@{}l} +{\iden stdin\/}\xspa{7} & $=$ & \xspa{2}{\rm ``}{\stri stdin\/}{\rm "}\\ +{\iden stdout\/}\xspa{6} & $=$ & \xspa{2}{\rm ``}{\stri stdout\/}{\rm "}\\ +{\iden stderr\/}\xspa{6} & $=$ & \xspa{2}{\rm ``}{\stri stderr\/}{\rm "}\\ +{\iden stdecho\/}\xspa{5} & $=$ & \xspa{2}{\rm ``}{\stri stdecho\/}{\rm "} +\end{tabular} +\end{tabbing} diff --git a/ghc/CONTRIB/pphs/docs/LaTeX_string2.tex b/ghc/CONTRIB/pphs/docs/LaTeX_string2.tex new file mode 100644 index 0000000000..696a2b6666 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/LaTeX_string2.tex @@ -0,0 +1,10 @@ +\begin{tabbing} +{\iden main\/}\xspa{1}$=$\xspa{1}{\iden appendChan\/}\xspa{1}{\iden stdout\/}\xspa{1}{\rm ``}{\stri please\/}\xspa{1}{\stri type\/}\xspa{1}{\stri a\/}\xspa{1}{\stri filename\hbox{$\setminus$}n\/}{\rm "}\xspa{1}{\iden exit\/}\xspa{1}$($\\ +\skipover{{\iden main\/}\xspa{1}$=$\xspa{1}}{\iden readChan\/}\xspa{1}{\iden stdin\/}\xspa{1}{\iden exit\/}\xspa{1}$(${\iden \hbox{$\setminus$}\/}\xspa{1}{\iden userInput\/}\xspa{1}$\rightarrow$\\ +\skipover{{\iden main\/}\xspa{1}$=$\xspa{1}}{\keyword let\/}\xspa{1}$(${\iden name\/}\xspa{1}$:$\xspa{1}{\iden \_\/}$)$\xspa{1}$=$\xspa{1}{\iden lines\/}\xspa{1}{\iden userInput\/}\xspa{1}{\keyword in\/}\\ +\skipover{{\iden main\/}\xspa{1}$=$\xspa{1}}{\iden appendChan\/}\xspa{1}{\iden stdout\/}\xspa{1}{\iden name\/}\xspa{1}{\iden exit\/}\xspa{1}$($\\ +\skipover{{\iden main\/}\xspa{1}$=$\xspa{1}}{\iden readFile\/}\xspa{1}{\iden name\/}\xspa{1}$(${\iden \hbox{$\setminus$}\/}\xspa{1}{\iden ioerror\/}\xspa{1}$\rightarrow$\xspa{1}{\iden appendChan\/}\xspa{1}{\iden stdout\/}\\ +\skipover{{\iden main\/}\xspa{1}$=$\xspa{1}{\iden readFile\/}\xspa{1}{\iden name\/}\xspa{1}$(${\iden \hbox{$\setminus$}\/}\xspa{1}{\iden ioerror\/}\xspa{1}$\rightarrow$\xspa{1}}{\rm ``}{\stri can\/}$'${\stri t\/}\xspa{1}{\stri open\/}\xspa{1}{\stri file\/}{\rm "}\xspa{1}{\iden exit\/}\xspa{1}{\iden done\/}$)$\\ +\skipover{{\iden main\/}\xspa{1}$=$\xspa{1}{\iden readFile\/}\xspa{1}{\iden name\/}\xspa{1}}$(${\iden \hbox{$\setminus$}\/}\xspa{1}{\iden contents\/}\xspa{1}$\rightarrow$\\ +\skipover{{\iden main\/}\xspa{1}$=$\xspa{1}}{\iden appendChan\/}\xspa{1}{\iden stdout\/}\xspa{1}{\iden contents\/}\xspa{1}{\iden exit\/}\xspa{1}{\iden done\/}$))))$ +\end{tabbing} diff --git a/ghc/CONTRIB/pphs/docs/LaTeX_wide-colons.tex b/ghc/CONTRIB/pphs/docs/LaTeX_wide-colons.tex new file mode 100644 index 0000000000..668ce57838 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/LaTeX_wide-colons.tex @@ -0,0 +1,9 @@ +\begin{tabbing} +{\rm -}{\rm -}\xspa{1}{\com Character\/}\xspa{1}{\com functions\/}\\ +\\ +\begin{tabular}{@{}l@{\xspa1}c@{}l} +{\iden minChar\/}$,$\xspa{1}{\iden maxChar\/}\xspa{8} & $:\,:$ & \xspa{1}{\iden Char\/}\\ +{\iden minChar\/}\xspa{17} & $=$ & \xspa{1}\forquo {\stri \hbox{$\setminus$}\/}{\numb 0\/}\forquo \\ +{\iden maxChar\/}\xspa{17} & $=$ & \xspa{1}\forquo {\stri \hbox{$\setminus$}\/}{\numb 255\/}\forquo +\end{tabular} +\end{tabbing} diff --git a/ghc/CONTRIB/pphs/docs/Problem_Definition.tex b/ghc/CONTRIB/pphs/docs/Problem_Definition.tex new file mode 100644 index 0000000000..8659bcc8dd --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Problem_Definition.tex @@ -0,0 +1,37 @@ +\section{Problem definition} + +The problem is that a system is needed to typeset Haskell programs +to be inserted into documents. This would be useful in, for +instance, preparing papers for publication that are to include +Haskell programs. + +Haskell is a fairly new functional programming language and does not +as yet have a full range of tools available to use with the language. + +Many people use the \LaTeX\ system for typesetting. This uses +embedded typesetting commands in the input to arrange the typesetting. +The result as typeset has variable-width characters with a choice of +font styles and sizes available. The page-size, margins and layout +are also controllable by the user. + +Haskell programs are generally written on editors that produce ASCII +text. This has fixed-width characters and one plain font. + +In Haskell, the language avoids using +line terminators by having indentation to indicate the contextual meaning of +each line. It is thus crucial that this indentation is retained +when the text is put into \LaTeX. However as the \LaTeX\ system uses +variable width characters, the indentation +level is dependent on the characters under which the text is aligned. +The tabs and spaces that went to make +up the indentation in the original file have to be replaced with a +suitable amount of space to make the text line up with the position with which it +is aligned in the original file. + +It is also desirable to have +formatting improvements such as highlighting keywords and identifiers as well as +inserting proper mathematical characters in place of the Haskell-ASCII simulations. + +Currently the only way of doing this is by labouriously inserting formatting +commands into the text by hand. The alternative is to print out the programs +verbatim with plain ASCII-style fixed-width font. diff --git a/ghc/CONTRIB/pphs/docs/Project_Documents.tex b/ghc/CONTRIB/pphs/docs/Project_Documents.tex new file mode 100644 index 0000000000..5833c2a032 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Project_Documents.tex @@ -0,0 +1,7 @@ +\chapter{Project documents} + +These are the original project documents from 19th January 1994. + +\input{Problem_Definition} +\input{Statement_Of_Requirements} +\input{External_Specification} diff --git a/ghc/CONTRIB/pphs/docs/Report.tex b/ghc/CONTRIB/pphs/docs/Report.tex new file mode 100644 index 0000000000..d37dd0d153 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Report.tex @@ -0,0 +1,49 @@ +\documentstyle[12pt,fleqn,rep,pphs]{report} +\renewcommand{August 1994} +\begin{document} + +\def\sect{\section} +\def\subsect{\subsection} + +% Title page +\title{Literate Haskell} +\author{A. Preece \\\\ University of Glasgow} +\maketitle + +\setcounter{page}{2} +\tableofcontents + +\input{Introduction} +\input{What} +\input{How} +\input{Uses} +\input{Error_Messages} +\input{Faults} +\input{Future_Work} + +\appendix + +\input{Project_Documents} +\input{User_Documents} +\input{Code} + +\begin{thebibliography}{9} +\addcontentsline{toc}{chapter}{Bibliography} + +\bibitem{Haskell-report} +Hudak, P., Peyton Jones, S., Wadler, P., et al., {\em Haskell, Report on the Programming Language\/} +(1992) + +\bibitem{LaTeX-book} +Lamport, L., {\em \LaTeX : A Document Preparation System\/} +(Addison-Wesley, 1986) + +\end{thebibliography} + +\chapter*{Acknowledgements} +\addcontentsline{toc}{chapter}{Acknowledgements} + +I am very grateful for the help and advise of Project Supervisor Tom Melham, +and also for the suggestions of Phil Wadler, Richard McPhee, and Mark Pollock. + +\end{document} diff --git a/ghc/CONTRIB/pphs/docs/Statement_Of_Requirements.tex b/ghc/CONTRIB/pphs/docs/Statement_Of_Requirements.tex new file mode 100644 index 0000000000..00b8fd52e4 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Statement_Of_Requirements.tex @@ -0,0 +1,32 @@ +\section{Statement of requirements} + +There are various things that are required of the solution to the +problem described previously. +\begin{itemize} +\item The program must take a file with a Haskell program in it and produce +\LaTeX\ code to stdout. This code must produce that Haskell program in +typeset style when run through +the \LaTeX\ program. The result as typeset must be recognisable as having the same +layout as the input file's Haskell program had. + +\item The input file will contain only Haskell code. Any documentation in the file +will be in the form of comments. + +\item The input file will not have any embedded typesetting characters in it so +the program must analyse the input and decide for itself what needs to be +done to produce the correct \LaTeX\ code. + +\item The \LaTeX\ code produced must be easy to incorporate into a \LaTeX\ +document such as a paper or book. Thus the produced code must be able +to be incorporated into documents of different page and font size. + +\item Keywords and identifiers must be highlightable so as to distinguish +them from the rest of the Haskell program. +The user should be allowed some choice in the typeface used for +highlighting. + +\item Generality of use must be retained so as to allow the program to be used in conjunction +with a file of any name and thus not use an inflexible built-in filename. + +\item The program must be in keeping with conventional UNIX style. +\end{itemize} diff --git a/ghc/CONTRIB/pphs/docs/Title.tex b/ghc/CONTRIB/pphs/docs/Title.tex new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Title.tex diff --git a/ghc/CONTRIB/pphs/docs/UserGuide.tex b/ghc/CONTRIB/pphs/docs/UserGuide.tex new file mode 100644 index 0000000000..5f46b0861a --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/UserGuide.tex @@ -0,0 +1,9 @@ +\documentstyle[12pt,fleqn,a4,pphs]{report} +\begin{document} + +\def\sect{\section*} +\def\subsect{\subsection*} + +\input{UserGuide_Text} + +\end{document} diff --git a/ghc/CONTRIB/pphs/docs/UserGuide_Text.tex b/ghc/CONTRIB/pphs/docs/UserGuide_Text.tex new file mode 100644 index 0000000000..5dc6999ce4 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/UserGuide_Text.tex @@ -0,0 +1,231 @@ +\sect{User guide to {\tt pphs}} + +The program {\tt pphs} typesets programs in the Haskell programming +language for use with the \LaTeX\ intensional text formatting +and typesetting system. It takes +as input a file containing a Haskell program and produces \LaTeX\ +code to {\tt stdout}. There are various different features of this +process. + +\subsect{Left indentation} + +It is in the nature of Haskell programs that indentation is heavily used. As the +indentation is vital to the parsing of the program, any attempt at typesetting +Haskell code must replicate this indentation. Take, for example, the following piece of code. +\begin{quote} +\input{Haskell_leftindent2} +\end{quote} +Note how the third, fifth and sixth lines start at different levels of indentation. +The {\tt pphs} program produces the correct \LaTeX\ code to align these under the +correct position in the preceding lines once typeset. It also selects the correct +line to line up under. Note how the sixth line does not line up +under its predecessor, but under the fourth line. +The code necessary to typeset this is produced, preserving the parsing +order. Once typeset, it will look like this: +\begin{quote} +\input{LaTeX_leftindent2} +\end{quote} +Note that this +example of possible input had no `extra' typesetting commands. + +A line of Haskell code may be indented beyond the end of its predecessor. +Here, {\tt pphs} aligns it with whichever line it is lined up underneath in the +original file, or, if longer than any preceding line, inserts space to correspond +to that in the input file. + +\subsect{Internal alignment} + +Another form of alignment used in Haskell is {\em internal alignment}. This is where +there is vertical alignment of columns other than at the left-hand edge of the +Haskell code. This is typically characterised with a column of the same character +appearing in the program code, and it is this case, along with a +special case, that {\tt pphs} recognises for internal alignment having occurred. +\begin{quote} +\input{Haskell_internalalign1} +\end{quote} +In this example, see how the {\tt =} signs line up, one below the other. This makes +the program more readable, although it does not affect the parsing of the program. +As the purpose of {\tt pphs} is to make Haskell programs even more readable, it +retains this alignment. This example would be typeset to produce the following: +\begin{quote} +\input{LaTeX_internalalign1} +\end{quote} +The special case for internal alignment is a $=$ aligned under a $::$. +This will cause the same effect as would have happened if they were the same +character. + +\subsect{Token highlighting} + +To increase the readability of Haskell programs, {\tt pphs} allows various tokens +to be highlighted. By using different typefaces for some pieces of code, this +distinguishes them from the rest. The user can specify the details of +the highlighting, but the default settings are {\bf bold} for +keywords, {\it italics} for identifiers and {\rm roman} for everything else. +Strings, comments and numbers are also highlightable. + +Note that in the previous example, the keywords {\bf instance} and {\bf where} +are highlighted in bold, whereas the various identifiers are in italics. + +\subsect{Mathematical symbols} + +Rather than simply replicate the ASCII approximations of mathematical symbols +used in Haskell, {\tt pphs} +substitutes the proper symbols in the output. These are shown below. +\begin{center} +\begin{tabular}[t]{|c|c|} \hline +{\em Haskell\/} & {\em Math\/} \\ \hline +{\tt *} & $\times$ \\ +{\tt ++} & {\hbox{$+\mkern-7.5mu+$}} \\ +{\tt :+} & {:}{+} \\ +{\tt <=} & $\leq$ \\ \hline +\end{tabular} \hskip3mm \begin{tabular}[t]{|c|c|} \hline +{\em Haskell\/} & {\em Math\/} \\ \hline +{\tt >=} & $\geq$ \\ +{\tt <-} & $\leftarrow$ \\ +{\tt ->} & $\rightarrow$ \\ +{\tt =>} & $\Rightarrow$ \\ \hline +\end{tabular} +\end{center} + +\subsect{\LaTeX\ typesetting characters} + +\LaTeX\ uses embedded typesetting commands, so {\tt pphs} has to ensure that if +any of the characters used by \LaTeX\ appear in the input Haskell code, the correct +\LaTeX\ code is outputted to typeset them, rather than have the characters interfere +with the typesetting process. The characters used by \LaTeX\ for typesetting are: +\begin{quote} +\(\#\ \$\ \%\ \&\ \char'176\ \_\ \char'136\ \hbox{$\setminus$}\ \hbox{$\cal \char'146\ \char'147$}\) +\end{quote} +The user of {\tt pphs} need not worry about using any of these characters in Haskell +programs, as this will be dealt with by {\tt pphs} before \LaTeX\ gets to see the code. + +\subsect{How to call it} + +The program is called by typing {\tt pphs} followed by the name of +the file containing the Haskell program to be typeset. If the +filename ends with a {\tt .hs} extension, this may be omitted, +unless another file exists with the same name but no extension. +When no extension is specified, the program will look for a +filename with no extension before looking for a file with the +{\tt .hs} extension. + +For example, if the Haskell program was in a file called {\tt Haskell.hs}, +the program would be called by +\begin{quote} +\tt pphs Haskell.hs +\end{quote} +As the filename ends with a {\tt .hs} extension, the extension may be omitted, provided +there is no file already existing called {\tt Haskell}. If there is no such file +\begin{quote} +\tt pphs Haskell +\end{quote} +would produce the same effect as the original call. + +As the program outputs to {\tt stdout}, the code produced may be +directed to a file by using a {\tt >} symbol after the call, followed by +the name of the file to contain the \LaTeX\ code produced by the +program. Continuing the above example, if the output code is to be in +a file called {\tt Haskell.tex}, the call would now be +\begin{quote} +\tt pphs Haskell.hs > Haskell.tex +\end{quote} +It must be noted that if the file {\tt Haskell.tex} already exists, it must be +renamed or removed before making this call. + +There are three options that can be specified in the program call. +If it is desired that double colon symbols should look like $:\,:$ rather than $::$, +use {\tt -w} in the call. The length of the tab characters in the input file can +be specified with {\tt -t} followed by the length. The default tablength is 8. +If identifiers with subscripts are wanted, eg {\iden ident$_1$\/}, then use {\tt -s}. +These are written in the Haskell file as {\tt ident\_1}. + +If the length of the tabs are 4 and +the wide double colons are required, the example call above would become as follows. +\begin{quote} +\tt pphs -t4w Haskell.hs > Haskell.tex +\end{quote} + +\subsect{What to do with the produced code} + +Before including the \LaTeX\ code in the document, it is necessary +to include definitions of the \LaTeX\ commands used by {\tt pphs}. +This can be done simply by including the style file {\tt pphs.sty} +by adding {\tt pphs} to the option list of the documentstyle +command like thus: +\begin{quote} +\begin{verbatim} +\documentstyle[12pt,a4,pphs]{article} +\end{verbatim} +\end{quote} + +Once this has been done, the file containing the \LaTeX\ code +of the Haskell program code can be included. This is done +using the {\tt \char'134 input} command. If the \LaTeX\ +code is located in a file called {\tt Haskell.tex} then the +command is: +\begin{quote} +\begin{verbatim} +\input{Haskell} +\end{verbatim} +\end{quote} +This can be used in various \LaTeX\ environments such as {\tt quote}, +{\tt figure} or {\tt table} to produce different effects. An example +of possible code is: +\begin{quote} +\begin{verbatim} +\begin{quote} +\input{Haskell} +\end{quote} +\end{verbatim} +\end{quote} +See Lamport, L., {\em \LaTeX : A Document Preparation System\/} +(Addison-Wesley, 1986) for more details. + +\subsect{How to make adjustments to the output} + +The {\tt pphs} program is flexible in that it allows user choice on some aspects +of the appearance of the final result. User choice is allowed in two areas, typefaces +and qoute marks. + +The default settings for typefaces are bold for keywords, italics for identifiers and +roman for everything else that is not in the math typeface. However, keywords, identifiers, +strings, comments and numbers may be in whatever typeface the user chooses. +This is done using the {\tt \char'134 def} command to redefine the typeface commands +used by {\tt pphs}. These are {\tt \char'134 keyword}, {\tt \char'134 iden}, +{\tt \char'134 stri}, {\tt \char'134 com} and {\tt \char'134 numb} respectively. + +For example, to put all comments into typewriter font, use +{\tt \char'134 def\char'134 com\char'173 \char'134 tt\char'175} in +the document. The scope of the declaration will be from the point of introduction to +the end of the document. To cancel a redefinition, use {\tt \char'134 def} to +redefine it back to what it was originally. The different typefaces available in \LaTeX\ are +\begin{center} +\begin{tabular}{|c|l|} \hline +{\em code\/} & {\em meaning\/} \\ \hline +{\tt \char'134 bf} & {\bf Boldface} \\ +{\tt \char'134 em} & {\em Emphatic\/} \\ +{\tt \char'134 it} & {\it Italics\/} \\ +{\tt \char'134 rm} & {\rm Roman} \\ \hline +\end{tabular} \hskip3mm \begin{tabular}{|c|l|} \hline +{\em code\/} & {\em meaning\/} \\ \hline +{\tt \char'134 sc} & {\sc Small Caps} \\ +{\tt \char'134 sf} & {\sf Sans Serif} \\ +{\tt \char'134 sl} & {\sl Slanted\/} \\ +{\tt \char'134 tt} & {\tt Typewriter} \\ \hline +\end{tabular} +\end{center} +It should be noted that the emphatic typeface is just the same as italics, although +nesting emphatic sections will alternate between italics and roman. + +Two types of quote mark are redefinable, forwards quotes and escape quotes. +The default for both of them is ' but if it is wished to redefine one or +both of them, use the {\tt \char'134 def} with either {\tt \char'134 forquo} +or {\tt \char'134 escquo}. For example, to make escape quotes be +printed as {\sf '} use {\tt \char'134 def\char'134 escquo\char'173 \char'134 hbox\char'173 \char'134 sf '\char'175 \char'175} in the document. + +\subsect{Altering the output} + +As {\tt pphs} produces code which is subsequently run through \LaTeX , it is possible +to alter the code before it is run through \LaTeX . This is useful for correcting +mistakes made by {\tt pphs}. However, it is recommended that only those experienced +in \LaTeX\ try this.
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/User_Documents.tex b/ghc/CONTRIB/pphs/docs/User_Documents.tex new file mode 100644 index 0000000000..0680e62daf --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/User_Documents.tex @@ -0,0 +1,5 @@ +\chapter{User documentation} + +This document is intended to be read by users of {\tt pphs}. + +\input{UserGuide_Text} diff --git a/ghc/CONTRIB/pphs/docs/Uses.tex b/ghc/CONTRIB/pphs/docs/Uses.tex new file mode 100644 index 0000000000..c488bb4263 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Uses.tex @@ -0,0 +1,262 @@ +\chapter{Uses for output} + +This chapter describes how the output from {\tt pphs} can be used. First, +examples of the capabilities of {\tt pphs} are shown, then it is explained how +the output is incorporated into \LaTeX\ documents, and how the user can alter +the output using built in methods or by editing the output. + +\section{Examples of output} \label{examples} + +Up until now, only examples of input have been shown. Let us now see what +{\tt pphs} actually does to this input. Take this earlier example. +\begin{quote} +\input{Haskell_leftindent2} +\end{quote} +This is how this code is typeset by {\tt pphs}. +\begin{quote} +\input{LaTeX_leftindent2} +\end{quote} +Probably the most obvious thing about the typeset code is the highlighting +of the identifiers. The reserved identifier or keyword {\keyword where} has been +highlighted in boldface while all the other identifiers are in italics. +The various symbols are in roman or math font as appropriate, these do not +get put in italics. Less obvious is the indentation. Notice how the starts +of the third, fourth and sixth lines all line up under {\iden state\/} on the +second line, just like they do in the input. Similarly, the start of the fifth +line is under the $|$ on the fourth. This demonstrates {\tt pphs}'s ability to +recreate left indentation in \LaTeX. But note how the $=$ on the sixth line does +not align under the $|$ on the fifth as it does in the input. This is because +they are different characters and so {\tt pphs} does not recognise this as internal +alignment. The only special case made in this part of the program was for $::$ and $=$. +Alignment would have occurred by coincidence had the preceding characters on both lines +been of the same width. + +To illustrate internal alignment, recall this earlier example. +\begin{quote} +\input{Haskell_internalalign1} +\end{quote} +This code gets typeset like this. +\begin{quote} +\input{LaTeX_internalalign1} +\end{quote} +Notice here how the $=$ signs are aligned in a column, despite being preceded +be characters that may be of different widths. This demonstrates the ability of +{\tt pphs} to recreate internal alignment. Notice also how the {\tt '} signs +have been interpreted as primes. This is because they are immediately preceded +by identifiers. The {\tt *} signs have been transformed into multiplication signs, +while the {\tt =>} has been replaced with $\Rightarrow$. + +Here is a new example, this time illustrating a comment and strings. +\begin{quote} +\input{Haskell_string1} +\end{quote} +This example gets typeset as follows. +\begin{quote} +\input{LaTeX_string1} +\end{quote} +Note how {\tt pphs} puts the correct inverted commas at each end of the strings +and how the strings themselves and the comment are in roman typeface. +The $=$ signs show internal alignment. + +This next example demonstrates a comment, character quotes and the special case +with internal alignment where {\tt =} are aligned under {\tt ::}. +\begin{quote} +\input{Haskell_char} +\end{quote} +Typeset, this becomes +\begin{quote} +\input{LaTeX_char} +\end{quote} +The comment is typeset in roman, as are the character quotes. This example has +the default double colon. Using the {\tt -w} option, the colons can be positioned +further apart as illustrated below. +\begin{quote} +\input{LaTeX_wide-colons} +\end{quote} +It is a matter of taste which is used. + +\section{Incorporating output into \LaTeX\ documents} + +The motivation behind typesetting Haskell programs was so they could be incorporated +into \LaTeX\ documents. This section describes how to do this with the output +of {\tt pphs}. + +\subsection{The style file} \label{style-file} + +Before using the output generated by {\tt pphs}, it is necessary to incorporate the +{\tt pphs.sty} style file (see Appendix~\ref{style-code}) into the document. +This provides definitions of the non-standard +commands produced by the program. The use of the style file is announced +by adding {\tt pphs} to the option list of the documentstyle +command like thus: +\begin{quote} +\begin{verbatim} +\documentstyle[12pt,a4,pphs]{article} +\end{verbatim} +\end{quote} +Without {\tt pphs} in the option list, errors will occur when \LaTeX\ is run, +unless all the non-standard commands used by {\tt pphs} have been defined elsewhere +in the document. + +\subsection{Including the output file} + +To include the file containing the code output by {\tt pphs}, the \LaTeX\ +{\tt \char'134 input} command is used. If the file containing the output is called +{\tt output.tex} then the following command is used. +\begin{quote} +\begin{verbatim} +\input{output} +\end{verbatim} +\end{quote} +The code will appear at the left margin like this: +\input{LaTeX_simple} +This is useful for code listings. + +By using various different \LaTeX\ environments, the typeset Haskell code +can be arranged differently. +To have the code indented like the examples in Section~\ref{examples}, the +{\tt quote} environment should be used. The code +\begin{quote} +\begin{verbatim} +\begin{quote} +\input{output} +\end{quote} +\end{verbatim} +\end{quote} +would produce +\begin{quote} +\input{LaTeX_simple} +\end{quote} +The {\tt table} environment can be used to put the typeset Haskell code +into a table and also allows the code to be captioned. +The table will appear at the top of the current or next page depending on what +space is available in the document. The \LaTeX\ code used to produce this is +\begin{quote} +\begin{verbatim} +\begin{table} +\begin{center} +\begin{minipage}{5cm} +\input{output} +\end{minipage} +\end{center} +\caption{Typeset code in a table} \label{output-table} +\end{table} +\end{verbatim} +\end{quote} +and this will produce a table, in this case Table~\ref{simple-table}. +The {\tt minipage} environment is required because \LaTeX\ interprets the {\tt tabbing} +environment as occupying the full page width, even if the text doesn't actually +use all that space. The width argument, here {\tt 5cm}, is set to the width of the typeset +Haskell code. If centering is not required, omit the {\tt center} and +{\tt minipage} environments. +The table can be referenced if it is labelled with the {\tt \char'134 label} +command, as above, and can be referred to in the text by using the code +{\tt Table\char'176 \char'134 ref\char'173 output-table\char'175} which will +keep the table number consistent with the numbering of the chapter and other tables. +\begin{table} +\begin{center} +\begin{minipage}{5cm} +\input{LaTeX_simple} +\end{minipage} +\end{center} +\caption{Typeset code in a table} \label{simple-table} +\end{table} +Similarly, the {\tt figure} environment can be used. The code is +\begin{quote} +\begin{verbatim} +\begin{figure} +\begin{center} +\begin{minipage}{5cm} +\input{output} +\end{minipage} +\end{center} +\caption{Typeset code in a figure} \label{output-figure} +\end{figure} +\end{verbatim} +\end{quote} +which produces a figure, in this case Figure~\ref{simple-figure}. +Again, it can be captioned and referenced, as with tables. +\begin{figure} +\begin{center} +\begin{minipage}{5cm} +\input{LaTeX_simple} +\end{minipage} +\end{center} +\caption{Typeset code in a figure} \label{simple-figure} +\end{figure} + +The result, once included in the final document, may have too +much blank space under the typeset code such as is the case in +this next example. +\begin{quote} +\input{LaTeX_blankline} +\end{quote} +This means there were extra blank lines at the end of the input file, caused +by extra return characters. This can be +rectified by removing the extra return characters and running {\tt pphs} again. + +\subsection{Lengthy lines} + +It is always possible that the lines of typeset Haskell code will run off +the right-hand edge of the user's page in the final document. Where this happens, +it is necessary to edit the input file and re-run {\tt pphs}. Be careful not to +change the parse of the program by wrongly indenting the second part of the line. + +\section{User adjustments} \label{user-adj} + +The user is able to have some say on what the output looks like. +This makes the program more flexible and doesn't dictate what a +Haskell program should look like when typeset. There are two areas in which user +choice is allowed, other than the double colon symbol described in Chapter~\ref{wide-colons}. + +\subsection{Typefaces} + +The default settings for typefaces are bold for keywords, italics for identifiers and +roman for everything else that is not in the math typeface. However, keywords, identifiers, +strings, comments and numbers may be in whatever typeface the user chooses. +This is done using the {\tt \char'134 def} command to redefine the typeface commands +used by {\tt pphs}. These are {\tt \char'134 keyword}, {\tt \char'134 iden}, +{\tt \char'134 stri}, {\tt \char'134 com} and {\tt \char'134 numb} respectively. + +For example, to put all comments into typewriter font, use +{\tt \char'134 def\char'134 com\char'173 \char'134 tt\char'175} in +the document. The scope of the declaration will be from the point of introduction to +the end of the document. To cancel a redefinition, use {\tt \char'134 def} to +redefine it back to what it was originally. + +The different typefaces available in \LaTeX\ are shown in Table~\ref{fonts}. +It should be noted that the emphatic typeface is just the same as italics, although +nesting emphatic sections will alternate between italics and roman. +\begin{table} +\begin{center} +\begin{tabular}{|c|l|} \hline +{\em code\/} & {\em meaning\/} \\ \hline +{\tt \char'134 bf} & {\bf Boldface} \\ +{\tt \char'134 em} & {\em Emphatic\/} \\ +{\tt \char'134 it} & {\it Italics\/} \\ +{\tt \char'134 rm} & {\rm Roman} \\ \hline +\end{tabular} \hskip3mm \begin{tabular}{|c|l|} \hline +{\em code\/} & {\em meaning\/} \\ \hline +{\tt \char'134 sc} & {\sc Small Caps} \\ +{\tt \char'134 sf} & {\sf Sans Serif} \\ +{\tt \char'134 sl} & {\sl Slanted\/} \\ +{\tt \char'134 tt} & {\tt Typewriter} \\ \hline +\end{tabular} +\end{center} +\caption{Typefaces available in \LaTeX } \label{fonts} +\end{table} + +\subsection{Quote marks} + +Two types of quote mark are redefinable, forwards quotes and escape quotes. +The default for both of them is ' but if it is wished to redefine one or +both of them, use the {\tt \char'134 def} with either {\tt \char'134 forquo} +or {\tt \char'134 escquo}. For example, to make escape quotes be +printed as {\sf '} use {\tt \char'134 def\char'134 escquo\char'173 \char'134 hbox\char'173 \char'134 sf '\char'175 \char'175} in the document. + +\section{Altering the output} + +As {\tt pphs} produces code which is subsequently run through \LaTeX , it is possible +to alter the code before it is run through \LaTeX . This is useful for correcting +mistakes made by {\tt pphs}. However, it is recommended that only those experienced +in \LaTeX\ try this.
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/What.tex b/ghc/CONTRIB/pphs/docs/What.tex new file mode 100644 index 0000000000..741c822fa2 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/What.tex @@ -0,0 +1,136 @@ +\chapter{What {\tt pphs} does} + +This chapter describes a program called {\tt pphs} which implements the typesetting +requirements described in the previous chapter. The description is from the user's viewpoint, +later chapters going on to describe it from that of the programmer. + +The {\tt pphs} program typesets Haskell programs for use with the \LaTeX\ +typesetting program. It takes as input a file containing a Haskell +program and produces the Haskell code to {\tt stdout}. It is called by +typing {\tt pphs}, followed by the name of the file containing the Haskell +program. For example, if the Haskell program was in a file called {\tt Haskell.hs}, +the program would be called by +\begin{quote} +\tt pphs Haskell.hs +\end{quote} + +If the filename ends with a {\tt .hs} extension, the extension may be omitted, provided +there is no file already existing with the same name but with no extension. If no +extension is given with the filename when called, the program will look for a file of +that name with no extension. If this is not found, the program will add a {\tt .hs} +extension. The above example, therefore, may be simplified to +\begin{quote} +\tt pphs Haskell +\end{quote} +unless the file {\tt Haskell} exists, in which case the original call must be made. + +As the output of {\tt pphs} is to {\tt stdout}, it may be directed to a file by using +the {\tt >} command after the call, followed by the name of the file to contain +the \LaTeX\ code. Continuing the above example, if the output code is to be put into +a file called {\tt Haskell.tex}, the call would now be +\begin{quote} +\tt pphs Haskell.hs > Haskell.tex +\end{quote} +It must be noted that if the file {\tt Haskell.tex} already exists, it should be +renamed or removed before making this call. + +Two options are allowed with the call. In the output, some people prefer \label{wide-colons} +the {\tt ::} symbol to be written $:\,:$ rather than $::$. To obtain the former, use +{\tt -w} for wide colons. A call on {\tt Haskell.hs} requiring wide colons would be +\begin{quote} +\tt pphs -w Haskell.hs +\end{quote} +When the input file's tab characters are not of the standard 8 spaces, this can be +specified with the {\tt -t} command. For example, if the tabs were 4 spaces long, type +\begin{quote} +\tt pphs -t4 Haskell.hs +\end{quote} +Both options can be used at the same time by calling +\begin{quote} +\tt pphs -t4w Haskell.hs +\end{quote} +or +\begin{quote} +\tt pphs -wt4 Haskell.hs +\end{quote} +Any positive integer can be specified for the tablength. + +\section{Left indentation} + +It is in the nature of Haskell programs that indentation is heavily used. As the +indentation is vital to the parsing of the program, any attempt at typesetting +Haskell code must replicate this indentation. Take, for example, the following piece of code. +\begin{quote} +\input{Haskell_leftindent1} +\end{quote} +Note how the third and fourth lines both start at different levels of indentation. +The {\tt pphs} program produces the correct \LaTeX\ code to align these under the +correct position in the preceding lines once typeset. It also selects the correct +line to line up under. Note how, in the following example, the sixth line does not line up +under its predecessor, but under the fourth line. +\begin{quote} +\input{Haskell_leftindent2} +\end{quote} +Again, {\tt pphs} produces the code necessary to typeset this, preserving the parsing +order. A line of Haskell code may be indented beyond the end of its predecessor. +Here, {\tt pphs} aligns it with whichever line it is lined up underneath in the +original file. Note that these +examples of possible input have no `extra' typesetting commands. + +\section{Internal alignment} + +Another form of alignment used in Haskell is {\em internal alignment}. This is where +there is vertical alignment of columns other than at the left-hand edge of the +Haskell code. +\begin{quote} +\input{Haskell_internalalign1} +\end{quote} +In this example, see how the {\tt =} signs line up, one below the other. This makes +the program more readable, although it does not affect the parsing of the program. +As the purpose of {\tt pphs} is to make Haskell programs even more readable, it +retains this alignment. + +\section{Token highlighting} + +To increase the readability of Haskell programs, {\tt pphs} allows various tokens +to be highlighted. By using different typefaces for some pieces of code, this +distinguishes them from the rest. The user can specify the details of the highlighting as +described in Section~\ref{user-adj}, but the default settings are {\bf bold} for +keywords, {\it italics} for identifiers and {\rm roman} for everything else. Strings, +comments and numbers are also highlightable (see Section~\ref{user-adj}). + +\section{Mathematical symbols} + +Rather than simply replicate the ASCII approximations of mathematical symbols +used in Haskell, {\tt pphs} +substitutes the proper symbols in the output. These are shown in Table~\ref{maths-sym}. +\begin{table} +\begin{center} +\begin{tabular}[t]{|c|c|} \hline +{\em Haskell\/} & {\em Math\/} \\ \hline +{\tt *} & $\times$ \\ +{\tt ++} & {\hbox{$+\mkern-7.5mu+$}} \\ +{\tt :+} & {:}{+} \\ +{\tt <=} & $\leq$ \\ \hline +\end{tabular} \hskip3mm \begin{tabular}[t]{|c|c|} \hline +{\em Haskell\/} & {\em Math\/} \\ \hline +{\tt >=} & $\geq$ \\ +{\tt <-} & $\leftarrow$ \\ +{\tt ->} & $\rightarrow$ \\ +{\tt =>} & $\Rightarrow$ \\ \hline +\end{tabular} +\end{center} +\caption{Haskell ASCII approximations to mathematical characters} \label{maths-sym} +\end{table} + +\section{\LaTeX\ typesetting characters} + +\LaTeX\ uses embedded typesetting commands, so {\tt pphs} has to ensure that if +any of the characters used by \LaTeX\ appear in the input Haskell code, the correct +\LaTeX\ code is outputted to typeset them, rather than have the characters interfere +with the typesetting process. The characters used by \LaTeX\ for typesetting are: +\begin{quote} +\(\#\ \$\ \%\ \&\ \char'176\ \_\ \char'136\ \hbox{$\setminus$}\ \hbox{$\cal \char'146\ \char'147$}\) +\end{quote} +The user of {\tt pphs} need not worry about using any of these characters in Haskell +programs, as this will be dealt with by {\tt pphs} before \LaTeX\ gets to see the code.
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/Wrapper.tex b/ghc/CONTRIB/pphs/docs/Wrapper.tex new file mode 100644 index 0000000000..c780cd8be6 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/Wrapper.tex @@ -0,0 +1,6 @@ +\documentstyle[12pt,fleqn,a4,pphs]{article} +\begin{document} + +\input{Haskell} + +\end{document} diff --git a/ghc/CONTRIB/pphs/docs/char.hs b/ghc/CONTRIB/pphs/docs/char.hs new file mode 100644 index 0000000000..0aa661eab7 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/char.hs @@ -0,0 +1,5 @@ +-- Character functions + +minChar, maxChar :: Char +minChar = '\0' +maxChar = '\255'
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/comment.hs b/ghc/CONTRIB/pphs/docs/comment.hs new file mode 100644 index 0000000000..694cc4aa2c --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/comment.hs @@ -0,0 +1 @@ +-- note that x + y = z
\ No newline at end of file diff --git a/ghc/CONTRIB/pphs/docs/internalalign1.hs b/ghc/CONTRIB/pphs/docs/internalalign1.hs new file mode 100644 index 0000000000..dad2f142b0 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/internalalign1.hs @@ -0,0 +1,9 @@ +instance (RealFloat a) => Num (Complex a) where + (x:+y) + (x':+y') = (x+x') :+ (y+y') + (x:+y) - (x':+y') = (x-x') :+ (y-y') + (x:+y) * (x':+y') = (x*x'-y*y') :+ (x*y'+y*x') + negate (x:+y) = negate x :+ negate y + abs z = magnitude z :+ 0 + signum 0 = 0 + signum z@(x:+y) = x/r :+ y/r where r = magnitude z + fromInteger n = fromInteger n :+ 0 diff --git a/ghc/CONTRIB/pphs/docs/leftindent1.hs b/ghc/CONTRIB/pphs/docs/leftindent1.hs new file mode 100644 index 0000000000..43a7cf44ed --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/leftindent1.hs @@ -0,0 +1,4 @@ +gcd :: Int -> Int -> Int +gcd x y = gcd' (abs x) (abs y) + where gcd' x 0 = x + gcd' x y = gcd' y (x `rem` y) diff --git a/ghc/CONTRIB/pphs/docs/leftindent2.hs b/ghc/CONTRIB/pphs/docs/leftindent2.hs new file mode 100644 index 0000000000..9d9fcd07c1 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/leftindent2.hs @@ -0,0 +1,6 @@ +eval :: GmState -> [GmState] +eval state = state: restStates + where + restStates | gmFinal state = [] + | otherwise = eval nextState + nextState = doAdmin (step state) diff --git a/ghc/CONTRIB/pphs/docs/math.hs b/ghc/CONTRIB/pphs/docs/math.hs new file mode 100644 index 0000000000..4906527797 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/math.hs @@ -0,0 +1,3 @@ +-- list concatenation (right-associative) +(++) :: [a] -> [a] -> [a] +xs ++ ys = foldr (:) ys xs diff --git a/ghc/CONTRIB/pphs/docs/pphs.sty b/ghc/CONTRIB/pphs/docs/pphs.sty new file mode 100644 index 0000000000..298a58ea78 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/pphs.sty @@ -0,0 +1,26 @@ +% ========================================= +% Definitions for use with the pphs program +% ========================================= + +\typeout{For use with the pphs program} + +% Definitions of commands used by pphs + +\newbox\foo +\def\skipover#1{\setbox\foo\hbox{#1}\hskip\wd\foo} +\def\plusplus{\hbox{$+\mkern-7.5mu+$}} +\def\xspa#1{\hskip#1ex} +\def\bareq{\setbox\foo\hbox{$=$}\makebox[\wd\foo]{$|$}} + +% User-redefinable commands - typefaces + +\def\keyword{\bf} +\def\iden{\it} +\def\stri{\rm} +\def\com{\rm} +\def\numb{\rm} + +% User-redefinable commands - quote marks + +\def\forquo{\hbox{\rm '}} +\def\escquo{\hbox{\rm '}} diff --git a/ghc/CONTRIB/pphs/docs/rep.sty b/ghc/CONTRIB/pphs/docs/rep.sty new file mode 100644 index 0000000000..bb4242d7a4 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/rep.sty @@ -0,0 +1,80 @@ +% ===================================================================== +% A4 layout file for documents with big left margins - for folders. +% ===================================================================== + +\typeout{A4 with big left margin document layout} + +% --------------------------------------------------------------------- +% make "@" a letter +% --------------------------------------------------------------------- +\makeatletter + +% --------------------------------------------------------------------- +% PAPER SIZE +% +% TeX expects 1 inch margins all around (1 inch = 25.4 mm). +% a4 is exactly 297mm high by 208mm wide. +% --------------------------------------------------------------------- + +\hsize=157.2truemm +\vsize=246.2truemm + +% --------------------------------------------------------------------- +% PAGE LAYOUT +% +% text size = 144.5mm wide by 231.1mm high +% +% Top Margin: 1in +% Left margin: 1.5in +% Right Margin: 1in +% --------------------------------------------------------------------- + +\textwidth 144.5truemm +\textheight 231.1truemm + +\oddsidemargin=12.7truemm +\evensidemargin=0truemm +\topmargin=0truemm + +% --------------------------------------------------------------------- +% RUNNING HEAD: none +% --------------------------------------------------------------------- +\headheight 0mm +\headsep 0mm + +% --------------------------------------------------------------------- +% FOOT: page number and other information. +% --------------------------------------------------------------------- +\footheight 12pt +\footskip 18truemm +\addtolength{\footskip}{\footheight} + +% --------------------------------------------------------------------- +% INDENTATION +% +% 5mm indentation +% --------------------------------------------------------------------- +\parindent 5truemm + +% --------------------------------------------------------------------- +% math indentation. +% --------------------------------------------------------------------- +\mathindent 10.0truemm + +% --------------------------------------------------------------------- +% FOOTNOTES +% +% Footnotes are in 10 point font. +% +% put 12+1-1 points between text and rule +% put 10pt between at start of footnote +% foot note rule 40mm long +% --------------------------------------------------------------------- +\skip\footins 12pt plus 2pt minus 2pt +\footnotesep 10pt +\def\footnoterule{\kern-3\p@ \hrule width 40mm \kern 2.6\p@} + +% --------------------------------------------------------------------- +% make "@" an other +% --------------------------------------------------------------------- +\makeatother diff --git a/ghc/CONTRIB/pphs/docs/simple.hs b/ghc/CONTRIB/pphs/docs/simple.hs new file mode 100644 index 0000000000..b31d0232b6 --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/simple.hs @@ -0,0 +1,3 @@ +foobar a b = c + where + c = a + b diff --git a/ghc/CONTRIB/pphs/docs/string1.hs b/ghc/CONTRIB/pphs/docs/string1.hs new file mode 100644 index 0000000000..437573222c --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/string1.hs @@ -0,0 +1,6 @@ +-- File and channel names: + +stdin = "stdin" +stdout = "stdout" +stderr = "stderr" +stdecho = "stdecho" diff --git a/ghc/CONTRIB/pphs/docs/string2.hs b/ghc/CONTRIB/pphs/docs/string2.hs new file mode 100644 index 0000000000..c3a063756b --- /dev/null +++ b/ghc/CONTRIB/pphs/docs/string2.hs @@ -0,0 +1,8 @@ +main = appendChan stdout "please type a filename\n" exit ( + readChan stdin exit (\ userInput -> + let (name : _) = lines userInput in + appendChan stdout name exit ( + readFile name (\ ioerror -> appendChan stdout + "can't open file" exit done) + (\ contents -> + appendChan stdout contents exit done)))) diff --git a/ghc/CONTRIB/pphs/pphs.c b/ghc/CONTRIB/pphs/pphs.c new file mode 100644 index 0000000000..aa31a3e7bd --- /dev/null +++ b/ghc/CONTRIB/pphs/pphs.c @@ -0,0 +1,1030 @@ + /* pphs - a pretty printer for Haskell code */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#define MAXLINELENGTH 256 + +enum face {KW, ID, IS, SU, ST, CO, NU, MA, SP, LC, RC, CR, BF, FQ, EQ, DQ, QD, EE, DC, DP, CP, LE, GE, LA, RA, RR, TI, BE}; + /* Possible values of typeface */ + +int widecolons = 0; /* User may want space between double colons */ +int subscripts = 0; /* User may want subscripts after '_' in identifiers */ +int tablength = 8; /* User's input file tablength */ + +typedef struct ElementType_Tag { /* Basic storage unit */ + char chars[MAXLINELENGTH]; /* Characters */ + enum face typeface[MAXLINELENGTH]; /* Typefaces */ + int indentation, length, col; /* Indentation level, non-empty length, column level */ +} ElementType; + +typedef struct StackNodeType_Tag *Link; /* Stack-related types */ +typedef struct StackNodeType_Tag { + ElementType Element; /* Stack item */ + Link Next; /* Link to next node */ +} StackNodeType; +typedef StackNodeType *StackNodePtr; +typedef StackNodePtr StackType; + +typedef int QueueSizeType; /* Queue-related types */ +typedef struct QueueNodeType_Tag *Connection; +typedef struct QueueNodeType_Tag { + ElementType Element; /* Queue item */ + Connection Next; /* Link to next node */ +} QueueNodeType; +typedef QueueNodeType *QueueNodePtr; +typedef struct QueueType_Tag { + QueueNodePtr Front, Rear; + QueueSizeType Length; +} QueueType; + +FILE *ifptr; /* input file pointer */ + + /* * * STACK FUNCTIONS * * */ +StackType + CreateStack() /* Returns an empty stack */ +{ + return(NULL); +} + +int + IsEmptyStack(s) /* Returns 1 if s is empty, 0 otherwise */ +StackType s; +{ + return(s == NULL); +} + +StackType + Push(s, x) /* Returns stack with x pushed onto s */ +StackType s; +ElementType x; +{ + StackType p; + + p = (StackNodeType *) malloc(sizeof(StackNodeType)); + if (p == NULL) { + fprintf(stderr, "pphs: Stack is too big\n"); + exit(3); + } + else { + (*p).Element = x; + (*p).Next = s; + return(p); + } +} + +ElementType + Top(s) /* Returns value of top element in s */ +StackType s; +{ + return((*s).Element); +} + +StackType + Pop(s) /* Returns stack with top element of s popped off */ +StackType s; +{ + StackType t; + + t = (*s).Next; + free(s); + return(t); +} + +StackType + PopSym(s) /* Returns stack with top element of s popped off without freeing */ +StackType s; +{ + StackType t; + + t = (*s).Next; +/* free(s); As PopSym is called within a function, free would free space needed later */ + return(t); +} + /* * * QUEUE FUNCTIONS * * */ +QueueType + CreateQueue() /* Returns an empty queue */ +{ + QueueType q; + + q.Front = NULL; + q.Rear = NULL; + q.Length = 0; + return(q); +} + +int + IsEmptyQueue(q) /* Returns 1 if q is empty, 0 otherwise */ +QueueType q; +{ + return(q.Front == NULL); +} + +int + LengthOfQueue(q) /* Returns length of q */ +QueueType q; +{ + return(q.Length); +} + +QueueNodePtr + FrontOfQueue(q) /* Returns pointer to front of q */ +QueueType q; +{ + return(q.Front); +} + +QueueNodePtr + RearOfQueue(q) /* Returns pointer to rear of q */ +QueueType q; +{ + return(q.Rear); +} + +QueueType + AddToQueue(q, x) /* Adds item x to rear of queue q */ +QueueType q; +ElementType x; +{ + QueueNodePtr p; + + p = (QueueNodeType *) malloc(sizeof(QueueNodeType)); + if (p == NULL) { + fprintf(stderr, "pphs: Queue is too big\n"); + exit(4); + } + else { + (*p).Element = x; + (*p).Next = NULL; + if (q.Front == NULL) + q.Front = p; + else + (*(q.Rear)).Next = p; + q.Rear = p; + q.Length++; + return(q); + } +} + +QueueType + TakeFromQueue(q) /* Removes front item from queue */ +QueueType q; +{ + QueueNodePtr p; + + if (q.Front == NULL) { + fprintf(stderr, "pphs: Stack underflow\n"); + exit(5); + } + else { + p = q.Front; + q.Front = (*(q.Front)).Next; + if (q.Front == NULL) + q.Rear = NULL; + q.Length--; + free(p); + return(q); + } +} + /* * * TYPEFACE FUNCTIONS * * */ +int + IsMathsChar(c) /* Returns 1 if c is a character to be in maths */ +char c; +{ + return((c == '[') || (c == ']') || (c == '/') || (c == ',') || (c == '!') + || (c == ':') || (c == ';') || (c == '(') || (c == ')') || (c == '&') + || (c == '#') || (c == '+') || (c == '-') || (c == '<') || (c == '>') + || (c == '{') || (c == '}') || (c == '=') || (c == '|') || (c == '\'') + || (c == '^')); +} + +ElementType + ChangeTypeface(store, length, finish, tf) /* Changes the typeface to tf in store + for length until finish */ +ElementType store; +int length, finish; +enum face tf; +{ + int counter; + + for (counter = (finish - length); counter < finish; counter++) + store.typeface[counter] = tf; + return(store); +} + +ElementType + CheckForDoubleChar(store, position) /* Checks for double character + in store.chars[position - 2..position - 1], + if found alters typeface */ +ElementType store; +int position; +{ + if ((position >= 2) && (store.typeface[position - 2] != DC)) { + if ((store.chars[position - 2] == '-') && (store.chars[position - 1] == '-')) { + store.typeface[position - 2] = LC; /* Haskell "--" line comment */ + store.typeface[position - 1] = LC; + } + else if ((store.chars[position - 2] == '{') && (store.chars[position - 1] == '-')) { + store.typeface[position - 2] = RC; /* Haskell "{-" regional comment begin */ + store.typeface[position - 1] = DC; + } + else if ((store.chars[position - 2] == '-') && (store.chars[position - 1] == '}')) { + store.typeface[position - 2] = CR; /* Haskell "-}" regional comment end */ + store.typeface[position - 1] = DC; + } + else if ((store.chars[position - 2] == '+') && (store.chars[position - 1] == '+')) { + store.typeface[position - 2] = DP; /* Double plus */ + store.typeface[position - 1] = DC; + } + else if ((store.chars[position - 2] == ':') && (store.chars[position - 1] == '+')) { + store.typeface[position - 2] = CP; /* Colon plus */ + store.typeface[position - 1] = DC; + } + else if ((store.chars[position - 2] == '<') && (store.chars[position - 1] == '=')) { + store.typeface[position - 2] = LE; /* Less than or equal to */ + store.typeface[position - 1] = DC; + } + else if ((store.chars[position - 2] == '>') && (store.chars[position - 1] == '=')) { + store.typeface[position - 2] = GE; /* Greater than or equal to */ + store.typeface[position - 1] = DC; + } + else if ((store.chars[position - 2] == '<') && (store.chars[position - 1] == '-')) { + store.typeface[position - 2] = LA; /* Leftarrow */ + store.typeface[position - 1] = DC; + } + else if ((store.chars[position - 2] == '-') && (store.chars[position - 1] == '>')) { + store.typeface[position - 2] = RA; /* Rightarrow */ + store.typeface[position - 1] = DC; + } + else if ((store.chars[position - 2] == '=') && (store.chars[position - 1] == '>')) { + store.typeface[position - 2] = RR; /* Double rightarrow */ + store.typeface[position - 1] = DC; + } + else if (((store.chars[position - 2] == '*') && (store.chars[position - 1] == '*')) + || ((store.chars[position - 2] == '^') && (store.chars[position - 1] == '^'))) { + store.typeface[position - 2] = MA; /* Exponent, ie not Times */ + store.typeface[position - 1] = MA; + } + } + return(store); +} + +int + IsHaskellPunc(c) /* Returns 1 if c is a punctuation mark not part of identifier */ +char c; +{ + return((c == ' ') || (c == ',') || (c == '@') || (c == '#') || (c == '$') + || (c == '%') || (c == '&') || (c == '*') || (c == '(') || (c == ')') + || (c == '-') || (c == '+') || (c == '=') || (c == '\\') || (c == '|') + || (c == '[') || (c == ']') || (c == '{') || (c == '}') || (c == ':') + || (c == ';') || (c == '"') || (c == '~') || (c == '?') || (c == '/') + || (c == '<') || (c == '>') || (c == '^')); +} + +int + IsKeyWord(str) /* Returns 1 if str is a keyword to be in keyword font */ +char str[MAXLINELENGTH]; +{ + return((!(strcmp(str, "case"))) || (!(strcmp(str, "class"))) + || (!(strcmp(str, "data"))) || (!(strcmp(str, "default"))) + || (!(strcmp(str, "deriving"))) || (!(strcmp(str, "else"))) + || (!(strcmp(str, "hiding"))) || (!(strcmp(str, "if"))) + || (!(strcmp(str, "import"))) || (!(strcmp(str, "in"))) + || (!(strcmp(str, "infix"))) || (!(strcmp(str, "infixl"))) + || (!(strcmp(str, "infixr"))) || (!(strcmp(str, "instance"))) + || (!(strcmp(str, "interface"))) || (!(strcmp(str, "let"))) + || (!(strcmp(str, "module"))) || (!(strcmp(str, "of"))) + || (!(strcmp(str, "renaming"))) || (!(strcmp(str, "then"))) + || (!(strcmp(str, "to"))) || (!(strcmp(str, "type"))) + || (!(strcmp(str, "where")))); +} + +int + KeyWord(c, store, position) /* Returns length of keyword if a keyword ends + at store.chars[position - 1] */ +char c; +ElementType store; +int position; +{ + int counter, start, end = position - 1, keywordlen = 0; + char str[MAXLINELENGTH]; + + if ((!isalpha(c)) && (c != '_') && (c != '\'') && (position)) { + for (counter = end; (counter >= 0) && ((isalpha(store.chars[counter])) + || (c == '_') || (c == '\'')) + && (counter >= store.indentation); counter--) { + ; /* Just count letters */ + } + start = ++counter; + for (counter = 0; counter + start <= end; counter++) { + str[counter] = store.chars[counter + start]; /* Copy letters into str */ + } + str[counter] = '\0'; /* Add null character to end */ + if (IsKeyWord(str)) /* Checks word in str is keyword */ + keywordlen = strlen(str); /* and measures it */ + } + return(keywordlen); +} + +ElementType + CheckForKeyword(c, store, position) /* Returns store with any possible keyword + ending at store.chars[position - 1] + identified as such in store.typeface */ +char c; +ElementType store; +int position; +{ + if (KeyWord(c, store, position)) + store = ChangeTypeface(store, KeyWord(c, store, position), position, KW); + return(store); +} + +int + IsNumber(c, store, position, statesok) /* Returns 1 if c forms part of a number */ +char c; +ElementType store; +int position, statesok; +{ + int counter, foundident = 0, foundpunc = 0; + + if (((isdigit(c)) || (c == 'e') || (c == 'E') || (c == '|') || (c == '.')) + && (statesok)) { + counter = position - 1; + while ((isdigit(store.chars[counter])) && (counter >= 0)) + counter--; + if (((store.chars[counter] == '+') || (store.chars[counter] == '-')) + && ((store.chars[counter - 1] == 'e') || (store.chars[counter - 1] == 'E')) + && (counter > 2)) + counter -= 2; + else if (((store.chars[counter] == 'e') || (store.chars[counter] == 'E')) + && (counter > 1)) + counter--; + while ((isdigit(store.chars[counter])) && (counter >= 0)) + counter--; + if ((store.chars[counter] == '.') && (counter > 1)) + counter--; + while ((isdigit(store.chars[counter])) && (counter >= 0)) + counter--; + if ((isalpha(store.chars[counter])) && (counter >= 0)) + foundident = 1; /* ie not number */ + else if ((IsHaskellPunc(store.chars[counter])) || (counter < 0)) + foundpunc = 1; /* ie is number */ + } + return(foundpunc); +} + /* * * LINE SELECTION FUNCTIONS * * */ +ElementType + SelectSkipLine(s, store, linecounter) /* Returns store containing line for skipover */ +StackType s; +ElementType store; +int linecounter; +{ + ElementType temp; + int counter; + + if (!(IsEmptyStack(s))) { + while (((Top(s)).length <= linecounter) || ((Top(s)).indentation >= linecounter)) { + temp = Top(s); + s = PopSym(s); + if (IsEmptyStack(s)) { + counter = temp.length; + while (counter < linecounter) { + temp.chars[counter] = ' '; + temp.typeface[counter++] = SP; + } + temp.chars[counter] = '\0'; /* Add null character to end */ + s = Push(s, temp); + break; + } + } + store = Top(s); + } + else { /* Stack is empty */ + counter = store.length; + while (counter < linecounter) { + store.chars[counter] = ' '; + store.typeface[counter++] = SP; + } + store.chars[counter] = '\0'; /* Add null character to end */ + } + return(store); +} + /* * * STORING FUNCTIONS * * */ +ElementType + CreateStore() /* Returns an empty store */ +{ + ElementType store; + + strcpy(store.chars, ""); + store.length = 0; + store.indentation = 0; + store.col = 0; + return(store); +} + +ElementType + StoreSpace(store, position) /* Stores a space in the store at current position */ +ElementType store; +int position; +{ + store.chars[position] = ' '; + store.typeface[position] = SP; + return(store); +} + /* * * WRITING FUNCTIONS * * */ +void + WriteStartFace(tf) /* Writes LaTeX typeface commands for start of section */ +enum face tf; +{ + if (tf == KW) /* Keywords */ + printf("{\\keyword "); + else if ((tf == ID) || (tf == IS)) /* Identifiers */ + printf("{\\iden "); + else if (tf == ST) /* Strings */ + printf("{\\stri "); + else if (tf == CO) /* Comments */ + printf("{\\com "); + else if (tf == NU) /* Numbers */ + printf("{\\numb "); + else if ((tf == MA) || (tf == TI)) /* Various maths */ + printf("$"); +} + +void + WriteFinishFace(tf) /* Writes LaTeX typeface commands for end of section */ +enum face tf; +{ + if ((tf == KW) || (tf == ID) || (tf == ST) || (tf == CO) + || (tf == NU)) /* Keywords, identifiers, strings, comments or numbers */ + printf("\\/}"); + else if ((tf == MA) || (tf == TI)) /* Various maths */ + printf("$"); + else if (tf == IS) /* Subscripts in identifiers */ + printf("\\/}$"); +} + +int + WriteSpaces(store, counter, finish) /* Writes consecutive spaces, + returning new counter value */ +ElementType store; +int counter, finish; +{ + int spaces = 0; /* The number of spaces found */ + + for (; (store.typeface[counter] == SP) && (counter < finish); counter++) + spaces++; + printf("\\xspa{%d}", spaces); + return(--counter); +} + +int + WriteChar(store, counter, finish) /* Writes charater, returning new counter value */ +ElementType store; +int counter, finish; +{ + if (store.typeface[counter] == SP) /* Space */ + printf("\\xspa1"); /* Redundant */ + else if (store.typeface[counter] == BE) /* Bar under equals sign */ + printf("\\bareq"); + else if (store.typeface[counter] == DP) { /* Double plus */ + if ((counter < finish - 1) && (store.typeface[counter + 1] == DC)) { + printf("\\plusplus"); + counter++; + } + } + else if (store.typeface[counter] == CP) { /* Colon plus */ + if ((counter < finish - 1) && (store.typeface[counter + 1] == DC)) { + printf("{:}{+}"); + counter++; + } + } + else if (store.typeface[counter] == LE) { /* Less than or equal to */ + if ((counter < finish - 1) && (store.typeface[counter + 1] == DC)) { + printf("$\\leq$"); + counter++; + } + } + else if (store.typeface[counter] == GE) { /* Greater than or equal to */ + if ((counter < finish - 1) && (store.typeface[counter + 1] == DC)) { + printf("$\\geq$"); + counter++; + } + } + else if (store.typeface[counter] == LA) { /* Leftarrow */ + if ((counter < finish - 1) && (store.typeface[counter + 1] == DC)) { + printf("$\\leftarrow$"); + counter++; + } + } + else if (store.typeface[counter] == RA) { /* Rightarrow */ + if ((counter < finish - 1) && (store.typeface[counter + 1] == DC)) { + printf("$\\rightarrow$"); + counter++; + } + } + else if (store.typeface[counter] == RR) { /* Double rightarrow */ + if ((counter < finish - 1) && (store.typeface[counter + 1] == DC)) { + printf("$\\Rightarrow$"); + counter++; + } + } + else if (store.typeface[counter] == RC) { /* Regional comment begin */ + if ((counter < finish - 1) && (store.typeface[counter + 1] == DC)) { + printf("{\\com \\{-\\/}"); + counter++; + } + else + printf("{\\com \\{\\/}"); + } + else if (store.typeface[counter] == CR) { /* Regional comment end */ + if ((counter < finish - 1) && (store.typeface[counter + 1] == DC)) { + printf("{\\com -\\}\\/}"); + counter++; + } + else + printf("{\\com -\\/}"); + } + else if ((store.typeface[counter] == LC) && (store.chars[counter] == '-')) + printf("{\\rm -}"); /* Comment - problem: "--" becomes "-" in LaTeX so fix done */ + else if (store.chars[counter] == '\\') + printf("\\hbox{$\\setminus$}"); /* Backslash */ + else if (store.chars[counter] == '*') { + if (store.typeface[counter] == TI) + printf("\\times "); /* Multiplication */ + else + printf("*"); /* Other star symbols, eg Exponent */ + } + else if ((store.chars[counter] == '_') && (store.typeface[counter] == SU)) { + if ((counter < finish - 1) && (store.typeface[counter + 1] == IS)) + printf("$_"); /* Subscript character */ + } + else if (store.chars[counter] == '^') + printf("\\char'136 "); /* Up-arrow */ + else if (store.chars[counter] == '~') + printf("\\char'176 "); /* Tilda */ + else if ((store.chars[counter] == ':') && (store.chars[counter - 1] == ':') + && (widecolons)) + printf("\\,:"); /* Double colon */ + else if (store.chars[counter] == '"') { + if ((counter) && ((store.chars[counter - 1] == '"') + || (store.chars[counter - 1] == '\''))) + printf("\\,"); /* If previous character was a quote, leave a little space */ + if (store.typeface[counter] == DQ) + printf("{\\rm ``}"); /* Open doublequote */ + else if (store.typeface[counter] == QD) + printf("{\\rm \"}"); /* Close doublequote */ + else + printf("{\\rm \\char'175}"); /* Escape doublequote in string */ + } + else if (store.chars[counter] == '\'') { + if ((counter) && ((store.chars[counter - 1] == '"') + || ((store.chars[counter - 1] == '\'') + && ((store.typeface[counter - 1] != MA) + || (store.typeface[counter] != MA))))) + printf("\\,"); /* If previous character was a quote, leave a little space + except when it's a double prime */ + if (store.typeface[counter] == FQ) + printf("\\forquo "); /* Forward single quote */ + else if (store.typeface[counter] == EQ) + printf("\\escquo "); /* Escape single quote */ + else if (store.typeface[counter] == BF) { + if ((counter + 1 < store.length) && (store.typeface[counter + 1] == BF) + && (counter + 1 != store.indentation)) { + printf("{\\com \'\'\\/}"); /* Closing LaTeX style quote */ + counter++; + } + else + printf("{\\com \'\\/}"); /* Single quote following backquote in comment */ + } + else + printf("\'"); /* Prime */ + } + else if (store.chars[counter] == '{') + printf("\\hbox{$\\cal \\char'146$}"); /* Open curly bracket */ + else if (store.chars[counter] == '}') + printf("\\hbox{$\\cal \\char'147$}"); /* Close curly bracket */ + else if ((counter) && (store.chars[counter - 1] == '[') && (store.chars[counter] == ']')) + printf("\\,]"); /* Leave small gap between adjacent square brackets */ + else if ((store.chars[counter] == '$') || (store.chars[counter] == '%') + || (store.chars[counter] == '_') || (store.chars[counter] == '#') + || (store.chars[counter] == '&')) /* Various characters needing '\' for LaTeX */ + printf("\\%c", store.chars[counter]); + else /* Other characters */ + printf("%c", store.chars[counter]); + return(counter); +} + +void + WriteSkipover(store) /* Writes the skipover portion of line in store */ +ElementType store; +{ + int counter = 0; + + printf("\\skipover{"); /* Write opening LaTeX skipover command */ + WriteStartFace(store.typeface[counter]); /* Write opening LaTeX typeface command */ + if (store.typeface[counter] == SP) + counter = WriteSpaces(store, counter, store.indentation); /* Write spaces */ + else + counter = WriteChar(store, counter, store.indentation); /* Write character */ + for (counter++; counter < store.indentation; counter++){ /* until end of skipover */ + if (store.typeface[counter - 1] != store.typeface[counter]) { /* If typeface change */ + WriteFinishFace(store.typeface[counter - 1]); /* write closing typeface command */ + WriteStartFace(store.typeface[counter]); /* write opening LaTeX typeface command */ + } + if (store.typeface[counter] == SP) + counter = WriteSpaces(store, counter, store.indentation); /* Write spaces */ + else + counter = WriteChar(store, counter, store.indentation); /* Write character */ + } + if (store.typeface[counter - 1] == SU) + ; /* If indentation is under subscript don't open math section */ + else + WriteFinishFace(store.typeface[counter - 1]); /* Write closing LaTeX typeface command */ + printf("}"); /* Write closing LaTeX skipover command */ +} + +void + WriteWords(store) /* Writes rest of line, starting at indentation level */ +ElementType store; +{ + int counter = store.indentation; + int intabular = 0; /* Boolean: is in tabular section for internal alignment */ + + WriteStartFace(store.typeface[counter]); /* Write opening LaTeX typeface command */ + if (store.typeface[counter] == SP) + counter = WriteSpaces(store, counter, store.length); /* Write spaces */ + else + counter = WriteChar(store, counter, store.length); /* Write character */ + for (counter++; counter < store.length; counter++){ /* until end of word */ + if ((store.col) && (store.col == counter)) { + printf(" & "); + if (store.chars[counter - 1] == ':') + printf("$:"); + intabular = 1; + } + if (store.typeface[counter - 1] != store.typeface[counter]) /* If typeface change */ + WriteFinishFace(store.typeface[counter - 1]); /* Write closing typeface command */ + if ((store.typeface[counter] == SP) && (intabular)) { + printf(" & "); + intabular = 0; + } + if ((store.typeface[counter - 1] != store.typeface[counter]) /* If typeface change */ + && ((store.chars[counter] != ':') || (store.col != counter + 1))) + WriteStartFace(store.typeface[counter]); /* Write opening LaTeX typeface command */ + if (store.typeface[counter] == SP) + counter = WriteSpaces(store, counter, store.length); /* Write spaces */ + else if ((store.chars[counter] != ':') || (!store.col) || (store.col != counter + 1)) + counter = WriteChar(store, counter, store.length); /* Write character */ + } + WriteFinishFace(store.typeface[counter - 1]); /* Write closing LaTeX typeface command */ +} + +void + WriteLine(store, needed) /* Writes the line in store, + only writing LaTeX newline if needed */ +ElementType store; +int needed; +{ + if (store.indentation) + WriteSkipover(store); + if (store.indentation < store.length) + WriteWords(store); + if (needed) + printf("\\\\"); /* LaTeX newline character */ + printf("\n"); +} + +QueueType + WriteQueue(q) /* Writes lines, removing them from queue, + leaves last line in queue if not in tabular section */ +QueueType q; +{ + int intabular = 0; + + if ((!(IsEmptyQueue(q))) && ((*(FrontOfQueue(q))).Element.col)) { + printf("\\begin{tabular}{@{}l@{\\xspa1}c@{}l}\n"); + intabular = 1; + } + while (LengthOfQueue(q) > !intabular) { + WriteLine((*(FrontOfQueue(q))).Element, 1); /* LaTeX newline character is needed */ + q = TakeFromQueue(q); + } + if (intabular) + printf("\\end{tabular}\\\\\n"); + return(q); +} + +QueueType + WriteRestOfQueue(q) /* Writes all lines, removing them from queue, + doesn't have LaTeX newline after last line */ +QueueType q; +{ + int intabular = 0; + + if ((!(IsEmptyQueue(q))) && ((*(FrontOfQueue(q))).Element.col)) { + printf("\\begin{tabular}{@{}l@{\\xspa1}c@{}l}\n"); + intabular = 1; + } + while (!(IsEmptyQueue(q))) { + WriteLine((*(FrontOfQueue(q))).Element, (LengthOfQueue(q) > 1)); /* Last line doesn't + need LaTeX newline character */ + q = TakeFromQueue(q); + } + if (intabular) { + printf("\\end{tabular}"); + if (!IsEmptyQueue(q)) /* Last line doesn't need LaTeX newline character */ + printf("\\\\"); + printf("\n"); + } + return(q); +} + +int +main (argc, argv) /* * * MAIN PROGRAM * * */ + int argc; + char *argv[]; +{ + int tripped = 1, instring = 0, instringincomment = 0, inlinecomment = 0; + int incharquote = 0, incharquoteincomment = 0, inbackquoteincomment = 0; + int insub = 0; + /* Booleans - just taken new line, in string, in string inside comment, in line comment, + in character quote, in character quote inside comment, in backquote inside comment, + in subscript */ + int linecounter = 0, indentcounter = 0, inregcomment = 0, pos; + /* Counters: current position on line, indentation of current line, + nesting level of regional comments, position marker */ + char c; /* Character */ + StackType s; /* Stack of previous longest lines */ + QueueType q; /* Queue of lines waiting to be printed */ + ElementType store; /* Store of letters, typefaces and non-empty length */ + + if ((argc == 3) && (argv[1][0] == '-')) { /* If options specified with call */ + if (strstr(argv[1], "s")) /* if -s option, subscripts in identifiers wanted */ + subscripts = 1; + if (strstr(argv[1], "t")) { /* if -tX option, tab characters are X spaces */ + for (pos = 1; (argv[1][pos] != 't'); pos++) /* find 't' */ + ; + for (pos++, tablength = 0; isdigit(argv[1][pos]); pos++) /* read number */ + tablength = (tablength * 10) + (argv[1][pos] - '0'); + } + if (strstr(argv[1], "w")) /* if -w option called, wide double colons wanted */ + widecolons = 1; + } + else if (argc == 2) /* If no options */ + ; + else { /* If not called with pphs and a filename */ + fprintf(stderr, "pphs: Call with one file name\n"); + exit(1); + } + + if ((strcspn(argv[argc - 1], ".") == strlen(argv[argc - 1])) /* If filename has no extention */ + && ((ifptr = fopen(argv[argc - 1], "r")) == NULL)) /* and no plain file of that name */ + strcat(argv[argc - 1], ".hs"); /* add a ".hs" extention */ + if ((ifptr = fopen(argv[argc - 1], "r")) == NULL) { /* Open input file */ + fprintf(stderr, "pphs: File could not be opened\n"); /* eg isn't there */ + exit(2); + } + else { + + printf("\\begin{tabbing}\n"); /* Start of Haskell program */ + + store = CreateStore(); /* an empty one */ + s = CreateStack(); /* an empty one */ + q = CreateQueue(); /* an empty one */ + + fscanf(ifptr, "%c", &c); /* Read character */ + while (!feof(ifptr)) { /* While not at end of input file */ + while ((isspace(c)) && (!(feof(ifptr)))) { /* Read blank characters */ + if (c == ' ') { + if (tripped) + linecounter++; /* Count leading spaces */ + else { /* or */ + store = StoreSpace(store, linecounter++); /* Store intermediate + or trailing space */ + if (store.length < linecounter) + store.chars[linecounter] = '\0'; /* Add null character to end */ + } + fscanf(ifptr, "%c", &c); /* Read next character */ + } + else if (c == '\t') { + if (tripped) + linecounter += (tablength - (linecounter % tablength)); + else { + store = StoreSpace(store, linecounter++); + for (; linecounter % tablength; linecounter++) + store = StoreSpace(store, linecounter); + if (store.length < linecounter) + store.chars[linecounter] = '\0'; /* Add null character to end */ + } + fscanf(ifptr, "%c", &c); /* Read next character */ + } + else if (c == '\n') { + tripped = 1; /* Just taken a new line */ + inlinecomment = 0; + if (!(IsEmptyStack(s))) + while (((Top(s)).length <= store.length) + && ((Top(s)).indentation >= store.length)) { + s = Pop(s); + if (IsEmptyStack(s)) + break; + } + if (store.length > 0) { /* Push non-empty line onto indentation stack */ + store.indentation = indentcounter; + s = Push(s, store); + } + if (!(IsEmptyQueue(q))) { + if ((store.col != (*(FrontOfQueue(q))).Element.col) + || (!(*(FrontOfQueue(q))).Element.col)) + q = WriteQueue(q); /* If internal alignment changes or there is none + write out lines */ + } + q = AddToQueue(q, store); /* Add to writing queue */ + linecounter = 0; /* Get ready to count leading spaces */ + store.length = linecounter; + fscanf(ifptr, "%c", &c); /* Read next character */ + } + else break; + } + if (tripped) { + indentcounter = linecounter; + store.indentation = linecounter; + store.col = 0; + } + if ((tripped) && (linecounter)) { /* Skipover necessary for indentation */ + store = SelectSkipLine(s, store, linecounter); + store.indentation = linecounter; + store.col = 0; + } + if (!feof(ifptr)) + tripped = 0; /* No longer just taken new line */ + while ((!(isspace(c))) && (!(feof(ifptr)))) { /* Read word */ + if ((linecounter > 1) && (!IsEmptyQueue(q)) + && ((*(RearOfQueue(q))).Element.length >= linecounter) + && (linecounter > store.indentation) + && (linecounter > (*(RearOfQueue(q))).Element.indentation) + && (store.chars[linecounter - 1] == ' ') + && ((((*(RearOfQueue(q))).Element.chars[linecounter - 1] == ' ') + && ((c == (*(RearOfQueue(q))).Element.chars[linecounter]) + || ((c == '=') + && ((*(RearOfQueue(q))).Element.chars[linecounter] == ':') + && ((*(RearOfQueue(q))).Element.chars[linecounter + 1] == ':')))) + || (((*(RearOfQueue(q))).Element.chars[linecounter - 1] == ':') + && ((*(RearOfQueue(q))).Element.chars[linecounter] == ':') + && (c == '='))) + && ((store.chars[linecounter - 2] == ' ') + || ((*(RearOfQueue(q))).Element.chars[linecounter - 2] == ' ')) + && (((*(RearOfQueue(q))).Element.col == 0) + || ((*(RearOfQueue(q))).Element.col == linecounter))) { + store.col = linecounter; /* Identify any internal alignment */ + (*(RearOfQueue(q))).Element.col = linecounter; + } + if ((c == '"') && (!incharquote) /* String outside comments */ + && (!inregcomment) && (!inlinecomment)) { + if (((linecounter) && (store.chars[linecounter - 1] != '\\')) + || (!linecounter)) + instring = !instring; + } + else if ((c == '"') && (!incharquoteincomment) /* String inside comment */ + && (!inbackquoteincomment) + && ((inregcomment) || (inlinecomment))) { + if (((linecounter) && (store.chars[linecounter - 1] != '\\')) + || (!linecounter)) + instringincomment = !instringincomment; + } + else if ((c == '`') && ((inlinecomment) || (inregcomment))) { + if ((linecounter) && (store.chars[linecounter - 1] == '`')) + inbackquoteincomment = 2; /* Opening LaTeX style quote in comment */ + else + inbackquoteincomment = !inbackquoteincomment; /* Backquote in comment */ + } + else if ((linecounter) && (!inlinecomment) && (!instring)) { + if ((store.chars[linecounter - 1] == '{') && (c == '-')) + inregcomment++; /* Haskell "{-" regional comment begin */ + else if ((store.chars[linecounter - 1] == '-') && (c == '}')) { + inregcomment--; /* Haskell "-}" regional comment end */ + instringincomment = 0; + incharquoteincomment = 0; + inbackquoteincomment = 0; + } + } + if (c == '|') { + if ((!IsEmptyQueue(q)) + && ((((*(RearOfQueue(q))).Element.chars[linecounter] == '=') + && (linecounter == store.indentation)) + || ((*(RearOfQueue(q))).Element.typeface[linecounter] == BE))) + store.typeface[linecounter] = BE; + else + store.typeface[linecounter] = MA; + } + else if ((c == '\'') && (linecounter) && (store.chars[linecounter - 1] == '\\')) + store.typeface[linecounter] = EQ; /* Escape character quote */ + else if ((c == '\'') && (!instring) && (!inregcomment) && (!inlinecomment)) { + if (((linecounter) && (store.chars[linecounter - 1] != '\\') + && ((IsHaskellPunc(store.chars[linecounter - 1])) || (incharquote))) + || (!linecounter)) { + incharquote = !incharquote; + store.typeface[linecounter] = FQ; /* Character quote */ + } + else + store.typeface[linecounter] = MA; /* Prime */ + } + else if ((c == '\'') && (!instringincomment) + && ((inregcomment) || (inlinecomment))) { + if (((linecounter) && (store.chars[linecounter - 1] != '\\') + && ((IsHaskellPunc(store.chars[linecounter - 1])) + || (incharquoteincomment))) + || (!linecounter)) { + incharquoteincomment = !incharquoteincomment; + store.typeface[linecounter] = FQ; /* Character quote in comment */ + } + else if (inbackquoteincomment) { + inbackquoteincomment--; + store.typeface[linecounter] = BF; /* `x' character quote in comment */ + } + else + store.typeface[linecounter] = MA; /* Prime */ + } + else if (c == '"') { + if ((!incharquote) && (!incharquoteincomment) && (!inbackquoteincomment) + && ((instring) || (instringincomment))) { + if (((linecounter) && (store.chars[linecounter - 1] != '\\')) + || (!linecounter)) + store.typeface[linecounter] = DQ; /* Open doublequote */ + else if (store.chars[linecounter - 1] == '\\') + store.typeface[linecounter] = EE; /* Escape doublequote */ + } + else if ((!incharquote) && (!incharquoteincomment) && (!inbackquoteincomment)) { + if (((linecounter) && (store.chars[linecounter - 1] != '\\')) + || (!linecounter)) + store.typeface[linecounter] = QD; /* Close doublequote */ + else if (store.chars[linecounter - 1] == '\\') + store.typeface[linecounter] = EE; /* Escape doublequote */ + } + else + store.typeface[linecounter] = EE; /* Character quote of doublequote */ + } + else if (c == '`') { + if ((inlinecomment) || (inregcomment)) + store.typeface[linecounter] = CO; + else + store.typeface[linecounter] = MA; + } + else if ((linecounter) && (subscripts) && (c == '_') + && (store.typeface[linecounter - 1] == ID)) + store.typeface[linecounter] = SU; /* Subscript in identifier */ + else if (c == '*') + store.typeface[linecounter] = TI; /* Times - may be changed by double char */ + else if (IsMathsChar(c)) + store.typeface[linecounter] = MA; /* Maths characters */ + else if (IsNumber(c, store, linecounter, + ((!inregcomment) && (!instring) && (!inlinecomment)))) + store.typeface[linecounter] = NU; /* Numbers */ + else if ((instring) || (incharquote)) + store.typeface[linecounter] = ST; /* Characters in strings */ + else if ((inlinecomment) || (inregcomment)) + store.typeface[linecounter] = CO; /* Characters in comments */ + else { + if (insub) + store.typeface[linecounter] = IS; /* Subscript identifiers */ + else + store.typeface[linecounter] = ID; /* Others */ + } + if (linecounter) + if ((store.typeface[linecounter - 1] == IS) + && (store.typeface[linecounter] != IS)) + insub = 0; /* End of subscript identifier */ + store.chars[linecounter++] = c; /* Place character in store */ + if (linecounter > store.indentation + 1) + store = CheckForDoubleChar(store, linecounter); + if ((store.typeface[linecounter - 1] == LC) && (!inregcomment) + && (!instring) && (!incharquote)) { + instringincomment = 0; + incharquoteincomment = 0; + inbackquoteincomment = 0; + inlinecomment = 1; + } + else if ((store.typeface[linecounter - 1] == SU) + && (linecounter != store.indentation)) + insub = 1; + fscanf(ifptr, "%c", &c); /* Read next character */ + if (feof(ifptr)) + c = ' '; + if ((!inregcomment) && (!inlinecomment) && (!instring)) + store = CheckForKeyword(c, store, linecounter); /* Keywords not in comments or + strings to be in keyword typeface */ + } + insub = 0; + store.chars[linecounter] = '\0'; /* String terminating null character */ + store.length = linecounter; + } + if ((!tripped) && (!store.col)) /* If last line not in internal alignment */ + q = WriteQueue(q); /* write previous lines which might */ + if (!tripped) /* Put final line in queue if non-empty */ + q = AddToQueue(q, store); + if (feof(ifptr)) /* Write remaining lines */ + q = WriteRestOfQueue(q); + + printf("\\end{tabbing}\n"); /* End of Haskell program */ + + exit(0); + } +} |