Add literate Haskell lexer.

author: gbrandl <devnull@localhost> 2007-10-14 10:54:20 +0200
committer: gbrandl <devnull@localhost> 2007-10-14 10:54:20 +0200
commit: 8fbd90da2ebaf258d3a2acfb4a608266ec44f70b (patch)
tree: 98df55bf1845168b5c05d02c77e9ae815d723cbf
parent: fab2810cb986997bb5bca2c2a8b6ca846e364321 (diff)
download: pygments-8fbd90da2ebaf258d3a2acfb4a608266ec44f70b.tar.gz
5 files changed, 875 insertions, 5 deletions
diff --git a/CHANGES b/CHANGES
index 0648da17..6da9856c 100644
--- a/CHANGES
+++ b/CHANGES
@@ -3,7 +3,7 @@ Pygments changelog
 
 Version 0.9
 -----------
-(codename Herbstzeitlose, released Sep XX, 2007)
+(codename Herbstzeitlose, released Oct 14, 2007)
 
 - The encoding handling of the command line mode (pygmentize) was
   enhanced. You shouldn't get UnicodeErrors from it anymore if you
@@ -26,6 +26,8 @@ Version 0.9
 
 - Greatly improved the Haskell and OCaml lexers.
 
+- Added a lexer for Literate Haskell.
+
 - The C# and Java lexers exhibited abysmal performance with some
   input code; this should now be fixed.
 
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index 3f0c4552..1fe8b3e4 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -46,7 +46,7 @@ LEXERS = {
     'GenshiTextLexer': ('pygments.lexers.templates', 'Genshi Text', ('genshitext',), (), ('application/x-genshi-text', 'text/x-genshi')),
     'GettextLexer': ('pygments.lexers.text', 'Gettext Catalog', ('pot', 'po'), ('*.pot', '*.po'), ('application/x-gettext', 'text/x-gettext', 'text/gettext')),
     'GroffLexer': ('pygments.lexers.text', 'Groff', ('groff', 'nroff', 'man'), ('*.[1234567]', '*.man'), ('application/x-troff', 'text/troff')),
-    'HaskellLexer': ('pygments.lexers.functional', 'Haskell', ('haskell', 'hs'), ('*.hs',), ()),
+    'HaskellLexer': ('pygments.lexers.functional', 'Haskell', ('haskell', 'hs'), ('*.hs',), ('text/x-haskell',)),
     'HtmlDjangoLexer': ('pygments.lexers.templates', 'HTML+Django/Jinja', ('html+django', 'html+jinja'), (), ('text/html+django', 'text/html+jinja')),
     'HtmlGenshiLexer': ('pygments.lexers.templates', 'HTML+Genshi', ('html+genshi', 'html+kid'), (), ('text/html+genshi',)),
     'HtmlLexer': ('pygments.lexers.web', 'HTML', ('html',), ('*.html', '*.htm', '*.xhtml', '*.xslt'), ('text/html', 'application/xhtml+xml')),
@@ -62,6 +62,7 @@ LEXERS = {
     'JavascriptPhpLexer': ('pygments.lexers.templates', 'JavaScript+PHP', ('js+php', 'javascript+php'), (), ('application/x-javascript+php', 'text/x-javascript+php', 'text/javascript+php')),
     'JavascriptSmartyLexer': ('pygments.lexers.templates', 'JavaScript+Smarty', ('js+smarty', 'javascript+smarty'), (), ('application/x-javascript+smarty', 'text/x-javascript+smarty', 'text/javascript+smarty')),
     'JspLexer': ('pygments.lexers.templates', 'Java Server Page', ('jsp',), ('*.jsp',), ('application/x-jsp',)),
+    'LiterateHaskellLexer': ('pygments.lexers.functional', 'Literate Haskell', ('lhs', 'literate-haskell'), ('*.lhs',), ('text/x-literate-haskell',)),
     'LlvmLexer': ('pygments.lexers.asm', 'LLVM', ('llvm',), ('*.ll',), ('text/x-llvm',)),
     'LuaLexer': ('pygments.lexers.agile', 'Lua', ('lua',), ('*.lua',), ('text/x-lua', 'application/x-lua')),
     'MOOCodeLexer': ('pygments.lexers.other', 'MOOCode', ('moocode',), ('*.moo',), ('text/x-moocode',)),
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py
index 78541ff5..626910a3 100644
--- a/pygments/lexers/functional.py
+++ b/pygments/lexers/functional.py
@@ -16,12 +16,14 @@ try:
 except NameError:
     from sets import Set as set
 
-from pygments.lexer import RegexLexer, bygroups, using, this, include
+from pygments.lexer import Lexer, RegexLexer, bygroups, using, this, include, \
+     do_insertions
 from pygments.token import Text, Comment, Operator, Keyword, Name, \
      String, Number, Punctuation
 
 
-__all__ = ['SchemeLexer', 'HaskellLexer', 'OcamlLexer', 'ErlangLexer']
+__all__ = ['SchemeLexer', 'HaskellLexer', 'LiterateHaskellLexer',
+           'OcamlLexer', 'ErlangLexer']
 
 
 class SchemeLexer(RegexLexer):
@@ -161,6 +163,7 @@ class HaskellLexer(RegexLexer):
     name = 'Haskell'
     aliases = ['haskell', 'hs']
     filenames = ['*.hs']
+    mimetypes = ['text/x-haskell']
 
     reserved = ['case','class','data','default','deriving','do','else',
                 'if','in','infix[lr]?','instance',
@@ -187,7 +190,9 @@ class HaskellLexer(RegexLexer):
             (r'[A-Z][\w\']*', Keyword.Type),
             #  Operators
             (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function), # lambda operator
-            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
+            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials
+            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type), # Constructor operators
+            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Other operators
             #  Numbers
             (r'\d+[eE][+-]?\d+', Number.Float),
             (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
@@ -266,6 +271,75 @@ class HaskellLexer(RegexLexer):
     }
 
 
+line_re = re.compile('.*?\n')
+bird_re = re.compile(r'(>[ \t]*)(.*\n)')
+
+class LiterateHaskellLexer(Lexer):
+    """
+    For Literate Haskell (Bird-style or LaTeX) source.
+
+    Additional options accepted:
+
+    `litstyle`
+        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
+        is autodetected: if the first non-whitespace character in the source
+        is a backslash or percent character, LaTeX is assumed, else Bird.
+
+    *New in Pygments 0.9.*
+    """
+    name = 'Literate Haskell'
+    aliases = ['lhs', 'literate-haskell']
+    filenames = ['*.lhs']
+    mimetypes = ['text/x-literate-haskell']
+
+    def get_tokens_unprocessed(self, text):
+        hslexer = HaskellLexer(**self.options)
+
+        style = self.options.get('litstyle')
+        if style is None:
+            style = (text.lstrip()[0] in '%\\') and 'latex' or 'bird'
+
+        code = ''
+        insertions = []
+        if style == 'bird':
+            # bird-style
+            for match in line_re.finditer(text):
+                line = match.group()
+                m = bird_re.match(line)
+                if m:
+                    insertions.append((len(code), [(0, Comment.Special, m.group(1))]))
+                    code += m.group(2)
+                else:
+                    insertions.append((len(code), [(0, Text, line)]))
+        else:
+            # latex-style
+            from pygments.lexers.text import TexLexer
+            lxlexer = TexLexer(**self.options)
+
+            codelines = 0
+            latex = ''
+            for match in line_re.finditer(text):
+                line = match.group()
+                if codelines:
+                    if line.lstrip().startswith('\\end{code}'):
+                        codelines = 0
+                        latex += line
+                    else:
+                        code += line
+                elif line.lstrip().startswith('\\begin{code}'):
+                    codelines = 1
+                    latex += line
+                    insertions.append((len(code),
+                                       list(lxlexer.get_tokens_unprocessed(latex))))
+                    latex = ''
+                else:
+                    latex += line
+            insertions.append((len(code),
+                               list(lxlexer.get_tokens_unprocessed(latex))))
+        for item in do_insertions(insertions, hslexer.get_tokens_unprocessed(code)):
+            yield item
+
+
 class OcamlLexer(RegexLexer):
     """
     For the OCaml language.
diff --git a/tests/examplefiles/DancingSudoku.lhs b/tests/examplefiles/DancingSudoku.lhs
new file mode 100644
index 00000000..368ab8e5
--- /dev/null
+++ b/tests/examplefiles/DancingSudoku.lhs
@@ -0,0 +1,411 @@
+ A Sukodku solver by Chris Kuklewicz (haskell (at) list (dot) mightyreason (dot) com)
+ The usual BSD license applies, copyright 2006.
+ Uploaded to HaskellWiki as DancingSudoku.lhs
+
+ I compile on a powerbook G4 (Mac OS X, ghc 6.4.2) using
+ ghc -optc-O3 -funbox-strict-fields -O2 --make -fglasgow-exts
+
+ This is a translation of Knuth's GDANCE from dance.w / dance.c
+
+ http://www-cs-faculty.stanford.edu/~uno/preprints.html
+ http://www-cs-faculty.stanford.edu/~uno/programs.html
+ http://en.wikipedia.org/wiki/Dancing_Links
+
+ I have an older verison that uses lazy ST to return the solutions on
+ demand, which was more useful when trying to generate new puzzles to
+ solve.
+
+> module Main where
+
+> import Prelude hiding (read)
+> import Control.Monad
+> import Control.Monad.Fix
+> import Data.Array.IArray
+> import Control.Monad.ST.Strict
+> import Data.STRef.Strict
+> import Data.Char(intToDigit,digitToInt)
+> import Data.List(unfoldr,intersperse,inits)
+
+> new = newSTRef
+> {-# INLINE new #-}
+> read = readSTRef
+> {-# INLINE read #-}
+> write = writeSTRef
+> {-# INLINE write #-}
+> modify = modifySTRef
+> {-# INLINE modify #-}
+
+ Data types to prevent mixing different index and value types
+
+> type A = Int
+> newtype R = R A deriving (Show,Read,Eq,Ord,Ix,Enum)
+> newtype C = C A deriving (Show,Read,Eq,Ord,Ix,Enum)
+> newtype V = V A deriving (Show,Read,Eq,Ord,Ix,Enum)
+> newtype B = B A deriving (Show,Read,Eq,Ord,Ix,Enum)
+
+ Sudoku also has block constraints, so we want to look up a block
+ index in an array:
+
+> lookupBlock :: Array (R,C) B
+> lookupBlock = listArray bb [ toBlock ij | ij <- range bb ]
+>     where ra :: Array Int B
+>           ra = listArray (0,pred (rangeSize b)) [B (fst b) .. B (snd b)]
+>           toBlock (R i,C j) = ra ! ( (div (index b j) 3)+3*(div (index b i) 3) )
+
+ The values for an unknown location is 'u'.
+ The bound and range are given by b and rng.  And bb is a 2D bound.
+
+> u = V 0  -- unknown value
+> b :: (Int,Int)
+> b = (1,9) -- min and max bounds
+> rng = enumFromTo (fst b)  (snd b)  -- list from '1' to '9'
+> bb = ((R (fst b),C (fst b)),(R (snd b),C (snd b)))
+
+  A Spec can be turned into a parsed array with ease:
+
+> type Hint = ((R,C),V)
+> newtype Spec = Spec [Hint] deriving (Eq,Show)
+
+> type PA = Array (R,C) V
+
+> parse :: Spec -> PA
+> parse (Spec parsed) = let acc old new = new
+>                       in accumArray acc u bb parsed
+
+ The dancing links algorithm depends on a sparse 2D node structure.
+ Each column represents a constraint.  Each row represents a Hint.
+ The number of possible hints is 9x9x9 = 271
+
+> type (MutInt st)  = (STRef st) Int
+
+ The pointer types:
+
+> type (NodePtr st) = (STRef st) (Node st)
+> type (HeadPtr st)  = (STRef st) (Head st)
+
+ The structures is a 2D grid of nodes, with Col's on the top of
+ columns and a sparse collection of nodes.  Note that topNode of Head
+ is not a strict field.  This is because the topNode needs to refer to
+ the Head, and they are both created monadically.
+
+> type HeadName = (Int,Int,Int) -- see below for meaning
+
+> data Head st = Head {headName:: !HeadName
+>                     ,topNode:: (Node st) -- header node for this column
+>                     ,len:: !(MutInt st)  -- number of nodes below this head
+>                     ,next,prev:: !(HeadPtr st)  -- doubly-linked list
+>                     }
+
+> data Node st = Node {getHint:: !Hint
+>                     ,getHead:: !(Head st)  -- head for the column this node is in
+>                     ,up,down,left,right :: !(NodePtr st)  -- two doubly-linked lists
+>                     }
+
+> instance Eq (Head st) where
+>     a == b = headName a == headName b
+
+> instance Eq (Node st) where
+>     a == b = up a == up b
+
+ To initialize the structures is a bit tedious.  Knuth's code reads in
+ the problem description from a data file and builds the structure
+ based on that.  Rather than short strings, I will use HeadName as the
+ identifier.
+ 
+ The columns are (0,4,5) for nodes that put some value in Row 4 Col 5
+                 (1,2,3) for nodes that put Val 3 in Row 2 and some column
+                 (2,7,4) for nodes that put Val 4 in Col 7 and some row
+                 (3,1,8) for nodes that put Val 8 in some (row,column) in Block 1
+
+ The first head is (0,0,0) which is the root.  The non-root head data
+ will be put in an array with the HeadName as an index.
+
+> headNames :: [HeadName]
+> headNames = let names = [0,1,2,3] 
+>             in (0,0,0):[ (l,i,j) | l<-names,i<-rng,j<-rng]
+
+ A "row" of left-right linked nodes is a move.  It is defined by a
+ list of head names.
+
+> type Move = [(Hint,HeadName)]
+
+ Initial hints are enforced by making them the only legal move for
+ that location.  Blank entries with value 'u = V 0' have a move for
+ all possible values [V 1..V 9].
+
+> parseSpec :: Spec -> [Move]
+> parseSpec spec =
+>   let rowsFrom :: Hint -> [Move]
+>       rowsFrom (rc@(R r,C c),mv@(V v')) = 
+>           if mv == u then [ rsyms v | v <- rng ]
+>           else [ rsyms v' ]
+>         where (B b) = lookupBlock ! rc
+>               rsyms :: A -> Move
+>               rsyms v = map ( (,) (rc,V v) ) [(0,r,c),(1,r,v),(2,c,v),(3,b,v)]
+>   in concatMap rowsFrom (assocs (parse spec))
+
+ mkDList creates doubly linked lists using a monadic smart
+ constructor and the recursive "mdo" notation as documented at
+ http://www.haskell.org/ghc/docs/latest/html/users_guide/syntax-extns.html#mdo-notation
+ http://www.cse.ogi.edu/PacSoft/projects/rmb/
+
+ For more fun with this, see the wiki page at
+ http://haskell.org/hawiki/TyingTheKnot
+
+> mkDList :: (MonadFix m) => (b -> a -> b -> m b) -> [a] -> m b
+> mkDList _ [] = error "must have at least one element"
+> mkDList mkNode xs = mdo (first,last) <- go last xs first
+>                         return first
+>   where go prev []     next = return (next,prev)
+>         go prev (x:xs) next = mdo this <- mkNode prev x rest
+>                                   (rest,last) <- go this xs next
+>                                   return (this,last)
+
+ toSimple takes a function and a header node and iterates (read . function)
+ until the header is reached again, but does not return the header
+ itself.
+
+> toSingle step header = loop =<< (read . step) header
+>     where loop y = if header/=y then liftM (y:) (read (step y) >>= loop)
+>                                 else return []
+>        
+
+ forEach is an optimization of (toSimple step header >>= mapM_ act)
+
+> forEach step header act = loop =<< (read . step) header
+>      where loop y = if header/=y then (act y >> (read (step y)) >>= loop)
+>                                  else return ()
+
+ Now make the root node and all the head nodes. This also exploits mdo:
+
+> makeHeads :: [HeadName] -> (ST st) (Head st)
+> makeHeads names = mkDList makeHead names
+>     where makeHead before name after = mdo
+>             ~newTopNode <- liftM4 (Node ((R 0,C 0),V 0) newHead) (new newTopNode) (new newTopNode)
+>                                                                 (new newTopNode) (new newTopNode)
+>             newHead <- liftM3 (Head name newTopNode)                         
+>                                    (new 0) (new after) (new before)
+>             return newHead
+
+ The Head nodes will be places in an array for easy lookup while building moves:
+
+> type HArray st = Array HeadName (Head st)
+> hBounds = ((0,1,1),(3,9,9))
+> type Root st =  (Head st,HArray st)
+
+ The addMove function creates the (four) nodes that represent a move and adds
+ them to the data structure.  The HArray in Root makes for a fast
+ lookup of the Head data.
+
+> addMove :: forall st. (Root st) -> Move -> (ST st) (Node st)
+> addMove (_,ha) move = mkDList addNode move
+>     where addNode :: (Node st) -> (Hint,HeadName) -> (Node st) -> (ST st) (Node st)
+>           addNode before (hint,name) after = do
+>             let head = ha ! name
+>             let below = topNode head
+>             above <- read (up below)
+>             newNode <- liftM4 (Node hint head) (new above) (new below)
+>                                                (new before) (new after)
+>             write (down above) newNode
+>             write (up below) newNode
+>             modify (len head) succ
+>             l <- read (len head)
+>             seq l (return newNode)
+
+ Create the column headers, including the fast lookup array.  These
+ will be resused between puzzles.
+
+> initHA :: (ST st) (Root st)
+> initHA = do
+>   root <- makeHeads headNames
+>   heads <- toSingle next root
+>   let ha = array hBounds (zip (map headName heads) heads)
+>   return (root,ha)
+
+ Take the Root from initHA and a puzzle Spec and fill in all the Nodes.
+
+> initRoot :: (Root st) -> Spec -> (ST st) ()
+> initRoot root spec = do
+>   let moves = parseSpec spec
+>   mapM_ (addMove root) moves
+
+  Return the column headers to their condition after initHA
+
+> resetRoot :: (Root st) -> (ST st) ()
+> resetRoot (root,ha) = do
+>   let heads@(first:_) = elems ha
+>   let resetHead head = do
+>         write (len head) 0
+>         let node = topNode head
+>         write (down node) node
+>         write (up node) node
+>       reset (last:[]) = do
+>         write (prev root) last
+>         write (next root) first
+>       reset (before:xs@(head:[])) = do
+>         resetHead head
+>         write (prev head) before
+>         write (next head) root
+>         reset xs
+>       reset (before:xs@(head:after:_)) = do
+>         resetHead head
+>         write (prev head) before
+>         write (next head) after
+>         reset xs
+>   reset (root:heads)
+
+ getBest iterates over the unmet constraints (i.e. the Head that are
+ reachable from root). It locates the one with the lowest number of
+ possible moves that will solve it, aborting early if it finds 0 or 1
+ moves.
+
+> getBest :: (Head st) -> (ST st) (Maybe (Head st))
+> getBest root = do
+>   first <- read (next root)
+>   if first == root then return Nothing
+>     else do
+>       let findMin m best head | head == root = return (Just best)
+>                               | otherwise = do
+>             l <- read (len head)
+>             if l <= 1 then return (Just head)
+>               else if l < m then findMin l head =<< read (next head)
+>                      else findMin l best =<< read (next head)
+>       findMin 10 first first
+
+ The unlink and relink operations are from where Knuth got the name
+ "dancing links".  So long as "a" does not change in between, the
+ relink call will undo the unlink call.  Similarly, the unconver will
+ undo the changes of cover and unconverOthers will undo coverOthers.
+
+> unlink :: (a->STRef st a) -> (a->STRef st a) -> a -> (ST st) ()
+> unlink prev next a = do
+>   before <- read (prev a)
+>   after <- read (next a)
+>   write (next before) after
+>   write (prev after) before
+
+> relink :: (a->STRef st a) -> (a->STRef st a) -> a -> (ST st) ()
+> relink prev next a = do
+>   before <- read (prev a)
+>   after <- read (next a)
+>   write (next before) a
+>   write (prev after) a
+
+> cover :: (Head st) -> (ST st) ()
+> cover head = do
+>   unlink prev next head
+>   let eachDown rr = forEach right rr eachRight
+>       eachRight nn = do
+>         unlink up down nn
+>         modify (len $ getHead nn) pred
+>   forEach down (topNode head) eachDown
+
+> uncover :: (Head st) -> (ST st) ()
+> uncover head = do
+>   let eachUp rr = forEach left rr eachLeft
+>       eachLeft nn = do
+>         modify (len $ getHead nn) succ
+>         relink up down nn
+>   forEach up (topNode head) eachUp
+>   relink prev next head
+
+> coverOthers :: (Node st) -> (ST st) ()
+> coverOthers node = forEach right node (cover . getHead)
+
+> uncoverOthers :: (Node st) -> (ST st) ()
+> uncoverOthers node = forEach left node (uncover . getHead)
+
+ A helper function for gdance:
+
+> choicesToSpec :: [(Node st)] -> Spec
+> choicesToSpec = Spec . (map getHint)
+
+ This is the heart of the algorithm.  I have altered it to return only
+ the first solution, or produce an error if none is found.
+
+ Knuth used several goto links to do what is done below with tail
+ recursion.
+
+> gdance :: (Head st) -> (ST st) Spec -- [Spec]
+> gdance root =
+>     let
+>         forward choices = do
+>             maybeHead <- getBest root
+>             case maybeHead of
+>                 Nothing -> if null choices
+>                              then error "No choices in forward" -- return [] -- for [Spec]
+>                              else do -- nextSols <- recover choices -- for [Spec]
+>                                      return $ (choicesToSpec choices) -- :nextSols -- for [Spec]
+>                 Just head -> do cover head
+>                                 startRow <- readSTRef (down (topNode head))
+>                                 advance (startRow:choices)
+> 
+>         advance choices@(newRow:oldChoices) = do
+>             let endOfRows = topNode (getHead newRow)
+>             if (newRow == endOfRows)
+>               then do uncover (getHead newRow)
+>                       if (null oldChoices)
+>                         then error "No choices in advance" -- return [] -- for [Spec]
+>                         else recover oldChoices
+>               else do coverOthers newRow
+>                       forward choices
+> 
+>         recover (oldRow:oldChoices) = do
+>             uncoverOthers oldRow
+>             newRow <- readSTRef (down oldRow)
+>             advance (newRow:oldChoices)
+> 
+>     in forward []
+
+
+ Convert a text board into a Spec
+
+> parseBoard :: String -> Spec
+> parseBoard s = Spec (zip rcs vs'check)
+>   where rcs :: [(R,C)]
+>         rcs = [ (R r,C c) | r <- rng, c <- rng ]
+>         isUnset c = (c=='.') || (c==' ') || (c=='0')
+>         isHint c = ('1'<=c) && (c<='9')
+>         cs = take 81 $ filter (\c -> isUnset c || isHint c) s
+>         vs :: [V]
+>         vs = map (\c -> if isUnset c then u else (V $ digitToInt c)) cs
+>         vs'check = if 81==length vs then vs else error ("parse of board failed\n"++s)
+
+ This is quite useful as a utility function which partitions the list into groups of n elements.
+ Used by showSpec.
+
+> groupTake :: Int->[a]->[[a]]
+> groupTake n b = unfoldr foo b
+>     where foo [] = Nothing
+>           foo b = Just (splitAt n b)
+ 
+ Make a nice 2D ascii board from the Spec (not used at the moment)
+
+> showSpec :: Spec -> String
+> showSpec spec = let pa = parse spec
+>                     g = groupTake 9 (map (\(V v) -> if v == 0 then '.' else intToDigit v) $ elems pa)
+>                     addV line = concat $ intersperse "|" (groupTake 3 line)
+>                     addH list = concat $ intersperse ["---+---+---"] (groupTake 3 list)
+>                 in unlines $ addH (map addV g)
+
+  One line display
+
+> showCompact spec = map (\(V v) -> intToDigit v) (elems (parse spec))
+
+ The main routine is designed to handle the input from http://www.csse.uwa.edu.au/~gordon/sudoku17
+
+> main = do
+>   all <- getContents
+>   let puzzles = zip [1..] (map parseBoard (lines all))
+>   root <- stToIO initHA
+>   let act :: (Int,Spec) -> IO ()
+>       act (i,spec) = do
+>         answer <- stToIO (do initRoot root spec 
+>                              answer <- gdance (fst root) 
+>                              resetRoot root
+>                              return answer)
+>         print (i,showCompact  answer)
+>   mapM_ act puzzles
+
+> inits' xn@(_:_) = zipWith take [0..] $ map (const xn) $ undefined:xn
+> inits' _        = undefined
diff --git a/tests/examplefiles/Sudoku.lhs b/tests/examplefiles/Sudoku.lhs
new file mode 100644
index 00000000..6829cf6c
--- /dev/null
+++ b/tests/examplefiles/Sudoku.lhs
@@ -0,0 +1,382 @@
+% Copyright 2005 Brian Alliet
+
+\documentclass[11pt]{article}
+\usepackage{palatino}
+\usepackage{fullpage}
+\usepackage{parskip}
+\usepackage{lhs}
+
+\begin{document}
+
+\title{Sudoku Solver}
+\author{Brian Alliet}
+\maketitle
+
+\ignore{
+\begin{code}
+module Sudoku (
+    Sudoku,
+    makeSudoku, solve, eliminate, analyze, backtrack,
+    main
+    ) where
+
+import Array
+import Monad
+import List (union,intersperse,transpose,(\\),nub,nubBy)
+\end{code}
+}
+
+\section{Introduction}
+
+This Haskell module implements a solver for Sudoku~\footnote{http://en.wikipedia.org/wiki/Sudoku} puzzles. It can solve
+any Sudoku puzzle, even those that require backtracking.
+
+\section{Data Types}
+
+\begin{code}
+data CellState a = Known a | Unknown [a] | Impossible deriving Eq
+\end{code}
+
+Each cell in a Sudoku grid can be in one of three states: ``Known'' if it has a known correct value~\footnote{Actually
+this doesn't always means it is correct. While we are in the backtracking stage we make our guesses ``Known''.},
+``Unknown'' if there is still more than one possible correct value, or ``Impossible'' if there is no value that can
+possibly fit the cell. Sudoku grids with ``Impossible'' cells are quickly discarded by the {\tt solve} function.
+
+\begin{code}
+type Coords = (Int,Int)
+type Grid a = Array Coords (CellState a)
+newtype Sudoku a = Sudoku { unSudoku :: Grid a } deriving Eq
+\end{code}
+
+We represent a Sudoku grid as an Array indexed by integer coordinates. We additionally define a newtype wrapper for the
+grid. The smart constructor, {\tt makeSudoku} verifies some invariants before creating the Sudoku value. All the public
+API functions operate on the Sudoku type.
+
+\begin{code}
+instance Show a => Show (Sudoku a) where showsPrec p = showParen (p>0) . showsGrid . unSudoku
+instance Show a => Show (CellState a) where showsPrec _ = showsCell
+\end{code}
+
+We define {\tt Show} instances for the above types.
+
+\section{Internal Functions}
+
+\begin{code}
+size :: Grid a -> Int
+size = (+1).fst.snd.bounds
+\end{code}
+
+{\tt size} returns the size (the width, height, and number of subboxes) for a Sudoku grid. We ensure Grid's are always
+square and indexed starting at $(0,0)$ so simply incrementing either of the array's upper bounds is correct.
+
+\begin{code}
+getRow,getCol,getBox :: Grid a -> Int -> [(Coords,CellState a)]
+getRow grid r = [let l = (r,c) in (l,grid!l)|c <- [0..size grid - 1]]
+getCol grid c = [let l = (r,c) in (l,grid!l)|r <- [0..size grid - 1]]
+getBox grid b = [let l = (r,c) in (l,grid!l)|r <- [boxR..boxR+boxN-1],c <- [boxC..boxC+boxN-1]]
+    where
+        boxN = intSqrt (size grid); boxR = b `quot` boxN * boxN; boxC = b `rem`  boxN * boxN
+
+getBoxOf :: Grid a -> Coords -> [(Coords,CellState a)]
+getBoxOf grid (r,c) = grid `getBox` ((r `quot` boxN * boxN) + (c `quot` boxN))
+    where boxN = intSqrt (size grid)
+\end{code}
+
+{\tt getRow}, {\tt getCol}, and {\tt getBox} return the coordinates and values of the cell in row, column, or box
+number {\tt n}, {\tt r}, or {\tt b}.
+
+\begin{code}
+getNeighbors :: Eq a => Grid a -> Coords -> [(Coords,CellState a)]
+getNeighbors grid l@(r,c) = filter ((/=l).fst) 
+                          $ foldr (union.($grid)) [] 
+                          [(`getRow`r),(`getCol`c),(`getBoxOf`l)]
+\end{code}
+
+{\tt getNeighbors} returns the coordinates and values of all the neighbors of this cell.
+
+\begin{code}
+impossible :: Eq a => Grid a -> Coords -> [a]
+impossible grid l = map snd $ justKnowns $ grid `getNeighbors` l
+\end{code}
+
+{\tt impossible} returns a list of impossible values for a given cell. The impossible values consist of the values any
+``Known'' neighbors.
+
+\begin{code}
+justUnknowns :: [(Coords,CellState a)] -> [(Coords,[a])]
+justUnknowns = foldr (\c -> case c of (p,Unknown xs) -> ((p,xs):); _ -> id) []
+
+justKnowns :: [(Coords,CellState a)] -> [(Coords,a)]
+justKnowns = foldr (\c -> case c of (p,Known x) -> ((p,x):); _ -> id) []
+\end{code}
+
+{\tt justUnknowns} and {\tt justKnowns} return only the Known or Unknown values (with the constructor stripped off)
+from a list of cells.
+
+\begin{code}
+updateGrid :: Grid a -> [(Coords,CellState a)] -> Maybe (Grid a)
+updateGrid _ [] = Nothing
+updateGrid grid xs = Just $ grid // nubBy (\(x,_) (y,_) -> x==y) xs
+\end{code}
+
+{\tt updateGrid} applies a set of updates to a grid and returns the new grid only if it was updated.
+
+\section{Public API}
+
+\begin{code}
+makeSudoku :: (Num a, Ord a, Enum a) => [[a]] -> Sudoku a
+makeSudoku xs
+    | not (all ((==size).length) xs) = error "error not a square"
+    | (intSqrt size)^(2::Int) /= size = error "error dims aren't perfect squares"
+    | any (\x -> x < 0 || x > fromIntegral size) (concat xs) = error "value out of range"
+    | otherwise = Sudoku (listArray ((0,0),(size-1,size-1)) states)
+    where
+        size = length xs
+        states = map f (concat xs)
+        f 0 = Unknown [1..fromIntegral size]
+        f x = Known x
+\end{code}
+
+{\tt makeSudoku} makes a {\tt Sudoku} value from a list of numbers. The given matrix must be square and have dimensions
+that are a perfect square. The possible values for each cell range from 1 to the dimension of the square with ``0''
+representing unknown values.\footnote{The rest of the code doesn't depend on any of this weird ``0'' is unknown
+representation. In fact, it doesn't depend on numeric values at all. ``0'' is just used here because it makes
+representing grids in Haskell source code easier.}
+
+\begin{code}
+eliminate :: Eq a => Sudoku a -> Maybe (Sudoku a)
+eliminate (Sudoku grid) = fmap Sudoku $ updateGrid grid changes >>= sanitize
+    where
+        changes = concatMap findChange $ assocs grid
+        findChange (l,Unknown xs) 
+            = map ((,) l) 
+            $ case filter (not.(`elem`impossible grid l)) xs of
+                [] -> return Impossible
+                [x] -> return $ Known x
+                xs'
+                    | xs' /= xs -> return $ Unknown xs'
+                    | otherwise -> mzero
+        findChange _ = mzero
+        sanitize grid = return $ grid // [(l,Impossible) | 
+            (l,x) <- justKnowns changes, x `elem` impossible grid l]
+\end{code}
+
+The {\tt eliminate} phase tries to remove possible choices for ``Unknowns'' based on ``Known'' values in the same row,
+column, or box as the ``Unknown'' value. For each cell on the grid we find its ``neighbors'', that is, cells in the
+same row, column, or box. Out of those neighbors we get a list of all the ``Known'' values. We can eliminate all of
+these from our list of candidates for this cell. If we're lucky enough to eliminate all the candidates but one we have
+a new ``Known'' value. If we're unlucky enough to have eliminates {\bf all} the possible candidates we have a new
+``Impossible'' value.
+
+After iterating though every cell we make one more pass looking for conflicting changes. {\tt sanitize} marks cells as
+``Impossible'' if we have conflicting ``Known'' values.
+
+\begin{code}
+analyze :: Eq a => Sudoku a -> Maybe (Sudoku a)
+analyze (Sudoku grid) = fmap Sudoku $ updateGrid grid $ nub [u |
+            f <- map ($grid) [getRow,getCol,getBox],
+            n <- [0..size grid - 1],
+            u <- unique (f n)]
+    where
+        unique xs = foldr f [] $ foldr (union.snd) [] unknowns \\ map snd (justKnowns xs)
+            where
+                unknowns = justUnknowns xs
+                f c = case filter ((c`elem`).snd) unknowns of
+                    [(p,_)] -> ((p,Known c):)
+                    _ -> id
+\end{code}
+
+The {\tt analyze} phase tries to turn ``Unknowns'' into ``Knowns'' when a certain ``Unknown'' is the only cell that
+contains a value needed in a given row, column, or box. We apply each of the functions {\tt getRow}, {\tt getCol}, and
+{\tt getBox} to all the indices on the grid, apply {\tt unique} to each group, and update the array with the
+results. {\tt unique}  gets a list of all  the unknown cells in the group and finds all the unknown values in each of
+those cells. Each of these values are iterated though looking for a value that is only contained in one cell. If such a
+value is found the cell containing it must be that value.
+
+\begin{code}
+backtrack :: (MonadPlus m, Eq a) => Sudoku a -> m (Sudoku a)
+backtrack (Sudoku grid) = case (justUnknowns (assocs grid)) of
+    [] -> return $ Sudoku grid
+    ((p,xs):_) -> msum $ map (\x -> solve $ Sudoku $ grid // [(p,Known x)]) xs
+\end{code}
+
+Sometimes the above two phases still aren't enough to solve a puzzle. For these rare puzzles backtracking is required.
+We attempt to solve the puzzle by replacing the first ``Unknown'' value with each of the candidate values and solving
+the resulting puzzles. Hopefully at least one of our choices will result in a solvable puzzle.
+
+We could actually solve any puzzle using backtracking alone, although this would be very inefficient. The above
+functions simplify most puzzles enough that the backtracking phase has to do hardly any work.
+
+\begin{code}
+solve :: (MonadPlus m, Eq a) => Sudoku a -> m (Sudoku a)
+solve sudoku = 
+    case eliminate sudoku of
+        Just new 
+            | any (==Impossible) (elems (unSudoku new))-> mzero
+            | otherwise -> solve new
+        Nothing -> case analyze sudoku of
+            Just new -> solve new
+            Nothing -> backtrack sudoku
+\end{code}
+
+{\tt solve} glues all the above phases together. First we run the {\tt eliminate} phase. If that found the puzzle  to
+be unsolvable we abort immediately. If {\tt eliminate} changed the grid we go though the {\tt eliminate} phase again
+hoping to eliminate more. Once {\tt eliminate} can do no more work we move on to the {\tt analyze} phase. If this
+succeeds in doing some work we start over again with the {\tt eliminate} phase. Once {\tt analyze} can do no more work
+we have no choice but to resort to backtracking. (However in most cases backtracking won't actually do anything because
+the puzzle is already solved.)
+
+\begin{code}
+showsCell :: Show a => CellState a -> ShowS
+showsCell (Known x) = shows x
+showsCell (Impossible) = showChar 'X'
+showsCell (Unknown xs) = \rest -> ('(':) 
+                       $ foldr id (')':rest)
+                       $ intersperse (showChar ' ')
+                       $ map shows xs
+\end{code}
+
+{\tt showCell} shows a cell.
+
+\begin{code}
+showsGrid :: Show a => Grid a -> ShowS
+showsGrid grid = showsTable [[grid!(r,c) | c <- [0..size grid-1]] | r <- [0..size grid-1]]
+\end{code}
+
+{\tt showGrid} show a grid.
+
+\begin{code}
+-- FEATURE: This is pretty inefficient
+showsTable :: Show a => [[a]] -> ShowS
+showsTable xs = (showChar '\n' .) $ showString $ unlines $ map (concat . intersperse " ") xs''
+    where
+        xs' = (map.map) show xs
+        colWidths = map (max 2 . maximum . map length) (transpose xs')
+        xs'' = map (zipWith (\n s -> s ++ (replicate (n - length s) ' ')) colWidths) xs'
+\end{code}
+
+{\tt showsTable} shows a table (or matrix). Every column has the same width so things line up.
+
+\begin{code}
+intSqrt :: Integral a => a -> a
+intSqrt n
+    | n < 0 = error "intSqrt: negative n"
+    | otherwise = f n
+    where
+        f x = if y < x then f y else x
+            where y = (x + (n `quot` x)) `quot` 2
+\end{code}
+
+{\tt intSqrt} is Newton`s Iteration for finding integral square roots.
+
+\ignore{
+\begin{code}
+test :: Sudoku Int
+test = makeSudoku [
+    [0,6,0,1,0,4,0,5,0],
+    [0,0,8,3,0,5,6,0,0],
+    [2,0,0,0,0,0,0,0,1],
+    [8,0,0,4,0,7,0,0,6],
+    [0,0,6,0,0,0,3,0,0],
+    [7,0,0,9,0,1,0,0,4],
+    [5,0,0,0,0,0,0,0,2],
+    [0,0,7,2,0,6,9,0,0],
+    [0,4,0,5,0,8,0,7,0]]
+
+test2 :: Sudoku Int
+test2 = makeSudoku [
+    [0,7,0,0,0,0,8,0,0],
+    [0,0,0,2,0,4,0,0,0],
+    [0,0,6,0,0,0,0,3,0],
+    [0,0,0,5,0,0,0,0,6],
+    [9,0,8,0,0,2,0,4,0],
+    [0,5,0,0,3,0,9,0,0],
+    [0,0,2,0,8,0,0,6,0],
+    [0,6,0,9,0,0,7,0,1],
+    [4,0,0,0,0,3,0,0,0]]
+
+testSmall :: Sudoku Int
+testSmall = makeSudoku [
+    [1,0,0,0,0,0,0,0,0],
+    [0,0,2,7,4,0,0,0,0],
+    [0,0,0,5,0,0,0,0,4],
+    [0,3,0,0,0,0,0,0,0],
+    [7,5,0,0,0,0,0,0,0],
+    [0,0,0,0,0,9,6,0,0],
+    [0,4,0,0,0,6,0,0,0],
+    [0,0,0,0,0,0,0,7,1],
+    [0,0,0,0,0,1,0,3,0]]
+
+testHard :: Sudoku Int
+testHard = makeSudoku [
+    [0,0,0,8,0,2,0,0,0],
+    [5,0,0,0,0,0,0,0,1],
+    [0,0,6,0,5,0,3,0,0],
+    [0,0,9,0,1,0,8,0,0],
+    [1,0,0,0,0,0,0,0,2],
+    [0,0,0,9,0,7,0,0,0],
+    [0,6,1,0,3,0,7,8,0],
+    [0,5,0,0,0,0,0,4,0],
+    [0,7,2,0,4,0,1,5,0]]
+
+testHard2 :: Sudoku Int
+testHard2 = makeSudoku [
+    [3,0,0,2,0,0,9,0,0],
+    [0,0,0,0,0,0,0,0,5],
+    [0,7,0,1,0,4,0,0,0],
+    [0,0,9,0,0,0,8,0,0],
+    [5,0,0,0,7,0,0,0,6],
+    [0,0,1,0,0,0,2,0,0],
+    [0,0,0,3,0,9,0,4,0],
+    [8,0,0,0,0,0,0,0,0],
+    [0,0,6,0,0,5,0,0,7]]
+
+testHW :: Sudoku Int
+testHW = makeSudoku [
+    [0,0,0,1,0,0,7,0,2],    
+    [0,3,0,9,5,0,0,0,0],
+    [0,0,1,0,0,2,0,0,3],
+    [5,9,0,0,0,0,3,0,1],
+    [0,2,0,0,0,0,0,7,0],
+    [7,0,3,0,0,0,0,9,8],
+    [8,0,0,2,0,0,1,0,0],
+    [0,0,0,0,8,5,0,6,0],
+    [6,0,5,0,0,9,0,0,0]]
+
+testTough :: Sudoku Int
+testTough = makeSudoku $ map (map read . words) $ lines $
+ "8 3 0  0 0 0  0 4 6\n"++
+ "0 2 0  1 0 4  0 3 0\n"++
+ "0 0 0  0 0 0  0 0 0\n"++
+ "0 0 2  9 0 6  5 0 0\n"++
+ "1 4 0  0 0 0  0 2 3\n"++
+ "0 0 5  4 0 3  1 0 0\n"++
+ "0 0 0  0 0 0  0 0 0\n"++
+ "0 6 0  3 0 8  0 7 0\n"++
+ "9 5 0  0 0 0  0 6 2\n"
+
+testDiabolical :: Sudoku Int 
+testDiabolical = makeSudoku $ map (map read . words) $ lines $
+  "8 0 0  7 0 1  0 0 2\n"++
+  "0 0 6  0 0 0  7 0 0\n"++
+  "0 1 7  0 0 0  8 9 0\n"++
+  "0 0 0  1 7 3  0 0 0\n"++
+  "7 0 0  0 0 0  0 0 6\n"++
+  "0 0 0  9 5 6  0 0 0\n"++
+  "0 9 5  0 0 0  4 1 0\n"++
+  "0 0 8  0 0 0  5 0 0\n"++
+  "3 0 0  6 0 5  0 0 7\n"
+
+main :: IO ()
+main = do
+    let
+        solve' p = case solve p of
+            [] -> fail $ "couldn't solve: " ++ show p
+            sols -> return sols
+    mapM_ (\p -> solve' p >>= putStrLn.show) [test,test2,testSmall,testHard,testHard2,testHW,testTough,testDiabolical]
+    return ()
+
+\end{code}
+}
+
+\end{document}
author	gbrandl <devnull@localhost>	2007-10-14 10:54:20 +0200
committer	gbrandl <devnull@localhost>	2007-10-14 10:54:20 +0200
commit	8fbd90da2ebaf258d3a2acfb4a608266ec44f70b (patch)
tree	98df55bf1845168b5c05d02c77e9ae815d723cbf
parent	fab2810cb986997bb5bca2c2a8b6ca846e364321 (diff)
download	pygments-8fbd90da2ebaf258d3a2acfb4a608266ec44f70b.tar.gz