diff options
author | gbrandl <devnull@localhost> | 2007-10-14 10:54:20 +0200 |
---|---|---|
committer | gbrandl <devnull@localhost> | 2007-10-14 10:54:20 +0200 |
commit | 8fbd90da2ebaf258d3a2acfb4a608266ec44f70b (patch) | |
tree | 98df55bf1845168b5c05d02c77e9ae815d723cbf | |
parent | fab2810cb986997bb5bca2c2a8b6ca846e364321 (diff) | |
download | pygments-8fbd90da2ebaf258d3a2acfb4a608266ec44f70b.tar.gz |
Add literate Haskell lexer.
-rw-r--r-- | CHANGES | 4 | ||||
-rw-r--r-- | pygments/lexers/_mapping.py | 3 | ||||
-rw-r--r-- | pygments/lexers/functional.py | 80 | ||||
-rw-r--r-- | tests/examplefiles/DancingSudoku.lhs | 411 | ||||
-rw-r--r-- | tests/examplefiles/Sudoku.lhs | 382 |
5 files changed, 875 insertions, 5 deletions
@@ -3,7 +3,7 @@ Pygments changelog Version 0.9 ----------- -(codename Herbstzeitlose, released Sep XX, 2007) +(codename Herbstzeitlose, released Oct 14, 2007) - The encoding handling of the command line mode (pygmentize) was enhanced. You shouldn't get UnicodeErrors from it anymore if you @@ -26,6 +26,8 @@ Version 0.9 - Greatly improved the Haskell and OCaml lexers. +- Added a lexer for Literate Haskell. + - The C# and Java lexers exhibited abysmal performance with some input code; this should now be fixed. diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index 3f0c4552..1fe8b3e4 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -46,7 +46,7 @@ LEXERS = { 'GenshiTextLexer': ('pygments.lexers.templates', 'Genshi Text', ('genshitext',), (), ('application/x-genshi-text', 'text/x-genshi')), 'GettextLexer': ('pygments.lexers.text', 'Gettext Catalog', ('pot', 'po'), ('*.pot', '*.po'), ('application/x-gettext', 'text/x-gettext', 'text/gettext')), 'GroffLexer': ('pygments.lexers.text', 'Groff', ('groff', 'nroff', 'man'), ('*.[1234567]', '*.man'), ('application/x-troff', 'text/troff')), - 'HaskellLexer': ('pygments.lexers.functional', 'Haskell', ('haskell', 'hs'), ('*.hs',), ()), + 'HaskellLexer': ('pygments.lexers.functional', 'Haskell', ('haskell', 'hs'), ('*.hs',), ('text/x-haskell',)), 'HtmlDjangoLexer': ('pygments.lexers.templates', 'HTML+Django/Jinja', ('html+django', 'html+jinja'), (), ('text/html+django', 'text/html+jinja')), 'HtmlGenshiLexer': ('pygments.lexers.templates', 'HTML+Genshi', ('html+genshi', 'html+kid'), (), ('text/html+genshi',)), 'HtmlLexer': ('pygments.lexers.web', 'HTML', ('html',), ('*.html', '*.htm', '*.xhtml', '*.xslt'), ('text/html', 'application/xhtml+xml')), @@ -62,6 +62,7 @@ LEXERS = { 'JavascriptPhpLexer': ('pygments.lexers.templates', 'JavaScript+PHP', ('js+php', 'javascript+php'), (), ('application/x-javascript+php', 'text/x-javascript+php', 'text/javascript+php')), 'JavascriptSmartyLexer': ('pygments.lexers.templates', 'JavaScript+Smarty', ('js+smarty', 'javascript+smarty'), (), ('application/x-javascript+smarty', 'text/x-javascript+smarty', 'text/javascript+smarty')), 'JspLexer': ('pygments.lexers.templates', 'Java Server Page', ('jsp',), ('*.jsp',), ('application/x-jsp',)), + 'LiterateHaskellLexer': ('pygments.lexers.functional', 'Literate Haskell', ('lhs', 'literate-haskell'), ('*.lhs',), ('text/x-literate-haskell',)), 'LlvmLexer': ('pygments.lexers.asm', 'LLVM', ('llvm',), ('*.ll',), ('text/x-llvm',)), 'LuaLexer': ('pygments.lexers.agile', 'Lua', ('lua',), ('*.lua',), ('text/x-lua', 'application/x-lua')), 'MOOCodeLexer': ('pygments.lexers.other', 'MOOCode', ('moocode',), ('*.moo',), ('text/x-moocode',)), diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py index 78541ff5..626910a3 100644 --- a/pygments/lexers/functional.py +++ b/pygments/lexers/functional.py @@ -16,12 +16,14 @@ try: except NameError: from sets import Set as set -from pygments.lexer import RegexLexer, bygroups, using, this, include +from pygments.lexer import Lexer, RegexLexer, bygroups, using, this, include, \ + do_insertions from pygments.token import Text, Comment, Operator, Keyword, Name, \ String, Number, Punctuation -__all__ = ['SchemeLexer', 'HaskellLexer', 'OcamlLexer', 'ErlangLexer'] +__all__ = ['SchemeLexer', 'HaskellLexer', 'LiterateHaskellLexer', + 'OcamlLexer', 'ErlangLexer'] class SchemeLexer(RegexLexer): @@ -161,6 +163,7 @@ class HaskellLexer(RegexLexer): name = 'Haskell' aliases = ['haskell', 'hs'] filenames = ['*.hs'] + mimetypes = ['text/x-haskell'] reserved = ['case','class','data','default','deriving','do','else', 'if','in','infix[lr]?','instance', @@ -187,7 +190,9 @@ class HaskellLexer(RegexLexer): (r'[A-Z][\w\']*', Keyword.Type), # Operators (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function), # lambda operator - (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), + (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials + (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type), # Constructor operators + (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Other operators # Numbers (r'\d+[eE][+-]?\d+', Number.Float), (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float), @@ -266,6 +271,75 @@ class HaskellLexer(RegexLexer): } +line_re = re.compile('.*?\n') +bird_re = re.compile(r'(>[ \t]*)(.*\n)') + +class LiterateHaskellLexer(Lexer): + """ + For Literate Haskell (Bird-style or LaTeX) source. + + Additional options accepted: + + `litstyle` + If given, must be ``"bird"`` or ``"latex"``. If not given, the style + is autodetected: if the first non-whitespace character in the source + is a backslash or percent character, LaTeX is assumed, else Bird. + + *New in Pygments 0.9.* + """ + name = 'Literate Haskell' + aliases = ['lhs', 'literate-haskell'] + filenames = ['*.lhs'] + mimetypes = ['text/x-literate-haskell'] + + def get_tokens_unprocessed(self, text): + hslexer = HaskellLexer(**self.options) + + style = self.options.get('litstyle') + if style is None: + style = (text.lstrip()[0] in '%\\') and 'latex' or 'bird' + + code = '' + insertions = [] + if style == 'bird': + # bird-style + for match in line_re.finditer(text): + line = match.group() + m = bird_re.match(line) + if m: + insertions.append((len(code), [(0, Comment.Special, m.group(1))])) + code += m.group(2) + else: + insertions.append((len(code), [(0, Text, line)])) + else: + # latex-style + from pygments.lexers.text import TexLexer + lxlexer = TexLexer(**self.options) + + codelines = 0 + latex = '' + for match in line_re.finditer(text): + line = match.group() + if codelines: + if line.lstrip().startswith('\\end{code}'): + codelines = 0 + latex += line + else: + code += line + elif line.lstrip().startswith('\\begin{code}'): + codelines = 1 + latex += line + insertions.append((len(code), + list(lxlexer.get_tokens_unprocessed(latex)))) + latex = '' + else: + latex += line + insertions.append((len(code), + list(lxlexer.get_tokens_unprocessed(latex)))) + for item in do_insertions(insertions, hslexer.get_tokens_unprocessed(code)): + yield item + + class OcamlLexer(RegexLexer): """ For the OCaml language. diff --git a/tests/examplefiles/DancingSudoku.lhs b/tests/examplefiles/DancingSudoku.lhs new file mode 100644 index 00000000..368ab8e5 --- /dev/null +++ b/tests/examplefiles/DancingSudoku.lhs @@ -0,0 +1,411 @@ + A Sukodku solver by Chris Kuklewicz (haskell (at) list (dot) mightyreason (dot) com) + The usual BSD license applies, copyright 2006. + Uploaded to HaskellWiki as DancingSudoku.lhs + + I compile on a powerbook G4 (Mac OS X, ghc 6.4.2) using + ghc -optc-O3 -funbox-strict-fields -O2 --make -fglasgow-exts + + This is a translation of Knuth's GDANCE from dance.w / dance.c + + http://www-cs-faculty.stanford.edu/~uno/preprints.html + http://www-cs-faculty.stanford.edu/~uno/programs.html + http://en.wikipedia.org/wiki/Dancing_Links + + I have an older verison that uses lazy ST to return the solutions on + demand, which was more useful when trying to generate new puzzles to + solve. + +> module Main where + +> import Prelude hiding (read) +> import Control.Monad +> import Control.Monad.Fix +> import Data.Array.IArray +> import Control.Monad.ST.Strict +> import Data.STRef.Strict +> import Data.Char(intToDigit,digitToInt) +> import Data.List(unfoldr,intersperse,inits) + +> new = newSTRef +> {-# INLINE new #-} +> read = readSTRef +> {-# INLINE read #-} +> write = writeSTRef +> {-# INLINE write #-} +> modify = modifySTRef +> {-# INLINE modify #-} + + Data types to prevent mixing different index and value types + +> type A = Int +> newtype R = R A deriving (Show,Read,Eq,Ord,Ix,Enum) +> newtype C = C A deriving (Show,Read,Eq,Ord,Ix,Enum) +> newtype V = V A deriving (Show,Read,Eq,Ord,Ix,Enum) +> newtype B = B A deriving (Show,Read,Eq,Ord,Ix,Enum) + + Sudoku also has block constraints, so we want to look up a block + index in an array: + +> lookupBlock :: Array (R,C) B +> lookupBlock = listArray bb [ toBlock ij | ij <- range bb ] +> where ra :: Array Int B +> ra = listArray (0,pred (rangeSize b)) [B (fst b) .. B (snd b)] +> toBlock (R i,C j) = ra ! ( (div (index b j) 3)+3*(div (index b i) 3) ) + + The values for an unknown location is 'u'. + The bound and range are given by b and rng. And bb is a 2D bound. + +> u = V 0 -- unknown value +> b :: (Int,Int) +> b = (1,9) -- min and max bounds +> rng = enumFromTo (fst b) (snd b) -- list from '1' to '9' +> bb = ((R (fst b),C (fst b)),(R (snd b),C (snd b))) + + A Spec can be turned into a parsed array with ease: + +> type Hint = ((R,C),V) +> newtype Spec = Spec [Hint] deriving (Eq,Show) + +> type PA = Array (R,C) V + +> parse :: Spec -> PA +> parse (Spec parsed) = let acc old new = new +> in accumArray acc u bb parsed + + The dancing links algorithm depends on a sparse 2D node structure. + Each column represents a constraint. Each row represents a Hint. + The number of possible hints is 9x9x9 = 271 + +> type (MutInt st) = (STRef st) Int + + The pointer types: + +> type (NodePtr st) = (STRef st) (Node st) +> type (HeadPtr st) = (STRef st) (Head st) + + The structures is a 2D grid of nodes, with Col's on the top of + columns and a sparse collection of nodes. Note that topNode of Head + is not a strict field. This is because the topNode needs to refer to + the Head, and they are both created monadically. + +> type HeadName = (Int,Int,Int) -- see below for meaning + +> data Head st = Head {headName:: !HeadName +> ,topNode:: (Node st) -- header node for this column +> ,len:: !(MutInt st) -- number of nodes below this head +> ,next,prev:: !(HeadPtr st) -- doubly-linked list +> } + +> data Node st = Node {getHint:: !Hint +> ,getHead:: !(Head st) -- head for the column this node is in +> ,up,down,left,right :: !(NodePtr st) -- two doubly-linked lists +> } + +> instance Eq (Head st) where +> a == b = headName a == headName b + +> instance Eq (Node st) where +> a == b = up a == up b + + To initialize the structures is a bit tedious. Knuth's code reads in + the problem description from a data file and builds the structure + based on that. Rather than short strings, I will use HeadName as the + identifier. + + The columns are (0,4,5) for nodes that put some value in Row 4 Col 5 + (1,2,3) for nodes that put Val 3 in Row 2 and some column + (2,7,4) for nodes that put Val 4 in Col 7 and some row + (3,1,8) for nodes that put Val 8 in some (row,column) in Block 1 + + The first head is (0,0,0) which is the root. The non-root head data + will be put in an array with the HeadName as an index. + +> headNames :: [HeadName] +> headNames = let names = [0,1,2,3] +> in (0,0,0):[ (l,i,j) | l<-names,i<-rng,j<-rng] + + A "row" of left-right linked nodes is a move. It is defined by a + list of head names. + +> type Move = [(Hint,HeadName)] + + Initial hints are enforced by making them the only legal move for + that location. Blank entries with value 'u = V 0' have a move for + all possible values [V 1..V 9]. + +> parseSpec :: Spec -> [Move] +> parseSpec spec = +> let rowsFrom :: Hint -> [Move] +> rowsFrom (rc@(R r,C c),mv@(V v')) = +> if mv == u then [ rsyms v | v <- rng ] +> else [ rsyms v' ] +> where (B b) = lookupBlock ! rc +> rsyms :: A -> Move +> rsyms v = map ( (,) (rc,V v) ) [(0,r,c),(1,r,v),(2,c,v),(3,b,v)] +> in concatMap rowsFrom (assocs (parse spec)) + + mkDList creates doubly linked lists using a monadic smart + constructor and the recursive "mdo" notation as documented at + http://www.haskell.org/ghc/docs/latest/html/users_guide/syntax-extns.html#mdo-notation + http://www.cse.ogi.edu/PacSoft/projects/rmb/ + + For more fun with this, see the wiki page at + http://haskell.org/hawiki/TyingTheKnot + +> mkDList :: (MonadFix m) => (b -> a -> b -> m b) -> [a] -> m b +> mkDList _ [] = error "must have at least one element" +> mkDList mkNode xs = mdo (first,last) <- go last xs first +> return first +> where go prev [] next = return (next,prev) +> go prev (x:xs) next = mdo this <- mkNode prev x rest +> (rest,last) <- go this xs next +> return (this,last) + + toSimple takes a function and a header node and iterates (read . function) + until the header is reached again, but does not return the header + itself. + +> toSingle step header = loop =<< (read . step) header +> where loop y = if header/=y then liftM (y:) (read (step y) >>= loop) +> else return [] +> + + forEach is an optimization of (toSimple step header >>= mapM_ act) + +> forEach step header act = loop =<< (read . step) header +> where loop y = if header/=y then (act y >> (read (step y)) >>= loop) +> else return () + + Now make the root node and all the head nodes. This also exploits mdo: + +> makeHeads :: [HeadName] -> (ST st) (Head st) +> makeHeads names = mkDList makeHead names +> where makeHead before name after = mdo +> ~newTopNode <- liftM4 (Node ((R 0,C 0),V 0) newHead) (new newTopNode) (new newTopNode) +> (new newTopNode) (new newTopNode) +> newHead <- liftM3 (Head name newTopNode) +> (new 0) (new after) (new before) +> return newHead + + The Head nodes will be places in an array for easy lookup while building moves: + +> type HArray st = Array HeadName (Head st) +> hBounds = ((0,1,1),(3,9,9)) +> type Root st = (Head st,HArray st) + + The addMove function creates the (four) nodes that represent a move and adds + them to the data structure. The HArray in Root makes for a fast + lookup of the Head data. + +> addMove :: forall st. (Root st) -> Move -> (ST st) (Node st) +> addMove (_,ha) move = mkDList addNode move +> where addNode :: (Node st) -> (Hint,HeadName) -> (Node st) -> (ST st) (Node st) +> addNode before (hint,name) after = do +> let head = ha ! name +> let below = topNode head +> above <- read (up below) +> newNode <- liftM4 (Node hint head) (new above) (new below) +> (new before) (new after) +> write (down above) newNode +> write (up below) newNode +> modify (len head) succ +> l <- read (len head) +> seq l (return newNode) + + Create the column headers, including the fast lookup array. These + will be resused between puzzles. + +> initHA :: (ST st) (Root st) +> initHA = do +> root <- makeHeads headNames +> heads <- toSingle next root +> let ha = array hBounds (zip (map headName heads) heads) +> return (root,ha) + + Take the Root from initHA and a puzzle Spec and fill in all the Nodes. + +> initRoot :: (Root st) -> Spec -> (ST st) () +> initRoot root spec = do +> let moves = parseSpec spec +> mapM_ (addMove root) moves + + Return the column headers to their condition after initHA + +> resetRoot :: (Root st) -> (ST st) () +> resetRoot (root,ha) = do +> let heads@(first:_) = elems ha +> let resetHead head = do +> write (len head) 0 +> let node = topNode head +> write (down node) node +> write (up node) node +> reset (last:[]) = do +> write (prev root) last +> write (next root) first +> reset (before:xs@(head:[])) = do +> resetHead head +> write (prev head) before +> write (next head) root +> reset xs +> reset (before:xs@(head:after:_)) = do +> resetHead head +> write (prev head) before +> write (next head) after +> reset xs +> reset (root:heads) + + getBest iterates over the unmet constraints (i.e. the Head that are + reachable from root). It locates the one with the lowest number of + possible moves that will solve it, aborting early if it finds 0 or 1 + moves. + +> getBest :: (Head st) -> (ST st) (Maybe (Head st)) +> getBest root = do +> first <- read (next root) +> if first == root then return Nothing +> else do +> let findMin m best head | head == root = return (Just best) +> | otherwise = do +> l <- read (len head) +> if l <= 1 then return (Just head) +> else if l < m then findMin l head =<< read (next head) +> else findMin l best =<< read (next head) +> findMin 10 first first + + The unlink and relink operations are from where Knuth got the name + "dancing links". So long as "a" does not change in between, the + relink call will undo the unlink call. Similarly, the unconver will + undo the changes of cover and unconverOthers will undo coverOthers. + +> unlink :: (a->STRef st a) -> (a->STRef st a) -> a -> (ST st) () +> unlink prev next a = do +> before <- read (prev a) +> after <- read (next a) +> write (next before) after +> write (prev after) before + +> relink :: (a->STRef st a) -> (a->STRef st a) -> a -> (ST st) () +> relink prev next a = do +> before <- read (prev a) +> after <- read (next a) +> write (next before) a +> write (prev after) a + +> cover :: (Head st) -> (ST st) () +> cover head = do +> unlink prev next head +> let eachDown rr = forEach right rr eachRight +> eachRight nn = do +> unlink up down nn +> modify (len $ getHead nn) pred +> forEach down (topNode head) eachDown + +> uncover :: (Head st) -> (ST st) () +> uncover head = do +> let eachUp rr = forEach left rr eachLeft +> eachLeft nn = do +> modify (len $ getHead nn) succ +> relink up down nn +> forEach up (topNode head) eachUp +> relink prev next head + +> coverOthers :: (Node st) -> (ST st) () +> coverOthers node = forEach right node (cover . getHead) + +> uncoverOthers :: (Node st) -> (ST st) () +> uncoverOthers node = forEach left node (uncover . getHead) + + A helper function for gdance: + +> choicesToSpec :: [(Node st)] -> Spec +> choicesToSpec = Spec . (map getHint) + + This is the heart of the algorithm. I have altered it to return only + the first solution, or produce an error if none is found. + + Knuth used several goto links to do what is done below with tail + recursion. + +> gdance :: (Head st) -> (ST st) Spec -- [Spec] +> gdance root = +> let +> forward choices = do +> maybeHead <- getBest root +> case maybeHead of +> Nothing -> if null choices +> then error "No choices in forward" -- return [] -- for [Spec] +> else do -- nextSols <- recover choices -- for [Spec] +> return $ (choicesToSpec choices) -- :nextSols -- for [Spec] +> Just head -> do cover head +> startRow <- readSTRef (down (topNode head)) +> advance (startRow:choices) +> +> advance choices@(newRow:oldChoices) = do +> let endOfRows = topNode (getHead newRow) +> if (newRow == endOfRows) +> then do uncover (getHead newRow) +> if (null oldChoices) +> then error "No choices in advance" -- return [] -- for [Spec] +> else recover oldChoices +> else do coverOthers newRow +> forward choices +> +> recover (oldRow:oldChoices) = do +> uncoverOthers oldRow +> newRow <- readSTRef (down oldRow) +> advance (newRow:oldChoices) +> +> in forward [] + + + Convert a text board into a Spec + +> parseBoard :: String -> Spec +> parseBoard s = Spec (zip rcs vs'check) +> where rcs :: [(R,C)] +> rcs = [ (R r,C c) | r <- rng, c <- rng ] +> isUnset c = (c=='.') || (c==' ') || (c=='0') +> isHint c = ('1'<=c) && (c<='9') +> cs = take 81 $ filter (\c -> isUnset c || isHint c) s +> vs :: [V] +> vs = map (\c -> if isUnset c then u else (V $ digitToInt c)) cs +> vs'check = if 81==length vs then vs else error ("parse of board failed\n"++s) + + This is quite useful as a utility function which partitions the list into groups of n elements. + Used by showSpec. + +> groupTake :: Int->[a]->[[a]] +> groupTake n b = unfoldr foo b +> where foo [] = Nothing +> foo b = Just (splitAt n b) + + Make a nice 2D ascii board from the Spec (not used at the moment) + +> showSpec :: Spec -> String +> showSpec spec = let pa = parse spec +> g = groupTake 9 (map (\(V v) -> if v == 0 then '.' else intToDigit v) $ elems pa) +> addV line = concat $ intersperse "|" (groupTake 3 line) +> addH list = concat $ intersperse ["---+---+---"] (groupTake 3 list) +> in unlines $ addH (map addV g) + + One line display + +> showCompact spec = map (\(V v) -> intToDigit v) (elems (parse spec)) + + The main routine is designed to handle the input from http://www.csse.uwa.edu.au/~gordon/sudoku17 + +> main = do +> all <- getContents +> let puzzles = zip [1..] (map parseBoard (lines all)) +> root <- stToIO initHA +> let act :: (Int,Spec) -> IO () +> act (i,spec) = do +> answer <- stToIO (do initRoot root spec +> answer <- gdance (fst root) +> resetRoot root +> return answer) +> print (i,showCompact answer) +> mapM_ act puzzles + +> inits' xn@(_:_) = zipWith take [0..] $ map (const xn) $ undefined:xn +> inits' _ = undefined diff --git a/tests/examplefiles/Sudoku.lhs b/tests/examplefiles/Sudoku.lhs new file mode 100644 index 00000000..6829cf6c --- /dev/null +++ b/tests/examplefiles/Sudoku.lhs @@ -0,0 +1,382 @@ +% Copyright 2005 Brian Alliet + +\documentclass[11pt]{article} +\usepackage{palatino} +\usepackage{fullpage} +\usepackage{parskip} +\usepackage{lhs} + +\begin{document} + +\title{Sudoku Solver} +\author{Brian Alliet} +\maketitle + +\ignore{ +\begin{code} +module Sudoku ( + Sudoku, + makeSudoku, solve, eliminate, analyze, backtrack, + main + ) where + +import Array +import Monad +import List (union,intersperse,transpose,(\\),nub,nubBy) +\end{code} +} + +\section{Introduction} + +This Haskell module implements a solver for Sudoku~\footnote{http://en.wikipedia.org/wiki/Sudoku} puzzles. It can solve +any Sudoku puzzle, even those that require backtracking. + +\section{Data Types} + +\begin{code} +data CellState a = Known a | Unknown [a] | Impossible deriving Eq +\end{code} + +Each cell in a Sudoku grid can be in one of three states: ``Known'' if it has a known correct value~\footnote{Actually +this doesn't always means it is correct. While we are in the backtracking stage we make our guesses ``Known''.}, +``Unknown'' if there is still more than one possible correct value, or ``Impossible'' if there is no value that can +possibly fit the cell. Sudoku grids with ``Impossible'' cells are quickly discarded by the {\tt solve} function. + +\begin{code} +type Coords = (Int,Int) +type Grid a = Array Coords (CellState a) +newtype Sudoku a = Sudoku { unSudoku :: Grid a } deriving Eq +\end{code} + +We represent a Sudoku grid as an Array indexed by integer coordinates. We additionally define a newtype wrapper for the +grid. The smart constructor, {\tt makeSudoku} verifies some invariants before creating the Sudoku value. All the public +API functions operate on the Sudoku type. + +\begin{code} +instance Show a => Show (Sudoku a) where showsPrec p = showParen (p>0) . showsGrid . unSudoku +instance Show a => Show (CellState a) where showsPrec _ = showsCell +\end{code} + +We define {\tt Show} instances for the above types. + +\section{Internal Functions} + +\begin{code} +size :: Grid a -> Int +size = (+1).fst.snd.bounds +\end{code} + +{\tt size} returns the size (the width, height, and number of subboxes) for a Sudoku grid. We ensure Grid's are always +square and indexed starting at $(0,0)$ so simply incrementing either of the array's upper bounds is correct. + +\begin{code} +getRow,getCol,getBox :: Grid a -> Int -> [(Coords,CellState a)] +getRow grid r = [let l = (r,c) in (l,grid!l)|c <- [0..size grid - 1]] +getCol grid c = [let l = (r,c) in (l,grid!l)|r <- [0..size grid - 1]] +getBox grid b = [let l = (r,c) in (l,grid!l)|r <- [boxR..boxR+boxN-1],c <- [boxC..boxC+boxN-1]] + where + boxN = intSqrt (size grid); boxR = b `quot` boxN * boxN; boxC = b `rem` boxN * boxN + +getBoxOf :: Grid a -> Coords -> [(Coords,CellState a)] +getBoxOf grid (r,c) = grid `getBox` ((r `quot` boxN * boxN) + (c `quot` boxN)) + where boxN = intSqrt (size grid) +\end{code} + +{\tt getRow}, {\tt getCol}, and {\tt getBox} return the coordinates and values of the cell in row, column, or box +number {\tt n}, {\tt r}, or {\tt b}. + +\begin{code} +getNeighbors :: Eq a => Grid a -> Coords -> [(Coords,CellState a)] +getNeighbors grid l@(r,c) = filter ((/=l).fst) + $ foldr (union.($grid)) [] + [(`getRow`r),(`getCol`c),(`getBoxOf`l)] +\end{code} + +{\tt getNeighbors} returns the coordinates and values of all the neighbors of this cell. + +\begin{code} +impossible :: Eq a => Grid a -> Coords -> [a] +impossible grid l = map snd $ justKnowns $ grid `getNeighbors` l +\end{code} + +{\tt impossible} returns a list of impossible values for a given cell. The impossible values consist of the values any +``Known'' neighbors. + +\begin{code} +justUnknowns :: [(Coords,CellState a)] -> [(Coords,[a])] +justUnknowns = foldr (\c -> case c of (p,Unknown xs) -> ((p,xs):); _ -> id) [] + +justKnowns :: [(Coords,CellState a)] -> [(Coords,a)] +justKnowns = foldr (\c -> case c of (p,Known x) -> ((p,x):); _ -> id) [] +\end{code} + +{\tt justUnknowns} and {\tt justKnowns} return only the Known or Unknown values (with the constructor stripped off) +from a list of cells. + +\begin{code} +updateGrid :: Grid a -> [(Coords,CellState a)] -> Maybe (Grid a) +updateGrid _ [] = Nothing +updateGrid grid xs = Just $ grid // nubBy (\(x,_) (y,_) -> x==y) xs +\end{code} + +{\tt updateGrid} applies a set of updates to a grid and returns the new grid only if it was updated. + +\section{Public API} + +\begin{code} +makeSudoku :: (Num a, Ord a, Enum a) => [[a]] -> Sudoku a +makeSudoku xs + | not (all ((==size).length) xs) = error "error not a square" + | (intSqrt size)^(2::Int) /= size = error "error dims aren't perfect squares" + | any (\x -> x < 0 || x > fromIntegral size) (concat xs) = error "value out of range" + | otherwise = Sudoku (listArray ((0,0),(size-1,size-1)) states) + where + size = length xs + states = map f (concat xs) + f 0 = Unknown [1..fromIntegral size] + f x = Known x +\end{code} + +{\tt makeSudoku} makes a {\tt Sudoku} value from a list of numbers. The given matrix must be square and have dimensions +that are a perfect square. The possible values for each cell range from 1 to the dimension of the square with ``0'' +representing unknown values.\footnote{The rest of the code doesn't depend on any of this weird ``0'' is unknown +representation. In fact, it doesn't depend on numeric values at all. ``0'' is just used here because it makes +representing grids in Haskell source code easier.} + +\begin{code} +eliminate :: Eq a => Sudoku a -> Maybe (Sudoku a) +eliminate (Sudoku grid) = fmap Sudoku $ updateGrid grid changes >>= sanitize + where + changes = concatMap findChange $ assocs grid + findChange (l,Unknown xs) + = map ((,) l) + $ case filter (not.(`elem`impossible grid l)) xs of + [] -> return Impossible + [x] -> return $ Known x + xs' + | xs' /= xs -> return $ Unknown xs' + | otherwise -> mzero + findChange _ = mzero + sanitize grid = return $ grid // [(l,Impossible) | + (l,x) <- justKnowns changes, x `elem` impossible grid l] +\end{code} + +The {\tt eliminate} phase tries to remove possible choices for ``Unknowns'' based on ``Known'' values in the same row, +column, or box as the ``Unknown'' value. For each cell on the grid we find its ``neighbors'', that is, cells in the +same row, column, or box. Out of those neighbors we get a list of all the ``Known'' values. We can eliminate all of +these from our list of candidates for this cell. If we're lucky enough to eliminate all the candidates but one we have +a new ``Known'' value. If we're unlucky enough to have eliminates {\bf all} the possible candidates we have a new +``Impossible'' value. + +After iterating though every cell we make one more pass looking for conflicting changes. {\tt sanitize} marks cells as +``Impossible'' if we have conflicting ``Known'' values. + +\begin{code} +analyze :: Eq a => Sudoku a -> Maybe (Sudoku a) +analyze (Sudoku grid) = fmap Sudoku $ updateGrid grid $ nub [u | + f <- map ($grid) [getRow,getCol,getBox], + n <- [0..size grid - 1], + u <- unique (f n)] + where + unique xs = foldr f [] $ foldr (union.snd) [] unknowns \\ map snd (justKnowns xs) + where + unknowns = justUnknowns xs + f c = case filter ((c`elem`).snd) unknowns of + [(p,_)] -> ((p,Known c):) + _ -> id +\end{code} + +The {\tt analyze} phase tries to turn ``Unknowns'' into ``Knowns'' when a certain ``Unknown'' is the only cell that +contains a value needed in a given row, column, or box. We apply each of the functions {\tt getRow}, {\tt getCol}, and +{\tt getBox} to all the indices on the grid, apply {\tt unique} to each group, and update the array with the +results. {\tt unique} gets a list of all the unknown cells in the group and finds all the unknown values in each of +those cells. Each of these values are iterated though looking for a value that is only contained in one cell. If such a +value is found the cell containing it must be that value. + +\begin{code} +backtrack :: (MonadPlus m, Eq a) => Sudoku a -> m (Sudoku a) +backtrack (Sudoku grid) = case (justUnknowns (assocs grid)) of + [] -> return $ Sudoku grid + ((p,xs):_) -> msum $ map (\x -> solve $ Sudoku $ grid // [(p,Known x)]) xs +\end{code} + +Sometimes the above two phases still aren't enough to solve a puzzle. For these rare puzzles backtracking is required. +We attempt to solve the puzzle by replacing the first ``Unknown'' value with each of the candidate values and solving +the resulting puzzles. Hopefully at least one of our choices will result in a solvable puzzle. + +We could actually solve any puzzle using backtracking alone, although this would be very inefficient. The above +functions simplify most puzzles enough that the backtracking phase has to do hardly any work. + +\begin{code} +solve :: (MonadPlus m, Eq a) => Sudoku a -> m (Sudoku a) +solve sudoku = + case eliminate sudoku of + Just new + | any (==Impossible) (elems (unSudoku new))-> mzero + | otherwise -> solve new + Nothing -> case analyze sudoku of + Just new -> solve new + Nothing -> backtrack sudoku +\end{code} + +{\tt solve} glues all the above phases together. First we run the {\tt eliminate} phase. If that found the puzzle to +be unsolvable we abort immediately. If {\tt eliminate} changed the grid we go though the {\tt eliminate} phase again +hoping to eliminate more. Once {\tt eliminate} can do no more work we move on to the {\tt analyze} phase. If this +succeeds in doing some work we start over again with the {\tt eliminate} phase. Once {\tt analyze} can do no more work +we have no choice but to resort to backtracking. (However in most cases backtracking won't actually do anything because +the puzzle is already solved.) + +\begin{code} +showsCell :: Show a => CellState a -> ShowS +showsCell (Known x) = shows x +showsCell (Impossible) = showChar 'X' +showsCell (Unknown xs) = \rest -> ('(':) + $ foldr id (')':rest) + $ intersperse (showChar ' ') + $ map shows xs +\end{code} + +{\tt showCell} shows a cell. + +\begin{code} +showsGrid :: Show a => Grid a -> ShowS +showsGrid grid = showsTable [[grid!(r,c) | c <- [0..size grid-1]] | r <- [0..size grid-1]] +\end{code} + +{\tt showGrid} show a grid. + +\begin{code} +-- FEATURE: This is pretty inefficient +showsTable :: Show a => [[a]] -> ShowS +showsTable xs = (showChar '\n' .) $ showString $ unlines $ map (concat . intersperse " ") xs'' + where + xs' = (map.map) show xs + colWidths = map (max 2 . maximum . map length) (transpose xs') + xs'' = map (zipWith (\n s -> s ++ (replicate (n - length s) ' ')) colWidths) xs' +\end{code} + +{\tt showsTable} shows a table (or matrix). Every column has the same width so things line up. + +\begin{code} +intSqrt :: Integral a => a -> a +intSqrt n + | n < 0 = error "intSqrt: negative n" + | otherwise = f n + where + f x = if y < x then f y else x + where y = (x + (n `quot` x)) `quot` 2 +\end{code} + +{\tt intSqrt} is Newton`s Iteration for finding integral square roots. + +\ignore{ +\begin{code} +test :: Sudoku Int +test = makeSudoku [ + [0,6,0,1,0,4,0,5,0], + [0,0,8,3,0,5,6,0,0], + [2,0,0,0,0,0,0,0,1], + [8,0,0,4,0,7,0,0,6], + [0,0,6,0,0,0,3,0,0], + [7,0,0,9,0,1,0,0,4], + [5,0,0,0,0,0,0,0,2], + [0,0,7,2,0,6,9,0,0], + [0,4,0,5,0,8,0,7,0]] + +test2 :: Sudoku Int +test2 = makeSudoku [ + [0,7,0,0,0,0,8,0,0], + [0,0,0,2,0,4,0,0,0], + [0,0,6,0,0,0,0,3,0], + [0,0,0,5,0,0,0,0,6], + [9,0,8,0,0,2,0,4,0], + [0,5,0,0,3,0,9,0,0], + [0,0,2,0,8,0,0,6,0], + [0,6,0,9,0,0,7,0,1], + [4,0,0,0,0,3,0,0,0]] + +testSmall :: Sudoku Int +testSmall = makeSudoku [ + [1,0,0,0,0,0,0,0,0], + [0,0,2,7,4,0,0,0,0], + [0,0,0,5,0,0,0,0,4], + [0,3,0,0,0,0,0,0,0], + [7,5,0,0,0,0,0,0,0], + [0,0,0,0,0,9,6,0,0], + [0,4,0,0,0,6,0,0,0], + [0,0,0,0,0,0,0,7,1], + [0,0,0,0,0,1,0,3,0]] + +testHard :: Sudoku Int +testHard = makeSudoku [ + [0,0,0,8,0,2,0,0,0], + [5,0,0,0,0,0,0,0,1], + [0,0,6,0,5,0,3,0,0], + [0,0,9,0,1,0,8,0,0], + [1,0,0,0,0,0,0,0,2], + [0,0,0,9,0,7,0,0,0], + [0,6,1,0,3,0,7,8,0], + [0,5,0,0,0,0,0,4,0], + [0,7,2,0,4,0,1,5,0]] + +testHard2 :: Sudoku Int +testHard2 = makeSudoku [ + [3,0,0,2,0,0,9,0,0], + [0,0,0,0,0,0,0,0,5], + [0,7,0,1,0,4,0,0,0], + [0,0,9,0,0,0,8,0,0], + [5,0,0,0,7,0,0,0,6], + [0,0,1,0,0,0,2,0,0], + [0,0,0,3,0,9,0,4,0], + [8,0,0,0,0,0,0,0,0], + [0,0,6,0,0,5,0,0,7]] + +testHW :: Sudoku Int +testHW = makeSudoku [ + [0,0,0,1,0,0,7,0,2], + [0,3,0,9,5,0,0,0,0], + [0,0,1,0,0,2,0,0,3], + [5,9,0,0,0,0,3,0,1], + [0,2,0,0,0,0,0,7,0], + [7,0,3,0,0,0,0,9,8], + [8,0,0,2,0,0,1,0,0], + [0,0,0,0,8,5,0,6,0], + [6,0,5,0,0,9,0,0,0]] + +testTough :: Sudoku Int +testTough = makeSudoku $ map (map read . words) $ lines $ + "8 3 0 0 0 0 0 4 6\n"++ + "0 2 0 1 0 4 0 3 0\n"++ + "0 0 0 0 0 0 0 0 0\n"++ + "0 0 2 9 0 6 5 0 0\n"++ + "1 4 0 0 0 0 0 2 3\n"++ + "0 0 5 4 0 3 1 0 0\n"++ + "0 0 0 0 0 0 0 0 0\n"++ + "0 6 0 3 0 8 0 7 0\n"++ + "9 5 0 0 0 0 0 6 2\n" + +testDiabolical :: Sudoku Int +testDiabolical = makeSudoku $ map (map read . words) $ lines $ + "8 0 0 7 0 1 0 0 2\n"++ + "0 0 6 0 0 0 7 0 0\n"++ + "0 1 7 0 0 0 8 9 0\n"++ + "0 0 0 1 7 3 0 0 0\n"++ + "7 0 0 0 0 0 0 0 6\n"++ + "0 0 0 9 5 6 0 0 0\n"++ + "0 9 5 0 0 0 4 1 0\n"++ + "0 0 8 0 0 0 5 0 0\n"++ + "3 0 0 6 0 5 0 0 7\n" + +main :: IO () +main = do + let + solve' p = case solve p of + [] -> fail $ "couldn't solve: " ++ show p + sols -> return sols + mapM_ (\p -> solve' p >>= putStrLn.show) [test,test2,testSmall,testHard,testHard2,testHW,testTough,testDiabolical] + return () + +\end{code} +} + +\end{document} |