summaryrefslogtreecommitdiff
path: root/pypers/oxford/loops.txt
diff options
context:
space:
mode:
Diffstat (limited to 'pypers/oxford/loops.txt')
-rwxr-xr-xpypers/oxford/loops.txt457
1 files changed, 457 insertions, 0 deletions
diff --git a/pypers/oxford/loops.txt b/pypers/oxford/loops.txt
new file mode 100755
index 0000000..cb0bfcb
--- /dev/null
+++ b/pypers/oxford/loops.txt
@@ -0,0 +1,457 @@
+Lecture 1: Loops (i.e. iterators & generators)
+==============================================
+
+Part I: iterators
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+Iterators are everywhere
+--------------------------------
+
+>>> for i in 1, 2, 3:
+... print i
+1
+2
+3
+
+The 'for' loop is using *iterators* internally::
+
+ it = iter((1,2,3))
+ while True:
+ try:
+ print it.next()
+ except StopIteration:
+ break
+
+Iterables and iterators
+--------------------------
+
+*Iterable* = anything you can loop over = any sequence + any object with an __iter__ method;
+
+Not every sequence has an __iter__ method:
+
+>>> "hello".__iter__()
+Traceback (most recent call last):
+ ...
+AttributeError: 'str' object has no attribute '__iter__'
+
+*Iterator* = any object with a .next method and an __iter__ method returning self
+
+Simpler way to get an iterator
+--------------------------------------------------------
+
+>>> it = iter("hello")
+>>> it.next()
+'h'
+>>> it.next()
+'e'
+>>> it.next()
+'l'
+>>> it.next()
+'l'
+>>> it.next()
+'o'
+>>> it.next()
+Traceback (most recent call last):
+ ...
+StopIteration
+
+Sentinel syntax iter(callable, sentinel)
+--------------------------------------------
+
+Example::
+
+ $ echo -e "value1\nvalue2\nEND\n" > data.txt
+ $ python -c "print list(iter(file('data.txt').readline, 'END\n'))"
+ ['value1\n', 'value2\n']
+
+Beware of infinite iterators:
+
+>>> repeat = iter(lambda : "some value", "")
+>>> repeat.next()
+'some value'
+
+Second simpler way to get an iterator: generator-expressions
+-------------------------------------------------------------
+
+>>> squares = (i*i for i in range(1,11))
+>>> list(squares)
+[1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
+
+Excessive parenthesis can be skipped, so use
+
+>>> dict((i, i*i) for i in range(1,11))
+{1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36, 7: 49, 8: 64, 9: 81, 10: 100}
+
+instead of
+
+>>> dict([(i, i*i) for i in range(1,11)])
+{1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36, 7: 49, 8: 64, 9: 81, 10: 100}
+
+(as usual, the most elegant version is the most efficient).
+
+Iteration caveats
+--------------------------
+
+>>> ls = [i for i in (1,2,3)]
+>>> i
+3
+
+>>> it = (j for j in (1,2,3))
+>>> j
+Traceback (most recent call last):
+ ...
+NameError: name 'j' is not defined
+
+A subtler example:
+
+>>> ls = [lambda :i for i in (1,2,3)]
+>>> ls[0]()
+3
+
+instead
+
+>>> it = (lambda :i for i in (1,2,3))
+>>> it.next()()
+1
+>>> it.next()()
+2
+>>> it.next()()
+3
+
+*seems* to be working but it is not really the case:
+
+>>> it = (lambda :i for i in (1,2,3))
+>>> f1 = it.next()
+>>> f2 = it.next()
+>>> f3 = it.next()
+>>> f1()
+3
+
+The reason is that Python does LATE binding *always*. The solution is ugly:
+
+>>> it = list(lambda i=i:i for i in (1,2,3))
+>>> it[0]()
+1
+>>> it[1]()
+2
+>>> it[2]()
+3
+
+Part II: generators
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+Trivial example:
+
+>>> def gen123(): # "function" which returns an iterator over the values 1, 2, 3
+... yield 1
+... yield 2
+... yield 3
+...
+>>> it = gen123()
+>>> it.next()
+1
+>>> it.next()
+2
+>>> it.next()
+3
+>>> it.next()
+Traceback (most recent call last):
+ ...
+StopIteration
+
+Real life example: using generators to generate HTML tables
+
+::
+
+ #<htmltable.py>
+
+ def HTMLTablegen(table):
+ yield "<table>"
+ for row in table:
+ yield "<tr>"
+ for col in row:
+ yield "<td>%s</td>" % col
+ yield "</tr>"
+ yield "</table>"
+
+ def test():
+ return "\n".join(HTMLTablegen([["Row", "City"],
+ [1,'London'], [2, 'Oxford']]))
+
+ if __name__ == "__main__": # example
+ print test()
+
+ #</htmltable.py>
+
+>>> from htmltable import test
+>>> print test()
+<table>
+<tr>
+<td>Row</td>
+<td>City</td>
+</tr>
+<tr>
+<td>1</td>
+<td>London</td>
+</tr>
+<tr>
+<td>2</td>
+<td>Oxford</td>
+</tr>
+</table>
+
+A simple recipe: skip redundant
+---------------------------------
+
+How to remove duplicates by keeping the order::
+
+ #<skip_redundant.py>
+
+ def skip_redundant(iterable, skipset=None):
+ "Redundant items are repeated items or items in the original skipset."
+ if skipset is None: skipset = set()
+ for item in iterable:
+ if item not in skipset:
+ skipset.add(item)
+ yield item
+
+ #</skip_redundant.py>
+
+>>> from skip_redundant import skip_redundant
+>>> print list(skip_redundant("<hello, world>", skipset=set("<>")))
+['h', 'e', 'l', 'o', ',', ' ', 'w', 'r', 'd']
+
+Another real life example: working with nested structures
+----------------------------------------------------------
+
+::
+
+ #<walk.py>
+
+ def walk(iterable, level=0):
+ for obj in iterable:
+ if not hasattr(obj, "__iter__"): # atomic object
+ yield obj, level
+ else: # composed object: iterate again
+ for subobj, lvl in walk(obj, level + 1):
+ yield subobj, lvl
+
+ def flatten(iterable):
+ return (obj for obj, level in walk(iterable))
+
+ def pprint(iterable):
+ for obj, level in walk(iterable):
+ print " "*level, obj
+
+ #</walk.py>
+
+>>> from walk import flatten, pprint
+>>> nested_ls = [1,[2,[3,[[[4,5],6]]]],7]
+>>> pprint(nested_ls)
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+>>> pprint(flatten(nested_ls))
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+
+Another typical use case for generators: parsers
+---------------------------------------------------------
+
+A very stripped down parser for nested expressions
+
+::
+
+ #<sexpr2indent.py>
+ """A simple s-expression formatter."""
+
+ import re
+
+ def parse(sexpr):
+ position = 0
+ nesting_level = 0
+ paren = re.compile(r"(?P<paren_beg>\()|(?P<paren_end>\))")
+ while True:
+ match = paren.search(sexpr, position)
+ if match:
+ yield nesting_level, sexpr[position: match.start()]
+ if match.lastgroup == "paren_beg":
+ nesting_level += 1
+ elif match.lastgroup == "paren_end":
+ nesting_level -= 1
+ position = match.end()
+ else:
+ break
+
+ def sexpr_indent(sexpr):
+ for nesting, text in parse(sexpr.replace("\n", "")):
+ if text.strip(): print " "*nesting, text
+
+ #</sexpr2indent.py>
+
+>>> from sexpr2indent import sexpr_indent
+>>> sexpr_indent("""\
+... (html (head (title Example)) (body (h1 s-expr formatter example)
+... (a (@ (href http://www.example.com)) A link)))""")
+... #doctest: +NORMALIZE_WHITESPACE
+ html
+ head
+ title Example
+ body
+ h1 s-expr formatter example
+ a
+ @
+ href http://www.example.com
+ A link
+
+
+Other kinds of iterables
+------------------------------------------------
+
+The following class generates iterable which are not iterators:
+::
+
+ #<reiterable.py>
+
+ class ReIter(object):
+ "A re-iterable object."
+ def __iter__(self):
+ yield 1
+ yield 2
+ yield 3
+
+ #</reiterable.py>
+
+>>> from reiterable import ReIter
+>>> rit = ReIter()
+>>> list(rit)
+[1, 2, 3]
+>>> list(rit) # it is reiterable!
+[1, 2, 3]
+
+The itertools module
+----------------------------------------------------
+
+ - count([n]) --> n, n+1, n+2, ...
+ - cycle(p) --> p0, p1, ... plast, p0, p1, ...
+ - repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times
+ - izip(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ...
+ - ifilter(pred, seq) --> elements of seq where pred(elem) is True
+ - ifilterfalse(pred, seq) --> elements of seq where pred(elem) is False
+ - islice(seq, [start,] stop [, step]) --> elements from seq[start:stop:step]
+ - imap(fun, p, q, ...) --> fun(p0, q0), fun(p1, q1), ...
+ - starmap(fun, seq) --> fun(\*seq[0]), fun(\*seq[1]), ...
+ - tee(it, n=2) --> (it1, it2 , ... itn) splits one iterator into n
+ - chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ...
+ - takewhile(pred, seq) --> seq[0], seq[1], until pred fails
+ - dropwhile(pred, seq) --> seq[n], seq[n+1], starting when pred fails
+ - groupby(iterable[, keyfunc]) --> sub-iterators grouped by value of keyfunc(v)
+
+anyTrue
+------------------------------
+
+>>> import itertools
+>>> def anyTrue(predicate, iterable):
+... return True in itertools.imap(predicate, iterable)
+...
+>>> fname = "picture.gif"
+>>> anyTrue(fname.endswith, ".jpg .gif .png".split())
+True
+
+AnyTrue does *short-circuit*:
+
+>>> def is3(i):
+... print "i=%s" % i
+... return i == 3
+
+>>> anyTrue(is3, range(10))
+i=0
+i=1
+i=2
+i=3
+True
+
+chop
+----------------------
+
+You want to chop an iterable in batches of a given size:
+
+>>> from chop import chop
+>>> list(chop([1, 2, 3, 4], 2))
+[[1, 2], [3, 4]]
+>>> list(chop([1, 2, 3, 4, 5, 6, 7],3))
+[[1, 2, 3], [4, 5, 6], [7]]
+
+Here is a possible implementation::
+
+ #<chop.py>
+
+ # see also http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/303279
+
+ import itertools
+
+ def chop(iterable, batchsize):
+ it = iter(iterable)
+ while True:
+ batch = list(itertools.islice(it, batchsize))
+ if batch: yield batch
+ else: break
+
+ #</chop.py>
+
+For people thinking Python is too readable, here is a one-liner:
+
+>>> chop = lambda it, n : itertools.izip(*(iter(it),)*n)
+...
+>>> list(chop([1,2,3,4], 2))
+[(1, 2), (3, 4)]
+
+tee
+-----------------------
+
+To make copies of iterables; typically used in parsers:
+
+>>> from itertools import tee, chain, izip
+>>> chars, prevs = tee("abc")
+>>> prevs = chain([None], prevs)
+>>> for char, prev in izip(chars, prevs):
+... print char, prev
+...
+a None
+b a
+c b
+
+Grouping and sorting
+----------------------
+
+>>> from itertools import groupby
+>>> from operator import itemgetter
+
+>>> NAME, AGE = 0, 1
+>>> query_result = ("Smith", 34), ("Donaldson", 34), ("Lee", 22), ("Orr", 22)
+
+Grouping together people of the same age:
+
+>>> for k, g in groupby(query_result, key=itemgetter(AGE)):
+... print k, list(g)
+...
+34 [('Smith', 34), ('Donaldson', 34)]
+22 [('Lee', 22), ('Orr', 22)]
+
+Sorting by name:
+
+>>> for tup in sorted(query_result, key=itemgetter(NAME)):
+... print tup
+('Donaldson', 34)
+('Lee', 22)
+('Orr', 22)
+('Smith', 34)